Skip to content

Commit 80fa36c

Browse files
committed
Explanation comment
1 parent a7758a5 commit 80fa36c

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

object_store/src/azure/client.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,40 @@ fn marker_for_offset(offset: &str, is_emulator: bool) -> String {
560560
if is_emulator {
561561
return offset.to_string();
562562
} else {
563+
// Here we reconstruct an Azure marker (continuation token) from a key to be able to seek
564+
// into an arbitrary position in the key space.
565+
// The current format (July 2024) for the marker is as follows:
566+
//
567+
// +-> unpadded length of next field
568+
// |
569+
// | +-> unpadded length of base64 encoded field
570+
// | |
571+
// | | +-> base64 encoded field with padding characters (=) replaced with -
572+
// | | |
573+
// 2!72!MDAwMDA4IWZpbGUudHh0ITAwMDAyOCE5OTk5LTEyLTMxVDIzOjU5OjU5Ljk5OTk5OTlaIQ--
574+
// | | ^
575+
// terminators |
576+
// |
577+
// +------------+
578+
// Decoding the |base64 field| gives:
579+
// +------------+
580+
//
581+
// +-> length of key field padded to 6 digits
582+
// |
583+
// | +-> key to start listing at
584+
// | |
585+
// | | +-> length of timestamp field padded to 6 digits
586+
// | | |
587+
// | | | +-> constant max timestamp field
588+
// | | | |
589+
// 000008!file.txt!000028!9999-12-31T23:59:59.9999999Z!
590+
// | | | |
591+
// +----> field terminators <-------------------+
592+
//
593+
// When reconstructing we add a space character (ASCII 0x20) to the end of the key to change the
594+
// `start_at` behavior into a `start_after` behavior as the space character is the first valid character
595+
// in the lexicographical order.
596+
563597
let encoded_part = BASE64_STANDARD.encode(
564598
&format!("{:06}!{} !000028!9999-12-31T23:59:59.9999999Z!", offset.len() + 1, offset)
565599
).replace("=", "-");

0 commit comments

Comments
 (0)