From 1b270f1c910c1edd9c648e94d858cdcce6894411 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 12:32:46 +0530 Subject: [PATCH 01/17] Page List Envelope Logic --- modules/rntuple.mjs | 49 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index a76c2075c..4211c8d09 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -349,16 +349,58 @@ _readClusterGroups(reader) { numClusters = reader.readU32(); console.log(`Cluster Record Size: ${clusterRecordSize}`); - console.log(`Min Entry: ${minEntry}, Entry Span: ${entrySpan}, Num Clusters: ${numClusters}`); + const pageListSeek = reader.readU64(), + pageListNBytes = reader.readU32(), + pageListLen = reader.readU32(), - clusterGroups.push({ + + group = { minEntry, entrySpan, numClusters, - }); + pageListLocator: { + seek: pageListSeek, + nbytes: pageListNBytes, + len: pageListLen + } + }; + console.log(`clusterGroup[${i}]:`, group); + + clusterGroups.push(group); } this.clusterGroups = clusterGroups; + if (clusterGroups.length > 0) + this.pageListLocator = clusterGroups[0].pageListLocator; +} + +_readPageListEnvelope(reader) { + // Read the envelope metadata + this._readEnvelopeMetadata(reader); + // Page list checksum (64-bit xxhash3) + const pageListChecksum = reader.readU64(); + console.log('Page List Checksum:', pageListChecksum); + + // Cluster summary Record Frame + const clusterListSize = reader.readS64(); + if (clusterListSize >= 0) + throw new Error('Expected list frame for cluster summary'); + + const clusterCount = reader.readU32(); + console.log('Cluster Count:', clusterCount); + + for (let i = 0; i < clusterCount; ++i) { + const recordSize = reader.readS64(), + firstEntry = reader.readU64(), + combined = reader.readU64(), + flags = Number(combined & 0xFFn), // lower 8 bits + numEntries = combined >> 8n; // higher 56 bits + + console.log(`Cluster ${i}: RecordSize=${recordSize} First=${firstEntry} Num=${numEntries} Flags=${flags} `); + if ((flags & 0x01) !== 0) + throw new Error('Reserved flag 0x01 for sharded clusters is set'); + } + //TODO: Read top-most list frame for clusters (page locations) } } @@ -392,6 +434,7 @@ async function readHeaderFooter(tuple) { tuple.builder.deserializeFooter(footer_blob); + return true; }); }); From 61fe6bd50189f8fd5d9f8546360a3a631ab1eac4 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 12:33:57 +0530 Subject: [PATCH 02/17] removed warnings --- modules/rntuple.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 4211c8d09..7eb8a3b26 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -400,7 +400,7 @@ _readPageListEnvelope(reader) { if ((flags & 0x01) !== 0) throw new Error('Reserved flag 0x01 for sharded clusters is set'); } - //TODO: Read top-most list frame for clusters (page locations) + // TODO: Read top-most list frame for clusters (page locations) } } From 5f08832f3e868f02d767737452a841220235a9c3 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 13:57:07 +0530 Subject: [PATCH 03/17] Called page List Envelope inside _readClusterGroups method --- modules/rntuple.mjs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 7eb8a3b26..410fa572a 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -349,34 +349,27 @@ _readClusterGroups(reader) { numClusters = reader.readU32(); console.log(`Cluster Record Size: ${clusterRecordSize}`); - const pageListSeek = reader.readU64(), - pageListNBytes = reader.readU32(), - pageListLen = reader.readU32(), + console.log(`Cluster Group ${i}: Min Entry=${minEntry}, Entry Span=${entrySpan}, Num Clusters=${numClusters}`); - - group = { + const group = { minEntry, entrySpan, numClusters, - pageListLocator: { - seek: pageListSeek, - nbytes: pageListNBytes, - len: pageListLen - } }; console.log(`clusterGroup[${i}]:`, group); clusterGroups.push(group); - } + console.log(`Reading Page List Envelope for Cluster Group ${i}`); + const pageList = this._readPageListEnvelope(reader); + group.pageList = pageList; + } this.clusterGroups = clusterGroups; - if (clusterGroups.length > 0) - this.pageListLocator = clusterGroups[0].pageListLocator; } _readPageListEnvelope(reader) { // Read the envelope metadata - this._readEnvelopeMetadata(reader); + this._readEnvelopeMetadata(reader); // Page list checksum (64-bit xxhash3) const pageListChecksum = reader.readU64(); console.log('Page List Checksum:', pageListChecksum); From 9e2d95b529a94a9261aa0d5ae3a9cd07159d5a18 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 12:32:46 +0530 Subject: [PATCH 04/17] Page List Envelope Logic --- modules/rntuple.mjs | 49 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index d9ac0f38d..08e76ba3b 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -364,16 +364,58 @@ _readClusterGroups(reader) { numClusters = reader.readU32(); console.log(`Cluster Record Size: ${clusterRecordSize}`); - console.log(`Min Entry: ${minEntry}, Entry Span: ${entrySpan}, Num Clusters: ${numClusters}`); + const pageListSeek = reader.readU64(), + pageListNBytes = reader.readU32(), + pageListLen = reader.readU32(), - clusterGroups.push({ + + group = { minEntry, entrySpan, numClusters, - }); + pageListLocator: { + seek: pageListSeek, + nbytes: pageListNBytes, + len: pageListLen + } + }; + console.log(`clusterGroup[${i}]:`, group); + + clusterGroups.push(group); } this.clusterGroups = clusterGroups; + if (clusterGroups.length > 0) + this.pageListLocator = clusterGroups[0].pageListLocator; +} + +_readPageListEnvelope(reader) { + // Read the envelope metadata + this._readEnvelopeMetadata(reader); + // Page list checksum (64-bit xxhash3) + const pageListChecksum = reader.readU64(); + console.log('Page List Checksum:', pageListChecksum); + + // Cluster summary Record Frame + const clusterListSize = reader.readS64(); + if (clusterListSize >= 0) + throw new Error('Expected list frame for cluster summary'); + + const clusterCount = reader.readU32(); + console.log('Cluster Count:', clusterCount); + + for (let i = 0; i < clusterCount; ++i) { + const recordSize = reader.readS64(), + firstEntry = reader.readU64(), + combined = reader.readU64(), + flags = Number(combined & 0xFFn), // lower 8 bits + numEntries = combined >> 8n; // higher 56 bits + + console.log(`Cluster ${i}: RecordSize=${recordSize} First=${firstEntry} Num=${numEntries} Flags=${flags} `); + if ((flags & 0x01) !== 0) + throw new Error('Reserved flag 0x01 for sharded clusters is set'); + } + //TODO: Read top-most list frame for clusters (page locations) } } @@ -407,6 +449,7 @@ async function readHeaderFooter(tuple) { tuple.builder.deserializeFooter(footer_blob); + return true; }); }); From efec327a78bcd78637269252934c4098be6577e6 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 12:33:57 +0530 Subject: [PATCH 05/17] removed warnings --- modules/rntuple.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 08e76ba3b..298089ead 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -415,7 +415,7 @@ _readPageListEnvelope(reader) { if ((flags & 0x01) !== 0) throw new Error('Reserved flag 0x01 for sharded clusters is set'); } - //TODO: Read top-most list frame for clusters (page locations) + // TODO: Read top-most list frame for clusters (page locations) } } From 6708a14301e907df7361f1265dfbd39a44a55859 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 13:57:07 +0530 Subject: [PATCH 06/17] Called page List Envelope inside _readClusterGroups method --- modules/rntuple.mjs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 298089ead..bdc8b5682 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -364,34 +364,27 @@ _readClusterGroups(reader) { numClusters = reader.readU32(); console.log(`Cluster Record Size: ${clusterRecordSize}`); - const pageListSeek = reader.readU64(), - pageListNBytes = reader.readU32(), - pageListLen = reader.readU32(), + console.log(`Cluster Group ${i}: Min Entry=${minEntry}, Entry Span=${entrySpan}, Num Clusters=${numClusters}`); - - group = { + const group = { minEntry, entrySpan, numClusters, - pageListLocator: { - seek: pageListSeek, - nbytes: pageListNBytes, - len: pageListLen - } }; console.log(`clusterGroup[${i}]:`, group); clusterGroups.push(group); - } + console.log(`Reading Page List Envelope for Cluster Group ${i}`); + const pageList = this._readPageListEnvelope(reader); + group.pageList = pageList; + } this.clusterGroups = clusterGroups; - if (clusterGroups.length > 0) - this.pageListLocator = clusterGroups[0].pageListLocator; } _readPageListEnvelope(reader) { // Read the envelope metadata - this._readEnvelopeMetadata(reader); + this._readEnvelopeMetadata(reader); // Page list checksum (64-bit xxhash3) const pageListChecksum = reader.readU64(); console.log('Page List Checksum:', pageListChecksum); From 8679ad68d3838dd8f4c7b410766aaa79e5931369 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Fri, 20 Jun 2025 17:43:00 +0530 Subject: [PATCH 07/17] Added a locator method --- modules/rntuple.mjs | 73 +++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index bdc8b5682..4a5174fb9 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -364,24 +364,45 @@ _readClusterGroups(reader) { numClusters = reader.readU32(); console.log(`Cluster Record Size: ${clusterRecordSize}`); - console.log(`Cluster Group ${i}: Min Entry=${minEntry}, Entry Span=${entrySpan}, Num Clusters=${numClusters}`); + + // Locator method to get the page list locator offset + const pageListLocator = this._readLocator(reader); + + // Seek to the page list offset + reader.seek(pageListLocator.fOffset); + console.log('Page List Locator:', pageListLocator); + console.log(`Page List Locator Offset (hex): 0x${pageListLocator.offset.toString(16).toUpperCase()}`); + // Deserialize the Page List Envelope from there + this._readPageListEnvelope(reader); + const group = { minEntry, entrySpan, numClusters, - }; - console.log(`clusterGroup[${i}]:`, group); - + + }; + clusterGroups.push(group); - - console.log(`Reading Page List Envelope for Cluster Group ${i}`); - const pageList = this._readPageListEnvelope(reader); - group.pageList = pageList; } this.clusterGroups = clusterGroups; } +_readLocator(reader) { + const sizeAndType = reader.readU32(), // 4 bytes: size + T bit + type = sizeAndType & 1, // last bit + size = sizeAndType >>> 1, // top 31 bits + offset = reader.readU64(); // 8 bytes: offset + // TODO : need to do the case for t!=0 + if (type !== 0) + throw new Error('Non-standard locators (T=1) not supported yet.'); + return { + type, + size, + offset + }; +} + _readPageListEnvelope(reader) { // Read the envelope metadata this._readEnvelopeMetadata(reader); @@ -390,24 +411,24 @@ _readPageListEnvelope(reader) { console.log('Page List Checksum:', pageListChecksum); // Cluster summary Record Frame - const clusterListSize = reader.readS64(); - if (clusterListSize >= 0) - throw new Error('Expected list frame for cluster summary'); - - const clusterCount = reader.readU32(); - console.log('Cluster Count:', clusterCount); - - for (let i = 0; i < clusterCount; ++i) { - const recordSize = reader.readS64(), - firstEntry = reader.readU64(), - combined = reader.readU64(), - flags = Number(combined & 0xFFn), // lower 8 bits - numEntries = combined >> 8n; // higher 56 bits - - console.log(`Cluster ${i}: RecordSize=${recordSize} First=${firstEntry} Num=${numEntries} Flags=${flags} `); - if ((flags & 0x01) !== 0) - throw new Error('Reserved flag 0x01 for sharded clusters is set'); - } + // const clusterListSize = reader.readS64(); + // if (clusterListSize >= 0) + // throw new Error('Expected list frame for cluster summary'); + + // const clusterCount = reader.readU32(); + // console.log('Cluster Count:', clusterCount); + + // for (let i = 0; i < clusterCount; ++i) { + // const recordSize = reader.readS64(), + // firstEntry = reader.readU64(), + // combined = reader.readU64(), + // flags = Number(combined & 0xFFn), // lower 8 bits + // numEntries = combined >> 8n; // higher 56 bits + + // console.log(`Cluster ${i}: RecordSize=${recordSize} First=${firstEntry} Num=${numEntries} Flags=${flags} `); + // if ((flags & 0x01) !== 0) + // throw new Error('Reserved flag 0x01 for sharded clusters is set'); + // } // TODO: Read top-most list frame for clusters (page locations) } From 41649cbce2e2f6ea80a61ec9143207e2ef677ec9 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Fri, 20 Jun 2025 17:54:04 +0530 Subject: [PATCH 08/17] added locator method --- modules/rntuple.mjs | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 4a5174fb9..a9847fa2a 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -374,8 +374,6 @@ _readClusterGroups(reader) { console.log(`Page List Locator Offset (hex): 0x${pageListLocator.offset.toString(16).toUpperCase()}`); // Deserialize the Page List Envelope from there this._readPageListEnvelope(reader); - - const group = { minEntry, entrySpan, From 4a715ffed65674c81416f4029d0035f103eaadc3 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Sat, 21 Jun 2025 11:28:32 +0530 Subject: [PATCH 09/17] Correct Page list Envelope Offset --- modules/rntuple.mjs | 47 +++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index a9847fa2a..e5a579163 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -20,8 +20,14 @@ class RBufferReader { // Move to a specific position in the buffer seek(position) { + if (typeof position === 'bigint') { + if (position > BigInt(Number.MAX_SAFE_INTEGER)) + throw new Error(`Offset too large to seek safely: ${position}`); + this.offset = Number(position); + } else this.offset = position; - } +} + // Read unsigned 8-bit integer (1 BYTE) readU8() { @@ -361,26 +367,26 @@ _readClusterGroups(reader) { const clusterRecordSize = reader.readS64(), minEntry = reader.readU64(), entrySpan = reader.readU64(), - numClusters = reader.readU32(); + numClusters = reader.readU32(), + pagelength = reader.readU32(); console.log(`Cluster Record Size: ${clusterRecordSize}`); // Locator method to get the page list locator offset const pageListLocator = this._readLocator(reader); - // Seek to the page list offset - reader.seek(pageListLocator.fOffset); - console.log('Page List Locator:', pageListLocator); + // Seek to the page list offset + reader.seek(pageListLocator.offset); + console.log('Page Length', pagelength); + console.log('Page List Locator:', pageListLocator); console.log(`Page List Locator Offset (hex): 0x${pageListLocator.offset.toString(16).toUpperCase()}`); // Deserialize the Page List Envelope from there - this._readPageListEnvelope(reader); + // this._readPageListEnvelope(reader); const group = { - minEntry, - entrySpan, - numClusters, - + minEntry, + entrySpan, + numClusters, }; - clusterGroups.push(group); } this.clusterGroups = clusterGroups; @@ -388,25 +394,28 @@ _readClusterGroups(reader) { _readLocator(reader) { const sizeAndType = reader.readU32(), // 4 bytes: size + T bit - type = sizeAndType & 1, // last bit - size = sizeAndType >>> 1, // top 31 bits - offset = reader.readU64(); // 8 bytes: offset + type = sizeAndType & 1; // TODO : need to do the case for t!=0 if (type !== 0) throw new Error('Non-standard locators (T=1) not supported yet.'); + else { + const size = sizeAndType, + offset = reader.readU64(); // 8 bytes: offset return { type, size, offset }; } +} -_readPageListEnvelope(reader) { + +// _readPageListEnvelope(reader) { // Read the envelope metadata - this._readEnvelopeMetadata(reader); + // this._readEnvelopeMetadata(reader); // Page list checksum (64-bit xxhash3) - const pageListChecksum = reader.readU64(); - console.log('Page List Checksum:', pageListChecksum); + // const pageListChecksum = reader.readU64(); + // console.log('Page List Checksum:', pageListChecksum); // Cluster summary Record Frame // const clusterListSize = reader.readS64(); @@ -428,7 +437,7 @@ _readPageListEnvelope(reader) { // throw new Error('Reserved flag 0x01 for sharded clusters is set'); // } // TODO: Read top-most list frame for clusters (page locations) -} +// } } From f7668ccf7a756673b0cdfb7e6fb17ac0b63a4219 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Sat, 21 Jun 2025 11:41:07 +0530 Subject: [PATCH 10/17] correct pagelength from 32 to 64 --- modules/rntuple.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index e5a579163..976be5c5a 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -368,7 +368,7 @@ _readClusterGroups(reader) { minEntry = reader.readU64(), entrySpan = reader.readU64(), numClusters = reader.readU32(), - pagelength = reader.readU32(); + pagelength = reader.readU64(); console.log(`Cluster Record Size: ${clusterRecordSize}`); From ffb9da4c74098443af645ffd18e1ef74a9fd59a1 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 25 Jun 2025 10:09:15 +0530 Subject: [PATCH 11/17] added logic to call deserializePageList --- modules/rntuple.mjs | 52 +++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 976be5c5a..5aad405da 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -375,17 +375,14 @@ _readClusterGroups(reader) { // Locator method to get the page list locator offset const pageListLocator = this._readLocator(reader); - // Seek to the page list offset - reader.seek(pageListLocator.offset); console.log('Page Length', pagelength); - console.log('Page List Locator:', pageListLocator); console.log(`Page List Locator Offset (hex): 0x${pageListLocator.offset.toString(16).toUpperCase()}`); - // Deserialize the Page List Envelope from there - // this._readPageListEnvelope(reader); + const group = { minEntry, entrySpan, - numClusters, + numClusters, + locator: pageListLocator, }; clusterGroups.push(group); } @@ -408,36 +405,13 @@ _readLocator(reader) { }; } } +deserializePageList(page_list_blob){ + if (!page_list_blob) return; + const reader = new RBufferReader(page_list_blob); + console.log(reader.offset); +} -// _readPageListEnvelope(reader) { - // Read the envelope metadata - // this._readEnvelopeMetadata(reader); - // Page list checksum (64-bit xxhash3) - // const pageListChecksum = reader.readU64(); - // console.log('Page List Checksum:', pageListChecksum); - - // Cluster summary Record Frame - // const clusterListSize = reader.readS64(); - // if (clusterListSize >= 0) - // throw new Error('Expected list frame for cluster summary'); - - // const clusterCount = reader.readU32(); - // console.log('Cluster Count:', clusterCount); - - // for (let i = 0; i < clusterCount; ++i) { - // const recordSize = reader.readS64(), - // firstEntry = reader.readU64(), - // combined = reader.readU64(), - // flags = Number(combined & 0xFFn), // lower 8 bits - // numEntries = combined >> 8n; // higher 56 bits - - // console.log(`Cluster ${i}: RecordSize=${recordSize} First=${firstEntry} Num=${numEntries} Flags=${flags} `); - // if ((flags & 0x01) !== 0) - // throw new Error('Reserved flag 0x01 for sharded clusters is set'); - // } - // TODO: Read top-most list frame for clusters (page locations) -// } } @@ -470,8 +444,16 @@ async function readHeaderFooter(tuple) { tuple.builder.deserializeFooter(footer_blob); + const group = tuple.builder.clusterGroups[0], +offset = Number(group.locator.offset), +size = Number(group.locator.size); - return true; +return tuple.$file.readBuffer([offset, size]).then(page_list_blob => { + return R__unzip(page_list_blob).then(unzipped => { + tuple.builder.deserializePageList(unzipped); + return true; + }); +}); }); }); } From 07648bfdb8541ccbc5dada9bcc2e170d99ee340a Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 25 Jun 2025 10:53:31 +0530 Subject: [PATCH 12/17] commit changes --- modules/rntuple.mjs | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 5aad405da..dea00645a 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -383,6 +383,7 @@ _readClusterGroups(reader) { entrySpan, numClusters, locator: pageListLocator, + pagelength }; clusterGroups.push(group); } @@ -409,7 +410,9 @@ deserializePageList(page_list_blob){ if (!page_list_blob) return; const reader = new RBufferReader(page_list_blob); - console.log(reader.offset); + console.log('hello reader ', reader.offset); + + this._readEnvelopeMetadata(reader); } @@ -444,16 +447,29 @@ async function readHeaderFooter(tuple) { tuple.builder.deserializeFooter(footer_blob); - const group = tuple.builder.clusterGroups[0], -offset = Number(group.locator.offset), +const group = tuple.builder.clusterGroups?.[0]; +if (!group || !group.locator) + throw new Error('No valid cluster group or locator found'); + +const offset = Number(group.locator.offset), size = Number(group.locator.size); -return tuple.$file.readBuffer([offset, size]).then(page_list_blob => { - return R__unzip(page_list_blob).then(unzipped => { - tuple.builder.deserializePageList(unzipped); - return true; - }); -}); +if (!Number.isFinite(offset) || offset < 0 || !Number.isFinite(size) || size <= 0) + throw new Error(`Invalid PageList location or size — offset=${offset}, size=${size}`); + +tuple.$file.readBuffer([offset, size]) + .then(page_list_blob => { + if (page_list_blob instanceof DataView) + page_list_blob = new Uint8Array(page_list_blob.buffer, page_list_blob.byteOffset, page_list_blob.byteLength); + + if (!page_list_blob || !(page_list_blob instanceof Uint8Array)) + throw new Error(`Failed to read page list buffer: got ${Object.prototype.toString.call(page_list_blob)}`); + + tuple.builder.deserializePageList(page_list_blob); + }) + .catch(err => { + console.error('Error while reading or processing Page List:', err); + }); }); }); } From cd70a7d5c295b8024b96433443b7e16b3d8cbe87 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 25 Jun 2025 13:19:15 +0530 Subject: [PATCH 13/17] envelope working correctly --- modules/rntuple.mjs | 57 +++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index dea00645a..13f217ceb 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -413,6 +413,9 @@ deserializePageList(page_list_blob){ console.log('hello reader ', reader.offset); this._readEnvelopeMetadata(reader); + // Page list checksum (64-bit xxhash3) + const pageListChecksum = reader.readU64(); + console.log('Page List Checksum:', pageListChecksum); } @@ -447,30 +450,48 @@ async function readHeaderFooter(tuple) { tuple.builder.deserializeFooter(footer_blob); -const group = tuple.builder.clusterGroups?.[0]; -if (!group || !group.locator) - throw new Error('No valid cluster group or locator found'); -const offset = Number(group.locator.offset), -size = Number(group.locator.size); + const group = tuple.builder.clusterGroups?.[0]; + if (!group || !group.locator) + throw new Error('No valid cluster group or locator found'); -if (!Number.isFinite(offset) || offset < 0 || !Number.isFinite(size) || size <= 0) - throw new Error(`Invalid PageList location or size — offset=${offset}, size=${size}`); + const offset = Number(group.locator.offset), + size = Number(group.locator.size), + uncompressedSize = Number(group.pagelength); -tuple.$file.readBuffer([offset, size]) - .then(page_list_blob => { - if (page_list_blob instanceof DataView) - page_list_blob = new Uint8Array(page_list_blob.buffer, page_list_blob.byteOffset, page_list_blob.byteLength); + if (!Number.isFinite(offset) || offset < 0 || !Number.isFinite(size) || size <= 0) + throw new Error(`Invalid PageList location or size — offset=${offset}, size=${size}`); - if (!page_list_blob || !(page_list_blob instanceof Uint8Array)) - throw new Error(`Failed to read page list buffer: got ${Object.prototype.toString.call(page_list_blob)}`); + return tuple.$file.readBuffer([offset, size]).then(page_list_blob => { + if (page_list_blob instanceof DataView){ + page_list_blob = new Uint8Array( + page_list_blob.buffer, + page_list_blob.byteOffset, + page_list_blob.byteLength + ); + } - tuple.builder.deserializePageList(page_list_blob); - }) - .catch(err => { - console.error('Error while reading or processing Page List:', err); - }); + if (!(page_list_blob instanceof Uint8Array)) + throw new Error(`Failed to read page list buffer: got ${Object.prototype.toString.call(page_list_blob)}`); + + return R__unzip( + new DataView(page_list_blob.buffer, page_list_blob.byteOffset, page_list_blob.byteLength), + uncompressedSize + ).then(unzipped_blob => { + if (unzipped_blob instanceof DataView) + unzipped_blob = new Uint8Array(unzipped_blob.buffer, unzipped_blob.byteOffset, unzipped_blob.byteLength); + + if (!(unzipped_blob instanceof Uint8Array)) + throw new Error(`Unzipped page list is not a Uint8Array, got ${Object.prototype.toString.call(unzipped_blob)}`); + + tuple.builder.deserializePageList(unzipped_blob); + return true; + }); + }); }); + }).catch(err => { + console.error('Error during readHeaderFooter execution:', err); + throw err; }); } From 2d3e6bdcb00ff6e0e39b0a832be7641adc5407bd Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 25 Jun 2025 15:40:28 +0530 Subject: [PATCH 14/17] Commiting changes --- modules/rntuple.mjs | 66 +++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 13f217ceb..1ae3080cd 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -368,22 +368,22 @@ _readClusterGroups(reader) { minEntry = reader.readU64(), entrySpan = reader.readU64(), numClusters = reader.readU32(), - pagelength = reader.readU64(); + pageListLength = reader.readU64(); console.log(`Cluster Record Size: ${clusterRecordSize}`); // Locator method to get the page list locator offset const pageListLocator = this._readLocator(reader); - console.log('Page Length', pagelength); + console.log('Page Length', pageListLength); console.log(`Page List Locator Offset (hex): 0x${pageListLocator.offset.toString(16).toUpperCase()}`); const group = { minEntry, entrySpan, numClusters, - locator: pageListLocator, - pagelength + pageListLocator, + pageListLength }; clusterGroups.push(group); } @@ -391,27 +391,21 @@ _readClusterGroups(reader) { } _readLocator(reader) { - const sizeAndType = reader.readU32(), // 4 bytes: size + T bit - type = sizeAndType & 1; - // TODO : need to do the case for t!=0 - if (type !== 0) - throw new Error('Non-standard locators (T=1) not supported yet.'); - else { + const sizeAndType = reader.readU32(); // 4 bytes: size + T bit + if ((sizeAndType | 0) < 0) // | makes the sizeAndType as signed + throw new Error('Non-standard locators (T=1) not supported yet'); const size = sizeAndType, offset = reader.readU64(); // 8 bytes: offset return { - type, size, offset }; } -} deserializePageList(page_list_blob){ - if (!page_list_blob) return; + if (!page_list_blob) + throw new Error('deserializePageList: received an invalid or empty page list blob'); - const reader = new RBufferReader(page_list_blob); - console.log('hello reader ', reader.offset); - + const reader = new RBufferReader(page_list_blob); this._readEnvelopeMetadata(reader); // Page list checksum (64-bit xxhash3) const pageListChecksum = reader.readU64(); @@ -450,39 +444,21 @@ async function readHeaderFooter(tuple) { tuple.builder.deserializeFooter(footer_blob); - const group = tuple.builder.clusterGroups?.[0]; - if (!group || !group.locator) - throw new Error('No valid cluster group or locator found'); + if (!group || !group.pageListLocator) + throw new Error('No valid cluster group or page list locator found'); - const offset = Number(group.locator.offset), - size = Number(group.locator.size), - uncompressedSize = Number(group.pagelength); - - if (!Number.isFinite(offset) || offset < 0 || !Number.isFinite(size) || size <= 0) - throw new Error(`Invalid PageList location or size — offset=${offset}, size=${size}`); + const offset = Number(group.pageListLocator.offset), + size = Number(group.pageListLocator.size), + uncompressedSize = Number(group.pageListLength); return tuple.$file.readBuffer([offset, size]).then(page_list_blob => { - if (page_list_blob instanceof DataView){ - page_list_blob = new Uint8Array( - page_list_blob.buffer, - page_list_blob.byteOffset, - page_list_blob.byteLength - ); - } - - if (!(page_list_blob instanceof Uint8Array)) - throw new Error(`Failed to read page list buffer: got ${Object.prototype.toString.call(page_list_blob)}`); - - return R__unzip( - new DataView(page_list_blob.buffer, page_list_blob.byteOffset, page_list_blob.byteLength), - uncompressedSize - ).then(unzipped_blob => { - if (unzipped_blob instanceof DataView) - unzipped_blob = new Uint8Array(unzipped_blob.buffer, unzipped_blob.byteOffset, unzipped_blob.byteLength); - - if (!(unzipped_blob instanceof Uint8Array)) - throw new Error(`Unzipped page list is not a Uint8Array, got ${Object.prototype.toString.call(unzipped_blob)}`); + if (!(page_list_blob instanceof DataView)) + throw new Error(`Expected DataView from readBuffer, got ${Object.prototype.toString.call(page_list_blob)}`); + + return R__unzip(page_list_blob, uncompressedSize).then(unzipped_blob => { + if (!(unzipped_blob instanceof DataView)) + throw new Error(`Unzipped page list is not a DataView, got ${Object.prototype.toString.call(unzipped_blob)}`); tuple.builder.deserializePageList(unzipped_blob); return true; From f1498588808e5e4d004abf9cc3f6ad584a248446 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 25 Jun 2025 16:01:56 +0530 Subject: [PATCH 15/17] Read cluster summaries list frame logic added --- modules/rntuple.mjs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 1ae3080cd..86d13c60b 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -410,8 +410,31 @@ deserializePageList(page_list_blob){ // Page list checksum (64-bit xxhash3) const pageListChecksum = reader.readU64(); console.log('Page List Checksum:', pageListChecksum); -} + // Read cluster summaries list frame + const clusterSummaryListSize = reader.readS64(); + if (clusterSummaryListSize>=0) + throw new Error('Expected a list frame for cluster summaries'); + const clusterCount = reader.readU32(), + + clusterSummaries = []; + + for (let i = 0; i < clusterCount; ++i) { + const clusterSummaryRecordSize = reader.readS64(), + firstEntry = reader.readU64(), + combined = reader.readU64(), + flags = combined >> 56n; + if (flags & 0x01n) + throw new Error('Cluster summary uses unsupported sharded flag (0x01)'); + const numEntries = Number(combined & 0x00FFFFFFFFFFFFFFn); + console.log(`Cluster Summary Record Size : ${clusterSummaryRecordSize}`); + clusterSummaries.push({ + firstEntry, + numEntries, + flags + }); +} +} } From 015b479961ade3ea8a53c0432ad71f2b24999b5a Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 25 Jun 2025 17:25:10 +0530 Subject: [PATCH 16/17] Added full deserialize Page List Envelope --- modules/rntuple.mjs | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 86d13c60b..7ed54b0bb 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -434,10 +434,62 @@ deserializePageList(page_list_blob){ flags }); } +this.clusterSummaries = clusterSummaries; +this._readNestedFrames(reader, clusterSummaries.length); } +_readNestedFrames(reader, numClusters) { + const clusterPageLocations = []; + + for (let i = 0; i < numClusters; ++i) { + const outerListSize = reader.readS64(); + if (outerListSize >= 0) + throw new Error('Expected outer list frame for columns'); + + const numColumns = reader.readU32(); + const columns = []; + + for (let c = 0; c < numColumns; ++c) { + const innerListSize = reader.readS64(); + if (innerListSize >= 0) + throw new Error('Expected inner list frame for pages'); + + const numPages = reader.readU32(); + console.log(`Column ${c} has ${numPages} page(s)`); + const pages = []; + + for (let p = 0; p < numPages; ++p) { + const numElementsWithBit = reader.readS64(), + hasChecksum = numElementsWithBit < 0, + numElements = BigInt(Math.abs(Number(numElementsWithBit))), + + locator = this._readLocator(reader); + console.log(`Page ${p} → elements: ${numElements}, checksum: ${hasChecksum}, locator offset: ${locator.offset}, size: ${locator.size}`); + pages.push({ numElements, hasChecksum, locator }); + } + + const elementOffset = reader.readS64(), + isSuppressed = elementOffset < 0; + + let compression = null; + if (!isSuppressed) { + compression = reader.readU32(); + console.log(`Column ${c} is NOT suppressed, offset: ${elementOffset}, compression: ${compression}`); + } else + console.log(`Column ${c} is suppressed, offset: ${elementOffset}`); + + columns.push({ pages, elementOffset, isSuppressed, compression }); + } + + clusterPageLocations.push(columns); + } + + this.pageLocations = clusterPageLocations; } +} + + /** @summary Very preliminary function to read header/footer from RNTuple * @private */ async function readHeaderFooter(tuple) { From 40473bba4558b1f0c59b85798bd41d41527839eb Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 26 Jun 2025 14:56:51 +0530 Subject: [PATCH 17/17] Suggested Changes done --- modules/rntuple.mjs | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 7ed54b0bb..9d82bfc00 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -408,18 +408,20 @@ deserializePageList(page_list_blob){ const reader = new RBufferReader(page_list_blob); this._readEnvelopeMetadata(reader); // Page list checksum (64-bit xxhash3) - const pageListChecksum = reader.readU64(); - console.log('Page List Checksum:', pageListChecksum); + const pageListHeaderChecksum = reader.readU64(); + if (pageListHeaderChecksum !== this.headerEnvelopeChecksum) + throw new Error('RNTuple corrupted: header checksum does not match Page List Header checksum.'); + // Read cluster summaries list frame const clusterSummaryListSize = reader.readS64(); if (clusterSummaryListSize>=0) throw new Error('Expected a list frame for cluster summaries'); - const clusterCount = reader.readU32(), + const clusterSummaryCount = reader.readU32(), clusterSummaries = []; - for (let i = 0; i < clusterCount; ++i) { + for (let i = 0; i < clusterSummaryCount; ++i) { const clusterSummaryRecordSize = reader.readS64(), firstEntry = reader.readU64(), combined = reader.readU64(), @@ -435,23 +437,30 @@ deserializePageList(page_list_blob){ }); } this.clusterSummaries = clusterSummaries; -this._readNestedFrames(reader, clusterSummaries.length); +this._readNestedFrames(reader); + +const checksumPagelist = reader.readU64(); +console.log('Page List Checksum', checksumPagelist); } -_readNestedFrames(reader, numClusters) { - const clusterPageLocations = []; +_readNestedFrames(reader) { + const clusterPageLocations = [], + numListClusters = reader.readS64(); + if (numListClusters>=0) + throw new Error('Expected list frame for clusters'); +const numRecordCluster = reader.readU32(); - for (let i = 0; i < numClusters; ++i) { + for (let i = 0; i < numRecordCluster; ++i) { const outerListSize = reader.readS64(); if (outerListSize >= 0) throw new Error('Expected outer list frame for columns'); - const numColumns = reader.readU32(); - const columns = []; + const numColumns = reader.readU32(), + columns = []; for (let c = 0; c < numColumns; ++c) { const innerListSize = reader.readS64(); - if (innerListSize >= 0) + if (innerListSize >= 0) throw new Error('Expected inner list frame for pages'); const numPages = reader.readU32(); @@ -459,7 +468,7 @@ _readNestedFrames(reader, numClusters) { const pages = []; for (let p = 0; p < numPages; ++p) { - const numElementsWithBit = reader.readS64(), + const numElementsWithBit = reader.readS32(), hasChecksum = numElementsWithBit < 0, numElements = BigInt(Math.abs(Number(numElementsWithBit))),