Skip to content

Commit 8af9393

Browse files
authored
Merge pull request #342 from Krmjn09/feature/page-list-Envelope
Feature/page list envelope
2 parents 3f9414f + 40473bb commit 8af9393

File tree

1 file changed

+153
-10
lines changed

1 file changed

+153
-10
lines changed

modules/rntuple.mjs

Lines changed: 153 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,14 @@ class RBufferReader {
2020

2121
// Move to a specific position in the buffer
2222
seek(position) {
23+
if (typeof position === 'bigint') {
24+
if (position > BigInt(Number.MAX_SAFE_INTEGER))
25+
throw new Error(`Offset too large to seek safely: ${position}`);
26+
this.offset = Number(position);
27+
} else
2328
this.offset = position;
24-
}
29+
}
30+
2531

2632
// Read unsigned 8-bit integer (1 BYTE)
2733
readU8() {
@@ -361,23 +367,138 @@ _readClusterGroups(reader) {
361367
const clusterRecordSize = reader.readS64(),
362368
minEntry = reader.readU64(),
363369
entrySpan = reader.readU64(),
364-
numClusters = reader.readU32();
370+
numClusters = reader.readU32(),
371+
pageListLength = reader.readU64();
365372

366373
console.log(`Cluster Record Size: ${clusterRecordSize}`);
367-
console.log(`Min Entry: ${minEntry}, Entry Span: ${entrySpan}, Num Clusters: ${numClusters}`);
374+
375+
// Locator method to get the page list locator offset
376+
const pageListLocator = this._readLocator(reader);
377+
378+
console.log('Page Length', pageListLength);
379+
console.log(`Page List Locator Offset (hex): 0x${pageListLocator.offset.toString(16).toUpperCase()}`);
380+
381+
const group = {
382+
minEntry,
383+
entrySpan,
384+
numClusters,
385+
pageListLocator,
386+
pageListLength
387+
};
388+
clusterGroups.push(group);
389+
}
390+
this.clusterGroups = clusterGroups;
391+
}
368392

369-
clusterGroups.push({
370-
minEntry,
371-
entrySpan,
372-
numClusters,
373-
});
393+
_readLocator(reader) {
394+
const sizeAndType = reader.readU32(); // 4 bytes: size + T bit
395+
if ((sizeAndType | 0) < 0) // | makes the sizeAndType as signed
396+
throw new Error('Non-standard locators (T=1) not supported yet');
397+
const size = sizeAndType,
398+
offset = reader.readU64(); // 8 bytes: offset
399+
return {
400+
size,
401+
offset
402+
};
403+
}
404+
deserializePageList(page_list_blob){
405+
if (!page_list_blob)
406+
throw new Error('deserializePageList: received an invalid or empty page list blob');
407+
408+
const reader = new RBufferReader(page_list_blob);
409+
this._readEnvelopeMetadata(reader);
410+
// Page list checksum (64-bit xxhash3)
411+
const pageListHeaderChecksum = reader.readU64();
412+
if (pageListHeaderChecksum !== this.headerEnvelopeChecksum)
413+
throw new Error('RNTuple corrupted: header checksum does not match Page List Header checksum.');
414+
415+
416+
// Read cluster summaries list frame
417+
const clusterSummaryListSize = reader.readS64();
418+
if (clusterSummaryListSize>=0)
419+
throw new Error('Expected a list frame for cluster summaries');
420+
const clusterSummaryCount = reader.readU32(),
421+
422+
clusterSummaries = [];
423+
424+
for (let i = 0; i < clusterSummaryCount; ++i) {
425+
const clusterSummaryRecordSize = reader.readS64(),
426+
firstEntry = reader.readU64(),
427+
combined = reader.readU64(),
428+
flags = combined >> 56n;
429+
if (flags & 0x01n)
430+
throw new Error('Cluster summary uses unsupported sharded flag (0x01)');
431+
const numEntries = Number(combined & 0x00FFFFFFFFFFFFFFn);
432+
console.log(`Cluster Summary Record Size : ${clusterSummaryRecordSize}`);
433+
clusterSummaries.push({
434+
firstEntry,
435+
numEntries,
436+
flags
437+
});
438+
}
439+
this.clusterSummaries = clusterSummaries;
440+
this._readNestedFrames(reader);
441+
442+
const checksumPagelist = reader.readU64();
443+
console.log('Page List Checksum', checksumPagelist);
444+
}
445+
446+
_readNestedFrames(reader) {
447+
const clusterPageLocations = [],
448+
numListClusters = reader.readS64();
449+
if (numListClusters>=0)
450+
throw new Error('Expected list frame for clusters');
451+
const numRecordCluster = reader.readU32();
452+
453+
for (let i = 0; i < numRecordCluster; ++i) {
454+
const outerListSize = reader.readS64();
455+
if (outerListSize >= 0)
456+
throw new Error('Expected outer list frame for columns');
457+
458+
const numColumns = reader.readU32(),
459+
columns = [];
460+
461+
for (let c = 0; c < numColumns; ++c) {
462+
const innerListSize = reader.readS64();
463+
if (innerListSize >= 0)
464+
throw new Error('Expected inner list frame for pages');
465+
466+
const numPages = reader.readU32();
467+
console.log(`Column ${c} has ${numPages} page(s)`);
468+
const pages = [];
469+
470+
for (let p = 0; p < numPages; ++p) {
471+
const numElementsWithBit = reader.readS32(),
472+
hasChecksum = numElementsWithBit < 0,
473+
numElements = BigInt(Math.abs(Number(numElementsWithBit))),
474+
475+
locator = this._readLocator(reader);
476+
console.log(`Page ${p} → elements: ${numElements}, checksum: ${hasChecksum}, locator offset: ${locator.offset}, size: ${locator.size}`);
477+
pages.push({ numElements, hasChecksum, locator });
478+
}
479+
480+
const elementOffset = reader.readS64(),
481+
isSuppressed = elementOffset < 0;
482+
483+
let compression = null;
484+
if (!isSuppressed) {
485+
compression = reader.readU32();
486+
console.log(`Column ${c} is NOT suppressed, offset: ${elementOffset}, compression: ${compression}`);
487+
} else
488+
console.log(`Column ${c} is suppressed, offset: ${elementOffset}`);
489+
490+
columns.push({ pages, elementOffset, isSuppressed, compression });
491+
}
492+
493+
clusterPageLocations.push(columns);
374494
}
375495

376-
this.clusterGroups = clusterGroups;
496+
this.pageLocations = clusterPageLocations;
377497
}
378498

379499
}
380500

501+
381502
/** @summary Very preliminary function to read header/footer from RNTuple
382503
* @private */
383504
async function readHeaderFooter(tuple) {
@@ -407,8 +528,30 @@ async function readHeaderFooter(tuple) {
407528

408529
tuple.builder.deserializeFooter(footer_blob);
409530

410-
return true;
531+
const group = tuple.builder.clusterGroups?.[0];
532+
if (!group || !group.pageListLocator)
533+
throw new Error('No valid cluster group or page list locator found');
534+
535+
const offset = Number(group.pageListLocator.offset),
536+
size = Number(group.pageListLocator.size),
537+
uncompressedSize = Number(group.pageListLength);
538+
539+
return tuple.$file.readBuffer([offset, size]).then(page_list_blob => {
540+
if (!(page_list_blob instanceof DataView))
541+
throw new Error(`Expected DataView from readBuffer, got ${Object.prototype.toString.call(page_list_blob)}`);
542+
543+
return R__unzip(page_list_blob, uncompressedSize).then(unzipped_blob => {
544+
if (!(unzipped_blob instanceof DataView))
545+
throw new Error(`Unzipped page list is not a DataView, got ${Object.prototype.toString.call(unzipped_blob)}`);
546+
547+
tuple.builder.deserializePageList(unzipped_blob);
548+
return true;
549+
});
550+
});
411551
});
552+
}).catch(err => {
553+
console.error('Error during readHeaderFooter execution:', err);
554+
throw err;
412555
});
413556
}
414557

0 commit comments

Comments
 (0)