@@ -20,8 +20,14 @@ class RBufferReader {
20
20
21
21
// Move to a specific position in the buffer
22
22
seek ( position ) {
23
+ if ( typeof position === 'bigint' ) {
24
+ if ( position > BigInt ( Number . MAX_SAFE_INTEGER ) )
25
+ throw new Error ( `Offset too large to seek safely: ${ position } ` ) ;
26
+ this . offset = Number ( position ) ;
27
+ } else
23
28
this . offset = position ;
24
- }
29
+ }
30
+
25
31
26
32
// Read unsigned 8-bit integer (1 BYTE)
27
33
readU8 ( ) {
@@ -361,23 +367,138 @@ _readClusterGroups(reader) {
361
367
const clusterRecordSize = reader . readS64 ( ) ,
362
368
minEntry = reader . readU64 ( ) ,
363
369
entrySpan = reader . readU64 ( ) ,
364
- numClusters = reader . readU32 ( ) ;
370
+ numClusters = reader . readU32 ( ) ,
371
+ pageListLength = reader . readU64 ( ) ;
365
372
366
373
console . log ( `Cluster Record Size: ${ clusterRecordSize } ` ) ;
367
- console . log ( `Min Entry: ${ minEntry } , Entry Span: ${ entrySpan } , Num Clusters: ${ numClusters } ` ) ;
374
+
375
+ // Locator method to get the page list locator offset
376
+ const pageListLocator = this . _readLocator ( reader ) ;
377
+
378
+ console . log ( 'Page Length' , pageListLength ) ;
379
+ console . log ( `Page List Locator Offset (hex): 0x${ pageListLocator . offset . toString ( 16 ) . toUpperCase ( ) } ` ) ;
380
+
381
+ const group = {
382
+ minEntry,
383
+ entrySpan,
384
+ numClusters,
385
+ pageListLocator,
386
+ pageListLength
387
+ } ;
388
+ clusterGroups . push ( group ) ;
389
+ }
390
+ this . clusterGroups = clusterGroups ;
391
+ }
368
392
369
- clusterGroups . push ( {
370
- minEntry,
371
- entrySpan,
372
- numClusters,
373
- } ) ;
393
+ _readLocator ( reader ) {
394
+ const sizeAndType = reader . readU32 ( ) ; // 4 bytes: size + T bit
395
+ if ( ( sizeAndType | 0 ) < 0 ) // | makes the sizeAndType as signed
396
+ throw new Error ( 'Non-standard locators (T=1) not supported yet' ) ;
397
+ const size = sizeAndType ,
398
+ offset = reader . readU64 ( ) ; // 8 bytes: offset
399
+ return {
400
+ size,
401
+ offset
402
+ } ;
403
+ }
404
+ deserializePageList ( page_list_blob ) {
405
+ if ( ! page_list_blob )
406
+ throw new Error ( 'deserializePageList: received an invalid or empty page list blob' ) ;
407
+
408
+ const reader = new RBufferReader ( page_list_blob ) ;
409
+ this . _readEnvelopeMetadata ( reader ) ;
410
+ // Page list checksum (64-bit xxhash3)
411
+ const pageListHeaderChecksum = reader . readU64 ( ) ;
412
+ if ( pageListHeaderChecksum !== this . headerEnvelopeChecksum )
413
+ throw new Error ( 'RNTuple corrupted: header checksum does not match Page List Header checksum.' ) ;
414
+
415
+
416
+ // Read cluster summaries list frame
417
+ const clusterSummaryListSize = reader . readS64 ( ) ;
418
+ if ( clusterSummaryListSize >= 0 )
419
+ throw new Error ( 'Expected a list frame for cluster summaries' ) ;
420
+ const clusterSummaryCount = reader . readU32 ( ) ,
421
+
422
+ clusterSummaries = [ ] ;
423
+
424
+ for ( let i = 0 ; i < clusterSummaryCount ; ++ i ) {
425
+ const clusterSummaryRecordSize = reader . readS64 ( ) ,
426
+ firstEntry = reader . readU64 ( ) ,
427
+ combined = reader . readU64 ( ) ,
428
+ flags = combined >> 56n ;
429
+ if ( flags & 0x01n )
430
+ throw new Error ( 'Cluster summary uses unsupported sharded flag (0x01)' ) ;
431
+ const numEntries = Number ( combined & 0x00FFFFFFFFFFFFFFn ) ;
432
+ console . log ( `Cluster Summary Record Size : ${ clusterSummaryRecordSize } ` ) ;
433
+ clusterSummaries . push ( {
434
+ firstEntry,
435
+ numEntries,
436
+ flags
437
+ } ) ;
438
+ }
439
+ this . clusterSummaries = clusterSummaries ;
440
+ this . _readNestedFrames ( reader ) ;
441
+
442
+ const checksumPagelist = reader . readU64 ( ) ;
443
+ console . log ( 'Page List Checksum' , checksumPagelist ) ;
444
+ }
445
+
446
+ _readNestedFrames ( reader ) {
447
+ const clusterPageLocations = [ ] ,
448
+ numListClusters = reader . readS64 ( ) ;
449
+ if ( numListClusters >= 0 )
450
+ throw new Error ( 'Expected list frame for clusters' ) ;
451
+ const numRecordCluster = reader . readU32 ( ) ;
452
+
453
+ for ( let i = 0 ; i < numRecordCluster ; ++ i ) {
454
+ const outerListSize = reader . readS64 ( ) ;
455
+ if ( outerListSize >= 0 )
456
+ throw new Error ( 'Expected outer list frame for columns' ) ;
457
+
458
+ const numColumns = reader . readU32 ( ) ,
459
+ columns = [ ] ;
460
+
461
+ for ( let c = 0 ; c < numColumns ; ++ c ) {
462
+ const innerListSize = reader . readS64 ( ) ;
463
+ if ( innerListSize >= 0 )
464
+ throw new Error ( 'Expected inner list frame for pages' ) ;
465
+
466
+ const numPages = reader . readU32 ( ) ;
467
+ console . log ( `Column ${ c } has ${ numPages } page(s)` ) ;
468
+ const pages = [ ] ;
469
+
470
+ for ( let p = 0 ; p < numPages ; ++ p ) {
471
+ const numElementsWithBit = reader . readS32 ( ) ,
472
+ hasChecksum = numElementsWithBit < 0 ,
473
+ numElements = BigInt ( Math . abs ( Number ( numElementsWithBit ) ) ) ,
474
+
475
+ locator = this . _readLocator ( reader ) ;
476
+ console . log ( `Page ${ p } → elements: ${ numElements } , checksum: ${ hasChecksum } , locator offset: ${ locator . offset } , size: ${ locator . size } ` ) ;
477
+ pages . push ( { numElements, hasChecksum, locator } ) ;
478
+ }
479
+
480
+ const elementOffset = reader . readS64 ( ) ,
481
+ isSuppressed = elementOffset < 0 ;
482
+
483
+ let compression = null ;
484
+ if ( ! isSuppressed ) {
485
+ compression = reader . readU32 ( ) ;
486
+ console . log ( `Column ${ c } is NOT suppressed, offset: ${ elementOffset } , compression: ${ compression } ` ) ;
487
+ } else
488
+ console . log ( `Column ${ c } is suppressed, offset: ${ elementOffset } ` ) ;
489
+
490
+ columns . push ( { pages, elementOffset, isSuppressed, compression } ) ;
491
+ }
492
+
493
+ clusterPageLocations . push ( columns ) ;
374
494
}
375
495
376
- this . clusterGroups = clusterGroups ;
496
+ this . pageLocations = clusterPageLocations ;
377
497
}
378
498
379
499
}
380
500
501
+
381
502
/** @summary Very preliminary function to read header/footer from RNTuple
382
503
* @private */
383
504
async function readHeaderFooter ( tuple ) {
@@ -407,8 +528,30 @@ async function readHeaderFooter(tuple) {
407
528
408
529
tuple . builder . deserializeFooter ( footer_blob ) ;
409
530
410
- return true ;
531
+ const group = tuple . builder . clusterGroups ?. [ 0 ] ;
532
+ if ( ! group || ! group . pageListLocator )
533
+ throw new Error ( 'No valid cluster group or page list locator found' ) ;
534
+
535
+ const offset = Number ( group . pageListLocator . offset ) ,
536
+ size = Number ( group . pageListLocator . size ) ,
537
+ uncompressedSize = Number ( group . pageListLength ) ;
538
+
539
+ return tuple . $file . readBuffer ( [ offset , size ] ) . then ( page_list_blob => {
540
+ if ( ! ( page_list_blob instanceof DataView ) )
541
+ throw new Error ( `Expected DataView from readBuffer, got ${ Object . prototype . toString . call ( page_list_blob ) } ` ) ;
542
+
543
+ return R__unzip ( page_list_blob , uncompressedSize ) . then ( unzipped_blob => {
544
+ if ( ! ( unzipped_blob instanceof DataView ) )
545
+ throw new Error ( `Unzipped page list is not a DataView, got ${ Object . prototype . toString . call ( unzipped_blob ) } ` ) ;
546
+
547
+ tuple . builder . deserializePageList ( unzipped_blob ) ;
548
+ return true ;
549
+ } ) ;
550
+ } ) ;
411
551
} ) ;
552
+ } ) . catch ( err => {
553
+ console . error ( 'Error during readHeaderFooter execution:' , err ) ;
554
+ throw err ;
412
555
} ) ;
413
556
}
414
557
0 commit comments