From e131ec3fb73d16ac79a4e5ed60386258a0ece6fc Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 18 Jun 2025 21:28:40 +0530 Subject: [PATCH 1/3] Added logic for extracting and matching header checksum --- demo/node/rntuple_test.js | 20 +++++++++++++------- modules/rntuple.mjs | 18 +++++++++++++----- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/demo/node/rntuple_test.js b/demo/node/rntuple_test.js index 1a4d8d35b..b607c45ea 100644 --- a/demo/node/rntuple_test.js +++ b/demo/node/rntuple_test.js @@ -43,15 +43,12 @@ else { console.log(`OK: Field ${i}: ${field.fieldName} (${field.typeName})`); if (i === 0) { if (field.fieldName !== 'Category' || field.typeName !== 'std::int32_t') - console.error(`FAILURE: First field should be 'Category (std::int32_t)' but got '${field.fieldName} (${field.typeName})'`); - + console.error(`FAILURE: First field should be 'Category (std::int32_t)' but got '${field.fieldName} (${field.typeName})'`); } else if (i === rntuple.builder.fieldDescriptors.length - 1){ if (field.fieldName !== 'Nation' || field.typeName !== 'std::string') - console.error(`FAILURE: Last field should be 'Nation (std::string)' but got '${field.fieldName} (${field.typeName})'`); - + console.error(`FAILURE: Last field should be 'Nation (std::string)' but got '${field.fieldName} (${field.typeName})'`); } } - } // Column Check @@ -68,12 +65,21 @@ else { console.log(`OK: Column ${i} fieldId: ${column.fieldId} `); if (i === 0) { if (column.fieldId !== 0) - console.error(`FAILURE: First column should be for fieldId 0 (Category)`); + console.error('FAILURE: First column should be for fieldId 0 (Category)'); } else if (i === rntuple.builder.columnDescriptors.length - 1){ if (column.fieldId !== 10) - console.error(`FAILURE: Last column should be for fieldId 10 (Nation)`); + console.error('FAILURE: Last column should be for fieldId 10 (Nation)'); } } } +// Checksum Validation Test +if (rntuple.builder?.headerEnvelopeChecksum === undefined) + console.warn('WARNING: Header envelope checksum was not read or stored'); +else if (rntuple.builder?.headerChecksumFromFooter === undefined) + console.warn('WARNING: Footer did not contain a stored copy of the header checksum'); +else if (rntuple.builder.headerEnvelopeChecksum !== rntuple.builder.headerChecksumFromFooter) + console.error('FAILURE: Header checksum mismatch between envelope and footer'); +else + console.log('OK: Header checksum matches the one in Footer'); diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index a76c2075c..275e4c2bb 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -110,12 +110,20 @@ class RNTupleDescriptorBuilder { deserializeHeader(header_blob) { if (!header_blob) return; - const reader = new RBufferReader(header_blob); + const reader = new RBufferReader(header_blob), + + payloadStart = reader.offset, // Read the envelope metadata - this._readEnvelopeMetadata(reader); + { envelopeLength } = this._readEnvelopeMetadata(reader), - // TODO: Validate the envelope checksum at the end of deserialization - // const payloadStart = reader.offset; + // Seek to end of envelope to get checksum + checksumPos = payloadStart + envelopeLength - 8, + currentPos = reader.offset; + + reader.seek(checksumPos); + this.headerEnvelopeChecksum = reader.readU64(); + + reader.seek(currentPos); // Read feature flags list (may span multiple 64-bit words) this._readFeatureFlags(reader); @@ -148,7 +156,7 @@ deserializeFooter(footer_blob) { // Feature flag(32 bits) this._readFeatureFlags(reader); // Header checksum (64-bit xxhash3) - this.headerChecksum = reader.readU64(); + this.headerChecksumFromFooter = reader.readU64(); const schemaExtensionSize = reader.readS64(); From d4c8b3c693818ea4debd517af45fbdebb40e0293 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Wed, 18 Jun 2025 21:31:49 +0530 Subject: [PATCH 2/3] SchemaExtension Frame Reader --- modules/rntuple.mjs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 275e4c2bb..5147556fa 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -133,15 +133,8 @@ deserializeHeader(header_blob) { this.description = reader.readString(); this.writer = reader.readString(); - // List frame: list of field record frames - this._readFieldDescriptors(reader); - - // List frame: list of column record frames - this._readColumnDescriptors(reader); - // Read alias column descriptors - this._readAliasColumn(reader); - // Read Extra Type Information - this._readExtraTypeInformation(reader); + // 4 list frames inside the header envelope + this._readSchemaExtension(reader); } deserializeFooter(footer_blob) { @@ -165,10 +158,7 @@ deserializeFooter(footer_blob) { throw new Error('Schema extension frame is not a record frame, which is unexpected.'); // Schema extension record frame (4 list frames inside) - this._readFieldDescriptors(reader); - this._readColumnDescriptors(reader); - this._readAliasColumn(reader); - this._readExtraTypeInformation(reader); + this._readSchemaExtension(reader); // Cluster Group record frame this._readClusterGroups(reader); @@ -188,6 +178,14 @@ _readEnvelopeMetadata(reader) { return { envelopeType, envelopeLength }; } +_readSchemaExtension(reader) { + this._readFieldDescriptors(reader); + this._readColumnDescriptors(reader); + this._readAliasColumn(reader); + this._readExtraTypeInformation(reader); +} + + _readFeatureFlags(reader) { this.featureFlags = []; while (true) { From 2a0e27a41c73b60178db3875fdb152c5802615b2 Mon Sep 17 00:00:00 2001 From: Krmjn09 Date: Thu, 19 Jun 2025 12:53:07 +0530 Subject: [PATCH 3/3] Adding suggested changes --- demo/node/rntuple_test.js | 13 +------------ modules/rntuple.mjs | 33 +++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/demo/node/rntuple_test.js b/demo/node/rntuple_test.js index b607c45ea..a16a02c42 100644 --- a/demo/node/rntuple_test.js +++ b/demo/node/rntuple_test.js @@ -71,15 +71,4 @@ else { console.error('FAILURE: Last column should be for fieldId 10 (Nation)'); } } -} - -// Checksum Validation Test - -if (rntuple.builder?.headerEnvelopeChecksum === undefined) - console.warn('WARNING: Header envelope checksum was not read or stored'); -else if (rntuple.builder?.headerChecksumFromFooter === undefined) - console.warn('WARNING: Footer did not contain a stored copy of the header checksum'); -else if (rntuple.builder.headerEnvelopeChecksum !== rntuple.builder.headerChecksumFromFooter) - console.error('FAILURE: Header checksum mismatch between envelope and footer'); -else - console.log('OK: Header checksum matches the one in Footer'); +} \ No newline at end of file diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 5147556fa..d9ac0f38d 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -134,7 +134,7 @@ deserializeHeader(header_blob) { this.writer = reader.readString(); // 4 list frames inside the header envelope - this._readSchemaExtension(reader); + this._readSchemaDescription(reader); } deserializeFooter(footer_blob) { @@ -149,7 +149,9 @@ deserializeFooter(footer_blob) { // Feature flag(32 bits) this._readFeatureFlags(reader); // Header checksum (64-bit xxhash3) - this.headerChecksumFromFooter = reader.readU64(); + const headerChecksumFromFooter = reader.readU64(); + if (headerChecksumFromFooter !== this.headerEnvelopeChecksum) + throw new Error('RNTuple corrupted: header checksum does not match footer checksum.'); const schemaExtensionSize = reader.readS64(); @@ -158,7 +160,7 @@ deserializeFooter(footer_blob) { throw new Error('Schema extension frame is not a record frame, which is unexpected.'); // Schema extension record frame (4 list frames inside) - this._readSchemaExtension(reader); + this._readSchemaDescription(reader); // Cluster Group record frame this._readClusterGroups(reader); @@ -178,11 +180,18 @@ _readEnvelopeMetadata(reader) { return { envelopeType, envelopeLength }; } -_readSchemaExtension(reader) { - this._readFieldDescriptors(reader); - this._readColumnDescriptors(reader); - this._readAliasColumn(reader); - this._readExtraTypeInformation(reader); +_readSchemaDescription(reader) { + // Reading new descriptor arrays from the input + const newFields = this._readFieldDescriptors(reader), + newColumns = this._readColumnDescriptors(reader), + newAliases = this._readAliasColumn(reader), + newExtra = this._readExtraTypeInformation(reader); + + // Merging these new arrays into existing arrays + this.fieldDescriptors = (this.fieldDescriptors || []).concat(newFields); + this.columnDescriptors = (this.columnDescriptors || []).concat(newColumns); + this.aliasColumns = (this.aliasColumns || []).concat(newAliases); + this.extraTypeInfo = (this.extraTypeInfo || []).concat(newExtra); } @@ -247,7 +256,7 @@ fieldListIsList = fieldListSize < 0; checksum }); } - this.fieldDescriptors = fieldDescriptors; + return fieldDescriptors; } _readColumnDescriptors(reader) { @@ -293,7 +302,7 @@ _readColumnDescriptors(reader) { columnDescriptors.push(column); } - this.columnDescriptors = columnDescriptors; + return columnDescriptors; } _readAliasColumn(reader){ const aliasColumnListSize = reader.readS64(), @@ -313,7 +322,7 @@ _readAliasColumn(reader){ fieldId }); } - this.aliasColumns = aliasColumns; + return aliasColumns; } _readExtraTypeInformation(reader) { const extraTypeInfoListSize = reader.readS64(), @@ -336,7 +345,7 @@ _readExtraTypeInformation(reader) { typeVersion }); } - this.extraTypeInfo = extraTypeInfo; + return extraTypeInfo; } _readClusterGroups(reader) { const clusterGroupListSize = reader.readS64(),