-
Notifications
You must be signed in to change notification settings - Fork 576
feat(tree): internals for field batch format with specialized node shapes #27200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,15 +40,17 @@ import { | |
| import type { IncrementalDecoder } from "./codecs.js"; | ||
| import { | ||
| type EncodedAnyShape, | ||
| type EncodedChunkShapeV1OrV2, | ||
| type EncodedChunkShape, | ||
| type EncodedChunkShapeV2, | ||
| type EncodedFieldBatchV1OrV2, | ||
| type EncodedFieldBatchV2, | ||
| type EncodedIncrementalChunkShape, | ||
| type EncodedInlineArrayShape, | ||
| type EncodedNestedArrayShape, | ||
| type EncodedNodeShape, | ||
| type EncodedSpecializedNodeShape, | ||
| type EncodedValueShape, | ||
| type ShapeIndex, | ||
| SpecialField, | ||
| supportsIncrementalEncoding, | ||
| } from "./format/index.js"; | ||
|
|
@@ -76,9 +78,129 @@ export function decode( | |
| ); | ||
| } | ||
|
|
||
| /** | ||
| * Resolves `input` to a fully-resolved {@link EncodedNodeShape}, normalizing away any | ||
| * specialized node shapes (`f`) along the way by applying their overlays via | ||
| * {@link applySpecialization} until a concrete node shape is reached. | ||
| * | ||
| * @param input - The shape to resolve. A concrete node shape (one without a `base` property) is returned as-is; a specialized node shape has its `base` index looked up in `context.shapes` and resolved recursively. | ||
| * @param context - The decoding context containing the shape definitions. | ||
| * @param pendingResolution - (Internal) A set of base shape indices visited so far in the current resolution chain, used to detect cycles in the specialization chain. The set is mutated: each visited base index is added to it. Most callers should not provide this argument. | ||
| * @returns The concrete node shape obtained by first resolving the base and then overlaying `input` onto it, so the innermost base is applied first. | ||
| * | ||
| * @remarks | ||
| * Asserts if the chain revisits a base index (a cycle), if a base index has no entry in | ||
| * `context.shapes`, or if the shape at a base index is neither a concrete (`c`) nor a | ||
| * specialized (`f`) node shape. | ||
| * | ||
| * Exported for testing. | ||
| */ | ||
| export function normalizeToNodeShape( | ||
| input: EncodedNodeShape | EncodedSpecializedNodeShape, | ||
| context: DecoderContext<EncodedChunkShape>, | ||
| pendingResolution: Set<ShapeIndex> = new Set(), | ||
| ): EncodedNodeShape { | ||
| if (!("base" in input)) { | ||
| return input; | ||
| } | ||
|
|
||
| const baseIndex = input.base; | ||
| assert(!pendingResolution.has(baseIndex), "cyclic specialized node shape chain"); | ||
| pendingResolution.add(baseIndex); | ||
| const encoded = context.shapes[baseIndex]; | ||
| assert(encoded !== undefined, "shape index out of bounds"); | ||
|
|
||
| const baseShape = encoded.c ?? ("f" in encoded ? encoded.f : undefined); | ||
| assert( | ||
| baseShape !== undefined, | ||
| "shape at index must be a concrete (c) or specialized (f) node shape", | ||
| ); | ||
|
|
||
| return applySpecialization( | ||
| normalizeToNodeShape(baseShape, context, pendingResolution), | ||
| input, | ||
| context, | ||
| ); | ||
| } | ||
|
|
||
| /** | ||
| * Produces a specialized {@link EncodedNodeShape} by overlaying `overrides` onto `base`. | ||
| * | ||
| * See {@link EncodedSpecializedNodeShape} for the override/inherit/clear semantics. | ||
| * | ||
| * @param base - The already-resolved node shape to specialize. It is not modified: its field list is copied before any override is applied. | ||
| * @param overrides - The specialization to apply. Only its `fields`, `value` and `extraFields` are read here; its `base` index is not. | ||
| * @param context - The decoding context. Encoded field keys are passed through `context.identifier` so that base and override keys are compared in decoded form. | ||
| * @returns A new node shape whose `type` comes from `base`, whose `fields` are the per-key merge of both field lists (`undefined` when the merged list is empty), and whose `value` / `extraFields` are resolved by `resolveOverride`. | ||
| * | ||
| * @remarks | ||
| * Asserts if `base.fields` or `overrides.fields` contains the same decoded field key twice. | ||
| * | ||
| * Exported for testing. | ||
| */ | ||
| export function applySpecialization( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. EncodedSpecializedNodeShape documents the merge rules: this should refer to/link that and that should likely link here as well. Also I noticed an issue with those documented rules:
These types are supposed to be JSON compatible since they get json serialized. JSON does not preserve undefined properties, so we should not have them be significant or it won't work with real encoded data (I'm not sure if this is a docs bug or a real format issue)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also that doc calls these rules "Merge rules": given we have a bunch of logic about merges in this codebase which is about a different thing, calling them specialization rules or override rules would be more clear I think. |
||
| base: EncodedNodeShape, | ||
| overrides: EncodedSpecializedNodeShape, | ||
| context: DecoderContext<EncodedChunkShape>, | ||
| ): EncodedNodeShape { | ||
| const fields = [...(base.fields ?? [])]; | ||
| const indexFromKey = new Map<FieldKey, number>(); | ||
| for (const [i, [keyEncoded]] of fields.entries()) { | ||
| const key = context.identifier<FieldKey>(keyEncoded); | ||
| assert(!indexFromKey.has(key), "duplicate field key in base node shape"); | ||
| indexFromKey.set(key, i); | ||
| } | ||
|
|
||
| // An override whose key matches a base field replaces that field's shape index in place (the base's encoded key and position are kept); overrides with new keys are appended in the order they are specified. | ||
| const seenOverrideKeys = new Set<FieldKey>(); | ||
| for (const [keyEncoded, shapeIndex] of overrides.fields ?? []) { | ||
| const key = context.identifier<FieldKey>(keyEncoded); | ||
| assert(!seenOverrideKeys.has(key), "duplicate field key in specialized node shape"); | ||
| seenOverrideKeys.add(key); | ||
| const existingIndex = indexFromKey.get(key); | ||
| if (existingIndex === undefined) { | ||
| fields.push([keyEncoded, shapeIndex]); | ||
| } else { | ||
| const index = fields[existingIndex]; | ||
| assert(index !== undefined, "expected existing field index"); | ||
| fields[existingIndex] = [index[0], shapeIndex]; | ||
| } | ||
| } | ||
|
|
||
| return { | ||
| type: base.type, | ||
| value: resolveOverride(overrides.value, base.value), | ||
| fields: fields.length > 0 ? fields : undefined, | ||
| extraFields: resolveOverride(overrides.extraFields, base.extraFields), | ||
| }; | ||
| } | ||
|
|
||
| // Resolves one optional override against the base: `undefined` means the override is absent (inherit `baseValue`); | ||
| // `null` is the explicit-clear sentinel (the result is `undefined`); any other value replaces the base's. The sentinel is | ||
| // needed because JSON.stringify drops `undefined`-valued properties, so "present but undefined" cannot be told apart from "absent" on the wire. | ||
| function resolveOverride<T>( | ||
| // eslint-disable-next-line @rushstack/no-new-null | ||
| override: T | null | undefined, | ||
| baseValue: T | undefined, | ||
| ): T | undefined { | ||
| if (override === undefined) { | ||
| return baseValue; | ||
| } | ||
| if (override === null) { | ||
| return undefined; | ||
| } | ||
| return override; | ||
| } | ||
|
|
||
| /** | ||
| * Decoder for {@link EncodedSpecializedNodeShape}s. | ||
| * Applies the specialization's overrides to the resolved base node shape, then delegates | ||
| * to a {@link NodeDecoder} built from the resulting shape. | ||
| * | ||
| * @remarks | ||
| * The specialization chain is resolved once, in the constructor, via {@link normalizeToNodeShape}; | ||
| * `decode` only forwards its arguments to the inner {@link NodeDecoder}. | ||
| */ | ||
| export class SpecializedNodeDecoder implements ChunkDecoder { | ||
| private readonly inner: NodeDecoder; | ||
| public constructor( | ||
| shape: EncodedSpecializedNodeShape, | ||
| context: DecoderContext<EncodedChunkShape>, | ||
| ) { | ||
| this.inner = new NodeDecoder(normalizeToNodeShape(shape, context), context); | ||
| } | ||
| public decode(decoders: readonly ChunkDecoder[], stream: StreamCursor): TreeChunk { | ||
| return this.inner.decode(decoders, stream); | ||
| } | ||
| } | ||
|
|
||
| const decoderLibrary = new DiscriminatedUnionDispatcher< | ||
| EncodedChunkShapeV1OrV2, | ||
| [context: DecoderContext<EncodedChunkShapeV1OrV2>], | ||
| EncodedChunkShape, | ||
| [context: DecoderContext<EncodedChunkShape>], | ||
| ChunkDecoder | ||
| >({ | ||
| a(shape: EncodedNestedArrayShape, context): ChunkDecoder { | ||
|
|
@@ -99,6 +221,9 @@ const decoderLibrary = new DiscriminatedUnionDispatcher< | |
| ): ChunkDecoder { | ||
| return new IncrementalChunkDecoder(context); | ||
| }, | ||
| f(shape: EncodedSpecializedNodeShape, context): ChunkDecoder { | ||
| return new SpecializedNodeDecoder(shape, context); | ||
| }, | ||
| }); | ||
|
|
||
| /** | ||
|
|
@@ -300,7 +425,7 @@ type BasicFieldDecoder = ( | |
| * Get a decoder for fields of a provided (via `shape` and `context`). | ||
| */ | ||
| function fieldDecoder( | ||
| context: DecoderContext<EncodedChunkShapeV1OrV2>, | ||
| context: DecoderContext<EncodedChunkShape>, | ||
| key: FieldKey, | ||
| shape: number, | ||
| ): BasicFieldDecoder { | ||
|
|
@@ -319,7 +444,7 @@ export class NodeDecoder implements ChunkDecoder { | |
| private readonly fieldDecoders: readonly BasicFieldDecoder[]; | ||
| public constructor( | ||
| private readonly shape: EncodedNodeShape, | ||
| private readonly context: DecoderContext<EncodedChunkShapeV1OrV2>, | ||
| private readonly context: DecoderContext<EncodedChunkShape>, | ||
| ) { | ||
| this.type = shape.type === undefined ? undefined : context.identifier(shape.type); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| /*! | ||
| * Copyright (c) Microsoft Corporation and contributors. All rights reserved. | ||
| * Licensed under the MIT License. | ||
| */ | ||
|
|
||
| import { type Static, Type } from "@sinclair/typebox"; | ||
|
|
||
| import { unionOptions } from "../../../../codec/index.js"; | ||
|
|
||
| import { ShapeIndex } from "./formatGeneric.js"; | ||
| import { EncodedFieldShape, EncodedValueShape } from "./formatV1.js"; | ||
| import { shapesV2 } from "./formatV2.js"; | ||
|
|
||
| /** | ||
| * A node shape that derives from another node shape by overlaying property-level overrides. | ||
| * | ||
| * @remarks | ||
| * Compresses runs of node shapes that differ only in a few properties: a base node shape | ||
| * defines the structural skeleton, and the specialization narrows specific properties. | ||
| * | ||
| * For example, a base `FormatNode` with a variable-boolean `bold` field can be specialized | ||
| * to a shape that pins `bold` to a constant `true` — every node decoded with the | ||
| * specialization contributes zero stream tokens for `bold`. | ||
| * | ||
| * Specialization rules: `type` is always inherited from the resolved base. `fields` overrides | ||
| * apply per-key: entries whose key matches a base field replace that entry's shape index in | ||
| * place; entries with new keys are appended after all base fields. For `value` and | ||
| * `extraFields`: if the property is absent on the wire, the base's value is inherited; if | ||
| * `null`, the resulting shape has no value / no extraFields (explicit clear); any other value | ||
| * replaces the base's. | ||
| * | ||
| * For instance, overlaying `fields: [[b, 5], [c, 6]]` onto a base with `fields: [[a, 1], [b, 2]]` | ||
| * yields `[[a, 1], [b, 5], [c, 6]]`: `b` keeps its position with the new shape index, and `c` | ||
| * is appended. | ||
| * | ||
| * The `null` sentinel exists because JSON does not preserve `undefined`-valued properties, | ||
| * so override-vs-inherit cannot be discriminated by property presence after persistence. | ||
| * | ||
| * The schema is declared with `additionalProperties: false`, so only the four properties below | ||
| * are part of the format. | ||
| * | ||
| * Decoded by {@link applySpecialization}. | ||
| */ | ||
| export type EncodedSpecializedNodeShape = Static<typeof EncodedSpecializedNodeShape>; | ||
| export const EncodedSpecializedNodeShape = Type.Object( | ||
| { | ||
| /** | ||
| * Index into the enclosing batch's shapes array of the shape this specializes. | ||
| * Must resolve to either an {@link EncodedNodeShape} or another | ||
| * `EncodedSpecializedNodeShape`; chains are followed transitively until a node shape | ||
| * is reached. This restriction is enforced at runtime, not by the schema. | ||
| */ | ||
| base: ShapeIndex, | ||
| /** | ||
| * Field-level overrides applied to the resolved base's `fields`. Entries whose key | ||
| * matches a base field replace that field's shape index in place; entries with new | ||
| * keys are appended after all base fields, in the order given here. Base field order | ||
| * is preserved — this is the stream consumption order at decode time, so encoders | ||
| * must serialize per-field tokens in the resulting field order, not in this list's order. | ||
| */ | ||
| fields: Type.Optional(Type.Array(EncodedFieldShape)), | ||
| /** | ||
| * If absent, inherits the resolved base's value shape. If `null`, the resulting shape | ||
| * has no value shape (explicit clear). Any other value replaces the base's. | ||
| */ | ||
| value: Type.Optional(Type.Union([EncodedValueShape, Type.Null()])), | ||
| /** | ||
| * If absent, inherits the resolved base's extraFields shape. If `null`, the resulting | ||
| * shape has no extraFields (explicit clear). Any other value replaces the base's. | ||
| */ | ||
| extraFields: Type.Optional(Type.Union([ShapeIndex, Type.Null()])), | ||
| }, | ||
| { additionalProperties: false }, | ||
| ); | ||
|
|
||
| /** | ||
| * Experimental extension of {@link EncodedChunkShapeV2}. | ||
| * | ||
| * Spreads every member of `shapesV2` and adds one optional member, `f`, holding an | ||
| * {@link EncodedSpecializedNodeShape}. | ||
| * @remarks | ||
| * See {@link DiscriminatedUnionDispatcher} for more information on this pattern. | ||
| */ | ||
| export type EncodedChunkShapeVTextExperimental = Static< | ||
| typeof EncodedChunkShapeVTextExperimental | ||
| >; | ||
| export const EncodedChunkShapeVTextExperimental = Type.Object( | ||
| { | ||
| ...shapesV2, | ||
| f: Type.Optional(EncodedSpecializedNodeShape), | ||
| }, | ||
| unionOptions, | ||
| ); |
Uh oh!
There was an error while loading. Please reload this page.