Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,17 @@ import {
import type { IncrementalDecoder } from "./codecs.js";
import {
type EncodedAnyShape,
type EncodedChunkShapeV1OrV2,
type EncodedChunkShape,
type EncodedChunkShapeV2,
type EncodedFieldBatchV1OrV2,
type EncodedFieldBatchV2,
type EncodedIncrementalChunkShape,
type EncodedInlineArrayShape,
type EncodedNestedArrayShape,
type EncodedNodeShape,
type EncodedSpecializedNodeShape,
type EncodedValueShape,
type ShapeIndex,
SpecialField,
supportsIncrementalEncoding,
} from "./format/index.js";
Expand Down Expand Up @@ -76,9 +78,129 @@ export function decode(
);
}

/**
* Resolves `shapeIndex` to a fully-resolved {@link EncodedNodeShape}, normalizing away any
* specialized node shapes (`f`) along the way by applying their overlays via
* {@link applySpecialization} until a concrete node shape is reached.
*
* @param input - The index of the shape to resolve, which must be a concrete or specialized node shape.
* @param context - The decoding context containing the shape definitions.
* @param pendingResolution - (Internal) A set of shape indices visited so far in the current resolution chain, used to detect cycles in the specialization chain. Most callers should not provide this argument.
*
* @remarks
* Exported for testing.
*/
export function normalizeToNodeShape(
input: EncodedNodeShape | EncodedSpecializedNodeShape,
context: DecoderContext<EncodedChunkShape>,
pendingResolution: Set<ShapeIndex> = new Set(),
): EncodedNodeShape {
if (!("base" in input)) {
return input;
}

const baseIndex = input.base;
assert(!pendingResolution.has(baseIndex), "cyclic specialized node shape chain");
pendingResolution.add(baseIndex);
const encoded = context.shapes[baseIndex];
assert(encoded !== undefined, "shape index out of bounds");

const baseShape = encoded.c ?? ("f" in encoded ? encoded.f : undefined);
assert(
baseShape !== undefined,
"shape at index must be a concrete (c) or specialized (f) node shape",
);

return applySpecialization(
normalizeToNodeShape(baseShape, context, pendingResolution),
input,
context,
);
Comment thread
justus-camp-microsoft marked this conversation as resolved.
}

/**
* Produces a specialized {@link EncodedNodeShape} by overlaying `overrides` onto `base`.
*
* See {@link EncodedSpecializedNodeShape} for the override/inherit/clear semantics.
*
* @remarks
* Exported for testing.
*/
export function applySpecialization(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EncodedSpecializedNodeShape documents the merge rules: this should refer to/link that and that should likely link here as well.

Also I noticed an issue with those documented rules:

extraFields are inherited unless the specialization sets them as own properties — to

  • inherit, the property must be omitted; setting it explicitly (even to false or
  • undefined) is treated as an override.

These types are supposed to be JSON compatible since they get json serialized. JSON does not preserve undefined properties, so we should not have them be significant or it won't work with real encoded data (I'm not sure if this is a docs bug or a real format issue)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also that doc calls these rules "Merge rules": given we have a bunch of logic about merges in this codebase which is about a different thing, calling them specialization rules or override rules would be more clear I think.

base: EncodedNodeShape,
overrides: EncodedSpecializedNodeShape,
context: DecoderContext<EncodedChunkShape>,
): EncodedNodeShape {
const fields = [...(base.fields ?? [])];
const indexFromKey = new Map<FieldKey, number>();
for (const [i, [keyEncoded]] of fields.entries()) {
const key = context.identifier<FieldKey>(keyEncoded);
assert(!indexFromKey.has(key), "duplicate field key in base node shape");
indexFromKey.set(key, i);
}

// Replace fields in base with overrides, append new keys in overrides in the order they are specified.
const seenOverrideKeys = new Set<FieldKey>();
for (const [keyEncoded, shapeIndex] of overrides.fields ?? []) {
const key = context.identifier<FieldKey>(keyEncoded);
assert(!seenOverrideKeys.has(key), "duplicate field key in specialized node shape");
seenOverrideKeys.add(key);
const existingIndex = indexFromKey.get(key);
if (existingIndex === undefined) {
fields.push([keyEncoded, shapeIndex]);
} else {
const index = fields[existingIndex];
assert(index !== undefined, "expected existing field index");
fields[existingIndex] = [index[0], shapeIndex];
}
}

return {
type: base.type,
value: resolveOverride(overrides.value, base.value),
fields: fields.length > 0 ? fields : undefined,
extraFields: resolveOverride(overrides.extraFields, base.extraFields),
};
}

/**
 * Resolves a single override property against the base's value.
 *
 * `undefined` means the override is absent on the wire (inherit from base); `null` is the
 * explicit-clear sentinel, needed because JSON.stringify drops `undefined`-valued properties,
 * making property-presence indistinguishable from absent on the wire.
 */
function resolveOverride<T>(
	// eslint-disable-next-line @rushstack/no-new-null
	override: T | null | undefined,
	baseValue: T | undefined,
): T | undefined {
	// Absent property: inherit whatever the base had.
	if (override === undefined) {
		return baseValue;
	}
	// `null` clears the property; anything else replaces the base's value.
	return override === null ? undefined : override;
}

/**
 * Decoder for {@link EncodedSpecializedNodeShape}s.
 * Resolves the specialization against its base shape once at construction time, then
 * forwards all decoding to a {@link NodeDecoder} built from the resulting shape.
 */
export class SpecializedNodeDecoder implements ChunkDecoder {
	private readonly inner: NodeDecoder;

	public constructor(
		shape: EncodedSpecializedNodeShape,
		context: DecoderContext<EncodedChunkShape>,
	) {
		// Resolve up front so per-chunk decoding pays no specialization cost.
		const resolved = normalizeToNodeShape(shape, context);
		this.inner = new NodeDecoder(resolved, context);
	}

	public decode(decoders: readonly ChunkDecoder[], stream: StreamCursor): TreeChunk {
		return this.inner.decode(decoders, stream);
	}
}

const decoderLibrary = new DiscriminatedUnionDispatcher<
EncodedChunkShapeV1OrV2,
[context: DecoderContext<EncodedChunkShapeV1OrV2>],
EncodedChunkShape,
[context: DecoderContext<EncodedChunkShape>],
ChunkDecoder
>({
a(shape: EncodedNestedArrayShape, context): ChunkDecoder {
Expand All @@ -99,6 +221,9 @@ const decoderLibrary = new DiscriminatedUnionDispatcher<
): ChunkDecoder {
return new IncrementalChunkDecoder(context);
},
f(shape: EncodedSpecializedNodeShape, context): ChunkDecoder {
return new SpecializedNodeDecoder(shape, context);
},
});

/**
Expand Down Expand Up @@ -300,7 +425,7 @@ type BasicFieldDecoder = (
* Get a decoder for fields of a provided (via `shape` and `context`).
*/
function fieldDecoder(
context: DecoderContext<EncodedChunkShapeV1OrV2>,
context: DecoderContext<EncodedChunkShape>,
key: FieldKey,
shape: number,
): BasicFieldDecoder {
Expand All @@ -319,7 +444,7 @@ export class NodeDecoder implements ChunkDecoder {
private readonly fieldDecoders: readonly BasicFieldDecoder[];
public constructor(
private readonly shape: EncodedNodeShape,
private readonly context: DecoderContext<EncodedChunkShapeV1OrV2>,
private readonly context: DecoderContext<EncodedChunkShape>,
) {
this.type = shape.type === undefined ? undefined : context.identifier(shape.type);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import type { FieldBatch } from "./fieldBatch.js";
import {
type EncodedAnyShape,
type EncodedChunkShapeV1,
type EncodedChunkShapeV1OrV2,
type EncodedChunkShape,
type EncodedChunkShapeV2,
type EncodedFieldBatchV1OrV2,
type EncodedNestedArrayShape,
Expand Down Expand Up @@ -63,8 +63,8 @@ export function compressedEncode(
return updateShapesAndIdentifiersEncoding(context.version, batchBuffer);
}

export type BufferFormat = BufferFormatGeneric<EncodedChunkShapeV1OrV2>;
export type Shape = ShapeGeneric<EncodedChunkShapeV1OrV2>;
export type BufferFormat = BufferFormatGeneric<EncodedChunkShape>;
export type Shape = ShapeGeneric<EncodedChunkShape>;

/**
* Like {@link FieldEncoder}, except data will be prefixed with the key.
Expand Down Expand Up @@ -166,7 +166,7 @@ export function asNodesEncoder(encoder: NodeEncoder): NodesEncoder {
/**
* Encodes a chunk with {@link EncodedAnyShape} by prefixing the data with its shape.
*/
export class AnyShape extends ShapeGeneric<EncodedChunkShapeV1OrV2> {
export class AnyShape extends ShapeGeneric<EncodedChunkShape> {
private constructor() {
super();
}
Expand Down Expand Up @@ -271,7 +271,7 @@ export const anyFieldEncoder: FieldEncoder = {
* which is an easy way to keep all the related code together without extra objects.
*/
export class InlineArrayEncoder
extends ShapeGeneric<EncodedChunkShapeV1OrV2>
extends ShapeGeneric<EncodedChunkShape>
implements NodesEncoder, FieldEncoder
{
public static readonly empty: InlineArrayEncoder = new InlineArrayEncoder(0, {
Expand Down Expand Up @@ -355,7 +355,7 @@ export class InlineArrayEncoder
/**
* Encodes the shape for a nested array as {@link EncodedNestedArrayShape} shape.
*/
export class NestedArrayShape extends ShapeGeneric<EncodedChunkShapeV1OrV2> {
export class NestedArrayShape extends ShapeGeneric<EncodedChunkShape> {
/**
* @param innerShape - The shape of each item in this nested array.
*/
Expand All @@ -366,7 +366,7 @@ export class NestedArrayShape extends ShapeGeneric<EncodedChunkShapeV1OrV2> {
public encodeShape(
identifiers: DeduplicationTable<string>,
shapes: DeduplicationTable<Shape>,
): EncodedChunkShapeV1OrV2 {
): EncodedChunkShape {
const shape: EncodedNestedArrayShape =
shapes.valueToIndex.get(this.innerShape) ??
fail(0xb4f /* index for shape not found in table */);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,20 @@ import { shapesV1 } from "./formatV1.js";
export type EncodedIncrementalChunkShape = Static<typeof EncodedIncrementalChunkShape>;
export const EncodedIncrementalChunkShape = Type.Literal(0);

/**
 * The chunk shapes supported by the V2 format.
 * @remarks
 * Extends {@link shapesV1} with `e`, the incremental chunk shape added in V2.
 * Spread into {@link EncodedChunkShapeV2}, and reusable by later format extensions.
 * See {@link EncodedChunkShapeV2}.
 */
export const shapesV2 = {
	...shapesV1,
	e: Type.Optional(EncodedIncrementalChunkShape),
} as const;

/**
* V2 extension of {@link EncodedChunkShapeV1}.
* @remarks
* See {@link DiscriminatedUnionDispatcher} for more information on this pattern.
*/
export type EncodedChunkShapeV2 = Static<typeof EncodedChunkShapeV2>;
export const EncodedChunkShapeV2 = Type.Object(
{
...shapesV1,
e: Type.Optional(EncodedIncrementalChunkShape),
},
unionOptions,
);
export const EncodedChunkShapeV2 = Type.Object(shapesV2, unionOptions);
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*!
* Copyright (c) Microsoft Corporation and contributors. All rights reserved.
* Licensed under the MIT License.
*/

import { type Static, Type } from "@sinclair/typebox";

import { unionOptions } from "../../../../codec/index.js";

import { ShapeIndex } from "./formatGeneric.js";
import { EncodedFieldShape, EncodedValueShape } from "./formatV1.js";
import { shapesV2 } from "./formatV2.js";

/**
 * A node shape that derives from another node shape by overlaying property-level overrides.
 *
 * @remarks
 * Compresses runs of node shapes that differ only in a few properties: a base node shape
 * defines the structural skeleton, and the specialization narrows specific properties.
 *
 * For example, a base `FormatNode` with a variable-boolean `bold` field can be specialized
 * to a shape that pins `bold` to a constant `true` — every node decoded with the
 * specialization contributes zero stream tokens for `bold`.
 *
 * Specialization rules: `type` is always inherited from the resolved base. `fields` overrides
 * apply per-key: entries whose key matches a base field replace that entry's shape index in
 * place; entries with new keys are appended after all base fields. For `value` and
 * `extraFields`: if the property is absent on the wire, the base's value is inherited; if
 * `null`, the resulting shape has no value / no extraFields (explicit clear); any other value
 * replaces the base's.
 *
 * The `null` sentinel exists because JSON does not preserve `undefined`-valued properties,
 * so override-vs-inherit cannot be discriminated by property presence after persistence.
 *
 * Decoded by {@link applySpecialization}, after base resolution by {@link normalizeToNodeShape}.
 */
export type EncodedSpecializedNodeShape = Static<typeof EncodedSpecializedNodeShape>;
export const EncodedSpecializedNodeShape = Type.Object(
	{
		/**
		 * Index into the enclosing batch's shapes array of the shape this specializes.
		 * Must resolve to either an {@link EncodedNodeShape} or another
		 * `EncodedSpecializedNodeShape`; chains are followed transitively until a node shape
		 * is reached. This restriction (and cycle-freedom) is enforced at runtime, not by the schema.
		 */
		base: ShapeIndex,
		/**
		 * Field-level overrides applied to the resolved base's `fields`. Entries whose key
		 * matches a base field replace that field's shape index in place; entries with new
		 * keys are appended after all base fields, in the order given here. Base field order
		 * is preserved — this is the stream consumption order at decode time, so encoders
		 * must serialize per-field tokens in the resulting field order, not in this list's order.
		 */
		fields: Type.Optional(Type.Array(EncodedFieldShape)),
		/**
		 * If absent, inherits the resolved base's value shape. If `null`, the resulting shape
		 * has no value shape (explicit clear). Any other value replaces the base's.
		 */
		value: Type.Optional(Type.Union([EncodedValueShape, Type.Null()])),
		/**
		 * If absent, inherits the resolved base's extraFields shape. If `null`, the resulting
		 * shape has no extraFields (explicit clear). Any other value replaces the base's.
		 */
		extraFields: Type.Optional(Type.Union([ShapeIndex, Type.Null()])),
	},
	{ additionalProperties: false },
);

/**
 * Experimental extension of {@link EncodedChunkShapeV2}.
 * @remarks
 * Adds `f` ({@link EncodedSpecializedNodeShape}) on top of the V2 shape set.
 * See {@link DiscriminatedUnionDispatcher} for more information on this pattern.
 */
export type EncodedChunkShapeVTextExperimental = Static<
	typeof EncodedChunkShapeVTextExperimental
>;
export const EncodedChunkShapeVTextExperimental = Type.Object(
	{
		...shapesV2,
		f: Type.Optional(EncodedSpecializedNodeShape),
	},
	unionOptions,
);
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@ export {
SpecialField,
} from "./formatV1.js";
export { EncodedIncrementalChunkShape, EncodedChunkShapeV2 } from "./formatV2.js";
export {
EncodedChunkShapeVTextExperimental,
EncodedSpecializedNodeShape,
} from "./formatVText.js";
export {
FieldBatchFormatVersion,
EncodedFieldBatchV1,
EncodedFieldBatchV2,
EncodedFieldBatchVTextExperimental,
supportsIncrementalEncoding,
type EncodedFieldBatchV1OrV2,
type EncodedFieldBatchV1AndV2,
type EncodedChunkShapeV1OrV2,
type EncodedChunkShape,
} from "./versions.js";
export type {
ShapeIndex,
Expand Down
Loading
Loading