microsoft · justus-camp-microsoft · Apr 20, 2026 · Apr 30, 2026 · May 5, 2026 · May 6, 2026
@@ -40,14 +40,17 @@ import {
 import type { IncrementalDecoder } from "./codecs.js";
 import {
 	type EncodedAnyShape,
-	type EncodedChunkShapeV1OrV2,
+	type EncodedChunkShape,
 	type EncodedChunkShapeV2,
+	type EncodedChunkShapeVTextExperimental,
 	type EncodedFieldBatchV1OrV2,
 	type EncodedFieldBatchV2,
+	type EncodedFieldShape,
 	type EncodedIncrementalChunkShape,
 	type EncodedInlineArrayShape,
 	type EncodedNestedArrayShape,
 	type EncodedNodeShape,
+	type EncodedSpecializedNodeShape,
 	type EncodedValueShape,
 	SpecialField,
 	supportsIncrementalEncoding,
@@ -76,9 +79,120 @@ export function decode(
 	);
 }
 
+/**
+ * Resolves `shapeIndex` to a fully-merged {@link EncodedNodeShape}, normalizing away any
+ * specialized node shapes (`f`) along the way by applying their overlays via
+ * {@link applySpecialization} until a concrete node shape is reached.
+ *
+ * @param shapeIndex - Index into `context.shapes` of the shape to resolve.
+ * @param context - Decoder context providing the `shapes` table; it is also forwarded to
+ * {@link applySpecialization} for each specialization in the chain.
+ * @param visited - Shape indices already seen on the current chain, used to detect cycles.
+ * This set is mutated (`shapeIndex` is added to it). Defaults to a fresh empty set.
+ * @returns The concrete node shape (`c`) itself when `shapeIndex` refers to one; otherwise the
+ * result of resolving the specialization's `base` first and then overlaying the specialization
+ * on top of it.
+ *
+ * @remarks
+ * Asserts if the chain revisits a shape index, if `shapeIndex` has no entry in
+ * `context.shapes`, or if the shape is neither a node shape (`c`) nor a specialized node
+ * shape (`f`).
+ *
+ * Exported for testing.
+ */
+export function normalizeToNodeShape(
+	shapeIndex: number,
+	context: DecoderContext<EncodedChunkShape>,
+	visited: Set<number> = new Set(),
+): EncodedNodeShape {
+	assert(!visited.has(shapeIndex), "cyclic specialized node shape chain");
+	visited.add(shapeIndex);
+	const encoded = context.shapes[shapeIndex];
+	assert(encoded !== undefined, "shape index out of bounds");
+	// Persisted shape variants are discriminated by single-letter property names (see
+	// `EncodedChunkShape`): `c` is a concrete node shape, `f` is a specialized node shape that
+	// derives from another shape via overrides. Other variants (`a`/`b`/`d`/`e`) are
+	// non-node shapes and cannot appear in a specialization chain.
+	if (encoded.c !== undefined) {
+		return encoded.c;
+	}
+	assert(
+		"f" in encoded && encoded.f !== undefined,
+		"shape in specialization chain must be a node shape (c) or specialized node shape (f)",
+	);
+	return applySpecialization(
+		normalizeToNodeShape(encoded.f.base, context, visited),
+		encoded.f,
+		context,
+	);
+}
+
+/**
+ * Produces a merged {@link EncodedNodeShape} by overlaying `spec` onto `base`.
+ *
+ * `type` is always inherited from `base`. `value` and `extraFields` are inherited from `base`
+ * unless `spec` includes them as own properties (regardless of value), in which case `spec`'s
+ * value wins.
+ *
+ * For `fields`, override entries whose key matches a key in `base.fields` replace that entry's
+ * shape index in place; override entries with keys not present in `base` are appended at the end
+ * in `spec.fields` order. Field order from `base` is preserved.
+ *
+ * @param base - The already-resolved node shape being specialized. Not modified.
+ * @param spec - The specialization whose overrides are applied on top of `base`. Not modified.
+ * @param context - Used to resolve encoded field keys via `context.identifier`, so that keys
+ * from `base` and `spec` are compared in their resolved {@link FieldKey} form.
+ * @returns A new node shape object. Its `fields` is `undefined` when the merged field list is
+ * empty.
+ *
+ * @remarks
+ * Asserts if `spec.fields` or `base.fields` contains the same resolved key more than once.
+ *
+ * Exported for testing.
+ */
+export function applySpecialization(
+	base: EncodedNodeShape,
+	spec: EncodedSpecializedNodeShape,
+	context: DecoderContext<EncodedChunkShape>,
+): EncodedNodeShape {
+	// Index the overrides by resolved key so base fields can look up their replacement.
+	const overrides = new Map<FieldKey, number>();
+	for (const [keyEncoded, shapeIndex] of spec.fields ?? []) {
+		const key = context.identifier<FieldKey>(keyEncoded);
+		assert(!overrides.has(key), "duplicate field key in specialized node shape");
+		overrides.set(key, shapeIndex);
+	}
+
+	// Walk the base fields in order, swapping in the override's shape index where one exists.
+	const mergedFields: EncodedFieldShape[] = [];
+	const overriddenKeys = new Set<FieldKey>();
+	const baseKeys = new Set<FieldKey>();
+	for (const [keyEncoded, shapeIndex] of base.fields ?? []) {
+		const key = context.identifier<FieldKey>(keyEncoded);
+		assert(!baseKeys.has(key), "duplicate field key in base node shape");
+		baseKeys.add(key);
+		const overrideShape = overrides.get(key);
+		if (overrideShape === undefined) {
+			mergedFields.push([keyEncoded, shapeIndex]);
+		} else {
+			overriddenKeys.add(key);
+			mergedFields.push([keyEncoded, overrideShape]);
+		}
+	}
+	// Add all fields for all overrides from spec that are new fields, in the order they are specified
+	for (const [keyEncoded, shapeIndex] of spec.fields ?? []) {
+		if (!overriddenKeys.has(context.identifier<FieldKey>(keyEncoded))) {
+			mergedFields.push([keyEncoded, shapeIndex]);
+		}
+	}
+
+	return {
+		type: base.type,
+		value: "value" in spec ? spec.value : base.value,
+		fields: mergedFields.length > 0 ? mergedFields : undefined,
+		extraFields: "extraFields" in spec ? spec.extraFields : base.extraFields,
+	};
+}
+
+/**
+ * Decoder for {@link EncodedSpecializedNodeShape}s.
+ * Merges the specialization's overrides with the resolved base node shape, then delegates
+ * to a {@link NodeDecoder} built from the merged shape.
+ *
+ * @remarks
+ * The base chain is resolved (via {@link normalizeToNodeShape}) and merged (via
+ * {@link applySpecialization}) once, in the constructor. `decode` only forwards its arguments
+ * to the inner {@link NodeDecoder} and returns that decoder's result.
+ */
+export class SpecializedNodeDecoder implements ChunkDecoder {
+	private readonly inner: NodeDecoder;
+	public constructor(
+		shape: EncodedSpecializedNodeShape,
+		context: DecoderContext<EncodedChunkShape>,
+	) {
+		this.inner = new NodeDecoder(
+			applySpecialization(normalizeToNodeShape(shape.base, context), shape, context),
+			context,
+		);
+	}
+	public decode(decoders: readonly ChunkDecoder[], stream: StreamCursor): TreeChunk {
+		return this.inner.decode(decoders, stream);
+	}
+}
+
 const decoderLibrary = new DiscriminatedUnionDispatcher<
-	EncodedChunkShapeV1OrV2,
-	[context: DecoderContext<EncodedChunkShapeV1OrV2>],
+	EncodedChunkShapeVTextExperimental,
+	[context: DecoderContext<EncodedChunkShape>],
 	ChunkDecoder
 >({
 	a(shape: EncodedNestedArrayShape, context): ChunkDecoder {
@@ -99,6 +213,9 @@ const decoderLibrary = new DiscriminatedUnionDispatcher<
 	): ChunkDecoder {
 		return new IncrementalChunkDecoder(context);
 	},
+	f(shape: EncodedSpecializedNodeShape, context): ChunkDecoder {
+		return new SpecializedNodeDecoder(shape, context);
+	},
 });
 
 /**
@@ -300,7 +417,7 @@ type BasicFieldDecoder = (
  * Get a decoder for fields of a provided (via `shape` and `context`).
  */
 function fieldDecoder(
-	context: DecoderContext<EncodedChunkShapeV1OrV2>,
+	context: DecoderContext<EncodedChunkShape>,
 	key: FieldKey,
 	shape: number,
 ): BasicFieldDecoder {
@@ -319,7 +436,7 @@ export class NodeDecoder implements ChunkDecoder {
 	private readonly fieldDecoders: readonly BasicFieldDecoder[];
 	public constructor(
 		private readonly shape: EncodedNodeShape,
-		private readonly context: DecoderContext<EncodedChunkShapeV1OrV2>,
+		private readonly context: DecoderContext<EncodedChunkShape>,
 	) {
 		this.type = shape.type === undefined ? undefined : context.identifier(shape.type);
 

@@ -31,7 +31,7 @@ import type { FieldBatch } from "./fieldBatch.js";
 import {
 	type EncodedAnyShape,
 	type EncodedChunkShapeV1,
-	type EncodedChunkShapeV1OrV2,
+	type EncodedChunkShape,
 	type EncodedChunkShapeV2,
 	type EncodedFieldBatchV1OrV2,
 	type EncodedNestedArrayShape,
@@ -63,8 +63,8 @@ export function compressedEncode(
 	return updateShapesAndIdentifiersEncoding(context.version, batchBuffer);
 }
 
-export type BufferFormat = BufferFormatGeneric<EncodedChunkShapeV1OrV2>;
-export type Shape = ShapeGeneric<EncodedChunkShapeV1OrV2>;
+export type BufferFormat = BufferFormatGeneric<EncodedChunkShape>;
+export type Shape = ShapeGeneric<EncodedChunkShape>;
 
 /**
  * Like {@link FieldEncoder}, except data will be prefixed with the key.
@@ -166,7 +166,7 @@ export function asNodesEncoder(encoder: NodeEncoder): NodesEncoder {
 /**
  * Encodes a chunk with {@link EncodedAnyShape} by prefixing the data with its shape.
  */
-export class AnyShape extends ShapeGeneric<EncodedChunkShapeV1OrV2> {
+export class AnyShape extends ShapeGeneric<EncodedChunkShape> {
 	private constructor() {
 		super();
 	}
@@ -271,7 +271,7 @@ export const anyFieldEncoder: FieldEncoder = {
  * which is an easy way to keep all the related code together without extra objects.
  */
 export class InlineArrayEncoder
-	extends ShapeGeneric<EncodedChunkShapeV1OrV2>
+	extends ShapeGeneric<EncodedChunkShape>
 	implements NodesEncoder, FieldEncoder
 {
 	public static readonly empty: InlineArrayEncoder = new InlineArrayEncoder(0, {
@@ -355,7 +355,7 @@ export class InlineArrayEncoder
 /**
  * Encodes the shape for a nested array as {@link EncodedNestedArrayShape} shape.
  */
-export class NestedArrayShape extends ShapeGeneric<EncodedChunkShapeV1OrV2> {
+export class NestedArrayShape extends ShapeGeneric<EncodedChunkShape> {
 	/**
 	 * @param innerShape - The shape of each item in this nested array.
 	 */
@@ -366,7 +366,7 @@ export class NestedArrayShape extends ShapeGeneric<EncodedChunkShapeV1OrV2> {
 	public encodeShape(
 		identifiers: DeduplicationTable<string>,
 		shapes: DeduplicationTable<Shape>,
-	): EncodedChunkShapeV1OrV2 {
+	): EncodedChunkShape {
 		const shape: EncodedNestedArrayShape =
 			shapes.valueToIndex.get(this.innerShape) ??
 			fail(0xb4f /* index for shape not found in table */);

@@ -16,16 +16,20 @@ import { shapesV1 } from "./formatV1.js";
 export type EncodedIncrementalChunkShape = Static<typeof EncodedIncrementalChunkShape>;
 export const EncodedIncrementalChunkShape = Type.Literal(0);
 
+/**
+ * The chunk shapes supported by the V2 format.
+ * @remarks
+ * Contains every entry of `shapesV1` plus an optional `e` entry holding an
+ * {@link EncodedIncrementalChunkShape}.
+ *
+ * See {@link EncodedChunkShapeV2}.
+ */
+export const shapesV2 = {
+	...shapesV1,
+	e: Type.Optional(EncodedIncrementalChunkShape),
+} as const;
+
 /**
  * V2 extension of {@link EncodedChunkShapeV1}.
  * @remarks
  * See {@link DiscriminatedUnionDispatcher} for more information on this pattern.
  */
 export type EncodedChunkShapeV2 = Static<typeof EncodedChunkShapeV2>;
-export const EncodedChunkShapeV2 = Type.Object(
-	{
-		...shapesV1,
-		e: Type.Optional(EncodedIncrementalChunkShape),
-	},
-	unionOptions,
-);
+export const EncodedChunkShapeV2 = Type.Object(shapesV2, unionOptions);
@@ -0,0 +1,74 @@
+/*!
+ * Copyright (c) Microsoft Corporation and contributors. All rights reserved.
+ * Licensed under the MIT License.
+ */
+
+import { type Static, Type } from "@sinclair/typebox";
+
+import { unionOptions } from "../../../../codec/index.js";
+
+import { ShapeIndex } from "./formatGeneric.js";
+import { EncodedFieldShape, EncodedValueShape } from "./formatV1.js";
+import { shapesV2 } from "./formatV2.js";
+
+/**
+ * A node shape that derives from another node shape by overlaying property-level overrides.
+ *
+ * @remarks
+ * Compresses runs of node shapes that differ only in a few properties: a base node shape
+ * defines the structural skeleton, and the specialization narrows specific properties.
+ *
+ * For example, a base `FormatNode` with a variable-boolean `bold` field can be specialized
+ * to a shape that pins `bold` to a constant `true` — every node decoded with the
+ * specialization contributes zero stream tokens for `bold`.
+ *
+ * Merge rules: `type` is always inherited from the resolved base. `value` and `extraFields`
+ * are inherited unless the specialization sets them as own properties — to inherit, the
+ * property must be omitted; setting it explicitly (even to `false` or `undefined`) is
+ * treated as an override. `fields` is never replaced wholesale: its entries are merged
+ * with the resolved base's fields key by key, as described on the `fields` property.
+ */
+export type EncodedSpecializedNodeShape = Static<typeof EncodedSpecializedNodeShape>;
+export const EncodedSpecializedNodeShape = Type.Object(
+	{
+		/**
+		 * Index into the enclosing batch's shapes array of the shape this specializes.
+		 * Must resolve to either an {@link EncodedNodeShape} or another
+		 * `EncodedSpecializedNodeShape`; chains are followed transitively until a node shape
+		 * is reached. This restriction is enforced at runtime, not by the schema.
+		 */
+		base: ShapeIndex,
+		/**
+		 * Field-level overrides applied to the resolved base's `fields`. Entries whose key
+		 * matches a base field replace that field's shape index in place; entries with new
+		 * keys are appended after all base fields, in the order given here. Base field order
+		 * is preserved — this is the stream consumption order at decode time, so encoders
+		 * must serialize per-field tokens in the merged order, not in this list's order.
+		 */
+		fields: Type.Optional(Type.Array(EncodedFieldShape)),
+		/**
+		 * If present, replaces the resolved base's value shape.
+		 */
+		value: Type.Optional(EncodedValueShape),
+		/**
+		 * If present, replaces the resolved base's extraFields shape.
+		 */
+		extraFields: Type.Optional(ShapeIndex),
+	},
+	{ additionalProperties: false },
+);
+
+/**
+ * Experimental extension of {@link EncodedChunkShapeV2}.
+ * @remarks
+ * Contains every entry of `shapesV2` plus an optional `f` entry holding an
+ * {@link EncodedSpecializedNodeShape}.
+ *
+ * See {@link DiscriminatedUnionDispatcher} for more information on this pattern.
+ */
+export type EncodedChunkShapeVTextExperimental = Static<
+	typeof EncodedChunkShapeVTextExperimental
+>;
+export const EncodedChunkShapeVTextExperimental = Type.Object(
+	{
+		...shapesV2,
+		f: Type.Optional(EncodedSpecializedNodeShape),
+	},
+	unionOptions,
+);
@@ -16,14 +16,19 @@ export {
 	SpecialField,
 } from "./formatV1.js";
 export { EncodedIncrementalChunkShape, EncodedChunkShapeV2 } from "./formatV2.js";
+export {
+	EncodedChunkShapeVTextExperimental,
+	EncodedSpecializedNodeShape,
+} from "./formatVText.js";
 export {
 	FieldBatchFormatVersion,
 	EncodedFieldBatchV1,
 	EncodedFieldBatchV2,
+	EncodedFieldBatchVTextExperimental,
 	supportsIncrementalEncoding,
 	type EncodedFieldBatchV1OrV2,
 	type EncodedFieldBatchV1AndV2,
-	type EncodedChunkShapeV1OrV2,
+	type EncodedChunkShape,
 } from "./versions.js";
 export type {
 	ShapeIndex,