From cdceb7533f2889e851da8a4cc7f623e5e517b25d Mon Sep 17 00:00:00 2001 From: Joshua Smithrud <54606601+Josmithr@users.noreply.github.com> Date: Tue, 5 May 2026 22:33:54 +0000 Subject: [PATCH 1/3] improvement(tree): Maintain column/row ID -> node cache for faster lookup --- packages/dds/tree/src/tableSchema.ts | 140 ++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 13 deletions(-) diff --git a/packages/dds/tree/src/tableSchema.ts b/packages/dds/tree/src/tableSchema.ts index bc288a2f696c..54fc766b0972 100644 --- a/packages/dds/tree/src/tableSchema.ts +++ b/packages/dds/tree/src/tableSchema.ts @@ -7,7 +7,7 @@ import { fail } from "@fluidframework/core-utils/internal"; import { UsageError } from "@fluidframework/telemetry-utils/internal"; import { EmptyKey } from "./core/index.js"; -import { TreeAlpha } from "./shared-tree/index.js"; +import { Tree, TreeAlpha } from "./shared-tree/index.js"; import { type FieldHasDefault, type ImplicitAllowedTypes, @@ -951,6 +951,62 @@ export namespace System_TableSchema { return cell; } + // #region ID lookup caches + + // Looking up rows/columns by string ID is a hot path (every getCell / setCell / etc. call goes + // through #tryGetRow or #tryGetColumn). Rather than scanning the arrays linearly on every call, + // we maintain lazily-built Maps from ID → node. + // + // Cache invalidation: + // Each cache is marked stale (reset to `undefined`) by a `nodeChanged` listener registered on + // the corresponding array node (this.table.rows / this.table.columns). `nodeChanged` fires on + // an array node whenever an element is inserted, removed, or moved — exactly the set of + // operations that could change which ID maps to which node. The event fires after the full + // batch of edits has been applied (including remote edits received from collaborators), so the + // cache is always rebuilt from a consistent, in-schema state. + // + // Listener lifetime: + // The listener is registered exactly once per cache (guarded by the non-undefined check on the + // stored unsubscribe callback). Subsequent cache rebuilds after invalidation reuse the same + // listener — no additional subscriptions accumulate. The unsubscribe callback is stored so + // that explicit cleanup is possible in the future if needed. + // + // Unhydrated trees: + // `Tree.on` and `nodeChanged` work for unhydrated nodes as well — all mutations go through + // `#applyEditsInBatch`, which wraps them in `withBufferedTreeEvents`, causing `nodeChanged` + // to fire after each batch regardless of whether the node is hydrated. The invalidation path + // is therefore identical for hydrated and unhydrated tables. + + /** + * Cache from row ID → row node for O(1) lookups in {@link Table.#tryGetRow}. + * `undefined` means the cache is stale and must be rebuilt before use. + */ + #rowCache: Map | undefined = undefined; + + /** + * Unsubscribe function for the `nodeChanged` listener on `this.table.rows`. + * `undefined` means the listener has not yet been registered (first cache build is pending). + * After the first build, this is always defined and the listener remains active for the + * lifetime of the Table node. + */ + #rowCacheUnsubscribe: (() => void) | undefined = undefined; + + /** + * Cache from column ID → column node for O(1) lookups in {@link Table.#tryGetColumn}. + * `undefined` means the cache is stale and must be rebuilt before use. + */ + #columnCache: Map | undefined = undefined; + + /** + * Unsubscribe function for the `nodeChanged` listener on `this.table.columns`. + * `undefined` means the listener has not yet been registered (first cache build is pending). + * After the first build, this is always defined and the listener remains active for the + * lifetime of the Table node. + */ + #columnCacheUnsubscribe: (() => void) | undefined = undefined; + + // #endregion + /** * Applies the provided edits in a "batch". * @@ -994,6 +1050,40 @@ export namespace System_TableSchema { }); } + /** + * Returns the column ID → column node cache, building and caching it first if stale. + * + * @remarks + * The first time this is called, it builds the map from the current contents of + * `this.table.columns` and registers a `nodeChanged` listener on that array. + * The listener invalidates the cache (sets `#columnCache` to `undefined`) whenever + * the column array is structurally modified (insert / remove / move), so every + * subsequent call that follows a structural change automatically triggers a rebuild. + * The listener is registered only once — it is not re-registered on cache rebuilds. + */ + #getColumnCache(): Map { + let cache = this.#columnCache; + if (cache === undefined) { + cache = new Map(); + for (const column of this.table.columns) { + // TypeScript is unable to narrow array element types correctly here. + // See: https://github.com/microsoft/TypeScript/issues/52144 + cache.set((column as ColumnValueType).id, column as ColumnValueType); + } + this.#columnCache = cache; + + // Register the invalidation listener once. The `nodeChanged` event fires on the + // array node itself after any structural change (insert / remove / move), which is + // exactly the set of changes that can alter the ID → node mapping. + if (this.#columnCacheUnsubscribe === undefined) { + this.#columnCacheUnsubscribe = Tree.on(this.columns, "nodeChanged", () => { + this.#columnCache = undefined; + }); + } + } + return cache; + } + /** * Attempts to resolve the provided Column node or ID to a Column node in the table. * Returns `undefined` if there is no match. @@ -1012,12 +1102,7 @@ export namespace System_TableSchema { } if (typeof columnOrIdOrIndex === "string") { - const columnId = columnOrIdOrIndex; - // TypeScript is unable to narrow the types correctly here, hence the casts. - // See: https://github.com/microsoft/TypeScript/issues/52144 - return this.table.columns.find((col) => (col as ColumnValueType).id === columnId) as - | ColumnValueType - | undefined; + return this.#getColumnCache().get(columnOrIdOrIndex); } // If the user provided a node, ensure it actually exists in this table. @@ -1062,6 +1147,40 @@ export namespace System_TableSchema { ); } + /** + * Returns the row ID → row node cache, building and caching it first if stale. + * + * @remarks + * The first time this is called, it builds the map from the current contents of + * `this.table.rows` and registers a `nodeChanged` listener on that array. + * The listener invalidates the cache (sets `#rowCache` to `undefined`) whenever + * the row array is structurally modified (insert / remove / move), so every + * subsequent call that follows a structural change automatically triggers a rebuild. + * The listener is registered only once — it is not re-registered on cache rebuilds. + */ + #getRowCache(): Map { + let cache = this.#rowCache; + if (cache === undefined) { + cache = new Map(); + for (const row of this.table.rows) { + // TypeScript is unable to narrow array element types correctly here. + // See: https://github.com/microsoft/TypeScript/issues/52144 + cache.set((row as RowValueType).id, row as RowValueType); + } + this.#rowCache = cache; + + // Register the invalidation listener once. The `nodeChanged` event fires on the + // array node itself after any structural change (insert / remove / move), which is + // exactly the set of changes that can alter the ID → node mapping. + if (this.#rowCacheUnsubscribe === undefined) { + this.#rowCacheUnsubscribe = Tree.on(this.rows, "nodeChanged", () => { + this.#rowCache = undefined; + }); + } + } + return cache; + } + /** * Attempts to resolve the provided Row node or ID to a Row node in the table. * Returns `undefined` if there is no match. @@ -1078,12 +1197,7 @@ export namespace System_TableSchema { } if (typeof rowOrIdOrIndex === "string") { - const rowId = rowOrIdOrIndex; - // TypeScript is unable to narrow the types correctly here, hence the casts. - // See: https://github.com/microsoft/TypeScript/issues/52144 - return this.table.rows.find((row) => (row as RowValueType).id === rowId) as - | RowValueType - | undefined; + return this.#getRowCache().get(rowOrIdOrIndex); } // If the user provided a node, ensure it actually exists in this table. From 1ebb0fd4dd0d893450243c868c4621b4fa106b6d Mon Sep 17 00:00:00 2001 From: Joshua Smithrud <54606601+Josmithr@users.noreply.github.com> Date: Wed, 6 May 2026 20:11:08 +0000 Subject: [PATCH 2/3] test: Add unit tests --- .../dds/tree/src/test/tableSchema.spec.ts | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/packages/dds/tree/src/test/tableSchema.spec.ts b/packages/dds/tree/src/test/tableSchema.spec.ts index a1500856a7d7..8e06dbe83889 100644 --- a/packages/dds/tree/src/test/tableSchema.spec.ts +++ b/packages/dds/tree/src/test/tableSchema.spec.ts @@ -2503,6 +2503,140 @@ describe("TableFactory unit tests", () => { }); }); + // These tests exercise ID-based lookups across structural mutations + // (insert / remove / reorder) on the columns and rows lists. They are + // intended to catch stale-state regressions in the ID → node lookup + // path, including the lazy ID caches inside Table. + describe("ID lookup after structural changes", () => { + it("getColumn by ID resolves a column inserted after a prior lookup", () => { + const table = create2x2Table(); + + // Prime any internal lookup state with an existing-ID query. + assert(table.getColumn("column-0") !== undefined); + + table.insertColumns({ + index: table.columns.length, + columns: [{ id: "column-2", props: {} }], + }); + + const inserted = table.getColumn("column-2"); + assert(inserted !== undefined); + assert.strictEqual(inserted.id, "column-2"); + // Existing IDs still resolve correctly. + assert.strictEqual(table.getColumn("column-0")?.id, "column-0"); + }); + + it("getColumn by ID returns undefined for a column removed after a prior lookup", () => { + const table = create2x2Table(); + + // Prime any internal lookup state for both columns. + assert(table.getColumn("column-0") !== undefined); + assert(table.getColumn("column-1") !== undefined); + + table.removeColumns(["column-0"]); + + assert.strictEqual(table.getColumn("column-0"), undefined); + assert.strictEqual(table.getColumn("column-1")?.id, "column-1"); + }); + + it("getColumn by ID resolves the moved node after a column reorder", () => { + const table = create2x2Table(); + + // Prime any internal lookup state. + assert(table.getColumn("column-1") !== undefined); + + // Move column-1 to the start. + table.columns.moveToStart(1); + + const after = table.getColumn("column-1"); + assert(after !== undefined); + assert.strictEqual(after.id, "column-1"); + // The other column still resolves. + assert.strictEqual(table.getColumn("column-0")?.id, "column-0"); + // And the table order reflects the move. + assert.deepEqual( + [...table.columns].map((c) => c.id), + ["column-1", "column-0"], + ); + }); + + it("getRow by ID resolves a row inserted after a prior lookup", () => { + const table = create2x2Table(); + + assert(table.getRow("row-0") !== undefined); + + table.insertRows({ + index: table.rows.length, + rows: [{ id: "row-2", cells: {}, props: {} }], + }); + + const inserted = table.getRow("row-2"); + assert(inserted !== undefined); + assert.strictEqual(inserted.id, "row-2"); + assert.strictEqual(table.getRow("row-0")?.id, "row-0"); + }); + + it("getRow by ID returns undefined for a row removed after a prior lookup", () => { + const table = create2x2Table(); + + assert(table.getRow("row-0") !== undefined); + assert(table.getRow("row-1") !== undefined); + + table.removeRows(["row-0"]); + + assert.strictEqual(table.getRow("row-0"), undefined); + assert.strictEqual(table.getRow("row-1")?.id, "row-1"); + }); + + it("getRow by ID resolves the moved node after a row reorder", () => { + const table = create2x2Table(); + + assert(table.getRow("row-1") !== undefined); + + table.rows.moveToStart(1); + + const after = table.getRow("row-1"); + assert(after !== undefined); + assert.strictEqual(after.id, "row-1"); + assert.strictEqual(table.getRow("row-0")?.id, "row-0"); + assert.deepEqual( + [...table.rows].map((r) => r.id), + ["row-1", "row-0"], + ); + }); + + it("setCell by ID targets the correct cell after rows and columns are mutated", () => { + const table = create2x2Table(); + + // Prime any internal lookup state. + assert(table.getRow("row-0") !== undefined); + assert(table.getColumn("column-0") !== undefined); + + // Remove an ID and reuse it on a freshly inserted node. + // If a stale lookup leaked, setCell would target the removed node + // instead of the new one. + table.removeRows(["row-0"]); + table.insertRows({ + index: 0, + rows: [{ id: "row-0", cells: {}, props: {} }], + }); + table.removeColumns(["column-0"]); + table.insertColumns({ + index: 0, + columns: [{ id: "column-0", props: {} }], + }); + + table.setCell({ + key: { row: "row-0", column: "column-0" }, + cell: { value: "0-0" }, + }); + + const cell = table.getCell({ row: "row-0", column: "column-0" }); + assert(cell !== undefined); + assertEqualTrees(cell, { value: "0-0" }); + }); + }); + describe("Recursive tables", () => { it("Can create table schema with recursive types", () => { const mySchemaFactory = new SchemaFactoryBeta("test-recursive"); From 6fa68ebb0b99945ed3c4c7de7c96c52394151388 Mon Sep 17 00:00:00 2001 From: Joshua Smithrud <54606601+Josmithr@users.noreply.github.com> Date: Wed, 6 May 2026 20:22:51 +0000 Subject: [PATCH 3/3] docs: Update comments --- packages/dds/tree/src/tableSchema.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/dds/tree/src/tableSchema.ts b/packages/dds/tree/src/tableSchema.ts index 54fc766b0972..87accfd1b154 100644 --- a/packages/dds/tree/src/tableSchema.ts +++ b/packages/dds/tree/src/tableSchema.ts @@ -954,28 +954,24 @@ export namespace System_TableSchema { // #region ID lookup caches // Looking up rows/columns by string ID is a hot path (every getCell / setCell / etc. call goes - // through #tryGetRow or #tryGetColumn). Rather than scanning the arrays linearly on every call, + // through #tryGetRow or #tryGetColumn). Rather than scanning the arrays linearly on every call, // we maintain lazily-built Maps from ID → node. // // Cache invalidation: // Each cache is marked stale (reset to `undefined`) by a `nodeChanged` listener registered on - // the corresponding array node (this.table.rows / this.table.columns). `nodeChanged` fires on + // the corresponding array node (this.table.rows / this.table.columns). `nodeChanged` fires on // an array node whenever an element is inserted, removed, or moved — exactly the set of - // operations that could change which ID maps to which node. The event fires after the full + // operations that could change which ID maps to which node. The event fires after the full // batch of edits has been applied (including remote edits received from collaborators), so the // cache is always rebuilt from a consistent, in-schema state. + // TODO: Consider if we should do more fine-grained invalidation here. E.g. look at the specific deltas + // returned by the `nodeChanged` event and only invalidate entries as needed. // // Listener lifetime: // The listener is registered exactly once per cache (guarded by the non-undefined check on the // stored unsubscribe callback). Subsequent cache rebuilds after invalidation reuse the same // listener — no additional subscriptions accumulate. The unsubscribe callback is stored so // that explicit cleanup is possible in the future if needed. - // - // Unhydrated trees: - // `Tree.on` and `nodeChanged` work for unhydrated nodes as well — all mutations go through - // `#applyEditsInBatch`, which wraps them in `withBufferedTreeEvents`, causing `nodeChanged` - // to fire after each batch regardless of whether the node is hydrated. The invalidation path - // is therefore identical for hydrated and unhydrated tables. /** * Cache from row ID → row node for O(1) lookups in {@link Table.#tryGetRow}.