diff --git a/architecture.config.json b/architecture.config.json index bd56395967..4da2ca8511 100644 --- a/architecture.config.json +++ b/architecture.config.json @@ -444,6 +444,48 @@ "layer": "adapters", "plane": "shared" }, + { + "glob": "packages/3-extensions/cipherstash/src/core/**", + "domain": "extensions", + "layer": "adapters", + "plane": "shared" + }, + { + "glob": "packages/3-extensions/cipherstash/src/middleware/**", + "domain": "extensions", + "layer": "adapters", + "plane": "runtime" + }, + { + "glob": "packages/3-extensions/cipherstash/src/exports/index.ts", + "domain": "extensions", + "layer": "adapters", + "plane": "shared" + }, + { + "glob": "packages/3-extensions/cipherstash/src/exports/column-types.ts", + "domain": "extensions", + "layer": "adapters", + "plane": "shared" + }, + { + "glob": "packages/3-extensions/cipherstash/src/exports/control.ts", + "domain": "extensions", + "layer": "adapters", + "plane": "migration" + }, + { + "glob": "packages/3-extensions/cipherstash/src/exports/runtime.ts", + "domain": "extensions", + "layer": "adapters", + "plane": "runtime" + }, + { + "glob": "packages/3-extensions/cipherstash/src/exports/middleware.ts", + "domain": "extensions", + "layer": "adapters", + "plane": "runtime" + }, { "glob": "packages/2-mongo-family/1-foundation/**", "domain": "mongo", diff --git a/packages/1-framework/1-core/framework-components/src/execution/race-against-abort.ts b/packages/1-framework/1-core/framework-components/src/execution/race-against-abort.ts index 1a1b5f612a..5f7f9bdc4d 100644 --- a/packages/1-framework/1-core/framework-components/src/execution/race-against-abort.ts +++ b/packages/1-framework/1-core/framework-components/src/execution/race-against-abort.ts @@ -1,14 +1,18 @@ -import type { CodecCallContext } from '../shared/codec-types'; import type { RuntimeAbortedPhase } from './runtime-error'; import { runtimeAborted } from './runtime-error'; /** * Throw a phase-tagged `RUNTIME.ABORTED` envelope if the supplied - * 
codec-call context is already aborted at the precheck site. Centralises - * the `if (ctx.signal?.aborted) throw runtimeAborted(...)` pattern that - * every codec dispatch site repeats. + * context is already aborted at the precheck site. Centralises the + * `if (ctx.signal?.aborted) throw runtimeAborted(...)` pattern that + * every codec dispatch site (and the `beforeExecute` middleware phase) + * repeats. Accepts both the framework `CodecCallContext` and the + * `RuntimeMiddlewareContext`; both expose `signal?: AbortSignal`. */ -export function checkAborted(ctx: CodecCallContext, phase: RuntimeAbortedPhase): void { +export function checkAborted( + ctx: { readonly signal?: AbortSignal }, + phase: RuntimeAbortedPhase, +): void { if (ctx.signal?.aborted) { throw runtimeAborted(phase, ctx.signal.reason); } diff --git a/packages/1-framework/1-core/framework-components/src/execution/run-with-middleware.ts b/packages/1-framework/1-core/framework-components/src/execution/run-with-middleware.ts index a30e3bcf10..fc4e08a65f 100644 --- a/packages/1-framework/1-core/framework-components/src/execution/run-with-middleware.ts +++ b/packages/1-framework/1-core/framework-components/src/execution/run-with-middleware.ts @@ -1,12 +1,26 @@ import { AsyncIterableResult } from './async-iterable-result'; import type { ExecutionPlan } from './query-plan'; -import type { RuntimeMiddleware, RuntimeMiddlewareContext } from './runtime-middleware'; +import { checkAborted, raceAgainstAbort } from './race-against-abort'; +import type { + ParamRefMutator, + RuntimeMiddleware, + RuntimeMiddlewareContext, +} from './runtime-middleware'; /** * Drives a single execution of `runDriver()` through the middleware lifecycle. * * Lifecycle, in order: - * 1. For each middleware in registration order: `beforeExecute(exec, ctx)`. + * 1. For each middleware in registration order: `beforeExecute(exec, ctx, + * paramsMutator)`. 
The mutator is the family-specific + * {@link ParamRefMutator} the caller passes through (`undefined` for + * plans / families with no mutator surface). Cooperative cancellation: + * before each middleware body, an already-aborted `ctx.signal` throws + * `RUNTIME.ABORTED { phase: 'beforeExecute' }`; mid-flight aborts race + * the body via `raceAgainstAbort` so the runtime returns + * `RUNTIME.ABORTED` promptly even when the middleware ignores the + * signal. Non-abort errors thrown by a middleware body pass through + * unchanged. * 2. For each row yielded by `runDriver()`: for each middleware in registration * order: `onRow(row, exec, ctx)`; then yield the row to the consumer. * 3. On successful completion: for each middleware in registration order: @@ -20,11 +34,16 @@ import type { RuntimeMiddleware, RuntimeMiddlewareContext } from './runtime-midd * This helper is the single canonical implementation of the middleware * orchestration loop; family runtimes should not reimplement it. */ -export function runWithMiddleware( +export function runWithMiddleware< + TExec extends ExecutionPlan, + Row, + TMutator extends ParamRefMutator = ParamRefMutator, +>( exec: TExec, - middleware: ReadonlyArray>, + middleware: ReadonlyArray>, ctx: RuntimeMiddlewareContext, runDriver: () => AsyncIterable, + paramsMutator?: TMutator, ): AsyncIterableResult { const iterator = async function* (): AsyncGenerator { const startedAt = Date.now(); @@ -34,7 +53,22 @@ export function runWithMiddleware( try { for (const mw of middleware) { if (mw.beforeExecute) { - await mw.beforeExecute(exec, ctx); + // Already-aborted at entry to this middleware short-circuits with + // a phase-tagged envelope before the body runs (AC-ABT2). 
+ checkAborted(ctx, 'beforeExecute'); + // The framework only forwards the mutator the caller supplied; a + // pass-through `undefined` for non-mutating families is safe — the + // base `RuntimeMiddleware` declares the third parameter, and + // existing `(plan, ctx)` bodies that ignore it stay unchanged. + // The cast below is the single point at which the framework's + // generic mutator slot meets the (possibly absent) caller value; + // `runWithMiddleware` cannot synthesize a TMutator instance. + const work = mw.beforeExecute(exec, ctx, paramsMutator as TMutator); + if (work !== undefined) { + // Mid-flight abort surfaces RUNTIME.ABORTED promptly even when + // the middleware body ignores ctx.signal (AC-ABT3). + await raceAgainstAbort(Promise.resolve(work), ctx.signal, 'beforeExecute'); + } } } diff --git a/packages/1-framework/1-core/framework-components/src/execution/runtime-core.ts b/packages/1-framework/1-core/framework-components/src/execution/runtime-core.ts index 1b12aaee4f..381fa72a67 100644 --- a/packages/1-framework/1-core/framework-components/src/execution/runtime-core.ts +++ b/packages/1-framework/1-core/framework-components/src/execution/runtime-core.ts @@ -118,12 +118,19 @@ export abstract class RuntimeCore< const compiled = await self.runBeforeCompile(plan); const exec = await self.lower(compiled, codecCtx); + // Merge the per-execute signal onto the persistent middleware ctx + // for the duration of this execute() call. The ctx object itself is + // freshly allocated per-execute so middleware sees the signal that + // belongs to *its* invocation, not a shared one. Identity matches + // codecCtx.signal so middleware authors who compare `ctx.signal` + // across the codec/middleware boundary observe the same reference. + const execMiddlewareCtx = signal === undefined ? self.ctx : { ...self.ctx, signal }; // The driver yields raw `Record`; we cast to `Row` here. // The Row contract is enforced by the caller via `plan._row`. 
yield* runWithMiddleware( exec, self.middleware, - self.ctx, + execMiddlewareCtx, () => self.runDriver(exec) as AsyncIterable, ); } diff --git a/packages/1-framework/1-core/framework-components/src/execution/runtime-error.ts b/packages/1-framework/1-core/framework-components/src/execution/runtime-error.ts index e6a2276b46..1aee13667c 100644 --- a/packages/1-framework/1-core/framework-components/src/execution/runtime-error.ts +++ b/packages/1-framework/1-core/framework-components/src/execution/runtime-error.ts @@ -15,11 +15,20 @@ export interface RuntimeErrorEnvelope extends Error { * - `'decode'` — abort fired during `decodeRow` / `decodeField`. * - `'stream'` — abort fired between rows or before any codec call * (already-aborted at entry). + * - `'beforeExecute'` / `'afterExecute'` / `'onRow'` — abort fired + * on entry to or during the corresponding middleware phase + * (cooperative cancellation per the param-transform seam). */ export const RUNTIME_ABORTED = 'RUNTIME.ABORTED' as const; /** Discriminator placed in `details.phase` of a `RUNTIME.ABORTED` envelope. */ -export type RuntimeAbortedPhase = 'encode' | 'decode' | 'stream'; +export type RuntimeAbortedPhase = + | 'encode' + | 'decode' + | 'stream' + | 'beforeExecute' + | 'afterExecute' + | 'onRow'; /** * Type guard for the runtime-error envelope produced by `runtimeError`. diff --git a/packages/1-framework/1-core/framework-components/src/execution/runtime-middleware.ts b/packages/1-framework/1-core/framework-components/src/execution/runtime-middleware.ts index 62772faa6d..a3bfcd8862 100644 --- a/packages/1-framework/1-core/framework-components/src/execution/runtime-middleware.ts +++ b/packages/1-framework/1-core/framework-components/src/execution/runtime-middleware.ts @@ -9,11 +9,31 @@ export interface RuntimeLog { debug?(event: unknown): void; } +/** + * Per-execute context threaded through every middleware phase + * (`beforeExecute`, `onRow`, `afterExecute`). 
Allocated once per + * `runtime.execute()` call and shared by reference across all + * middleware in the chain. + * + * - `signal` carries the per-query `AbortSignal` -- the same + * reference that `runtime.execute(plan, { signal })` was invoked + * with, and the same reference threaded into the per-call + * `CodecCallContext` (ADR 207). Middleware that wraps a + * network-backed SDK forwards `ctx.signal` into that SDK to + * propagate caller cancellation; pure-CPU middleware ignores it. + * + * Symmetric plumbing across all middleware phases (rather than only + * `beforeExecute`) is a deliberate choice: a middleware that wraps a + * downstream observability hook or post-processor in `afterExecute` / + * `onRow` needs the same cancellation reach as its `beforeExecute` + * counterpart. + */ export interface RuntimeMiddlewareContext { readonly contract: unknown; readonly mode: 'strict' | 'permissive'; readonly now: () => number; readonly log: RuntimeLog; + readonly signal?: AbortSignal; } export interface AfterExecuteResult { @@ -22,6 +42,19 @@ export interface AfterExecuteResult { readonly completed: boolean; } +/** + * Marker interface for family-specific param-ref mutators threaded into + * `beforeExecute` as the third argument. The framework treats the mutator + * opaquely — it allocates and forwards the family's mutator instance so + * `runWithMiddleware` can stay family-agnostic. SQL extends this with + * `SqlParamRefMutator` (over `ParamRef`); Mongo extends with + * `MongoParamRefMutator` (over `MongoParamRef`). + * + * Extension authors target the family-specific mutator type, not this + * marker. + */ +export type ParamRefMutator = {}; + /** * Family-agnostic middleware SPI parameterized over the plan marker. * @@ -29,12 +62,25 @@ export interface AfterExecuteResult { * middleware (e.g. cross-family telemetry) can be authored without * naming a family. 
Family-specific middleware (`SqlMiddleware`, * `MongoMiddleware`) narrow `TPlan` to their concrete plan type. + * + * `TMutator` is the family-specific {@link ParamRefMutator} the runtime + * threads into `beforeExecute(plan, ctx, params)` as a third argument. + * Existing `(plan)` / `(plan, ctx)` middleware bodies continue to compile + * — TypeScript permits assigning a function with fewer parameters to a + * function-typed slot that declares more. The third arg is additive. */ -export interface RuntimeMiddleware { +export interface RuntimeMiddleware< + TPlan extends QueryPlan = QueryPlan, + TMutator extends ParamRefMutator = ParamRefMutator, +> { readonly name: string; readonly familyId?: string; readonly targetId?: string; - beforeExecute?(plan: TPlan, ctx: RuntimeMiddlewareContext): Promise; + beforeExecute?( + plan: TPlan, + ctx: RuntimeMiddlewareContext, + params?: TMutator, + ): void | Promise; onRow?(row: Record, plan: TPlan, ctx: RuntimeMiddlewareContext): Promise; afterExecute?( plan: TPlan, diff --git a/packages/1-framework/1-core/framework-components/src/exports/runtime.ts b/packages/1-framework/1-core/framework-components/src/exports/runtime.ts index 0d8fad6c16..b368941d0e 100644 --- a/packages/1-framework/1-core/framework-components/src/exports/runtime.ts +++ b/packages/1-framework/1-core/framework-components/src/exports/runtime.ts @@ -13,6 +13,7 @@ export { } from '../execution/runtime-error'; export type { AfterExecuteResult, + ParamRefMutator, RuntimeExecuteOptions, RuntimeExecutor, RuntimeLog, diff --git a/packages/1-framework/1-core/framework-components/src/shared/framework-authoring.ts b/packages/1-framework/1-core/framework-components/src/shared/framework-authoring.ts index e9d4f4fb32..934f4b2299 100644 --- a/packages/1-framework/1-core/framework-components/src/shared/framework-authoring.ts +++ b/packages/1-framework/1-core/framework-components/src/shared/framework-authoring.ts @@ -24,6 +24,7 @@ interface AuthoringArgumentDescriptorCommon { 
export type AuthoringArgumentDescriptor = AuthoringArgumentDescriptorCommon & ( | { readonly kind: 'string' } + | { readonly kind: 'boolean' } | { readonly kind: 'number'; readonly integer?: boolean; @@ -188,6 +189,13 @@ function validateAuthoringArgument( return; } + if (descriptor.kind === 'boolean') { + if (typeof value !== 'boolean') { + throw new Error(`Authoring helper argument at ${path} must be a boolean`); + } + return; + } + if (descriptor.kind === 'stringArray') { if (!Array.isArray(value)) { throw new Error(`Authoring helper argument at ${path} must be an array of strings`); diff --git a/packages/1-framework/1-core/framework-components/test/framework-components.authoring.test.ts b/packages/1-framework/1-core/framework-components/test/framework-components.authoring.test.ts index 11c2ab21d2..af11ae64fd 100644 --- a/packages/1-framework/1-core/framework-components/test/framework-components.authoring.test.ts +++ b/packages/1-framework/1-core/framework-components/test/framework-components.authoring.test.ts @@ -76,15 +76,49 @@ describe('authoring template resolution', () => { properties: { label: { kind: 'string' }, length: { kind: 'number', integer: true, minimum: 1, maximum: 3 }, + enabled: { kind: 'boolean', optional: true }, }, }, { kind: 'number', optional: true, minimum: 0 }, ], - ['vector', ['a', 'b'], { label: 'embedding', length: 2 }, 0], + ['vector', ['a', 'b'], { label: 'embedding', length: 2, enabled: true }, 0], ), ).not.toThrow(); }); + it('accepts boolean leaf arguments at the top level', () => { + expect(() => + validateAuthoringHelperArguments( + 'type.flag', + [{ kind: 'boolean' }, { kind: 'boolean', optional: true }], + [true, false], + ), + ).not.toThrow(); + expect(() => + validateAuthoringHelperArguments( + 'type.flag', + [{ kind: 'boolean' }, { kind: 'boolean', optional: true }], + [false], + ), + ).not.toThrow(); + }); + + it('rejects non-boolean values where boolean is expected', () => { + expect(() => + 
validateAuthoringHelperArguments('type.flag', [{ kind: 'boolean' }], ['yes']), + ).toThrow(/must be a boolean/); + expect(() => validateAuthoringHelperArguments('type.flag', [{ kind: 'boolean' }], [1])).toThrow( + /must be a boolean/, + ); + expect(() => + validateAuthoringHelperArguments( + 'type.flag', + [{ kind: 'object', properties: { equality: { kind: 'boolean' } } }], + [{ equality: 'yes' }], + ), + ).toThrow(/type\.flag\[0\]\.equality must be a boolean/); + }); + it('allows omitted optional helper arguments', () => { expect(() => validateAuthoringHelperArguments( diff --git a/packages/1-framework/1-core/framework-components/test/run-with-middleware-signal.test.ts b/packages/1-framework/1-core/framework-components/test/run-with-middleware-signal.test.ts new file mode 100644 index 0000000000..a9c102886e --- /dev/null +++ b/packages/1-framework/1-core/framework-components/test/run-with-middleware-signal.test.ts @@ -0,0 +1,86 @@ +import type { PlanMeta } from '@prisma-next/contract/types'; +import { describe, expect, it } from 'vitest'; +import type { ExecutionPlan } from '../src/execution/query-plan'; +import { runWithMiddleware } from '../src/execution/run-with-middleware'; +import type { + RuntimeMiddleware, + RuntimeMiddlewareContext, +} from '../src/execution/runtime-middleware'; + +const meta: PlanMeta = { + target: 'mock', + storageHash: 'sha256:test', + lane: 'raw-sql', +}; + +interface MockExec extends ExecutionPlan { + readonly id: string; +} + +const mockExec: MockExec = { id: 'exec-1', meta }; + +function makeCtx(signal?: AbortSignal): RuntimeMiddlewareContext { + return { + contract: {}, + mode: 'strict', + now: () => Date.now(), + log: { info: () => {}, warn: () => {}, error: () => {} }, + ...(signal ? 
{ signal } : {}), + }; +} + +async function* yieldRows(rows: ReadonlyArray): AsyncGenerator { + for (const row of rows) { + yield row; + } +} + +describe('RuntimeMiddlewareContext.signal (AC-ABT1)', () => { + it('AC-ABT1: ctx.signal is the exact same reference at every middleware phase', async () => { + const controller = new AbortController(); + const ctx = makeCtx(controller.signal); + const observed: AbortSignal[] = []; + + const mw: RuntimeMiddleware = { + name: 'observer', + async beforeExecute(_plan, c) { + if (c.signal) observed.push(c.signal); + }, + async onRow(_row, _plan, c) { + if (c.signal) observed.push(c.signal); + }, + async afterExecute(_plan, _result, c) { + if (c.signal) observed.push(c.signal); + }, + }; + + const result = runWithMiddleware>(mockExec, [mw], ctx, () => + yieldRows([{ id: 1 }]), + ); + await result.toArray(); + + expect(observed).toHaveLength(3); + expect(observed[0]).toBe(controller.signal); + expect(observed[1]).toBe(controller.signal); + expect(observed[2]).toBe(controller.signal); + }); + + it('ctx.signal is undefined when no signal was supplied', async () => { + const ctx = makeCtx(undefined); + let observedSignal: AbortSignal | undefined = new AbortController().signal; + + const mw: RuntimeMiddleware = { + name: 'observer', + async beforeExecute(_plan, c) { + observedSignal = c.signal; + }, + }; + + const result = runWithMiddleware>(mockExec, [mw], ctx, () => + yieldRows([{ id: 1 }]), + ); + await result.toArray(); + + expect(observedSignal).toBeUndefined(); + }); +}); diff --git a/packages/2-mongo-family/7-runtime/src/exports/index.ts b/packages/2-mongo-family/7-runtime/src/exports/index.ts index daddc3fa90..527e7664cf 100644 --- a/packages/2-mongo-family/7-runtime/src/exports/index.ts +++ b/packages/2-mongo-family/7-runtime/src/exports/index.ts @@ -16,5 +16,15 @@ export { createMongoExecutionStack, } from '../mongo-execution-stack'; export type { MongoMiddleware, MongoMiddlewareContext } from '../mongo-middleware'; +export 
type { + MongoParamRefEntry, + MongoParamRefEntryUnion, + MongoParamRefHandle, + MongoParamRefMutator, +} from '../mongo-param-ref-mutator'; +export { + createMongoParamRefMutator, + flattenMongoParamRefs, +} from '../mongo-param-ref-mutator'; export type { MongoRuntime, MongoRuntimeOptions } from '../mongo-runtime'; export { createMongoRuntime } from '../mongo-runtime'; diff --git a/packages/2-mongo-family/7-runtime/src/mongo-middleware.ts b/packages/2-mongo-family/7-runtime/src/mongo-middleware.ts index d69fde004b..3fef74c9d1 100644 --- a/packages/2-mongo-family/7-runtime/src/mongo-middleware.ts +++ b/packages/2-mongo-family/7-runtime/src/mongo-middleware.ts @@ -4,6 +4,7 @@ import type { RuntimeMiddlewareContext, } from '@prisma-next/framework-components/runtime'; import type { MongoExecutionPlan } from './mongo-execution-plan'; +import type { MongoParamRefMutator } from './mongo-param-ref-mutator'; export interface MongoMiddlewareContext extends RuntimeMiddlewareContext {} @@ -17,10 +18,22 @@ export interface MongoMiddlewareContext extends RuntimeMiddlewareContext {} * telemetry) — which carry no `familyId` — remain assignable. When * present, it must be `'mongo'`; the runtime rejects mismatches at * construction time via `checkMiddlewareCompatibility`. + * + * `beforeExecute` accepts an additive third {@link MongoParamRefMutator} + * argument matching the SQL family's seam (AC-FAM1). Existing 2-arg + * middleware bodies remain valid — TypeScript permits assigning a + * function with fewer parameters to a function-typed slot that declares + * more. 
*/ -export interface MongoMiddleware extends RuntimeMiddleware { +export interface MongoMiddleware< + TCodecMap extends Record = Record, +> extends RuntimeMiddleware> { readonly familyId?: 'mongo'; - beforeExecute?(plan: MongoExecutionPlan, ctx: MongoMiddlewareContext): Promise; + beforeExecute?( + plan: MongoExecutionPlan, + ctx: MongoMiddlewareContext, + params?: MongoParamRefMutator, + ): void | Promise; onRow?( row: Record, plan: MongoExecutionPlan, diff --git a/packages/2-mongo-family/7-runtime/src/mongo-param-ref-mutator.ts b/packages/2-mongo-family/7-runtime/src/mongo-param-ref-mutator.ts new file mode 100644 index 0000000000..70d0a6f5ff --- /dev/null +++ b/packages/2-mongo-family/7-runtime/src/mongo-param-ref-mutator.ts @@ -0,0 +1,157 @@ +import type { ParamRefMutator } from '@prisma-next/framework-components/runtime'; +import { MongoParamRef } from '@prisma-next/mongo-value'; + +/** + * Brand applied to {@link MongoParamRefHandle} so user-constructed + * handles are rejected by the type system. Phantom-typed only — at + * runtime the handle is the underlying `MongoParamRef` instance from + * the lowered tree. + */ +declare const mongoParamRefHandleBrand: unique symbol; + +/** + * Opaque token identifying a single `MongoParamRef` in the lowered + * Mongo command tree. Produced by {@link MongoParamRefMutator.entries}; + * consumed by `replaceValue` / `replaceValues`. + */ +export interface MongoParamRefHandle { + readonly [mongoParamRefHandleBrand]: TCodecId; +} + +/** + * One outbound `MongoParamRef` slot exposed to middleware. `value` is + * the current authored value; `codecId` is the codec id declared on + * the underlying `MongoParamRef`. + */ +export interface MongoParamRefEntry { + readonly ref: MongoParamRefHandle; + readonly value: unknown; + readonly codecId: TCodecId; +} + +/** + * Discriminated entry union over a codec map (matches the SQL family's + * pattern). 
Pattern-matching on `entry.codecId` narrows the entry to a + * single `TCodecMap` arm. + */ +export type MongoParamRefEntryUnion> = + | { [K in keyof TCodecMap & string]: MongoParamRefEntry }[keyof TCodecMap & string] + | MongoParamRefEntry; + +/** + * Mongo-family mutator threaded into `MongoMiddleware.beforeExecute` as + * `params`. Walks the lowered tree (objects, arrays, leaves) and yields + * `MongoParamRef` slots in pre-order; mutator semantics match the SQL + * family's `SqlParamRefMutator` (AC-FAM1, AC-FAM2). + */ +export interface MongoParamRefMutator< + TCodecMap extends Record = Record, +> extends ParamRefMutator { + /** Iterate every `MongoParamRef` reachable from the lowered tree (AC-FAM2). */ + entries(): IterableIterator>; + + replaceValue( + ref: MongoParamRefHandle, + newValue: TCodecMap[TCodecId], + ): void; + replaceValue(ref: MongoParamRefHandle, newValue: unknown): void; + + replaceValues( + updates: Iterable<{ + readonly ref: MongoParamRefHandle<(keyof TCodecMap & string) | undefined>; + readonly newValue: unknown; + }>, + ): void; +} + +/** + * Walk an arbitrary value (object / array / leaf) and yield every + * reachable `MongoParamRef` in pre-order. Stable order matches the + * resolveValue walk so `entries()` consumers see ParamRefs in the order + * the runtime would encode them. + */ +export function* flattenMongoParamRefs(value: unknown): IterableIterator { + if (value instanceof MongoParamRef) { + yield value; + return; + } + if (value === null || typeof value !== 'object') { + return; + } + if (value instanceof Date) { + return; + } + if (Array.isArray(value)) { + for (const item of value) { + yield* flattenMongoParamRefs(item); + } + return; + } + for (const child of Object.values(value as Record)) { + yield* flattenMongoParamRefs(child); + } +} + +type AnyHandle = MongoParamRefHandle; + +/** + * Build a {@link MongoParamRefMutator} over an arbitrary lowered tree + * (typically a Mongo wire command). 
The mutator's `entries()` walks the + * tree on demand via {@link flattenMongoParamRefs}; mutations are + * tracked in a per-MongoParamRef map and applied to a working tree + * lazily on `currentTree()`. + * + * The actual integration into `MongoRuntime` is not yet wired (Mongo's + * lower step resolves `MongoParamRef`s into raw values; deferring that + * is a follow-on). The mutator type and flatten helper land here so + * extension authors can target the seam in the meantime. + */ +export function createMongoParamRefMutator< + TCodecMap extends Record = Record, +>(tree: unknown): MongoParamRefMutator { + const refs: ReadonlyArray = [...flattenMongoParamRefs(tree)]; + const replacements = new Map(); + + const indexOfRef = (handle: AnyHandle): MongoParamRef | undefined => { + const ref = handle as unknown as MongoParamRef; + return refs.includes(ref) ? ref : undefined; + }; + + function* entries(): IterableIterator> { + for (const ref of refs) { + const handle = ref as unknown as MongoParamRefHandle; + const value = replacements.has(ref) ? replacements.get(ref) : ref.value; + const entry: MongoParamRefEntry = { + ref: handle, + value, + codecId: ref.codecId, + }; + yield entry as MongoParamRefEntryUnion; + } + } + + function replaceValue(handle: AnyHandle, newValue: unknown): void { + const ref = indexOfRef(handle); + if (!ref) return; + replacements.set(ref, newValue); + } + + function replaceValues( + updates: Iterable<{ readonly ref: AnyHandle; readonly newValue: unknown }>, + ): void { + for (const { ref, newValue } of updates) { + replaceValue(ref, newValue); + } + } + + // The public `MongoParamRefMutator` declares overloaded `replaceValue` + // signatures (typed-by-codec / unresolvable-codec). The implementation + // is one function with a permissive runtime signature; the cast below + // is the single point at which the runtime function meets the typed + // overload surface, matching the overload-implementation pattern. 
+ return { + entries, + replaceValue: replaceValue as MongoParamRefMutator['replaceValue'], + replaceValues, + }; +} diff --git a/packages/2-mongo-family/7-runtime/test/mongo-param-ref-mutator.test.ts b/packages/2-mongo-family/7-runtime/test/mongo-param-ref-mutator.test.ts new file mode 100644 index 0000000000..ea5f577dc2 --- /dev/null +++ b/packages/2-mongo-family/7-runtime/test/mongo-param-ref-mutator.test.ts @@ -0,0 +1,54 @@ +import { MongoParamRef } from '@prisma-next/mongo-value'; +import { describe, expect, it } from 'vitest'; +import { createMongoParamRefMutator, flattenMongoParamRefs } from '../src/mongo-param-ref-mutator'; + +describe('MongoParamRefMutator (AC-FAM2)', () => { + it('flattenMongoParamRefs walks objects, arrays, and nested leaves in pre-order', () => { + const a = MongoParamRef.of('a', { codecId: 'cipherstash/string@1' }); + const b = MongoParamRef.of(42, { codecId: 'pg/text@1' }); + const c = MongoParamRef.of(true); + const tree = { + filter: { email: a, age: { $gt: b } }, + list: [c, { unrelated: 'literal' }], + }; + const result = [...flattenMongoParamRefs(tree)]; + expect(result).toEqual([a, b, c]); + }); + + it('entries() yields one entry per MongoParamRef in walk order', () => { + const a = MongoParamRef.of('alice', { codecId: 'cipherstash/string@1' }); + const b = MongoParamRef.of('bob', { codecId: 'cipherstash/string@1' }); + const c = MongoParamRef.of('plain', { codecId: 'pg/text@1' }); + const command = { documents: [{ email: a }, { email: b }, { tag: c }] }; + const mutator = createMongoParamRefMutator(command); + const entries = [...mutator.entries()]; + expect(entries).toHaveLength(3); + expect(entries[0]?.value).toBe('alice'); + expect(entries[0]?.codecId).toBe('cipherstash/string@1'); + expect(entries[2]?.codecId).toBe('pg/text@1'); + }); + + it('replaceValue / replaceValues update entries() view in subsequent walks', () => { + const a = MongoParamRef.of('alice', { codecId: 'cipherstash/string@1' }); + const b = 
MongoParamRef.of('bob', { codecId: 'cipherstash/string@1' }); + const command = { documents: [{ email: a }, { email: b }] }; + const mutator = createMongoParamRefMutator(command); + + const allEntries = [...mutator.entries()]; + const first = allEntries[0]!; + const second = allEntries[1]!; + if (first.codecId === 'cipherstash/string@1') { + mutator.replaceValue(first.ref, 'cipher:alice'); + } + mutator.replaceValues([{ ref: second.ref, newValue: 'cipher:bob' }]); + + const after = [...mutator.entries()]; + expect(after.map((e) => e.value)).toEqual(['cipher:alice', 'cipher:bob']); + }); + + it('handles trees with no MongoParamRefs (returns empty walk)', () => { + const command = { documents: [{ email: 'literal' }] }; + const mutator = createMongoParamRefMutator(command); + expect([...mutator.entries()]).toEqual([]); + }); +}); diff --git a/packages/2-sql/2-authoring/contract-psl/src/psl-authoring-arguments.ts b/packages/2-sql/2-authoring/contract-psl/src/psl-authoring-arguments.ts index 7a3f506ff8..fb6558b9e6 100644 --- a/packages/2-sql/2-authoring/contract-psl/src/psl-authoring-arguments.ts +++ b/packages/2-sql/2-authoring/contract-psl/src/psl-authoring-arguments.ts @@ -311,6 +311,12 @@ function parsePslAuthoringArgumentValue( switch (descriptor.kind) { case 'string': return unquoteStringLiteral(rawValue); + case 'boolean': { + const trimmed = rawValue.trim(); + if (trimmed === 'true') return true; + if (trimmed === 'false') return false; + return INVALID_AUTHORING_ARGUMENT; + } case 'number': { const parsed = Number(unquoteStringLiteral(rawValue)); return Number.isNaN(parsed) ? 
INVALID_AUTHORING_ARGUMENT : parsed; diff --git a/packages/2-sql/2-authoring/contract-ts/src/authoring-type-utils.ts b/packages/2-sql/2-authoring/contract-ts/src/authoring-type-utils.ts index d3ec82e9fa..ef808b691b 100644 --- a/packages/2-sql/2-authoring/contract-ts/src/authoring-type-utils.ts +++ b/packages/2-sql/2-authoring/contract-ts/src/authoring-type-utils.ts @@ -38,19 +38,21 @@ export type ArgTypeFromDescriptor = Arg readonly kind: 'string'; } ? string - : Arg extends { readonly kind: 'number' } - ? number - : Arg extends { readonly kind: 'stringArray' } - ? readonly string[] - : Arg extends { - readonly kind: 'object'; - readonly properties: infer Properties extends Record< - string, - AuthoringArgumentDescriptor - >; - } - ? ObjectArgumentType - : never; + : Arg extends { readonly kind: 'boolean' } + ? boolean + : Arg extends { readonly kind: 'number' } + ? number + : Arg extends { readonly kind: 'stringArray' } + ? readonly string[] + : Arg extends { + readonly kind: 'object'; + readonly properties: infer Properties extends Record< + string, + AuthoringArgumentDescriptor + >; + } + ? 
ObjectArgumentType + : never; export type TupleFromArgumentDescriptors = { readonly [K in keyof Args]: Args[K] extends AuthoringArgumentDescriptor diff --git a/packages/2-sql/4-lanes/relational-core/package.json b/packages/2-sql/4-lanes/relational-core/package.json index 4d11cf77d5..502e312d08 100644 --- a/packages/2-sql/4-lanes/relational-core/package.json +++ b/packages/2-sql/4-lanes/relational-core/package.json @@ -42,6 +42,7 @@ "./ast": "./dist/exports/ast.mjs", "./errors": "./dist/exports/errors.mjs", "./expression": "./dist/exports/expression.mjs", + "./middleware": "./dist/exports/middleware.mjs", "./plan": "./dist/exports/plan.mjs", "./query-lane-context": "./dist/exports/query-lane-context.mjs", "./types": "./dist/exports/types.mjs", diff --git a/packages/2-sql/4-lanes/relational-core/src/ast/types.ts b/packages/2-sql/4-lanes/relational-core/src/ast/types.ts index 08ca9c127e..b198a15680 100644 --- a/packages/2-sql/4-lanes/relational-core/src/ast/types.ts +++ b/packages/2-sql/4-lanes/relational-core/src/ast/types.ts @@ -1626,7 +1626,60 @@ export class DeleteAst extends QueryAst { } } -export type AnyQueryAst = SelectAst | InsertAst | UpdateAst | DeleteAst; +/** + * Raw-SQL query AST node carrying interpolated parameter / expression nodes + * embedded inside literal SQL fragments. + * + * `fragments` and `args` are interleaved during lowering: + * `fragments[0] + lower(args[0]) + fragments[1] + ... + fragments[n]`. + * Construction enforces `fragments.length === args.length + 1`. + * + * Extends {@link QueryAst} (whole-query AST, not a sub-expression). + * Construction does not validate that each arg is a `ParamRef` / + * `AnyExpression`: the type system already rejects bare values because + * `args` is typed `readonly AnyExpression[]`. The user-facing `raw\`...\`` + * factory (separate `sql-raw-factory` component) layers stricter + * type-level rejection on top of this AST node. 
+ */ +export class RawSqlExpr extends QueryAst { + readonly kind = 'raw-sql' as const; + readonly fragments: readonly string[]; + readonly args: readonly AnyExpression[]; + + constructor(fragments: readonly string[], args: readonly AnyExpression[]) { + super(); + if (fragments.length !== args.length + 1) { + throw new Error( + `RawSqlExpr: fragments.length must equal args.length + 1 (got fragments=${fragments.length}, args=${args.length})`, + ); + } + this.fragments = Object.freeze([...fragments]); + this.args = Object.freeze([...args]); + this.freeze(); + } + + static of(fragments: readonly string[], args: readonly AnyExpression[]): RawSqlExpr { + return new RawSqlExpr(fragments, args); + } + + override collectParamRefs(): ParamRef[] { + const refs: ParamRef[] = []; + for (const arg of this.args) { + if (arg.kind === 'param-ref') { + refs.push(arg); + } else { + refs.push(...arg.collectParamRefs()); + } + } + return refs; + } + + override toQueryAst(): AnyQueryAst { + return this; + } +} + +export type AnyQueryAst = SelectAst | InsertAst | UpdateAst | DeleteAst | RawSqlExpr; export type AnyFromSource = TableSource | DerivedTableSource; export type AnyExpression = | ColumnRef @@ -1654,6 +1707,7 @@ export const queryAstKinds: ReadonlySet = new Set([ 'insert', 'update', 'delete', + 'raw-sql', ]); export const whereExprKinds: ReadonlySet = new Set([ 'binary', diff --git a/packages/2-sql/4-lanes/relational-core/src/exports/middleware.ts b/packages/2-sql/4-lanes/relational-core/src/exports/middleware.ts new file mode 100644 index 0000000000..27daf1fae6 --- /dev/null +++ b/packages/2-sql/4-lanes/relational-core/src/exports/middleware.ts @@ -0,0 +1,8 @@ +export type { + ParamRefEntry, + ParamRefEntryUnion, + ParamRefHandle, + SqlParamRefMutator, + SqlParamRefMutatorInternal, +} from '../middleware/param-ref-mutator'; +export { createSqlParamRefMutator } from '../middleware/param-ref-mutator'; diff --git a/packages/2-sql/4-lanes/relational-core/src/index.ts 
b/packages/2-sql/4-lanes/relational-core/src/index.ts index 92bc54ea63..7af467ee74 100644 --- a/packages/2-sql/4-lanes/relational-core/src/index.ts +++ b/packages/2-sql/4-lanes/relational-core/src/index.ts @@ -1,6 +1,7 @@ export * from './exports/ast'; export * from './exports/errors'; export * from './exports/expression'; +export * from './exports/middleware'; export * from './exports/plan'; export * from './exports/query-lane-context'; export * from './exports/types'; diff --git a/packages/2-sql/4-lanes/relational-core/src/middleware/param-ref-mutator.ts b/packages/2-sql/4-lanes/relational-core/src/middleware/param-ref-mutator.ts new file mode 100644 index 0000000000..2a9651ae94 --- /dev/null +++ b/packages/2-sql/4-lanes/relational-core/src/middleware/param-ref-mutator.ts @@ -0,0 +1,224 @@ +import type { ParamRefMutator } from '@prisma-next/framework-components/runtime'; +import type { SqlColumnRef } from '../ast/codec-types'; +import type { ParamRef } from '../ast/types'; +import { collectOrderedParamRefs } from '../ast/util'; +import type { SqlExecutionPlan } from '../sql-execution-plan'; + +/** + * Brand applied to {@link ParamRefHandle} so user-constructed handles + * are rejected by the type system. The mutator only accepts handles it + * produced from `entries()`. + * + * The brand is a phantom type — there is no runtime token. At runtime + * the handle is the underlying `ParamRef` instance from the plan's + * `ast`; the brand only narrows the type-level surface so callers + * cannot fabricate a handle from a fresh `ParamRef.of(...)`. + */ +declare const paramRefHandleBrand: unique symbol; + +/** + * Opaque token identifying a single `ParamRef` in the plan. Produced by + * {@link SqlParamRefMutator.entries}; consumed by `replaceValue` / + * `replaceValues`. 
+ * + * The phantom `TCodecId` parameter records the codec id of the + * referenced `ParamRef` so type-level inference can route replacement + * values through `TCodecMap` to the codec's declared `TInput` (AC-TYPE1). + */ +export interface ParamRefHandle { + readonly [paramRefHandleBrand]: TCodecId; +} + +/** + * One outbound `ParamRef` slot in the plan exposed to middleware. + * `value` is the current value (post any prior middleware mutations); + * `codecId` is the codec id declared on the underlying `ParamRef`; + * `column` is populated for `ParamRef`s the lowering site could resolve + * to a single `(table, column)` (encode-side column metadata is the + * middleware's domain — encode itself currently leaves `ctx.column` + * unset). + */ +export interface ParamRefEntry { + readonly ref: ParamRefHandle; + readonly value: unknown; + readonly codecId: TCodecId; + readonly column?: SqlColumnRef; +} + +/** + * Discriminated entry union over a codec map. For each `K` in + * `TCodecMap`, `entries()` may yield a `ParamRefEntry`; ParamRefs + * with no codec id (or a codec id outside the map) yield a + * `ParamRefEntry`. Pattern-matching on `entry.codecId` + * narrows `entry.ref` to a `ParamRefHandle`, which routes through + * the typed `replaceValue` overload. + */ +export type ParamRefEntryUnion> = + | { [K in keyof TCodecMap & string]: ParamRefEntry }[keyof TCodecMap & string] + | ParamRefEntry; + +/** + * SQL-family mutator threaded into `SqlMiddleware.beforeExecute` as + * `params`. Scope is `ParamRef.value` slots only — middleware cannot + * insert / remove `ParamRef`s, rewrite SQL, or modify projection. The + * type-level `ParamRefHandle` brand and the `replaceValue(ref, + * newValue)` shape enforce this at compile time (AC-MUT4). + * + * Allocation discipline: the mutator is constructed lazily from the + * plan. 
`entries()` walks the plan's existing AST without allocating + * an intermediate array; the working params buffer is only allocated + * on the first `replaceValue` / `replaceValues` call. If no middleware + * mutates, `currentParams()` returns the plan's original `params` by + * reference identity (AC-MUT5). + * + * The `TCodecMap` parameter is a record keyed by codec id; `replaceValue` + * infers `newValue` from `TCodecMap[H['codecId']]` for handles whose + * codec id is statically resolvable. For codec ids the type system + * cannot resolve, `newValue` falls back to `unknown` and the middleware + * is on the hook for runtime correctness. + */ +export interface SqlParamRefMutator< + TCodecMap extends Record = Record, +> extends ParamRefMutator { + /** Iterate every outbound `ParamRef` the plan currently carries, in canonical order (AC-MUT2). */ + entries(): IterableIterator>; + + /** + * Replace one `ParamRef`'s value with the result of bulk processing. + * `newValue` is constrained to the codec's declared `TInput` for codec + * ids the type system can resolve via `TCodecMap`; for unresolvable + * codec ids `newValue` is `unknown` (the second overload). + */ + replaceValue( + ref: ParamRefHandle, + newValue: TCodecMap[TCodecId], + ): void; + replaceValue(ref: ParamRefHandle, newValue: unknown): void; + + /** Replace many at once (typical for bulk-pattern middleware). */ + replaceValues( + updates: Iterable<{ + readonly ref: ParamRefHandle<(keyof TCodecMap & string) | undefined>; + readonly newValue: unknown; + }>, + ): void; +} + +/** + * Internal-only view of the mutator that exposes the post-mutation params + * array to the SQL runtime. The runtime calls `currentParams()` after the + * `beforeExecute` chain has run; the result is the plan's original + * `params` by reference identity if no middleware mutated, otherwise a + * frozen new array carrying the mutations applied in chain order. 
+ * + * Family-internal contract — `SqlMiddleware` consumers never see this + * shape; they receive the public `SqlParamRefMutator` view above. + */ +export interface SqlParamRefMutatorInternal< + TCodecMap extends Record = Record, +> extends SqlParamRefMutator { + currentParams(): readonly unknown[]; +} + +type AnyHandle = ParamRefHandle; + +/** + * Build a {@link SqlParamRefMutatorInternal} for the given lowered plan. + * + * The mutator captures `plan.params` by reference and walks + * `plan.ast` (via `collectOrderedParamRefs`) on demand to build + * entries. Mutations write to a lazily-allocated working copy so the + * fast path (no mutation) preserves bit-for-bit reference identity to + * the original `plan.params`. + * + * Threading: `plan.ast` carries the canonical `ParamRef` ordering used + * by every consumer (renderer's `$N` index map, encode-side metadata + * walk, etc.). The mutator's `entries()` yields the same order so + * middleware that filters by codec id sees ParamRefs in the order the + * runtime will encode them. + */ +export function createSqlParamRefMutator< + TCodecMap extends Record = Record, +>(plan: SqlExecutionPlan): SqlParamRefMutatorInternal { + const originalParams = plan.params; + const refs: ReadonlyArray = plan.ast ? collectOrderedParamRefs(plan.ast) : []; + let workingParams: unknown[] | undefined; + + const indexOfRef = (handle: AnyHandle): number => { + // The handle is the underlying ParamRef instance the mutator yielded + // from entries(); equality is identity equality on the ParamRef. The + // brand on ParamRefHandle is unforgeable from outside, so the only + // legal handles came from this mutator's entries(). 
+ return refs.indexOf(handle as unknown as ParamRef); + }; + + const ensureWorkingParams = (): unknown[] => { + if (!workingParams) { + workingParams = [...originalParams]; + } + return workingParams; + }; + + const writeAt = (index: number, value: unknown): void => { + const buffer = ensureWorkingParams(); + buffer[index] = value; + }; + + function* entries(): IterableIterator> { + const view = workingParams ?? originalParams; + for (let i = 0; i < refs.length; i++) { + const ref = refs[i]; + if (!ref) continue; + const handle = ref as unknown as ParamRefHandle; + const value = i < view.length ? view[i] : ref.value; + const codecId = ref.codecId; + // The runtime erases the discriminated union to a single shape; the + // public type pins each entry's `ref` to the matching `codecId` + // arm at compile time. + const entry: ParamRefEntry = { + ref: handle, + value, + codecId, + }; + yield entry as ParamRefEntryUnion; + } + } + + function replaceValue(handle: AnyHandle, newValue: unknown): void { + const index = indexOfRef(handle); + if (index < 0) { + // Handle does not belong to this plan. The type system pins this + // at the brand level; this runtime check guards against handles + // smuggled across plans. + return; + } + writeAt(index, newValue); + } + + function replaceValues( + updates: Iterable<{ readonly ref: AnyHandle; readonly newValue: unknown }>, + ): void { + for (const { ref, newValue } of updates) { + const index = indexOfRef(ref); + if (index < 0) continue; + writeAt(index, newValue); + } + } + + // The public `SqlParamRefMutator` declares overloaded `replaceValue` + // signatures (typed-by-codec / unresolvable-codec). The implementation + // is one function with a permissive runtime signature; the cast is the + // single point at which the runtime function meets the typed overload + // surface, matching the overload-implementation pattern. 
+ return { + entries, + replaceValue: replaceValue as SqlParamRefMutator['replaceValue'], + replaceValues, + currentParams(): readonly unknown[] { + if (!workingParams) { + return originalParams; + } + return Object.freeze([...workingParams]); + }, + }; +} diff --git a/packages/2-sql/4-lanes/relational-core/src/plan.ts b/packages/2-sql/4-lanes/relational-core/src/plan.ts index d686675a22..a1adf2c929 100644 --- a/packages/2-sql/4-lanes/relational-core/src/plan.ts +++ b/packages/2-sql/4-lanes/relational-core/src/plan.ts @@ -1,4 +1,6 @@ +import type { Contract } from '@prisma-next/contract/types'; import type { QueryPlan } from '@prisma-next/framework-components/runtime'; +import type { SqlStorage } from '@prisma-next/sql-contract/types'; import type { AnyQueryAst } from './ast/types'; /** @@ -18,3 +20,37 @@ export interface SqlQueryPlan extends QueryPlan { readonly ast: AnyQueryAst; readonly params: readonly unknown[]; } + +/** + * Wraps an `AnyQueryAst` (typically a `RawSqlExpr` constructed package-internally + * by an extension's migration factory) in a fully-populated `SqlQueryPlan` + * whose `meta` is sourced from the supplied contract. + * + * Centralising the envelope here means consumers (cipherstash migration + * factories today; future raw-sql callers) cannot drift on `storageHash` / + * `target` / `targetFamily`, which would otherwise surface as a subtle + * `assertContractMatches` failure inside `dataTransform`. `params` defaults + * to `[]` because parameters embedded in the AST as `ParamRef`s are resolved + * at lowering time (`encodeParams` walks `plan.ast.collectParamRefs()`), + * not at plan-construction time. + * + * The default `laneId` of `'raw'` reflects raw-SQL plans' standard lane tag; + * callers (e.g. a future `sql-raw-factory`) may override to differentiate + * the plan's provenance. 
+ */ +export function planFromAst( + ast: AnyQueryAst, + contract: Contract, + laneId = 'raw', +): SqlQueryPlan { + return { + ast, + params: [], + meta: { + target: contract.target, + targetFamily: contract.targetFamily, + storageHash: contract.storage.storageHash, + lane: laneId, + }, + }; +} diff --git a/packages/2-sql/4-lanes/relational-core/test/ast/raw-sql-expr.test.ts b/packages/2-sql/4-lanes/relational-core/test/ast/raw-sql-expr.test.ts new file mode 100644 index 0000000000..7ffaffd99d --- /dev/null +++ b/packages/2-sql/4-lanes/relational-core/test/ast/raw-sql-expr.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from 'vitest'; +import { + type AnyQueryAst, + isQueryAst, + ParamRef, + queryAstKinds, + RawSqlExpr, +} from '../../src/exports/ast'; + +describe('RawSqlExpr', () => { + it('exposes kind "raw-sql"', () => { + const node = new RawSqlExpr(['SELECT 1'], []); + expect(node.kind).toBe('raw-sql'); + }); + + it('static of() and constructor produce frozen instances', () => { + const fromCtor = new RawSqlExpr(['SELECT 1'], []); + const fromOf = RawSqlExpr.of(['SELECT 1'], []); + expect(Object.isFrozen(fromCtor)).toBe(true); + expect(Object.isFrozen(fromOf)).toBe(true); + expect(Object.isFrozen(fromCtor.fragments)).toBe(true); + expect(Object.isFrozen(fromCtor.args)).toBe(true); + }); + + it('throws when fragments.length !== args.length + 1', () => { + expect(() => new RawSqlExpr([], [])).toThrow(/fragments\.length must equal args\.length \+ 1/); + expect(() => new RawSqlExpr(['a', 'b', 'c'], [ParamRef.of(1)])).toThrow( + /fragments\.length must equal args\.length \+ 1/, + ); + expect(() => RawSqlExpr.of(['only-one'], [ParamRef.of(1)])).toThrow( + /fragments\.length must equal args\.length \+ 1/, + ); + }); + + it('accepts ParamRef args at the gaps between fragments', () => { + const ref = ParamRef.of('a@example.com', { codecId: 'pg/text@1' }); + const node = RawSqlExpr.of( + ['SELECT eql.eq(', ', ', ')'], + [ParamRef.of('email', { codecId: 
'pg/text@1' }), ref], + ); + expect(node.fragments).toHaveLength(3); + expect(node.args).toHaveLength(2); + expect(node.args[1]).toBe(ref); + }); + + it('AnyQueryAst includes "raw-sql" arm (assignability)', () => { + const node: AnyQueryAst = RawSqlExpr.of(['SELECT 1'], []); + expect(node.kind).toBe('raw-sql'); + }); + + it('queryAstKinds and isQueryAst recognize "raw-sql"', () => { + expect(queryAstKinds.has('raw-sql')).toBe(true); + expect(isQueryAst(RawSqlExpr.of(['SELECT 1'], []))).toBe(true); + expect(isQueryAst({ kind: 'raw-sql' })).toBe(true); + expect(isQueryAst({ kind: 'unknown' })).toBe(false); + }); + + it('collectParamRefs returns the embedded ParamRefs in declaration order', () => { + const a = ParamRef.of('a', { codecId: 'pg/text@1' }); + const b = ParamRef.of('b', { codecId: 'pg/text@1' }); + const node = RawSqlExpr.of(['fn(', ', ', ')'], [a, b]); + expect(node.collectParamRefs()).toEqual([a, b]); + }); + + it('collectParamRefs returns an empty array when there are no args', () => { + const node = RawSqlExpr.of(['SELECT 1'], []); + expect(node.collectParamRefs()).toEqual([]); + }); +}); diff --git a/packages/2-sql/4-lanes/relational-core/test/middleware/param-ref-mutator.test.ts b/packages/2-sql/4-lanes/relational-core/test/middleware/param-ref-mutator.test.ts new file mode 100644 index 0000000000..81e5d66b38 --- /dev/null +++ b/packages/2-sql/4-lanes/relational-core/test/middleware/param-ref-mutator.test.ts @@ -0,0 +1,126 @@ +import { coreHash } from '@prisma-next/contract/types'; +import { describe, expect, it } from 'vitest'; +import { ParamRef, RawSqlExpr } from '../../src/ast/types'; +import { createSqlParamRefMutator } from '../../src/middleware/param-ref-mutator'; +import type { SqlExecutionPlan } from '../../src/sql-execution-plan'; + +const TEST_HASH = coreHash('sha256:param-ref-mutator-test'); + +function buildPlan(): { + plan: SqlExecutionPlan; + refs: readonly ParamRef[]; +} { + const a = ParamRef.of('alice@example.com', { codecId: 
'pg/text@1', name: 'email' }); + const b = ParamRef.of(42, { codecId: 'pg/int4@1', name: 'age' }); + const c = ParamRef.of('legacy', { name: 'plain' }); + const ast = RawSqlExpr.of( + ['SELECT a, b, c FROM t WHERE a = ', ' AND b = ', ' AND c = ', ''], + [a, b, c], + ); + const plan: SqlExecutionPlan = { + sql: 'SELECT a, b, c FROM t WHERE a = $1 AND b = $2 AND c = $3', + params: [a.value, b.value, c.value], + ast, + meta: { + target: 'postgres', + storageHash: TEST_HASH, + lane: 'raw', + }, + }; + return { plan, refs: [a, b, c] }; +} + +describe('createSqlParamRefMutator', () => { + it('AC-MUT2: entries() enumerates every ParamRef with { ref, value, codecId }', () => { + const { plan, refs } = buildPlan(); + const mutator = createSqlParamRefMutator(plan); + const entries = [...mutator.entries()]; + + expect(entries).toHaveLength(3); + expect(entries[0]).toMatchObject({ + value: 'alice@example.com', + codecId: 'pg/text@1', + }); + expect(entries[1]).toMatchObject({ + value: 42, + codecId: 'pg/int4@1', + }); + expect(entries[2]).toMatchObject({ + value: 'legacy', + codecId: undefined, + }); + // ref tokens are the underlying ParamRefs; the public type erases this + // via the unforgeable brand, but identity is verifiable here. 
+ expect(entries[0]?.ref).toBe(refs[0] as unknown); + expect(entries[1]?.ref).toBe(refs[1] as unknown); + expect(entries[2]?.ref).toBe(refs[2] as unknown); + }); + + it('AC-MUT5: currentParams() returns plan.params by reference identity when no middleware mutates', () => { + const { plan } = buildPlan(); + const mutator = createSqlParamRefMutator(plan); + + // entries() walk does NOT trip allocation + for (const _ of mutator.entries()) { + // intentionally empty + } + + expect(mutator.currentParams()).toBe(plan.params); + }); + + it('replaceValue updates currentParams() with a fresh frozen array carrying the mutation', () => { + const { plan } = buildPlan(); + const mutator = createSqlParamRefMutator(plan); + + const firstEntry = mutator.entries().next().value!; + mutator.replaceValue(firstEntry.ref, 'mutated@example.com'); + + const finalParams = mutator.currentParams(); + expect(finalParams).not.toBe(plan.params); + expect([...finalParams]).toEqual(['mutated@example.com', 42, 'legacy']); + expect(Object.isFrozen(finalParams)).toBe(true); + // Original plan.params is untouched + expect([...plan.params]).toEqual(['alice@example.com', 42, 'legacy']); + }); + + it('replaceValues applies bulk updates in iteration order (chain-order writeback)', () => { + const { plan } = buildPlan(); + const mutator = createSqlParamRefMutator(plan); + const entries = [...mutator.entries()]; + + mutator.replaceValues([ + { ref: entries[0]!.ref, newValue: 'wire-a' }, + { ref: entries[1]!.ref, newValue: 999 }, + ]); + + expect([...mutator.currentParams()]).toEqual(['wire-a', 999, 'legacy']); + }); + + it('subsequent entries() reflects prior mutations (chain-composition semantics)', () => { + const { plan } = buildPlan(); + const mutator = createSqlParamRefMutator(plan); + const firstEntry = mutator.entries().next().value!; + mutator.replaceValue(firstEntry.ref, 'mutated'); + + const re = [...mutator.entries()]; + expect(re[0]?.value).toBe('mutated'); + expect(re[1]?.value).toBe(42); + 
}); + + it('handles plans with no ParamRefs (empty entries(), currentParams identity preserved)', () => { + const ast = RawSqlExpr.of(['SELECT 1'], []); + const plan: SqlExecutionPlan = { + sql: 'SELECT 1', + params: [], + ast, + meta: { + target: 'postgres', + storageHash: TEST_HASH, + lane: 'raw', + }, + }; + const mutator = createSqlParamRefMutator(plan); + expect([...mutator.entries()]).toEqual([]); + expect(mutator.currentParams()).toBe(plan.params); + }); +}); diff --git a/packages/2-sql/4-lanes/relational-core/test/middleware/param-ref-mutator.types.test-d.ts b/packages/2-sql/4-lanes/relational-core/test/middleware/param-ref-mutator.types.test-d.ts new file mode 100644 index 0000000000..0dacbe7d92 --- /dev/null +++ b/packages/2-sql/4-lanes/relational-core/test/middleware/param-ref-mutator.types.test-d.ts @@ -0,0 +1,46 @@ +import { expectTypeOf, test } from 'vitest'; +import type { ParamRefHandle, SqlParamRefMutator } from '../../src/middleware/param-ref-mutator'; + +type Codecs = { + 'cipherstash/string@1': string; + 'pg/numeric@1': number; +}; + +declare const mutator: SqlParamRefMutator; +declare const stringHandle: ParamRefHandle<'cipherstash/string@1'>; +declare const numericHandle: ParamRefHandle<'pg/numeric@1'>; + +test("AC-TYPE1: replaceValue accepts the codec's declared TInput", () => { + mutator.replaceValue(stringHandle, 'a string'); + mutator.replaceValue(numericHandle, 42); +}); + +test('AC-TYPE1: entries() narrows ref by codecId discriminant', () => { + for (const entry of mutator.entries()) { + if (entry.codecId === 'cipherstash/string@1') { + expectTypeOf(entry.ref).toEqualTypeOf>(); + mutator.replaceValue(entry.ref, 'narrowed-as-string'); + } else if (entry.codecId === 'pg/numeric@1') { + expectTypeOf(entry.ref).toEqualTypeOf>(); + mutator.replaceValue(entry.ref, 99); + } else { + expectTypeOf(entry.codecId).toEqualTypeOf(); + mutator.replaceValue(entry.ref, 'anything goes for unresolved codecs'); + } + } +}); + +test('AC-TYPE2: passing 
wrong-shape value to replaceValue is a type error', () => { + // @ts-expect-error - cipherstash/string@1's TInput is `string`, not number + mutator.replaceValue(stringHandle, 42); + + // @ts-expect-error - pg/numeric@1's TInput is `number`, not string + mutator.replaceValue(numericHandle, 'not a number'); +}); + +test('AC-MUT4: handles cannot be fabricated by callers (brand check)', () => { + // Using the public surface alone, callers cannot construct a ParamRefHandle + // because the brand is a unique symbol declared inside the module. + // @ts-expect-error - object literals do not satisfy the branded handle + const _bad: ParamRefHandle<'cipherstash/string@1'> = {}; +}); diff --git a/packages/2-sql/4-lanes/relational-core/test/plan-from-ast.test.ts b/packages/2-sql/4-lanes/relational-core/test/plan-from-ast.test.ts new file mode 100644 index 0000000000..39f760cec9 --- /dev/null +++ b/packages/2-sql/4-lanes/relational-core/test/plan-from-ast.test.ts @@ -0,0 +1,58 @@ +import type { Contract } from '@prisma-next/contract/types'; +import { coreHash, profileHash } from '@prisma-next/contract/types'; +import type { SqlStorage } from '@prisma-next/sql-contract/types'; +import { describe, expect, it } from 'vitest'; +import { ParamRef, RawSqlExpr } from '../src/exports/ast'; +import { planFromAst } from '../src/plan'; + +const contract: Contract = { + target: 'postgres', + targetFamily: 'sql', + profileHash: profileHash('sha256:plan-from-ast-test'), + roots: {}, + capabilities: {}, + extensionPacks: {}, + meta: {}, + storage: { + storageHash: coreHash('sha256:plan-from-ast-test-storage'), + tables: {}, + } as unknown as SqlStorage, + models: {}, +}; + +describe('planFromAst (AC-PLAN1..3)', () => { + it('AC-PLAN1: meta.storageHash matches contract.storage.storageHash', () => { + const ast = RawSqlExpr.of(['SELECT 1'], []); + const plan = planFromAst(ast, contract); + expect(plan.meta.storageHash).toBe(contract.storage.storageHash); + }); + + it('forwards target and 
targetFamily from the contract onto plan.meta', () => { + const ast = RawSqlExpr.of(['SELECT 1'], []); + const plan = planFromAst(ast, contract); + expect(plan.meta.target).toBe(contract.target); + expect(plan.meta.targetFamily).toBe(contract.targetFamily); + }); + + it("AC-PLAN2: meta.lane defaults to 'raw' and is overridable via the third arg", () => { + const ast = RawSqlExpr.of(['SELECT 1'], []); + const defaultPlan = planFromAst(ast, contract); + expect(defaultPlan.meta.lane).toBe('raw'); + + const overridden = planFromAst(ast, contract, 'sql-raw'); + expect(overridden.meta.lane).toBe('sql-raw'); + }); + + it('returns a plan whose ast is the supplied AST and whose params are empty (resolved at lowering)', () => { + const ast = RawSqlExpr.of( + ['SELECT eql_v2.eq(', ', ', ')'], + [ + ParamRef.of('email', { codecId: 'pg/text@1' }), + ParamRef.of('alice', { codecId: 'pg/text@1' }), + ], + ); + const plan = planFromAst(ast, contract); + expect(plan.ast).toBe(ast); + expect(plan.params).toEqual([]); + }); +}); diff --git a/packages/2-sql/4-lanes/relational-core/tsdown.config.ts b/packages/2-sql/4-lanes/relational-core/tsdown.config.ts index 6a4a984e27..d9b9c2ea54 100644 --- a/packages/2-sql/4-lanes/relational-core/tsdown.config.ts +++ b/packages/2-sql/4-lanes/relational-core/tsdown.config.ts @@ -7,6 +7,7 @@ export default defineConfig({ 'src/exports/errors.ts', 'src/exports/ast.ts', 'src/exports/expression.ts', + 'src/exports/middleware.ts', 'src/exports/plan.ts', 'src/exports/query-lane-context.ts', ], diff --git a/packages/2-sql/5-runtime/src/codecs/decoding.ts b/packages/2-sql/5-runtime/src/codecs/decoding.ts index f6c186bd4b..fa5ffaa838 100644 --- a/packages/2-sql/5-runtime/src/codecs/decoding.ts +++ b/packages/2-sql/5-runtime/src/codecs/decoding.ts @@ -38,6 +38,9 @@ function projectionListFromAst(ast: AnyQueryAst): ReadonlyArray if (ast.kind === 'select') { return ast.projection; } + if (ast.kind === 'raw-sql') { + return undefined; + } return ast.returning; } 
diff --git a/packages/2-sql/5-runtime/src/middleware/lints.ts b/packages/2-sql/5-runtime/src/middleware/lints.ts index 8e75fa0a3b..52b841beae 100644 --- a/packages/2-sql/5-runtime/src/middleware/lints.ts +++ b/packages/2-sql/5-runtime/src/middleware/lints.ts @@ -94,6 +94,9 @@ function evaluateAstLints(ast: AnyQueryAst): LintFinding[] { case 'insert': break; + case 'raw-sql': + break; + // v8 ignore next 2 default: throw new Error(`Unsupported AST kind: ${(ast satisfies never as { kind: string }).kind}`); diff --git a/packages/2-sql/5-runtime/src/middleware/sql-middleware.ts b/packages/2-sql/5-runtime/src/middleware/sql-middleware.ts index b69611e8a3..29b570f298 100644 --- a/packages/2-sql/5-runtime/src/middleware/sql-middleware.ts +++ b/packages/2-sql/5-runtime/src/middleware/sql-middleware.ts @@ -6,6 +6,7 @@ import type { } from '@prisma-next/framework-components/runtime'; import type { SqlStorage } from '@prisma-next/sql-contract/types'; import type { AnyQueryAst } from '@prisma-next/sql-relational-core/ast'; +import type { SqlParamRefMutator } from '@prisma-next/sql-relational-core/middleware'; import type { SqlExecutionPlan } from '@prisma-next/sql-relational-core/plan'; export interface SqlMiddlewareContext extends RuntimeMiddlewareContext { @@ -21,7 +22,8 @@ export interface DraftPlan { readonly meta: PlanMeta; } -export interface SqlMiddleware extends RuntimeMiddleware { +export interface SqlMiddleware = Record> + extends RuntimeMiddleware> { readonly familyId?: 'sql'; /** * Rewrite the query AST before it is lowered to SQL. Middlewares run in @@ -42,7 +44,24 @@ export interface SqlMiddleware extends RuntimeMiddleware { * See `docs/architecture docs/subsystems/4. Runtime & Middleware Framework.md`. */ beforeCompile?(draft: DraftPlan, ctx: SqlMiddlewareContext): Promise; - beforeExecute?(plan: SqlExecutionPlan, ctx: SqlMiddlewareContext): Promise; + /** + * Mutate `ParamRef.value` slots before encode runs. 
The third `params` + * argument is a {@link SqlParamRefMutator} scoped to value slots only — + * SQL strings, projections, and `ParamRef` membership are not mutable. + * Existing `(plan)` and `(plan, ctx)` middleware bodies that ignore the + * additional argument continue to compile and run unchanged. + * + * `ctx.signal` carries the per-query `AbortSignal` (ADR 207); middleware + * that wraps a network SDK forwards `ctx.signal` to that SDK. + * Cooperative cancellation: a body that ignores the signal still + * surfaces `RUNTIME.ABORTED { phase: 'beforeExecute' }` promptly via + * the runtime's race against the signal. + */ + beforeExecute?( + plan: SqlExecutionPlan, + ctx: SqlMiddlewareContext, + params?: SqlParamRefMutator, + ): void | Promise; onRow?( row: Record, plan: SqlExecutionPlan, diff --git a/packages/2-sql/5-runtime/src/sql-runtime.ts b/packages/2-sql/5-runtime/src/sql-runtime.ts index 518f377c44..87633b768d 100644 --- a/packages/2-sql/5-runtime/src/sql-runtime.ts +++ b/packages/2-sql/5-runtime/src/sql-runtime.ts @@ -25,6 +25,10 @@ import type { SqlQueryable, SqlTransaction, } from '@prisma-next/sql-relational-core/ast'; +import { + createSqlParamRefMutator, + type SqlParamRefMutator, +} from '@prisma-next/sql-relational-core/middleware'; import type { SqlExecutionPlan, SqlQueryPlan } from '@prisma-next/sql-relational-core/plan'; import type { CodecDescriptorRegistry, @@ -204,6 +208,12 @@ class SqlRuntimeImpl = Contract = Contract { checkAborted(codecCtx, 'stream'); - const exec: SqlExecutionPlan = isExecutionPlan(plan) - ? Object.freeze({ - ...plan, - params: await encodeParams(plan, self.codecRegistry, codecCtx, self.contractCodecs), - }) - : await self.lower(await self.runBeforeCompile(plan), codecCtx); - - self.familyAdapter.validatePlan(exec, self.contract); + // Lower without encoding so `beforeExecute` middleware can mutate + // `ParamRef.value` slots before any `codec.encode` call runs. 
+ // `lowered.params` carries the AST's raw values for both branches + // (`SqlExecutionPlan` callers pass in already-lowered SQL + raw + // params; `SqlQueryPlan` callers go through the lane → adapter + // lowering, which extracts params from `collectOrderedParamRefs`). + const lowered: SqlExecutionPlan = isExecutionPlan(plan) + ? plan + : Object.freeze( + lowerSqlPlan(self.adapter, self.contract, await self.runBeforeCompile(plan)), + ); + + // `mutator` is built once per execute() and exposes the plan's + // outbound `ParamRef` slots to `beforeExecute`. If no middleware + // mutates, `mutator.currentParams()` returns `lowered.params` by + // reference identity (AC-MUT5). + const mutator = createSqlParamRefMutator(lowered); + + self.familyAdapter.validatePlan(lowered, self.contract); self._telemetry = null; if (!self.startupVerified && self.verify.mode === 'startup') { @@ -302,15 +328,40 @@ class SqlRuntimeImpl = Contract>( - exec, + // The runDriver thunk is invoked by `runWithMiddleware` AFTER + // every `beforeExecute` body has settled. At that point the + // mutator carries any mutations the chain made; we read them, + // run `encodeParams` once, and hand the encoded params to the + // driver. Encoding therefore runs exactly once per execute() + // and middleware sees pre-encode (raw) values throughout. + const stream = runWithMiddleware< + SqlExecutionPlan, + Record, + SqlParamRefMutator + >( + lowered, self.middleware, - self.ctx, - () => - queryable.execute>({ - sql: exec.sql, - params: exec.params, - }), + execMiddlewareCtx, + (): AsyncIterable> => ({ + async *[Symbol.asyncIterator]() { + const finalParams = mutator.currentParams(); + const planForEncode: SqlExecutionPlan = + finalParams === lowered.params ? 
lowered : { ...lowered, params: finalParams }; + const encoded = await encodeParams( + planForEncode, + self.codecRegistry, + codecCtx, + self.contractCodecs, + ); + for await (const row of queryable.execute>({ + sql: lowered.sql, + params: encoded, + })) { + yield row; + } + }, + }), + mutator, ); // Manually drive the driver's async iterator so the between-row @@ -328,7 +379,7 @@ class SqlRuntimeImpl = Contract = Contract; + +import type { + SqlRuntimeAdapterDescriptor, + SqlRuntimeAdapterInstance, + SqlRuntimeTargetDescriptor, +} from '../src/sql-context'; +import { createExecutionContext, createSqlExecutionStack } from '../src/sql-context'; +import { createRuntime } from '../src/sql-runtime'; + +const testContract: Contract = { + targetFamily: 'sql', + target: 'postgres', + profileHash: profileHash('sha256:test'), + models: {}, + roots: {}, + storage: { storageHash: coreHash('sha256:test'), tables: {} }, + extensionPacks: {}, + capabilities: {}, + meta: {}, +}; + +function createStubAdapter() { + const codecs = createCodecRegistry(); + codecs.register( + codec({ + typeId: 'pg/text@1', + targetTypes: ['text'], + encode: (v: string) => v, + decode: (w: string) => w, + }), + ); + codecs.register( + codec({ + typeId: 'cipherstash/string@1', + targetTypes: ['eql_v2_encrypted'], + encode: async (v: string) => `wire:${v}`, + decode: (w: string) => w, + }), + ); + return { + familyId: 'sql' as const, + targetId: 'postgres' as const, + profile: { + id: 'test', + target: 'postgres', + capabilities: {}, + codecs() { + return codecs; + }, + readMarkerStatement() { + return { + sql: 'select core_hash, profile_hash, contract_json, canonical_version, updated_at, app_tag, meta, invariants from prisma_contract.marker where id = $1', + params: [1], + }; + }, + parseMarkerRow: parseContractMarkerRow, + }, + lower(_ast: unknown, ctx: { params?: readonly unknown[] }) { + // Stub the lower step. 
The real adapter would render the AST; for the + // test we only care that `params` flow through. The adapter writes + // the params it received from the lane (raw, pre-encode values), so + // mutator visibility into pre-encode values is preserved. + return Object.freeze({ + sql: 'SELECT FROM stub', + params: ctx.params ? [...ctx.params] : [], + }); + }, + }; +} + +function createMockDriver() { + const rootExecute = vi.fn().mockImplementation(async function* (_request: SqlExecuteRequest) { + yield { id: 1 }; + }); + const driver: SqlDriver = { + execute: rootExecute, + query: vi.fn().mockResolvedValue({ rows: [], rowCount: 0 }), + connect: vi.fn().mockResolvedValue(undefined), + acquireConnection: vi.fn().mockResolvedValue({ + execute: vi.fn(), + query: vi.fn(), + release: vi.fn(), + destroy: vi.fn(), + beginTransaction: vi.fn(), + }), + close: vi.fn().mockResolvedValue(undefined), + }; + return Object.assign(driver, { __rootExecute: rootExecute }); +} + +function createTestSetup(middleware: readonly SqlMiddleware[]) { + const adapter = createStubAdapter(); + const driver = createMockDriver(); + const targetDescriptor: SqlRuntimeTargetDescriptor<'postgres'> = { + kind: 'target', + id: 'postgres', + version: '0.0.1', + familyId: 'sql', + targetId: 'postgres', + codecs: () => createCodecRegistry(), + parameterizedCodecs: () => [], + create: () => ({ familyId: 'sql', targetId: 'postgres' }), + }; + const adapterDescriptor: SqlRuntimeAdapterDescriptor<'postgres'> = { + kind: 'adapter', + id: 'a', + version: '0.0.1', + familyId: 'sql', + targetId: 'postgres', + codecs: () => adapter.profile.codecs(), + parameterizedCodecs: () => [], + create: () => + Object.assign( + { familyId: 'sql' as const, targetId: 'postgres' as const }, + adapter, + ) as SqlRuntimeAdapterInstance<'postgres'>, + }; + const stack = createSqlExecutionStack({ + target: targetDescriptor, + adapter: adapterDescriptor, + extensionPacks: [], + }); + type StackInstance = ExecutionStackInstance< + 'sql', + 
'postgres', + SqlRuntimeAdapterInstance<'postgres'>, + RuntimeDriverInstance<'sql', 'postgres'>, + RuntimeExtensionInstance<'sql', 'postgres'> + >; + const stackInstance = instantiateExecutionStack(stack) as StackInstance; + const context = createExecutionContext({ + contract: testContract, + stack: { target: targetDescriptor, adapter: adapterDescriptor, extensionPacks: [] }, + }); + const runtime = createRuntime({ + stackInstance, + context, + driver, + middleware, + verify: { mode: 'onFirstUse', requireMarker: false }, + }); + return { runtime, driver }; +} + +function buildPlan(): SqlQueryPlan { + const a = ParamRef.of('alice@example.com', { codecId: 'cipherstash/string@1', name: 'email1' }); + const b = ParamRef.of('bob@example.com', { codecId: 'cipherstash/string@1', name: 'email2' }); + const c = ParamRef.of('plain', { codecId: 'pg/text@1', name: 'tag' }); + const ast = RawSqlExpr.of( + ['INSERT INTO t (email1, email2, tag) VALUES (', ', ', ', ', ') RETURNING id'], + [a, b, c], + ); + return { + ast, + params: [a.value, b.value, c.value], + meta: { + target: testContract.target, + targetFamily: testContract.targetFamily, + storageHash: testContract.storage.storageHash, + lane: 'raw', + }, + }; +} + +describe('beforeExecute mutator (AC-MUT3, AC-MUT5, AC-EX1, AC-ABT2..4)', () => { + it('AC-MUT3: a mutated value reaches subsequent codec.encode', async () => { + const mutating: SqlMiddleware = { + name: 'mutate-emails', + familyId: 'sql', + async beforeExecute(_plan, _ctx, params) { + if (!params) return; + for (const entry of params.entries()) { + if (entry.codecId === 'cipherstash/string@1') { + params.replaceValue(entry.ref, `mutated:${entry.value as string}`); + } + } + }, + }; + const { runtime, driver } = createTestSetup([mutating]); + await runtime.execute(buildPlan()).toArray(); + + const sentRequest = driver.__rootExecute.mock.calls[0]?.[0] as + | { params?: readonly unknown[] } + | undefined; + // The cipherstash/string@1 codec wraps with `wire:`. 
If `replaceValue` + // reached encode, the driver receives `wire:mutated:`. + expect(sentRequest?.params).toEqual([ + 'wire:mutated:alice@example.com', + 'wire:mutated:bob@example.com', + 'plain', + ]); + }); + + it('AC-EX1: bulk-pattern fixture — entries() walk, codec-id filter, single async call, replaceValues writeback, encode reflects writeback', async () => { + let bulkCalls = 0; + const bulkMiddleware: SqlMiddleware = { + name: 'bulk-encrypt-stub', + familyId: 'sql', + async beforeExecute(_plan, ctx, params) { + if (!params) return; + const targets: { ref: ParamRefHandleAny; plain: string }[] = []; + for (const entry of params.entries()) { + if (entry.codecId === 'cipherstash/string@1' && typeof entry.value === 'string') { + targets.push({ ref: entry.ref, plain: entry.value }); + } + } + if (targets.length === 0) return; + + // One bulk async call per execute() — forwarding ctx.signal. + bulkCalls++; + const ciphertexts: string[] = await new Promise((resolve) => { + setImmediate(() => resolve(targets.map((t) => `bulk:${t.plain}`))); + }); + // ctx.signal must be present and identity-equal to the one supplied. + // (Defensively typed; the test's signal assertions live elsewhere.) + expect(ctx).toBeDefined(); + + params.replaceValues( + targets.map((t, i) => ({ ref: t.ref, newValue: ciphertexts[i] as string })), + ); + }, + }; + const { runtime, driver } = createTestSetup([bulkMiddleware]); + await runtime.execute(buildPlan()).toArray(); + + expect(bulkCalls).toBe(1); + + expect(driver.__rootExecute).toHaveBeenCalledOnce(); + const sentRequest = driver.__rootExecute.mock.calls[0]?.[0] as + | { params?: readonly unknown[] } + | undefined; + // After mutation, the bulk middleware wrote ciphertexts; the codec + // then runs as identity-with-wire-prefix. So the cipherstash params + // arrive at the driver as `wire:bulk:` (encode adds `wire:`). 
+ expect(sentRequest?.params).toEqual([ + 'wire:bulk:alice@example.com', + 'wire:bulk:bob@example.com', + 'plain', + ]); + }); + + it('AC-MUT5: with no mutating middleware, plan.params reaches encodeParams without allocation', async () => { + let observed: unknown[] | undefined; + const observer: SqlMiddleware = { + name: 'observer', + familyId: 'sql', + async beforeExecute(plan, _ctx, params) { + // Walking entries() must not trigger working-array allocation + for (const _ of params?.entries() ?? []) { + // intentionally empty + } + observed = [...plan.params]; + }, + }; + const { runtime, driver } = createTestSetup([observer]); + await runtime.execute(buildPlan()).toArray(); + expect(observed).toEqual(['alice@example.com', 'bob@example.com', 'plain']); + + // Encoded params arrive at driver: cipherstash/string@1 wraps with `wire:`. + const sentRequest = driver.__rootExecute.mock.calls[0]?.[0] as + | { params?: readonly unknown[] } + | undefined; + expect(sentRequest?.params).toEqual([ + 'wire:alice@example.com', + 'wire:bob@example.com', + 'plain', + ]); + }); + + it('AC-ABT2: pre-check at second middleware entry throws phase: "beforeExecute"', async () => { + const events: string[] = []; + const ctrl = new AbortController(); + const first: SqlMiddleware = { + name: 'first', + familyId: 'sql', + async beforeExecute() { + events.push('first'); + // Abort BEFORE returning so the loop's next iteration sees + // an already-aborted signal at entry. 
+ ctrl.abort(new Error('caller cancelled')); + }, + }; + const second: SqlMiddleware = { + name: 'second', + familyId: 'sql', + async beforeExecute() { + events.push('second'); + }, + }; + const { runtime } = createTestSetup([first, second]); + await expect( + runtime.execute(buildPlan(), { signal: ctrl.signal }).toArray(), + ).rejects.toMatchObject({ + code: 'RUNTIME.ABORTED', + details: { phase: 'beforeExecute' }, + }); + expect(events).toEqual(['first']); + }); + + it('AC-ABT3: mid-flight abort surfaces RUNTIME.ABORTED promptly even when middleware ignores the signal', async () => { + const ctrl = new AbortController(); + const mw: SqlMiddleware = { + name: 'slow-and-deaf', + familyId: 'sql', + // Ignores ctx.signal entirely; just blocks for a long time. + async beforeExecute() { + await new Promise((resolve) => setTimeout(resolve, 100)); + }, + }; + const { runtime } = createTestSetup([mw]); + setTimeout(() => ctrl.abort(new Error('mid-flight')), 5); + + await expect( + runtime.execute(buildPlan(), { signal: ctrl.signal }).toArray(), + ).rejects.toMatchObject({ + code: 'RUNTIME.ABORTED', + details: { phase: 'beforeExecute' }, + }); + }); + + it('AC-ABT4: middleware bodies that throw non-abort errors pass through unchanged (no re-wrap)', async () => { + const customError = new Error('something else'); + const mw: SqlMiddleware = { + name: 'throws', + familyId: 'sql', + async beforeExecute() { + throw customError; + }, + }; + const { runtime } = createTestSetup([mw]); + await expect(runtime.execute(buildPlan()).toArray()).rejects.toBe(customError); + }); +}); diff --git a/packages/2-sql/5-runtime/test/raw-sql-expr-encode.test.ts b/packages/2-sql/5-runtime/test/raw-sql-expr-encode.test.ts new file mode 100644 index 0000000000..14c6a820ed --- /dev/null +++ b/packages/2-sql/5-runtime/test/raw-sql-expr-encode.test.ts @@ -0,0 +1,56 @@ +import { coreHash } from '@prisma-next/contract/types'; +import { + codec, + createCodecRegistry, + ParamRef, + RawSqlExpr, +} from 
'@prisma-next/sql-relational-core/ast'; +import type { SqlExecutionPlan } from '@prisma-next/sql-relational-core/plan'; +import { describe, expect, it } from 'vitest'; +import { encodeParams } from '../src/codecs/encoding'; + +const TEST_HASH = coreHash('sha256:raw-sql-expr-encode'); + +describe('encodeParams over a RawSqlExpr-backed plan (AC-LOW4)', () => { + it('runs async codec.encode for ParamRefs interpolated inside a RawSqlExpr AST', async () => { + const registry = createCodecRegistry(); + registry.register( + codec({ + typeId: 'test/async-text@1', + targetTypes: ['text'], + encode: async (value: string) => `wire:${value}`, + decode: async (wire: string) => wire, + }), + ); + registry.register( + codec({ + typeId: 'test/sync-int@1', + targetTypes: ['int4'], + encode: (value: number) => value * 10, + decode: (wire: number) => wire, + }), + ); + + const ast = RawSqlExpr.of( + ['SELECT eql_v2.add_search_config(', ', ', ')'], + [ + ParamRef.of('email', { codecId: 'test/async-text@1' }), + ParamRef.of(7, { codecId: 'test/sync-int@1' }), + ], + ); + + const plan: SqlExecutionPlan = { + sql: 'SELECT eql_v2.add_search_config($1, $2)', + params: ['email', 7], + ast, + meta: { + target: 'postgres', + storageHash: TEST_HASH, + lane: 'raw', + }, + }; + + const encoded = await encodeParams(plan, registry, {}); + expect([...encoded]).toEqual(['wire:email', 70]); + }); +}); diff --git a/packages/3-extensions/cipherstash/DEVELOPING.md b/packages/3-extensions/cipherstash/DEVELOPING.md new file mode 100644 index 0000000000..68e17e876c --- /dev/null +++ b/packages/3-extensions/cipherstash/DEVELOPING.md @@ -0,0 +1,96 @@ +# Developing `@prisma-next/extension-cipherstash` + +Contributor-facing notes for the cipherstash extension. The user-facing +surface lives in `README.md`; this file collects the in-progress +milestones, internal layout, and design choices a contributor needs +to know when extending the package. 
+ +## Source layout + +``` +packages/3-extensions/cipherstash/ +└── src/ + ├── core/ + │ ├── envelope.ts EncryptedString class + handle helpers + │ ├── sdk.ts CipherstashSdk interface (framework-native shape) + │ ├── codecs.ts cipherstash/string@1 codec factory + │ ├── parameterized.ts RuntimeParameterizedCodecDescriptor + arktype params schema + │ └── eql-bundle.ts EQL install SQL (placeholder until live-DB integration) + └── exports/ + ├── index.ts EncryptedString + CipherstashSdk types + ├── runtime.ts SqlRuntimeExtensionDescriptor factory + ├── control.ts SqlControlExtensionDescriptor + databaseDependencies.init + ├── column-types.ts placeholder for the encryptedString({...}) TS factory + └── middleware.ts placeholder for the bulkEncryptMiddleware factory +``` + +## Implemented surface + +- `EncryptedString.from(plaintext)` and `EncryptedString.fromInternal({...})` envelope constructors. +- `envelope.decrypt({ signal? })` — returns cached plaintext when present, otherwise routes through the SDK's single-cell `decrypt` and forwards the caller-supplied `AbortSignal` by identity. +- `cipherstash/string@1` codec with target type `eql_v2_encrypted`, traits `['equality']`, and `renderOutputType` returning `EncryptedString`. +- `RuntimeParameterizedCodecDescriptor<{equality, freeTextSearch}>` with arktype params schema validated at the contract boundary. +- `SqlControlExtensionDescriptor` with one `databaseDependencies.init` entry installing EQL via the placeholder bundle in `core/eql-bundle.ts`. 
+ +## Forthcoming surface (in-flight work) + +| Surface | Tracked under | +| -------------------------------------------------- | ---------------------------- | +| `encryptedString({...})` TS contract factory | next milestone (M2.b) | +| `bulkEncryptMiddleware(sdk)` factory | M2.c | +| Real EQL install bundle (replaces placeholder) | M2.c | +| Live-Postgres + live-EQL integration tests | M2.c | +| `eq` / `ilike` operator lowering | M3 / M4 | +| `decryptAll(rows, opts?)` walker | M4 | + +The shipping package surface — subpath exports, codec id, descriptor +shapes — is stable across these milestones; the placeholders in +`exports/column-types.ts` and `exports/middleware.ts` get populated +in place rather than restructured. + +## Design choices worth knowing + +### Handle storage — `WeakMap` + +The `EncryptedStringHandle` shape is a module-private mutable record +keyed off a module-scoped `WeakMap`. The +alternative — `#`-prefixed class fields — provides the same +package-internal isolation, but the `WeakMap` shape keeps +`Object.keys(envelope)` and the default `JSON.stringify` shape +trivially clean across every JS host without per-class `toJSON` +overrides. (A `toJSON()` override ships anyway to produce the +documented `{ "$encryptedString": "" }` placeholder.) + +### Codec is constructed per SDK binding + +`createCipherstashStringCodec(sdk)` is a factory rather than a module +singleton. The codec's `decode` body captures the SDK so the +read-side envelope can issue `decrypt({ signal? })` against it. This +differs from pgvector (whose codec is fully stateless and *can* be a +module singleton) but aligns with multi-tenant deployments +constructing one extension descriptor per tenant. + +### `CipherstashSdk` is framework-native, not the upstream SDK shape + +The interface declares three async methods (`decrypt`, `bulkEncrypt`, +`bulkDecrypt`), each accepting an optional `AbortSignal`. 
This is +deliberately smaller than CipherStash's upstream `EncryptionClient` +(rich `EncryptOperation` / `LockContext` / lazy-init machinery) so +real-world usage wraps the upstream client behind a thin adapter +satisfying `CipherstashSdk`. Keeps the framework-side surface free +of upstream-specific types. + +### EQL install SQL is a placeholder + +`src/core/eql-bundle.ts` ships a placeholder string today; the real +~170 KB bundle gets vendored in alongside the live-Postgres + live-EQL +integration tests. The placeholder makes the +`databaseDependencies.init` shape exercise-able in unit tests +without committing the large vendored file ahead of the integration +plumbing. + +## References + +- [pgvector extension](../pgvector/README.md) and its `src/exports/runtime.ts` — the structural precedent for codec, parameterized descriptor, and `databaseDependencies.init` shape. +- [ADR 207 — Codec call context (per-query AbortSignal and column metadata)](../../../docs/architecture%20docs/adrs/ADR%20207%20-%20Codec%20call%20context%20per-query%20AbortSignal%20and%20column%20metadata.md). +- [ADR 208 — Higher-order codecs for parameterized types](../../../docs/architecture%20docs/adrs/ADR%20208%20-%20Higher-order%20codecs%20for%20parameterized%20types.md). diff --git a/packages/3-extensions/cipherstash/README.md b/packages/3-extensions/cipherstash/README.md new file mode 100644 index 0000000000..afb7bca5dc --- /dev/null +++ b/packages/3-extensions/cipherstash/README.md @@ -0,0 +1,54 @@ +# `@prisma-next/extension-cipherstash` + +Searchable-encryption integration for Prisma Next, backed by +[CipherStash](https://cipherstash.com/) and the EQL Postgres extension. + +> **Status:** in development. The currently-implemented surface is the +> **storage** path: an `EncryptedString` envelope, its codec, and the +> EQL-bundle install dependency. 
Search operators (`eq`, `ilike`),
+> `decryptAll`, the bulk-encrypt middleware, the PSL constructor, and the
+> `encryptedString({...})` TS factory ship in subsequent releases.
+
+## Subpath exports
+
+| Subpath | Purpose |
+| ----------------- | ------------------------------------------------------------- |
+| `.` | `EncryptedString` envelope (and, in a future release, `decryptAll`) |
+| `./column-types` | `encryptedString({...})` TS contract factory (forthcoming) |
+| `./runtime` | `SqlRuntimeExtensionDescriptor` with `parameterizedCodecs` |
+| `./control` | `SqlControlExtensionDescriptor` with `databaseDependencies` |
+| `./middleware` | `bulkEncryptMiddleware` factory (forthcoming) |
+
+## Usage
+
+```ts
+import { EncryptedString } from '@prisma-next/extension-cipherstash';
+
+const envelope = EncryptedString.from('alice@example.com');
+const plaintext = await envelope.decrypt();
+```
+
+The codec registers under the `cipherstash/string@1` codec id and
+maps to the EQL `eql_v2_encrypted` Postgres native type. Per-column
+search-mode parameters (`equality`, `freeTextSearch`) are validated
+at the contract boundary by an arktype schema and threaded through
+the parameterized-codec descriptor model — see [ADR 208 — Higher-order
+codecs for parameterized types](../../../docs/architecture%20docs/adrs/ADR%20208%20-%20Higher-order%20codecs%20for%20parameterized%20types.md).
+The codec's `decode` site reads the cell's `(table, column)` from
+the per-call codec context — see [ADR 207 — Codec call context per-query
+AbortSignal and column metadata](../../../docs/architecture%20docs/adrs/ADR%20207%20-%20Codec%20call%20context%20per-query%20AbortSignal%20and%20column%20metadata.md).
+
+## Database setup
+
+The package declares the EQL extension as a database dependency.
+When using `prisma-next db init`, the migration planner runs the
+EQL install bundle; the precheck short-circuits if EQL is already
+installed. 
+ +## References + +- [pgvector extension](../pgvector/README.md) — structural precedent for codec, parameterized descriptor, and `databaseDependencies.init` shape +- [ADR 207 — Codec call context (per-query AbortSignal and column metadata)](../../../docs/architecture%20docs/adrs/ADR%20207%20-%20Codec%20call%20context%20per-query%20AbortSignal%20and%20column%20metadata.md) +- [ADR 208 — Higher-order codecs for parameterized types](../../../docs/architecture%20docs/adrs/ADR%20208%20-%20Higher-order%20codecs%20for%20parameterized%20types.md) +- [Prisma Next Architecture Overview](../../../docs/Architecture%20Overview.md) +- [DEVELOPING.md](DEVELOPING.md) — contributor-facing notes on the in-progress milestones and current source layout diff --git a/packages/3-extensions/cipherstash/biome.jsonc b/packages/3-extensions/cipherstash/biome.jsonc new file mode 100644 index 0000000000..b8994a7330 --- /dev/null +++ b/packages/3-extensions/cipherstash/biome.jsonc @@ -0,0 +1,4 @@ +{ + "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json", + "extends": "//" +} diff --git a/packages/3-extensions/cipherstash/package.json b/packages/3-extensions/cipherstash/package.json new file mode 100644 index 0000000000..93312da884 --- /dev/null +++ b/packages/3-extensions/cipherstash/package.json @@ -0,0 +1,52 @@ +{ + "name": "@prisma-next/extension-cipherstash", + "version": "0.0.1", + "type": "module", + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "test:coverage": "vitest run --coverage", + "typecheck": "tsc --project tsconfig.json --noEmit", + "lint": "biome check . 
--error-on-warnings", + "lint:fix": "biome check --write .", + "lint:fix:unsafe": "biome check --write --unsafe .", + "clean": "rm -rf dist dist-tsc dist-tsc-prod coverage .tmp-output" + }, + "dependencies": { + "@prisma-next/family-sql": "workspace:*", + "@prisma-next/framework-components": "workspace:*", + "@prisma-next/sql-relational-core": "workspace:*", + "@prisma-next/sql-runtime": "workspace:*", + "@prisma-next/utils": "workspace:*", + "arktype": "catalog:" + }, + "devDependencies": { + "@prisma-next/psl-parser": "workspace:*", + "@prisma-next/sql-contract-psl": "workspace:*", + "@prisma-next/test-utils": "workspace:*", + "@prisma-next/tsconfig": "workspace:*", + "@prisma-next/tsdown": "workspace:*", + "tsdown": "catalog:", + "typescript": "catalog:", + "vitest": "catalog:" + }, + "files": [ + "dist", + "src" + ], + "exports": { + ".": "./dist/index.mjs", + "./column-types": "./dist/column-types.mjs", + "./control": "./dist/control.mjs", + "./middleware": "./dist/middleware.mjs", + "./pack": "./dist/pack.mjs", + "./runtime": "./dist/runtime.mjs", + "./package.json": "./package.json" + }, + "repository": { + "type": "git", + "url": "https://github.com/prisma/prisma-next.git", + "directory": "packages/3-extensions/cipherstash" + } +} diff --git a/packages/3-extensions/cipherstash/src/core/authoring.ts b/packages/3-extensions/cipherstash/src/core/authoring.ts new file mode 100644 index 0000000000..e71bf3806d --- /dev/null +++ b/packages/3-extensions/cipherstash/src/core/authoring.ts @@ -0,0 +1,51 @@ +/** + * Authoring contributions for the cipherstash extension. + * + * Registers `cipherstash.EncryptedString({ equality?, freeTextSearch? })` as + * a namespaced PSL type constructor and its TS-side equivalent. 
The same + * constructor descriptor lowers a PSL field-type expression like + * `cipherstash.EncryptedString({ equality: true })` and a TS factory call + * like `type.cipherstash.EncryptedString({ equality: true })` to an + * identical `ColumnTypeDescriptor` so authoring sources stay byte-equal at + * the contract IR. + * + * Mirrors `packages/3-extensions/pgvector/src/core/authoring.ts` — the + * differences are (a) `cipherstash` is the namespace, (b) the constructor + * takes a single object argument with two optional booleans, and (c) the + * default value for both flags is `false` (storage-only encryption is the + * legitimate default per the project's M2 standing decision). + */ + +import type { AuthoringTypeNamespace } from '@prisma-next/framework-components/authoring'; +import { CIPHERSTASH_STRING_CODEC_ID, CIPHERSTASH_STRING_TARGET_TYPE } from './codecs'; + +export const cipherstashAuthoringTypes = { + cipherstash: { + EncryptedString: { + kind: 'typeConstructor', + args: [ + { + kind: 'object', + name: 'options', + properties: { + equality: { kind: 'boolean', optional: true }, + freeTextSearch: { kind: 'boolean', optional: true }, + }, + }, + ], + output: { + codecId: CIPHERSTASH_STRING_CODEC_ID, + nativeType: CIPHERSTASH_STRING_TARGET_TYPE, + typeParams: { + equality: { kind: 'arg', index: 0, path: ['equality'], default: false }, + freeTextSearch: { + kind: 'arg', + index: 0, + path: ['freeTextSearch'], + default: false, + }, + }, + }, + }, + }, +} as const satisfies AuthoringTypeNamespace; diff --git a/packages/3-extensions/cipherstash/src/core/codecs.ts b/packages/3-extensions/cipherstash/src/core/codecs.ts new file mode 100644 index 0000000000..91116ec59e --- /dev/null +++ b/packages/3-extensions/cipherstash/src/core/codecs.ts @@ -0,0 +1,148 @@ +/** + * Cipherstash storage codec — wraps the `EncryptedString` envelope at + * the SQL codec boundary. 
+ * + * The codec is intentionally thin: + * + * - `decode(wire, ctx)` constructs a fresh envelope carrying the wire + * ciphertext + the cell's `(table, column)` from `ctx.column` + the + * SDK reference captured by the codec factory at construction time. + * The envelope's `decrypt({signal?})` later routes through the + * captured SDK; callers can also `await decryptAll(rows)` (M4) to + * coalesce decrypts across many envelopes into one bulk SDK call. + * + * - `encode(envelope, ctx)` extracts the ciphertext from the envelope's + * handle. The bulk-encrypt middleware (M2.c) populates the + * ciphertext slot before the codec runs; an envelope whose + * ciphertext slot is empty at encode time is a programmer error + * (the middleware was not registered, or this codec instance was + * used in a non-cipherstash context). + */ + +import type { Codec, SqlCodecCallContext } from '@prisma-next/sql-relational-core/ast'; +import { codec } from '@prisma-next/sql-relational-core/ast'; +import { EncryptedString, getInternalHandle } from './envelope'; +import type { CipherstashSdk } from './sdk'; + +export const CIPHERSTASH_STRING_CODEC_ID = 'cipherstash/string@1' as const; + +export const CIPHERSTASH_STRING_TARGET_TYPE = 'eql_v2_encrypted' as const; + +const CIPHERSTASH_STRING_TRAITS = ['equality'] as const; + +/** + * SDK-free codec used in pack-meta (`cipherstashPackMeta.types.codecTypes + * .codecInstances`). The framework's lookup machinery only reads codec + * *metadata* (`typeId`, `targetTypes`, `traits`, `renderOutputType`) from + * pack-meta codec instances; encode/decode never fire on a pack-meta + * codec because the SQL runtime always resolves codecs through the + * SDK-bound runtime descriptor instead. + * + * Encode/decode throw with a clear message in the misuse case so it's + * obvious the runtime descriptor wasn't wired up. 
+ */ +export const cipherstashStringCodecMetadata = codec({ + typeId: CIPHERSTASH_STRING_CODEC_ID, + targetTypes: [CIPHERSTASH_STRING_TARGET_TYPE], + traits: CIPHERSTASH_STRING_TRAITS, + renderOutputType: () => 'EncryptedString', + encode: () => { + throw new Error( + 'cipherstash codec: encode called on the pack-meta metadata codec. ' + + 'Construct a runtime descriptor with `createCipherstashRuntimeDescriptor({ sdk })` and use that instead.', + ); + }, + decode: () => { + throw new Error( + 'cipherstash codec: decode called on the pack-meta metadata codec. ' + + 'Construct a runtime descriptor with `createCipherstashRuntimeDescriptor({ sdk })` and use that instead.', + ); + }, + encodeJson: (value) => { + void value; + return { $encryptedString: '' }; + }, + decodeJson: () => { + throw new Error( + 'cipherstash codec: decodeJson is not supported; envelopes do not round-trip through JSON.', + ); + }, + meta: { + db: { + sql: { + postgres: { + nativeType: CIPHERSTASH_STRING_TARGET_TYPE, + }, + }, + }, + }, +}); + +/** + * Construct the cipherstash storage codec, capturing the `sdk` + * reference for read-side single-cell decrypts. + * + * The codec is recreated per runtime extension descriptor instance — + * each `cipherstashRuntime({ sdk })` call (added in M2.b/M2.c with the + * full extension wiring) produces a codec closed over its own SDK so + * multi-tenant deployments can construct one extension per tenant + * without cross-talk. 
+ */ +export function createCipherstashStringCodec( + sdk: CipherstashSdk, +): Codec< + typeof CIPHERSTASH_STRING_CODEC_ID, + typeof CIPHERSTASH_STRING_TRAITS, + unknown, + EncryptedString +> { + return codec({ + typeId: CIPHERSTASH_STRING_CODEC_ID, + targetTypes: [CIPHERSTASH_STRING_TARGET_TYPE], + traits: CIPHERSTASH_STRING_TRAITS, + renderOutputType: () => 'EncryptedString', + encode: (envelope: EncryptedString, _ctx: SqlCodecCallContext): unknown => { + const handle = getInternalHandle(envelope); + if (handle.ciphertext === undefined) { + throw new Error( + 'cipherstash codec: envelope has no ciphertext at encode time. ' + + 'Register the bulk-encrypt middleware in the runtime so envelopes are encrypted before encoding.', + ); + } + return handle.ciphertext; + }, + decode: (wire: unknown, ctx: SqlCodecCallContext): EncryptedString => { + const column = ctx.column; + if (!column) { + throw new Error( + 'cipherstash codec: decode requires ctx.column to construct a routing-aware envelope. ' + + 'The SQL runtime populates `ctx.column` for projected columns; aggregate/computed cells are not supported by this codec.', + ); + } + return EncryptedString.fromInternal({ + ciphertext: wire, + table: column.table, + column: column.name, + sdk, + }); + }, + encodeJson: (value) => { + void value; + return { $encryptedString: '' }; + }, + decodeJson: () => { + throw new Error( + 'cipherstash codec: decodeJson is not supported; envelopes do not round-trip through JSON.', + ); + }, + meta: { + db: { + sql: { + postgres: { + nativeType: CIPHERSTASH_STRING_TARGET_TYPE, + }, + }, + }, + }, + }); +} diff --git a/packages/3-extensions/cipherstash/src/core/descriptor-meta.ts b/packages/3-extensions/cipherstash/src/core/descriptor-meta.ts new file mode 100644 index 0000000000..268fbddfab --- /dev/null +++ b/packages/3-extensions/cipherstash/src/core/descriptor-meta.ts @@ -0,0 +1,49 @@ +/** + * Pack metadata for the cipherstash extension. 
+ * + * Mirrors `packages/3-extensions/pgvector/src/core/descriptor-meta.ts` — + * the metadata block that gets serialized into `contract.json`'s + * `extensionPacks.cipherstash` slot at emit time. SDK-free; the runtime + * descriptor (`exports/runtime.ts`) layers SDK-bound codec instances on + * top at execution time. + * + * The `codecInstances` entry uses the metadata-only codec from + * `core/codecs.ts` because pack-meta consumers only read codec metadata + * (typeId, targetTypes, traits, renderOutputType); execution-time + * encode/decode always go through the runtime descriptor's SDK-bound + * codec. + */ + +import { cipherstashAuthoringTypes } from './authoring'; +import { + CIPHERSTASH_STRING_CODEC_ID, + CIPHERSTASH_STRING_TARGET_TYPE, + cipherstashStringCodecMetadata, +} from './codecs'; + +export const CIPHERSTASH_EXTENSION_ID = 'cipherstash' as const; +export const CIPHERSTASH_EXTENSION_VERSION = '0.0.1' as const; + +export const cipherstashPackMeta = { + kind: 'extension', + id: CIPHERSTASH_EXTENSION_ID, + familyId: 'sql', + targetId: 'postgres', + version: CIPHERSTASH_EXTENSION_VERSION, + authoring: { + type: cipherstashAuthoringTypes, + }, + types: { + codecTypes: { + codecInstances: [cipherstashStringCodecMetadata], + }, + storage: [ + { + typeId: CIPHERSTASH_STRING_CODEC_ID, + familyId: 'sql', + targetId: 'postgres', + nativeType: CIPHERSTASH_STRING_TARGET_TYPE, + }, + ], + }, +} as const; diff --git a/packages/3-extensions/cipherstash/src/core/envelope.ts b/packages/3-extensions/cipherstash/src/core/envelope.ts new file mode 100644 index 0000000000..ef68fdd6ed --- /dev/null +++ b/packages/3-extensions/cipherstash/src/core/envelope.ts @@ -0,0 +1,158 @@ +import { ifDefined } from '@prisma-next/utils/defined'; +import type { CipherstashSdk } from './sdk'; + +/** + * Package-internal mutable state for an `EncryptedString`. 
Lives in a + * module-private `WeakMap` keyed on the envelope; never surfaced to + * package consumers (no `EncryptedStringHandle` is exported, no + * accessor on the envelope returns the handle). + * + * - **Write side** — `EncryptedString.from(plaintext)` produces a handle + * with `plaintext` populated and `ciphertext` empty. The bulk-encrypt + * middleware (M2.c) populates `ciphertext` and overwrites `plaintext` + * with `undefined` for memory hygiene before `codec.encode` runs. + * - **Read side** — `EncryptedString.fromInternal({...})` (called from + * `codec.decode`) produces a handle with `ciphertext` populated and + * `{table, column, sdk}` carrying the routing context for + * `decrypt({signal?})` and `bulkDecrypt(...)` (M4). + */ +interface EncryptedStringHandle { + plaintext: string | undefined; + ciphertext: unknown; + table: string | undefined; + column: string | undefined; + sdk: CipherstashSdk | undefined; +} + +const handles = new WeakMap(); + +/** + * Internal accessors used by the codec, the bulk-encrypt middleware, + * and `decryptAll`. Not exported from any subpath; package-internal + * call sites import these from this module directly. 
+ */ +export function getInternalHandle(envelope: EncryptedString): EncryptedStringHandle { + const handle = handles.get(envelope); + if (!handle) { + throw new Error( + 'EncryptedString: handle missing — envelope was not constructed via the official factories.', + ); + } + return handle; +} + +export function setHandleCiphertext(envelope: EncryptedString, ciphertext: unknown): void { + const handle = getInternalHandle(envelope); + handle.ciphertext = ciphertext; + handle.plaintext = undefined; +} + +export function setHandlePlaintextCache(envelope: EncryptedString, plaintext: string): void { + const handle = getInternalHandle(envelope); + handle.plaintext = plaintext; +} + +export function isHandleDecrypted(envelope: EncryptedString): boolean { + return getInternalHandle(envelope).plaintext !== undefined; +} + +export interface EncryptedStringFromInternalArgs { + readonly ciphertext: unknown; + readonly table: string; + readonly column: string; + readonly sdk: CipherstashSdk; +} + +/** + * Envelope wrapping a CipherStash-encrypted string value. + * + * The class owns its handle internally; the handle is never returned + * from any public method and no public accessor exposes its slots + * (AC-ENV4). The handle's storage choice — a module-scoped `WeakMap` — + * is an implementation detail; the same security/isolation guarantees + * apply with `#`-prefixed fields, but `WeakMap` keeps the runtime + * surface (`Object.keys`, `JSON.stringify`) trivially clean across + * every JS host without extra `toJSON`/`Symbol.toPrimitive` work. + */ +export class EncryptedString { + /** + * Construct from plaintext. The bulk-encrypt middleware (M2.c) + * populates the handle's ciphertext slot and overwrites the + * plaintext slot before the codec encodes the envelope to wire. 
+ */ + static from(plaintext: string): EncryptedString { + const envelope = new EncryptedString(); + handles.set(envelope, { + plaintext, + ciphertext: undefined, + table: undefined, + column: undefined, + sdk: undefined, + }); + return envelope; + } + + /** + * Construct a read-side envelope from a wire ciphertext + the column + * identity + the SDK used to decrypt the cell. Called from + * `codec.decode`; not part of the public user-facing API but + * intentionally exported from `core/envelope.ts` for the codec and + * for tests. + */ + static fromInternal(args: EncryptedStringFromInternalArgs): EncryptedString { + const envelope = new EncryptedString(); + handles.set(envelope, { + plaintext: undefined, + ciphertext: args.ciphertext, + table: args.table, + column: args.column, + sdk: args.sdk, + }); + return envelope; + } + + /** + * Decrypt and return the plaintext. + * + * - If the handle's `plaintext` slot is already populated (write-side + * envelopes constructed via `from(plaintext)`, or read-side + * envelopes already materialized by `decryptAll(...)`), returns + * the cached plaintext synchronously without consulting the SDK. + * - Otherwise (read-side handle without a cached plaintext), invokes + * the SDK's single-cell `decrypt` with the handle's routing + * context. The caller-supplied `signal` is forwarded to the SDK + * by identity per the umbrella cancellation contract. + */ + async decrypt(opts?: { signal?: AbortSignal }): Promise { + const handle = getInternalHandle(this); + if (handle.plaintext !== undefined) { + return handle.plaintext; + } + if (!handle.sdk || handle.table === undefined || handle.column === undefined) { + throw new Error( + 'EncryptedString.decrypt(): envelope has no cached plaintext and no SDK binding. 
' + + 'This typically means the bulk-encrypt middleware did not run before the encode site.', + ); + } + const plaintext = await handle.sdk.decrypt({ + ciphertext: handle.ciphertext, + table: handle.table, + column: handle.column, + ...ifDefined('signal', opts?.signal), + }); + handle.plaintext = plaintext; + return plaintext; + } + + /** + * `JSON.stringify(envelope)` produces a non-revealing placeholder + * regardless of which slot of the handle is populated. Without this + * override, `JSON.stringify` would still produce `{}` (since handle + * data lives in a `WeakMap`) — but the placeholder makes the intent + * explicit and is the documented shape per the open-question + * default in the envelope-codec spec. + */ + toJSON(): unknown { + return { $encryptedString: '' }; + } +} diff --git a/packages/3-extensions/cipherstash/src/core/eql-bundle.ts b/packages/3-extensions/cipherstash/src/core/eql-bundle.ts new file mode 100644 index 0000000000..19a4cbff8e --- /dev/null +++ b/packages/3-extensions/cipherstash/src/core/eql-bundle.ts @@ -0,0 +1,17 @@ +/** + * Placeholder for the vendored EQL Postgres install bundle. + * + * The real bundle (~170KB of inlined SQL) lives at + * `reference/cipherstash/stack/packages/stack/src/prisma/core/eql-bundle.ts` + * in the first-attempt repo (adjacent worktree); it gets copied into + * this package in M2.c when the live-Postgres + live-EQL integration + * tests come online and exercise AC-INSTALL2 / AC-INSTALL3. + * + * For M2.a (this round), AC-INSTALL1 verifies only the *shape* of the + * `databaseDependencies.init` declaration; the placeholder SQL string + * keeps the descriptor exercise-able without committing the large + * vendored file ahead of the integration-test plumbing. 
+ */
+
+export const EQL_INSTALL_SQL =
+  '-- TODO M2.c: vendor EQL_INSTALL_SQL from reference/cipherstash/stack/packages/stack/src/prisma/core/eql-bundle.ts';
diff --git a/packages/3-extensions/cipherstash/src/core/parameterized.ts b/packages/3-extensions/cipherstash/src/core/parameterized.ts
new file mode 100644
index 0000000000..6b5a917594
--- /dev/null
+++ b/packages/3-extensions/cipherstash/src/core/parameterized.ts
@@ -0,0 +1,62 @@
+/**
+ * `RuntimeParameterizedCodecDescriptor` for the cipherstash storage
+ * codec. Mirrors pgvector's post-#402 shape: a static metadata block
+ * (`codecId`, `traits`, `targetTypes`, `paramsSchema`,
+ * `renderOutputType`) plus a curried higher-order `factory` that the
+ * runtime calls once per `storage.types` instance to resolve a codec
+ * for that instance.
+ *
+ * Cipherstash differs from pgvector in one respect: the codec depends
+ * on the SDK (read-side single-cell `decrypt`, the bulk-encrypt
+ * middleware in M2.c). Each `cipherstashRuntime({ sdk })` call must
+ * therefore produce its own descriptor list closed over its SDK so
+ * multi-tenant deployments can run multiple cipherstash extensions
+ * side by side without cross-talk. We expose
+ * `createParameterizedCodecDescriptors(sdk)` for that purpose; the
+ * static `paramsSchema` and `renderOutputType` slots are reusable
+ * across SDK bindings. 
+ */ + +import type { CodecInstanceContext } from '@prisma-next/framework-components/codec'; +import type { RuntimeParameterizedCodecDescriptor } from '@prisma-next/sql-runtime'; +import { type as arktype } from 'arktype'; +import { CIPHERSTASH_STRING_CODEC_ID, createCipherstashStringCodec } from './codecs'; +import type { CipherstashSdk } from './sdk'; + +export interface CipherstashStringParams { + readonly equality: boolean; + readonly freeTextSearch: boolean; +} + +export const encryptedStringParamsSchema = arktype({ + equality: 'boolean', + freeTextSearch: 'boolean', +}); + +export function renderEncryptedStringOutputType(_params: CipherstashStringParams): string { + return 'EncryptedString'; +} + +export function createParameterizedCodecDescriptors( + sdk: CipherstashSdk, +): ReadonlyArray> { + // The codec is per-cell stateless across `(equality, freeTextSearch)` + // params on the write side (encode reads ciphertext from the handle, + // independent of the search-mode flags). The factory therefore + // returns the same shared codec for every params instance, mirroring + // pgvector's `vectorFactory` precedent. When future search-mode + // wiring needs per-instance state (e.g. decode-time index gating), + // the closure is the place to add it. 
+ const sharedCodec = createCipherstashStringCodec(sdk); + const factory = (_params: CipherstashStringParams) => (_ctx: CodecInstanceContext) => sharedCodec; + return [ + { + codecId: CIPHERSTASH_STRING_CODEC_ID, + traits: ['equality'] as const, + targetTypes: ['eql_v2_encrypted'] as const, + paramsSchema: encryptedStringParamsSchema, + renderOutputType: renderEncryptedStringOutputType, + factory, + }, + ] as const satisfies ReadonlyArray>; +} diff --git a/packages/3-extensions/cipherstash/src/core/sdk.ts b/packages/3-extensions/cipherstash/src/core/sdk.ts new file mode 100644 index 0000000000..0234d0a740 --- /dev/null +++ b/packages/3-extensions/cipherstash/src/core/sdk.ts @@ -0,0 +1,79 @@ +/** + * Framework-native shape for the CipherStash SDK that the cipherstash + * extension wraps. + * + * The first-attempt SDK (see `reference/cipherstash/stack/...`) is rich + * and Prisma-adapter shaped (e.g. `EncryptOperation`, `LockContext`, + * lazy-initialized `EncryptionClient`). The framework-native shape we + * consume from the bulk-encrypt middleware (`beforeExecute`), the codec + * (`decode`), and the `decryptAll` walker is intentionally smaller: + * three async methods that each map cleanly to one CipherStash bulk-call + * shape. + * + * - `decrypt` — single-cell read used by `EncryptedString#decrypt()` + * when the user opts out of bulk decryption. + * - `bulkEncrypt` — write-side coalesced encrypt. M2.c wires this from + * the bulk-encrypt middleware (`beforeExecute`); declared here in + * M2.a so the SDK shape stays single-source-of-truth. + * - `bulkDecrypt` — read-side coalesced decrypt. M4 wires this from + * `decryptAll`. + * + * Each method accepts an optional `AbortSignal`; cancellation is + * forwarded directly to the SDK per the umbrella spec's cancellation + * contract (the per-execute `MiddlewareContext.signal` from M1's + * middleware-param-transform seam, or the caller-supplied signal on + * `decrypt({signal})`). 
+ */ + +/** + * Routing-key tuple used by `bulkEncrypt`/`bulkDecrypt` to group + * requests so each ZeroKMS round-trip handles one homogeneous batch. + * + * Default shape: derived from `(table, column)`. Per-column key-id + * overrides are an open question on the umbrella spec; today the SDK + * routing is fully derived. + */ +export interface CipherstashRoutingKey { + readonly table: string; + readonly column: string; +} + +export interface CipherstashSingleDecryptArgs { + /** + * The wire ciphertext to decrypt. Opaque to the framework; the SDK + * inspects the embedded `i.t` / `i.c` schema markers to pick the + * right `cast_as` for the round-trip. + */ + readonly ciphertext: unknown; + /** Routing-key — the source `(table, column)` for the cell. */ + readonly table: string; + readonly column: string; + /** Optional caller-provided signal forwarded directly to the SDK. */ + readonly signal?: AbortSignal; +} + +export interface CipherstashBulkEncryptArgs { + readonly routingKey: CipherstashRoutingKey; + readonly values: ReadonlyArray; + readonly signal?: AbortSignal; +} + +export interface CipherstashBulkDecryptArgs { + readonly routingKey: CipherstashRoutingKey; + readonly ciphertexts: ReadonlyArray; + readonly signal?: AbortSignal; +} + +/** + * The framework-native CipherStash SDK contract consumed by the + * envelope, codec, middleware, and `decryptAll` surfaces. + * + * Real implementations wrap the CipherStash `EncryptionClient` + * (currently `@cipherstash/stack`'s `Encryption({ schemas })` factory). + * Tests construct mock SDKs that implement these three methods directly. 
+ */ +export interface CipherstashSdk { + decrypt(args: CipherstashSingleDecryptArgs): Promise; + bulkEncrypt(args: CipherstashBulkEncryptArgs): Promise>; + bulkDecrypt(args: CipherstashBulkDecryptArgs): Promise>; +} diff --git a/packages/3-extensions/cipherstash/src/exports/column-types.ts b/packages/3-extensions/cipherstash/src/exports/column-types.ts new file mode 100644 index 0000000000..59cc267e2e --- /dev/null +++ b/packages/3-extensions/cipherstash/src/exports/column-types.ts @@ -0,0 +1,54 @@ +/** + * TS contract factory for cipherstash-encrypted string columns. + * + * Counterpart to the PSL constructor `cipherstash.EncryptedString({...})` + * registered in `core/authoring.ts`. Both factories produce the same + * `ColumnTypeDescriptor` shape so PSL- and TS-authored contracts emit + * byte-identical `contract.json`. See `DEVELOPING.md` § Forthcoming + * surface for the rest of the in-progress milestones. + */ + +import { CIPHERSTASH_STRING_CODEC_ID, CIPHERSTASH_STRING_TARGET_TYPE } from '../core/codecs'; + +/** + * Search-mode parameters for `encryptedString({...})`. Both flags are + * optional and default to `false` when omitted; storage-only encryption + * is the legitimate default per the project's M2 standing decision. + */ +export interface EncryptedStringOptions { + readonly equality?: boolean; + readonly freeTextSearch?: boolean; +} + +export interface EncryptedStringColumnDescriptor { + readonly codecId: typeof CIPHERSTASH_STRING_CODEC_ID; + readonly nativeType: typeof CIPHERSTASH_STRING_TARGET_TYPE; + readonly typeParams: { + readonly equality: boolean; + readonly freeTextSearch: boolean; + }; +} + +/** + * `encryptedString({ equality?, freeTextSearch? })` — TS contract + * factory that lowers to a `ColumnTypeDescriptor` with the + * `cipherstash/string@1` codec and the `eql_v2_encrypted` Postgres + * native type. The two boolean flags become `typeParams.equality` + * and `typeParams.freeTextSearch`. 
+ * + * The shape matches what the PSL constructor + * `cipherstash.EncryptedString({...})` lowers to, byte-for-byte; the + * authoring parity fixture under + * `test/integration/test/authoring/parity/cipherstash-encrypted-string/` + * pins this equivalence. + */ +export function encryptedString(options: EncryptedStringOptions): EncryptedStringColumnDescriptor { + return { + codecId: CIPHERSTASH_STRING_CODEC_ID, + nativeType: CIPHERSTASH_STRING_TARGET_TYPE, + typeParams: { + equality: options.equality ?? false, + freeTextSearch: options.freeTextSearch ?? false, + }, + }; +} diff --git a/packages/3-extensions/cipherstash/src/exports/control.ts b/packages/3-extensions/cipherstash/src/exports/control.ts new file mode 100644 index 0000000000..ad712a0894 --- /dev/null +++ b/packages/3-extensions/cipherstash/src/exports/control.ts @@ -0,0 +1,93 @@ +/** + * SQL control extension descriptor for cipherstash. + * + * Spreads `cipherstashPackMeta` (authoring contributions, capabilities, + * storage type registration, codec metadata) and adds the + * `databaseDependencies.init` block that installs the EQL Postgres + * extension before any cipherstash-bound migration executes. + * + * **AC-INSTALL1** is satisfied at the *shape* level in M2.a; the + * placeholder install SQL points at the M2.c bundle vendor task. + * **AC-INSTALL2** (live-Postgres `dbInit` succeeds) and + * **AC-INSTALL3** (idempotency) require the real bundle and a live + * Postgres harness — both deferred to M2.c. 
+ */ + +import type { + CodecControlHooks, + ComponentDatabaseDependencies, + SqlControlExtensionDescriptor, +} from '@prisma-next/family-sql/control'; +import { CIPHERSTASH_STRING_CODEC_ID } from '../core/codecs'; +import { cipherstashPackMeta } from '../core/descriptor-meta'; +import { EQL_INSTALL_SQL } from '../core/eql-bundle'; + +/** + * Cipherstash columns carry search-mode `typeParams` (`equality`, + * `freeTextSearch`) that govern *operator* lowering at runtime — + * they are not part of the column's SQL DDL signature, which is + * always the bare `eql_v2_encrypted` Postgres native type. The + * framework's DDL builder requires every typeParam-carrying column + * to declare an `expandNativeType` hook to make the "no parameters + * affect DDL" decision explicit; this hook records that decision. + */ +const cipherstashStringControlPlaneHooks: CodecControlHooks = { + expandNativeType: ({ nativeType }) => nativeType, +}; + +const cipherstashDatabaseDependencies: ComponentDatabaseDependencies = { + init: [ + { + id: 'postgres.extension.eql', + label: 'Install EQL extension', + install: [ + { + id: 'eql.install', + label: 'Install EQL bundle', + summary: + 'Installs the EQL Postgres extension bundle (encrypted-aware operators + cs_configuration_v2)', + operationClass: 'additive', + target: { id: 'postgres' }, + precheck: [ + { + description: 'verify EQL is not already installed', + sql: "SELECT NOT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = 'cs_configuration_v2')", + }, + ], + execute: [ + { + description: 'install EQL bundle', + sql: EQL_INSTALL_SQL, + }, + ], + postcheck: [ + { + description: 'confirm EQL is installed', + sql: "SELECT EXISTS (SELECT 1 FROM information_schema.schemata WHERE schema_name = 'eql_v2')", + }, + ], + }, + ], + }, + ], +}; + +export const cipherstashControlDescriptor: SqlControlExtensionDescriptor<'postgres'> = { + ...cipherstashPackMeta, + types: { + ...cipherstashPackMeta.types, + 
codecTypes: { + ...cipherstashPackMeta.types.codecTypes, + controlPlaneHooks: { + [CIPHERSTASH_STRING_CODEC_ID]: cipherstashStringControlPlaneHooks, + }, + }, + }, + databaseDependencies: cipherstashDatabaseDependencies, + create: () => ({ + familyId: 'sql' as const, + targetId: 'postgres' as const, + }), +}; + +export default cipherstashControlDescriptor; diff --git a/packages/3-extensions/cipherstash/src/exports/index.ts b/packages/3-extensions/cipherstash/src/exports/index.ts new file mode 100644 index 0000000000..ac79763edd --- /dev/null +++ b/packages/3-extensions/cipherstash/src/exports/index.ts @@ -0,0 +1,8 @@ +export { EncryptedString } from '../core/envelope'; +export type { + CipherstashBulkDecryptArgs, + CipherstashBulkEncryptArgs, + CipherstashRoutingKey, + CipherstashSdk, + CipherstashSingleDecryptArgs, +} from '../core/sdk'; diff --git a/packages/3-extensions/cipherstash/src/exports/middleware.ts b/packages/3-extensions/cipherstash/src/exports/middleware.ts new file mode 100644 index 0000000000..5ac84cb9fa --- /dev/null +++ b/packages/3-extensions/cipherstash/src/exports/middleware.ts @@ -0,0 +1,7 @@ +/** + * Placeholder for the `bulkEncryptMiddleware(sdk: CipherstashSdk)` factory. + * The subpath ships today so the package's surface area stays stable across + * the in-progress milestones; see `DEVELOPING.md` § Forthcoming surface for + * the implementation timeline. 
+ */ +export {}; diff --git a/packages/3-extensions/cipherstash/src/exports/pack.ts b/packages/3-extensions/cipherstash/src/exports/pack.ts new file mode 100644 index 0000000000..fb2581b9ec --- /dev/null +++ b/packages/3-extensions/cipherstash/src/exports/pack.ts @@ -0,0 +1 @@ +export { cipherstashPackMeta as default } from '../core/descriptor-meta'; diff --git a/packages/3-extensions/cipherstash/src/exports/runtime.ts b/packages/3-extensions/cipherstash/src/exports/runtime.ts new file mode 100644 index 0000000000..a7fa94f150 --- /dev/null +++ b/packages/3-extensions/cipherstash/src/exports/runtime.ts @@ -0,0 +1,60 @@ +/** + * SQL runtime extension descriptor for cipherstash. + * + * Mirrors `packages/3-extensions/pgvector/src/exports/runtime.ts` + * structurally, with one difference: cipherstash's codec depends on a + * caller-supplied `CipherstashSdk`, so the descriptor is a *factory* + * (`createCipherstashRuntimeDescriptor({ sdk })`) rather than a static + * default-export. M2.b/M2.c will likely add a thinner top-level + * factory (`cipherstashRuntime({ sdk, ... })`) that returns this + * descriptor; this file ships the descriptor builder in M2.a so the + * codec + parameterized-codec wiring can be unit-tested in isolation. 
+ */ + +import { createCodecRegistry } from '@prisma-next/sql-relational-core/ast'; +import type { SqlRuntimeExtensionDescriptor } from '@prisma-next/sql-runtime'; +import { createCipherstashStringCodec } from '../core/codecs'; +import { CIPHERSTASH_EXTENSION_ID, CIPHERSTASH_EXTENSION_VERSION } from '../core/descriptor-meta'; +import { createParameterizedCodecDescriptors } from '../core/parameterized'; +import type { CipherstashSdk } from '../core/sdk'; + +export { CIPHERSTASH_EXTENSION_ID, CIPHERSTASH_EXTENSION_VERSION }; + +export interface CreateCipherstashRuntimeDescriptorOptions { + readonly sdk: CipherstashSdk; +} + +export function createCipherstashRuntimeDescriptor( + opts: CreateCipherstashRuntimeDescriptorOptions, +): SqlRuntimeExtensionDescriptor<'postgres'> { + const { sdk } = opts; + const codec = createCipherstashStringCodec(sdk); + const descriptors = createParameterizedCodecDescriptors(sdk); + + function buildCodecRegistry() { + const registry = createCodecRegistry(); + registry.register(codec); + return registry; + } + + return { + kind: 'extension' as const, + id: CIPHERSTASH_EXTENSION_ID, + version: CIPHERSTASH_EXTENSION_VERSION, + familyId: 'sql' as const, + targetId: 'postgres' as const, + types: { + codecTypes: { + codecInstances: [codec], + }, + }, + codecs: buildCodecRegistry, + parameterizedCodecs: () => descriptors, + create() { + return { + familyId: 'sql' as const, + targetId: 'postgres' as const, + }; + }, + }; +} diff --git a/packages/3-extensions/cipherstash/test/authoring.test.ts b/packages/3-extensions/cipherstash/test/authoring.test.ts new file mode 100644 index 0000000000..42f1dd6810 --- /dev/null +++ b/packages/3-extensions/cipherstash/test/authoring.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from 'vitest'; +import { cipherstashAuthoringTypes } from '../src/core/authoring'; +import cipherstashPack from '../src/exports/pack'; + +describe('cipherstash pack authoring contributions', () => { + it('exposes 
cipherstash.EncryptedString as a namespaced type constructor (AC-CTOR1)', () => { + expect(cipherstashPack.authoring?.type).toMatchObject({ + cipherstash: { + EncryptedString: { + kind: 'typeConstructor', + }, + }, + }); + }); + + it('declares a single object argument with optional equality + freeTextSearch boolean properties (AC-CTOR2)', () => { + expect(cipherstashAuthoringTypes.cipherstash.EncryptedString).toMatchObject({ + kind: 'typeConstructor', + args: [ + { + kind: 'object', + properties: { + equality: { kind: 'boolean', optional: true }, + freeTextSearch: { kind: 'boolean', optional: true }, + }, + }, + ], + }); + }); + + it('lowers to ColumnTypeDescriptor with codecId cipherstash/string@1 + nativeType eql_v2_encrypted (AC-LOWER1 shape)', () => { + expect(cipherstashAuthoringTypes.cipherstash.EncryptedString.output).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + typeParams: { + equality: { kind: 'arg', index: 0, path: ['equality'], default: false }, + freeTextSearch: { + kind: 'arg', + index: 0, + path: ['freeTextSearch'], + default: false, + }, + }, + }); + }); + + it('exposes the storage type registration via pack meta', () => { + expect(cipherstashPack.types?.storage).toContainEqual({ + typeId: 'cipherstash/string@1', + familyId: 'sql', + targetId: 'postgres', + nativeType: 'eql_v2_encrypted', + }); + }); +}); diff --git a/packages/3-extensions/cipherstash/test/codecs.test.ts b/packages/3-extensions/cipherstash/test/codecs.test.ts new file mode 100644 index 0000000000..786a3abe86 --- /dev/null +++ b/packages/3-extensions/cipherstash/test/codecs.test.ts @@ -0,0 +1,81 @@ +import type { SqlCodecCallContext } from '@prisma-next/sql-relational-core/ast'; +import { describe, expect, it, vi } from 'vitest'; +import { CIPHERSTASH_STRING_CODEC_ID, createCipherstashStringCodec } from '../src/core/codecs'; +import { EncryptedString, getInternalHandle, setHandleCiphertext } from '../src/core/envelope'; +import type { 
CipherstashSdk } from '../src/core/sdk'; + +function makeSdk(): CipherstashSdk { + return { + decrypt: vi.fn().mockResolvedValue('decrypted'), + bulkEncrypt: vi.fn(), + bulkDecrypt: vi.fn(), + }; +} + +describe('cipherstash codec — AC-CODEC1 (registration shape)', () => { + it('codec id is `cipherstash/string@1` with target type `eql_v2_encrypted` and traits `[equality]`', () => { + const codec = createCipherstashStringCodec(makeSdk()); + expect(codec.id).toBe(CIPHERSTASH_STRING_CODEC_ID); + expect(codec.targetTypes).toEqual(['eql_v2_encrypted']); + expect(codec.traits).toEqual(['equality']); + }); + + it('codec carries postgres-native-type meta `eql_v2_encrypted`', () => { + const codec = createCipherstashStringCodec(makeSdk()); + expect(codec.meta).toMatchObject({ + db: { sql: { postgres: { nativeType: 'eql_v2_encrypted' } } }, + }); + }); +}); + +describe('cipherstash codec — AC-CODEC2 (decode constructs envelope from ctx.column)', () => { + it('decode(wire, ctx) builds an envelope whose handle carries {table, column} from ctx.column', async () => { + const sdk = makeSdk(); + const codec = createCipherstashStringCodec(sdk); + const ctx: SqlCodecCallContext = { + column: { table: 'user', name: 'email' }, + }; + const wire = { c: 'cipher-blob', i: { t: 'user', c: 'email' } }; + + const envelope = await codec.decode(wire, ctx); + + expect(envelope).toBeInstanceOf(EncryptedString); + const handle = getInternalHandle(envelope); + expect(handle.table).toBe('user'); + expect(handle.column).toBe('email'); + expect(handle.ciphertext).toBe(wire); + expect(handle.sdk).toBe(sdk); + }); + + it('decode without ctx.column throws (the codec needs the column ref to construct a routing-aware envelope)', async () => { + const codec = createCipherstashStringCodec(makeSdk()); + await expect(codec.decode('wire', {})).rejects.toThrow(/requires ctx\.column/); + }); +}); + +describe('cipherstash codec — AC-CODEC3 (encode reads ciphertext from handle)', () => { + it('after the middleware 
has populated ciphertext, encode returns the ciphertext', async () => { + const codec = createCipherstashStringCodec(makeSdk()); + const envelope = EncryptedString.from('secret'); + setHandleCiphertext(envelope, { c: 'wire-blob' }); + + const wire = await codec.encode(envelope, {}); + expect(wire).toEqual({ c: 'wire-blob' }); + }); + + it('encode of an envelope whose ciphertext slot is empty (middleware did not run) throws a clear error', async () => { + const codec = createCipherstashStringCodec(makeSdk()); + const envelope = EncryptedString.from('secret'); + await expect(codec.encode(envelope, {})).rejects.toThrow(/bulk-encrypt middleware/); + }); +}); + +describe('cipherstash codec — AC-CODEC4 (renderOutputType)', () => { + it('renderOutputType returns `EncryptedString`', () => { + const codec = createCipherstashStringCodec(makeSdk()); + expect(codec.renderOutputType?.({})).toBe('EncryptedString'); + expect(codec.renderOutputType?.({ equality: true, freeTextSearch: false })).toBe( + 'EncryptedString', + ); + }); +}); diff --git a/packages/3-extensions/cipherstash/test/column-types.test.ts b/packages/3-extensions/cipherstash/test/column-types.test.ts new file mode 100644 index 0000000000..2796060733 --- /dev/null +++ b/packages/3-extensions/cipherstash/test/column-types.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, it } from 'vitest'; +import { encryptedString } from '../src/exports/column-types'; + +describe('cipherstash column-types', () => { + describe('encryptedString({...}) factory', () => { + it('produces a ColumnTypeDescriptor with cipherstash/string@1 codec id', () => { + const descriptor = encryptedString({}); + expect(descriptor).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + }); + }); + + it('applies false defaults when both flags are omitted', () => { + expect(encryptedString({})).toMatchObject({ + typeParams: { equality: false, freeTextSearch: false }, + }); + }); + + it('preserves equality flag when 
provided', () => { + expect(encryptedString({ equality: true })).toMatchObject({ + typeParams: { equality: true, freeTextSearch: false }, + }); + }); + + it('preserves both flags when provided', () => { + expect(encryptedString({ equality: true, freeTextSearch: true })).toMatchObject({ + typeParams: { equality: true, freeTextSearch: true }, + }); + }); + + it('returns a structurally equivalent descriptor to the PSL constructor lowering', () => { + // The TS factory must produce the same shape the PSL interpreter's + // type-constructor lowering produces, so the parity fixture (AC-PARITY1) + // can compare PSL- and TS-emitted contract.json byte-for-byte. + expect(encryptedString({ equality: true, freeTextSearch: true })).toEqual({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + typeParams: { equality: true, freeTextSearch: true }, + }); + }); + }); +}); diff --git a/packages/3-extensions/cipherstash/test/control.test.ts b/packages/3-extensions/cipherstash/test/control.test.ts new file mode 100644 index 0000000000..a5c77f7d55 --- /dev/null +++ b/packages/3-extensions/cipherstash/test/control.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'vitest'; +import { cipherstashControlDescriptor } from '../src/exports/control'; + +describe('cipherstash control descriptor — AC-INSTALL1 (databaseDependencies.init)', () => { + it('declares one init entry installing the EQL extension', () => { + const init = cipherstashControlDescriptor.databaseDependencies?.init; + expect(init).toBeDefined(); + expect(init).toHaveLength(1); + expect(init?.[0]?.id).toBe('postgres.extension.eql'); + }); + + it('init entry has a single install operation targeting postgres', () => { + const installs = cipherstashControlDescriptor.databaseDependencies?.init?.[0]?.install; + expect(installs).toHaveLength(1); + const install = installs?.[0]; + expect(install?.id).toBe('eql.install'); + expect(install?.operationClass).toBe('additive'); + 
expect(install?.target.id).toBe('postgres'); + }); + + it('install has precheck/execute/postcheck step shapes per the spec', () => { + const install = cipherstashControlDescriptor.databaseDependencies?.init?.[0]?.install?.[0]; + expect(install?.precheck).toHaveLength(1); + expect(install?.precheck[0]?.sql).toContain('cs_configuration_v2'); + expect(install?.execute).toHaveLength(1); + expect(install?.execute[0]?.sql).toBeDefined(); + expect(install?.postcheck).toHaveLength(1); + expect(install?.postcheck[0]?.sql).toContain('eql_v2'); + }); + + it('execute step ships a placeholder pointing at the M2.c real bundle (AC-INSTALL2/3 deferred)', () => { + const sql = + cipherstashControlDescriptor.databaseDependencies?.init?.[0]?.install?.[0]?.execute[0]?.sql; + expect(sql).toMatch(/TODO M2\.c/); + }); + + it('descriptor is shaped as a sql-family postgres control extension', () => { + expect(cipherstashControlDescriptor.familyId).toBe('sql'); + expect(cipherstashControlDescriptor.targetId).toBe('postgres'); + }); +}); diff --git a/packages/3-extensions/cipherstash/test/envelope.test-d.ts b/packages/3-extensions/cipherstash/test/envelope.test-d.ts new file mode 100644 index 0000000000..7f2d5a3978 --- /dev/null +++ b/packages/3-extensions/cipherstash/test/envelope.test-d.ts @@ -0,0 +1,36 @@ +/** + * Negative type tests for AC-ENV4: the handle has no public TypeScript surface. + * + * - The handle type itself must not be importable from any subpath. + * - The `EncryptedString` class must not expose a handle accessor (e.g. + * `envelope.handle` / `envelope.plaintext` / `envelope.ciphertext`). + * + * These assertions use the `@ts-expect-error` directive in a position + * permitted by AGENTS.md (negative type tests). + */ + +import { EncryptedString } from '../src/exports/index'; + +const envelope = EncryptedString.from('alice@example.com'); + +// @ts-expect-error — handle accessor is not part of the public surface. 
+envelope.handle; +// @ts-expect-error — plaintext accessor is not part of the public surface. +envelope.plaintext; +// @ts-expect-error — ciphertext accessor is not part of the public surface. +envelope.ciphertext; + +// The public namespace exposes `EncryptedString` (and, eventually, `decryptAll`). +// It must NOT export a handle type. +type PublicSurface = typeof import('../src/exports/index'); +// @ts-expect-error — `EncryptedStringHandle` is not part of the public surface. +type _NoHandle = PublicSurface['EncryptedStringHandle']; + +// Public methods on `EncryptedString` are limited to `decrypt` and `toJSON`. +const _decrypt: (opts?: { signal?: AbortSignal }) => Promise = + envelope.decrypt.bind(envelope); +const _toJson: () => unknown = envelope.toJSON.bind(envelope); + +export type _AssertNoHandle = _NoHandle; +void _decrypt; +void _toJson; diff --git a/packages/3-extensions/cipherstash/test/envelope.test.ts b/packages/3-extensions/cipherstash/test/envelope.test.ts new file mode 100644 index 0000000000..82ff512c5b --- /dev/null +++ b/packages/3-extensions/cipherstash/test/envelope.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, it, vi } from 'vitest'; +import { EncryptedString } from '../src/core/envelope'; +import type { CipherstashSdk } from '../src/core/sdk'; + +describe('EncryptedString.from(plaintext) — AC-ENV1', () => { + it('returns an envelope', () => { + const envelope = EncryptedString.from('alice@example.com'); + expect(envelope).toBeInstanceOf(EncryptedString); + }); + + it('subsequent decrypt() resolves with the original plaintext (no SDK needed)', async () => { + const envelope = EncryptedString.from('alice@example.com'); + await expect(envelope.decrypt()).resolves.toBe('alice@example.com'); + }); + + it('decrypt() does not require the SDK on the write-side handle', async () => { + const sdk: CipherstashSdk = { + decrypt: vi.fn(), + bulkEncrypt: vi.fn(), + bulkDecrypt: vi.fn(), + }; + const envelope = EncryptedString.from('hello'); + 
await envelope.decrypt(); + expect(sdk.decrypt).not.toHaveBeenCalled(); + }); +}); + +describe('EncryptedString.fromInternal(...) — AC-ENV2 (read-side)', () => { + it('decrypt({signal}) calls the SDK single-cell decrypt and returns plaintext', async () => { + const ciphertext = { c: 'cipher', i: { t: 'user', c: 'email' } }; + const decryptMock = vi.fn().mockResolvedValue('alice@example.com'); + const sdk: CipherstashSdk = { + decrypt: decryptMock, + bulkEncrypt: vi.fn(), + bulkDecrypt: vi.fn(), + }; + + const envelope = EncryptedString.fromInternal({ + ciphertext, + table: 'user', + column: 'email', + sdk, + }); + + const ac = new AbortController(); + const result = await envelope.decrypt({ signal: ac.signal }); + + expect(result).toBe('alice@example.com'); + expect(decryptMock).toHaveBeenCalledTimes(1); + const callArg = decryptMock.mock.calls[0]?.[0]; + expect(callArg).toMatchObject({ + ciphertext, + table: 'user', + column: 'email', + signal: ac.signal, + }); + }); + + it('forwards the caller-provided AbortSignal to the SDK by identity', async () => { + const decryptMock = vi.fn().mockResolvedValue('plain'); + const sdk: CipherstashSdk = { + decrypt: decryptMock, + bulkEncrypt: vi.fn(), + bulkDecrypt: vi.fn(), + }; + const envelope = EncryptedString.fromInternal({ + ciphertext: 'wire', + table: 't', + column: 'c', + sdk, + }); + const ac = new AbortController(); + await envelope.decrypt({ signal: ac.signal }); + const callArg = decryptMock.mock.calls[0]?.[0] as { signal?: AbortSignal }; + expect(callArg.signal).toBe(ac.signal); + }); + + it('decrypt() without an explicit signal omits signal in the SDK call (or passes undefined)', async () => { + const decryptMock = vi.fn().mockResolvedValue('plain'); + const sdk: CipherstashSdk = { + decrypt: decryptMock, + bulkEncrypt: vi.fn(), + bulkDecrypt: vi.fn(), + }; + const envelope = EncryptedString.fromInternal({ + ciphertext: 'wire', + table: 't', + column: 'c', + sdk, + }); + await envelope.decrypt(); + const callArg 
= decryptMock.mock.calls[0]?.[0] as { signal?: AbortSignal }; + expect(callArg.signal).toBeUndefined(); + }); +}); + +describe('EncryptedString — handle is package-private — AC-ENV4', () => { + it('does not expose the handle as an own enumerable property', () => { + const envelope = EncryptedString.from('secret'); + expect(Object.keys(envelope)).toEqual([]); + }); + + it('JSON.stringify produces no plaintext leak', () => { + const envelope = EncryptedString.from('top-secret'); + const json = JSON.stringify(envelope); + expect(json).not.toContain('top-secret'); + }); + + it('public methods are limited to decrypt; no handle-accessor on the prototype', () => { + const proto = Object.getPrototypeOf(EncryptedString.from('x')) as object; + const ownNames = Object.getOwnPropertyNames(proto).filter((n) => n !== 'constructor'); + expect(ownNames.sort()).toEqual(['decrypt', 'toJSON']); + }); +}); diff --git a/packages/3-extensions/cipherstash/test/psl-interpretation.test.ts b/packages/3-extensions/cipherstash/test/psl-interpretation.test.ts new file mode 100644 index 0000000000..16f342b76e --- /dev/null +++ b/packages/3-extensions/cipherstash/test/psl-interpretation.test.ts @@ -0,0 +1,211 @@ +import { parsePslDocument } from '@prisma-next/psl-parser'; +import { interpretPslDocumentToSqlContract } from '@prisma-next/sql-contract-psl'; +import { describe, expect, it } from 'vitest'; +import cipherstashControl from '../src/exports/control'; +import cipherstashPack from '../src/exports/pack'; + +const postgresTarget = { + kind: 'target' as const, + familyId: 'sql' as const, + targetId: 'postgres' as const, + id: 'postgres', + version: '0.0.1', + capabilities: {}, +}; + +const postgresScalarTypeDescriptors = new Map([ + ['String', { codecId: 'pg/text@1', nativeType: 'text' }], + ['Boolean', { codecId: 'pg/bool@1', nativeType: 'bool' }], + ['Int', { codecId: 'pg/int4@1', nativeType: 'int4' }], +]); + +interface NarrowedStorage { + readonly tables: Record> }>; + readonly types?: 
Record>; +} + +function interpret(schema: string) { + return interpretPslDocumentToSqlContract({ + document: parsePslDocument({ schema, sourceId: 'schema.prisma' }), + target: postgresTarget, + scalarTypeDescriptors: postgresScalarTypeDescriptors, + composedExtensionPacks: [cipherstashControl.id], + authoringContributions: { type: cipherstashPack.authoring.type, field: {} }, + }); +} + +function narrowStorage(value: { storage: unknown }): NarrowedStorage { + // Test-only narrowing: the IR's StorageBase is intentionally weak so + // family adapters can specialise it; for these tests we know we're + // working with the SQL family's tables/types projection. + return value.storage as unknown as NarrowedStorage; +} + +function userColumns(value: { storage: unknown }, name: string): Record { + const col = narrowStorage(value).tables['user']?.columns[name]; + if (!col) throw new Error(`expected user.${name} column`); + return col; +} + +describe('PSL interpretation: cipherstash.EncryptedString constructor', () => { + it('lowers full args to a column with codecId, nativeType, typeParams (AC-LOWER1)', () => { + const result = interpret(`model User { + id Int @id + email cipherstash.EncryptedString({ equality: true, freeTextSearch: true }) +} +`); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(userColumns(result.value, 'email')).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + typeParams: { equality: true, freeTextSearch: true }, + nullable: false, + }); + }); + + it('applies false defaults for an empty options literal (AC-LOWER2)', () => { + const result = interpret(`model User { + id Int @id + notes cipherstash.EncryptedString({}) +} +`); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(userColumns(result.value, 'notes')).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + typeParams: { equality: false, freeTextSearch: false }, + nullable: false, + }); + }); + + 
it('marks nullable columns as nullable (AC-LOWER3)', () => { + const result = interpret(`model User { + id Int @id + username cipherstash.EncryptedString({ equality: true })? +} +`); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(userColumns(result.value, 'username')).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + typeParams: { equality: true, freeTextSearch: false }, + nullable: true, + }); + }); + + it('rejects unknown argument names with PSL_INVALID_ATTRIBUTE_ARGUMENT (AC-CTOR3)', () => { + const result = interpret(`model User { + id Int @id + email cipherstash.EncryptedString({ orderAndRange: true }) +} +`); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.failure.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'PSL_INVALID_ATTRIBUTE_ARGUMENT', + message: expect.stringContaining('orderAndRange'), + }), + ]), + ); + }); + + it('rejects wrong-typed argument values with PSL_INVALID_ATTRIBUTE_ARGUMENT (AC-CTOR4)', () => { + const result = interpret(`model User { + id Int @id + email cipherstash.EncryptedString({ equality: "yes" }) +} +`); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.failure.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'PSL_INVALID_ATTRIBUTE_ARGUMENT', + message: expect.stringContaining('boolean'), + }), + ]), + ); + }); + + it('resolves a named-type alias under types {} and uses it on a model field (AC-ALIAS1)', () => { + const result = interpret(`types { + SearchableEmail = cipherstash.EncryptedString({ equality: true }) +} + +model User { + id Int @id + email SearchableEmail +} +`); + expect(result.ok).toBe(true); + if (!result.ok) return; + const storage = narrowStorage(result.value); + expect(storage.types?.['SearchableEmail']).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + typeParams: { equality: true, 
freeTextSearch: false }, + }); + expect(userColumns(result.value, 'email')).toMatchObject({ + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + nullable: false, + typeRef: 'SearchableEmail', + }); + }); + + it('produces an alias whose typeParams match the inline-constructor form for the same args (AC-ALIAS2)', () => { + const aliasResult = interpret(`types { + SearchableEmail = cipherstash.EncryptedString({ equality: true, freeTextSearch: true }) +} + +model User { + id Int @id + email SearchableEmail +} +`); + const inlineResult = interpret(`model User { + id Int @id + email cipherstash.EncryptedString({ equality: true, freeTextSearch: true }) +} +`); + expect(aliasResult.ok).toBe(true); + expect(inlineResult.ok).toBe(true); + if (!aliasResult.ok || !inlineResult.ok) return; + + const aliasStorage = narrowStorage(aliasResult.value); + const aliasNamedType = aliasStorage.types?.['SearchableEmail']; + const inlineCol = userColumns(inlineResult.value, 'email'); + + // The named type's storage descriptor and the inline column's + // codec/nativeType/typeParams must agree byte-for-byte; the inline + // column carries `nullable` (and may carry `default`/etc.) which the + // named-type descriptor does not. 
+ expect(aliasNamedType).toEqual({ + codecId: inlineCol['codecId'], + nativeType: inlineCol['nativeType'], + typeParams: inlineCol['typeParams'], + }); + }); + + it('reports a span at the offending argument value (AC-CTOR4 span requirement)', () => { + const result = interpret(`model User { + id Int @id + email cipherstash.EncryptedString({ equality: 42 }) +} +`); + expect(result.ok).toBe(false); + if (result.ok) return; + const diag = result.failure.diagnostics.find( + (d) => d.code === 'PSL_INVALID_ATTRIBUTE_ARGUMENT', + ); + expect(diag?.span).toMatchObject({ + start: { line: expect.any(Number), column: expect.any(Number) }, + end: { line: expect.any(Number), column: expect.any(Number) }, + }); + }); +}); diff --git a/packages/3-extensions/cipherstash/test/runtime.test.ts b/packages/3-extensions/cipherstash/test/runtime.test.ts new file mode 100644 index 0000000000..bca226a305 --- /dev/null +++ b/packages/3-extensions/cipherstash/test/runtime.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it, vi } from 'vitest'; +import { CIPHERSTASH_STRING_CODEC_ID } from '../src/core/codecs'; +import { + createParameterizedCodecDescriptors, + encryptedStringParamsSchema, +} from '../src/core/parameterized'; +import type { CipherstashSdk } from '../src/core/sdk'; +import { createCipherstashRuntimeDescriptor } from '../src/exports/runtime'; + +function makeSdk(): CipherstashSdk { + return { + decrypt: vi.fn(), + bulkEncrypt: vi.fn(), + bulkDecrypt: vi.fn(), + }; +} + +describe('cipherstash runtime descriptor — AC-CODEC5 (parameterized codec descriptor)', () => { + it('exposes one parameterized descriptor for `cipherstash/string@1`', () => { + const sdk = makeSdk(); + const descriptor = createCipherstashRuntimeDescriptor({ sdk }); + const descriptors = descriptor.parameterizedCodecs(); + expect(descriptors).toHaveLength(1); + expect(descriptors[0]?.codecId).toBe(CIPHERSTASH_STRING_CODEC_ID); + }); + + it('descriptor traits and target types match the codec', () => { + const 
sdk = makeSdk(); + const descriptor = createCipherstashRuntimeDescriptor({ sdk }); + const [first] = descriptor.parameterizedCodecs(); + expect(first?.traits).toEqual(['equality']); + expect(first?.targetTypes).toEqual(['eql_v2_encrypted']); + }); + + it('descriptor renderOutputType returns `EncryptedString`', () => { + const sdk = makeSdk(); + const descriptor = createCipherstashRuntimeDescriptor({ sdk }); + const [first] = descriptor.parameterizedCodecs(); + expect(first?.renderOutputType?.({ equality: true, freeTextSearch: false })).toBe( + 'EncryptedString', + ); + }); +}); + +describe('cipherstash parameterized codec — params schema (arktype)', () => { + const validate = encryptedStringParamsSchema['~standard'].validate; + + it('accepts `{equality, freeTextSearch}` with both booleans', async () => { + const result = await validate({ equality: true, freeTextSearch: false }); + expect(result).not.toHaveProperty('issues'); + }); + + it('rejects missing equality', async () => { + const result = await validate({ freeTextSearch: false }); + expect(result).toHaveProperty('issues'); + }); + + it('rejects missing freeTextSearch', async () => { + const result = await validate({ equality: true }); + expect(result).toHaveProperty('issues'); + }); + + it('rejects non-boolean equality', async () => { + const result = await validate({ equality: 'yes', freeTextSearch: false }); + expect(result).toHaveProperty('issues'); + }); +}); + +describe('cipherstash parameterized codec descriptors — sdk-bound factory', () => { + it('createParameterizedCodecDescriptors(sdk) returns the descriptor list', () => { + const sdk = makeSdk(); + const descriptors = createParameterizedCodecDescriptors(sdk); + expect(descriptors).toHaveLength(1); + expect(descriptors[0]?.codecId).toBe(CIPHERSTASH_STRING_CODEC_ID); + }); + + it('descriptor.factory(params)(ctx) yields a codec wired to the captured sdk', async () => { + const sdk = makeSdk(); + const descriptors = 
createParameterizedCodecDescriptors(sdk); + const factory = descriptors[0]?.factory; + expect(factory).toBeDefined(); + const resolved = factory?.({ equality: true, freeTextSearch: false })({ + name: 'cipherstash-string-instance', + }); + expect(resolved).toBeDefined(); + expect(resolved?.id).toBe(CIPHERSTASH_STRING_CODEC_ID); + }); +}); diff --git a/packages/3-extensions/cipherstash/tsconfig.json b/packages/3-extensions/cipherstash/tsconfig.json new file mode 100644 index 0000000000..7afa587436 --- /dev/null +++ b/packages/3-extensions/cipherstash/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": ["@prisma-next/tsconfig/base"], + "compilerOptions": { + "rootDir": ".", + "outDir": "dist" + }, + "include": ["src/**/*.ts", "test/**/*.ts"], + "exclude": ["dist"] +} diff --git a/packages/3-extensions/cipherstash/tsconfig.prod.json b/packages/3-extensions/cipherstash/tsconfig.prod.json new file mode 100644 index 0000000000..b08d4c908a --- /dev/null +++ b/packages/3-extensions/cipherstash/tsconfig.prod.json @@ -0,0 +1,4 @@ +{ + "$schema": "https://json.schemastore.org/tsconfig", + "extends": ["@prisma-next/tsconfig/prod"] +} diff --git a/packages/3-extensions/cipherstash/tsdown.config.ts b/packages/3-extensions/cipherstash/tsdown.config.ts new file mode 100644 index 0000000000..b16cdbbc18 --- /dev/null +++ b/packages/3-extensions/cipherstash/tsdown.config.ts @@ -0,0 +1,12 @@ +import { defineConfig } from '@prisma-next/tsdown'; + +export default defineConfig({ + entry: [ + 'src/exports/index.ts', + 'src/exports/control.ts', + 'src/exports/runtime.ts', + 'src/exports/middleware.ts', + 'src/exports/column-types.ts', + 'src/exports/pack.ts', + ], +}); diff --git a/packages/3-extensions/cipherstash/vitest.config.ts b/packages/3-extensions/cipherstash/vitest.config.ts new file mode 100644 index 0000000000..0d44e4c0fb --- /dev/null +++ b/packages/3-extensions/cipherstash/vitest.config.ts @@ -0,0 +1,31 @@ +import { timeouts } from '@prisma-next/test-utils'; +import { defineConfig } 
from 'vitest/config'; + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + testTimeout: timeouts.default, + hookTimeout: timeouts.default, + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html'], + include: ['src/**/*.ts'], + exclude: [ + 'dist/**', + 'test/**', + '**/*.test.ts', + '**/*.test-d.ts', + '**/*.config.ts', + '**/exports/**', + '**/types.ts', + ], + thresholds: { + lines: 95, + branches: 90, + functions: 95, + statements: 95, + }, + }, + }, +}); diff --git a/packages/3-targets/6-adapters/postgres/src/core/sql-renderer.ts b/packages/3-targets/6-adapters/postgres/src/core/sql-renderer.ts index dd72e9db6c..da9bfba901 100644 --- a/packages/3-targets/6-adapters/postgres/src/core/sql-renderer.ts +++ b/packages/3-targets/6-adapters/postgres/src/core/sql-renderer.ts @@ -21,6 +21,7 @@ import { type OrderByItem, type ParamRef, type ProjectionItem, + type RawSqlExpr, type SelectAst, type Codec as SqlCodec, type SubqueryExpr, @@ -148,6 +149,9 @@ export function renderLoweredSql( case 'delete': sql = renderDelete(node, contract, pim); break; + case 'raw-sql': + sql = renderRawSql(node, contract, pim); + break; // v8 ignore next 4 default: throw new Error( @@ -767,6 +771,20 @@ function renderUpdate(ast: UpdateAst, contract: PostgresContract, pim: ParamInde return `UPDATE ${table} SET ${setClauses.join(', ')}${whereClause}${returningClause}`; } +function renderRawSql(ast: RawSqlExpr, contract: PostgresContract, pim: ParamIndexMap): string { + const out: string[] = []; + for (let i = 0; i < ast.fragments.length; i++) { + out.push(ast.fragments[i] ?? ''); + if (i < ast.args.length) { + const arg = ast.args[i]; + if (arg !== undefined) { + out.push(renderExpr(arg, contract, pim)); + } + } + } + return out.join(''); +} + function renderDelete(ast: DeleteAst, contract: PostgresContract, pim: ParamIndexMap): string { const table = quoteIdentifier(ast.table.name); const whereClause = ast.where ? 
` WHERE ${renderWhere(ast.where, contract, pim)}` : ''; diff --git a/packages/3-targets/6-adapters/postgres/test/migrations/data-transform.plan-from-ast.test.ts b/packages/3-targets/6-adapters/postgres/test/migrations/data-transform.plan-from-ast.test.ts new file mode 100644 index 0000000000..6aa8bdd6d2 --- /dev/null +++ b/packages/3-targets/6-adapters/postgres/test/migrations/data-transform.plan-from-ast.test.ts @@ -0,0 +1,54 @@ +import type { Contract } from '@prisma-next/contract/types'; +import { coreHash, profileHash } from '@prisma-next/contract/types'; +import type { SqlControlAdapter } from '@prisma-next/family-sql/control-adapter'; +import type { SqlStorage } from '@prisma-next/sql-contract/types'; +import { ParamRef, RawSqlExpr } from '@prisma-next/sql-relational-core/ast'; +import { planFromAst } from '@prisma-next/sql-relational-core/plan'; +import { dataTransform } from '@prisma-next/target-postgres/data-transform'; +import { describe, expect, it, vi } from 'vitest'; + +function makeContract(): Contract { + return { + target: 'postgres', + targetFamily: 'sql', + profileHash: profileHash('sha256:profile'), + roots: {}, + capabilities: {}, + extensionPacks: {}, + meta: {}, + storage: { + storageHash: coreHash('sha256:plan-from-ast-e2e'), + tables: {}, + } as unknown as SqlStorage, + models: {}, + }; +} + +function makeAdapter(impl: (sql: string, params: readonly unknown[]) => void = () => {}) { + const lower = vi.fn((_ast: unknown, _ctx: unknown) => { + const result = { sql: 'SELECT 1', params: [] as readonly unknown[] }; + impl(result.sql, result.params); + return result; + }); + return { lower } as unknown as SqlControlAdapter<'postgres'>; +} + +describe('planFromAst integrated with dataTransform (AC-PLAN3)', () => { + it("AC-PLAN3: a plan returned by planFromAst satisfies dataTransform's assertContractMatches", () => { + const ast = RawSqlExpr.of( + ['SELECT eql_v2.add_search_config(', ', ', ')'], + [ + ParamRef.of('user', { codecId: 'pg/text@1' }), + 
ParamRef.of('email', { codecId: 'pg/text@1' }), + ], + ); + + const contract = makeContract(); + const plan = planFromAst(ast, contract); + const adapter = makeAdapter(); + + expect(() => + dataTransform(contract, 'add-search-config', { run: () => plan }, adapter), + ).not.toThrow(); + }); +}); diff --git a/packages/3-targets/6-adapters/postgres/test/sql-renderer.raw-sql.test.ts b/packages/3-targets/6-adapters/postgres/test/sql-renderer.raw-sql.test.ts new file mode 100644 index 0000000000..81c534379a --- /dev/null +++ b/packages/3-targets/6-adapters/postgres/test/sql-renderer.raw-sql.test.ts @@ -0,0 +1,118 @@ +import { validateContract } from '@prisma-next/sql-contract/validate'; +import { BinaryExpr, ColumnRef, ParamRef, RawSqlExpr } from '@prisma-next/sql-relational-core/ast'; +import { describe, expect, it } from 'vitest'; +import { createPostgresAdapter } from '../src/core/adapter'; +import type { PostgresContract } from '../src/core/types'; + +const contract = validateContract( + { + target: 'postgres', + targetFamily: 'sql', + profileHash: 'sha256:raw-sql-test', + roots: {}, + capabilities: {}, + extensionPacks: {}, + meta: {}, + storage: { + storageHash: 'sha256:raw-sql-test', + tables: { + user: { + columns: { + id: { codecId: 'pg/int4@1', nativeType: 'int4', nullable: false }, + email: { codecId: 'pg/text@1', nativeType: 'text', nullable: false }, + }, + uniques: [], + indexes: [], + foreignKeys: [], + }, + }, + }, + models: {}, + }, + { get: () => undefined }, +); + +describe('renderLoweredSql RawSqlExpr arm (AC-LOW1..5)', () => { + const adapter = createPostgresAdapter(); + + it('AC-LOW5: zero-arg raw lowers to its single fragment with empty params', () => { + const ast = RawSqlExpr.of(['SELECT 1'], []); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('SELECT 1'); + expect(lowered.params).toEqual([]); + }); + + it('AC-LOW1: one ParamRef substitutes $1 at the gap and lifts the value into params', () => { + const ast = 
RawSqlExpr.of( + ['SELECT eql_v2.eq(', ')'], + [ParamRef.of('alice@example.com', { codecId: 'pg/text@1' })], + ); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('SELECT eql_v2.eq($1)'); + expect(lowered.params).toEqual(['alice@example.com']); + }); + + it('AC-LOW2: multiple ParamRefs in different positions render $1, $2, ... in source order', () => { + const ast = RawSqlExpr.of( + ['SELECT eql_v2.add_search_config(', ', ', ', ', ', ', ')'], + [ + ParamRef.of('user', { codecId: 'pg/text@1' }), + ParamRef.of('email', { codecId: 'pg/text@1' }), + ParamRef.of('unique', { codecId: 'pg/text@1' }), + ParamRef.of('text', { codecId: 'pg/text@1' }), + ], + ); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('SELECT eql_v2.add_search_config($1, $2, $3, $4)'); + expect(lowered.params).toEqual(['user', 'email', 'unique', 'text']); + }); + + it('AC-LOW3: an inlined typed-builder expression lowers via renderExpr; sub-params append in canonical order', () => { + const inner = BinaryExpr.eq( + ColumnRef.of('user', 'email'), + ParamRef.of('alice@example.com', { codecId: 'pg/text@1' }), + ); + const ast = RawSqlExpr.of( + ['SELECT * FROM "user" WHERE ', ' AND id = ', ''], + [inner, ParamRef.of(7, { codecId: 'pg/int4@1' })], + ); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('SELECT * FROM "user" WHERE "user"."email" = $1 AND id = $2'); + expect(lowered.params).toEqual(['alice@example.com', 7]); + }); + + it('renders an empty leading fragment correctly (template-literal `${value} suffix` shape)', () => { + const ast = RawSqlExpr.of(['', ' AS literal_one'], [ParamRef.of(1, { codecId: 'pg/int4@1' })]); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('$1 AS literal_one'); + expect(lowered.params).toEqual([1]); + }); + + it('dedupes repeated ParamRef identity to a single $N (collectOrderedParamRefs semantics)', () => { + const shared = ParamRef.of('shared', { 
codecId: 'pg/text@1' }); + const ast = RawSqlExpr.of(['SELECT ', ' = ', ''], [shared, shared]); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('SELECT $1 = $1'); + expect(lowered.params).toEqual(['shared']); + }); + + // SQL-injection invariant: ParamRef values never get text-inlined into + // the rendered SQL. They must appear only in the params array, with + // positional placeholders ($1, $2, ...) at their original positions. + // Defense in depth on top of AC-LOW1/LOW2 — exercised here with the + // exact shape cipherstash's `addSearchConfig` migration factory uses. + it('AC-LOW6: ParamRef values are never text-inlined into the rendered SQL', () => { + const ast = RawSqlExpr.of( + ['SELECT eql_v2.add_search_config(', ', ', ')'], + [ + ParamRef.of('users', { codecId: 'pg/text@1' }), + ParamRef.of('email', { codecId: 'pg/text@1' }), + ], + ); + const lowered = adapter.lower(ast, { contract }); + expect(lowered.sql).toBe('SELECT eql_v2.add_search_config($1, $2)'); + expect(lowered.params).toEqual(['users', 'email']); + expect(lowered.sql).not.toContain('users'); + expect(lowered.sql).not.toContain('email'); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5b7ff34c39..1dd164576c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -2310,6 +2310,52 @@ importers: specifier: 'catalog:' version: 4.0.17(@types/node@24.10.4)(@vitest/ui@4.0.17)(jiti@2.6.1)(jsdom@28.1.0(@noble/hashes@2.0.1))(lightningcss@1.32.0)(tsx@4.20.6)(yaml@2.8.1) + packages/3-extensions/cipherstash: + dependencies: + '@prisma-next/family-sql': + specifier: workspace:* + version: link:../../2-sql/9-family + '@prisma-next/framework-components': + specifier: workspace:* + version: link:../../1-framework/1-core/framework-components + '@prisma-next/sql-relational-core': + specifier: workspace:* + version: link:../../2-sql/4-lanes/relational-core + '@prisma-next/sql-runtime': + specifier: workspace:* + version: link:../../2-sql/5-runtime + '@prisma-next/utils': + 
specifier: workspace:* + version: link:../../1-framework/0-foundation/utils + arktype: + specifier: 'catalog:' + version: 2.1.29 + devDependencies: + '@prisma-next/psl-parser': + specifier: workspace:* + version: link:../../1-framework/2-authoring/psl-parser + '@prisma-next/sql-contract-psl': + specifier: workspace:* + version: link:../../2-sql/2-authoring/contract-psl + '@prisma-next/test-utils': + specifier: workspace:* + version: link:../../../test/utils + '@prisma-next/tsconfig': + specifier: workspace:* + version: link:../../0-config/tsconfig + '@prisma-next/tsdown': + specifier: workspace:* + version: link:../../0-config/tsdown + tsdown: + specifier: 'catalog:' + version: 0.18.4(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2)(typescript@5.9.3) + typescript: + specifier: 'catalog:' + version: 5.9.3 + vitest: + specifier: 'catalog:' + version: 4.0.17(@types/node@24.10.4)(@vitest/ui@4.0.17)(jiti@2.6.1)(jsdom@28.1.0(@noble/hashes@2.0.1))(lightningcss@1.32.0)(tsx@4.20.6)(yaml@2.8.1) + packages/3-extensions/middleware-telemetry: dependencies: '@prisma-next/contract': @@ -3409,6 +3455,9 @@ importers: '@prisma-next/extension-arktype-json': specifier: workspace:* version: link:../../packages/3-extensions/arktype-json + '@prisma-next/extension-cipherstash': + specifier: workspace:* + version: link:../../packages/3-extensions/cipherstash '@prisma-next/extension-pgvector': specifier: workspace:* version: link:../../packages/3-extensions/pgvector @@ -9748,7 +9797,7 @@ snapshots: ast-kit@2.2.0: dependencies: - '@babel/parser': 7.28.6 + '@babel/parser': 7.29.2 pathe: 2.0.3 ast-v8-to-istanbul@0.3.10: @@ -11156,9 +11205,9 @@ snapshots: rolldown-plugin-dts@0.20.0(rolldown@1.0.0-beta.57(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2))(typescript@5.9.3): dependencies: - '@babel/generator': 7.28.5 - '@babel/parser': 7.28.6 - '@babel/types': 7.28.6 + '@babel/generator': 7.29.1 + '@babel/parser': 7.29.2 + '@babel/types': 7.29.0 ast-kit: 2.2.0 birpc: 4.0.0 dts-resolver: 2.1.3 diff --git 
a/projects/cipherstash-integration/project-1/HANDOVER.md b/projects/cipherstash-integration/project-1/HANDOVER.md new file mode 100644 index 0000000000..2d90a8967f --- /dev/null +++ b/projects/cipherstash-integration/project-1/HANDOVER.md @@ -0,0 +1,196 @@ +# Project 1 — Searchable-encryption MVP — Handover + +> Written 2026-05-05 by the previous driver. Read this first, then [`spec.md`](spec.md), [`plan.md`](plan.md), and [`reviews/code-review.md`](reviews/code-review.md) in that order. + +## TL;DR + +**Project 1 is ~60% shipped on `tml-2373-project-1-searchable-encryption-mvp`.** Three of five milestones (M1, M2.a, M2.b) are SATISFIED on the branch with **52 ACs PASS / 0 FAIL / 48 NOT VERIFIED**. The remaining work is **M2.c** (bulk-encrypt middleware + real EQL bundle + live Postgres + EQL integration tests), then **M3** (`eq` operator + manual `addSearchConfig` migration), then **M4** (`ilike` + `decryptAll` + `activatePending`), then **M5** (close-out). Two follow-up Linear tickets are filed: [TML-2376](https://linear.app/prisma-company/issue/TML-2376) (Mongo middleware param-mutator runtime wiring) and [TML-2388](https://linear.app/prisma-company/issue/TML-2388) (codec-SDK binding refactor). + +## Where to start (5-minute orientation) + +1. **This file** — sets the table. +2. **[`spec.md`](spec.md)** — the project's source of truth. § Status table now reflects current state. +3. **[`plan.md`](plan.md)** — milestone-by-milestone breakdown. Each milestone now carries an explicit `**Status:**` line; M2.c has a concrete task checklist (T2.c.1..T2.c.8) under `## M2 — Store-only round-trip → ### M2.c remaining work`. +4. **[`reviews/code-review.md`](reviews/code-review.md)** — full AC scoreboard with file:line evidence per PASS, plus § Orchestrator notes capturing accepted deferrals. +5. 
**[`reviews/system-design-review.md`](reviews/system-design-review.md)** + **[`reviews/walkthrough.md`](reviews/walkthrough.md)** — the previous reviewer's design overview and behavior-change narrative across rounds. Skim if you want the architectural framing in someone else's words. + +The five task specs under [`specs/`](specs/) are the AC-text sources of truth; refer to them when adjudicating whether a given AC is met. + +## What's done + +### M1 — Framework SPI ✅ SATISFIED + +Lands the two framework-side prerequisites the cipherstash extension consumes. + +- **`raw-sql-ast-node`** — `RawSqlExpr` AST node + Postgres lowerer arm + `planFromAst` envelope helper. AC-AST1..5, AC-LOW1..6, AC-PLAN1..3 all PASS. AC-E2E1/E2E2 are migration-factories-coupled and stay M3-scoped. Commits `1d8b70943..9425690fa` (six commits including AC-ABT1 signal plumbing). +- **`middleware-param-transform`** — mutable `beforeExecute` seam (`SqlParamRefMutator`) + per-execute `MiddlewareContext.signal` + Mongo type-seam parity. AC-MUT1..5, AC-EX1, AC-ABT1..4, AC-FAM1..2, AC-TYPE1..2 all PASS. Commits `314011400..33a6e5ad5`. + +**Defer in scope:** Mongo runtime wiring of the param mutator → [TML-2376](https://linear.app/prisma-company/issue/TML-2376) (filed). Project 1 is Postgres-only so this doesn't block; framework symmetry follow-up. + +### M2.a — Cipherstash package skeleton + envelope + codec ✅ SATISFIED + +Bootstraps `packages/3-extensions/cipherstash/` (mirrors `packages/3-extensions/pgvector/` structurally). + +- `EncryptedString` envelope class with module-scoped `WeakMap` for handle storage. +- `CipherstashSdk` interface (3 async methods: `decrypt` / `bulkEncrypt` / `bulkDecrypt`, optional `AbortSignal` per call). +- `cipherstash/string@1` codec (target type `eql_v2_encrypted`, traits `['equality']`). +- `RuntimeParameterizedCodecDescriptor<{equality, freeTextSearch}>` with arktype paramsSchema. 
+- `databaseDependencies.init` shape with **placeholder install SQL** (real EQL bundle vendored in M2.c). +- AC-PKG1..3, AC-ENV1/2/4, AC-CODEC1..5, AC-INSTALL1 all PASS. Commits `2b2efbe75..2d05b90d3` + `6bbbee20f..0d558b1b2` (F3+F4 cleanup). + +### M2.b — PSL constructor + TS factory + parity ✅ SATISFIED + +Authoring surface — both PSL and TS produce byte-identical `contract.json` for cipherstash columns. + +- `cipherstash.EncryptedString({ equality, freeTextSearch })` PSL constructor. +- `encryptedString({...})` TS factory. +- PSL↔TS parity fixture at `test/integration/test/authoring/parity/cipherstash-encrypted-string/`. +- `dbInit` DDL snapshot proving the column renders as `eql_v2_encrypted` (no live DB; pure in-process). +- Required a small framework addition: `kind: 'boolean'` arm on `AuthoringArgumentDescriptor` (commit `584bbcda6`). Three-file additive change; zero impact on existing extensions. +- AC-CTOR1..4, AC-LOWER1..4, AC-ALIAS1..2, AC-PARITY1..2 all PASS. Commits `584bbcda6..c48d4d7ad`. + +**Defer in scope:** Codec-SDK binding refactor (cipherstash needed two codecs — SDK-free for pack-meta, SDK-bound for runtime). The clean fix is to thread SDK per-call via `CodecCallContext` rather than capturing it at codec construction. M3+ framework scope. Filed as [TML-2388](https://linear.app/prisma-company/issue/TML-2388); the original accepted-deferral record lives in `reviews/code-review.md § Orchestrator notes — M2.b R1`. + +## What remains + +### M2.c — Bulk-encrypt middleware + live integration ⏳ NOT STARTED + +Concrete task list lives in [`plan.md → ### M2.c remaining work`](plan.md). 
Summary: + +- **T2.c.1** — vendor real EQL bundle (replace placeholder in `packages/3-extensions/cipherstash/src/core/eql-bundle.ts` with content from the adjacent worktree at `/Users/wmadden/Projects/prisma/prisma-next-ws/worktrees/cipherstash-integration/reference/cipherstash/stack/packages/stack/src/prisma/core/eql-bundle.ts`) +- **T2.c.2** — `bulkEncryptMiddleware` factory in `src/middleware/bulk-encrypt.ts` (clears AC-MW1..5) +- **T2.c.3** — routing-key derivation (default `(table, column)`; confirm with CipherStash team) +- **T2.c.4** — live-Postgres + EQL storage round-trip integration test (clears AC-E2E1 storage subset) +- **T2.c.5** — bulk-call counter test (10 inserts → 1 `bulkEncrypt`; clears AC-E2E2 write half) +- **T2.c.6** — `dbInit` against fresh + already-installed DB (clears AC-INSTALL2/3) +- **T2.c.7** — second integration test driven entirely from PSL (clears `psl-encrypted-string-constructor` AC-E2E1) +- **T2.c.8** — full validation gate sweep + +**Entry condition:** live Postgres + EQL reachable from the test runner. The repo's `pnpm test:integration` script spins up Postgres in containers; confirm the EQL bundle install works against that setup before expanding the harness. + +### M3 — `eq` operator + manual `addSearchConfig` migration ⏳ NOT STARTED + +Implements the headline cipherstash feature: `findMany({ where: { email: { equals: 'alice@example.com' } } })` against an encrypted column round-trips against live Postgres + EQL. 
+ +- Operator-lowering for `eq` against `cipherstash/string@1` columns (clears AC-OP1, AC-OP3, AC-OP4 partial) +- `addSearchConfig({ ..., equality: true })` migration factory in `exports/migration.ts` — constructs `RawSqlExpr` from M1, wraps via `planFromAst(ast, contract)`, consumes via `dataTransform(...)` (clears AC-FACT*, AC-SQL*, AC-MIG*, AC-E2E1/E2E2 from `migration-factories.spec.md`) +- Hand-author the integration migration `migration.ts` +- See `plan.md § M3` for the full sketch + validation gate + +### M4 — `ilike` + `activatePending` + `decryptAll` ⏳ NOT STARTED + +Completes the Project 1 user-facing surface. After M4 the seven umbrella ACs (AC-UMB1..7) are all green. + +- `ilike` arm on the operator lowerer (AC-OP2) +- `decryptAll(rows, opts?)` walker — bulk-decrypt amortized (AC-DEC1..4) +- `addSearchConfig` extended to emit `freeTextSearch` → EQL `'match'` index entry +- `activatePendingSearches()` factory +- Update integration migration fixture +- See `plan.md § M4` for the full sketch + validation gate + +### M5 — Close-out ⏳ NOT STARTED + +Project lifecycle close-out per `projects/README.md`. T5.1..T5.6 documented in `plan.md § M5`. The expected ending state: `projects/cipherstash-integration/project-1/` directory deleted; long-lived docs migrated to `docs/`; final PR merges. + +## Operating context + +### Branch + worktree + +- **Branch:** `tml-2373-project-1-searchable-encryption-mvp` (pushed to origin; up-to-date) +- **Worktree:** `/Users/wmadden/Projects/prisma/prisma-next-ws/worktrees/tml-2373-project-1-searchable-encryption-mvp` +- **Main repo path:** `/Users/wmadden/Projects/prisma/prisma-next` (different worktree, possibly on a different branch) +- **Reference (untracked, adjacent worktree):** `/Users/wmadden/Projects/prisma/prisma-next-ws/worktrees/cipherstash-integration/reference/cipherstash/` — the first-attempt cipherstash repo. 
Used for: (i) vendoring `eql-bundle.ts` in M2.c, (ii) operator-template lookups in M3, (iii) SDK shape reference. Read-only; never copy wholesale. + +### Validation gates (run from the worktree root) + +```sh +pnpm typecheck # repo-wide; expect 125/125 green at HEAD +pnpm test:packages # 111+ tasks; cipherstash package contributes 47/47 +pnpm lint:deps # 0 violations expected +pnpm --filter @prisma-next/extension-cipherstash test +pnpm --filter @prisma-next/extension-cipherstash lint +pnpm --filter @prisma-next/integration-tests test -t 'cipherstash-encrypted-string' # parity fixture +pnpm test:integration # live-DB suite — needed for M2.c onwards +``` + +**Known transient:** `pnpm test:packages` first parallel run sometimes shows a flake in `@prisma-next/cli` + `@prisma-next/adapter-postgres` that's green when re-run individually. Pre-existing turbo-scheduling / DB-resource contention; not introduced by this branch. + +### Key files + +| Surface | File | +|---|---| +| Cipherstash package | `packages/3-extensions/cipherstash/` | +| Envelope class | `packages/3-extensions/cipherstash/src/core/envelope.ts` | +| Codec | `packages/3-extensions/cipherstash/src/core/codecs.ts` | +| Pack-meta + parameterized descriptor | `packages/3-extensions/cipherstash/src/core/{descriptor-meta,parameterized}.ts` | +| PSL constructor registration | `packages/3-extensions/cipherstash/src/core/authoring.ts` | +| Control descriptor + EQL install | `packages/3-extensions/cipherstash/src/exports/control.ts` | +| EQL bundle (PLACEHOLDER) | `packages/3-extensions/cipherstash/src/core/eql-bundle.ts` ← replace in M2.c T2.c.1 | +| TS contract factory | `packages/3-extensions/cipherstash/src/exports/column-types.ts` | +| Middleware (STUB) | `packages/3-extensions/cipherstash/src/exports/middleware.ts` ← populate in M2.c T2.c.2 | +| Parity fixture | `test/integration/test/authoring/parity/cipherstash-encrypted-string/` | +| dbInit DDL snapshot | 
`test/integration/test/authoring/cipherstash-dbinit-snapshot.test.ts` | +| Framework param-mutator | `packages/2-sql/4-lanes/relational-core/src/middleware/param-ref-mutator.ts` | +| Framework `RawSqlExpr` | `packages/2-sql/4-lanes/relational-core/src/ast/types.ts` (search for `RawSqlExpr`) | +| Per-execute signal plumbing | `packages/1-framework/1-core/framework-components/src/execution/runtime-middleware.ts` + `run-with-middleware.ts` | + +### Repo conventions to know + +- **`pnpm` only**, never `npm`. Never `npx`. +- **No backward-compat shims** unless explicitly requested. Update call sites instead. +- **Tests-first.** Every AC pushed should have a green test on disk before the implementation lands. +- **Explicit-staging commits.** Never `git add -A` or `git add .` (see [`.cursor/rules/git-staging.mdc`](../../../.cursor/rules/git-staging.mdc)). +- **No transient-project links in user-facing docs.** Use `DEVELOPING.md` for contributor notes (see `packages/3-extensions/cipherstash/DEVELOPING.md` precedent). +- **Use `ifDefined()` from `@prisma-next/utils/defined`** for optional-property forwarding (see [`.cursor/rules/use-if-defined.mdc`](../../../.cursor/rules/use-if-defined.mdc)). +- **No `any`, no `@ts-expect-error` outside negative type tests, no `@ts-nocheck`.** +- **Check [`AGENTS.md`](../../../AGENTS.md)** for the full ruleset; it's well-organized. + +## Follow-up tickets + +### TML-2376 (filed) — Mongo middleware param-mutator runtime wiring + +`MongoRuntime` doesn't yet construct/thread a `MongoParamRefMutator`. The Mongo type seam + `flattenMongoParamRefs` helper + unit tests landed in M1 (sufficient for AC-FAM1/FAM2 per the AC text), but end-to-end runtime wiring requires deferring `resolveValue` past `beforeExecute` in `packages/3-mongo-target/2-mongo-adapter/src/mongo-adapter.ts`. Architectural change to Mongo's lowering contract; outside Project 1 (Postgres-only). 
+ +### TML-2388 (filed) — Codec-SDK binding refactor + +Cipherstash's runtime codec captures `CipherstashSdk` in its `decode` closure, which collides with pack-meta consumers that read codec metadata at contract-emit time before any SDK exists. M2.b shipped a two-codec workaround (`cipherstashStringCodecMetadata` + `createCipherstashStringCodec(sdk)`); the clean fix threads SDK per-call via `CodecCallContext` and touches every codec in the repo, so it's M3+ framework scope. Filed as [TML-2388 — Codec-SDK binding refactor](https://linear.app/prisma-company/issue/TML-2388) (Medium, parent TML-2373). Original accepted-deferral record: `reviews/code-review.md § Orchestrator notes — M2.b R1`. + +### Smaller observations not yet ticketed + +These were surfaced under § Anything surprising in M2.b R1 but are infrastructure / build-tool issues outside Project 1's spec — file separately or drop: + +1. **`tsdown build` rewrites `package.json`** with `main`/`module`/`types` fields. The previous implementer reverted twice during commits. Possibly a workspace-config regression; likely an `@prisma-next/tsdown` config issue. Worth a separate ticket if it recurs in M2.c work. +2. **Vitest `testNamePattern` × `UPDATE_AUTHORING_PARITY_EXPECTED=1`** doesn't generate `expected.contract.json` reliably. Workaround: run against the explicit test-file path. Documented in commit `8ea4a1b8b`. +3. **Pre-existing flake** in `@prisma-next/cli` + `@prisma-next/adapter-postgres` parallel test runs (mentioned under Validation gates above). + +## Open spec questions still unresolved + +These appear in `plan.md § Open items` as items 1-6. Most are still relevant for the remaining milestones: + +- Item 1 — PSL parity test location → **resolved** during M2.b: pgvector-mirrored shape (`test/integration/test/authoring/parity/cipherstash-encrypted-string/`). +- Item 2 — operator lowering source-of-truth → **pending; resolve in M3**. Confirm against `reference/cipherstash/.../operation-templates.ts`. 
+- Item 3 — migration factory naming (single vs split) → **pending; resolve in M3**. +- Item 4 — EQL `activate_pending_searches` exact function name → **pending; resolve in M4**. +- Item 5 — routing-key derivation → **resolved (2026-05-06)**. Routing key is `{ table, column }`; no per-column override in Project 1. CipherStash team will be consulted post-delivery — see `cipherstash-team-questions.md`. +- Item 6 — plaintext-zeroing default → **resolved (2026-05-06)**. Project 1 does not zero plaintext post-encrypt. M2.c implementer removes the existing `handle.plaintext = undefined` line in `setHandleCiphertext` (`packages/3-extensions/cipherstash/src/core/envelope.ts:44-48`) and flips `AC-MW5`. Question for the CipherStash team is in `cipherstash-team-questions.md`. + +## Subagent / orchestration context + +The previous driver used the [`drive-orchestrate-plan`](/Users/wmadden/.agents/skills/drive-orchestrate-plan/SKILL.md) skill to drive milestones to a SATISFIED state via an iterate-implement-review loop with two persistent subagent personas (one implementer per milestone, one reviewer across all milestones). If you want to continue with that workflow: + +- **Reviewer subagent ID** (resume across rounds): `4d37df98-53a6-4eab-8e80-653a03253145`. Persists the AC scoreboard and review artifacts. Resume on every new round; do not spawn fresh. +- **M2.a/M2.b implementer ID** (retired): `58bc641c-8950-41ce-af00-afe216eab421`. Will need a fresh implementer for M2.c per the milestone-fresh-implementer protocol; resume the reviewer. + +If you'd rather not use the skill — totally fine; the spec, plan, code-review, and AC scoreboard are self-contained. Drive the rounds yourself, or route through your own preferred workflow. + +## Pre-flight checklist for the next driver + +- [ ] Pull `tml-2373-project-1-searchable-encryption-mvp`; confirm HEAD is `cc99d503e` or later. +- [ ] `pnpm install` — confirm `pnpm-lock.yaml` matches. 
+- [ ] Run the validation gates above; expect green across the board. +- [ ] Skim `reviews/code-review.md` § Summary + § Acceptance criteria scoreboard. +- [ ] Read this file's [§ What remains](#what-remains) section + `plan.md § M2 → ### M2.c remaining work`. +- [ ] Confirm Postgres + EQL infra availability (or add it as a pre-T2.c.1 step). +- [ ] Read `cipherstash-team-questions.md` if you'll be talking to the CipherStash team. Two design defaults are queued for them to validate post-delivery (routing-key derivation; plaintext zeroing). Decisions are already baked into the spec/plan and don't gate M2.c — but if their answers diverge from our defaults, the doc explains exactly what changes. + +Good luck — the foundation is solid, the codec/envelope/parity-test surfaces are well-tested, and the remaining work is well-scoped. Reach out to the previous driver via Linear if you hit unexpected blockers. diff --git a/projects/cipherstash-integration/project-1/cipherstash-team-questions.md b/projects/cipherstash-integration/project-1/cipherstash-team-questions.md new file mode 100644 index 0000000000..c2a7a9183d --- /dev/null +++ b/projects/cipherstash-integration/project-1/cipherstash-team-questions.md @@ -0,0 +1,160 @@ +# Questions for the CipherStash team — Project 1 (searchable-encryption MVP) + +> **Purpose.** This doc captures two design defaults we're shipping in `@prisma-next/extension-cipherstash` that we'd like the CipherStash team to validate. Neither blocks delivery — we've made a call and the code is on track to land — but both touch user-visible behavior, so we'd rather know post-delivery whether you'd like us to change the default before any external user adopts the extension. +> +> **Framing for the conversation.** We're not asking you to gate Project 1 on these answers. We picked the defaults that match the reference integration in your `cipherstash/stack` repo (where applicable) and that are simplest to ship. 
If you'd push back on either, the implication for our extension is small and bounded — we describe exactly what changes for each "no" answer below. After Project 1 ships, the cipherstash extension is something your team can iterate on directly; these defaults are us trying not to back you into a corner. + +## Background — what we're building, briefly + +`@prisma-next/extension-cipherstash` adds an `EncryptedString` column type to Prisma Next, backed by ZeroKMS for encryption and EQL for searchable queries. The user-facing surface looks like this: + +```ts +// Schema (PSL or TS) +model User { + id String @id + email EncryptedString({ equality: true, freeTextSearch: true }) +} + +// Query +await db.insert(User, { id: '1', email: EncryptedString.from('alice@example.com') }); +const rows = await db.findMany(User, { where: { email: { equals: 'alice@example.com' } } }); +await decryptAll(rows); +console.log(await rows[0].email.decrypt()); // 'alice@example.com' +``` + +Internally: + +- `EncryptedString.from(plaintext)` produces an envelope object whose internal handle holds the plaintext until middleware encrypts it. +- A bulk-encrypt middleware runs in `beforeExecute` and rewrites every cipherstash envelope's plaintext into ciphertext via `sdk.bulkEncrypt(...)` calls — one call per "routing key" group (defined below). +- The codec emits the ciphertext to the wire as `eql_v2_encrypted` JSONB; reads decode it back into envelopes. +- `await envelope.decrypt()` decrypts a single cell on demand. `await decryptAll(rows)` walks a result set and bulk-decrypts everything in one round-trip per routing key. + +The two questions below are both about the bulk-encrypt middleware path on the write side. + +## Question 1 — Routing-key derivation + +### What we're asking + +When the bulk-encrypt middleware sees, say, 50 envelopes in one query — some heading to `users.email`, some to `users.phone`, some to `accounts.recovery_email` — how should it group them into `sdk.bulkEncrypt(...)` calls? 
+ +We're going with: **group by `(table, column)`. One bulk call per `(table, column)` group. No user-facing override.** That means a query inserting 30 emails + 20 phones makes two `bulkEncrypt` calls, not one. + +We want to know whether that grouping matches what ZeroKMS expects, and whether we're missing a use case where the routing key needs to carry more information than `(table, column)`. + +### Why this is even a decision + +Your bulk-encrypt API has two surfaces in the reference repo: + +- **`bulkEncrypt(plaintexts, { column, table })`** — homogeneous, one `(table, column)` per call. (`reference/cipherstash/stack/packages/protect/src/ffi/index.ts:386`) +- **`bulkEncryptModels(models, table)`** + the underlying **`encryptBulk(client, { plaintexts: heterogeneousArray })`** — heterogeneous, where each entry in the array can carry its own `{ table, column }`. (`reference/cipherstash/stack/packages/protect/src/ffi/model-helpers.ts:665`) + +The heterogeneous shape would let the middleware do *one* SDK call per query regardless of how many distinct columns are involved. The homogeneous shape requires us to chunk by `(table, column)` and make N calls. The heterogeneous version is fewer round-trips; the homogeneous version is simpler and more obvious about cost. + +We picked the homogeneous shape — partly because it's the "primary" surface in your public API documentation, partly because we want users to be able to read the middleware code and immediately understand "one round-trip per `(table, column)`", and partly because it locks the SDK boundary at a smaller, simpler interface. Our `CipherstashSdk.bulkEncrypt` signature is: + +```ts +bulkEncrypt(args: { + routingKey: { table: string; column: string }; + values: ReadonlyArray<string>; + signal?: AbortSignal; +}): Promise<ReadonlyArray<string>>; +``` + +So at the seam between our middleware and your SDK, the contract is one homogeneous batch per call. 
+ +### What this means for users + +In the `encryptedString({...})` factory (the user-facing column-type declaration), we **do not** expose any field that affects routing — no `keyId`, no `dataset`, no per-column override. The column's "routing key" is purely derived from where it lives in the contract (i.e. its `(table, column)` pair). If a user wants a column to encrypt under a different ZeroKMS dataset/key, today they don't have a knob for it; they'd configure it via your SDK setup outside our extension. + +### Specific things we'd love to hear from you + +1. **Is `(table, column)` the right primary routing dimension?** Our reading is that it is — `EncryptOptions = { column, table }` in your types — but we want to confirm there isn't a customer pattern where the dataset/key-id varies per *something else* (per-tenant, per-environment, per-row category) such that "always derive from `(table, column)`" would be wrong by default. +2. **Should the user be able to override routing on a per-column basis from the schema?** For example, do you have customers who'd want to write `email: encryptedString({ equality: true, datasetId: 'pii-keys' })` to make `users.email` encrypt under a different key than `users.legal_name`? Today our default is "no, this isn't a thing in Project 1" — if you say "yes, customers ask for this", we'd add an optional `datasetId?: string` field on `encryptedString({...})` and thread it through the routing key. That's a small, additive change. +3. **How do your customers handle multi-tenant deployments today?** Specifically: one Node process serving many tenants, with each tenant getting different ZeroKMS keys. Our implicit assumption is that this is solved by constructing one `db` runtime per tenant, each with its own SDK instance — i.e. tenancy lives one level above our extension. If customers expect tenancy to be expressible *inside* a single runtime via per-call routing, we'd need a different shape. +4. 
**Are we losing anything material by going homogeneous-per-`(table, column)` instead of heterogeneous-per-query?** A query inserting envelopes for 5 different columns will make 5 `bulkEncrypt` calls in our shape vs. 1 in the heterogeneous shape. We accept that as a clarity-vs-throughput trade-off. If 5 round-trips is a problem at the latencies you typically see, we'd want to know — that's the kind of feedback that'd push us toward the heterogeneous shape. + +### What changes for each answer + +- **"Yes, `(table, column)` is right; no per-column override needed."** Zero changes; we ship as designed. +- **"Yes, but customers want a per-column dataset/key-id override."** We add `datasetId?: string` (or whatever you prefer to call it) to `encryptedString({...})`, thread it through the envelope handle's routing-key tuple, and expose it on the `bulkEncrypt` args. Bounded refactor, additive, doesn't break existing callers. +- **"You should be using the heterogeneous shape — `(table, column)`-chunking is too many round-trips."** Larger refactor: the `CipherstashSdk` interface widens to take a heterogeneous payload, the middleware drops the per-`(table, column)` grouping, and we coordinate one call per query. Still bounded — a couple of files in our extension. +- **"Tenancy needs to be expressible per-call, not per-runtime."** Larger conversation. Likely a per-execute "context" hook on our runtime that the middleware reads to pick a routing key. We'd want to talk through what shape works for you. + +## Question 2 — Plaintext zeroing post-encrypt + +### What we're asking + +When the bulk-encrypt middleware finishes encrypting an envelope's plaintext, the envelope's internal handle now holds *both* the original plaintext (still in the `plaintext` slot, because that's how it got there from `EncryptedString.from(...)`) and the freshly-computed ciphertext. 
The question is whether we should overwrite the plaintext slot with `undefined` post-encryption, so the GC can reclaim the original plaintext string sooner. + +We're going with: **no, don't zero. Leave the plaintext on the handle. The user keeps both the plaintext and the ciphertext until they release their reference to the envelope.** + +We want to know whether that posture matches what your team would recommend, or whether you'd want us to zero by default to align with the rest of the cipherstash ecosystem's hygiene expectations. + +### Why this is a decision worth making explicitly + +Plaintext on the heap after encryption is a security-hygiene concern. If a Node process's heap is dumped (debugger, crash dump, leaked log line, attached observability tool), recently-encrypted plaintext shouldn't sit around waiting for GC any longer than necessary. The textbook posture is "drop the reference as soon as you're done with it". + +But: in JavaScript, "zeroing" a plaintext string means setting the slot holding the string reference to `undefined`. The original `string` value is immutable — there's no way to actually overwrite the bytes from JS. So the security win is real but bounded: we'd narrow the window between encryption and GC, but we wouldn't eliminate the plaintext from memory deterministically. + +Two things made us pick "don't zero" as the default: + +1. **Mostly-symbolic security gain.** The win is "one fewer reference" until GC runs. Strict-hygiene users who need real zeroization can't get it from a JS string anyway; they'd need a `Buffer.fill(0)` discipline or out-of-process KMS — neither of which is solved by us zeroing the handle slot. +2. **Useful side effect we get by not zeroing: synchronous read-back.** A user who writes `const env = EncryptedString.from('x'); await db.insert(...); console.log(await env.decrypt())` gets the original plaintext back synchronously, without an SDK round-trip, because the handle still holds the plaintext. 
With zeroing on, `env.decrypt()` post-write would either error (no plaintext, no SDK binding on a write-side envelope) or quietly hit the SDK. Either is a footgun. + +### What this means for users + +- A user who wants the plaintext back after a write gets it for free, no SDK call. +- A user with strict secrets-hygiene requirements drops envelope references promptly themselves; once nothing holds the envelope, the GC reclaims the plaintext along with everything else on the handle. +- We don't expose an `envelope.dispose()` API in Project 1. If real customers ask for explicit zeroization, that's a phase-2 add-on. + +### Specific things we'd love to hear from you + +1. **What's your team's recommended default?** Specifically: in the existing Drizzle integration and your Prisma plugin, do you actively drop plaintext references post-encrypt, or do you leave them for GC like we're proposing? +2. **Have customers asked for explicit zeroization?** A `dispose()` method, a `using envelope = ...` Symbol.dispose pattern, anything like that? We'd rather match existing customer expectations than invent one. +3. **Is there a documented threat-model statement we can point users at?** We'd like our extension's docs to be consistent with whatever you say to customers about plaintext residency. If you have a doc page or an RFC, we'll link to it; if you don't, we'll write something neutral and run it past you. +4. **Does your SDK do anything notable with plaintext memory during/after encryption?** E.g. does the SDK take ownership of plaintext buffers and zero its own copies? If so, our "we keep a copy on the handle" default looks worse next to your hygiene; if not, our default is consistent with the rest of the chain. + +### What changes for each answer + +- **"Your default is fine; we don't have customers who'd push back."** Zero changes; we ship as designed. 
+- **"Default should be to zero post-encrypt, the way Drizzle does it."** One-line code change: re-add `handle.plaintext = undefined` to our `setHandleCiphertext` helper. Flip the relevant acceptance criterion. We'd update docs to call out the side effect ("`decrypt()` after a write makes an SDK call") explicitly. +- **"You should expose a `dispose()` API too."** Small additive change — implement `envelope[Symbol.dispose]()` (or `dispose()`, depending on what you'd prefer) that nukes the entire handle. Doesn't change the default behavior, just gives strict-hygiene users a knob. +- **"Both: zero by default *and* expose dispose."** Combine the above two. + +## Adjacent topics we'd appreciate input on (lower-priority) + +These aren't blocking design decisions — we have working answers — but if any of them snag in conversation, we'd rather hear about it now than after we ship. + +### EQL bundle vendoring + +We're currently planning to vendor the EQL install SQL bundle from your reference repo (`reference/cipherstash/stack/packages/stack/src/prisma/core/eql-bundle.ts`) into our extension package. Two things we want to confirm: + +- **Is there a maintained source for the EQL bundle that we should pull from instead?** A versioned npm package, a release artifact, an upstream repo we can pin to a tag? Our preference is "fetch from a versioned source" over "vendor a snapshot", but only if the source exists and is stable. +- **What's your release cadence for EQL?** When you ship a new EQL version, what's the path for our users to upgrade? Today the only path we have planned is "we bump the vendored bundle in a new extension release"; if that's wrong, we'd want to align. + +### Live-EQL integration testing + +Project 1's M2.c milestone needs a Postgres database with EQL installed, reachable from our test runner. 
We have a working pattern for live-Postgres tests (`pnpm test:integration` spins up containers); we plan to extend it to install the EQL bundle on container boot via our `databaseDependencies.init` machinery. Two questions: + +- **Is there a recommended Postgres + EQL test image / docker-compose setup you use internally?** If yes, we'd rather mirror it than reinvent. If no, we'll publish what we end up with. +- **Are there gotchas in EQL install we should know about?** Required Postgres extensions, role/privilege requirements beyond superuser, ordering constraints with other extensions, anything that's bitten you. The reference `eql-bundle.ts` looks self-contained but we'd appreciate "watch out for X" notes if you have them. + +### Operator-lowering shape (M3, not M2.c) + +Our M3 milestone implements the `eq` and `ilike` operators against cipherstash columns, which means we lower `where: { email: { equals: 'x' } }` into something like `eql_v2.eq("email", eql_v2.encrypt($1, ...))`. We're planning to defer to your `reference/cipherstash/.../operation-templates.ts` file as the source of truth for the exact SQL function calls. Question for when we get to M3: + +- **Is `operation-templates.ts` still the canonical reference?** If you've moved to a different shape since the first-attempt repo, point us at it. +- **Does the EQL operator surface have any quirks we should plan around?** E.g. operand ordering, null handling, casting requirements. We've sketched out null short-circuiting (`email IS NULL` lowers to plain SQL, not EQL) but there may be others. + +This isn't blocking right now; raising it now in case the conversation naturally goes there. + +## Summary — what we need from you today + +If we only get a few minutes: + +1. **Routing key**: "Does deriving the routing key from `(table, column)`, with no per-column user override, match how you expect customers to use ZeroKMS in Postgres?" +2. 
**Plaintext zeroing**: "Should our default be to drop the plaintext reference post-encrypt, or to leave it on the envelope?" + +If we get longer, the EQL bundle and integration-testing topics are the most useful to walk through, since they affect M2.c delivery directly. Operator lowering can wait until M3. + +Thanks for the time. Project 1 is on track and we expect to deliver an end-to-end demoable searchable-encryption MVP shortly; this conversation is about making sure the defaults we ship don't paint your team into a corner when you take ownership of the extension. \ No newline at end of file diff --git a/projects/cipherstash-integration/project-1/plan.md b/projects/cipherstash-integration/project-1/plan.md index 2a05b0c1d0..edd5372b45 100644 --- a/projects/cipherstash-integration/project-1/plan.md +++ b/projects/cipherstash-integration/project-1/plan.md @@ -49,6 +49,8 @@ Two PRs are open against `main` that touch surfaces this project also touches. P ## M1 — Framework SPI +**Status: ✅ SATISFIED.** All 28 M1-owned ACs PASS; reviewed across two rounds; `AC-AST1..5`, `AC-LOW1..6`, `AC-PLAN1..3`, `AC-MUT1..5`, `AC-EX1`, `AC-ABT1..4`, `AC-FAM1..2`, `AC-TYPE1..2` all promoted with file:line evidence. AC-E2E1/AC-E2E2 from `raw-sql-ast-node` are migration-factories-coupled and stay M3-scoped. Mongo runtime wiring deferred to [TML-2376](https://linear.app/prisma-company/issue/TML-2376) — see § Open items 7. Commits: `1d8b70943..9425690fa` (raw-sql-ast-node + AC-ABT1) and `314011400..33a6e5ad5` (param-mutator + AC-ABT2..4 + family + types). + **Goal.** Land the two framework-side prerequisites (`RawSqlExpr` AST node + lowerer arm; `beforeExecute` mutator + `MiddlewareContext.signal`) on `main`. No cipherstash surface yet. **Visible value.** Other extensions immediately benefit from the seams. 
After M1, any extension author can write a bulk-pattern middleware following the [middleware-param-transform task spec](specs/middleware-param-transform.spec.md)'s grounding example, and any caller can construct a `RawSqlExpr`-bearing `SqlQueryPlan` for `dataTransform` consumption. @@ -86,6 +88,12 @@ Two PRs are open against `main` that touch surfaces this project also touches. P ## M2 — Store-only round-trip +**Status: 🟡 PARTIALLY SHIPPED.** Split into three sub-rounds during execution; M2.a + M2.b SATISFIED; M2.c remaining. Full breakdown: + +- **M2.a — package skeleton + envelope + codec — ✅ SATISFIED** (12 ACs PASS: `AC-PKG1..3`, `AC-ENV1/2/4`, `AC-CODEC1..5`, `AC-INSTALL1`). Bootstraps `packages/3-extensions/cipherstash/`; `EncryptedString` envelope with module-scoped `WeakMap` handle storage; `cipherstash/string@1` codec; `RuntimeParameterizedCodecDescriptor` with arktype `{equality, freeTextSearch}` schema; `databaseDependencies.init` shape with placeholder install SQL; `CipherstashSdk` interface (`decrypt`/`bulkEncrypt`/`bulkDecrypt`). Commits: `2b2efbe75..2d05b90d3` + `6bbbee20f..0d558b1b2` (F3+F4 cleanup). +- **M2.b — PSL constructor + TS factory + parity — ✅ SATISFIED** (12 ACs PASS: `AC-CTOR1..4`, `AC-LOWER1..4`, `AC-ALIAS1..2`, `AC-PARITY1..2`). PSL constructor `cipherstash.EncryptedString({ equality, freeTextSearch })`; TS factory `encryptedString({...})`; PSL↔TS parity fixture at `test/integration/test/authoring/parity/cipherstash-encrypted-string/`; `dbInit` DDL snapshot (no live DB). Required a framework-level addition: `kind: 'boolean'` arm on `AuthoringArgumentDescriptor` (additive, three-file change, zero impact on existing extensions). Commits: `584bbcda6..c48d4d7ad`. Codec-SDK binding refactor deferred to a follow-up Linear ticket — see § Open items 8. +- **M2.c — bulk-encrypt middleware + live integration — ⏳ NOT STARTED.** Remaining M2 work; entry conditions and task list documented below. 
+ **Goal.** `EncryptedString` works as a column type for *storage* — encrypt on write, decode-into-envelope on read, retrieve plaintext via `await envelope.decrypt()`. No search operators yet (queries are key-lookup or full-table scan only). No migration factories yet (the test suite uses hand-written DDL fixtures). **Visible value.** End-to-end-demoable encrypted column. A test inserts plaintext, the SDK is hit once via bulk-encrypt middleware, the row lands in Postgres as encrypted JSONB, a `findUnique` decodes it back to an envelope, `await envelope.decrypt()` returns the plaintext. @@ -123,10 +131,27 @@ Two PRs are open against `main` that touch surfaces this project also touches. P **Commit.** One or two PRs depending on review size. The PSL constructor (and its parity test) and the runtime/codec/middleware/install can naturally split. +### M2.c remaining work — concrete task list + +> Picked up by the developer continuing this project. Each task has the AC(s) it clears. + +- [ ] **T2.c.1 — Vendor real EQL bundle.** Replace the placeholder string in `packages/3-extensions/cipherstash/src/core/eql-bundle.ts` (~17 lines today, marked `TODO M2.c`) with the real `EQL_INSTALL_SQL` constant copied from `/Users/wmadden/Projects/prisma/prisma-next-ws/worktrees/cipherstash-integration/reference/cipherstash/stack/packages/stack/src/prisma/core/eql-bundle.ts` (untracked file in the adjacent worktree, ~170 KB inlined SQL). Confirms `AC-INSTALL1` against the real bundle. +- [ ] **T2.c.2 — `bulkEncryptMiddleware` factory.** Implement at `packages/3-extensions/cipherstash/src/middleware/bulk-encrypt.ts`. The stub at `src/exports/middleware.ts` (currently `export {}`) becomes the public re-export. Uses M1's `SqlParamRefMutator.entries()` + `replaceValues()` to rewrite cipherstash envelope plaintexts to ciphertexts in one bulk call per routing key. 
Per the spec's `bulkEncryptMiddleware(sdk: CipherstashSdk)` shape and § Bulk-encrypt middleware code in `specs/envelope-codec-extension.spec.md`. **Sub-task:** remove the `handle.plaintext = undefined` line from `setHandleCiphertext` in `packages/3-extensions/cipherstash/src/core/envelope.ts:44-48` per § Open items 6 (Project 1 does not zero plaintext post-encrypt) and update the docstrings on `from(plaintext)` + the handle interface to match. Clears `AC-MW1..5` (note `AC-MW5` was flipped — see envelope-codec spec § Acceptance Criteria). +- [ ] **T2.c.3 — Routing-key derivation.** Implement `groupByRoutingKey(targets)` per § Open items 5 — default "always derived from `(table, column)`". Confirm with CipherStash team; if CS confirms a different default, escalate as a deferral / spec amendment. +- [ ] **T2.c.4 — Live-Postgres + EQL integration test (storage round-trip).** Hand-write a `migration.ts` test fixture under `test/integration/` that exercises the M2 storage round-trip: insert via `db.insert(User, { email: EncryptedString.from('alice@example.com') })`; verify the wire row is `eql_v2_encrypted` JSONB; `findUnique` returns an envelope; `await envelope.decrypt()` returns the plaintext. Uses a mock `CipherstashSdk` (counter-instrumented) so the bulk-call assertion in the next bullet is clean. Clears the live-DB portion of `AC-E2E1` (storage subset). +- [ ] **T2.c.5 — Bulk-call counter test.** Add an integration assertion: inserting 10 rows × 1 column issues exactly **one** `bulkEncrypt` call. Clears the storage half of `AC-E2E2` (the read-side `bulkDecrypt` half is M4-scoped via `decryptAll`). +- [ ] **T2.c.6 — `dbInit` against a fresh Postgres database.** Verify `eql_v2` schema is reachable; `cs_configuration_v2` table exists; re-running `dbInit` is idempotent (hits the precheck short-circuit). Clears `AC-INSTALL2` + `AC-INSTALL3`. 
+- [ ] **T2.c.7 — Project 1 (PSL-driven) end-to-end test.** A second integration test driven entirely from PSL (the `psl-encrypted-string-constructor` task spec's `AC-E2E1`) covering the same storage round-trip. Should reuse most of T2.c.4's harness with a different contract source. +- [ ] **T2.c.8 — Validate gates.** `pnpm typecheck`, `pnpm test:packages`, `pnpm test:integration` (or its scoped equivalent), `pnpm lint:deps` all green. The cipherstash package gains `@prisma-next/sql-relational-core` as a runtime dep (for `SqlParamRefMutator` + `ParamRefHandle` types) if not already present. + +**Entry conditions.** Live Postgres database with EQL extension installed (or installable by `dbInit`) reachable from the test runner. The CI `test:integration` scripts spin up Postgres in containers; confirm the EQL bundle install works against that setup before expanding the harness. + --- ## M3 — `eq` operator + manual `addSearchConfig` migration +**Status: ⏳ NOT STARTED.** Blocked on M2.c. No commits, no ACs promoted. + **Goal.** A `findMany({ where: { email: { equals: 'alice@example.com' } } })` against a cipherstash column works against live Postgres + EQL. The user authors a hand-written migration calling `cipherstash.addSearchConfig({ table, column, equality: true })`; the migration installs the EQL search-config row; the query works. **Visible value.** Searchable encryption is real. Equality search on encrypted columns — the headline cipherstash feature — works end-to-end on the framework. @@ -161,6 +186,8 @@ Two PRs are open against `main` that touch surfaces this project also touches. P ## M4 — `ilike` + `activatePendingSearches` + `decryptAll` +**Status: ⏳ NOT STARTED.** Blocked on M3. 
+ + **Goal.** Complete the Project 1 user-facing surface: `findMany({ where: { email: { contains: 'alice' } } })` works (free-text search via EQL `ilike`); `decryptAll(rows)` materializes plaintext for batches of envelopes; the migration factories cover both `equality` and `freeTextSearch` modes plus the `activatePendingSearches` final step. **Visible value.** All Project 1 acceptance criteria (UMB1–UMB7) green. The umbrella's "ship a coherent searchable-encryption slice" promise is met. @@ -200,6 +227,8 @@ Two PRs are open against `main` that touch surfaces this project also touches. P ## M5 — Close-out +**Status: ⏳ NOT STARTED.** Blocked on M4. + **Scope.** Project lifecycle close-out per `projects/README.md`. **Tasks.** @@ -221,13 +250,21 @@ Two PRs are open against `main` that touch surfaces this project also touches. P # Status +> Last updated 2026-05-06. Detailed AC scoreboard lives in [`reviews/code-review.md`](reviews/code-review.md). Branch: `tml-2373-project-1-searchable-encryption-mvp`. AC totals at last update: **52 PASS / 0 FAIL / 48 NOT VERIFIED**.
+ | Milestone | Scope | Status | |---|---|---| -| M1 — Framework SPI | `raw-sql-ast-node` + `middleware-param-transform` | not started | -| M2 — Store-only round-trip | `psl-encrypted-string-constructor` + `envelope-codec` storage path | blocked on M1 | -| M3 — `eq` operator + manual `addSearchConfig` | Operator lowering + migration factories (`equality` mode) | blocked on M2 | -| M4 — `ilike` + `activatePending` + `decryptAll` | Remaining surface from `envelope-codec` + `migration-factories` | blocked on M3 | -| M5 — Close-out | Lifecycle close-out per `projects/README.md` | blocked on M4 | +| **M1 — Framework SPI** | `raw-sql-ast-node` + `middleware-param-transform` | **SATISFIED** (28 ACs PASS; reviewed across two rounds) | +| **M2.a — Cipherstash package skeleton + envelope + codec** | Bootstrap `packages/3-extensions/cipherstash/`; `EncryptedString` envelope + handle; `cipherstash/string@1` codec; `RuntimeParameterizedCodecDescriptor`; `databaseDependencies.init` shape (placeholder install SQL) | **SATISFIED** (12 ACs PASS; reviewed; F3+F4 cleanup) | +| **M2.b — PSL constructor + TS factory + parity** | `cipherstash.EncryptedString({ equality, freeTextSearch })` PSL constructor; `encryptedString({...})` TS factory; PSL↔TS parity fixture; dbInit DDL snapshot (no live DB) | **SATISFIED** (12 ACs PASS; reviewed; 0 findings) | +| **M2.c — Bulk-encrypt middleware + live integration** | `bulkEncryptMiddleware` factory consuming M1's mutator + signal; vendor real `EQL_INSTALL_SQL` from `reference/cipherstash/...`; live-Postgres + EQL integration test for storage round-trip; bulk-call counter | NOT STARTED — requires live Postgres + EQL infra (`AC-INSTALL2`/`AC-INSTALL3`/`AC-E2E1..3`) | +| **M3 — `eq` operator + manual `addSearchConfig`** | Operator lowering for `eq` against cipherstash columns; `addSearchConfig({ equality })` migration factory; integration test driving a real migration file | NOT STARTED | +| **M4 — `ilike` + `activatePending` + `decryptAll`** | 
`ilike` arm on operator lowering; `decryptAll(rows, opts?)` walker; `freeTextSearch` migration mode; `activatePendingSearches()` factory; full `AC-UMB1..7` | NOT STARTED | +| **M5 — Close-out** | Lifecycle close-out per `projects/README.md` (migrate long-lived docs, strip `projects/` references, delete `projects/cipherstash-integration/project-1/`) | NOT STARTED | + +**M2 sub-round split rationale.** The plan describes M2 as a single milestone with the closing note "Commit. One or two PRs depending on review size." The orchestration cycle split it into three sub-rounds so each lands a coherent, reviewable slice without blocking on infrastructure that isn't yet configured: M2.a is unit-testable in isolation, M2.b adds the authoring surface and a parity test that runs without a live DB, M2.c adds the middleware and exercises the live-Postgres + EQL path. M2.a and M2.b are SATISFIED on the branch; M2.c is the remaining M2 work. + +**M2.c entry conditions.** The implementer can land the bulk-encrypt middleware, vendor the real EQL bundle, and write the integration tests without infrastructure — but `AC-INSTALL2`, `AC-INSTALL3`, and `AC-E2E1..3` only clear once a live Postgres database with EQL installed is reachable from the test runner. The reference EQL bundle lives in an adjacent worktree at `/Users/wmadden/Projects/prisma/prisma-next-ws/worktrees/cipherstash-integration/reference/cipherstash/stack/packages/stack/src/prisma/core/eql-bundle.ts` (untracked there); the M2.c implementer copies it into `packages/3-extensions/cipherstash/src/core/eql-bundle.ts` and replaces the placeholder constant. # Open items @@ -235,7 +272,9 @@ Two PRs are open against `main` that touch surfaces this project also touches. P 2. 
**Operator lowering source of truth.** [Open question 1 of the envelope-codec task spec](specs/envelope-codec-extension.spec.md#open-questions) — confirm against `reference/cipherstash/stack/packages/stack/src/prisma/core/operation-templates.ts` whether the lowering matches that file's templates byte-for-byte or has minor differences (e.g. `eql_v2.encrypt` wrapping vs an EQL operator-class override). Resolve in M3. 3. **Migration factory naming — single vs split.** [Open question 2 of the migration-factories task spec](specs/migration-factories.spec.md#open-questions) — confirm whether `addSearchConfig` returns an array (current default) or a grouped op. Resolve in M3 implementation. 4. **EQL `activate_pending_searches` exact function name.** [Open question 1 of the migration-factories task spec](specs/migration-factories.spec.md#open-questions) — defer to first-attempt repo's name; confirm against the bundled EQL version. Resolve in M4. -5. **Routing-key derivation.** [Open question 4 of the umbrella spec / Project 1 spec](spec.md#open-questions) — does `encryptedString({ ... })` need an explicit per-column key id slot? Default is "always derived from `(table, column)`." Resolve at the start of M2; confirm with CipherStash team. -6. **Plaintext-zeroing default.** [Open question 5 of the envelope-codec task spec](specs/envelope-codec-extension.spec.md#open-questions) — does the bulk-encrypt middleware overwrite the handle's plaintext slot with `undefined` post-encrypt? Default yes (memory hygiene). Resolve in M2. +5. **Routing-key derivation — RESOLVED (2026-05-06).** Routing key is `{ table, column }`, derived from the envelope handle's `(table, column)` slots. Middleware groups envelopes by `(table, column)` and issues one `bulkEncrypt` call per group. No per-column key-id override is exposed on `encryptedString({...})` in Project 1. 
The shape matches the reference SDK's `bulkEncrypt(plaintexts, { column, table })` call (`reference/cipherstash/.../ffi/index.ts:386-391`) and is already locked into the `CipherstashSdk` interface shipped in M2.a (`packages/3-extensions/cipherstash/src/core/sdk.ts:55-59`). The CipherStash team will be consulted post-delivery; if they want a per-column override or a different routing-key shape, they can extend the extension surface in a follow-up. See `cipherstash-team-questions.md` § Routing-key derivation for the question being raised with the team. +6. **Plaintext-zeroing default — RESOLVED (2026-05-06).** Project 1 does **not** zero the envelope handle's plaintext slot post-encrypt. Rationale: zeroing in JS is best-effort (strings are immutable; the win is bounded), and as a side effect a write-side envelope's `decrypt()` returns the original plaintext synchronously without a round-trip. The M2.a-shipped `setHandleCiphertext` helper currently sets `handle.plaintext = undefined` (`packages/3-extensions/cipherstash/src/core/envelope.ts:44-48`) — the M2.c implementer removes that line as part of T2.c.2 and flips `AC-MW5` accordingly. No explicit `dispose()` API in Project 1; secrets-hygiene strictness stays a phase-2 concern. See `cipherstash-team-questions.md` § Plaintext zeroing for the question being raised with the team. +7. **Mongo middleware param-mutator runtime wiring — deferred out of Project 1.** [TML-2376](https://linear.app/prisma-company/issue/TML-2376) tracks the follow-up. `middleware-param-transform` shipped the Mongo type seam + `flattenMongoParamRefs` helper + unit tests (satisfying `AC-FAM1`/`AC-FAM2` at the AC-text level) in M1, but `MongoRuntime` does not yet construct and thread a `MongoParamRefMutator` through `beforeExecute`. 
End-to-end wiring requires deferring `resolveValue` past the middleware chain in `packages/3-mongo-target/2-mongo-adapter/src/mongo-adapter.ts`, which is an architectural change to Mongo's lowering contract and outside M1's scope. Project 1 (Postgres-only) does not depend on the Mongo runtime wiring; this is a framework-symmetry follow-up. +8. **Codec-SDK binding refactor — deferred out of Project 1.** Filed as [TML-2388](https://linear.app/prisma-company/issue/TML-2388) (Medium-priority framework-symmetry follow-up under TML-2373); see `reviews/code-review.md § Orchestrator notes — M2.b R1` for the original accepted-deferral record. M2.b needed an SDK-free pack-meta codec (`cipherstashStringCodecMetadata`) because cipherstash's runtime codec captures `CipherstashSdk` in its `decode` closure, which collides with pack-meta consumers that read codec metadata at contract-emit time before any SDK binding exists. The M2.b-shipped workaround is two codecs representing one logical codec — fine for Project 1's bounded scope but a framework-ergonomics gap every future network-backed codec extension will hit. The clean fix is to thread SDK (or per-call context) through `CodecCallContext` rather than capturing it at codec construction; that refactor touches every codec in the repo and is M3+ framework scope. Each open item is targeted to the milestone where the answer is needed; none block the start of M1. diff --git a/projects/cipherstash-integration/project-1/spec.md b/projects/cipherstash-integration/project-1/spec.md index 2b0d283f19..568db56a2c 100644 --- a/projects/cipherstash-integration/project-1/spec.md +++ b/projects/cipherstash-integration/project-1/spec.md @@ -20,13 +20,15 @@ This project is the *production* integration — superseding the first attempt # Status +> Milestone-level breakdown + AC scoreboard live in [`plan.md`](plan.md) and [`reviews/code-review.md`](reviews/code-review.md). Last updated 2026-05-06.
+ | Task spec | Status | |---|---| -| [envelope-codec-extension](specs/envelope-codec-extension.spec.md) — runtime pattern + codec + EQL bundle install + operator lowering | Drafted | -| [middleware-param-transform](specs/middleware-param-transform.spec.md) — mutable `beforeExecute` seam | Drafted | -| [psl-encrypted-string-constructor](specs/psl-encrypted-string-constructor.spec.md) — PSL `cipherstash.EncryptedString(...)` constructor + parity test | Drafted | -| [raw-sql-ast-node](specs/raw-sql-ast-node.spec.md) — `RawSqlExpr` AST node + Postgres lowerer arm + `planFromAst` envelope helper | Drafted | -| [migration-factories](specs/migration-factories.spec.md) — `addSearchConfig` / `activatePendingSearches` as `DataTransformOperation`s carrying `invariantId`s | Drafted | +| [raw-sql-ast-node](specs/raw-sql-ast-node.spec.md) — `RawSqlExpr` AST node + Postgres lowerer arm + `planFromAst` envelope helper | **Shipped** in M1 (15 ACs PASS; AC-E2E1/E2E2 are migration-factories-coupled and remain M3-scoped) | +| [middleware-param-transform](specs/middleware-param-transform.spec.md) — mutable `beforeExecute` seam | **Shipped** in M1 (14 ACs PASS; Mongo runtime wiring deferred to [TML-2376](https://linear.app/prisma-company/issue/TML-2376)) | +| [psl-encrypted-string-constructor](specs/psl-encrypted-string-constructor.spec.md) — PSL `cipherstash.EncryptedString(...)` constructor + parity test | **Shipped** in M2.b (12 of 13 ACs PASS; AC-E2E1 deferred to M2.c — needs live Postgres + EQL) | +| [envelope-codec-extension](specs/envelope-codec-extension.spec.md) — runtime pattern + codec + EQL bundle install + operator lowering | **Partially shipped** — codec + envelope class + `RuntimeParameterizedCodecDescriptor` + stub `databaseDependencies.init` shipped in M2.a (12 ACs PASS); bulk-encrypt middleware (`AC-MW1..5`) + real EQL bundle vendor + live integration tests (`AC-INSTALL2/3`, `AC-E2E1..3`) deferred to M2.c; `decryptAll` (`AC-DEC1..4`) and operator lowering 
(`AC-OP1..4`) deferred to M4/M3 respectively | +| [migration-factories](specs/migration-factories.spec.md) — `addSearchConfig` / `activatePendingSearches` as `DataTransformOperation`s carrying `invariantId`s | Drafted (all ACs pending; scope is M3 + M4) | # Requirements @@ -129,7 +131,7 @@ The umbrella's acceptance criteria are the union of the four task specs' criteri ## Security - **Threat model.** Database operator and any party with raw database access cannot read encrypted columns. Network attackers cannot read encrypted columns in transit (already covered by TLS to Postgres). Application-layer compromise is not in scope — by definition the application must decrypt to operate. -- **Plaintext exposure window.** Plaintext lives on the envelope's internal handle from `from(plaintext)` until the envelope is GC'd. Bulk-encrypt middleware overwrites the handle's plaintext slot with `undefined` after writing the ciphertext (memory-hygiene default — see open question in envelope spec). +- **Plaintext exposure window.** Plaintext lives on the envelope's internal handle from `from(plaintext)` until the envelope is GC'd. Project 1 does not zero the plaintext slot post-encrypt (see `envelope-codec-extension.spec.md` § Open Question 5 for the decision record); a write-side envelope's `decrypt()` therefore returns the original plaintext synchronously without an SDK round-trip. Strict-hygiene users drop envelope references promptly so GC can reclaim the plaintext. An explicit `dispose()` API is a phase-2 add-on. - **Routing keys / dataset identifiers.** ZeroKMS routes bulk calls by `(dataset, keyId)`. The handle captures these from `SqlCodecCallContext.column` plus extension config. Misconfigured routing produces auth failures from ZeroKMS — not silent data corruption. - **EQL extension privileges.** EQL install requires database superuser (creates schemas, types, functions, operators). The `databaseDependencies.init` install runs under whatever role the user supplies; failure surfaces a clear DDL error.
Documented prerequisite. - **No new ADR.** Threat model and trust boundaries are an extension-package concern, documented in the package README. A future "encrypted columns ADR" can capture the pattern across extensions if Vault / AWS-KMS extensions land. diff --git a/projects/cipherstash-integration/project-1/specs/envelope-codec-extension.spec.md b/projects/cipherstash-integration/project-1/specs/envelope-codec-extension.spec.md index 56797ef2bf..2654800176 100644 --- a/projects/cipherstash-integration/project-1/specs/envelope-codec-extension.spec.md +++ b/projects/cipherstash-integration/project-1/specs/envelope-codec-extension.spec.md @@ -31,7 +31,7 @@ export class EncryptedString { The class owns its handle internally — closure / private field / WeakMap; the choice is an implementation detail. The handle carries: - **Write side** (after `from(plaintext)`): plaintext + an empty `ciphertext` slot. -- **After bulk-encrypt middleware runs**: ciphertext + the column identity from `SqlCodecCallContext.column` + SDK routing keys (dataset, key id) needed for `bulkEncrypt`. The plaintext slot is overwritten with `undefined` for memory hygiene. +- **After bulk-encrypt middleware runs**: ciphertext + the column identity from `SqlCodecCallContext.column` + SDK routing keys (dataset, key id) needed for `bulkEncrypt`. The plaintext slot is retained (see § Open Question 5 — Project 1 does not zero plaintext post-encrypt). As a side effect, a write-side envelope's `decrypt()` returns the original plaintext synchronously without an SDK round-trip. - **Read side** (after `codec.decode`): ciphertext + `{ table, column }` from `SqlCodecCallContext.column` + SDK routing keys needed for `bulkDecrypt`. The handle has **no exported TypeScript surface**. Inside the package, codec / middleware / `decryptAll` reach into the handle via package-internal helpers (a private symbol on the envelope, a `WeakMap`, or `#`-prefixed fields — implementation choice). 
@@ -246,7 +246,7 @@ packages/3-extensions/cipherstash/ - **Selective-by-column `decryptAll`.** First-pass walks every envelope it finds. Selective convenience is a follow-on if there's demand. - **KMS provider abstraction.** This package is CipherStash-specific. - **Re-implementing the CipherStash SDK.** Wraps the existing SDK; bulk surface mismatches escalate to the CipherStash team. -- **Automatic plaintext zeroing on the user's `from(plaintext)` argument.** The middleware overwrites the handle's plaintext slot post-encrypt; the user's original `string` argument lifecycle is the user's concern. +- **Plaintext zeroing of any kind.** Per § Open Question 5, Project 1 does not zero the envelope handle's plaintext slot post-encrypt, and does not expose an explicit `dispose()` API. Users with hardened secrets-hygiene requirements drop envelope references promptly to let GC reclaim plaintexts. Revisit if the CipherStash team or an early consumer asks for stricter behavior. - **Re-encryption migration.** Adopting CipherStash for an existing column requires a one-off data migration; not a primitive in this spec. # Acceptance Criteria @@ -278,7 +278,7 @@ packages/3-extensions/cipherstash/ - [ ] **AC-MW2**: For multiple routing keys, exactly one `bulkEncrypt` per group. - [ ] **AC-MW3**: The middleware forwards `ctx.signal` to the SDK; an aborted signal at `beforeExecute` entry surfaces `RUNTIME.ABORTED { phase: 'beforeExecute' }`. - [ ] **AC-MW4**: After the middleware runs, `codec.encode` receives ciphertext via the envelope's handle. -- [ ] **AC-MW5**: After the middleware runs, the handle's plaintext slot is `undefined` (memory hygiene). +- [ ] **AC-MW5**: After the middleware runs, the handle's plaintext slot retains its original value (Project 1 does not zero plaintext post-encrypt — see § Open Question 5). 
Verifiable via the public surface: `await envelope.decrypt()` on a write-side envelope returns the original plaintext synchronously without invoking the mock SDK's single-cell `decrypt` (counter remains 0). ## `decryptAll` @@ -345,10 +345,10 @@ See umbrella spec. # Open Questions 1. **Canonical EQL operator lowering shape.** The first-attempt's `operation-templates.ts` is the source-of-truth for the exact SQL function calls (`eql_v2.eq` vs `eql_v2.encrypted_eq` etc.). This spec defers to that file but flags the lift as a concrete task. -2. **Routing-key surface in user config.** ZeroKMS bulk calls group by `(dataset, keyId)`. Currently the handle is expected to derive routing keys from `(table, column)` plus extension-level config; whether per-column key-id override surfaces in the `encryptedString({...})` factory is open. Default: no per-column override; derived from `(table, column)`. +2. **Routing-key surface in user config — RESOLVED (2026-05-06).** Routing key is `{ table, column }`, derived from the envelope handle's `(table, column)` slots. No per-column key-id override on `encryptedString({...})` in Project 1. Matches the reference SDK's `bulkEncrypt(plaintexts, { column, table })` shape and is already locked into the `CipherstashSdk` interface (`packages/3-extensions/cipherstash/src/core/sdk.ts:55-59`). Question being raised with the CipherStash team — see `cipherstash-team-questions.md` § Routing-key derivation; if they want a different shape, they extend the surface in a follow-up. 3. **Phase tag for `decryptAll` aborts.** `decryptAll` runs *outside* a `runtime.execute()` call, so phase tags `'encode'` / `'decode'` / `'stream'` don't fit cleanly. Default: `'decode'` (user's mental model is "decode-side"). Consider inventing `'decrypt-all'` if we want stricter attribution. 4. 
**`JSON.stringify(envelope)` behavior.** Should it produce a placeholder (`{ "$encryptedString": "" }`), throw, or return `undefined` (which `JSON.stringify` treats as field omission)? Default: placeholder. Confirm. -5. **Plaintext memory hygiene strictness.** The middleware overwrites the handle's plaintext slot post-encrypt. Should the envelope class additionally implement explicit zeroization (e.g. `envelope.dispose()`) for users with hardened secrets-hygiene requirements? Default: no — lifecycle is GC-driven, dispose is a phase-2 add-on. +5. **Plaintext memory hygiene strictness — RESOLVED (2026-05-06).** Project 1 does not zero the envelope handle's plaintext slot post-encrypt, and does not expose an explicit `dispose()` API. Rationale: zeroing in JS is best-effort (strings are immutable); the GC-driven lifecycle is sufficient for Project 1's bounded scope. As a side effect, a write-side envelope's `decrypt()` returns the original plaintext synchronously without consulting the SDK. Question being raised with the CipherStash team — see `cipherstash-team-questions.md` § Plaintext zeroing; if they want stricter behavior, the M2.a-shipped `setHandleCiphertext` helper is the right hook to flip in a follow-up. 6. **`expandNativeType` for `eql_v2_encrypted`.** Pgvector's `expandNativeType` produces `vector(1536)` from `nativeType: 'vector'` + `typeParams: { length: 1536 }`. Cipherstash's `eql_v2_encrypted` is a fixed JSONB-domain type; the search-mode params don't affect the column's DDL type expression — they affect runtime behavior + the migration-factories DDL. Is `expandNativeType` a no-op for cipherstash? Default: yes, return `nativeType` unchanged. 
# Alternatives Considered diff --git a/test/integration/package.json b/test/integration/package.json index 2cb9e8a692..b85887e900 100644 --- a/test/integration/package.json +++ b/test/integration/package.json @@ -25,6 +25,7 @@ "@prisma-next/driver-postgres": "workspace:*", "@prisma-next/emitter": "workspace:*", "@prisma-next/extension-arktype-json": "workspace:*", + "@prisma-next/extension-cipherstash": "workspace:*", "@prisma-next/extension-pgvector": "workspace:*", "@prisma-next/family-sql": "workspace:*", "@prisma-next/ids": "workspace:*", diff --git a/test/integration/test/authoring/cipherstash-dbinit-snapshot.test.ts b/test/integration/test/authoring/cipherstash-dbinit-snapshot.test.ts new file mode 100644 index 0000000000..6de0d37a8d --- /dev/null +++ b/test/integration/test/authoring/cipherstash-dbinit-snapshot.test.ts @@ -0,0 +1,99 @@ +/** + * AC-LOWER4 — `dbInit` plan against a contract carrying a + * `cipherstash.EncryptedString`-typed column renders the column's + * native type as `eql_v2_encrypted`. Pure SQL-shape snapshot; no live + * Postgres required (the live-DB equivalent is exercised by M2.c's + * EQL integration tests). + * + * The snapshot drives the postgres-adapter DDL builder + * (`buildCreateTableSql`) directly with a synthesised + * `StorageTable` whose columns mirror what cipherstash's authoring + * layer lowers to. This gives us a deterministic byte-equal + * assertion on the SQL the migration planner would produce, without + * needing to spin up a database. 
+ */ + +import cipherstashControl from '@prisma-next/extension-cipherstash/control'; +import type { CodecControlHooks } from '@prisma-next/family-sql/control'; +import type { StorageTable } from '@prisma-next/sql-contract/types'; +import { buildCreateTableSql } from '@prisma-next/target-postgres/planner-ddl-builders'; +import { describe, expect, it } from 'vitest'; + +// Real cipherstash control-plane hooks pulled from the extension +// descriptor so the snapshot also pins the cipherstash extension's +// "search-mode typeParams do not affect DDL" decision in +// `exports/control.ts` — i.e. the column's SQL signature is always +// `eql_v2_encrypted`, regardless of `equality` / `freeTextSearch`. +const controlPlaneHooks = (cipherstashControl.types?.codecTypes?.controlPlaneHooks ?? {}) as Record< + string, + CodecControlHooks +>; +const codecHooks = new Map(Object.entries(controlPlaneHooks)); + +function cipherstashColumn(typeParams: Record<string, boolean>, nullable: boolean) { + return { + codecId: 'cipherstash/string@1', + nativeType: 'eql_v2_encrypted', + nullable, + typeParams, + } as const; +} + +const encryptedDocStorage: StorageTable = { + columns: { + id: { + codecId: 'pg/int4@1', + nativeType: 'int4', + nullable: false, + default: { kind: 'function', expression: 'autoincrement()' }, + }, + storageOnly: cipherstashColumn({}, false), + equalityOnly: cipherstashColumn({ equality: true }, false), + searchable: cipherstashColumn({ equality: true, freeTextSearch: true }, false), + storageOnlyOpt: cipherstashColumn({}, true), + equalityOnlyOpt: cipherstashColumn({ equality: true }, true), + searchableOpt: cipherstashColumn({ equality: true, freeTextSearch: true }, true), + }, + primaryKey: { columns: ['id'] }, + uniques: [], + indexes: [], + foreignKeys: [], +}; + +describe('cipherstash dbInit DDL snapshot (AC-LOWER4)', () => { + it('renders cipherstash columns with native type eql_v2_encrypted', () => { + const ddl = buildCreateTableSql('"public"."encrypted_doc"', 
encryptedDocStorage, codecHooks); + + // Each cipherstash-typed column must use the unparameterised + // eql_v2_encrypted native type — typeParams are search-mode + // metadata, not DDL adornments. + expect(ddl).toContain('"storageOnly" eql_v2_encrypted NOT NULL'); + expect(ddl).toContain('"equalityOnly" eql_v2_encrypted NOT NULL'); + expect(ddl).toContain('"searchable" eql_v2_encrypted NOT NULL'); + expect(ddl).toContain('"storageOnlyOpt" eql_v2_encrypted'); + expect(ddl).toContain('"equalityOnlyOpt" eql_v2_encrypted'); + expect(ddl).toContain('"searchableOpt" eql_v2_encrypted'); + + // Make sure we don't accidentally render a parameterised form + // (e.g. eql_v2_encrypted(true)). + expect(ddl).not.toMatch(/eql_v2_encrypted\(/); + }); + + it('matches a stable byte-exact CREATE TABLE snapshot', () => { + const ddl = buildCreateTableSql('"public"."encrypted_doc"', encryptedDocStorage, codecHooks); + expect(ddl).toBe( + [ + 'CREATE TABLE "public"."encrypted_doc" (', + ' "id" SERIAL NOT NULL,', + ' "storageOnly" eql_v2_encrypted NOT NULL,', + ' "equalityOnly" eql_v2_encrypted NOT NULL,', + ' "searchable" eql_v2_encrypted NOT NULL,', + ' "storageOnlyOpt" eql_v2_encrypted,', + ' "equalityOnlyOpt" eql_v2_encrypted,', + ' "searchableOpt" eql_v2_encrypted,', + ' PRIMARY KEY ("id")', + ')', + ].join('\n'), + ); + }); +}); diff --git a/test/integration/test/authoring/parity/cipherstash-encrypted-string/contract.ts b/test/integration/test/authoring/parity/cipherstash-encrypted-string/contract.ts new file mode 100644 index 0000000000..019d2d3f7d --- /dev/null +++ b/test/integration/test/authoring/parity/cipherstash-encrypted-string/contract.ts @@ -0,0 +1,25 @@ +import { int4Column } from '@prisma-next/adapter-postgres/column-types'; +import { encryptedString } from '@prisma-next/extension-cipherstash/column-types'; +import sqlFamily from '@prisma-next/family-sql/pack'; +import { defineContract, field, model } from '@prisma-next/sql-contract-ts/contract-builder'; +import 
postgresPack from '@prisma-next/target-postgres/pack'; + +export const contract = defineContract({ + family: sqlFamily, + target: postgresPack, + models: { + EncryptedDoc: model('EncryptedDoc', { + fields: { + id: field.column(int4Column).defaultSql('autoincrement()').id(), + storageOnly: field.column(encryptedString({})), + equalityOnly: field.column(encryptedString({ equality: true })), + searchable: field.column(encryptedString({ equality: true, freeTextSearch: true })), + storageOnlyOpt: field.column(encryptedString({})).optional(), + equalityOnlyOpt: field.column(encryptedString({ equality: true })).optional(), + searchableOpt: field + .column(encryptedString({ equality: true, freeTextSearch: true })) + .optional(), + }, + }).sql({ table: 'encrypted_doc' }), + }, +}); diff --git a/test/integration/test/authoring/parity/cipherstash-encrypted-string/expected.contract.json b/test/integration/test/authoring/parity/cipherstash-encrypted-string/expected.contract.json new file mode 100644 index 0000000000..b279e21ff0 --- /dev/null +++ b/test/integration/test/authoring/parity/cipherstash-encrypted-string/expected.contract.json @@ -0,0 +1,216 @@ +{ + "schemaVersion": "1", + "targetFamily": "sql", + "target": "postgres", + "profileHash": "sha256:1a8dbe044289f30a1de958fe800cc5a8378b285d2e126a8c44b58864bac2c18e", + "roots": { + "encrypted_doc": "EncryptedDoc" + }, + "models": { + "EncryptedDoc": { + "fields": { + "equalityOnly": { + "nullable": false, + "type": { + "codecId": "cipherstash/string@1", + "kind": "scalar", + "typeParams": { + "equality": true + } + } + }, + "equalityOnlyOpt": { + "nullable": true, + "type": { + "codecId": "cipherstash/string@1", + "kind": "scalar", + "typeParams": { + "equality": true + } + } + }, + "id": { + "nullable": false, + "type": { + "codecId": "pg/int4@1", + "kind": "scalar" + } + }, + "searchable": { + "nullable": false, + "type": { + "codecId": "cipherstash/string@1", + "kind": "scalar", + "typeParams": { + "equality": true, + 
"freeTextSearch": true + } + } + }, + "searchableOpt": { + "nullable": true, + "type": { + "codecId": "cipherstash/string@1", + "kind": "scalar", + "typeParams": { + "equality": true, + "freeTextSearch": true + } + } + }, + "storageOnly": { + "nullable": false, + "type": { + "codecId": "cipherstash/string@1", + "kind": "scalar", + "typeParams": {} + } + }, + "storageOnlyOpt": { + "nullable": true, + "type": { + "codecId": "cipherstash/string@1", + "kind": "scalar", + "typeParams": {} + } + } + }, + "relations": {}, + "storage": { + "fields": { + "equalityOnly": { + "column": "equalityOnly" + }, + "equalityOnlyOpt": { + "column": "equalityOnlyOpt" + }, + "id": { + "column": "id" + }, + "searchable": { + "column": "searchable" + }, + "searchableOpt": { + "column": "searchableOpt" + }, + "storageOnly": { + "column": "storageOnly" + }, + "storageOnlyOpt": { + "column": "storageOnlyOpt" + } + }, + "table": "encrypted_doc" + } + } + }, + "storage": { + "storageHash": "sha256:889f3bdc8ed6ba3a28669324ae16fd3986a65400d098ff5155bc436abd81c2d9", + "tables": { + "encrypted_doc": { + "columns": { + "equalityOnly": { + "codecId": "cipherstash/string@1", + "nativeType": "eql_v2_encrypted", + "nullable": false, + "typeParams": { + "equality": true + } + }, + "equalityOnlyOpt": { + "codecId": "cipherstash/string@1", + "nativeType": "eql_v2_encrypted", + "nullable": true, + "typeParams": { + "equality": true + } + }, + "id": { + "codecId": "pg/int4@1", + "default": { + "expression": "autoincrement()", + "kind": "function" + }, + "nativeType": "int4", + "nullable": false + }, + "searchable": { + "codecId": "cipherstash/string@1", + "nativeType": "eql_v2_encrypted", + "nullable": false, + "typeParams": { + "equality": true, + "freeTextSearch": true + } + }, + "searchableOpt": { + "codecId": "cipherstash/string@1", + "nativeType": "eql_v2_encrypted", + "nullable": true, + "typeParams": { + "equality": true, + "freeTextSearch": true + } + }, + "storageOnly": { + "codecId": 
"cipherstash/string@1", + "nativeType": "eql_v2_encrypted", + "nullable": false, + "typeParams": {} + }, + "storageOnlyOpt": { + "codecId": "cipherstash/string@1", + "nativeType": "eql_v2_encrypted", + "nullable": true, + "typeParams": {} + } + }, + "foreignKeys": [], + "indexes": [], + "primaryKey": { + "columns": ["id"] + }, + "uniques": [] + } + } + }, + "capabilities": { + "postgres": { + "jsonAgg": true, + "lateral": true, + "limit": true, + "orderBy": true, + "returning": true + }, + "sql": { + "defaultInInsert": true, + "enums": true, + "returning": true + } + }, + "extensionPacks": { + "cipherstash": { + "familyId": "sql", + "id": "cipherstash", + "kind": "extension", + "targetId": "postgres", + "types": { + "storage": [ + { + "familyId": "sql", + "nativeType": "eql_v2_encrypted", + "targetId": "postgres", + "typeId": "cipherstash/string@1" + } + ] + }, + "version": "0.0.1" + } + }, + "meta": {}, + "_generated": { + "warning": "⚠️ GENERATED FILE - DO NOT EDIT", + "message": "This file is automatically generated by \"prisma-next contract emit\".", + "regenerate": "To regenerate, run: prisma-next contract emit" + } +} diff --git a/test/integration/test/authoring/parity/cipherstash-encrypted-string/packs.ts b/test/integration/test/authoring/parity/cipherstash-encrypted-string/packs.ts new file mode 100644 index 0000000000..7548f57e36 --- /dev/null +++ b/test/integration/test/authoring/parity/cipherstash-encrypted-string/packs.ts @@ -0,0 +1,3 @@ +import cipherstash from '@prisma-next/extension-cipherstash/control'; + +export const extensionPacks = [cipherstash] as const; diff --git a/test/integration/test/authoring/parity/cipherstash-encrypted-string/schema.prisma b/test/integration/test/authoring/parity/cipherstash-encrypted-string/schema.prisma new file mode 100644 index 0000000000..653cad8464 --- /dev/null +++ b/test/integration/test/authoring/parity/cipherstash-encrypted-string/schema.prisma @@ -0,0 +1,13 @@ +model EncryptedDoc { + id Int @id 
@default(autoincrement()) + + storageOnly cipherstash.EncryptedString({}) + equalityOnly cipherstash.EncryptedString({ equality: true }) + searchable cipherstash.EncryptedString({ equality: true, freeTextSearch: true }) + + storageOnlyOpt cipherstash.EncryptedString({})? + equalityOnlyOpt cipherstash.EncryptedString({ equality: true })? + searchableOpt cipherstash.EncryptedString({ equality: true, freeTextSearch: true })? + + @@map("encrypted_doc") +}