From ef671f9b1148114e6ae5d5dc7eae1cec6afa4cdf Mon Sep 17 00:00:00 2001 From: Dimitri Glazkov Date: Wed, 8 Nov 2023 16:06:11 -0800 Subject: [PATCH 1/5] [breadboard] Start teaching Breadboard about streams. --- seeds/breadboard/src/index.ts | 1 + seeds/breadboard/src/stream.ts | 48 +++++++++++++++++++++++ seeds/breadboard/src/ui/output.ts | 21 ++++++++++ seeds/breadboard/src/worker/controller.ts | 8 +++- seeds/llm-starter/src/nodes/fetch.ts | 28 +++++++++---- 5 files changed, 97 insertions(+), 9 deletions(-) create mode 100644 seeds/breadboard/src/stream.ts diff --git a/seeds/breadboard/src/index.ts b/seeds/breadboard/src/index.ts index bd79eaa4..e2b15b64 100644 --- a/seeds/breadboard/src/index.ts +++ b/seeds/breadboard/src/index.ts @@ -56,3 +56,4 @@ export { toMermaid } from "./mermaid.js"; export type { Schema } from "jsonschema"; export { callHandler } from "./handler.js"; export { asRuntimeKit } from "./kits/ctors.js"; +export { StreamCapability } from "./stream.js"; diff --git a/seeds/breadboard/src/stream.ts b/seeds/breadboard/src/stream.ts new file mode 100644 index 00000000..02a9127e --- /dev/null +++ b/seeds/breadboard/src/stream.ts @@ -0,0 +1,48 @@ +/** + * @license + * Copyright 2023 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability, NodeValue } from "./types.js"; + +const STREAM_KIND = "stream" as const; + +export interface StreamCapabilityType extends Capability { + kind: typeof STREAM_KIND; + stream: ReadableStream; +} + +export class StreamCapability + implements StreamCapabilityType +{ + kind = STREAM_KIND; + stream: ReadableStream; + + constructor(stream: ReadableStream) { + this.stream = stream; + } +} + +const findStreams = (value: NodeValue, foundStreams: ReadableStream[]) => { + if (Array.isArray(value)) { + value.forEach((item: NodeValue) => { + findStreams(item, foundStreams); + }); + } else if (typeof value === "object") { + const maybeCapability = value as StreamCapabilityType; + if (maybeCapability.kind && maybeCapability.kind === STREAM_KIND) { + foundStreams.push(maybeCapability.stream); + } else { + Object.values(value as object).forEach((item) => { + findStreams(item, foundStreams); + }); + } + } +}; + +export const getStreams = (value: NodeValue) => { + const foundStreams: ReadableStream[] = []; + findStreams(value, foundStreams); + return foundStreams; +}; diff --git a/seeds/breadboard/src/ui/output.ts b/seeds/breadboard/src/ui/output.ts index dcc7861d..340580f2 100644 --- a/seeds/breadboard/src/ui/output.ts +++ b/seeds/breadboard/src/ui/output.ts @@ -5,6 +5,7 @@ */ import { type Schema } from "jsonschema"; +import { StreamCapabilityType } from "../stream.js"; export type OutputArgs = Record & { schema: Schema; @@ -30,9 +31,29 @@ export class Output extends HTMLElement { return; } Object.entries(schema.properties).forEach(([key, property]) => { + if (property.type === "object" && property.format === "stream") { + this.appendStream( + property, + (values[key] as StreamCapabilityType).stream + ); + return; + } const html = document.createElement("pre"); html.innerHTML = `${values[key]}`; root.append(`${property.title}: `, html, "\n"); }); } + + appendStream(property: Schema, stream: ReadableStream) { + const root = this.shadowRoot; + if (!root) return; + root.append(`${property.title}: `); + stream.pipeThrough(new TextDecoderStream()).pipeTo( + new WritableStream({ + write(chunk) { + root.append(chunk); + }, + }) + ); + } } diff --git a/seeds/breadboard/src/worker/controller.ts b/seeds/breadboard/src/worker/controller.ts index 
62df18a3..3efcd941 100644 --- a/seeds/breadboard/src/worker/controller.ts +++ b/seeds/breadboard/src/worker/controller.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { getStreams } from "../stream.js"; +import { InputValues } from "../types.js"; import { type ControllerMessage, type RoundTripControllerMessage, @@ -40,11 +42,13 @@ export class WorkerTransport implements MessageControllerTransport { } sendRoundTripMessage(message: T) { - this.worker.postMessage(message); + const streams = getStreams(message.data as InputValues); + this.worker.postMessage(message, streams); } sendMessage(message: T) { - this.worker.postMessage(message); + const streams = getStreams(message.data as InputValues); + this.worker.postMessage(message, streams); } #onMessage(e: MessageEvent) { diff --git a/seeds/llm-starter/src/nodes/fetch.ts b/seeds/llm-starter/src/nodes/fetch.ts index 13960801..deaaed9e 100644 --- a/seeds/llm-starter/src/nodes/fetch.ts +++ b/seeds/llm-starter/src/nodes/fetch.ts @@ -4,10 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { - InputValues, - NodeDescriberFunction, - NodeHandler, +import { + StreamCapability, + type InputValues, + type NodeDescriberFunction, + type NodeHandler, } from "@google-labs/breadboard"; export type FetchOutputs = { @@ -32,9 +33,14 @@ export type FetchInputs = { */ body?: string; /** - * Whether or not to return raw text (as opposed to parsing JSON) + * Whether or not to return raw text (as opposed to parsing JSON). Has no + * effect when `stream` is true. */ raw?: boolean; + /** + * Whether or not to return a stream + */ + stream?: boolean; }; export const fetchDescriber: NodeDescriberFunction = async () => { @@ -98,6 +104,7 @@ export default { body, headers = {}, raw, + stream, } = inputs as FetchInputs; if (!url) throw new Error("Fetch requires `url` input"); const init: RequestInit = { @@ -109,7 +116,14 @@ export default { init.body = JSON.stringify(body); } const data = await fetch(url, init); - const response = raw ? await data.text() : await data.json(); - return { response }; + if (stream) { + if (!data.body) { + throw new Error("Response is not streamable."); + } + return { response: new StreamCapability(data.body) }; + } else { + const response = raw ? await data.text() : await data.json(); + return { response }; + } }, } satisfies NodeHandler; From f9cff94e9e9afd6a0935fcebb51d4f636d95cb31 Mon Sep 17 00:00:00 2001 From: ibush Date: Thu, 9 Nov 2023 12:27:41 -0700 Subject: [PATCH 2/5] Allow user to configure which html tags to exclude. (#185) --- seeds/chunker-python/README.md | 21 ++++- seeds/chunker-python/pyproject.toml | 2 +- .../google_labs_html_chunker/html_chunker.py | 28 +++++-- seeds/chunker-python/src/main.py | 2 + .../chunker-python/tests/test_html_chunker.py | 82 +++++++++++++++++-- 5 files changed, 115 insertions(+), 20 deletions(-) diff --git a/seeds/chunker-python/README.md b/seeds/chunker-python/README.md index 34d17b29..557a290e 100644 --- a/seeds/chunker-python/README.md +++ b/seeds/chunker-python/README.md @@ -33,6 +33,9 @@ aggregated into passages under `max_words_per_aggregate_passage` words. If cannot be combined into a single passage under `max_words_per_aggregate_passage` words. +`html_tags_to_exclude`: Text within any of the tags in this set will not be +included in the output passages. Defaults to `{"noscript", "script", "style"}`. 
+ If you find your passages are too disjointed (insufficient context in a single passage for your application), consider increasing `max_words_per_aggregate_passage` and/or setting @@ -124,4 +127,20 @@ passages = chunker.chunk(html) The sibling children of the `

` node are greedily aggregated while the total is <=4 words: -passages: ["Heading", "Text before link", "and after."] \ No newline at end of file +passages: ["Heading", "Text before link", "and after."] + + +### Example 5 + +``` +chunker = HtmlChunker( + max_words_per_aggregate_passage=4, + greedily_aggregate_sibling_nodes=False, + html_tags_to_exclude={"p"} +) +passages = chunker.chunk(html) +``` + +All text within the `
<p>
` tag is excluded from the output.: + +passages: ["Heading"] \ No newline at end of file diff --git a/seeds/chunker-python/pyproject.toml b/seeds/chunker-python/pyproject.toml index 10784308..d1c6f7f2 100644 --- a/seeds/chunker-python/pyproject.toml +++ b/seeds/chunker-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "google_labs_html_chunker" -version = "0.0.3" +version = "0.0.5" authors = [ { name="Google Labs", email="labs-pypi@google.com" }, ] diff --git a/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py b/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py index a4b1058f..c83f827f 100644 --- a/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py +++ b/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from bs4 import BeautifulSoup, NavigableString, Comment -# Html tags for non-content text. Text within these tags will be excluded from -# passages. -_NON_CONTENT_HTML_TAGS = frozenset({"noscript", "script", "style"}) +import bs4 + +# Text within these html tags will be excluded from passages by default. +_DEFAULT_HTML_TAGS_TO_EXCLUDE = frozenset({"noscript", "script", "style"}) # Html tags that indicate a section break. Sibling nodes will not be # greedily-aggregated into a chunk across one of these tags. @@ -53,15 +53,22 @@ class HtmlChunker: false, each sibling node is output as a separate passage if they cannot all be combined into a single passage under max_words_per_aggregate_passage words. + html_tags_to_exclude: Text within any of the tags in this set will not be + included in the output passages. Defaults to {"noscript", "script", + "style"}. """ def __init__( self, max_words_per_aggregate_passage: int, greedily_aggregate_sibling_nodes: bool, + html_tags_to_exclude: frozenset[str] = _DEFAULT_HTML_TAGS_TO_EXCLUDE, ) -> None: self.max_words_per_aggregate_passage = max_words_per_aggregate_passage self.greedily_aggregate_sibling_nodes = greedily_aggregate_sibling_nodes + self.html_tags_to_exclude = { + tag.strip().lower() for tag in html_tags_to_exclude + } class PassageList: """A list of text passages.""" @@ -127,13 +134,16 @@ def _process_node(self, node) -> AggregateNode: current_node = self.AggregateNode() if node.name: current_node.html_tag = node.name - if node.name in _NON_CONTENT_HTML_TAGS or isinstance(node, Comment): + if node.name in self.html_tags_to_exclude or isinstance(node, bs4.Comment): # Exclude text within these tags. return current_node - if isinstance(node, NavigableString): - current_node.num_words = len(node.split()) - current_node.segments.append(node.strip()) + if isinstance(node, bs4.NavigableString): + # Store the text for this leaf node (skipping text directly under the + # top-level BeautifulSoup object, e.g. "html" from ). + if node.parent.name != "[document]": + current_node.num_words = len(node.split()) + current_node.segments.append(node.strip()) return current_node # Will hold the aggregate of this node and all its unchunked descendants @@ -201,7 +211,7 @@ def chunk(self, html: str) -> list[str]: Returns: A list of text passages from the html. 
""" - tree = BeautifulSoup(html, "html5lib") + tree = bs4.BeautifulSoup(html, "html5lib") root_agg_node = self._process_node(tree) if not root_agg_node.get_passages(): root_agg_node.passage_list.add_passage_for_node(root_agg_node) diff --git a/seeds/chunker-python/src/main.py b/seeds/chunker-python/src/main.py index ac97f024..a8ff964e 100644 --- a/seeds/chunker-python/src/main.py +++ b/seeds/chunker-python/src/main.py @@ -25,6 +25,7 @@ arg_parser.add_argument("-o", "--outfile", help="Output passages file path.", required=True) arg_parser.add_argument("--maxwords", type=int, default=200, help="Max words per aggregate passage.") arg_parser.add_argument("--greedyagg", action=argparse.BooleanOptionalAction, help="Whether to greedily aggregate sibling nodes.") + arg_parser.add_argument("--excludetags", type=str, default="noscript,script,style", help="Comma-separated HTML tags from which to exclude text.") args = arg_parser.parse_args() html_file = open(args.infile, "r") @@ -34,6 +35,7 @@ chunker = HtmlChunker( max_words_per_aggregate_passage=args.maxwords, greedily_aggregate_sibling_nodes=args.greedyagg, + html_tags_to_exclude={tag for tag in args.excludetags.split(',')}, ) passages = chunker.chunk(html) diff --git a/seeds/chunker-python/tests/test_html_chunker.py b/seeds/chunker-python/tests/test_html_chunker.py index ffc035ba..4b53dc3c 100644 --- a/seeds/chunker-python/tests/test_html_chunker.py +++ b/seeds/chunker-python/tests/test_html_chunker.py @@ -46,6 +46,39 @@ def test_handles_escape_codes(self): ["Here's a paragraph."], ) + def test_handles_unicode_characters(self): + html = ( + "
<p>
Here is a" + " \u2119\u212b\u213e\u212b\u210A\u213e\u212b\u2119\u210F.
</p>
" + ) + + chunker = HtmlChunker( + max_words_per_aggregate_passage=10, + greedily_aggregate_sibling_nodes=False, + ) + + self.assertEqual( + chunker.chunk(html), + ["Here is a ℙÅℾÅℊℾÅℙℏ."], + ) + + def test_handles_byte_string(self): + html_bytes = ( + b"
<p>
Here is a" + b" \xe2\x84\x99\xe2\x84\xab\xe2\x84\xbe\xe2\x84\xab\xe2\x84\x8a\xe2\x84\xbe\xe2\x84\xab\xe2\x84\x99\xe2\x84\x8f.
</p>
" + ) + + chunker = HtmlChunker( + max_words_per_aggregate_passage=10, + greedily_aggregate_sibling_nodes=False, + ) + + # When using bytes, we must provide the decoding, in this case utf-8. + self.assertEqual( + chunker.chunk(html_bytes.decode("utf-8")), + ["Here is a ℙÅℾÅℊℾÅℙℏ."], + ) + def test_strips_whitespace_around_node_text(self): html = """
@@ -189,16 +222,19 @@ def test_does_not_join_split_text_nodes_within_p_tag_when_over_max(self): ], ) - def test_skips_non_content_text(self): + def test_excludes_text_from_default_html_tags(self): html = """ - - Title - - - - -
<p>
Paragraph
</p>
- + + + + Title + + + + +
<p>
Paragraph
</p>
+ + """ chunker = HtmlChunker( @@ -213,6 +249,34 @@ def test_skips_non_content_text(self): ], ) + def test_excludes_text_from_given_html_tags(self): + html = """ + + + + Title + + + + +
<p>
Paragraph
</p>
+ + + """ + + chunker = HtmlChunker( + max_words_per_aggregate_passage=10, + greedily_aggregate_sibling_nodes=False, + html_tags_to_exclude={" HEAD ", "p"}, + ) + + self.assertEqual( + chunker.chunk(html), + [ + '{"@context":"https://schema.org"}', + ], + ) + def test_greedily_aggregates_sibling_nodes(self): html = """
From 043c6519f8b767dc3dcc361338de13c7ab03898c Mon Sep 17 00:00:00 2001 From: Dimitri Glazkov Date: Thu, 9 Nov 2023 12:28:05 -0800 Subject: [PATCH 3/5] [node-nursery-web] Start on the `transformStream` node. --- seeds/breadboard-web/tests/async-gen.ts | 2 - seeds/breadboard/src/index.ts | 6 +- seeds/breadboard/src/stream.ts | 9 +++ seeds/node-nursery-web/package.json | 1 + .../src/nodes/transform-stream.ts | 62 +++++++++++++++ .../tests/transform-stream.ts | 77 +++++++++++++++++++ 6 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 seeds/node-nursery-web/src/nodes/transform-stream.ts create mode 100644 seeds/node-nursery-web/tests/transform-stream.ts diff --git a/seeds/breadboard-web/tests/async-gen.ts b/seeds/breadboard-web/tests/async-gen.ts index 8f80aedb..2b99a2b7 100644 --- a/seeds/breadboard-web/tests/async-gen.ts +++ b/seeds/breadboard-web/tests/async-gen.ts @@ -7,11 +7,9 @@ import { expect, test } from "vitest"; import { LastMessageKeeper, - PatchedReadableStream, asyncGen, streamFromAsyncGen, } from "../src/async-gen"; -import { Readable } from "stream"; test("async-gen", async () => { const results = []; diff --git a/seeds/breadboard/src/index.ts b/seeds/breadboard/src/index.ts index e2b15b64..1078c7ac 100644 --- a/seeds/breadboard/src/index.ts +++ b/seeds/breadboard/src/index.ts @@ -56,4 +56,8 @@ export { toMermaid } from "./mermaid.js"; export type { Schema } from "jsonschema"; export { callHandler } from "./handler.js"; export { asRuntimeKit } from "./kits/ctors.js"; -export { StreamCapability } from "./stream.js"; +export { + StreamCapability, + isStreamCapability, + type StreamCapabilityType, +} from "./stream.js"; diff --git a/seeds/breadboard/src/stream.ts b/seeds/breadboard/src/stream.ts index 02a9127e..b0700d7e 100644 --- a/seeds/breadboard/src/stream.ts +++ b/seeds/breadboard/src/stream.ts @@ -24,6 +24,15 @@ export class StreamCapability } } +export const isStreamCapability = (object: unknown) => { + const maybeStream = object as StreamCapabilityType; + return ( + maybeStream.kind && + maybeStream.kind === STREAM_KIND && + maybeStream.stream instanceof ReadableStream + ); +}; + const findStreams = (value: NodeValue, foundStreams: ReadableStream[]) => { if (Array.isArray(value)) { value.forEach((item: NodeValue) => { diff --git a/seeds/node-nursery-web/package.json b/seeds/node-nursery-web/package.json index dad86800..42addb4b 100644 --- a/seeds/node-nursery-web/package.json +++ b/seeds/node-nursery-web/package.json @@ -9,6 +9,7 @@ "type": "module", "scripts": { "build": "FORCE_COLOR=1 tsc --b", + "test": "FORCE_COLOR=1 ava", "watch": "FORCE_COLOR=1 tsc --b --watch", "lint": "FORCE_COLOR=1 eslint . 
--ext .ts" }, diff --git a/seeds/node-nursery-web/src/nodes/transform-stream.ts b/seeds/node-nursery-web/src/nodes/transform-stream.ts new file mode 100644 index 00000000..5aad407d --- /dev/null +++ b/seeds/node-nursery-web/src/nodes/transform-stream.ts @@ -0,0 +1,62 @@ +/** + * @license + * Copyright 2023 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + Board, + BreadboardCapability, + InputValues, + NodeHandlerContext, + OutputValues, + StreamCapability, + StreamCapabilityType, + isStreamCapability, +} from "@google-labs/breadboard"; + +export type TransformStreamInputs = InputValues & { + stream: StreamCapabilityType; + board?: BreadboardCapability; +}; + +const getTransformer = async ( + board?: BreadboardCapability, + context?: NodeHandlerContext +): Promise => { + if (board) { + const runnableBoard = await Board.fromBreadboardCapability( + board as BreadboardCapability + ); + return { + async transform(chunk, controller) { + const inputs = { chunk }; + const result = await runnableBoard.runOnce(inputs, context); + controller.enqueue(result.chunk); + }, + }; + } else + return { + transform(chunk, controller) { + controller.enqueue(chunk); + }, + }; +}; + +export default { + invoke: async ( + inputs: InputValues, + context?: NodeHandlerContext + ): Promise => { + const { stream, board } = inputs as TransformStreamInputs; + if (!stream) throw new Error("The `stream` input is required"); + if (!isStreamCapability(stream)) + throw new Error("The `stream` input must be a `StreamCapability`."); + const transformer = await getTransformer(board, context); + const streamCapability = stream as StreamCapabilityType; + const outputStream = streamCapability.stream.pipeThrough( + new TransformStream(transformer) + ); + return { stream: new StreamCapability(outputStream) }; + }, +}; diff --git a/seeds/node-nursery-web/tests/transform-stream.ts b/seeds/node-nursery-web/tests/transform-stream.ts new file mode 100644 index 00000000..9dbbaba6 --- /dev/null +++ b/seeds/node-nursery-web/tests/transform-stream.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2023 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import test from "ava"; + +import transformStream, { + TransformStreamInputs, +} from "../src/nodes/transform-stream.js"; +import { + Board, + GraphDescriptor, + StreamCapability, + StreamCapabilityType, + callHandler, +} from "@google-labs/breadboard"; + +const toArray = async (stream: ReadableStream) => { + const results: T[] = []; + await stream.pipeTo( + new WritableStream({ + write(chunk) { + results.push(chunk); + }, + }) + ); + return results; +}; + +test("transform stream noop", async (t) => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(1); + controller.enqueue(2); + controller.enqueue(3); + controller.close(); + }, + }); + const inputs: TransformStreamInputs = { + stream: new StreamCapability(stream), + }; + const outputs = (await callHandler(transformStream, inputs, {})) as { + stream: StreamCapabilityType; + }; + const results = await toArray(outputs.stream.stream); + t.deepEqual(results, [1, 2, 3]); +}); + +test("transform stream with a board", async (t) => { + const board = new Board(); + board.input().wire("chunk->", board.output()); + + const graph = board as GraphDescriptor; + + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(1); + controller.enqueue(2); + controller.enqueue(3); + controller.close(); + }, + }); + const inputs: TransformStreamInputs = { + stream: new 
StreamCapability(stream), + board: { + kind: "board", + board: graph, + }, + }; + const outputs = (await callHandler(transformStream, inputs, {})) as { + stream: StreamCapabilityType; + }; + const results = await toArray(outputs.stream.stream); + t.deepEqual(results, [1, 2, 3]); +}); From 56ddcf5a89e0d872e2e503e781b4785fa9044355 Mon Sep 17 00:00:00 2001 From: Dimitri Glazkov Date: Thu, 9 Nov 2023 13:29:34 -0800 Subject: [PATCH 4/5] [llm-starter] Handle errors in `runJavascript`. Fixes #186. --- seeds/llm-starter/src/nodes/run-javascript.ts | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/seeds/llm-starter/src/nodes/run-javascript.ts b/seeds/llm-starter/src/nodes/run-javascript.ts index 71c3286e..3eb4a1a3 100644 --- a/seeds/llm-starter/src/nodes/run-javascript.ts +++ b/seeds/llm-starter/src/nodes/run-javascript.ts @@ -46,16 +46,29 @@ const runInBrowser = async ( args: string ): Promise => { const runner = (code: string, functionName: string) => { - return `${code}\nself.onmessage = () => self.postMessage(JSON.stringify(${functionName}(${args})))`; + return `${code}\nself.onmessage = () => self.postMessage({ result: JSON.stringify(${functionName}(${args})) });self.onerror = (e) => self.postMessage({ error: e.message })`; }; const blob = new Blob([runner(code, functionName)], { type: "text/javascript", }); + type WebWorkerResultType = "error" | "result"; + type WebWorkerResult = { + [x in WebWorkerResultType]: string; + }; + const worker = new Worker(URL.createObjectURL(blob)); const result = new Promise((resolve) => { - worker.onmessage = (e) => resolve(e.data); + worker.onmessage = (e) => { + const data = e.data as WebWorkerResult; + if (data.result) { + resolve(data.result); + return; + } else if (data.error) { + throw new Error(data.error); + } + }; }); worker.postMessage("please"); return result; @@ -91,7 +104,7 @@ export const runJavascriptHandler: NodeHandlerFunction = async ( ); return raw ? result : { result }; } catch (e) { - // Remove everthing outside eval from the stack trace + // Remove everything outside eval from the stack trace const stack = (e as Error).stack; if (stack !== undefined) { (e as Error).stack = stack From 18c0ec316d0bb59bdf049cb5f35c0d53a7f51dbe Mon Sep 17 00:00:00 2001 From: Dimitri Glazkov Date: Thu, 9 Nov 2023 14:36:51 -0800 Subject: [PATCH 5/5] [node-nursery-web] Add `transformNode` to kit. 
--- package-lock.json | 1 + seeds/node-nursery-web/package.json | 3 +- seeds/node-nursery-web/src/index.ts | 2 + .../tests/transform-stream.ts | 52 +++++++++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/package-lock.json b/package-lock.json index 9e466b80..c69b2372 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15211,6 +15211,7 @@ }, "devDependencies": { "@ava/typescript": "^4.0.0", + "@google-labs/llm-starter": "*", "@google-labs/tsconfig": "*", "@types/gapi": "^0.0.46", "@types/node": "^18.16.3", diff --git a/seeds/node-nursery-web/package.json b/seeds/node-nursery-web/package.json index 42addb4b..15a91b3e 100644 --- a/seeds/node-nursery-web/package.json +++ b/seeds/node-nursery-web/package.json @@ -48,7 +48,8 @@ "@typescript-eslint/parser": "^5.56.0", "ava": "^5.2.0", "npm": "^10.2.1", - "typescript": "^5.0.4" + "typescript": "^5.0.4", + "@google-labs/llm-starter": "*" }, "dependencies": { "@google-labs/breadboard": "*", diff --git a/seeds/node-nursery-web/src/index.ts b/seeds/node-nursery-web/src/index.ts index 2bb63fc4..4fbf1feb 100644 --- a/seeds/node-nursery-web/src/index.ts +++ b/seeds/node-nursery-web/src/index.ts @@ -7,12 +7,14 @@ import { KitBuilder } from "@google-labs/breadboard/kits"; import credentials from "./nodes/credentials.js"; import driveList from "./nodes/drive-list.js"; +import transformStream from "./nodes/transform-stream.js"; const NodeNurseryWeb = new KitBuilder({ url: "npm:@google-labs/node-nursery-web", }).build({ credentials, driveList, + transformStream, }); export default NodeNurseryWeb; diff --git a/seeds/node-nursery-web/tests/transform-stream.ts b/seeds/node-nursery-web/tests/transform-stream.ts index 9dbbaba6..7db3970c 100644 --- a/seeds/node-nursery-web/tests/transform-stream.ts +++ b/seeds/node-nursery-web/tests/transform-stream.ts @@ -14,8 +14,11 @@ import { GraphDescriptor, StreamCapability, StreamCapabilityType, + asRuntimeKit, callHandler, } from "@google-labs/breadboard"; +import NodeNurseryWeb from "../src/index.js"; +import Starter from "@google-labs/llm-starter"; const toArray = async (stream: ReadableStream) => { const results: T[] = []; @@ -75,3 +78,52 @@ test("transform stream with a board", async (t) => { const results = await toArray(outputs.stream.stream); t.deepEqual(results, [1, 2, 3]); }); + +test("transform works in a board", async (t) => { + const board = new Board(); + const nursery = board.addKit(NodeNurseryWeb); + + board.input().wire( + "stream->", + nursery + .transformStream((board, input, output) => { + const starter = board.addKit(Starter); + + function run({ chunk }: { chunk: number }): string { + return `number: ${chunk}`; + } + + input.wire( + "chunk->", + starter + .runJavascript("run", { + code: run.toString(), + }) + .wire("result->chunk", output) + ); + }) + .wire("stream->", board.output()) + ); + + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(1); + controller.enqueue(2); + controller.enqueue(3); + controller.close(); + }, + }); + + const outputs = (await board.runOnce( + { + stream: new StreamCapability(stream), + }, + { + kits: [asRuntimeKit(Starter)], + } + )) as { + stream: StreamCapabilityType; + }; + const results = await toArray(outputs.stream.stream); + t.deepEqual(results, ["number: 1", "number: 2", "number: 3"]); +});