diff --git a/DEVELOPING.md b/DEVELOPING.md index 8ccdbcc5..6744af5f 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -225,9 +225,7 @@ Currently, to publish an NPM package, you have to be a Googler. This is unlikely ```bash git pull -npm run clean:build -npm i -npx turbo build +npm run sync ``` 2. Change directory to the package to be published. For example: diff --git a/package-lock.json b/package-lock.json index bf93cad7..c69b2372 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14871,7 +14871,7 @@ }, "seeds/breadboard": { "name": "@google-labs/breadboard", - "version": "0.4.1", + "version": "0.5.0", "license": "Apache-2.0", "devDependencies": { "@ava/typescript": "^4.0.0", @@ -14898,7 +14898,7 @@ "license": "Apache-2.0", "dependencies": { "@google-cloud/firestore": "^6.7.0", - "@google-labs/breadboard": "^0.4.1" + "@google-labs/breadboard": "^0.5.0" }, "devDependencies": { "@ava/typescript": "^4.0.0", @@ -14994,9 +14994,9 @@ "version": "0.0.1", "license": "Apache-2.0", "dependencies": { - "@google-labs/breadboard": "^0.4.1", + "@google-labs/breadboard": "^0.5.0", "@google-labs/breadboard-server": "^0.1.3", - "@google-labs/llm-starter": "^0.2.0", + "@google-labs/llm-starter": "^0.2.1", "dotenv": "^16.3.1" }, "devDependencies": { @@ -15032,7 +15032,7 @@ "version": "0.0.1", "license": "Apache-2.0", "dependencies": { - "@google-labs/breadboard": "*" + "@google-labs/breadboard": "^0.5.0" }, "devDependencies": { "@ava/typescript": "^4.0.0", @@ -15148,10 +15148,10 @@ }, "seeds/llm-starter": { "name": "@google-labs/llm-starter", - "version": "0.2.0", + "version": "0.2.1", "license": "Apache-2.0", "dependencies": { - "@google-labs/breadboard": "^0.4.1", + "@google-labs/breadboard": "^0.5.0", "@google-labs/palm-lite": "^0.0.2", "@rgrove/parse-xml": "^4.1.0", "jsonata": "^2.0.3", @@ -15211,6 +15211,7 @@ }, "devDependencies": { "@ava/typescript": "^4.0.0", + "@google-labs/llm-starter": "*", "@google-labs/tsconfig": "*", "@types/gapi": "^0.0.46", "@types/node": 
"^18.16.3", diff --git a/seeds/breadboard-server/package.json b/seeds/breadboard-server/package.json index d5755201..634aa418 100644 --- a/seeds/breadboard-server/package.json +++ b/seeds/breadboard-server/package.json @@ -55,6 +55,6 @@ }, "dependencies": { "@google-cloud/firestore": "^6.7.0", - "@google-labs/breadboard": "^0.4.1" + "@google-labs/breadboard": "^0.5.0" } } diff --git a/seeds/breadboard-web/tests/async-gen.ts b/seeds/breadboard-web/tests/async-gen.ts index 8f80aedb..2b99a2b7 100644 --- a/seeds/breadboard-web/tests/async-gen.ts +++ b/seeds/breadboard-web/tests/async-gen.ts @@ -7,11 +7,9 @@ import { expect, test } from "vitest"; import { LastMessageKeeper, - PatchedReadableStream, asyncGen, streamFromAsyncGen, } from "../src/async-gen"; -import { Readable } from "stream"; test("async-gen", async () => { const results = []; diff --git a/seeds/breadboard/CHANGELOG.md b/seeds/breadboard/CHANGELOG.md index 26627153..27f52ce1 100644 --- a/seeds/breadboard/CHANGELOG.md +++ b/seeds/breadboard/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## [0.5.0] - 2023-11-08 + +- The `/ui` submodule changes: + - supports multiple simultaneous inputs + - does not ask for keys more than once per session + - if you specify `type: "object"` for an input, it will try to parse it as JSON data and pass as an object. + - draw Mermaid diagrams of the boards + - there's now a link to the running board in the UI. +- The `/worker` submodule changes: + - bug fixes (will actually queue received messages and not drop them on the floor) +- The following nodes moved out into the Core Kit: `passthrough`, `reflect`, `slot`, `include`, `import`, and `invoke`. +- The `run` method now takes a `NodeHandlerContext` object as its argument, rather than a list of arguments. +- Kits are no longer implicitly imported by Breadboard. Instead, supply loaded Kits as part `NodeHandlerContext` to `run`. + ## [0.4.1] - 2023-10-20 - Moved the `mermaid` method to `BoardRunner`. 
diff --git a/seeds/breadboard/docs/nodes.md b/seeds/breadboard/docs/nodes.md index ef6bf3c3..f1edcf99 100644 --- a/seeds/breadboard/docs/nodes.md +++ b/seeds/breadboard/docs/nodes.md @@ -68,59 +68,6 @@ result { say: 'Hello, world!' } - none. -## The `passthrough` node - -This is a no-op node. It takes the input property bag and passes it along as output, unmodified. This node can be useful when the board needs an entry point, but the rest of the board forms a cycle. - -### Example: - -```js -board.input().wire("say->", board.passthrough().wire("say->", board.output())); - -board.runOnce({ - say: "Hello, world!", -}); - -console.log("result", result); -``` - -Will produce this output: - -```sh -result { say: 'Hello, world!' } -``` - -See [Chapter 9: Let's build a chatbot](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/docs/tutorial#chapter-9-lets-build-a-chat-bot) of Breadboard tutorial to see another example of usage. - -### Inputs - -- any properties - -### Outputs - -- the properties that were passed as inputs - -## The `invoke` node - -Use this node to invoke another board from this board. - -It recognizes `path`, `graph`, and `board` properties that specify, respectively, a file path or URL to the serialized board, directly the serialized-as-JSON board, and a `BoardCapability` (returned by `lambda` or `import`). - -The rest of the inputs in the property bag are passed along to the invoked board as its inputs. If other inputs were bound to the board via wires into the `lambda` or `import` node, then those have precedence over inputs passed here. - -The outputs of the invoked board will be passed along as outputs of the `invoke` node. - -### Inputs - -- `path`, which specifes the file path or URL to the serialized board to be included. -- `graph`, which is a serialized board -- `board`, a `BoardCapability` representing a board, created by `lambda` or `import`. -- any other properties are passed as inputs for the invoked board. 
- -### Outputs - -- the outputs of the invoked board - ## The `lambda` node Use this node to create a lambda board that can be passed around and eventually invoked by e.g. the `invoke` node. @@ -185,100 +132,3 @@ board.invoke({ board: lambda }) ### Outputs - `board`, a `BoardCapability`, which can be passed to `invoke` and other nodes that can invoke boards. - -## The `import` node - -Creates a lambda board from a pre-existing board, either loaded from `path` or passed as JSON via `graph`. All other inputs are bound to the board, which is returned as `board`. - -### Inputs - -- `path`, which specifes the file path or URL to the serialized board to be included. -- `graph`, which is a serialized board -- all other inputs are bound to the board - -### Outputs - -- `board`, a `BoardCapability`, which can be passed to `invoke` and other nodes that can invoke boards. - -## The `include` node (DEPRECATED) - -DEPRECATED: Use `invoke` instead - -Use this node to include other board into the current board. It recognizes `path` or `$ref` properties that specify, respectively, file path or URL to the serialized-as-JSON board to be included. It also accepts the `slotted` property that must contain the serialized-as-JSON boards that will be slotted into the included board. - -The rest of the inputs in the property bag are passed along to the included board as its inputs. The outputs of the included board will be passed along as outputs of the `include` node. - -This enables treating the included board as a kind of a node: it takes inputs and provides outputs. - -### Example - -For an example of how to use the `include` property, see [Chapter 5: Including other boards](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/docs/tutorial#chapter-5-including-other-boards) of Breadboard tutorial. - -### Inputs - -- `path`, which specifes the file path to the serialized board to be included. Either this or `$ref` property is required. 
-- `$ref`, which specifes the URL of the serialized board to be included. Ether this or `path` property is required. -- `slotted`, which specifies slotted boards that will be used to populate `slot` nodes in the included board. This property is optional. -- any other properties are passed as inputs for the included board. - -### Outputs - -- the outputs of the included board - -## The `slot` node (DEPRECATED) - -DEPRECATED. Instead pass boards either as URLs or as Boards from `lambda` and `invoke` them. - -Use this node to make a slot in a board. Adding a `slot` node turns a board into a sort of a template: each slot represents a placeholder that must be filled in when the node is included into another board. - -The node takes a `slot` property, which specifies the name of the slot, and passes the rest of arguments to the slotted board. The value of the `slot` property is used to match the slot with one of the slotted board that is passed to the `include` node. - -### Example - -For an example of how to use the `slot` node, see [Chapter 6: Boards with slots](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/docs/tutorial#chapter-6-boards-with-slots) of Breadboard tutorial. - -### Inputs - -- `slot` - the name of the slot -- any other properties are passed as inputs for the slotted board - -### Outputs - -- the outputs of the included board - -## The `reflect` node - -This node is used to reflect the board itself. It has no required inputs and provides a JSON representation of the board as a `graph` output property. This node can be used for getting information that might be stored in the structure of the board. 
- -### Example - -```js -import { Board } from "@google-labs/breadboard"; - -const board = new Board(); - -board.input().wire("", board.reflect().wire("graph->", board.output())); - -const result = await board.runOnce({}); -console.log("result", result); -``` - -will print: - -```sh -result { - graph: { - edges: [ [Object], [Object] ], - nodes: [ [Object], [Object], [Object] ], - kits: [] - } -} -``` - -### Inputs - -- ignored - -### Outputs - -- `graph` -- JSON representation of the board diff --git a/seeds/breadboard/package.json b/seeds/breadboard/package.json index cbeaaf30..49a8bb58 100644 --- a/seeds/breadboard/package.json +++ b/seeds/breadboard/package.json @@ -1,6 +1,6 @@ { "name": "@google-labs/breadboard", - "version": "0.4.1", + "version": "0.5.0", "description": "A library for rapid generative AI application prototyping", "main": "./dist/src/index.js", "exports": { diff --git a/seeds/breadboard/src/index.ts b/seeds/breadboard/src/index.ts index bd79eaa4..1078c7ac 100644 --- a/seeds/breadboard/src/index.ts +++ b/seeds/breadboard/src/index.ts @@ -56,3 +56,8 @@ export { toMermaid } from "./mermaid.js"; export type { Schema } from "jsonschema"; export { callHandler } from "./handler.js"; export { asRuntimeKit } from "./kits/ctors.js"; +export { + StreamCapability, + isStreamCapability, + type StreamCapabilityType, +} from "./stream.js"; diff --git a/seeds/breadboard/src/kits/graph-to-kit.ts b/seeds/breadboard/src/kits/graph-to-kit.ts index c9f1e230..46b0ac20 100644 --- a/seeds/breadboard/src/kits/graph-to-kit.ts +++ b/seeds/breadboard/src/kits/graph-to-kit.ts @@ -5,7 +5,7 @@ */ import { callHandler } from "../handler.js"; -import { KitBuilderOptions } from "./index.js"; +import { KitBuilderOptions } from "./builder.js"; import { BoardRunner } from "../runner.js"; import { GraphDescriptor, diff --git a/seeds/breadboard/src/stream.ts b/seeds/breadboard/src/stream.ts new file mode 100644 index 00000000..b0700d7e --- /dev/null +++ 
b/seeds/breadboard/src/stream.ts @@ -0,0 +1,57 @@ +/** + * @license + * Copyright 2023 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability, NodeValue } from "./types.js"; + +const STREAM_KIND = "stream" as const; + +export interface StreamCapabilityType extends Capability { + kind: typeof STREAM_KIND; + stream: ReadableStream; +} + +export class StreamCapability + implements StreamCapabilityType +{ + kind = STREAM_KIND; + stream: ReadableStream; + + constructor(stream: ReadableStream) { + this.stream = stream; + } +} + +export const isStreamCapability = (object: unknown) => { + const maybeStream = object as StreamCapabilityType; + return ( + maybeStream.kind && + maybeStream.kind === STREAM_KIND && + maybeStream.stream instanceof ReadableStream + ); +}; + +const findStreams = (value: NodeValue, foundStreams: ReadableStream[]) => { + if (Array.isArray(value)) { + value.forEach((item: NodeValue) => { + findStreams(item, foundStreams); + }); + } else if (typeof value === "object") { + const maybeCapability = value as StreamCapabilityType; + if (maybeCapability.kind && maybeCapability.kind === STREAM_KIND) { + foundStreams.push(maybeCapability.stream); + } else { + Object.values(value as object).forEach((item) => { + findStreams(item, foundStreams); + }); + } + } +}; + +export const getStreams = (value: NodeValue) => { + const foundStreams: ReadableStream[] = []; + findStreams(value, foundStreams); + return foundStreams; +}; diff --git a/seeds/breadboard/src/ui/output.ts b/seeds/breadboard/src/ui/output.ts index dcc7861d..340580f2 100644 --- a/seeds/breadboard/src/ui/output.ts +++ b/seeds/breadboard/src/ui/output.ts @@ -5,6 +5,7 @@ */ import { type Schema } from "jsonschema"; +import { StreamCapabilityType } from "../stream.js"; export type OutputArgs = Record & { schema: Schema; @@ -30,9 +31,29 @@ export class Output extends HTMLElement { return; } Object.entries(schema.properties).forEach(([key, property]) => { + if (property.type === 
"object" && property.format === "stream") { + this.appendStream( + property, + (values[key] as StreamCapabilityType).stream + ); + return; + } const html = document.createElement("pre"); html.innerHTML = `${values[key]}`; root.append(`${property.title}: `, html, "\n"); }); } + + appendStream(property: Schema, stream: ReadableStream) { + const root = this.shadowRoot; + if (!root) return; + root.append(`${property.title}: `); + stream.pipeThrough(new TextDecoderStream()).pipeTo( + new WritableStream({ + write(chunk) { + root.append(chunk); + }, + }) + ); + } } diff --git a/seeds/breadboard/src/worker/controller.ts b/seeds/breadboard/src/worker/controller.ts index 62df18a3..3efcd941 100644 --- a/seeds/breadboard/src/worker/controller.ts +++ b/seeds/breadboard/src/worker/controller.ts @@ -4,6 +4,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { getStreams } from "../stream.js"; +import { InputValues } from "../types.js"; import { type ControllerMessage, type RoundTripControllerMessage, @@ -40,11 +42,13 @@ export class WorkerTransport implements MessageControllerTransport { } sendRoundTripMessage(message: T) { - this.worker.postMessage(message); + const streams = getStreams(message.data as InputValues); + this.worker.postMessage(message, streams); } sendMessage(message: T) { - this.worker.postMessage(message); + const streams = getStreams(message.data as InputValues); + this.worker.postMessage(message, streams); } #onMessage(e: MessageEvent) { diff --git a/seeds/chunker-python/README.md b/seeds/chunker-python/README.md index 34d17b29..557a290e 100644 --- a/seeds/chunker-python/README.md +++ b/seeds/chunker-python/README.md @@ -33,6 +33,9 @@ aggregated into passages under `max_words_per_aggregate_passage` words. If cannot be combined into a single passage under `max_words_per_aggregate_passage` words. +`html_tags_to_exclude`: Text within any of the tags in this set will not be +included in the output passages. Defaults to `{"noscript", "script", "style"}`. 
+ If you find your passages are too disjointed (insufficient context in a single passage for your application), consider increasing `max_words_per_aggregate_passage` and/or setting @@ -124,4 +127,20 @@ passages = chunker.chunk(html) The sibling children of the `

` node are greedily aggregated while the total is <=4 words: -passages: ["Heading", "Text before link", "and after."] \ No newline at end of file +passages: ["Heading", "Text before link", "and after."] + + +### Example 5 + +``` +chunker = HtmlChunker( + max_words_per_aggregate_passage=4, + greedily_aggregate_sibling_nodes=False, + html_tags_to_exclude={"p"} +) +passages = chunker.chunk(html) +``` + +All text within the `<p>` tag is excluded from the output: + +passages: ["Heading"] \ No newline at end of file diff --git a/seeds/chunker-python/pyproject.toml b/seeds/chunker-python/pyproject.toml index 10784308..d1c6f7f2 100644 --- a/seeds/chunker-python/pyproject.toml +++ b/seeds/chunker-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "google_labs_html_chunker" -version = "0.0.3" +version = "0.0.5" authors = [ { name="Google Labs", email="labs-pypi@google.com" }, ] diff --git a/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py b/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py index a4b1058f..c83f827f 100644 --- a/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py +++ b/seeds/chunker-python/src/google_labs_html_chunker/html_chunker.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from bs4 import BeautifulSoup, NavigableString, Comment -# Html tags for non-content text. Text within these tags will be excluded from -# passages. -_NON_CONTENT_HTML_TAGS = frozenset({"noscript", "script", "style"}) +import bs4 + +# Text within these html tags will be excluded from passages by default. +_DEFAULT_HTML_TAGS_TO_EXCLUDE = frozenset({"noscript", "script", "style"}) # Html tags that indicate a section break. Sibling nodes will not be # greedily-aggregated into a chunk across one of these tags. @@ -53,15 +53,22 @@ class HtmlChunker: false, each sibling node is output as a separate passage if they cannot all be combined into a single passage under max_words_per_aggregate_passage words. + html_tags_to_exclude: Text within any of the tags in this set will not be + included in the output passages. Defaults to {"noscript", "script", + "style"}.
""" def __init__( self, max_words_per_aggregate_passage: int, greedily_aggregate_sibling_nodes: bool, + html_tags_to_exclude: frozenset[str] = _DEFAULT_HTML_TAGS_TO_EXCLUDE, ) -> None: self.max_words_per_aggregate_passage = max_words_per_aggregate_passage self.greedily_aggregate_sibling_nodes = greedily_aggregate_sibling_nodes + self.html_tags_to_exclude = { + tag.strip().lower() for tag in html_tags_to_exclude + } class PassageList: """A list of text passages.""" @@ -127,13 +134,16 @@ def _process_node(self, node) -> AggregateNode: current_node = self.AggregateNode() if node.name: current_node.html_tag = node.name - if node.name in _NON_CONTENT_HTML_TAGS or isinstance(node, Comment): + if node.name in self.html_tags_to_exclude or isinstance(node, bs4.Comment): # Exclude text within these tags. return current_node - if isinstance(node, NavigableString): - current_node.num_words = len(node.split()) - current_node.segments.append(node.strip()) + if isinstance(node, bs4.NavigableString): + # Store the text for this leaf node (skipping text directly under the + # top-level BeautifulSoup object, e.g. "html" from ). + if node.parent.name != "[document]": + current_node.num_words = len(node.split()) + current_node.segments.append(node.strip()) return current_node # Will hold the aggregate of this node and all its unchunked descendants @@ -201,7 +211,7 @@ def chunk(self, html: str) -> list[str]: Returns: A list of text passages from the html. 
""" - tree = BeautifulSoup(html, "html5lib") + tree = bs4.BeautifulSoup(html, "html5lib") root_agg_node = self._process_node(tree) if not root_agg_node.get_passages(): root_agg_node.passage_list.add_passage_for_node(root_agg_node) diff --git a/seeds/chunker-python/src/main.py b/seeds/chunker-python/src/main.py index ac97f024..a8ff964e 100644 --- a/seeds/chunker-python/src/main.py +++ b/seeds/chunker-python/src/main.py @@ -25,6 +25,7 @@ arg_parser.add_argument("-o", "--outfile", help="Output passages file path.", required=True) arg_parser.add_argument("--maxwords", type=int, default=200, help="Max words per aggregate passage.") arg_parser.add_argument("--greedyagg", action=argparse.BooleanOptionalAction, help="Whether to greedily aggregate sibling nodes.") + arg_parser.add_argument("--excludetags", type=str, default="noscript,script,style", help="Comma-separated HTML tags from which to exclude text.") args = arg_parser.parse_args() html_file = open(args.infile, "r") @@ -34,6 +35,7 @@ chunker = HtmlChunker( max_words_per_aggregate_passage=args.maxwords, greedily_aggregate_sibling_nodes=args.greedyagg, + html_tags_to_exclude={tag for tag in args.excludetags.split(',')}, ) passages = chunker.chunk(html) diff --git a/seeds/chunker-python/tests/test_html_chunker.py b/seeds/chunker-python/tests/test_html_chunker.py index ffc035ba..4b53dc3c 100644 --- a/seeds/chunker-python/tests/test_html_chunker.py +++ b/seeds/chunker-python/tests/test_html_chunker.py @@ -46,6 +46,39 @@ def test_handles_escape_codes(self): ["Here's a paragraph."], ) + def test_handles_unicode_characters(self): + html = ( + "

Here is a" + " \u2119\u212b\u213e\u212b\u210A\u213e\u212b\u2119\u210F.

" + ) + + chunker = HtmlChunker( + max_words_per_aggregate_passage=10, + greedily_aggregate_sibling_nodes=False, + ) + + self.assertEqual( + chunker.chunk(html), + ["Here is a ℙÅℾÅℊℾÅℙℏ."], + ) + + def test_handles_byte_string(self): + html_bytes = ( + b"

Here is a" + b" \xe2\x84\x99\xe2\x84\xab\xe2\x84\xbe\xe2\x84\xab\xe2\x84\x8a\xe2\x84\xbe\xe2\x84\xab\xe2\x84\x99\xe2\x84\x8f.

" + ) + + chunker = HtmlChunker( + max_words_per_aggregate_passage=10, + greedily_aggregate_sibling_nodes=False, + ) + + # When using bytes, we must provide the decoding, in this case utf-8. + self.assertEqual( + chunker.chunk(html_bytes.decode("utf-8")), + ["Here is a ℙÅℾÅℊℾÅℙℏ."], + ) + def test_strips_whitespace_around_node_text(self): html = """
@@ -189,16 +222,19 @@ def test_does_not_join_split_text_nodes_within_p_tag_when_over_max(self): ], ) - def test_skips_non_content_text(self): + def test_excludes_text_from_default_html_tags(self): html = """ - - Title - - - - -

Paragraph

- + + + + Title + + + + +

Paragraph

+ + """ chunker = HtmlChunker( @@ -213,6 +249,34 @@ def test_skips_non_content_text(self): ], ) + def test_excludes_text_from_given_html_tags(self): + html = """ + + + + Title + + + + +

Paragraph

+ + + """ + + chunker = HtmlChunker( + max_words_per_aggregate_passage=10, + greedily_aggregate_sibling_nodes=False, + html_tags_to_exclude={" HEAD ", "p"}, + ) + + self.assertEqual( + chunker.chunk(html), + [ + '{"@context":"https://schema.org"}', + ], + ) + def test_greedily_aggregates_sibling_nodes(self): html = """
diff --git a/seeds/cloud-function/package.json b/seeds/cloud-function/package.json index a55cf2ba..0fbcb2c4 100644 --- a/seeds/cloud-function/package.json +++ b/seeds/cloud-function/package.json @@ -36,8 +36,8 @@ }, "dependencies": { "@google-labs/breadboard-server": "^0.1.3", - "@google-labs/breadboard": "^0.4.1", - "@google-labs/llm-starter": "^0.2.0", + "@google-labs/breadboard": "^0.5.0", + "@google-labs/llm-starter": "^0.2.1", "dotenv": "^16.3.1" } } diff --git a/seeds/core-kit/CHANGELOG.md b/seeds/core-kit/CHANGELOG.md new file mode 100644 index 00000000..50f332e7 --- /dev/null +++ b/seeds/core-kit/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog + +## [0.0.1] - 2023-11-08 + +- First release. Contains the following nodes: + - Moved from Breadboard: `passthrough`, `reflect`, `slot`, `include`, `import`, and `invoke` + - Graduated from Node Nursery: `batch`, `map` diff --git a/seeds/core-kit/README.md b/seeds/core-kit/README.md index f1939506..e2ae8fdc 100644 --- a/seeds/core-kit/README.md +++ b/seeds/core-kit/README.md @@ -1 +1,159 @@ -# Your README goes here \ No newline at end of file +# Breadboard Core Kit + +![Milestone](https://img.shields.io/badge/milestone-M2-red) ![Stability](https://img.shields.io/badge/stability-wip-green) + +A [Breadboard](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/) Kit containing nodes that enable composition and reuse of boards. + +## Node Reference + +This kit contains the following nodes: + +## The `passthrough` node + +This is a no-op node. It takes the input property bag and passes it along as output, unmodified. This node can be useful when the board needs an entry point, but the rest of the board forms a cycle. + +### Example: + +```js +board.input().wire("say->", board.passthrough().wire("say->", board.output())); + +board.runOnce({ + say: "Hello, world!", +}); + +console.log("result", result); +``` + +Will produce this output: + +```sh +result { say: 'Hello, world!' 
} +``` + +See [Chapter 9: Let's build a chatbot](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/docs/tutorial#chapter-9-lets-build-a-chat-bot) of Breadboard tutorial to see another example of usage. + +### Inputs + +- any properties + +### Outputs + +- the properties that were passed as inputs + +## The `invoke` node + +Use this node to invoke another board from this board. + +It recognizes `path`, `graph`, and `board` properties that specify, respectively, a file path or URL to the serialized board, directly the serialized-as-JSON board, and a `BoardCapability` (returned by `lambda` or `import`). + +The rest of the inputs in the property bag are passed along to the invoked board as its inputs. If other inputs were bound to the board via wires into the `lambda` or `import` node, then those have precedence over inputs passed here. + +The outputs of the invoked board will be passed along as outputs of the `invoke` node. + +### Inputs + +- `path`, which specifies the file path or URL to the serialized board to be included. +- `graph`, which is a serialized board +- `board`, a `BoardCapability` representing a board, created by `lambda` or `import`. +- any other properties are passed as inputs for the invoked board. + +### Outputs + +- the outputs of the invoked board + +## The `import` node + +Creates a lambda board from a pre-existing board, either loaded from `path` or passed as JSON via `graph`. All other inputs are bound to the board, which is returned as `board`. + +### Inputs + +- `path`, which specifies the file path or URL to the serialized board to be included. +- `graph`, which is a serialized board +- all other inputs are bound to the board + +### Outputs + +- `board`, a `BoardCapability`, which can be passed to `invoke` and other nodes that can invoke boards. + +## The `include` node (DEPRECATED) + +DEPRECATED: Use `invoke` instead + +Use this node to include another board into the current board.
It recognizes `path` or `$ref` properties that specify, respectively, file path or URL to the serialized-as-JSON board to be included. It also accepts the `slotted` property that must contain the serialized-as-JSON boards that will be slotted into the included board. + +The rest of the inputs in the property bag are passed along to the included board as its inputs. The outputs of the included board will be passed along as outputs of the `include` node. + +This enables treating the included board as a kind of a node: it takes inputs and provides outputs. + +### Example + +For an example of how to use the `include` node, see [Chapter 5: Including other boards](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/docs/tutorial#chapter-5-including-other-boards) of Breadboard tutorial. + +### Inputs + +- `path`, which specifies the file path to the serialized board to be included. Either this or `$ref` property is required. +- `$ref`, which specifies the URL of the serialized board to be included. Either this or `path` property is required. +- `slotted`, which specifies slotted boards that will be used to populate `slot` nodes in the included board. This property is optional. +- any other properties are passed as inputs for the included board. + +### Outputs + +- the outputs of the included board + +## The `slot` node (DEPRECATED) + +DEPRECATED. Instead pass boards either as URLs or as Boards from `lambda` and `invoke` them. + +Use this node to make a slot in a board. Adding a `slot` node turns a board into a sort of a template: each slot represents a placeholder that must be filled in when the node is included into another board. + +The node takes a `slot` property, which specifies the name of the slot, and passes the rest of arguments to the slotted board. The value of the `slot` property is used to match the slot with one of the slotted boards that is passed to the `include` node.
+ +### Example + +For an example of how to use the `slot` node, see [Chapter 6: Boards with slots](https://github.com/google/labs-prototypes/tree/main/seeds/breadboard/docs/tutorial#chapter-6-boards-with-slots) of Breadboard tutorial. + +### Inputs + +- `slot` - the name of the slot +- any other properties are passed as inputs for the slotted board + +### Outputs + +- the outputs of the included board + +## The `reflect` node + +This node is used to reflect the board itself. It has no required inputs and provides a JSON representation of the board as a `graph` output property. This node can be used for getting information that might be stored in the structure of the board. + +### Example + +```js +import { Board } from "@google-labs/breadboard"; + +const board = new Board(); + +board.input().wire("", board.reflect().wire("graph->", board.output())); + +const result = await board.runOnce({}); +console.log("result", result); +``` + +will print: + +```sh +result { + graph: { + edges: [ [Object], [Object] ], + nodes: [ [Object], [Object], [Object] ], + kits: [] + } +} +``` + +### Inputs + +- ignored + +### Outputs + +- `graph` -- JSON representation of the board diff --git a/seeds/core-kit/package.json b/seeds/core-kit/package.json index f59e63a9..f0ed859f 100644 --- a/seeds/core-kit/package.json +++ b/seeds/core-kit/package.json @@ -1,6 +1,5 @@ { "name": "@google-labs/core-kit", - "private": true, "version": "0.0.1", "description": "A Breadboard Kit containing nodes that enable composition and reuse of boards.", "main": "./dist/src/index.js", @@ -38,7 +37,7 @@ "bugs": { "url": "https://github.com/google/labs-prototypes/issues" }, - "homepage": "https://github.com/google/labs-prototypes#readme", + "homepage": "https://github.com/google/labs-prototypes/tree/main/seeds/core-kit#readme", "devDependencies": { "@ava/typescript": "^4.0.0", "@typescript-eslint/eslint-plugin": "^5.56.0", @@ -50,6 +49,6 @@ "@google-labs/llm-starter": "*" }, "dependencies": { - 
"@google-labs/breadboard": "*" + "@google-labs/breadboard": "^0.5.0" } } diff --git a/seeds/llm-starter/CHANGELOG.md b/seeds/llm-starter/CHANGELOG.md index 5f02f9e3..7be2b13b 100644 --- a/seeds/llm-starter/CHANGELOG.md +++ b/seeds/llm-starter/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [0.2.1] - 2023-11-08 + +- In `fetch`, do not add request body if method is `GET` +- In `generateText`, set `text` and `PALM_KEY` as required. + ## [0.2.0] - 2023-10-17 - **M2 Release** diff --git a/seeds/llm-starter/package.json b/seeds/llm-starter/package.json index a65ce0b8..a2b0eb29 100644 --- a/seeds/llm-starter/package.json +++ b/seeds/llm-starter/package.json @@ -1,6 +1,6 @@ { "name": "@google-labs/llm-starter", - "version": "0.2.0", + "version": "0.2.1", "description": "LLM Starter Kit for the Breadboard library", "main": "./dist/src/index.js", "exports": "./dist/src/index.js", @@ -51,7 +51,7 @@ "typescript": "^5.0.4" }, "dependencies": { - "@google-labs/breadboard": "^0.4.1", + "@google-labs/breadboard": "^0.5.0", "@google-labs/palm-lite": "^0.0.2", "@rgrove/parse-xml": "^4.1.0", "jsonata": "^2.0.3", diff --git a/seeds/llm-starter/src/nodes/fetch.ts b/seeds/llm-starter/src/nodes/fetch.ts index 13960801..deaaed9e 100644 --- a/seeds/llm-starter/src/nodes/fetch.ts +++ b/seeds/llm-starter/src/nodes/fetch.ts @@ -4,10 +4,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { - InputValues, - NodeDescriberFunction, - NodeHandler, +import { + StreamCapability, + type InputValues, + type NodeDescriberFunction, + type NodeHandler, } from "@google-labs/breadboard"; export type FetchOutputs = { @@ -32,9 +33,14 @@ export type FetchInputs = { */ body?: string; /** - * Whether or not to return raw text (as opposed to parsing JSON) + * Whether or not to return raw text (as opposed to parsing JSON). Has no + * effect when `stream` is true. 
*/ raw?: boolean; + /** + * Whether or not to return a stream + */ + stream?: boolean; }; export const fetchDescriber: NodeDescriberFunction = async () => { @@ -98,6 +104,7 @@ export default { body, headers = {}, raw, + stream, } = inputs as FetchInputs; if (!url) throw new Error("Fetch requires `url` input"); const init: RequestInit = { @@ -109,7 +116,14 @@ export default { init.body = JSON.stringify(body); } const data = await fetch(url, init); - const response = raw ? await data.text() : await data.json(); - return { response }; + if (stream) { + if (!data.body) { + throw new Error("Response is not streamable."); + } + return { response: new StreamCapability(data.body) }; + } else { + const response = raw ? await data.text() : await data.json(); + return { response }; + } }, } satisfies NodeHandler; diff --git a/seeds/llm-starter/src/nodes/run-javascript.ts b/seeds/llm-starter/src/nodes/run-javascript.ts index 71c3286e..3eb4a1a3 100644 --- a/seeds/llm-starter/src/nodes/run-javascript.ts +++ b/seeds/llm-starter/src/nodes/run-javascript.ts @@ -46,16 +46,30 @@ const runInBrowser = async ( args: string ): Promise<string> => { const runner = (code: string, functionName: string) => { - return `${code}\nself.onmessage = () => self.postMessage(JSON.stringify(${functionName}(${args})))`; + return `${code}\nself.onmessage = () => self.postMessage({ result: JSON.stringify(${functionName}(${args})) });self.onerror = (e) => self.postMessage({ error: e.message })`; }; const blob = new Blob([runner(code, functionName)], { type: "text/javascript", }); + type WebWorkerResultType = "error" | "result"; + type WebWorkerResult = { + [x in WebWorkerResultType]: string; + }; + const worker = new Worker(URL.createObjectURL(blob)); - const result = new Promise<string>((resolve) => { - worker.onmessage = (e) => resolve(e.data); + const result = new Promise<string>((resolve, reject) => { + worker.onmessage = (e) => { + const data = e.data as WebWorkerResult; + if (data.result) { + resolve(data.result); + return; + } else if (data.error) { 
+ // Reject explicitly: a throw inside this handler would never settle the promise. + reject(new Error(data.error)); + } + }; }); worker.postMessage("please"); return result; @@ -91,7 +105,7 @@ export const runJavascriptHandler: NodeHandlerFunction = async ( ); return raw ? result : { result }; } catch (e) { - // Remove everthing outside eval from the stack trace + // Remove everything outside eval from the stack trace const stack = (e as Error).stack; if (stack !== undefined) { (e as Error).stack = stack diff --git a/seeds/node-nursery-web/package.json b/seeds/node-nursery-web/package.json index dad86800..15a91b3e 100644 --- a/seeds/node-nursery-web/package.json +++ b/seeds/node-nursery-web/package.json @@ -9,6 +9,7 @@ "type": "module", "scripts": { "build": "FORCE_COLOR=1 tsc --b", + "test": "FORCE_COLOR=1 ava", "watch": "FORCE_COLOR=1 tsc --b --watch", "lint": "FORCE_COLOR=1 eslint . --ext .ts" }, @@ -47,7 +48,8 @@ "@typescript-eslint/parser": "^5.56.0", "ava": "^5.2.0", "npm": "^10.2.1", - "typescript": "^5.0.4" + "typescript": "^5.0.4", + "@google-labs/llm-starter": "*" }, "dependencies": { "@google-labs/breadboard": "*", diff --git a/seeds/node-nursery-web/src/index.ts b/seeds/node-nursery-web/src/index.ts index 2bb63fc4..4fbf1feb 100644 --- a/seeds/node-nursery-web/src/index.ts +++ b/seeds/node-nursery-web/src/index.ts @@ -7,12 +7,14 @@ import { KitBuilder } from "@google-labs/breadboard/kits"; import credentials from "./nodes/credentials.js"; import driveList from "./nodes/drive-list.js"; +import transformStream from "./nodes/transform-stream.js"; const NodeNurseryWeb = new KitBuilder({ url: "npm:@google-labs/node-nursery-web", }).build({ credentials, driveList, + transformStream, }); export default NodeNurseryWeb; diff --git a/seeds/node-nursery-web/src/nodes/transform-stream.ts b/seeds/node-nursery-web/src/nodes/transform-stream.ts new file mode 100644 index 00000000..5aad407d --- /dev/null +++ b/seeds/node-nursery-web/src/nodes/transform-stream.ts @@ -0,0 +1,62 @@ +/** + * @license + * Copyright 2023 Google LLC + * SPDX-License-Identifier: Apache-2.0 
+ */ + +import { + Board, + BreadboardCapability, + InputValues, + NodeHandlerContext, + OutputValues, + StreamCapability, + StreamCapabilityType, + isStreamCapability, +} from "@google-labs/breadboard"; + +export type TransformStreamInputs = InputValues & { + stream: StreamCapabilityType; + board?: BreadboardCapability; +}; + +const getTransformer = async ( + board?: BreadboardCapability, + context?: NodeHandlerContext +): Promise<Transformer> => { + if (board) { + const runnableBoard = await Board.fromBreadboardCapability( + board as BreadboardCapability + ); + return { + async transform(chunk, controller) { + const inputs = { chunk }; + const result = await runnableBoard.runOnce(inputs, context); + controller.enqueue(result.chunk); + }, + }; + } else + return { + transform(chunk, controller) { + controller.enqueue(chunk); + }, + }; +}; + +export default { + invoke: async ( + inputs: InputValues, + context?: NodeHandlerContext + ): Promise<OutputValues> => { + const { stream, board } = inputs as TransformStreamInputs; + if (!stream) throw new Error("The `stream` input is required"); + if (!isStreamCapability(stream)) + throw new Error("The `stream` input must be a `StreamCapability`."); + const transformer = await getTransformer(board, context); + const streamCapability = stream as StreamCapabilityType; + const outputStream = streamCapability.stream.pipeThrough( + new TransformStream(transformer) + ); + return { stream: new StreamCapability(outputStream) }; + }, +}; diff --git a/seeds/node-nursery-web/tests/transform-stream.ts b/seeds/node-nursery-web/tests/transform-stream.ts new file mode 100644 index 00000000..7db3970c --- /dev/null +++ b/seeds/node-nursery-web/tests/transform-stream.ts @@ -0,0 +1,129 @@ +/** + * @license + * Copyright 2023 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import test from "ava"; + +import transformStream, { + TransformStreamInputs, +} from "../src/nodes/transform-stream.js"; +import { + Board, + GraphDescriptor, + StreamCapability, + 
StreamCapabilityType, + asRuntimeKit, + callHandler, +} from "@google-labs/breadboard"; +import NodeNurseryWeb from "../src/index.js"; +import Starter from "@google-labs/llm-starter"; + +const toArray = async <T>(stream: ReadableStream<T>) => { + const results: T[] = []; + await stream.pipeTo( + new WritableStream({ + write(chunk) { + results.push(chunk); + }, + }) + ); + return results; +}; + +test("transform stream noop", async (t) => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(1); + controller.enqueue(2); + controller.enqueue(3); + controller.close(); + }, + }); + const inputs: TransformStreamInputs = { + stream: new StreamCapability(stream), + }; + const outputs = (await callHandler(transformStream, inputs, {})) as { + stream: StreamCapabilityType; + }; + const results = await toArray(outputs.stream.stream); + t.deepEqual(results, [1, 2, 3]); +}); + +test("transform stream with a board", async (t) => { + const board = new Board(); + board.input().wire("chunk->", board.output()); + + const graph = board as GraphDescriptor; + + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(1); + controller.enqueue(2); + controller.enqueue(3); + controller.close(); + }, + }); + const inputs: TransformStreamInputs = { + stream: new StreamCapability(stream), + board: { + kind: "board", + board: graph, + }, + }; + const outputs = (await callHandler(transformStream, inputs, {})) as { + stream: StreamCapabilityType; + }; + const results = await toArray(outputs.stream.stream); + t.deepEqual(results, [1, 2, 3]); +}); + +test("transform works in a board", async (t) => { + const board = new Board(); + const nursery = board.addKit(NodeNurseryWeb); + + board.input().wire( + "stream->", + nursery + .transformStream((board, input, output) => { + const starter = board.addKit(Starter); + + function run({ chunk }: { chunk: number }): string { + return `number: ${chunk}`; + } + + input.wire( + "chunk->", + starter + 
.runJavascript("run", { + code: run.toString(), + }) + .wire("result->chunk", output) + ); + }) + .wire("stream->", board.output()) + ); + + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(1); + controller.enqueue(2); + controller.enqueue(3); + controller.close(); + }, + }); + + const outputs = (await board.runOnce( + { + stream: new StreamCapability(stream), + }, + { + kits: [asRuntimeKit(Starter)], + } + )) as { + stream: StreamCapabilityType; + }; + const results = await toArray(outputs.stream.stream); + t.deepEqual(results, ["number: 1", "number: 2", "number: 3"]); +});