Skip to content

Commit

Permalink
feat: improve OpenAI model APIs and examples to better support audio,…
Browse files Browse the repository at this point in the history
… images, and tool calling (#707)
  • Loading branch information
mattjohnsonpint authored Jan 22, 2025
1 parent 8dca6e5 commit 67bd5cb
Show file tree
Hide file tree
Showing 27 changed files with 3,294 additions and 1,129 deletions.
2 changes: 2 additions & 0 deletions .trunk/configs/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
"omitif",
"omitnull",
"openai",
"openspeech",
"operationreport",
"PEMS",
"pgconn",
Expand Down Expand Up @@ -175,6 +176,7 @@
"textgeneration",
"tidwall",
"tinygo",
"toolcalling",
"tseslint",
"tsrv",
"typedarray",
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
- chore: remove localHypermodeModels list and handle 404s properly instead in local dev
[#703](https://github.com/hypermodeinc/modus/pull/703)
- fix: remove fallback to default time zone [#706](https://github.com/hypermodeinc/modus/pull/706)
- feat: improve OpenAI model APIs and examples to better support audio, images, and tool calling
[#707](https://github.com/hypermodeinc/modus/pull/707)

## 2025-01-09 - CLI 0.16.6

Expand Down
111 changes: 6 additions & 105 deletions sdk/assemblyscript/examples/textgeneration/assembly/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,109 +4,10 @@
* See the LICENSE file that accompanied this code for further details.
*/

import { JSON } from "json-as";
import { models } from "@hypermode/modus-sdk-as";
import { Product, sampleProductJson } from "./product";
// The examples have been split into separate files for clarity.
// See each file for more details about the specific example.

import {
OpenAIChatModel,
ResponseFormat,
SystemMessage,
UserMessage,
} from "@hypermode/modus-sdk-as/models/openai/chat";

// In this example, we will generate text using the OpenAI Chat model.
// See https://platform.openai.com/docs/api-reference/chat/create for more details
// about the options available on the model, which you can set on the input object.

// This model name should match the one defined in the modus.json manifest file.
const modelName: string = "text-generator";

// This function generates some text based on the instruction and prompt provided.
export function generateText(instruction: string, prompt: string): string {
// The imported ChatModel interface follows the OpenAI Chat completion model input format.
const model = models.getModel<OpenAIChatModel>(modelName);
const input = model.createInput([
new SystemMessage(instruction),
new UserMessage(prompt),
]);

// This is one of many optional parameters available for the OpenAI Chat model.
input.temperature = 0.7;

// Here we invoke the model with the input we created.
const output = model.invoke(input);

// The output is also specific to the ChatModel interface.
// Here we return the trimmed content of the first choice.
return output.choices[0].message.content.trim();
}

// This function generates a single product.
export function generateProduct(category: string): Product {
// We can get creative with the instruction and prompt to guide the model
// in generating the desired output. Here we provide a sample JSON of the
// object we want the model to generate.
const instruction = `Generate a product for the category provided.
Only respond with valid JSON object in this format:
${sampleProductJson}`;
const prompt = `The category is "${category}".`;

// Set up the input for the model, creating messages for the instruction and prompt.
const model = models.getModel<OpenAIChatModel>(modelName);
const input = model.createInput([
new SystemMessage(instruction),
new UserMessage(prompt),
]);

// Let's increase the temperature to get more creative responses.
// Be careful though, if the temperature is too high, the model may generate invalid JSON.
input.temperature = 1.2;

// This model also has a response format parameter that can be set to JSON,
// Which, along with the instruction, can help guide the model in generating valid JSON output.
input.responseFormat = ResponseFormat.Json;

// Here we invoke the model with the input we created.
const output = model.invoke(input);

// The output should contain the JSON string we asked for.
const json = output.choices[0].message.content.trim();
const product = JSON.parse<Product>(json);
return product;
}

// This function generates multiple products.
export function generateProducts(category: string, quantity: i32): Product[] {
// Similar to the previous example above, we can tailor the instruction and prompt
// to guide the model in generating the desired output. Note that understanding the behavior
// of the model is important to get the desired results. In this case, we need the model
// to return an _object_ containing an array, not an array of objects directly. That's because
// the model will not reliably generate an array of objects directly.
const instruction = `Generate ${quantity} products for the category provided.
Only respond with valid JSON object containing a valid JSON array named 'list', in this format:
{"list":[${sampleProductJson}]}`;
const prompt = `The category is "${category}".`;

// Set up the input for the model, creating messages for the instruction and prompt.
const model = models.getModel<OpenAIChatModel>(modelName);
const input = model.createInput([
new SystemMessage(instruction),
new UserMessage(prompt),
]);

// Adjust the model inputs, just like in the previous example.
// Be careful, if the temperature is too high, the model may generate invalid JSON.
input.temperature = 1.2;
input.responseFormat = ResponseFormat.Json;

// Here we invoke the model with the input we created.
const output = model.invoke(input);

// The output should contain the JSON string we asked for.
const json = output.choices[0].message.content.trim();

// We can parse that JSON to a compatible object, to get the data we're looking for.
const results = JSON.parse<Map<string, Product[]>>(json);
return results.get("list");
}
export * from "./simple";
export * from "./products";
export * from "./media";
export * from "./toolcalling";
181 changes: 181 additions & 0 deletions sdk/assemblyscript/examples/textgeneration/assembly/media.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* This example is part of the Modus project, licensed under the Apache License 2.0.
* You may modify and use this example in accordance with the license.
* See the LICENSE file that accompanied this code for further details.
*/

import { models, http } from "@hypermode/modus-sdk-as";

import {
OpenAIChatModel,
DeveloperMessage,
SystemMessage,
UserMessage,
TextContentPart,
AudioContentPart,
ImageContentPart,
Image,
Audio,
} from "@hypermode/modus-sdk-as/models/openai/chat";

// These examples demonstrate how to use audio or image data with OpenAI chat models.
// Currently, audio can be used for input or output, but images can be used only for input.

/**
 * Container used by these examples to return media (an image or audio clip)
 * together with its generated description or transcription.
 */
class Media {
  // The MIME content type of the media, e.g. "audio/wav".
  contentType!: string;

  // The raw binary data of the media.
  // This value will be base64 encoded when used in an API response.
  data!: Uint8Array;

  // A text description or transcription of the media.
  text!: string;
}

/**
* This function generates an audio response based on the instruction and prompt provided.
*/
export function generateAudio(instruction: string, prompt: string): Media {
// Note, this is similar to the generateText example, but with audio output requested.

// We'll generate the audio using an audio-enabled OpenAI chat model.
const model = models.getModel<OpenAIChatModel>("audio-model");

const input = model.createInput([
new SystemMessage(instruction),
new UserMessage(prompt),
]);

input.temperature = 0.7;

// Request audio output from the model.
// Note, this is a convenience method that requests audio modality and sets the voice and format.
// You can also set these values manually on the input object, if you prefer.
input.requestAudioOutput("ash", "wav");

const output = model.invoke(input);

// Return the audio and its transcription.
// Note that the message Content field will be empty for audio responses.
// Instead, the text will be in the Message.Audio.Transcript field.
const audio = output.choices[0].message.audio!;

const media = <Media>{
contentType: "audio/wav",
data: audio.data,
text: audio.transcript.trim(),
};

return media;
}

/**
* This function generates text that describes the image at the provided url.
* In this example the image url is passed to the model, and the model retrieves the image.
*/
export function describeImage(url: string): string {
// Note that because the model retrieves the image, any URL can be used.
// However, this means that there is a risk of sending data to an unauthorized host, if the URL is not hardcoded or sanitized.
// See the describeRandomImage function below for a safer approach.

const model = models.getModel<OpenAIChatModel>("text-generator");

const input = model.createInput([
UserMessage.fromParts([
new TextContentPart("Describe this image."),
new ImageContentPart(Image.fromURL(url)),
]),
]);

const output = model.invoke(input);

return output.choices[0].message.content.trim();
}

/**
* This function fetches a random image, and then generates text that describes it.
* In this example the image is retrieved by the function before passing it as data to the model.
*/
export function describeRandomImage(): Media {
// Because this approach fetches the image directly, it is safer than the describeImage function above.
// The host URL is allow-listed in the modus.json file, so we can trust the image source.

// Fetch a random image from the Picsum API. We'll just hardcode the size to make the demo simple to call.
const response = http.fetch("https://picsum.photos/640/480");
const data = Uint8Array.wrap(response.body);
const contentType = response.headers.get("Content-Type")!;

// Describe the image using the OpenAI chat model.
const model = models.getModel<OpenAIChatModel>("text-generator");

const input = model.createInput([
UserMessage.fromParts([
new TextContentPart("Describe this image."),
new ImageContentPart(Image.fromData(data, contentType)),
]),
]);

input.temperature = 0.7;

const output = model.invoke(input);

// Return the image and its generated description.
const text = output.choices[0].message.content.trim();
const media = <Media>{
contentType,
data,
text,
};

return media;
}

/**
* This function fetches a random "Harvard Sentences" speech file from OpenSpeech, and then generates a transcript from it.
* The sentences are from https://www.cs.columbia.edu/~hgs/audio/harvard.html
*/
export function transcribeRandomSpeech(): Media {
// Pick a random file number from the list of available here:
// https://www.voiptroubleshooter.com/open_speech/american.html
const numbers: i32[] = [
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 30, 31, 32, 34, 35, 36, 37, 38, 39,
40, 57, 58, 59, 60, 61,
];
const num = numbers[<i32>(Math.random() * numbers.length)];

// Fetch the speech file corresponding to the number.
const url = `https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_00${num}_8k.wav`;
const response = http.fetch(url);
const data = Uint8Array.wrap(response.body);

// Transcribe the audio using an audio-enabled OpenAI chat model.
const model = models.getModel<OpenAIChatModel>("audio-model");

const input = model.createInput([
new DeveloperMessage(
"Do not include any newlines or surrounding quotation marks in the response. Omit any explanation beyond the request.",
),
UserMessage.fromParts([
new TextContentPart(
"Provide an exact transcription of the contents of this audio file.",
),
new AudioContentPart(Audio.fromData(data, "wav")),
]),
]);

const output = model.invoke(input);

// Return the audio file and its transcript.
const text = output.choices[0].message.content.trim();
const media = <Media>{
contentType: "audio/wav",
data,
text,
};

return media;
}
25 changes: 0 additions & 25 deletions sdk/assemblyscript/examples/textgeneration/assembly/product.ts

This file was deleted.

Loading

0 comments on commit 67bd5cb

Please sign in to comment.