feat: add filter for query in ts templates #172

Merged · 17 commits · Jul 22, 2024
Changes from 8 commits
5 changes: 5 additions & 0 deletions .changeset/curvy-penguins-work.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

add filter for query in ts templates
7 changes: 7 additions & 0 deletions helpers/typescript.ts
@@ -55,6 +55,12 @@ export const installTSTemplate = async ({
nextConfigJson.output = "export";
nextConfigJson.images = { unoptimized: true };
console.log("\nUsing static site generation\n");

+      // static export has no backend, so overwrite next.config.mjs with next.config.simple.mjs
+      await fs.copyFile(
+        path.join(root, "next.config.simple.mjs"),
+        path.join(root, "next.config.mjs"),
+      );
} else {
if (vectorDb === "milvus") {
nextConfigJson.experimental.serverComponentsExternalPackages =
@@ -64,6 +70,7 @@ export const installTSTemplate = async ({
);
}
}
+  await fs.rm(path.join(root, "next.config.simple.mjs"));
await fs.writeFile(
nextConfigJsonFile,
JSON.stringify(nextConfigJson, null, 2) + os.EOL,
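Note on the two hunks above: the static-export branch copies the plain config over next.config.mjs, and the template's next.config.simple.mjs is removed in every case afterwards, so exactly one config ships. A minimal sketch of the intended flow (the `staticExport` flag and `selectNextConfig` helper are illustrative, not part of the PR):

```ts
import fs from "node:fs/promises";
import path from "node:path";

// Sketch: pick the Next.js config at install time (hypothetical helper).
async function selectNextConfig(root: string, staticExport: boolean) {
  if (staticExport) {
    // no backend: use the plain config without the withLlamaIndex wrapper
    await fs.copyFile(
      path.join(root, "next.config.simple.mjs"),
      path.join(root, "next.config.mjs"),
    );
  }
  // either way, remove the template copy so it doesn't ship with the project
  await fs.rm(path.join(root, "next.config.simple.mjs"));
}
```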
38 changes: 36 additions & 2 deletions templates/components/engines/typescript/agent/chat.ts
@@ -1,4 +1,9 @@
-import { BaseToolWithCall, OpenAIAgent, QueryEngineTool } from "llamaindex";
+import {
+  BaseToolWithCall,
+  MetadataFilters,
+  OpenAIAgent,
+  QueryEngineTool,
+} from "llamaindex";
import fs from "node:fs/promises";
import path from "node:path";
import { getDataSource } from "./index";
@@ -14,7 +19,7 @@ export async function createChatEngine(documentIds?: string[]) {
tools.push(
new QueryEngineTool({
queryEngine: index.asQueryEngine({
-        preFilters: undefined, // TODO: Add filters once LITS supports it (getQueryFilters)
+        preFilters: generateFilters(documentIds || []),
}),
metadata: {
name: "data_query_engine",
@@ -41,3 +46,32 @@ export async function createChatEngine(documentIds?: string[]) {
systemPrompt: process.env.SYSTEM_PROMPT,
});
}

+function generateFilters(documentIds: string[]): MetadataFilters | undefined {
+  if (!documentIds.length) {
+    return {
+      filters: [
+        {
+          key: "private",
+          value: ["true"],
+          operator: "nin",
+        },
+      ],
+    };
+  }
+  return {
+    filters: [
+      {
+        key: "private",
+        value: "true",
+        operator: "!=",
+      },
+      {
+        key: "doc_id",
+        value: documentIds,
+        operator: "in",
+      },
+    ],
+    condition: "or",
+  };
+}
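In effect: with no selected documents, retrieval is restricted to public documents; with selected documents, the `or` condition admits everything public plus the explicitly chosen private ones. A sketch of the expected outputs (hypothetical assertions — `generateFilters` is module-private in the template):

```ts
import { deepStrictEqual } from "node:assert";

// No document IDs: exclude anything marked private.
deepStrictEqual(generateFilters([]), {
  filters: [{ key: "private", value: ["true"], operator: "nin" }],
});

// With document IDs: public documents OR the selected private ones.
deepStrictEqual(generateFilters(["doc-1"]), {
  filters: [
    { key: "private", value: "true", operator: "!=" },
    { key: "doc_id", value: ["doc-1"], operator: "in" },
  ],
  condition: "or",
});
```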
@@ -18,8 +18,10 @@ export async function runPipeline(documents: Document[], filename: string) {
for (const document of documents) {
document.metadata = {
...document.metadata,
doc_id: document.id_,
file_name: filename,
private: "true", // to separate from other public documents
is_local_file: "true", // to distinguish from cloud data sources
};
}

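With this hunk, a privately uploaded file carries metadata like the following (keys from the diff; the doc_id and file_name values are illustrative):

```ts
// Illustrative metadata for one uploaded document:
const metadata = {
  doc_id: "doc-123", // document.id_, matched by the "in" filter in chat.ts
  file_name: "report.pdf",
  private: "true", // hidden unless its doc_id is explicitly selected
  is_local_file: "true", // ingested locally, so never fetched from LlamaCloud
};
```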
@@ -35,9 +35,9 @@ export function retrieveDocumentIds(annotations?: JSONValue[]): string[] {
) {
const files = data.files as DocumentFile[];
for (const file of files) {
-        if (Array.isArray(file.content)) {
+        if (Array.isArray(file.content.value)) {
// it's an array, so it's an array of doc IDs
-          for (const id of file.content) {
+          for (const id of file.content.value) {
ids.push(id);
}
}
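The uploaded-file annotation now nests its payload under `content.value`. A hedged sketch of the shape this code assumes (field names inferred from the access pattern here, not from a published type):

```ts
// Inferred shape (assumption): when content.value is an array, it holds the
// doc IDs of the uploaded file; the non-array case is skipped by this loop.
interface DocumentFile {
  content: {
    value: string | string[];
  };
}
```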
12 changes: 6 additions & 6 deletions templates/components/llamaindex/typescript/streaming/events.ts
@@ -77,17 +77,18 @@ export function createCallbackManager(stream: StreamData) {
const callbackManager = new CallbackManager();

callbackManager.on("retrieve-end", async (data) => {
-    const { nodes, query } = data.detail.payload;
+    const { nodes, query } = data.detail;
await appendSourceData(stream, nodes);
appendEventData(stream, `Retrieving context for query: '${query}'`);
appendEventData(
stream,
`Retrieved ${nodes.length} sources to use as context for the query`,
);
+    LLamaCloudFileService.downloadFiles(nodes); // don't await to avoid blocking chat streaming
});

callbackManager.on("llm-tool-call", (event) => {
-    const { name, input } = event.detail.payload.toolCall;
+    const { name, input } = event.detail.toolCall;
const inputString = Object.entries(input)
.map(([key, value]) => `${key}: ${value}`)
.join(", ");
@@ -98,7 +99,7 @@ export function createCallbackManager(stream: StreamData) {
});

callbackManager.on("llm-tool-result", (event) => {
-    const { toolCall, toolResult } = event.detail.payload;
+    const { toolCall, toolResult } = event.detail;
appendToolData(stream, toolCall, toolResult);
});

@@ -118,9 +119,8 @@ async function getNodeUrl(metadata: Metadata) {
const pipelineId = metadata["pipeline_id"];
if (pipelineId && !isLocalFile) {
// file is from LlamaCloud and was not ingested locally
-    // TODO trigger but don't await file download and just use convention to generate the URL (see Python code)
-    // return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${pipelineId}\$${fileName}`;
-    return await LLamaCloudFileService.getFileUrl(fileName, pipelineId);
+    const name = LLamaCloudFileService.toDownloadedName(pipelineId, fileName);
+    return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
}
const isPrivate = metadata["private"] === "true";
const folder = isPrivate ? "output/uploaded" : "data";
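getNodeUrl no longer awaits a per-node LlamaCloud lookup: the URL is derived purely from the `<pipelineId>$<fileName>` naming convention, while `downloadFiles` fills in the file in the background. A minimal sketch (illustrative values):

```ts
// Convention-based URL, mirroring toDownloadedName in service.ts:
const FILE_DELIMITER = "$";
const pipelineId = "pipe-1"; // illustrative
const fileName = "guide.pdf"; // illustrative
const name = `${pipelineId}${FILE_DELIMITER}${fileName}`; // "pipe-1$guide.pdf"
const url = `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
```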
135 changes: 75 additions & 60 deletions templates/components/llamaindex/typescript/streaming/service.ts
@@ -1,86 +1,66 @@
import { Metadata, NodeWithScore } from "llamaindex";
import fs from "node:fs";
import https from "node:https";
import path from "node:path";

const LLAMA_CLOUD_OUTPUT_DIR = "output/llamacloud";
const LLAMA_CLOUD_BASE_URL = "https://cloud.llamaindex.ai/api/v1";
+const FILE_DELIMITER = "$"; // delimiter between pipelineId and filename

-export interface LlamaCloudFile {
+interface LlamaCloudFile {
  name: string;
  file_id: string;
  project_id: string;
}

export class LLamaCloudFileService {
-  static async getFiles(pipelineId: string): Promise<LlamaCloudFile[]> {
-    const url = `${LLAMA_CLOUD_BASE_URL}/pipelines/${pipelineId}/files`;
-    const headers = {
-      Accept: "application/json",
-      Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
-    };
-    const response = await fetch(url, { method: "GET", headers });
-    const data = await response.json();
-    return data;
+  public static async downloadFiles(nodes: NodeWithScore<Metadata>[]) {
+    const files = this.nodesToDownloadFiles(nodes);
+    if (!files.length) return;
+    console.log("Downloading files from LlamaCloud...");
+    for (const file of files) {
+      await this.downloadFile(file.pipelineId, file.fileName);
+    }
  }

-  static async getFileDetail(
-    projectId: string,
-    fileId: string,
-  ): Promise<{ url: string }> {
-    const url = `${LLAMA_CLOUD_BASE_URL}/files/${fileId}/content?project_id=${projectId}`;
-    const headers = {
-      Accept: "application/json",
-      Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
-    };
-    const response = await fetch(url, { method: "GET", headers });
-    const data = (await response.json()) as { url: string };
-    return data;
+  public static toDownloadedName(pipelineId: string, fileName: string) {
+    return `${pipelineId}${FILE_DELIMITER}${fileName}`;
  }

-  static async getFileUrl(
-    name: string,
-    pipelineId: string,
-  ): Promise<string | null> {
-    try {
-      const files = await this.getFiles(pipelineId);
-      for (const file of files) {
-        if (file.name === name) {
-          const fileId = file.file_id;
-          const projectId = file.project_id;
-          const fileDetail = await this.getFileDetail(projectId, fileId);
-          const localFileUrl = this.downloadFile(fileDetail.url, fileId, name);
-          return localFileUrl;
-        }
-      }
-      return null;
-    } catch (error) {
-      console.error("Error fetching file from LlamaCloud:", error);
-      return null;
-    }
+  private static nodesToDownloadFiles(nodes: NodeWithScore<Metadata>[]) {
+    const downloadFiles: Array<{
+      pipelineId: string;
+      fileName: string;
+    }> = [];
+    for (const node of nodes) {
+      const isLocalFile = node.node.metadata["is_local_file"] === "true";
+      const pipelineId = node.node.metadata["pipeline_id"];
+      const fileName = node.node.metadata["file_name"];
+      if (isLocalFile || !pipelineId || !fileName) continue;
+      const isDuplicate = downloadFiles.some(
+        (f) => f.pipelineId === pipelineId && f.fileName === fileName,
+      );
+      if (!isDuplicate) {
+        downloadFiles.push({ pipelineId, fileName });
+      }
+    }
+    return downloadFiles;
  }

-  static downloadFile(url: string, fileId: string, filename: string) {
-    const FILE_DELIMITER = "$"; // delimiter between fileId and filename
-    const downloadedFileName = `${fileId}${FILE_DELIMITER}${filename}`;
-    const downloadedFilePath = path.join(
-      LLAMA_CLOUD_OUTPUT_DIR,
-      downloadedFileName,
-    );
-    const urlPrefix = `${process.env.FILESERVER_URL_PREFIX}/${LLAMA_CLOUD_OUTPUT_DIR}`;
-    const fileUrl = `${urlPrefix}/${downloadedFileName}`;
-
+  private static async downloadFile(pipelineId: string, fileName: string) {
    try {
+      const downloadedName = this.toDownloadedName(pipelineId, fileName);
+      const downloadedPath = path.join(LLAMA_CLOUD_OUTPUT_DIR, downloadedName);
+
      // Check if file already exists
-      if (fs.existsSync(downloadedFilePath)) return fileUrl;
+      if (fs.existsSync(downloadedPath)) return;

      // Create directory if it doesn't exist
      if (!fs.existsSync(LLAMA_CLOUD_OUTPUT_DIR)) {
        fs.mkdirSync(LLAMA_CLOUD_OUTPUT_DIR, { recursive: true });
      }
+      const urlToDownload = await this.getFileUrlByName(pipelineId, fileName);
+      if (!urlToDownload) throw new Error("File not found in LlamaCloud");

-      const file = fs.createWriteStream(downloadedFilePath);
+      const file = fs.createWriteStream(downloadedPath);
      https
-        .get(url, (response) => {
+        .get(urlToDownload, (response) => {
          response.pipe(file);
          file.on("finish", () => {
            file.close(() => {
@@ -89,15 +69,50 @@ export class LLamaCloudFileService {
          });
        })
        .on("error", (err) => {
-          fs.unlink(downloadedFilePath, () => {
+          fs.unlink(downloadedPath, () => {
            console.error("Error downloading file:", err);
            throw err;
          });
        });
-
-      return fileUrl;
    } catch (error) {
      throw new Error(`Error downloading file from LlamaCloud: ${error}`);
    }
  }
+
+  private static async getFileUrlByName(
+    pipelineId: string,
+    name: string,
+  ): Promise<string | null> {
+    const files = await this.getAllFiles(pipelineId);
+    const file = files.find((file) => file.name === name);
+    if (!file) return null;
+    return await this.getFileUrlById(file.project_id, file.file_id);
+  }
+
+  private static async getFileUrlById(
+    projectId: string,
+    fileId: string,
+  ): Promise<string> {
+    const url = `${LLAMA_CLOUD_BASE_URL}/files/${fileId}/content?project_id=${projectId}`;
+    const headers = {
+      Accept: "application/json",
+      Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
+    };
+    const response = await fetch(url, { method: "GET", headers });
+    const data = (await response.json()) as { url: string };
+    return data.url;
+  }
+
+  private static async getAllFiles(
+    pipelineId: string,
+  ): Promise<LlamaCloudFile[]> {
+    const url = `${LLAMA_CLOUD_BASE_URL}/pipelines/${pipelineId}/files`;
+    const headers = {
+      Accept: "application/json",
+      Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
+    };
+    const response = await fetch(url, { method: "GET", headers });
+    const data = await response.json();
+    return data;
+  }
}
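Taken together, the refactor inverts the old flow: instead of resolving and downloading per node while rendering source URLs, the retrieve-end callback fires one deduplicated batch download and the URL is computed by convention. A hedged usage sketch (the node literal and cast are illustrative):

```ts
import { Metadata, NodeWithScore } from "llamaindex";
import { LLamaCloudFileService } from "./service"; // hypothetical import path

const nodes = [
  {
    node: { metadata: { pipeline_id: "pipe-1", file_name: "guide.pdf" } },
    score: 0.9,
  },
] as unknown as NodeWithScore<Metadata>[];

// Skips is_local_file nodes, dedupes by (pipelineId, fileName), and caches
// each file at output/llamacloud/<pipelineId>$<fileName>.
LLamaCloudFileService.downloadFiles(nodes); // not awaited, as in events.ts
```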
2 changes: 2 additions & 0 deletions templates/components/vectordbs/python/llamacloud/generate.py
@@ -31,8 +31,10 @@ def generate_datasource():
documents = get_documents()

# Set is_local_file=true to distinguish locally ingested files from LlamaCloud files
+    # Set private=false to mark the document as public (required for filtering)
for doc in documents:
doc.metadata["is_local_file"] = "true"
doc.metadata["private"] = "false"

LlamaCloudIndex.from_documents(
documents=documents,
3 changes: 3 additions & 0 deletions templates/components/vectordbs/python/none/generate.py
@@ -21,6 +21,9 @@ def generate_datasource():
storage_dir = os.environ.get("STORAGE_DIR", "storage")
# load the documents and create the index
documents = get_documents()
+    # Set private=false to mark the document as public (required for filtering)
+    for doc in documents:
+        doc.metadata["private"] = "false"
index = VectorStoreIndex.from_documents(
documents,
)
@@ -10,10 +10,12 @@ dotenv.config();
async function loadAndIndex() {
const documents = await getDocuments();
// Set is_local_file=true to distinguish locally ingested files from LlamaCloud files
+  // Set private=false to mark the document as public (required for filtering)
for (const document of documents) {
document.metadata = {
...document.metadata,
is_local_file: "true",
private: "false",
};
}
await getDataSource();
5 changes: 5 additions & 0 deletions templates/components/vectordbs/typescript/none/generate.ts
@@ -25,6 +25,11 @@ async function generateDatasource() {
persistDir: STORAGE_CACHE_DIR,
});
const documents = await getDocuments();
+  // Set private=false to mark the document as public (required for filtering)
+  documents.forEach((doc) => {
+    doc.metadata["private"] = "false";
+  });

await VectorStoreIndex.fromDocuments(documents, {
storageContext,
});
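With these generate scripts updated, the metadata contract is consistent across data sources: generated/public documents carry private: "false", uploads carry private: "true" plus a doc_id, and the filters in chat.ts key off exactly these values. A small type sketch of that contract (an assumption drawn from the diffs, not a type in the codebase):

```ts
// Assumed metadata contract used by generateFilters:
interface FilterableMetadata {
  private: "true" | "false"; // "false" for generated/public data
  doc_id?: string; // set for uploaded files; matched by the "in" filter
}
```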
2 changes: 1 addition & 1 deletion templates/types/streaming/express/package.json
@@ -20,7 +20,7 @@
"dotenv": "^16.3.1",
"duck-duck-scrape": "^2.2.5",
"express": "^4.18.2",
"llamaindex": "0.4.14",
"llamaindex": "0.5.6",
"pdf2json": "3.0.5",
"ajv": "^8.12.0",
"@e2b/code-interpreter": "^0.0.5",
3 changes: 2 additions & 1 deletion templates/types/streaming/nextjs/next.config.mjs
@@ -1,8 +1,9 @@
/** @type {import('next').NextConfig} */
import fs from "fs";
+import withLlamaIndex from "llamaindex/next";
import webpack from "./webpack.config.mjs";

const nextConfig = JSON.parse(fs.readFileSync("./next.config.json", "utf-8"));
nextConfig.webpack = webpack;

-export default nextConfig;
+export default withLlamaIndex(nextConfig);
8 changes: 8 additions & 0 deletions templates/types/streaming/nextjs/next.config.simple.mjs
@@ -0,0 +1,8 @@
/** @type {import('next').NextConfig} */
import fs from "fs";
import webpack from "./webpack.config.mjs";

const nextConfig = JSON.parse(fs.readFileSync("./next.config.json", "utf-8"));
nextConfig.webpack = webpack;

export default nextConfig;