/**
 * Shared RAG context preparation helper.
 *
 * Provides:
 * - Source formatting
 * - Duplicate chunk suppression (exact match on the trimmed chunk text)
 * - Configurable character budget
 * - Bounded integer parsing
 */

export interface RagSource {
  title: string;
  page: number | null;
  content: string;
  source: string;
}

export interface RagContextResult {
  /** Formatted context string (ready to inject into the LLM prompt). */
  context: string;
  /** Number of unique sources included. */
  sourceCount: number;
  /** Total characters in the formatted context. */
  charCount: number;
  /** Whether the result was truncated by the budget. */
  truncated: boolean;
}

const DEFAULT_CHAR_BUDGET = 8_000;
const MIN_ENV_CHAR_BUDGET = 100;
const MAX_ENV_CHAR_BUDGET = 50_000;
const MAX_N_RESULTS = 20;
const DEFAULT_N_RESULTS = 8;

/**
 * Parse an integer from an unknown value, returning a bounded result.
 *
 * Only numbers and non-blank strings are parsed. Everything else (including
 * `undefined`, `null`, `''`, whitespace-only strings, booleans and objects)
 * yields `defaultVal` rather than being coerced to 0/1 and clamped.
 *
 * @param value - Raw value (string, number, or undefined).
 * @param defaultVal - Fallback when the value is missing or not numeric.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound.
 * @returns The value rounded to the nearest integer and clamped to `[min, max]`,
 *          or `defaultVal` (returned as-is, not clamped) when parsing fails.
 */
export function parseBoundedInt(
  value: unknown,
  defaultVal: number,
  min: number = 1,
  max: number = MAX_N_RESULTS,
): number {
  let parsed: number;
  if (typeof value === 'number') {
    parsed = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    // Number('') and Number('  ') are 0, so blank strings are rejected above.
    parsed = Number(value);
  } else {
    return defaultVal;
  }

  if (Number.isNaN(parsed)) return defaultVal;
  return Math.max(min, Math.min(max, Math.round(parsed)));
}

/**
 * Format a single RAG source into one newline-terminated line.
 *
 * @param source - The source to render; the page segment is omitted when `page` is null.
 * @param index - Zero-based position; rendered one-based as `Source N)`.
 */
export function formatSource(source: RagSource, index: number): string {
  const pageStr = source.page != null ? `, Page: ${source.page}` : '';
  return `Source ${index + 1}) Title: ${source.title}${pageStr}, Content: ${source.content}\n`;
}

/**
 * Pick the effective character budget.
 *
 * A non-empty `budgetEnv` takes precedence and is clamped to
 * `[MIN_ENV_CHAR_BUDGET, MAX_ENV_CHAR_BUDGET]`. If it cannot be parsed, the
 * caller-supplied `budget` is used instead of being silently discarded.
 */
function resolveCharBudget(budget: number, budgetEnv?: string): number {
  // A NaN budget would make every `length > budget` comparison false and
  // disable truncation altogether, so fall back to the default.
  const callerBudget = Number.isNaN(budget) ? DEFAULT_CHAR_BUDGET : budget;
  if (!budgetEnv) return callerBudget;
  return parseBoundedInt(budgetEnv, callerBudget, MIN_ENV_CHAR_BUDGET, MAX_ENV_CHAR_BUDGET);
}

/**
 * Prepare a RAG context string from raw Chroma results.
 *
 * 1. Pairs `documents[i]` with `metadatas[i]` (up to the shorter array) and
 *    skips documents that are empty after trimming.
 * 2. Drops chunks whose trimmed text is exactly equal to an earlier chunk.
 * 3. Formats the remaining sources with `formatSource`.
 * 4. Stops at the first source that would push the context past the budget;
 *    sources are never cut mid-text, so the context may be empty when even
 *    the first source does not fit.
 *
 * @param metadatas - Chroma `metadatas` array (first element). Null/undefined
 *                    entries get the defaults `Untitled` / no page / empty source.
 * @param documents - Chroma `documents` array (first element).
 * @param budget - Maximum characters for the formatted context.
 * @param budgetEnv - Optional environment variable override (takes precedence over `budget`).
 * @returns The formatted context plus the source count, character count and truncation flag.
 */
export function prepareRagContext(
  metadatas: Array<{ title: string; page?: number | null; source: string } | null | undefined>,
  documents: string[] | undefined | null,
  budget: number = DEFAULT_CHAR_BUDGET,
  budgetEnv?: string,
): RagContextResult {
  const charBudget = resolveCharBudget(budget, budgetEnv);

  // 1. Build the RagSource list, deduplicating on the full trimmed content.
  //    Keying on a prefix would discard distinct chunks that merely start
  //    with the same boilerplate.
  const seen = new Set<string>();
  const sources: RagSource[] = [];

  if (documents && metadatas) {
    const pairCount = Math.min(documents.length, metadatas.length);
    for (let i = 0; i < pairCount; i++) {
      const content = (documents[i] ?? '').trim();
      if (!content) continue;

      if (seen.has(content)) continue;
      seen.add(content);

      const meta = metadatas[i];
      sources.push({
        title: meta?.title ?? 'Untitled',
        page: meta?.page ?? null,
        content,
        source: meta?.source ?? '',
      });
    }
  }

  // 2. Format sources in order until the budget would be exceeded.
  let context = '';
  let sourceCount = 0;
  let truncated = false;

  for (let i = 0; i < sources.length; i++) {
    const formatted = formatSource(sources[i], i);

    if (context.length + formatted.length > charBudget) {
      truncated = true;
      break;
    }

    context += formatted;
    sourceCount++;
  }

  return {
    context,
    sourceCount,
    charCount: context.length,
    truncated,
  };
}

/**
 * Resolve the Chroma query count: `requested` rounded and clamped to
 * `[1, MAX_N_RESULTS]`, or `DEFAULT_N_RESULTS` when it is missing or NaN.
 */
export function defaultNResults(requested?: number): number {
  return parseBoundedInt(requested, DEFAULT_N_RESULTS, 1, MAX_N_RESULTS);
}
// Unit tests for the shared RAG context helpers in `@/utils/app/rag-context`.
//
// Assertions are written against whole formatted fragments (e.g. `Title: B`)
// rather than single letters, and the budget-override tests are built so that
// they fail if the override is ignored.
import { describe, expect, it } from 'vitest';
import {
  parseBoundedInt,
  formatSource,
  prepareRagContext,
  defaultNResults,
  type RagSource,
} from '@/utils/app/rag-context';

describe('parseBoundedInt', () => {
  it('should return the default when value is undefined', () => {
    expect(parseBoundedInt(undefined, 8)).toBe(8);
  });

  it('should return the default when value is null', () => {
    expect(parseBoundedInt(null, 8)).toBe(8);
  });

  it('should return the default for NaN', () => {
    expect(parseBoundedInt('not-a-number', 8)).toBe(8);
  });

  it('should clamp values below the minimum', () => {
    expect(parseBoundedInt(-5, 8, 1, 20)).toBe(1);
    expect(parseBoundedInt(0, 8, 1, 20)).toBe(1);
  });

  it('should clamp values above the maximum', () => {
    expect(parseBoundedInt(999, 8, 1, 20)).toBe(20);
  });

  it('should parse string numbers', () => {
    expect(parseBoundedInt('12', 8, 1, 20)).toBe(12);
  });

  it('should round floats', () => {
    expect(parseBoundedInt(4.7, 8, 1, 20)).toBe(5);
    expect(parseBoundedInt(4.2, 8, 1, 20)).toBe(4);
  });

  it('should return a valid number when value is a number inside a string', () => {
    expect(parseBoundedInt('7', 8)).toBe(7);
  });
});

describe('formatSource', () => {
  it('should format a source with a page number', () => {
    const source: RagSource = { title: 'Doc A', page: 3, content: 'Hello world', source: 'file.pdf' };
    const result = formatSource(source, 0);
    expect(result).toBe('Source 1) Title: Doc A, Page: 3, Content: Hello world\n');
  });

  it('should format a source without a page number', () => {
    const source: RagSource = { title: 'Doc B', page: null, content: 'No page', source: 'notes.txt' };
    const result = formatSource(source, 5);
    expect(result).toBe('Source 6) Title: Doc B, Content: No page\n');
  });

  it('should handle empty content', () => {
    const source: RagSource = { title: 'Empty', page: null, content: '', source: '' };
    const result = formatSource(source, 2);
    expect(result).toBe('Source 3) Title: Empty, Content: \n');
  });
});

describe('prepareRagContext', () => {
  const sampleMetadatas = [
    { title: 'Paper A', page: 1, source: 'paper-a.pdf' },
    { title: 'Paper A', page: 2, source: 'paper-a.pdf' },
    { title: 'Paper B', page: 1, source: 'paper-b.pdf' },
    { title: 'Paper A', page: 1, source: 'paper-a.pdf' }, // duplicate content below
  ];

  const sampleDocuments = [
    'The quick brown fox jumps over the lazy dog.',
    'This is a second paragraph about the same topic.',
    'Paper B discusses alternative approaches entirely.',
    'The quick brown fox jumps over the lazy dog.', // identical to doc[0]
  ];

  it('should produce a formatted context string', () => {
    const result = prepareRagContext(sampleMetadatas, sampleDocuments, 50_000);
    expect(result.sourceCount).toBeGreaterThan(0);
    expect(result.charCount).toBeGreaterThan(0);
    expect(result.context).toContain('Source 1)');
    expect(result.context).toContain('Title: Paper A');
  });

  it('should deduplicate identical chunks', () => {
    const result = prepareRagContext(sampleMetadatas, sampleDocuments, 50_000);
    // The 4th document ("The quick brown fox...") is a duplicate of doc[0]
    // so the dedup should drop it, giving us 3 unique sources.
    expect(result.sourceCount).toBe(3);
    // The duplicate source (Paper A page 1) should only appear once
    const matches = result.context.match(/Source \d+\) Title: Paper A, Page: 1/g);
    expect(matches).toHaveLength(1);
  });

  it('should truncate when context exceeds budget', () => {
    const veryTightBudget = 50;
    const result = prepareRagContext(sampleMetadatas, sampleDocuments, veryTightBudget);
    expect(result.charCount).toBeLessThanOrEqual(veryTightBudget);
    expect(result.truncated).toBe(true);
  });

  it('should not truncate when budget is large enough', () => {
    const generousBudget = 50_000;
    const result = prepareRagContext(sampleMetadatas, sampleDocuments, generousBudget);
    expect(result.truncated).toBe(false);
  });

  it('should handle empty retrieval gracefully', () => {
    const result = prepareRagContext([], [], 50_000);
    expect(result.context).toBe('');
    expect(result.sourceCount).toBe(0);
    expect(result.charCount).toBe(0);
    expect(result.truncated).toBe(false);
  });

  it('should handle null documents gracefully', () => {
    const result = prepareRagContext([{ title: 'Solo', page: 1, source: 'x.pdf' }], null, 50_000);
    expect(result.context).toBe('');
    expect(result.sourceCount).toBe(0);
  });

  it('should handle null/undefined metadatas gracefully', () => {
    const result = prepareRagContext(
      [null, undefined, { title: 'Valid', page: 1, source: 'y.pdf' }],
      ['a', 'b', 'c'],
      50_000,
    );
    // Null/undefined metadatas are filled with defaults; all 3 docs are included.
    expect(result.sourceCount).toBe(3);
    expect(result.context).toContain('Title: Untitled');
    expect(result.context).toContain('Title: Valid');
  });

  it('should skip blank document entries', () => {
    const result = prepareRagContext(
      [{ title: 'A', page: 1, source: 'a.pdf' }, { title: 'B', page: 2, source: 'b.pdf' }],
      ['   ', 'Some real content'],
      50_000,
    );
    // The first doc is blank/whitespace, so it should be skipped. Match on
    // the full `Title: X` fragment; single letters also occur in other text.
    expect(result.sourceCount).toBe(1);
    expect(result.context).toContain('Title: B');
    expect(result.context).not.toContain('Title: A');
  });

  it('should respect the budgetEnv environment variable override', () => {
    // ~1,000 chars fits the 5,000 caller budget but not the 500 override, so
    // this only reports truncation if the override is actually applied.
    const result = prepareRagContext(
      [{ title: 'Long', page: 1, source: 'long.pdf' }],
      ['A'.repeat(1_000)],
      5_000,
      '500', // env override: only 500 chars
    );
    expect(result.charCount).toBeLessThanOrEqual(500);
    expect(result.sourceCount).toBe(0);
    expect(result.truncated).toBe(true);
  });

  it('should let the budgetEnv override raise a too-small caller budget', () => {
    const result = prepareRagContext(
      [{ title: 'Long', page: 1, source: 'long.pdf' }],
      ['A'.repeat(1_000)],
      100, // caller budget alone would reject the source
      '50000',
    );
    expect(result.sourceCount).toBe(1);
    expect(result.truncated).toBe(false);
  });
});

describe('defaultNResults', () => {
  it('should return the default when nothing is passed', () => {
    expect(defaultNResults()).toBe(8);
  });

  it('should return the requested value when it is within bounds', () => {
    expect(defaultNResults(5)).toBe(5);
    expect(defaultNResults(20)).toBe(20);
  });

  it('should clamp values above the maximum', () => {
    expect(defaultNResults(100)).toBe(20);
  });

  it('should clamp values below the minimum', () => {
    expect(defaultNResults(-1)).toBe(1);
  });

  it('should return the default for invalid values', () => {
    expect(defaultNResults(Number.NaN)).toBe(8);
  });
});
import type { NextApiRequest, NextApiResponse } from "next";
import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
import { defaultNResults, prepareRagContext } from "@/utils/app/rag-context";

/**
 * API route that queries the "default-collection" Chroma collection with
 * `req.body.input` and responds with the raw query results plus a
 * `_prepared` field holding the output of `prepareRagContext`.
 *
 * Request body:
 * - `input`    - query text; must be a non-empty string.
 * - `nResults` - optional result count, normalised by `defaultNResults`.
 *
 * Responses:
 * - 200 with `{ ...results, _prepared }`.
 * - 400 when `input` is missing, not a string, or blank.
 * - 500 when anything inside the query pipeline throws.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  // Validate before touching Chroma so a bad request is reported as a client
  // error rather than surfacing as a generic 500 from the query call.
  const query: unknown = req.body?.input;
  if (typeof query !== "string" || query.trim() === "") {
    res.status(400).json({ error: "Request body must include a non-empty string `input`." });
    return;
  }

  try {
    const client = new ChromaClient({
      // `||` (not `??`) so an empty CHROMA_PATH also falls back to the default.
      path: process.env.CHROMA_PATH || "http://chroma-server:8000",
    });

    const nResults = defaultNResults(req.body.nResults);

    const embedder = new TransformersEmbeddingFunction();

    const collection = await client.getOrCreateCollection({
      name: "default-collection",
      embeddingFunction: embedder,
    });

    // query the collection
    const results = await collection.query({
      nResults,
      queryTexts: [query],
    });

    // Return both the prepared context string AND the raw results so
    // callers that need the lowest-level data can still consume them.
    const prepared = prepareRagContext(
      results.metadatas?.[0] ?? [],
      results.documents?.[0] ?? [],
    );

    res.status(200).json({
      ...results,
      _prepared: prepared,
    });
  } catch (error) {
    if (error instanceof Error) {
      console.error("Error message:", error.message);
      console.error("Stack trace:", error.stack);
    } else {
      console.error("Unknown error:", error);
    }
    res.status(500).json({ error: "An unexpected error occurred :(" });
  }
}
/**
 * Build the URL of the internal `/api/fetch-documents` route.
 *
 * The origin is taken from the URL of the request being handled, so the call
 * goes back to the same deployment that received it. It is deliberately NOT
 * derived from the `Origin` or `X-Forwarded-Host` headers: both are set by
 * the client, and using them would let a caller point this server-side fetch
 * (which carries the user's message and whose response is injected into the
 * prompt) at an arbitrary host.
 *
 * @param req - The incoming request; only `req.url` is read.
 * @returns `<origin>/api/fetch-documents`, falling back to
 *          `http://localhost:3000` when `req.url` is not an absolute
 *          http(s) URL.
 */
function buildFetchDocumentsUrl(req: Request): string {
  const fallbackOrigin = 'http://localhost:3000';
  let origin = fallbackOrigin;

  try {
    const requestUrl = new URL(req.url);
    if (requestUrl.protocol === 'http:' || requestUrl.protocol === 'https:') {
      // `URL.origin` is scheme + host + port with no trailing slash.
      origin = requestUrl.origin;
    }
  } catch {
    // Relative or malformed URL: keep the local fallback.
  }

  return `${origin}/api/fetch-documents`;
}
''; + return `Source ${index + 1}) Title: ${title}${page}, Content: ${content}\n`; + }) + .join(''); + } + } else { + console.error(`fetch-documents returned ${response.status}: ${response.statusText}`); + } + } catch (fetchError) { + console.error('Error fetching documents:', fetchError); + } let temperatureToUse = temperature; if (temperatureToUse == null) { @@ -108,7 +116,7 @@ const handler = async (req: Request): Promise => { role: "user", content: codeBlock` Here is the relevant documentation: - ${relevantDocuments} + ${relevantDocuments || '(No relevant documents found.)'} `, }, {