From d1637b366de3499515ffd0507cbad603f0ba626d Mon Sep 17 00:00:00 2001
From: jing11223344 <118464144+jing11223344@users.noreply.github.com>
Date: Sat, 16 May 2026 22:15:27 +0800
Subject: [PATCH 1/4] feat: add ui/utils/app/rag-context.ts

---
 ui/utils/app/rag-context.ts | 136 ++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 ui/utils/app/rag-context.ts
diff --git a/ui/utils/app/rag-context.ts b/ui/utils/app/rag-context.ts
new file mode 100644
index 0000000..ae7080d
--- /dev/null
+++ b/ui/utils/app/rag-context.ts
@@ -0,0 +1,136 @@
+/**
+ * Shared RAG context preparation helper.
+ *
+ * Provides:
+ *   - Source formatting
+ *   - Duplicate chunk suppression (keyed on the trimmed chunk text)
+ *   - Configurable character budget
+ *   - Bounded integer parsing
+ */
+
+export interface RagSource { // one retrieved chunk paired with its metadata
+  title: string; // `prepareRagContext` uses 'Untitled' when metadata has no title
+  page: number | null; // null means no page; `formatSource` then omits ", Page: …"
+  content: string; // chunk text (`prepareRagContext` stores it trimmed)
+  source: string; // metadata `source`; `prepareRagContext` uses '' when missing
+}
+
+export interface RagContextResult {
+  /** Formatted context string (ready to inject into the LLM prompt). */
+  context: string;
+  /** Number of sources actually written into `context` (after dedup and budget). */
+  sourceCount: number;
+  /** Length of `context` (`context.length`, i.e. UTF-16 code units). */
+  charCount: number;
+  /** True when a source was left out because adding it would exceed the budget. */
+  truncated: boolean;
+}
+
+const DEFAULT_CHAR_BUDGET = 8_000; // default `budget` of prepareRagContext, in characters
+const MAX_N_RESULTS = 20; // upper clamp used by defaultNResults and parseBoundedInt's default `max`
+const DEFAULT_N_RESULTS = 8; // returned by defaultNResults when no usable count is requested
+
+/**
+ * Parse a number or numeric string, round it, and clamp it to `[min, max]`.
+ *
+ * @param value      - Raw value; null, blank strings and non-number/string types yield `defaultVal`.
+ * @param defaultVal - Returned unclamped when `value` is missing or not numeric.
+ * @param min        - Inclusive lower bound.
+ * @param max        - Inclusive upper bound.
+ */
+export function parseBoundedInt(
+  value: unknown,
+  defaultVal: number,
+  min: number = 1,
+  max: number = MAX_N_RESULTS,
+): number {
+  // Number('') and Number('  ') are 0, so blank strings must fall back too.
+  const usable = typeof value === 'number' || (typeof value === 'string' && value.trim() !== '');
+  const n = usable ? Number(value) : NaN;
+  return Number.isNaN(n) ? defaultVal : Math.max(min, Math.min(max, Math.round(n)));
+}
+
+/**
+ * Render one source as a numbered, newline-terminated line (`index` is zero-based).
+ */
+export function formatSource(source: RagSource, index: number): string {
+  const { title, page, content } = source;
+  return `Source ${index + 1}) Title: ${title}${page == null ? '' : `, Page: ${page}`}, Content: ${content}\n`;
+}
+
+/**
+ * Prepare a RAG context string from raw Chroma results.
+ *
+ * 1. Pairs `documents[i]` with `metadatas[i]` (missing metadata gets defaults).
+ * 2. Skips blank chunks and chunks whose trimmed text was already seen.
+ * 3. Formats the remaining sources with `formatSource`.
+ * 4. Stops at the first source that would exceed the budget (never cuts mid-source).
+ *
+ * @param metadatas  - Chroma `metadatas` array (first element).
+ * @param documents  - Chroma `documents` array (first element).
+ * @param budget     - Maximum characters for the formatted context.
+ * @param budgetEnv  - Raw env-var value; if non-empty it replaces `budget`, clamped to 100..50_000.
+ */
+export function prepareRagContext(
+  metadatas: Array<{ title: string; page?: number | null; source: string } | null | undefined>,
+  documents: string[] | undefined | null,
+  budget: number = DEFAULT_CHAR_BUDGET,
+  budgetEnv?: string,
+): RagContextResult {
+  const charBudget = budgetEnv ? parseBoundedInt(budgetEnv, DEFAULT_CHAR_BUDGET, 100, 50_000) : budget;
+
+  // 1. Build RagSource list, deduplicating by content
+  const seen = new Set<string>();
+  const sources: RagSource[] = [];
+
+  if (documents && metadatas) {
+    for (let i = 0; i < documents.length && i < metadatas.length; i++) {
+      const content = (documents[i] ?? '').trim();
+      if (!content) continue;
+
+      // Key on the whole trimmed text: a 200-char prefix would also drop
+      // distinct chunks that merely start with the same boilerplate.
+      if (seen.has(content)) continue;
+      seen.add(content);
+
+      const meta = metadatas[i]; // may be null/undefined; defaults applied below
+      sources.push({
+        title: meta?.title ?? 'Untitled',
+        page: meta?.page ?? null,
+        content,
+        source: meta?.source ?? '',
+      });
+    }
+  }
+
+  // 2. Format sources in order until the next one would overflow the budget
+  let context = '';
+  let sourceCount = 0;
+  let truncated = false;
+
+  for (let i = 0; i < sources.length; i++) {
+    const formatted = formatSource(sources[i], i);
+
+    if (context.length + formatted.length > charBudget) {
+      truncated = true;
+      break;
+    }
+
+    context += formatted;
+    sourceCount++;
+  }
+
+  return {
+    context,
+    sourceCount,
+    charCount: context.length,
+    truncated,
+  };
+}
+
+/** Resolve the Chroma result count: `requested` bounded to 1..MAX_N_RESULTS, else DEFAULT_N_RESULTS. */
+export function defaultNResults(requested?: number): number {
+  const lowerBound = 1;
+  const upperBound = MAX_N_RESULTS;
+  return parseBoundedInt(requested, DEFAULT_N_RESULTS, lowerBound, upperBound);
+}

From 900c4b9bc8aa75fdb3dbadd11234c8142677d078 Mon Sep 17 00:00:00 2001
From: jing11223344 <118464144+jing11223344@users.noreply.github.com>
Date: Sat, 16 May 2026 22:15:30 +0800
Subject: [PATCH 2/4] feat: add ui/__tests__/utils/app/rag-context.test.ts

---
 ui/__tests__/utils/app/rag-context.test.ts | 179 +++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 ui/__tests__/utils/app/rag-context.test.ts

diff --git a/ui/__tests__/utils/app/rag-context.test.ts b/ui/__tests__/utils/app/rag-context.test.ts
new file mode 100644
index 0000000..b02ae89
--- /dev/null
+++ b/ui/__tests__/utils/app/rag-context.test.ts
@@ -0,0 +1,179 @@
+import { describe, expect, it } from 'vitest';
+import {
+  parseBoundedInt,
+  formatSource,
+  prepareRagContext,
+  defaultNResults,
+  type RagSource,
+} from '@/utils/app/rag-context';
+
+describe('parseBoundedInt', () => {
+  it('should return the default when value is undefined', () => {
+    expect(parseBoundedInt(undefined, 8)).toBe(8);
+  });
+
+  it('should return the default when value is null', () => {
+    expect(parseBoundedInt(null, 8)).toBe(8);
+  });
+
+  it('should return the default for NaN', () => {
+    expect(parseBoundedInt('not-a-number', 8)).toBe(8);
+  });
+
+  it('should clamp values below the minimum', () => {
+    expect(parseBoundedInt(-5, 8, 1, 20)).toBe(1);
+    expect(parseBoundedInt(0, 8, 1, 20)).toBe(1);
+  });
+
+  it('should clamp values above the maximum', () => {
+    expect(parseBoundedInt(999, 8, 1, 20)).toBe(20);
+  });
+
+  it('should parse string numbers', () => {
+    expect(parseBoundedInt('12', 8, 1, 20)).toBe(12);
+  });
+
+  it('should round floats', () => {
+    expect(parseBoundedInt(4.7, 8, 1, 20)).toBe(5);
+    expect(parseBoundedInt(4.2, 8, 1, 20)).toBe(4);
+  });
+
+  it('should apply the default upper bound (20) when min/max are omitted', () => {
+    expect(parseBoundedInt('999', 8)).toBe(20);
+  });
+});
+
+describe('formatSource', () => {
+  it('should format a source with a page number', () => {
+    const paged: RagSource = { title: 'Doc A', page: 3, content: 'Hello world', source: 'file.pdf' };
+    // index 0 is rendered as "Source 1"
+    expect(formatSource(paged, 0)).toBe('Source 1) Title: Doc A, Page: 3, Content: Hello world\n');
+  });
+
+  it('should format a source without a page number', () => {
+    const unpaged: RagSource = { title: 'Doc B', page: null, content: 'No page', source: 'notes.txt' };
+    // a null page omits the ", Page: ..." segment entirely
+    expect(formatSource(unpaged, 5)).toBe('Source 6) Title: Doc B, Content: No page\n');
+  });
+
+  it('should handle empty content', () => {
+    const blank: RagSource = { title: 'Empty', page: null, content: '', source: '' };
+    // empty content still yields the "Content: " label
+    expect(formatSource(blank, 2)).toBe('Source 3) Title: Empty, Content: \n');
+  });
+});
+
+describe('prepareRagContext', () => {
+  const sampleMetadatas = [
+    { title: 'Paper A', page: 1, source: 'paper-a.pdf' },
+    { title: 'Paper A', page: 2, source: 'paper-a.pdf' },
+    { title: 'Paper B', page: 1, source: 'paper-b.pdf' },
+    { title: 'Paper A', page: 1, source: 'paper-a.pdf' }, // duplicate content below
+  ];
+
+  const sampleDocuments = [
+    'The quick brown fox jumps over the lazy dog.',
+    'This is a second paragraph about the same topic.',
+    'Paper B discusses alternative approaches entirely.',
+    'The quick brown fox jumps over the lazy dog.', // identical to doc[0]
+  ];
+
+  it('should produce a formatted context string', () => {
+    const result = prepareRagContext(sampleMetadatas, sampleDocuments, 50_000);
+    expect(result.sourceCount).toBeGreaterThan(0);
+    expect(result.charCount).toBeGreaterThan(0);
+    expect(result.context).toContain('Source 1)');
+    expect(result.context).toContain('Title: Paper A');
+  });
+
+  it('should deduplicate identical chunks', () => {
+    const result = prepareRagContext(sampleMetadatas, sampleDocuments, 50_000);
+    // The 4th document ("The quick brown fox...") is a duplicate of doc[0]
+    // so the dedup should drop it, giving us 3 unique sources.
+    expect(result.sourceCount).toBe(3);
+    // The duplicate source (Paper A page 1) should only appear once
+    const matches = result.context.match(/Source \d+\) Title: Paper A, Page: 1/g);
+    expect(matches).toHaveLength(1);
+  });
+
+  it('should truncate when context exceeds budget', () => {
+    const veryTightBudget = 50;
+    const result = prepareRagContext(sampleMetadatas, sampleDocuments, veryTightBudget);
+    expect(result.charCount).toBeLessThanOrEqual(veryTightBudget);
+    expect(result.truncated).toBe(true);
+  });
+
+  it('should not truncate when budget is large enough', () => {
+    const generousBudget = 50_000;
+    const result = prepareRagContext(sampleMetadatas, sampleDocuments, generousBudget);
+    expect(result.truncated).toBe(false);
+  });
+
+  it('should handle empty retrieval gracefully', () => {
+    const result = prepareRagContext([], [], 50_000);
+    expect(result.context).toBe('');
+    expect(result.sourceCount).toBe(0);
+    expect(result.charCount).toBe(0);
+    expect(result.truncated).toBe(false);
+  });
+
+  it('should handle null documents gracefully', () => {
+    const result = prepareRagContext([{ title: 'Solo', page: 1, source: 'x.pdf' }], null, 50_000);
+    expect(result.context).toBe('');
+    expect(result.sourceCount).toBe(0);
+  });
+
+  it('should handle null/undefined metadatas gracefully', () => {
+    const result = prepareRagContext([null, undefined, { title: 'Valid', page: 1, source: 'y.pdf' }], ['a', 'b', 'c'], 50_000);
+    // Null/undefined metadatas are filled with defaults; all 3 docs are included.
+    expect(result.sourceCount).toBe(3);
+    expect(result.context).toContain('Untitled');
+    expect(result.context).toContain('Valid');
+  });
+
+  it('should skip blank document entries', () => {
+    const result = prepareRagContext(
+      [{ title: 'A', page: 1, source: 'a.pdf' }, { title: 'B', page: 2, source: 'b.pdf' }],
+      ['   ', 'Some real content'],
+      50_000,
+    );
+    // The first doc is whitespace-only, so only title B may appear; match the full label, not a lone letter.
+    expect(result.sourceCount).toBe(1);
+    expect(result.context).toContain('Title: B');
+    expect(result.context).not.toContain('Title: A');
+  });
+
+  it('should respect the budgetEnv environment variable override', () => {
+    const result = prepareRagContext(
+      [{ title: 'Long', page: 1, source: 'long.pdf' }],
+      ['A'.repeat(10_000)],
+      5_000,
+      '500', // env override: only 500 chars
+    );
+    expect(result.charCount).toBeLessThanOrEqual(500);
+    expect(result.truncated).toBe(true);
+  });
+});
+
+describe('defaultNResults', () => {
+  it('should return the default when nothing is passed', () => {
+    expect(defaultNResults()).toBe(8);
+  });
+
+  it('should return the requested value when it is within bounds', () => {
+    expect(defaultNResults(5)).toBe(5);
+    expect(defaultNResults(20)).toBe(20);
+  });
+
+  it('should clamp values above the maximum', () => {
+    expect(defaultNResults(100)).toBe(20);
+  });
+
+  it('should clamp values below the minimum', () => {
+    expect(defaultNResults(-1)).toBe(1);
+  });
+
+  it('should return the default for invalid values', () => {
+    expect(defaultNResults(Number.NaN)).toBe(8);
+  });
+});

From 7da2ec5192cef1819d5c2ff7a84287af7101d55e Mon Sep 17 00:00:00 2001
From: jing11223344 <118464144+jing11223344@users.noreply.github.com>
Date: Sat, 16 May 2026 22:15:59 +0800
Subject: [PATCH 3/4] feat: improve RAG context budgeting - update
 fetch-documents.ts

---
 ui/pages/api/fetch-documents.ts | 41 ++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/ui/pages/api/fetch-documents.ts b/ui/pages/api/fetch-documents.ts
index 9304e48..f492ac5 100644
--- a/ui/pages/api/fetch-documents.ts
+++ b/ui/pages/api/fetch-documents.ts
@@ -1,32 +1,47 @@
 import type { NextApiRequest, NextApiResponse } from "next";
 import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
+import { defaultNResults, prepareRagContext } from "@/utils/app/rag-context";
 
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
   try {
     const client = new ChromaClient({
-      path: "http://chroma-server:8000",
+      path: process.env.CHROMA_PATH || "http://chroma-server:8000", // env override; falls back when unset or empty
     });
 
     const query = req.body.input;
+    const nResults = defaultNResults(req.body.nResults); // bounded by the helper; its default applies when absent
 
     const embedder = new TransformersEmbeddingFunction();
 
-    const collection = await client.getOrCreateCollection({ name: "default-collection", embeddingFunction: embedder });
+    const collection = await client.getOrCreateCollection({
+      name: "default-collection",
+      embeddingFunction: embedder,
+    });
+
+    // Query the collection with the input text, requesting `nResults` results.
+    const results = await collection.query({
+      nResults,
+      queryTexts: [query],
+    });
 
-  // query the collection
-  const results = await collection.query({
-      nResults: 4, 
-      queryTexts: [query]
-  }) 
+    // Respond with the raw Chroma results plus the formatted context under
+    // `_prepared`, so callers that read the raw arrays keep working.
+    const prepared = prepareRagContext(
+      results.metadatas?.[0] ?? [],
+      results.documents?.[0] ?? [],
+    ); // no budget arguments are passed, so the helper's default budget applies
 
-    res.status(200).json(results);
+    res.status(200).json({
+      ...results,
+      _prepared: prepared,
+    });
   } catch (error) {
     if (error instanceof Error) {
-      console.error('Error message:', error.message);
-      console.error('Stack trace:', error.stack);
+      console.error("Error message:", error.message);
+      console.error("Stack trace:", error.stack);
     } else {
-      console.error('Unknown error:', error);
+      console.error("Unknown error:", error);
     }
-    res.status(500).json({ error: 'An unexpected error occurred :(' });
+    res.status(500).json({ error: "An unexpected error occurred :(" });
   }
-}
\ No newline at end of file
+}

From 392d1d43b6595518488faa2946efe172859fd61c Mon Sep 17 00:00:00 2001
From: jing11223344 <118464144+jing11223344@users.noreply.github.com>
Date: Sat, 16 May 2026 22:16:01 +0800
Subject: [PATCH 4/4] feat: improve RAG context budgeting - update rag-chat.ts

---
 ui/pages/api/rag-chat.ts | 74 ++++++++++++++++++++++------------------
 1 file changed, 41 insertions(+), 33 deletions(-)

diff --git a/ui/pages/api/rag-chat.ts b/ui/pages/api/rag-chat.ts
index ce84d67..7ee9bda 100644
--- a/ui/pages/api/rag-chat.ts
+++ b/ui/pages/api/rag-chat.ts
@@ -14,39 +14,15 @@ export const config = {
   runtime: 'edge',
 };
 
-// Function to fetch and format documents
-async function fetchAndFormatDocuments(lastMessageContent: string) {
-  try {
-    console.log("fetching documents")
-    const response = await fetch('http://localhost:3000/api/fetch-documents', {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ input: lastMessageContent }),
-    });
-    
-    if (!response.ok) {
-      throw new Error(`Error fetching documents: ${response.statusText}`);
-    }
-
-    const data = await response.json();
-    const result = data.metadatas[0].map((metadata: any, index: number) => {
-      return `Source ${index + 1}) Title: ${metadata.title}, Page: ${metadata.page}, Content: ${data.documents[0][index]}\n`;
-    }).join('');
-
-    console.log(result);
-
-    return result;
-
-  } catch (error) {
-    console.error('Error fetching and formatting documents:', error);
-    throw error; // You may want to throw a more specific error object here
-  }
+// Build the fetch-documents URL from the URL this request was served on.
+// Client-sent Origin / X-Forwarded-Host headers are not trusted: they would
+// let a caller aim this server-side fetch at an arbitrary host (SSRF).
+function buildFetchDocumentsUrl(req: Request): string {
+  let base = 'http://localhost:3000';
+  try { base = new URL(req.url).origin; } catch { /* non-absolute URL: keep fallback */ }
+  return `${base}/api/fetch-documents`;
 }
 
-
-
-
-
 const handler = async (req: Request): Promise<Response> => {
 
   try {
@@ -85,7 +61,39 @@ const handler = async (req: Request): Promise<Response> => {
 
     const lastMessage = messages[messages.length - 1];
 
-    const relevantDocuments = await fetchAndFormatDocuments(lastMessage.content);
+    // Fetch and prepare documents using the shared pipeline
+    const fetchDocumentsUrl = buildFetchDocumentsUrl(req);
+    let relevantDocuments = '';
+
+    try {
+      const response = await fetch(fetchDocumentsUrl, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ input: lastMessage.content, nResults: 8 }),
+      });
+
+      if (response.ok) {
+        const data = await response.json();
+        // Use the pre-prepared context when available (fetch-documents >= v2),
+        // otherwise fall back to formatting the raw arrays inline.
+        if (data._prepared) {
+          relevantDocuments = data._prepared.context;
+        } else if (data.metadatas?.[0] && data.documents?.[0]) {
+          relevantDocuments = data.metadatas[0]
+            .map((metadata: any, index: number) => {
+              const title = metadata?.title ?? 'Untitled';
+              const page = metadata?.page != null ? `, Page: ${metadata.page}` : '';
+              const content = data.documents[0][index] ?? '';
+              return `Source ${index + 1}) Title: ${title}${page}, Content: ${content}\n`;
+            })
+            .join('');
+        }
+      } else {
+        console.error(`fetch-documents returned ${response.status}: ${response.statusText}`);
+      }
+    } catch (fetchError) {
+      console.error('Error fetching documents:', fetchError);
+    }
     
     let temperatureToUse = temperature;
     if (temperatureToUse == null) {
@@ -108,7 +116,7 @@ const handler = async (req: Request): Promise<Response> => {
         role: "user",
         content: codeBlock`
           Here is the relevant documentation:
-          ${relevantDocuments}
+          ${relevantDocuments || '(No relevant documents found.)'}
         `,
       },
       {