diff --git a/ui/__tests__/scientific-evidence.test.ts b/ui/__tests__/scientific-evidence.test.ts
new file mode 100644
index 0000000..c00cbf9
--- /dev/null
+++ b/ui/__tests__/scientific-evidence.test.ts
@@ -0,0 +1,137 @@
+import {
+  type ChromaQueryLike,
+  buildScientificEvidencePayload,
+} from '@/utils/server/scientific-evidence';
+
+import { describe, expect, it } from 'vitest';
+
+describe('buildScientificEvidencePayload', () => {
+  it('deduplicates duplicate chunks and emits stable citation keys/source manifest', () => {
+    const queryResult: ChromaQueryLike = {
+      ids: [['id-1', 'id-2', 'id-3']],
+      documents: [['Alpha finding', 'Alpha finding', 'Beta finding']],
+      metadatas: [
+        [
+          {
+            title: 'Paper A',
+            source: '/tmp/paper-a.pdf',
+            page: 2,
+            chunkIndex: 0,
+          },
+          {
+            title: 'Paper A',
+            source: '/tmp/paper-a.pdf',
+            page: 2,
+            chunkIndex: 0,
+          },
+          {
+            title: 'Paper A',
+            source: '/tmp/paper-a.pdf',
+            page: 3,
+            chunkIndex: 1,
+          },
+        ],
+      ],
+      distances: [[0.01, 0.01, 0.02]],
+    };
+    // Build twice from the same input so the keys can be compared for stability.
+    const first = buildScientificEvidencePayload(queryResult);
+    const second = buildScientificEvidencePayload(queryResult);
+    // id-1 and id-2 carry identical text and metadata, so two citations remain.
+    expect(first.citations).toHaveLength(2);
+    expect(first.citations.map((citation) => citation.key)).toEqual(
+      second.citations.map((citation) => citation.key),
+    );
+    expect(first.citations.map((citation) => citation.key)).toEqual([
+      expect.stringMatching(/^SRC-[0-9A-F]{8}$/),
+      expect.stringMatching(/^SRC-[0-9A-F]{8}$/),
+    ]);
+    expect(first.sourceManifest).toHaveLength(1);
+    expect(first.sourceManifest[0].sourceId).toEqual(
+      expect.stringMatching(/^DOC-[0-9A-F]{8}$/),
+    );
+    expect(first.sourceManifest[0].citationKeys).toHaveLength(2);
+  });
+
+  it('normalizes ragged metadata and prefers citation-friendly sources', () => {
+    const queryResult: ChromaQueryLike = {
+      ids: [['id-1']],
+      documents: [['  Content   with   spaces  ']],
+      metadatas: [
+        [
+          {
+            title: 123,
+            filename: 'research.pdf',
+            sourcePath: '/var/tmp/research.pdf',
+            pageNumber: '5',
+            chunk_index: '2',
+            chunk_id: 'c-2',
+          },
+        ],
+      ],
+      distances: [[0.1]],
+    };
+    // Expect the numeric title stringified and filename chosen over sourcePath.
+    const payload = buildScientificEvidencePayload(queryResult);
+    expect(payload.citations).toHaveLength(1);
+    expect(payload.citations[0].title).toBe('123');
+    expect(payload.citations[0].source).toBe('research.pdf');
+    expect(payload.sourceManifest[0].source).toBe('research.pdf');
+    expect(payload.evidenceContext).not.toContain('/var/tmp/research.pdf');
+    expect(payload.citations[0].page).toBe(5);
+    expect(payload.citations[0].chunkIndex).toBe(2);
+    expect(payload.citations[0].chunkId).toBe('c-2');
+    expect(payload.citations[0].content).toBe('Content with spaces');
+  });
+
+  it('bounds evidence context length and truncates safely', () => {
+    const queryResult: ChromaQueryLike = {
+      ids: [['id-1']],
+      documents: [['A'.repeat(200)]],
+      metadatas: [[{ title: 'Large Chunk', source: 'source.pdf', page: 1 }]],
+      distances: [[0.2]],
+    };
+
+    const payload = buildScientificEvidencePayload(queryResult, {
+      maxChunkChars: 20,
+      maxEvidenceChars: 60,
+    });
+    // Both limits are far below the 200-character chunk, forcing truncation.
+    expect(payload.citations[0].content.length).toBeLessThanOrEqual(20);
+    expect(payload.evidenceContext.length).toBeLessThanOrEqual(60);
+    expect(payload.evidenceContext.endsWith('...')).toBe(true);
+  });
+
+  it('truncates safely when max chars is shorter than the ellipsis', () => {
+    const queryResult: ChromaQueryLike = {
+      ids: [['id-1']],
+      documents: [['A'.repeat(200)]],
+      metadatas: [[{ title: 'Large Chunk', source: 'source.pdf', page: 1 }]],
+      distances: [[0.2]],
+    };
+
+    const payload = buildScientificEvidencePayload(queryResult, {
+      maxChunkChars: 2,
+      maxEvidenceChars: 2,
+    });
+    // A 2-character budget cannot hold '...', so a clipped '..' is expected.
+    expect(payload.citations[0].content).toBe('..');
+    expect(payload.citations[0].content.length).toBeLessThanOrEqual(2);
+    expect(payload.evidenceContext).toBe('..');
+    expect(payload.evidenceContext.length).toBeLessThanOrEqual(2);
+  });
+
+  it('handles ragged/null chroma arrays without throwing', () => {
+    const raggedResult: ChromaQueryLike = {
+      ids: [['id-1', 'id-2'], []],
+      documents: [['Chunk one', null], []],
+      metadatas: [[null], []],
+      distances: null,
+    };
+    // Only 'Chunk one' is usable; its metadata is null, so the fallback source applies.
+    const payload = buildScientificEvidencePayload(raggedResult);
+    expect(payload.citations).toHaveLength(1);
+    expect(payload.citations[0].source).toBe('unknown-source');
+    expect(payload.sourceManifest).toHaveLength(1);
+  });
+});
diff --git a/ui/pages/api/fetch-documents.ts b/ui/pages/api/fetch-documents.ts
index 9304e48..4f869cf 100644
--- a/ui/pages/api/fetch-documents.ts
+++ b/ui/pages/api/fetch-documents.ts
@@ -1,25 +1,84 @@
-import type { NextApiRequest, NextApiResponse } from "next";
-import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
+import type { NextApiRequest, NextApiResponse } from 'next';
 
-export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+import { buildScientificEvidencePayload } from '@/utils/server/scientific-evidence';
+
+import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
+
+const DEFAULT_RESULTS = 8; // nResults used when the request supplies no usable value
+const MAX_RESULTS = 20; // upper cap applied to a requested nResults
+const DEFAULT_EVIDENCE_CHARS = 12000; // maxEvidenceChars used when the request supplies no usable value
+const MAX_EVIDENCE_CHARS = 30000; // upper cap applied to a requested maxEvidenceChars
+
+/**
+ * Reads a positive integer from untrusted input (number or numeric string),
+ * rounding down and capping at maxValue. Anything that is not finite or
+ * rounds down to less than 1 (e.g. 0.5) yields defaultValue instead.
+ */
+function parseBoundedInteger(
+  value: unknown,
+  defaultValue: number,
+  maxValue: number,
+): number {
+  const isNumeric = typeof value === 'number' || typeof value === 'string';
+  // Floor before validating so fractions below 1 cannot slip through as 0.
+  const floored = isNumeric ? Math.floor(Number(value)) : NaN;
+  if (!Number.isFinite(floored) || floored < 1) {
+    return defaultValue;
+  }
+  return Math.min(floored, maxValue);
+}
+
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse,
+) {
   try {
+    if (req.method !== 'POST') {
+      return res.status(405).json({ error: 'Only POST is supported' });
+    }
+
     const client = new ChromaClient({
-      path: "http://chroma-server:8000",
+      path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
     });
 
     const query = req.body.input;
+    if (typeof query !== 'string' || query.trim().length === 0) {
+      return res.status(400).json({ error: 'input is required' });
+    }
+
+    const nResults = parseBoundedInteger(
+      req.body.nResults,
+      DEFAULT_RESULTS,
+      MAX_RESULTS,
+    );
+    const maxEvidenceChars = parseBoundedInteger(
+      req.body.maxEvidenceChars,
+      DEFAULT_EVIDENCE_CHARS,
+      MAX_EVIDENCE_CHARS,
+    );
 
     const embedder = new TransformersEmbeddingFunction();
 
-    const collection = await client.getOrCreateCollection({ name: "default-collection", embeddingFunction: embedder });
+    const collection = await client.getOrCreateCollection({
+      name: 'default-collection',
+      embeddingFunction: embedder,
+    });
+
+    const results = await collection.query({
+      nResults,
+      queryTexts: [query.trim()],
+    });
 
-  // query the collection
-  const results = await collection.query({
-      nResults: 4, 
-      queryTexts: [query]
-  }) 
+    const evidence = buildScientificEvidencePayload(results, {
+      maxEvidenceChars,
+    });
 
-    res.status(200).json(results);
+    res.status(200).json({
+      ...results,
+      evidenceContext: evidence.evidenceContext,
+      sourceManifest: evidence.sourceManifest,
+      citations: evidence.citations,
+    });
   } catch (error) {
     if (error instanceof Error) {
       console.error('Error message:', error.message);
@@ -29,4 +88,4 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
     }
     res.status(500).json({ error: 'An unexpected error occurred :(' });
   }
-}
\ No newline at end of file
+}
diff --git a/ui/pages/api/inject-documents.ts b/ui/pages/api/inject-documents.ts
index 532a635..8a3caf2 100644
--- a/ui/pages/api/inject-documents.ts
+++ b/ui/pages/api/inject-documents.ts
@@ -3,8 +3,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
 import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
 import { IncomingForm } from 'formidable';
 import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
-import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
-
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import path from 'path';
 import { v4 as uuidv4 } from 'uuid';
 
@@ -18,37 +17,44 @@ export default async function handler(
   req: NextApiRequest,
   res: NextApiResponse,
 ) {
-  try {
-    if (req.method !== 'POST') {
-      return res.status(405).end();
-    }
+  if (req.method !== 'POST') {
+    return res.status(405).end();
+  }
 
-    const form = new IncomingForm();
-    form.parse(req, async (err, fields, files) => {
+  const form = new IncomingForm();
+  form.parse(req, async (err, fields, files) => {
+    try {
       if (err) {
         return res.status(400).json({ error: 'Failed to upload file' });
       }
 
+      const pdfFile = Array.isArray(files.pdf) ? files.pdf[0] : files.pdf;
+      if (!pdfFile?.filepath) {
+        return res.status(400).json({ error: 'A PDF file is required' });
+      }
+      const publicSource =
+        asNonEmptyString(pdfFile.originalFilename) ?? 'uploaded-document.pdf';
+
       const client = new ChromaClient({
         path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
       });
 
-      const loader = new PDFLoader(files.pdf[0].filepath);
+      const loader = new PDFLoader(pdfFile.filepath);
 
       const originalDocs = await loader.load();
 
-      console.log(JSON.stringify(originalDocs));
-
-
       const splitter = new RecursiveCharacterTextSplitter({
         chunkSize: 500,
         chunkOverlap: 100,
-      });      
+      });
 
       const docs = await splitter.splitDocuments(originalDocs);
- 
+
       // Process the documents and perform other logic
-      const { ids, metadatas, documentContents } = processDocuments(docs);
+      const { ids, metadatas, documentContents } = processDocuments(
+        docs,
+        publicSource,
+      );
 
       const embedder = new TransformersEmbeddingFunction();
       const collection = await client.getOrCreateCollection({
@@ -66,39 +72,175 @@ export default async function handler(
         message: 'Documents processed successfully',
         documentCount: ids.length,
       });
-    });
-  } catch (error) {
-    console.error(error);
-    res
-      .status(500)
-      .json({ message: 'An error occurred while processing the documents' });
+    } catch (error) {
+      console.error(error);
+      res
+        .status(500)
+        .json({ message: 'An error occurred while processing the documents' });
+    }
+  });
+}
+
+type PrimitiveMetadata = Record<string, string | number | boolean>; // flat metadata entry built for each chunk
+// Minimal structural view of a document; metadata stays unknown and is narrowed at runtime.
+type LoadedDocument = {
+  pageContent: string;
+  metadata?: unknown;
+};
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return !(value === null || typeof value !== 'object' || Array.isArray(value)); // non-null, non-array objects
+}
+
+/** Returns record[key] when it is a string, number or boolean, else null. */
+function getPrimitive(
+  record: Record<string, unknown>,
+  key: string,
+): string | number | boolean | null {
+  const candidate = record[key];
+  switch (typeof candidate) {
+    case 'string':
+    case 'number':
+    case 'boolean':
+      return candidate;
+    default:
+      return null;
+  }
+}
+
+function asNonEmptyString(value: unknown): string | null {
+  if (typeof value === 'string') {
+    const trimmed = value.trim();
+    return trimmed.length > 0 ? trimmed : null;
+  }
+
+  if (typeof value === 'number' || typeof value === 'boolean') {
+    return String(value);
   }
+
+  return null;
 }
 
-function processDocuments(docs: any) {
-  const ids = [];
-  const metadatas = [];
-  const documentContents = [];
+/**
+ * Coerces a finite number or numeric string to a number; null otherwise.
+ * Blank strings are rejected explicitly because Number('') evaluates to 0.
+ */
+function asNumber(value: unknown): number | null {
+  if (typeof value === 'number') {
+    return Number.isFinite(value) ? value : null;
+  }
+  if (typeof value !== 'string' || value.trim() === '') {
+    return null;
+  }
+  const parsed = Number(value);
+  return Number.isFinite(parsed) ? parsed : null;
+}
+
+/**
+ * Resolves a page number from `page`/`pageNumber` on the metadata itself,
+ * then from `pageNumber`/`page` on a nested `loc` record; null if neither.
+ */
+function getPageFromMetadata(metadata: Record<string, unknown>): number | null {
+  const topLevel = getPrimitive(metadata, 'page') ?? getPrimitive(metadata, 'pageNumber');
+  const fromTopLevel = asNumber(topLevel);
+  if (fromTopLevel !== null) return fromTopLevel;
+
+  const nested = metadata.loc;
+  if (isRecord(nested)) {
+    return asNumber(getPrimitive(nested, 'pageNumber') ?? getPrimitive(nested, 'page'));
+  }
+  return null;
+}
 
-  for (const document of docs) {
-    // Generate an ID for each document, or use some existing unique identifier
-    const id = uuidv4();
-    ids.push(id);
+function getPdfInfoPrimitive(
+  metadata: Record<string, unknown>,
+  key: string,
+): string | number | boolean | null {
+  const pdf = metadata.pdf;
+  if (!isRecord(pdf)) {
+    return null;
+  }
 
-    const fallbackTitle = path.basename(document.metadata.source);
-    const titleFromMetadata = document.metadata.pdf.info.Title;
+  const info = pdf.info;
+  if (!isRecord(info)) {
+    return null;
+  }
 
-    const title = titleFromMetadata && titleFromMetadata.length > 0 ? titleFromMetadata : fallbackTitle;
+  return getPrimitive(info, key);
+}
 
-  
-    const metadata = {
-      title: title,
-      page: document.metadata.loc.pageNumber, // Define this function to extract chapter info
-      source: document.metadata.source, // Define this function to extract verse info
+function processDocuments(docs: LoadedDocument[], publicSource?: string) {
+  const ids: string[] = [];
+  const metadatas: PrimitiveMetadata[] = [];
+  const documentContents: string[] = [];
+
+  for (let index = 0; index < docs.length; index += 1) {
+    const document = docs[index];
+    const metadata = isRecord(document.metadata) ? document.metadata : {};
+
+    const sourceForCitation =
+      asNonEmptyString(getPrimitive(metadata, 'filename')) ??
+      asNonEmptyString(getPrimitive(metadata, 'fileName')) ??
+      asNonEmptyString(getPrimitive(metadata, 'originalFilename')) ??
+      publicSource ??
+      asNonEmptyString(getPrimitive(metadata, 'source')) ??
+      asNonEmptyString(getPrimitive(metadata, 'sourcePath')) ??
+      `document-${index + 1}.pdf`;
+    const filename = path.basename(sourceForCitation.replace(/\\/g, '/'));
+    const fallbackTitle =
+      filename.length > 0 ? filename : `Document ${index + 1}`;
+    const titleFromMetadata =
+      asNonEmptyString(getPrimitive(metadata, 'title')) ??
+      asNonEmptyString(getPrimitive(metadata, 'documentTitle')) ??
+      asNonEmptyString(getPdfInfoPrimitive(metadata, 'Title'));
+    const title = titleFromMetadata ?? fallbackTitle;
+    const page = getPageFromMetadata(metadata);
+    const chunkIndex =
+      asNumber(
+        getPrimitive(metadata, 'chunkIndex') ??
+          getPrimitive(metadata, 'chunk_index'),
+      ) ?? index;
+
+    const generatedId = uuidv4();
+    const chunkId =
+      asNonEmptyString(
+        getPrimitive(metadata, 'chunkId') ?? getPrimitive(metadata, 'chunk_id'),
+      ) ?? `${filename}:${page ?? 'na'}:${chunkIndex}`;
+    const documentId =
+      asNonEmptyString(
+        getPrimitive(metadata, 'documentId') ??
+          getPrimitive(metadata, 'document_id'),
+      ) ?? generatedId;
+
+    const metadataToStore: PrimitiveMetadata = {
+      title,
+      source: fallbackTitle,
+      filename,
+      chunkIndex,
+      chunkId,
+      documentId,
     };
-    metadatas.push(metadata);
 
-    // Add the page content to the documents array
+    if (page !== null) {
+      metadataToStore.page = page;
+    }
+
+    const optionalPdfInfoFields = [
+      'Author',
+      'Subject',
+      'Keywords',
+      'Creator',
+      'Producer',
+    ];
+    for (const field of optionalPdfInfoFields) {
+      const value = getPdfInfoPrimitive(metadata, field);
+      if (value !== null) {
+        metadataToStore[`pdf${field}`] = value;
+      }
+    }
+
+    ids.push(generatedId);
+    metadatas.push(metadataToStore);
     documentContents.push(document.pageContent);
   }
 
diff --git a/ui/pages/api/rag-chat.ts b/ui/pages/api/rag-chat.ts
index ce84d67..25de16f 100644
--- a/ui/pages/api/rag-chat.ts
+++ b/ui/pages/api/rag-chat.ts
@@ -1,6 +1,6 @@
 import { DEFAULT_SYSTEM_PROMPT, DEFAULT_TEMPERATURE } from '@/utils/app/const';
 import { OpenAIError, OpenAIStream } from '@/utils/server';
-import { codeBlock, oneLine } from 'common-tags'
+import type { ScientificSourceManifestEntry } from '@/utils/server/scientific-evidence';
 
 import { ChatBody, Message } from '@/types/chat';
 
@@ -9,46 +9,67 @@ import wasm from '../../node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm?module
 
 import tiktokenModel from '@dqbd/tiktoken/encoders/cl100k_base.json';
 import { Tiktoken, init } from '@dqbd/tiktoken/lite/init';
+import { codeBlock, oneLine } from 'common-tags';
 
 export const config = {
   runtime: 'edge',
 };
 
-// Function to fetch and format documents
-async function fetchAndFormatDocuments(lastMessageContent: string) {
+type FetchDocumentsResponse = { // the two fields of the fetch-documents JSON body that this route reads
+  evidenceContext?: string;
+  sourceManifest?: ScientificSourceManifestEntry[];
+};
+
+/** Renders the manifest as numbered lines: `N. title (source) -> keys: a, b`. */
+function formatSourceManifest(
+  sourceManifest: ScientificSourceManifestEntry[],
+): string {
+  const lines: string[] = [];
+  sourceManifest.forEach((entry, position) => {
+    const keys = entry.citationKeys.join(', ');
+    lines.push(`${position + 1}. ${entry.title} (${entry.source}) -> keys: ${keys}`);
+  });
+  return lines.join('\n');
+}
+
+async function fetchScientificEvidence(
+  req: Request,
+  lastMessageContent: string,
+) {
   try {
-    console.log("fetching documents")
-    const response = await fetch('http://localhost:3000/api/fetch-documents', {
+    const fetchDocumentsUrl = new URL(
+      '/api/fetch-documents',
+      req.url,
+    ).toString();
+    const response = await fetch(fetchDocumentsUrl, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ input: lastMessageContent }),
+      body: JSON.stringify({
+        input: lastMessageContent,
+        nResults: 8,
+        maxEvidenceChars: 12000,
+      }),
     });
-    
+
     if (!response.ok) {
       throw new Error(`Error fetching documents: ${response.statusText}`);
     }
 
-    const data = await response.json();
-    const result = data.metadatas[0].map((metadata: any, index: number) => {
-      return `Source ${index + 1}) Title: ${metadata.title}, Page: ${metadata.page}, Content: ${data.documents[0][index]}\n`;
-    }).join('');
-
-    console.log(result);
-
-    return result;
-
+    const data = (await response.json()) as FetchDocumentsResponse;
+    return {
+      evidenceContext:
+        typeof data.evidenceContext === 'string' ? data.evidenceContext : '',
+      sourceManifest: Array.isArray(data.sourceManifest)
+        ? data.sourceManifest
+        : [],
+    };
   } catch (error) {
-    console.error('Error fetching and formatting documents:', error);
-    throw error; // You may want to throw a more specific error object here
+    console.error('Error fetching scientific evidence:', error);
+    throw error;
   }
 }
 
-
-
-
-
 const handler = async (req: Request): Promise<Response> => {
-
   try {
     const { model, messages, key, prompt, temperature } =
       (await req.json()) as ChatBody;
@@ -85,8 +106,11 @@ const handler = async (req: Request): Promise<Response> => {
 
     const lastMessage = messages[messages.length - 1];
 
-    const relevantDocuments = await fetchAndFormatDocuments(lastMessage.content);
-    
+    const { evidenceContext, sourceManifest } = await fetchScientificEvidence(
+      req,
+      lastMessage.content,
+    );
+
     let temperatureToUse = temperature;
     if (temperatureToUse == null) {
       temperatureToUse = DEFAULT_TEMPERATURE;
@@ -97,22 +121,27 @@ const handler = async (req: Request): Promise<Response> => {
     let tokenCount = prompt_tokens.length;
     let messagesToSend: Message[] = [];
 
-
     encoding.free();
 
     console.log(model, promptToSend, temperatureToUse, key, messagesToSend);
 
-  
-  messagesToSend = [
+    messagesToSend = [
+      {
+        role: 'user',
+        content: codeBlock`
+          Here is the evidence context:
+          ${evidenceContext}
+        `,
+      },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
-          Here is the relevant documentation:
-          ${relevantDocuments}
+          Here is the source manifest:
+          ${formatSourceManifest(sourceManifest)}
         `,
       },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
           ${oneLine`
             Answer my next question using only the above documentation.
@@ -130,24 +159,26 @@ const handler = async (req: Request): Promise<Response> => {
             - Prefer splitting your response into multiple paragraphs.
           `}
           ${oneLine`
-            - Output as markdown with citations based on the documentation.
+            - Cite claims inline with the provided citation keys (format: [SRC-XXXXXXXX]).
+          `}
+          ${oneLine`
+            - Only cite keys that appear in the source manifest/evidence context.
           `}
         `,
       },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
           Here is my question:
           ${oneLine`${lastMessage.content}`}
       `,
       },
-    ]
-
+    ];
 
     const stream = await OpenAIStream(
       model,
       promptToSend,
-      0,
+      temperatureToUse,
       key,
       messagesToSend,
     );
diff --git a/ui/utils/server/scientific-evidence.ts b/ui/utils/server/scientific-evidence.ts
new file mode 100644
index 0000000..3298d8e
--- /dev/null
+++ b/ui/utils/server/scientific-evidence.ts
@@ -0,0 +1,360 @@
+type Primitive = string | number | boolean; // scalar metadata values the helpers in this file accept
+// Object with arbitrary keys whose values must be narrowed before use.
+type UnknownRecord = Record<string, unknown>;
+
+export type ChromaQueryLike = { // query-result shape consumed by buildScientificEvidencePayload
+  ids: string[][]; // outer index: query, inner index: result within that query
+  documents: (string | null)[][];
+  metadatas: (Record<string, unknown> | null)[][];
+  distances: null | number[][];
+};
+// One de-duplicated evidence chunk together with its normalized metadata.
+export type ScientificCitation = {
+  key: string; // 'SRC-' followed by 8 upper-case hex digits
+  sourceId: string; // 'DOC-' followed by 8 upper-case hex digits
+  title: string;
+  source: string;
+  page: number | null;
+  chunkIndex: number | null;
+  chunkId: string | null;
+  documentId: string | null;
+  distance: number | null;
+  content: string; // whitespace-collapsed text, truncated to maxChunkChars
+};
+// All citations sharing one sourceId (derived from source and title).
+export type ScientificSourceManifestEntry = {
+  sourceId: string;
+  title: string;
+  source: string;
+  citationKeys: string[];
+  documentIds: string[];
+};
+// Return value of buildScientificEvidencePayload.
+export type ScientificEvidencePayload = {
+  citations: ScientificCitation[];
+  sourceManifest: ScientificSourceManifestEntry[];
+  evidenceContext: string;
+};
+// Optional character limits; the DEFAULT_MAX_* constants apply when omitted.
+export type ScientificEvidenceOptions = {
+  maxEvidenceChars?: number;
+  maxChunkChars?: number;
+};
+
+const DEFAULT_MAX_EVIDENCE_CHARS = 12000; // default cap for the whole evidence context
+const DEFAULT_MAX_CHUNK_CHARS = 1200; // default cap for one chunk's content
+// Metadata key aliases; lookups try them in the order listed, first match wins.
+const TITLE_KEYS = ['title', 'documentTitle', 'document_title', 'pdfTitle'];
+const SOURCE_KEYS = [
+  'sourceLabel',
+  'publicSource',
+  'publicIdentifier',
+  'originalFilename',
+  'filename',
+  'fileName',
+  'source',
+  'sourcePath',
+  'source_path',
+];
+const PAGE_KEYS = ['page', 'pageNumber', 'page_number'];
+const CHUNK_INDEX_KEYS = ['chunkIndex', 'chunk_index'];
+const CHUNK_ID_KEYS = ['chunkId', 'chunk_id'];
+const DOCUMENT_ID_KEYS = ['documentId', 'document_id', 'id'];
+
+function isRecord(value: unknown): value is UnknownRecord {
+  return !(value === null || typeof value !== 'object' || Array.isArray(value)); // non-null, non-array objects
+}
+
+/** Passes through strings, numbers and booleans; anything else is undefined. */
+function asPrimitive(value: unknown): Primitive | undefined {
+  switch (typeof value) {
+    case 'string':
+    case 'number':
+    case 'boolean':
+      return value;
+    default:
+      return undefined;
+  }
+}
+
+/**
+ * Looks up `keys` in order and returns the first value that is a string,
+ * number or boolean; yields undefined for a null record or no match.
+ */
+function firstPrimitive(
+  record: UnknownRecord | null,
+  keys: string[],
+): Primitive | undefined {
+  if (!record) return undefined;
+  for (const name of keys) {
+    const candidate = asPrimitive(record[name]);
+    if (candidate !== undefined) {
+      return candidate;
+    }
+  }
+  return undefined;
+}
+
+/** Trimmed non-empty string, stringified number/boolean, or null otherwise. */
+function toCleanString(value: unknown): string | null {
+  if (typeof value === 'number' || typeof value === 'boolean') {
+    return `${value}`;
+  }
+  if (typeof value !== 'string') {
+    return null;
+  }
+  // Whitespace-only strings carry no information, so they count as absent.
+  const text = value.trim();
+  return text === '' ? null : text;
+}
+
+/**
+ * Keeps only the last path segment of a path or URL (query/fragment dropped);
+ * the cleaned input itself if no segment remains, null if value is unusable.
+ */
+function toCitationSource(value: unknown): string | null {
+  const cleaned = toCleanString(value);
+  if (!cleaned) return null;
+  const pathOnly = cleaned.split(/[?#]/)[0].replace(/\\/g, '/');
+  const segments = pathOnly.split('/').filter((part) => part.length > 0);
+  const lastSegment = segments[segments.length - 1];
+  return lastSegment ?? cleaned;
+}
+
+/**
+ * Coerces a finite number or numeric string to a number; null otherwise.
+ * Blank strings are rejected explicitly because Number('') evaluates to 0.
+ */
+function toNumberOrNull(value: unknown): number | null {
+  if (typeof value === 'number') {
+    return Number.isFinite(value) ? value : null;
+  }
+  if (typeof value !== 'string' || value.trim() === '') {
+    return null;
+  }
+  const parsed = Number(value);
+  return Number.isFinite(parsed) ? parsed : null;
+}
+
+/**
+ * Maps loosely-typed chunk metadata onto the fixed citation fields, trying
+ * each known key alias in order.
+ *
+ * @param rawMetadata - Metadata value for one chunk; may be null or malformed.
+ * @returns Normalized fields; title and source fall back to placeholder
+ *   strings, every other field falls back to null.
+ */
+function normalizeMetadata(rawMetadata: unknown): {
+  title: string;
+  source: string;
+  page: number | null;
+  chunkIndex: number | null;
+  chunkId: string | null;
+  documentId: string | null;
+} {
+  const record = isRecord(rawMetadata) ? rawMetadata : null;
+  const pick = (keys: string[]) => firstPrimitive(record, keys);
+
+  return {
+    title: toCleanString(pick(TITLE_KEYS)) ?? 'Untitled Source',
+    source: toCitationSource(pick(SOURCE_KEYS)) ?? 'unknown-source',
+    page: toNumberOrNull(pick(PAGE_KEYS)),
+    chunkIndex: toNumberOrNull(pick(CHUNK_INDEX_KEYS)),
+    chunkId: toCleanString(pick(CHUNK_ID_KEYS)),
+    documentId: toCleanString(pick(DOCUMENT_ID_KEYS)),
+  };
+}
+
+function collapseWhitespace(content: string): string {
+  return content.trim().split(/\s+/).join(' '); // every whitespace run becomes one space
+}
+
+/**
+ * Shortens text to at most maxChars UTF-16 code units, ending in '...' when
+ * anything was cut. Limits too small for the full ellipsis get a clipped one.
+ * A cut never leaves half of a surrogate pair behind.
+ */
+function safeTruncate(text: string, maxChars: number): string {
+  const maxLength = Math.floor(maxChars);
+  // Written as a negated comparison so that NaN is also treated as "no room".
+  if (!(maxLength > 0)) return '';
+  if (text.length <= maxLength) return text;
+
+  const ellipsis = '...';
+  if (maxLength <= ellipsis.length) return ellipsis.slice(0, maxLength);
+
+  let limit = maxLength - ellipsis.length;
+  const lastUnit = text.charCodeAt(limit - 1);
+  // A high surrogate at the cut means its low half was sliced off: drop it.
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) limit -= 1;
+  return `${text.slice(0, limit).trimEnd()}${ellipsis}`;
+}
+
+/** 32-bit FNV-1a over UTF-16 code units, as 8 upper-case hex digits. */
+function hashString(value: string): string {
+  const FNV_PRIME = 0x01000193;
+  let acc = 0x811c9dc5;
+  for (let pos = 0; pos < value.length; pos += 1) {
+    // Math.imul wraps the product to 32 bits, as the hash requires.
+    acc = Math.imul(acc ^ value.charCodeAt(pos), FNV_PRIME);
+  }
+  return (acc >>> 0).toString(16).toUpperCase().padStart(8, '0');
+}
+
+/**
+ * Derives a deterministic `SRC-<hash>` key from a chunk's location fields
+ * and its whitespace-normalized text.
+ */
+function buildCitationKey(
+  source: string,
+  page: number | null,
+  chunkIndex: number | null,
+  chunkId: string | null,
+  normalizedContent: string,
+): string {
+  // Missing page/chunk positions are encoded as the literal 'na'.
+  const location = `${page ?? 'na'}|${chunkIndex ?? 'na'}`;
+  const idPart = (chunkId ?? '').toLowerCase();
+  const seed = `${source.toLowerCase()}|${location}|${idPart}|${normalizedContent}`;
+  return `SRC-${hashString(seed)}`;
+}
+
+function buildSourceId(source: string, title: string): string {
+  return 'DOC-' + hashString([source.toLowerCase(), title.toLowerCase()].join('|')); // case-insensitive id
+}
+
+/**
+ * Converts a Chroma-style query result into de-duplicated citations, a
+ * per-source manifest and a plain-text evidence context.
+ *
+ * Missing, null or ragged result arrays are tolerated. The evidence context
+ * never exceeds `maxEvidenceChars` (default 12000), counting the separators
+ * between blocks; each chunk is cut to `maxChunkChars` (default 1200).
+ */
+export function buildScientificEvidencePayload(
+  results: ChromaQueryLike,
+  options: ScientificEvidenceOptions = {},
+): ScientificEvidencePayload {
+  const maxEvidenceChars =
+    options.maxEvidenceChars ?? DEFAULT_MAX_EVIDENCE_CHARS;
+  const maxChunkChars = options.maxChunkChars ?? DEFAULT_MAX_CHUNK_CHARS;
+
+  const citations: ScientificCitation[] = [];
+  const seenCitationKeys = new Set<string>();
+  const sourceManifestMap = new Map<string, ScientificSourceManifestEntry>();
+
+  const documentsByQuery = Array.isArray(results.documents)
+    ? results.documents
+    : [];
+  const metadatasByQuery = Array.isArray(results.metadatas)
+    ? results.metadatas
+    : [];
+  const idsByQuery = Array.isArray(results.ids) ? results.ids : [];
+  const distancesByQuery = Array.isArray(results.distances)
+    ? results.distances
+    : [];
+
+  for (let q = 0; q < documentsByQuery.length; q += 1) {
+    const documents = Array.isArray(documentsByQuery[q])
+      ? documentsByQuery[q]
+      : [];
+    const metadatas = Array.isArray(metadatasByQuery[q])
+      ? metadatasByQuery[q]
+      : [];
+    const ids = Array.isArray(idsByQuery[q]) ? idsByQuery[q] : [];
+    const distances = Array.isArray(distancesByQuery[q])
+      ? distancesByQuery[q]
+      : [];
+
+    for (let index = 0; index < documents.length; index += 1) {
+      const rawContent = documents[index];
+      if (typeof rawContent !== 'string') {
+        continue;
+      }
+      const normalizedContent = collapseWhitespace(rawContent);
+      if (!normalizedContent) {
+        continue;
+      }
+
+      const metadata = normalizeMetadata(metadatas[index] ?? null);
+      const documentId = toCleanString(ids[index]) ?? metadata.documentId;
+      const citationKey = buildCitationKey(
+        metadata.source,
+        metadata.page,
+        metadata.chunkIndex,
+        metadata.chunkId,
+        normalizedContent,
+      );
+
+      if (seenCitationKeys.has(citationKey)) {
+        continue;
+      }
+      seenCitationKeys.add(citationKey);
+      const sourceId = buildSourceId(metadata.source, metadata.title);
+      const distance =
+        typeof distances[index] === 'number' ? distances[index] : null;
+      citations.push({
+        key: citationKey,
+        sourceId,
+        title: metadata.title,
+        source: metadata.source,
+        page: metadata.page,
+        chunkIndex: metadata.chunkIndex,
+        chunkId: metadata.chunkId,
+        documentId,
+        distance,
+        content: safeTruncate(normalizedContent, maxChunkChars),
+      });
+
+      const existingSource = sourceManifestMap.get(sourceId);
+      if (!existingSource) {
+        sourceManifestMap.set(sourceId, {
+          sourceId,
+          title: metadata.title,
+          source: metadata.source,
+          citationKeys: [citationKey],
+          documentIds: documentId ? [documentId] : [],
+        });
+      } else {
+        existingSource.citationKeys.push(citationKey);
+        if (documentId && !existingSource.documentIds.includes(documentId)) {
+          existingSource.documentIds.push(documentId);
+        }
+      }
+    }
+  }
+
+  const evidenceLines: string[] = [];
+  let usedChars = 0;
+  for (const citation of citations) {
+    const headerParts = [
+      `[${citation.key}]`,
+      `Title: ${citation.title}`,
+      `Source: ${citation.source}`,
+      `Page: ${citation.page ?? 'n/a'}`,
+    ];
+    if (citation.chunkIndex !== null) {
+      headerParts.push(`Chunk: ${citation.chunkIndex}`);
+    }
+
+    const block = `${headerParts.join(' | ')}\n${citation.content}\n`;
+    // join('\n') puts one separator before every block except the first.
+    const separatorChars = evidenceLines.length > 0 ? 1 : 0;
+    const remaining = maxEvidenceChars - usedChars - separatorChars;
+    if (block.length > remaining) {
+      if (remaining > 0) {
+        evidenceLines.push(safeTruncate(block, remaining));
+      }
+      break;
+    }
+
+    evidenceLines.push(block);
+    usedChars += separatorChars + block.length;
+  }
+
+  return {
+    citations,
+    sourceManifest: Array.from(sourceManifestMap.values()),
+    evidenceContext: evidenceLines.join('\n'),
+  };
+}