diff --git a/ui/__tests__/scientific-rag.test.ts b/ui/__tests__/scientific-rag.test.ts
new file mode 100644
index 0000000..83e9f3f
--- /dev/null
+++ b/ui/__tests__/scientific-rag.test.ts
@@ -0,0 +1,74 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  buildScientificMetadata,
+  citationSlug,
+  clampRetrievedDocumentCount,
+  detectScientificSection,
+  formatRetrievedDocument,
+} from '@/utils/server/scientific-rag';
+
+describe('scientific RAG helpers', () => { // unit tests for the helpers exported by utils/server/scientific-rag
+  it('detects scientific sections from chunk headings', () => {
+    expect(detectScientificSection('Abstract\nWe study retrieval quality.')).toBe('abstract');
+    expect(detectScientificSection('2. Methods\nWe collected samples.')).toBe('methods'); // numbered headings are accepted
+    expect(detectScientificSection('RESULTS\nAccuracy improved.')).toBe('results'); // matching ignores case
+    expect(detectScientificSection('A paragraph without a heading.')).toBe('body'); // 'body' is the no-match label
+  });
+
+  it('builds stable readable citation metadata', () => {
+    const metadata = buildScientificMetadata(
+      {
+        pageContent: 'Introduction\nThis paper evaluates citation stability.',
+        metadata: {
+          loc: { pageNumber: 3 },
+          pdf: { info: { Title: 'Scientific RAG: Stable Citations.pdf' } },
+          source: '/uploads/scientific-rag.pdf',
+        },
+      },
+      'fallback.pdf',
+      7,
+      1,
+    );
+
+    expect(metadata).toMatchObject({
+      title: 'Scientific RAG: Stable Citations', // embedded title wins over 'fallback.pdf'; extension removed
+      page: 3,
+      source: '/uploads/scientific-rag.pdf',
+      section: 'introduction',
+      chunkIndex: 7,
+      pageChunkIndex: 1,
+      citationKey: 'scientific-rag-stable-citations:p3:c2', // c2: pageChunkIndex 1 is rendered one-based
+    });
+  });
+
+  it('keeps citation slugs deterministic and bounded', () => {
+    expect(citationSlug('  A Very_Long Scientific Paper!!!  ')).toBe(
+      'a-very-long-scientific-paper',
+    );
+    expect(citationSlug('')).toBe('document'); // an empty slug is replaced by 'document'
+  });
+
+  it('clamps retrieval result counts to a useful range', () => {
+    expect(clampRetrievedDocumentCount(undefined)).toBe(6); // non-numeric input gets the default
+    expect(clampRetrievedDocumentCount(0)).toBe(1);
+    expect(clampRetrievedDocumentCount(99)).toBe(12);
+    expect(clampRetrievedDocumentCount(4.9)).toBe(4); // truncated toward zero, not rounded
+  });
+
+  it('formats retrieved chunks with citation and distance context', () => {
+    expect(
+      formatRetrievedDocument({
+        content: 'The model retrieved a grounded answer.',
+        metadata: {
+          title: 'Grounded RAG',
+          page: 4,
+          section: 'results',
+          citationKey: 'grounded-rag:p4:c1',
+        },
+        distance: 0.012345,
+        index: 0,
+      }),
+    ).toContain('[grounded-rag:p4:c1] Grounded RAG, page 4, section results, distance 0.0123'); // distance is printed with four decimals
+  });
+});
diff --git a/ui/pages/api/fetch-documents.ts b/ui/pages/api/fetch-documents.ts
index 9304e48..75bd8fc 100644
--- a/ui/pages/api/fetch-documents.ts
+++ b/ui/pages/api/fetch-documents.ts
@@ -1,23 +1,31 @@
 import type { NextApiRequest, NextApiResponse } from "next";
 import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
 
+import { clampRetrievedDocumentCount } from '@/utils/server/scientific-rag';
+
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
   try {
+    if (req.method !== 'POST') {
+      return res.status(405).end();
+    }
+
     const client = new ChromaClient({
-      path: "http://chroma-server:8000",
+      path: process.env.CHROMA_PATH || "http://chroma-server:8000",
     });
 
     const query = req.body.input;
+    if (typeof query !== 'string' || query.trim().length === 0) {
+      return res.status(400).json({ error: 'Missing document query' });
+    }
 
     const embedder = new TransformersEmbeddingFunction();
 
     const collection = await client.getOrCreateCollection({ name: "default-collection", embeddingFunction: embedder });
 
-  // query the collection
-  const results = await collection.query({
-      nResults: 4, 
-      queryTexts: [query]
-  }) 
+    const results = await collection.query({
+      nResults: clampRetrievedDocumentCount(req.body.nResults),
+      queryTexts: [query],
+    });
 
     res.status(200).json(results);
   } catch (error) {
@@ -29,4 +37,4 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
     }
     res.status(500).json({ error: 'An unexpected error occurred :(' });
   }
-}
\ No newline at end of file
+}
diff --git a/ui/pages/api/inject-documents.ts b/ui/pages/api/inject-documents.ts
index 532a635..626152c 100644
--- a/ui/pages/api/inject-documents.ts
+++ b/ui/pages/api/inject-documents.ts
@@ -3,11 +3,17 @@ import type { NextApiRequest, NextApiResponse } from 'next';
 import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
 import { IncomingForm } from 'formidable';
 import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
-import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 
 import path from 'path';
 import { v4 as uuidv4 } from 'uuid';
 
+import {
+  SCIENTIFIC_TEXT_SEPARATORS,
+  buildScientificMetadata,
+  type ScientificDocument,
+} from '@/utils/server/scientific-rag';
+
 export const config = {
   api: {
     bodyParser: false,
@@ -33,17 +39,20 @@ export default async function handler(
         path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
       });
 
-      const loader = new PDFLoader(files.pdf[0].filepath);
-
-      const originalDocs = await loader.load();
+      const pdfFile = Array.isArray(files.pdf) ? files.pdf[0] : files.pdf;
+      if (!pdfFile) {
+        return res.status(400).json({ error: 'Missing PDF file' });
+      }
 
-      console.log(JSON.stringify(originalDocs));
+      const loader = new PDFLoader(pdfFile.filepath);
 
+      const originalDocs = await loader.load();
 
       const splitter = new RecursiveCharacterTextSplitter({
-        chunkSize: 500,
-        chunkOverlap: 100,
-      });      
+        chunkSize: 900,
+        chunkOverlap: 180,
+        separators: SCIENTIFIC_TEXT_SEPARATORS,
+      });
 
       const docs = await splitter.splitDocuments(originalDocs);
  
@@ -75,30 +84,31 @@ export default async function handler(
   }
 }
 
-function processDocuments(docs: any) {
-  const ids = [];
+function processDocuments(docs: ScientificDocument[]) {
+  const ids: string[] = [];
   const metadatas = [];
-  const documentContents = [];
+  const documentContents: string[] = [];
+  const pageChunkCounts = new Map<string, number>();
 
-  for (const document of docs) {
-    // Generate an ID for each document, or use some existing unique identifier
+  for (let index = 0; index < docs.length; index += 1) {
+    const document = docs[index];
     const id = uuidv4();
     ids.push(id);
 
-    const fallbackTitle = path.basename(document.metadata.source);
-    const titleFromMetadata = document.metadata.pdf.info.Title;
-
-    const title = titleFromMetadata && titleFromMetadata.length > 0 ? titleFromMetadata : fallbackTitle;
-
-  
-    const metadata = {
-      title: title,
-      page: document.metadata.loc.pageNumber, // Define this function to extract chapter info
-      source: document.metadata.source, // Define this function to extract verse info
-    };
+    const fallbackTitle = path.basename(document.metadata.source ?? 'document.pdf');
+    const page = document.metadata.loc?.pageNumber ?? 'unknown';
+    const pageKey = `${document.metadata.source ?? fallbackTitle}:${page}`;
+    const pageChunkIndex = pageChunkCounts.get(pageKey) ?? 0;
+    pageChunkCounts.set(pageKey, pageChunkIndex + 1);
+
+    const metadata = buildScientificMetadata(
+      document,
+      fallbackTitle,
+      index,
+      pageChunkIndex,
+    );
     metadatas.push(metadata);
 
-    // Add the page content to the documents array
     documentContents.push(document.pageContent);
   }
 
diff --git a/ui/pages/api/rag-chat.ts b/ui/pages/api/rag-chat.ts
index ce84d67..14c7778 100644
--- a/ui/pages/api/rag-chat.ts
+++ b/ui/pages/api/rag-chat.ts
@@ -1,8 +1,9 @@
 import { DEFAULT_SYSTEM_PROMPT, DEFAULT_TEMPERATURE } from '@/utils/app/const';
 import { OpenAIError, OpenAIStream } from '@/utils/server';
-import { codeBlock, oneLine } from 'common-tags'
+import { codeBlock, oneLine } from 'common-tags';
 
 import { ChatBody, Message } from '@/types/chat';
+import { formatRetrievedDocument } from '@/utils/server/scientific-rag';
 
 // @ts-expect-error
 import wasm from '../../node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm?module';
@@ -15,13 +16,15 @@ export const config = {
 };
 
 // Function to fetch and format documents
-async function fetchAndFormatDocuments(lastMessageContent: string) {
+async function fetchAndFormatDocuments(
+  baseUrl: string,
+  lastMessageContent: string,
+) {
   try {
-    console.log("fetching documents")
-    const response = await fetch('http://localhost:3000/api/fetch-documents', {
+    const response = await fetch(`${baseUrl}/api/fetch-documents`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ input: lastMessageContent }),
+      body: JSON.stringify({ input: lastMessageContent, nResults: 6 }),
     });
     
     if (!response.ok) {
@@ -30,10 +33,13 @@ async function fetchAndFormatDocuments(lastMessageContent: string) {
 
     const data = await response.json();
     const result = data.metadatas[0].map((metadata: any, index: number) => {
-      return `Source ${index + 1}) Title: ${metadata.title}, Page: ${metadata.page}, Content: ${data.documents[0][index]}\n`;
-    }).join('');
-
-    console.log(result);
+      return formatRetrievedDocument({
+        content: data.documents[0][index],
+        metadata,
+        distance: data.distances?.[0]?.[index],
+        index,
+      });
+    }).join('\n\n---\n\n');
 
     return result;
 
@@ -64,7 +70,7 @@ const handler = async (req: Request): Promise<Response> => {
     ${oneLine`
       You are a very enthusiastic AI assistant  who loves
       to help people! Given the following information from
-      relevant documentation, answer the user's question using
+      relevant scientific documentation, answer the user's question using
       only that information, outputted in markdown format.
     `}
 
@@ -75,7 +81,7 @@ const handler = async (req: Request): Promise<Response> => {
     `}
     
     ${oneLine`
-      Always include citations from the documentation.
+      Every factual claim must include citation keys from the documentation.
     `}
   `;
 
@@ -85,7 +91,10 @@ const handler = async (req: Request): Promise<Response> => {
 
     const lastMessage = messages[messages.length - 1];
 
-    const relevantDocuments = await fetchAndFormatDocuments(lastMessage.content);
+    const relevantDocuments = await fetchAndFormatDocuments(
+      new URL(req.url).origin,
+      lastMessage.content,
+    );
     
     let temperatureToUse = temperature;
     if (temperatureToUse == null) {
@@ -100,9 +109,6 @@ const handler = async (req: Request): Promise<Response> => {
 
     encoding.free();
 
-    console.log(model, promptToSend, temperatureToUse, key, messagesToSend);
-
-  
   messagesToSend = [
       {
         role: "user",
@@ -121,6 +127,14 @@ const handler = async (req: Request): Promise<Response> => {
           ${oneLine`
             - Do not make up answers that are not provided in the documentation.
           `}
+          ${oneLine`
+            - Cite sources using the exact citation keys shown in square brackets,
+            for example [paper-title:p3:c2].
+          `}
+          ${oneLine`
+            - Prefer sources with lower retrieval distance when multiple sources
+            contain similar information.
+          `}
           ${oneLine`
             - If you are unsure and the answer is not explicitly written
             in the documentation context, say
diff --git a/ui/utils/server/scientific-rag.ts b/ui/utils/server/scientific-rag.ts
new file mode 100644
index 0000000..9e95bfe
--- /dev/null
+++ b/ui/utils/server/scientific-rag.ts
@@ -0,0 +1,157 @@
+export type ScientificDocument = { // input shape read by buildScientificMetadata
+  pageContent: string; // document text; also scanned for a section heading
+  metadata: {
+    loc?: {
+      pageNumber?: number; // when absent, the page is reported as the string 'unknown'
+    };
+    pdf?: {
+      info?: {
+        Title?: string; // embedded title; preferred over the fallback title when non-blank
+      };
+    };
+    source?: string; // copied to the output `source`; the fallback title stands in when absent
+    [key: string]: unknown;
+  };
+};
+
+export type ScientificChunkMetadata = { // record returned by buildScientificMetadata
+  title: string; // passed through normalizeTitle
+  page: number | string; // page number, or 'unknown' when metadata.loc.pageNumber is absent
+  source: string;
+  section: string; // a SECTION_PATTERNS label, or 'body' when no pattern matches
+  chunkIndex: number; // passed through unchanged from the caller
+  pageChunkIndex: number; // zero-based here; rendered one-based inside citationKey
+  citationKey: string; // `<slug>:p<page>:c<pageChunkIndex + 1>`
+};
+
+export type RetrievedScientificDocument = { // argument of formatRetrievedDocument
+  content: string; // trimmed before being emitted
+  metadata: Partial<ScientificChunkMetadata>; // any missing field gets a placeholder when formatted
+  distance?: number; // printed with four decimals when it is a number, omitted otherwise
+  index: number; // zero-based; drives the "Source N" / "source-N" placeholders
+};
+
+const SECTION_PATTERNS: Array<[string, RegExp]> = [ // [label, heading pattern]; detectScientificSection returns the first match
+  ['abstract', /^\s*(?:abstract|summary)\b/im],
+  ['introduction', /^\s*(?:\d+\.?\s*)?introduction\b/im], // optional leading section number, e.g. "1." or "1"
+  ['background', /^\s*(?:\d+\.?\s*)?background\b/im],
+  ['methods', /^\s*(?:\d+\.?\s*)?(?:methods|methodology|materials and methods|experimental setup)\b/im],
+  ['results', /^\s*(?:\d+\.?\s*)?(?:results|evaluation|experiments?)\b/im],
+  ['discussion', /^\s*(?:\d+\.?\s*)?discussion\b/im],
+  ['limitations', /^\s*(?:\d+\.?\s*)?limitations?\b/im],
+  ['conclusion', /^\s*(?:\d+\.?\s*)?(?:conclusion|conclusions|future work)\b/im],
+  ['references', /^\s*(?:references|bibliography)\b/im],
+]; // flags: i = any casing, m = ^ anchors at the start of every line, not only the first
+
+export const SCIENTIFIC_TEXT_SEPARATORS = [ // passed as `separators` to the text splitter in inject-documents
+  '\nAbstract', // heading-led separators come first; several appear in Title and UPPER case
+  '\nABSTRACT',
+  '\nIntroduction',
+  '\nINTRODUCTION',
+  '\nBackground',
+  '\nMethods',
+  '\nMETHODS',
+  '\nMaterials and Methods',
+  '\nResults',
+  '\nRESULTS',
+  '\nDiscussion',
+  '\nDISCUSSION',
+  '\nConclusion',
+  '\nCONCLUSION',
+  '\nReferences',
+  '\n\n', // generic separators follow: blank line, newline, ". ", space, empty string
+  '\n',
+  '. ',
+  ' ',
+  '',
+];
+
+/**
+ * Labels text with the first SECTION_PATTERNS entry whose heading pattern
+ * matches it, or 'body' when no pattern matches.
+ */
+export function detectScientificSection(content: string): string {
+  const hit = SECTION_PATTERNS.find(([, pattern]) => pattern.test(content));
+
+  return hit === undefined ? 'body' : hit[0];
+}
+
+/** Cleans a title for display: trims, drops a trailing `.pdf`, turns whitespace/`_`/`-` runs into one space, caps at 140 chars. */
+export function normalizeTitle(title: string): string {
+  return title
+    .trim()
+    .replace(/\.pdf$/i, '') // only the file extension: titles such as "A vs. B" must keep their text
+    .replace(/[\s_-]+/g, ' ')
+    .slice(0, 140);
+}
+
+/** Lowercase hyphenated ASCII slug of a title, at most 48 chars with no edge hyphens; 'document' when nothing is left. */
+export function citationSlug(title: string): string {
+  const hyphenated = normalizeTitle(title).toLowerCase().replace(/[^a-z0-9]+/g, '-');
+  const slug = hyphenated
+    .replace(/^-+/, '')
+    .slice(0, 48)
+    .replace(/-+$/, ''); // trim after truncating: the 48-char cut can land right after a hyphen
+  return slug || 'document';
+}
+
+export function buildCitationKey(
+  title: string,
+  page: number | string,
+  pageChunkIndex: number, // zero-based; rendered one-based in the key
+): string {
+  return [citationSlug(title), `p${page}`, `c${pageChunkIndex + 1}`].join(':'); // <slug>:p<page>:c<n>
+}
+
+/**
+ * Assembles the metadata record kept with a chunk: normalized title, page,
+ * source, detected section, both chunk indices, and the derived citation key.
+ */
+export function buildScientificMetadata(
+  document: ScientificDocument,
+  fallbackTitle: string,
+  chunkIndex: number,
+  pageChunkIndex: number,
+): ScientificChunkMetadata {
+  const { metadata: rawMetadata, pageContent } = document;
+
+  // An absent or blank embedded PDF title yields to the caller-supplied fallback.
+  const embeddedTitle = rawMetadata.pdf?.info?.Title ?? '';
+  const title = normalizeTitle(
+    embeddedTitle.trim().length > 0 ? embeddedTitle : fallbackTitle,
+  );
+  const page = rawMetadata.loc?.pageNumber ?? 'unknown';
+  const source = rawMetadata.source ?? fallbackTitle;
+  const section = detectScientificSection(pageContent);
+  const citationKey = buildCitationKey(title, page, pageChunkIndex);
+
+  return { title, page, source, section, chunkIndex, pageChunkIndex, citationKey };
+}
+
+/** Coerces a requested result count to a whole number in [1, 12]; non-numeric or non-finite input gives 6. */
+export function clampRetrievedDocumentCount(value: unknown): number {
+  if (typeof value !== 'number' || !Number.isFinite(value)) return 6;
+  const whole = Math.trunc(value); // fractional requests are cut toward zero
+  if (whole < 1) return 1;
+  return whole > 12 ? 12 : whole;
+}
+
+/**
+ * Renders a chunk as a citation header line plus its trimmed text; tolerates null metadata or content.
+ */
+export function formatRetrievedDocument({
+  content,
+  metadata,
+  distance,
+  index,
+}: RetrievedScientificDocument): string {
+  const meta: Partial<ScientificChunkMetadata> = metadata ?? {}; // query results may carry null metadata
+  const title = meta.title ?? `Source ${index + 1}`;
+  const page = meta.page ?? 'unknown';
+  const section = meta.section ?? 'body';
+  const citationKey =
+    meta.citationKey ?? buildCitationKey(meta.title ?? `source-${index + 1}`, page, index);
+  const distanceText = typeof distance === 'number' ? `, distance ${distance.toFixed(4)}` : '';
+  const header = `[${citationKey}] ${title}, page ${page}, section ${section}${distanceText}`;
+  return `${header}\n${(content ?? '').trim()}`;
+}