Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions ui/__tests__/scientific-evidence.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import {
type ChromaQueryLike,
buildScientificEvidencePayload,
} from '@/utils/server/scientific-evidence';

import { describe, expect, it } from 'vitest';

/**
 * Behavioural suite for `buildScientificEvidencePayload`.
 *
 * Covers de-duplication and key stability, metadata normalisation,
 * length bounding/truncation, and tolerance of ragged or null query results.
 */
describe('buildScientificEvidencePayload', () => {
  type EvidencePayload = ReturnType<typeof buildScientificEvidencePayload>;

  // Expected shapes of the generated identifiers.
  const CITATION_KEY_PATTERN = /^SRC-[0-9A-F]{8}$/;
  const SOURCE_ID_PATTERN = /^DOC-[0-9A-F]{8}$/;

  /** Collects the citation keys of a payload in emission order. */
  const citationKeysOf = (payload: EvidencePayload) =>
    payload.citations.map((entry) => entry.key);

  /** A single 200-character chunk, used by both truncation tests. */
  const singleOversizedChunk = (): ChromaQueryLike => ({
    ids: [['id-1']],
    documents: [['A'.repeat(200)]],
    metadatas: [[{ title: 'Large Chunk', source: 'source.pdf', page: 1 }]],
    distances: [[0.2]],
  });

  it('deduplicates duplicate chunks and emits stable citation keys/source manifest', () => {
    // All three chunks belong to the same paper; the first two are identical.
    const paperAChunk = (page: number, chunkIndex: number) => ({
      title: 'Paper A',
      source: '/tmp/paper-a.pdf',
      page,
      chunkIndex,
    });
    const chromaResult: ChromaQueryLike = {
      ids: [['id-1', 'id-2', 'id-3']],
      documents: [['Alpha finding', 'Alpha finding', 'Beta finding']],
      metadatas: [[paperAChunk(2, 0), paperAChunk(2, 0), paperAChunk(3, 1)]],
      distances: [[0.01, 0.01, 0.02]],
    };

    // Build twice from the same input to check that keys are reproducible.
    const firstRun = buildScientificEvidencePayload(chromaResult);
    const secondRun = buildScientificEvidencePayload(chromaResult);

    expect(firstRun.citations).toHaveLength(2);

    const firstKeys = citationKeysOf(firstRun);
    expect(firstKeys).toEqual(citationKeysOf(secondRun));
    expect(firstKeys).toEqual([
      expect.stringMatching(CITATION_KEY_PATTERN),
      expect.stringMatching(CITATION_KEY_PATTERN),
    ]);

    expect(firstRun.sourceManifest).toHaveLength(1);
    expect(firstRun.sourceManifest[0].sourceId).toEqual(
      expect.stringMatching(SOURCE_ID_PATTERN),
    );
    expect(firstRun.sourceManifest[0].citationKeys).toHaveLength(2);
  });

  it('normalizes ragged metadata and prefers citation-friendly sources', () => {
    // Metadata uses alternate key spellings and non-canonical value types.
    const chromaResult: ChromaQueryLike = {
      ids: [['id-1']],
      documents: [[' Content with spaces ']],
      metadatas: [
        [
          {
            title: 123,
            filename: 'research.pdf',
            sourcePath: '/var/tmp/research.pdf',
            pageNumber: '5',
            chunk_index: '2',
            chunk_id: 'c-2',
          },
        ],
      ],
      distances: [[0.1]],
    };

    const result = buildScientificEvidencePayload(chromaResult);
    expect(result.citations).toHaveLength(1);

    const [onlyCitation] = result.citations;
    expect(onlyCitation.title).toBe('123');
    expect(onlyCitation.source).toBe('research.pdf');
    expect(result.sourceManifest[0].source).toBe('research.pdf');
    // The absolute path must not appear in the rendered evidence context.
    expect(result.evidenceContext).not.toContain('/var/tmp/research.pdf');
    expect(onlyCitation.page).toBe(5);
    expect(onlyCitation.chunkIndex).toBe(2);
    expect(onlyCitation.chunkId).toBe('c-2');
    expect(onlyCitation.content).toBe('Content with spaces');
  });

  it('bounds evidence context length and truncates safely', () => {
    const limits = { maxChunkChars: 20, maxEvidenceChars: 60 };

    const result = buildScientificEvidencePayload(
      singleOversizedChunk(),
      limits,
    );

    expect(result.citations[0].content.length).toBeLessThanOrEqual(20);
    expect(result.evidenceContext.length).toBeLessThanOrEqual(60);
    expect(result.evidenceContext.endsWith('...')).toBe(true);
  });

  it('truncates safely when max chars is shorter than the ellipsis', () => {
    // Both limits are smaller than the three-character '...' marker.
    const limits = { maxChunkChars: 2, maxEvidenceChars: 2 };

    const result = buildScientificEvidencePayload(
      singleOversizedChunk(),
      limits,
    );

    const truncatedContent = result.citations[0].content;
    expect(truncatedContent).toBe('..');
    expect(truncatedContent.length).toBeLessThanOrEqual(2);

    const truncatedContext = result.evidenceContext;
    expect(truncatedContext).toBe('..');
    expect(truncatedContext.length).toBeLessThanOrEqual(2);
  });

  it('handles ragged/null chroma arrays without throwing', () => {
    // Array lengths disagree, and documents, metadata and distances contain nulls.
    const unevenResult: ChromaQueryLike = {
      ids: [['id-1', 'id-2'], []],
      documents: [['Chunk one', null], []],
      metadatas: [[null], []],
      distances: null,
    };

    const result = buildScientificEvidencePayload(unevenResult);

    expect(result.citations).toHaveLength(1);
    expect(result.citations[0].source).toBe('unknown-source');
    expect(result.sourceManifest).toHaveLength(1);
  });
});
83 changes: 71 additions & 12 deletions ui/pages/api/fetch-documents.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,84 @@
import type { NextApiRequest, NextApiResponse } from "next";
import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
import type { NextApiRequest, NextApiResponse } from 'next';

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
import { buildScientificEvidencePayload } from '@/utils/server/scientific-evidence';

import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';

const DEFAULT_RESULTS = 8;
const MAX_RESULTS = 20;
const DEFAULT_EVIDENCE_CHARS = 12000;
const MAX_EVIDENCE_CHARS = 30000;

/**
 * Coerces a caller-supplied value into a positive integer capped at `maxValue`.
 *
 * Numbers are taken as-is and strings are converted with `Number()`; any other
 * type is treated as missing. The candidate is floored *before* it is
 * validated, so fractional inputs in (0, 1) fall back to the default instead
 * of collapsing to 0.
 *
 * @param value - Raw value to interpret (for example a request-body field).
 * @param defaultValue - Returned when `value` is missing, not finite, or
 *   floors to less than 1.
 * @param maxValue - Upper bound applied to an accepted value.
 * @returns The floored value capped at `maxValue`, or `defaultValue`.
 */
function parseBoundedInteger(
  value: unknown,
  defaultValue: number,
  maxValue: number,
): number {
  let candidate = Number.NaN;
  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string') {
    candidate = Number(value);
  }

  // Rejects NaN and +/-Infinity (unparseable strings, unsupported types).
  if (!Number.isFinite(candidate)) {
    return defaultValue;
  }

  // Floor first: checking positivity on the raw value would let 0.5 through
  // and then return 0.
  const whole = Math.floor(candidate);
  if (whole < 1) {
    return defaultValue;
  }

  return Math.min(whole, maxValue);
}

export default async function handler(
req: NextApiRequest,
res: NextApiResponse,
) {
try {
if (req.method !== 'POST') {
return res.status(405).json({ error: 'Only POST is supported' });
}

const client = new ChromaClient({
path: "http://chroma-server:8000",
path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
});

const query = req.body.input;
if (typeof query !== 'string' || query.trim().length === 0) {
return res.status(400).json({ error: 'input is required' });
}

const nResults = parseBoundedInteger(
req.body.nResults,
DEFAULT_RESULTS,
MAX_RESULTS,
);
const maxEvidenceChars = parseBoundedInteger(
req.body.maxEvidenceChars,
DEFAULT_EVIDENCE_CHARS,
MAX_EVIDENCE_CHARS,
);

const embedder = new TransformersEmbeddingFunction();

const collection = await client.getOrCreateCollection({ name: "default-collection", embeddingFunction: embedder });
const collection = await client.getOrCreateCollection({
name: 'default-collection',
embeddingFunction: embedder,
});

const results = await collection.query({
nResults,
queryTexts: [query.trim()],
});

// query the collection
const results = await collection.query({
nResults: 4,
queryTexts: [query]
})
const evidence = buildScientificEvidencePayload(results, {
maxEvidenceChars,
});

res.status(200).json(results);
res.status(200).json({
...results,
evidenceContext: evidence.evidenceContext,
sourceManifest: evidence.sourceManifest,
citations: evidence.citations,
});
} catch (error) {
if (error instanceof Error) {
console.error('Error message:', error.message);
Expand All @@ -29,4 +88,4 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
}
res.status(500).json({ error: 'An unexpected error occurred :(' });
}
}
}
Loading