Official Node.js/TypeScript SDK for the Knowhere document parsing API.
- TypeScript-first - Full type safety with comprehensive type definitions
- Stream-based uploads - Efficient handling of large files
- Automatic retries - Exponential backoff for transient failures
- Adaptive polling - Smart waiting for job completion
- Progressive API - High-level convenience methods + low-level control
- Modern JavaScript - ESM and CommonJS support
npm install @ontos-ai/knowhere-sdk

Requirements:
- Node.js >= 20.19.0
- npm >= 10.0.0
- TypeScript >= 5.0 (optional, for type checking)
import Knowhere from '@ontos-ai/knowhere-sdk';
// Initialize client
const client = new Knowhere({
apiKey: process.env.KNOWHERE_API_KEY,
});
// Parse a document from URL
const result = await client.parse({
url: 'https://example.com/document.pdf',
});
// Access parsed content
console.log(`Found ${result.textChunks.length} text chunks`);
console.log(`Found ${result.imageChunks.length} images`);
console.log(`Found ${result.tableChunks.length} tables`);
// Work with chunks — worker metadata is in chunk.metadata
result.textChunks.forEach((chunk) => {
console.log(chunk.content);
console.log(chunk.metadata.keywords);
console.log(chunk.metadata.summary);
});
// Save results to disk
await result.save('./output/');

KNOWHERE_API_KEY=sk_... # Required
KNOWHERE_BASE_URL=https://api.knowhereto.ai # Optional

const client = new Knowhere({
apiKey: 'sk_...', // API authentication key
baseURL: 'https://...', // API base URL
timeout: 60000, // Request timeout (ms)
uploadTimeout: 600000, // Upload timeout (ms)
maxRetries: 5, // Max retry attempts
});

// From file path (recommended)
const result = await client.parse({
file: './document.pdf',
});
// From Buffer
const buffer = await fs.readFile('./document.pdf');
const result = await client.parse({
file: buffer,
fileName: 'document.pdf',
});
// From Stream
const stream = fs.createReadStream('./document.pdf');
const result = await client.parse({
file: stream,
fileName: 'document.pdf',
});

`fileName` is inferred automatically when `file` is a local file path. When
`file` is a Buffer, Uint8Array, or a stream without path metadata, provide
`fileName` explicitly.
const result = await client.parse({
url: 'https://example.com/doc.pdf',
model: 'advanced', // 'base' | 'advanced'
ocr: true, // Enable OCR
docType: 'pdf', // Document type hint
smartTitleParse: true, // Smart title detection
summaryImage: true, // Generate image summaries
summaryTable: true, // Generate table summaries
summaryText: true, // Generate text summaries
addFragDesc: 'Custom context', // Additional fragment description
kbDir: 'project_docs', // Knowledge base directory
pollInterval: 10000, // Polling interval (ms)
pollTimeout: 1800000, // Max wait time (ms)
verifyChecksum: true, // Verify ZIP checksum (default: true)
webhook: {
// Webhook for completion
url: 'https://...',
},
onUploadProgress: (progress) => {
console.log(`Upload: ${progress.percent}%`);
},
onPollProgress: (status) => {
console.log(`Status: ${status.status}`);
},
});

For granular control over the job lifecycle:
// 1. Create job
const job = await client.jobs.create({
sourceType: 'file',
fileName: 'document.pdf',
documentMetadata: {
createdByClient: 'cli',
sourceFileName: 'document.pdf',
},
parsingParams: { model: 'advanced', ocrEnabled: true },
});
// 2. Upload file
await client.jobs.upload(job, {
file: './document.pdf',
onProgress: ({ percent }) => console.log(`${percent}%`),
});
// 3. Wait for completion
const jobResult = await client.jobs.wait(job.jobId, {
pollInterval: 10000,
});
// 4. Load results
const result = await client.jobs.load(jobResult);

Published documents are queryable through the retrieval API after a job
finishes. `client.jobs.create(...)` may return a planned `documentId`; persist
`jobResult.documentId` after publication as the canonical value if you need to
update or archive the same document later.
const job = await client.jobs.create({
sourceType: 'url',
sourceUrl: 'https://example.com/manual.pdf',
namespace: 'support-center',
documentMetadata: {
createdByClient: 'notebook',
title: 'Support manual',
},
});
const jobResult = await client.jobs.wait(job.jobId);
const documentId = jobResult.documentId ?? job.documentId;
if (!documentId) {
throw new Error('Expected documentId after successful publication.');
}
console.log(documentId);
// Agentic mode (LLM navigation + answer synthesis)
const response = await client.retrieval.query({
namespace: 'support-center',
query: 'How do I reset Bluetooth pairing?',
topK: 5,
useAgentic: true,
});
console.log(response.answerText); // LLM-generated answer
console.log(response.referencedChunks); // cited evidence chunks
console.log(response.evidenceText); // rendered evidence context, when returned
console.log(response.stopReason); // agentic termination reason, when returned
console.log(response.failureReason); // no-answer reason, when returned
for (const result of response.results) {
console.log(result.content);
console.log(result.score);
console.log(result.source.sourceFileName, result.source.sectionPath);
}

Retrieval results use one canonical source object:
result.content;
result.chunkType;
result.score;
result.assetUrl;
result.source.documentId;
result.source.sourceFileName;
result.source.sectionPath;

Agentic references expose the current retrieval citation fields:
const reference = response.referencedChunks[0];
reference.chunkId;
reference.documentId;
reference.chunkType;
reference.sectionPath;
reference.filePath;
reference.jobId;
reference.assetUrl;

Use `documentId` to update or archive a document:
const updateJob = await client.jobs.create({
sourceType: 'url',
sourceUrl: 'https://example.com/manual-v2.pdf',
documentId,
});
const documents = await client.documents.list({
namespace: 'support-center',
page: 1,
pageSize: 50,
});
const document = await client.documents.get(documentId);
const chunks = await client.documents.listChunks(documentId, {
page: 1,
pageSize: 50,
chunkType: 'image',
includeAssetUrls: true,
});
const archived = await client.documents.archive(documentId);
console.log(documents.documents.length);
console.log(documents.pagination.totalPages);
console.log(document.status);
console.log(chunks.pagination.total);
if (chunks.chunks[0]) {
const chunk = await client.documents.getChunk(documentId, chunks.chunks[0].id, {
includeAssetUrls: true,
});
console.log(chunk.chunk.content);
console.log(chunk.chunk.assetUrl);
}
console.log(archived.status);

The SDK can also keep parsed results in a local cache and run exact inspection
tools over that cached copy. This is the implementation used by the separate
`@ontos-ai/knowhere-mcp` package.
const parsed = await client.knowledge.parse({
file: './manual.pdf',
localDocumentId: 'manual-v1',
});
const outline = await client.knowledge.getDocumentOutline(parsed.document.localDocumentId);
const read = await client.knowledge.readChunks({
localDocumentId: parsed.document.localDocumentId,
sectionPath: outline.sections[0]?.sectionPath,
limit: 5,
});
const grep = await client.knowledge.grepChunks({
localDocumentId: parsed.document.localDocumentId,
pattern: 'warranty',
maxResults: 10,
});
const serverSearch = await client.knowledge.search({
query: 'battery warranty',
localDocumentIds: [parsed.document.localDocumentId],
topK: 5,
});
console.log(read.chunks);
console.log(grep.matches);
console.log(serverSearch.references);

Local grep and reads use the cached parse result, not server-side chunk scans.
Search uses the Knowhere API retrieval query; local document IDs only help map
returned server document IDs back to local cache IDs when available.
If a search result only has a published documentId, or an async parse flow only
has a completed jobId, read-oriented helpers can accept that remote identifier
directly and will sync the result into the local cache before reading. For a
documentId, the SDK resolves the document's current published jobId and then
downloads the parser result ZIP through the same cacheJobResult(...) path:
const remoteRead = await client.knowledge.readChunks({
documentId: 'doc_123',
sectionPath: 'Overview',
limit: 5,
});
const remoteOutline = await client.knowledge.getDocumentOutline({
documentId: 'doc_123',
});
const jobRead = await client.knowledge.readChunks({
jobId: jobResult.jobId,
localDocumentId: 'manual-v1',
limit: 5,
});

The MCP package is a wrapper over this SDK interface; install it only when an agent host needs an MCP server. See the MCP package README for Codex, Claude Code, Claude Desktop, and generic stdio MCP host configuration examples.
For longer parses, use the non-blocking SDK flow and cache the result after the job completes:
const started = await client.knowledge.startParse({
file: './manual.pdf',
localDocumentId: 'manual-v1',
});
const status = await client.knowledge.getJobStatus(started.job.jobId);
if (status.job.isDone && status.cache.document) {
console.log(status.cache.document.localDocumentId);
}

When the job was started through `client.knowledge.startParse(...)`,
`getJobStatus(...)` automatically caches the completed result locally the first
time it observes `status.job.isDone`. Use `cacheJobResult(...)` only to recover a
completed job that was not started through the local knowledge helper, or to
retry a cache step explicitly.
Follow-up queries can exclude documents or sections for one request:
const followUp = await client.retrieval.query({
namespace: 'support-center',
query: 'battery charging',
excludeDocumentIds: ['doc_old'],
excludeSections: [{ documentId: 'doc_123', sectionPath: 'Appendix / Legal' }],
});

import {
BadRequestError,
AuthenticationError,
RateLimitError,
PollingTimeoutError,
JobFailedError,
ValidationError,
InvalidStateError,
} from '@ontos-ai/knowhere-sdk';
try {
const result = await client.parse({ url: '...' });
} catch (error) {
if (error instanceof ValidationError) {
console.error('Invalid parameters:', error.message);
} else if (error instanceof RateLimitError) {
// Wait and retry
await sleep(error.retryAfter * 1000);
} else if (error instanceof AuthenticationError) {
console.error('Invalid API key');
} else if (error instanceof PollingTimeoutError) {
console.error('Processing timeout');
} else if (error instanceof JobFailedError) {
console.error('Job failed:', error.jobResult.error);
} else if (error instanceof InvalidStateError) {
console.error('Invalid state:', error.message);
}
}

For complete documentation, visit https://docs.knowhereto.ai
Check out the examples directory for more usage examples.
# Install dependencies
npm ci
# Run tests
npm test
# Run tests with coverage
npm run test:ci
# Lint code
npm run lint
# Format code
npm run format
# Type check
npm run typecheck
# Build
npm run build

See docs/release-workflow.md for the Changesets-based stable and beta release process.
- Contributing guide: CONTRIBUTING.md
- Security policy: SECURITY.md
- Code of conduct: CODE_OF_CONDUCT.md
- Email: team@knowhereto.ai
- Issues: GitHub Issues
- Documentation: https://docs.knowhereto.ai
See CHANGELOG.md for release history.