diff --git a/.github/workflows/docker-smoke.yml b/.github/workflows/docker-smoke.yml index bb779091d..3d5b926c9 100644 --- a/.github/workflows/docker-smoke.yml +++ b/.github/workflows/docker-smoke.yml @@ -62,7 +62,10 @@ jobs: - name: Build and start stack run: | - SIGNET_HTTP_PORT=8080 SIGNET_HTTPS_PORT=8443 docker compose -f deploy/docker/compose.yml up -d --build + SIGNET_HTTP_PORT=8080 SIGNET_HTTPS_PORT=8443 docker compose -f deploy/docker/compose.yml up -d --build || { + docker compose -f deploy/docker/compose.yml logs signet 2>&1 || true + exit 1 + } - name: Wait for proxy readiness run: | diff --git a/docs/API.md b/docs/API.md index ba628b543..f62bb1fbc 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1687,6 +1687,51 @@ and source chunk embeddings. Source files are not modified. } ``` +### POST /api/sources/github + +Add or update a GitHub source. Queues an async sync that indexes issues, pull +requests, discussions, and/or docs from one or more repos into the knowledge +graph and embedding store. Requires `admin` permission. + +**Request body** + +```json +{ + "name": "Signet Issues", + "tokenRef": "GITHUB_TOKEN", + "repos": ["Signet-AI/signetai"], + "resourceTypes": ["issues", "pulls", "discussions", "docs"], + "state": "all", + "includeComments": true, + "labels": ["bug", "feature"], + "maxItemsPerRepo": 500, + "docPaths": ["README.md", "CHANGELOG.md"] +} +``` + +**Response** + +```json +{ + "source": { "id": "github:abc123def456", "kind": "github" }, + "created": true +} +``` + +### DELETE /api/sources/:sourceId + +Remove a source config and purge Signet-owned source artifacts, graph rows, +and source chunk embeddings. Source files are not modified. + +**Response** + +```json +{ + "source": { "id": "obsidian:abc123", "kind": "obsidian" }, + "purged": 150 +} +``` + ### POST /api/sources/pick-directory Best-effort local directory picker used by dashboard/browser flows. It returns @@ -4385,6 +4430,7 @@ silently disappear from the API reference. 
| GET | `/api/sources` | platform/daemon/src/routes/sources-routes.ts | | POST | `/api/sources/pick-directory` | platform/daemon/src/routes/sources-routes.ts | | POST | `/api/sources/obsidian` | platform/daemon/src/routes/sources-routes.ts | +| POST | `/api/sources/github` | platform/daemon/src/routes/sources-routes.ts | | DELETE | `/api/sources/:sourceId` | platform/daemon/src/routes/sources-routes.ts | | GET | `/api/knowledge/entities` | platform/daemon/src/routes/knowledge-routes.ts | | POST | `/api/knowledge/entities/:id/pin` | platform/daemon/src/routes/knowledge-routes.ts | diff --git a/platform/core/src/index.ts b/platform/core/src/index.ts index e95270de4..f0e4aaf7b 100644 --- a/platform/core/src/index.ts +++ b/platform/core/src/index.ts @@ -221,18 +221,24 @@ export type { WorkspaceSourceRepoSyncResult, } from "./workspace-source-repo"; export { + addGitHubSource, addObsidianSource, + DEFAULT_GITHUB_DOC_PATHS, + DEFAULT_GITHUB_RESOURCE_TYPES, DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, getAgentsDir, getSourcesConfigPath, loadSourcesConfig, markSourceIndexed, + parseGitHubSettings, removeSource, saveSourcesConfig, } from "./sources-config"; export type { + AddGitHubSourceInput, AddObsidianSourceInput, AddSourceResult, + GitHubSourceSettings, RemoveSourceResult, SignetSourceEntry, SignetSourceKind, diff --git a/platform/core/src/sources-config.test.ts b/platform/core/src/sources-config.test.ts index 2e1e58550..d94477ea4 100644 --- a/platform/core/src/sources-config.test.ts +++ b/platform/core/src/sources-config.test.ts @@ -4,10 +4,12 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, + addGitHubSource, addObsidianSource, getSourcesConfigPath, loadSourcesConfig, markSourceIndexed, + parseGitHubSettings, removeSource, } from "./sources-config"; @@ -152,4 +154,75 @@ describe("sources-config", () => { if (removed.ok === true) throw new Error("expected removeSource to fail"); expect(removed.error).toContain("not 
found"); }); + + describe("GitHub source", () => { + it("adds a GitHub source with repos and token ref", () => { + const agentsDir = tmp(); + const result = addGitHubSource( + { repos: ["Signet-AI/signetai", "Signet-AI/sqmd"], name: "Signet Repos", tokenRef: "github-pat", now: "2026-01-01T00:00:00.000Z" }, + agentsDir, + ); + expect(result.ok).toBe(true); + if (result.ok === false) throw new Error(result.error); + expect(result.created).toBe(true); + expect(result.source.kind).toBe("github"); + expect(result.source.mode).toBe("read-only"); + expect(result.source.enabled).toBe(true); + expect(result.source.name).toBe("Signet Repos"); + + const config = loadSourcesConfig(agentsDir); + expect(config.sources).toHaveLength(1); + const settings = parseGitHubSettings(config.sources[0]!.settings); + expect(settings.repos).toEqual(["Signet-AI/signetai", "Signet-AI/sqmd"]); + expect(settings.tokenRef).toBe("github-pat"); + }); + + it("updates an existing GitHub source instead of duplicating", () => { + const agentsDir = tmp(); + const first = addGitHubSource( + { repos: ["owner/repo"], name: "Repo A", now: "2026-01-01T00:00:00.000Z" }, + agentsDir, + ); + const second = addGitHubSource( + { repos: ["owner/repo"], name: "Repo B", now: "2026-01-02T00:00:00.000Z" }, + agentsDir, + ); + expect(first.ok).toBe(true); + expect(second.ok).toBe(true); + if (second.ok === false) throw new Error(second.error); + expect(second.created).toBe(false); + expect(second.source.name).toBe("Repo B"); + expect(loadSourcesConfig(agentsDir).sources).toHaveLength(1); + }); + + it("requires at least one repo", () => { + const agentsDir = tmp(); + const result = addGitHubSource({ repos: [] }, agentsDir); + expect(result.ok).toBe(false); + if (result.ok === true) throw new Error("expected failure"); + expect(result.error).toContain("repo"); + }); + + it("coexists with Obsidian sources", () => { + const agentsDir = tmp(); + const vault = join(agentsDir, "vault"); + mkdirSync(vault, { recursive: true 
}); + + addObsidianSource({ root: vault, name: "My Vault" }, agentsDir); + addGitHubSource({ repos: ["owner/repo"], name: "GitHub" }, agentsDir); + + const config = loadSourcesConfig(agentsDir); + expect(config.sources).toHaveLength(2); + expect(config.sources.map((s) => s.kind)).toEqual(["obsidian", "github"]); + }); + + it("defaults resource types to all four", () => { + const agentsDir = tmp(); + const result = addGitHubSource({ repos: ["owner/repo"] }, agentsDir); + expect(result.ok).toBe(true); + if (result.ok === false) throw new Error(result.error); + const settings = parseGitHubSettings(loadSourcesConfig(agentsDir).sources[0]!.settings); + expect(settings.resourceTypes).toEqual(["issues", "pulls", "discussions", "docs"]); + }); + }); }); diff --git a/platform/core/src/sources-config.ts b/platform/core/src/sources-config.ts index 424cb7cdd..f5133108c 100644 --- a/platform/core/src/sources-config.ts +++ b/platform/core/src/sources-config.ts @@ -3,9 +3,24 @@ import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, statSync, writ import { homedir } from "node:os"; import { dirname, resolve } from "node:path"; -export type SignetSourceKind = "obsidian"; +export type SignetSourceKind = "obsidian" | "github"; export type SignetSourceMode = "read-only"; +export interface GitHubSourceSettings { + readonly repos: readonly string[]; + readonly tokenRef?: string; + readonly resourceTypes: readonly ("issues" | "pulls" | "discussions" | "docs")[]; + readonly state?: "open" | "closed" | "all"; + readonly includeComments?: boolean; + readonly labels?: readonly string[]; + readonly docPaths?: readonly string[]; + readonly maxItemsPerRepo?: number; +} + +export const DEFAULT_GITHUB_RESOURCE_TYPES = ["issues", "pulls", "discussions", "docs"] as const; +const VALID_GITHUB_RESOURCE_TYPES = new Set(DEFAULT_GITHUB_RESOURCE_TYPES); +export const DEFAULT_GITHUB_DOC_PATHS = ["README.md", "CHANGELOG.md"] as const; + export interface SignetSourceEntry { readonly id: string; 
/**
 * Input accepted by {@link addGitHubSource}.
 *
 * Only `repos` is required; every other field falls back to the default
 * applied in `buildGitHubSettings`. `now` overrides the timestamp written to
 * `createdAt` / `updatedAt`.
 */
export interface AddGitHubSourceInput {
  readonly repos: readonly string[];
  readonly name?: string;
  readonly tokenRef?: string;
  readonly resourceTypes?: readonly ("issues" | "pulls" | "discussions" | "docs")[];
  readonly state?: "open" | "closed" | "all";
  readonly includeComments?: boolean;
  readonly labels?: readonly string[];
  readonly docPaths?: readonly string[];
  readonly maxItemsPerRepo?: number;
  readonly now?: string;
  readonly agentId?: string;
}

/**
 * Trims, drops empty entries from, and de-duplicates a repo list while keeping
 * the caller's order.
 *
 * @returns a fresh array, or `null` when `value` is not an array of strings.
 *          The argument is never mutated.
 */
function normalizeGitHubRepoList(value: unknown): string[] | null {
  if (!Array.isArray(value)) return null;
  const seen = new Set<string>();
  for (const entry of value) {
    if (typeof entry !== "string") return null;
    const trimmed = entry.trim();
    if (trimmed.length > 0) seen.add(trimmed);
  }
  return [...seen];
}

/**
 * Order-independent identity of a repo set: the sorted names joined by commas.
 * Sorts a copy so the caller's array keeps its order.
 */
function gitHubRepoSetKey(repos: readonly string[]): string {
  return [...repos].sort().join(",");
}

/**
 * Adds a GitHub source to the sources config, or updates the existing source
 * that targets the same set of repos.
 *
 * The work runs inside `withSourcesConfigLock`. Failures are reported through
 * the `{ ok: false, error }` variant of the result rather than thrown.
 *
 * @param input repos plus optional settings for the source
 * @param agentsDir directory holding the sources config; defaults to `getAgentsDir()`
 * @returns the stored entry and whether it was newly created, or an error
 */
export function addGitHubSource(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult {
  return withSourcesConfigLock(agentsDir, () => addGitHubSourceUnlocked(input, agentsDir));
}

/** Converts anything thrown by the checked implementation into an `ok: false` result. */
function addGitHubSourceUnlocked(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult {
  try {
    return addGitHubSourceChecked(input, agentsDir);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return { ok: false, error: detail };
  }
}

/**
 * Validates the input, then creates or updates the matching config entry.
 *
 * All validation happens before the config is read, so invalid input never
 * touches the file.
 */
function addGitHubSourceChecked(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult {
  const requested = normalizeGitHubRepoList(input.repos) ?? [];
  if (requested.length === 0) return { ok: false, error: "At least one repo (owner/repo or owner/*) is required" };

  const repoPattern = /^[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_*.-]+$/;
  for (const repo of requested) {
    if (!repoPattern.test(repo)) {
      return { ok: false, error: `Invalid repo pattern: ${repo}. Expected owner/repo or owner/*` };
    }
  }

  const { maxItemsPerRepo } = input;
  if (maxItemsPerRepo !== undefined) {
    // Number.isInteger also rejects NaN and +/-Infinity; a fractional item cap is meaningless.
    if (!Number.isInteger(maxItemsPerRepo) || maxItemsPerRepo < 1 || maxItemsPerRepo > 10000) {
      return { ok: false, error: "maxItemsPerRepo must be between 1 and 10000" };
    }
  }

  if (input.resourceTypes && input.resourceTypes.length > 0) {
    const invalid = input.resourceTypes.filter((t) => !VALID_GITHUB_RESOURCE_TYPES.has(t));
    if (invalid.length > 0) {
      return {
        ok: false,
        error: `Invalid resource types: ${invalid.join(", ")}. Must be one of: ${[...DEFAULT_GITHUB_RESOURCE_TYPES].join(", ")}`,
      };
    }
  }

  const now = input.now ?? new Date().toISOString();
  const cfg = loadSourcesConfigForWrite(agentsDir);

  // Repos are stored sorted so the persisted form matches the identity key.
  const repos = [...requested].sort();
  const settingsKey = gitHubRepoSetKey(repos);

  // Compare through validated copies: sorting the stored arrays in place would
  // silently reorder every other GitHub source right before the config is saved.
  const existing = cfg.sources.find((source) => {
    if (source.kind !== "github") return false;
    const stored = normalizeGitHubRepoList(source.settings?.repos);
    return stored !== null && gitHubRepoSetKey(stored) === settingsKey;
  });

  if (existing) {
    const updated: SignetSourceEntry = {
      ...existing,
      name: cleanName(input.name) ?? existing.name,
      enabled: true,
      updatedAt: now,
      settings: buildGitHubSettings(input, repos),
    };
    saveSourcesConfig(
      {
        version: SOURCES_CONFIG_VERSION,
        sources: cfg.sources.map((source) => (source.id === existing.id ? updated : source)),
      },
      agentsDir,
    );
    return { ok: true, source: updated, created: false };
  }

  const source: SignetSourceEntry = {
    id: `github:${createHash("sha256").update(settingsKey).digest("hex").slice(0, 16)}`,
    kind: "github",
    name: cleanName(input.name) ?? repos[0],
    root: "",
    enabled: true,
    mode: "read-only",
    createdAt: now,
    updatedAt: now,
    settings: buildGitHubSettings(input, repos),
    agentId: input.agentId,
  };
  saveSourcesConfig({ version: SOURCES_CONFIG_VERSION, sources: [...cfg.sources, source] }, agentsDir);
  return { ok: true, source, created: true };
}

/**
 * Builds the persisted `settings` record for a GitHub source, filling in the
 * defaults for every field the caller left out.
 *
 * An empty `resourceTypes` array is treated like an omitted one, so the stored
 * value is never an empty list.
 */
function buildGitHubSettings(
  input: AddGitHubSourceInput,
  repos: readonly string[],
): Readonly<Record<string, unknown>> {
  const resourceTypes =
    input.resourceTypes && input.resourceTypes.length > 0
      ? [...input.resourceTypes]
      : [...DEFAULT_GITHUB_RESOURCE_TYPES];
  return {
    repos: [...repos],
    tokenRef: input.tokenRef,
    resourceTypes,
    state: input.state ?? "all",
    includeComments: input.includeComments ?? true,
    labels: input.labels,
    docPaths: input.docPaths ?? [...DEFAULT_GITHUB_DOC_PATHS],
    maxItemsPerRepo: input.maxItemsPerRepo ?? 500,
  };
}
/** Narrows an arbitrary string to one of the four supported GitHub resource types. */
function isGitHubResourceTypeName(value: string): value is "issues" | "pulls" | "discussions" | "docs" {
  return value === "issues" || value === "pulls" || value === "discussions" || value === "docs";
}

/**
 * Returns a copy of `value` when it is an array made only of strings,
 * otherwise `undefined`. Copying keeps callers from mutating the loaded config.
 */
function copyStringArray(value: unknown): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  const copy: string[] = [];
  for (const entry of value) {
    if (typeof entry !== "string") return undefined;
    copy.push(entry);
  }
  return copy;
}

/**
 * Reads the untyped `settings` record of a GitHub source into a typed
 * {@link GitHubSourceSettings}, replacing anything missing or malformed with
 * its default.
 *
 * - `repos`: kept only when it is an array of strings, else `[]`.
 * - `resourceTypes`: unknown names are dropped; an empty result falls back to
 *   all default types.
 * - `state`: `"all"` unless it is `"open"` or `"closed"`.
 * - `includeComments`: `true` unless a boolean is stored.
 * - `docPaths`: the default doc paths unless an array of strings is stored.
 * - `maxItemsPerRepo`: must be an integer >= 1, otherwise 500; values above
 *   the 10000 limit enforced when adding a source are capped at 10000.
 *
 * @param raw the `settings` value of a source entry; `undefined` yields the
 *            same defaults as an empty record
 */
export function parseGitHubSettings(raw: Readonly<Record<string, unknown>> | undefined): GitHubSourceSettings {
  const stored: Readonly<Record<string, unknown>> = raw ?? {};

  const knownTypes = copyStringArray(stored.resourceTypes)?.filter(isGitHubResourceTypeName);
  const resourceTypes =
    knownTypes !== undefined && knownTypes.length > 0 ? knownTypes : [...DEFAULT_GITHUB_RESOURCE_TYPES];

  const storedMax = stored.maxItemsPerRepo;
  // JSON such as 1e999 parses to Infinity and hand edits can produce fractions;
  // neither is a usable item cap.
  const maxItemsPerRepo =
    typeof storedMax === "number" && Number.isInteger(storedMax) && storedMax >= 1
      ? Math.min(storedMax, 10000)
      : 500;

  return {
    repos: copyStringArray(stored.repos) ?? [],
    tokenRef: typeof stored.tokenRef === "string" ? stored.tokenRef : undefined,
    resourceTypes,
    state: stored.state === "open" || stored.state === "closed" || stored.state === "all" ? stored.state : "all",
    includeComments: typeof stored.includeComments === "boolean" ? stored.includeComments : true,
    labels: copyStringArray(stored.labels),
    docPaths: copyStringArray(stored.docPaths) ?? [...DEFAULT_GITHUB_DOC_PATHS],
    maxItemsPerRepo,
  };
}
startGitHubSourceBridge } from "./github-source-bridge"; import { createSignetHttpServer } from "./http-server"; import { syncAgentWorkspaces } from "./identity-sync"; import { getOrCreateInferenceRouter } from "./inference-router.js"; @@ -243,6 +244,7 @@ setupDashboardRoutes(app); let watcher: ReturnType | null = null; let nativeMemoryBridge: NativeMemoryBridgeHandle | null = null; +let githubSourceBridge: GitHubSourceBridgeHandle | null = null; // Track ingested files to avoid re-processing (path -> content hash) const ingestedMemoryFiles = new Map(); @@ -1189,6 +1191,11 @@ async function cleanup() { nativeMemoryBridge = null; } + if (githubSourceBridge) { + await githubSourceBridge.close(); + githubSourceBridge = null; + } + if (heartbeatTimer) { clearInterval(heartbeatTimer); heartbeatTimer = undefined; @@ -1610,6 +1617,27 @@ async function main() { }); } + if (!githubSourceBridge) { + const embeddingCfg = memoryCfg.embedding.provider !== "none" ? memoryCfg.embedding : undefined; + githubSourceBridge = startGitHubSourceBridge( + () => loadSourcesConfig(AGENTS_DIR).sources.filter((s) => s.enabled && s.kind === "github"), + { + agentsDir: AGENTS_DIR, + pollIntervalMs: 300_000, + embeddingConfig: embeddingCfg, + fetchEmbedding: embeddingCfg ? fetchEmbedding : undefined, + }, + ); + githubSourceBridge.sync().catch((e) => { + logger.error( + "daemon", + "Failed to sync GitHub sources", + undefined, + e instanceof Error ? 
{ message: e.message, stack: e.stack } : { error: String(e) }, + ); + }); + } + const startupCfg = loadMemoryConfig(AGENTS_DIR); if (startupCfg.embedding.provider !== "none") { checkEmbeddingProvider(startupCfg.embedding) diff --git a/platform/daemon/src/github-source-bridge.ts b/platform/daemon/src/github-source-bridge.ts new file mode 100644 index 000000000..51e3ed057 --- /dev/null +++ b/platform/daemon/src/github-source-bridge.ts @@ -0,0 +1,464 @@ +import type { GitHubSourceSettings, SignetSourceEntry } from "@signet/core"; +import { loadSourcesConfig, parseGitHubSettings } from "@signet/core"; +import { homedir } from "node:os"; +import { resolveDaemonAgentId } from "./agent-id"; +import { getDbAccessor } from "./db-accessor"; +import { yieldEvery } from "./async-yield"; +import { + clearSourceIndexInFlight, + isSourceIndexInFlight, + markSourceIndexInFlight, +} from "./source-index-progress"; +import type { GitHubResource } from "./github-source-fetch"; +import { + type GitHubFetchConfig, + expandRepoGlob, + fetchDiscussionComments, + fetchDiscussions, + fetchIssueComments, + fetchIssues, + fetchPullRequestComments, + fetchPullRequests, + fetchPullRequestsBySearch, + fetchRepoDocs, + fetchRepoInfo, +} from "./github-source-fetch"; +import { indexGitHubSourceStructure, purgeGitHubSourceStructure } from "./github-source-graph"; +import { + indexGitHubSourceEmbeddings, + purgeGitHubSourceEmbeddings, +} from "./github-source-embeddings"; +import { logger } from "./logger"; +import type { EmbeddingConfig } from "./memory-config"; +import type { SourceEmbeddingFetch } from "./obsidian-source-embeddings"; +import { getSecret } from "./secrets"; + +export interface GitHubSourceBridgeHandle { + readonly sync: () => Promise; + readonly close: () => Promise; +} + +export interface GitHubSourceBridgeOptions { + readonly agentId?: string; + readonly pollIntervalMs?: number; + readonly embeddingConfig?: EmbeddingConfig; + readonly fetchEmbedding?: SourceEmbeddingFetch; + 
/** A concrete repository to sync, after glob patterns have been expanded. */
interface ResolvedRepo {
  readonly owner: string;
  readonly repo: string;
  readonly fullName: string;
  /** Starts as "main" and is replaced when `fetchRepoInfo` returns repo info. */
  defaultBranch: string;
}

/** Comment shape handed to the embedding indexer. */
type GitHubIndexedComment = { author: string | null; body: string; createdAt: string };

/**
 * Expands the configured repo patterns into concrete repositories.
 *
 * Patterns without both an owner and a repo part are skipped. A repo part
 * containing `*` is expanded through `expandRepoGlob`. A repository reached by
 * more than one pattern (for example `owner/*` plus `owner/repo`) is returned
 * once, so it is not synced and counted twice; names are compared
 * case-insensitively and the first spelling seen is kept.
 *
 * @param settings parsed source settings; only `repos` is read
 * @param token optional GitHub token forwarded to the fetch helpers
 * @returns the repositories in first-seen order
 */
export async function resolveRepos(settings: GitHubSourceSettings, token?: string): Promise<ResolvedRepo[]> {
  const byName = new Map<string, ResolvedRepo>();
  const remember = (owner: string, repo: string, fullName: string): void => {
    const key = fullName.toLowerCase();
    if (!byName.has(key)) byName.set(key, { owner, repo, fullName, defaultBranch: "main" });
  };

  for (const pattern of settings.repos) {
    const [owner, repoPart] = pattern.split("/");
    if (!owner || !repoPart) continue;
    if (repoPart.includes("*")) {
      const expanded = await expandRepoGlob(owner, repoPart, token);
      for (const fullName of expanded) {
        const [expandedOwner, expandedRepo] = fullName.split("/");
        if (!expandedOwner || !expandedRepo) continue;
        remember(expandedOwner, expandedRepo, fullName);
      }
    } else {
      remember(owner, repoPart, `${owner}/${repoPart}`);
    }
  }

  const resolved = [...byName.values()];
  for (const repo of resolved) {
    const info = await fetchRepoInfo({ owner: repo.owner, repo: repo.repo, token });
    if (info) {
      repo.defaultBranch = info.defaultBranch;
    }
  }
  return resolved;
}

/** Outcome of one {@link syncGitHubSource} run. */
export interface GitHubSourceSyncResult {
  /** Number of resources passed to the indexer across all repos. */
  readonly indexed: number;
  /** True when any fetch reported errors, a comment fetch failed, or a repo sync threw. */
  readonly hadErrors: boolean;
}

/**
 * Syncs one GitHub source: resolves its repos, fetches the configured resource
 * types for each, indexes every resource, and reconciles stale entries for the
 * types that were fetched completely.
 *
 * A resource type counts as complete for a repo only when its fetch returned no
 * errors, was not truncated by `maxItemsPerRepo`, and every comment fetch for
 * that type succeeded. Completeness is tracked per type, so a comment failure
 * on issues does not block reconciliation of pulls or discussions.
 *
 * The source's enabled state is re-read from the sources config between steps;
 * once the source is gone or disabled the sync stops. Resources indexed before
 * the stop are still included in the returned count.
 *
 * @param source the source entry to sync
 * @param options agent id, embedding configuration and agents directory
 * @returns how many resources were indexed and whether anything failed
 */
export async function syncGitHubSource(
  source: SignetSourceEntry,
  options: GitHubSourceBridgeOptions = {},
): Promise<GitHubSourceSyncResult> {
  const agentId = options.agentId ?? resolveDaemonAgentId();
  const settings = parseGitHubSettings(source.settings);
  if (settings.repos.length === 0) {
    logger.warn("github-source", "Source has no repos — skipping. Settings may be malformed.", {
      sourceId: source.id,
      hasSettings: !!source.settings,
    });
    return { indexed: 0, hadErrors: false };
  }
  const token = settings.tokenRef ? await resolveToken(settings.tokenRef, options.agentsDir) : undefined;
  // The settings type marks the cap optional; use the same fallback as the parser.
  const maxItems = settings.maxItemsPerRepo ?? 500;
  let totalIndexed = 0;
  let hadErrors = false;

  const repos = await resolveRepos(settings, token);
  logger.info("github-source", "Starting GitHub source sync", {
    sourceId: source.id,
    repoCount: repos.length,
    resourceTypes: settings.resourceTypes,
  });

  const agentsDir = options.agentsDir ?? process.env.SIGNET_PATH ?? `${homedir()}/.agents`;
  const isSourceActive = (): boolean =>
    loadSourcesConfig(agentsDir).sources.some((s) => s.id === source.id && s.enabled);
  const syncOpts: GitHubSourceBridgeOptions = { ...options, sourceActiveCheck: isSourceActive };

  for (const repo of repos) {
    const config: GitHubFetchConfig = { owner: repo.owner, repo: repo.repo, token };
    const yielder = yieldEvery(5);
    let repoIndexed = 0;
    const seenKeys = new Set<string>();
    const completeTypes = new Set<string>();

    // Indexes one batch of resources and reports whether every comment fetch
    // for the batch succeeded. Without a loader, no comments are fetched.
    const indexBatch = async (
      type: string,
      resources: readonly GitHubResource[],
      loadComments?: (resourceNumber: number) => Promise<GitHubIndexedComment[]>,
    ): Promise<boolean> => {
      let commentsComplete = true;
      for (const resource of resources) {
        seenKeys.add(resourceKey(resource));
        let comments: GitHubIndexedComment[] | undefined;
        if (loadComments && settings.includeComments && resource.commentsCount > 0) {
          try {
            comments = await loadComments(resource.number ?? 0);
          } catch (err) {
            // Still index the resource itself, but record the gap so this type
            // is not treated as completely synced.
            commentsComplete = false;
            hadErrors = true;
            logger.warn("github-source", "Failed to fetch comments", {
              sourceId: source.id,
              repo: repo.fullName,
              type,
              number: resource.number,
              error: err instanceof Error ? err.message : String(err),
            });
          }
        }
        await indexResource(source.id, repo.fullName, resource, comments, agentId, syncOpts);
        repoIndexed++;
        await yielder();
      }
      return commentsComplete;
    };

    try {
      if (!isSourceActive()) {
        logger.info("github-source", "Source removed during sync, aborting", { sourceId: source.id });
        break;
      }

      if (settings.resourceTypes.includes("issues")) {
        const result = await fetchIssues(config, undefined, settings.state, maxItems, settings.labels);
        if (!isSourceActive()) break;
        const capped = result.resources.length >= maxItems;
        const commentsComplete = await indexBatch("issues", result.resources, async (resourceNumber) => {
          const rawComments = await fetchIssueComments(config, resourceNumber);
          return rawComments.map((c) => ({
            author: c.user?.login ?? null,
            body: c.body,
            createdAt: c.created_at,
          }));
        });
        logErrors(source.id, repo.fullName, "issues", result.resources.length, result.errors);
        if (result.errors.length > 0) hadErrors = true;
        if (!capped && result.errors.length === 0 && commentsComplete) completeTypes.add("issues");
      }

      if (settings.resourceTypes.includes("pulls")) {
        const labels = settings.labels;
        const result =
          labels && labels.length > 0
            ? await fetchPullRequestsBySearch(config, labels, undefined, settings.state, maxItems)
            : await fetchPullRequests(config, undefined, settings.state, maxItems);
        if (!isSourceActive()) break;
        const capped = result.resources.length >= maxItems;
        const commentsComplete = await indexBatch("pulls", result.resources, async (resourceNumber) => {
          const issueComments = await fetchIssueComments(config, resourceNumber);
          const reviewComments = await fetchPullRequestComments(config, resourceNumber);
          return [...issueComments, ...reviewComments].map((c) => ({
            author: c.user?.login ?? null,
            body: c.body,
            createdAt: c.created_at,
          }));
        });
        logErrors(source.id, repo.fullName, "pulls", result.resources.length, result.errors);
        if (result.errors.length > 0) hadErrors = true;
        if (!capped && result.errors.length === 0 && commentsComplete) completeTypes.add("pulls");
      }

      if (settings.resourceTypes.includes("discussions")) {
        if (!config.token) {
          logger.warn("github-source", "Discussions require a token (GraphQL API) — skipping", {
            sourceId: source.id,
            repo: repo.fullName,
          });
        } else {
          const result = await fetchDiscussions(config, undefined, maxItems);
          if (!isSourceActive()) break;
          const capped = result.resources.length >= maxItems;
          // The fetch takes no label filter, so it is applied here.
          const labelSet = settings.labels?.length ? new Set(settings.labels) : null;
          const wanted = labelSet
            ? result.resources.filter((resource) => resource.labels.some((l) => labelSet.has(l)))
            : result.resources;
          const commentsComplete = await indexBatch("discussions", wanted, async (resourceNumber) => {
            const rawComments = await fetchDiscussionComments(config, resourceNumber);
            return rawComments.map((c) => ({
              author: typeof c.author === "string" ? c.author : c.author?.login ?? null,
              body: c.body,
              createdAt: c.created_at,
            }));
          });
          logErrors(source.id, repo.fullName, "discussions", result.resources.length, result.errors);
          if (result.errors.length > 0) hadErrors = true;
          if (!capped && result.errors.length === 0 && commentsComplete) completeTypes.add("discussions");
        }
      }

      if (settings.resourceTypes.includes("docs")) {
        const docPaths = settings.docPaths ?? ["README.md", "CHANGELOG.md"];
        const result = await fetchRepoDocs(config, docPaths, repo.defaultBranch);
        if (!isSourceActive()) break;
        await indexBatch("docs", result.resources);
        logErrors(source.id, repo.fullName, "docs", result.resources.length, result.errors);
        if (result.errors.length > 0) hadErrors = true;
        if (result.errors.length === 0) completeTypes.add("docs");
      }

      await reconcileStaleResources(source.id, repo.fullName, seenKeys, completeTypes, agentId);
    } catch (err) {
      hadErrors = true;
      logger.warn("github-source", "Failed to sync repo", {
        sourceId: source.id,
        repo: repo.fullName,
        error: err instanceof Error ? err.message : String(err),
      });
    } finally {
      // Runs on `break` too, so work done before an abort is still counted.
      totalIndexed += repoIndexed;
    }

    logger.info("github-source", "Repo sync complete", {
      sourceId: source.id,
      repo: repo.fullName,
      indexed: repoIndexed,
    });
  }
  return { indexed: totalIndexed, hadErrors };
}

/**
 * Indexes one resource: always into the source graph, and into the embedding
 * store when both an embedding config and a fetch function are supplied.
 *
 * @throws Error when `options.sourceActiveCheck` reports the source inactive
 */
async function indexResource(
  sourceId: string,
  repo: string,
  resource: GitHubResource,
  comments: { author: string | null; body: string; createdAt: string }[] | undefined,
  agentId: string,
  options: GitHubSourceBridgeOptions,
): Promise<void> {
  if (options.sourceActiveCheck && !options.sourceActiveCheck()) {
    throw new Error(`Source ${sourceId} removed during sync`);
  }
  indexGitHubSourceStructure({
    agentId,
    sourceId,
    sourceName: repo,
    repo,
    resource,
  });

  if (options.embeddingConfig && options.fetchEmbedding) {
    await indexGitHubSourceEmbeddings({
      agentId,
      sourceId,
      repo,
      resource,
      comments: comments?.map((c) => ({ author: c.author, body: c.body, createdAt: c.createdAt })),
      embeddingConfig: options.embeddingConfig,
      fetchEmbedding: options.fetchEmbedding,
    });
  }
}
/**
 * Resolves a token reference through the secrets store.
 *
 * @param tokenRef name of the secret to read
 * @param _agentsDir accepted for call-site symmetry; not used
 * @throws Error naming the reference when the lookup fails
 */
async function resolveToken(tokenRef: string, _agentsDir?: string): Promise<string | undefined> {
  try {
    return await getSecret(tokenRef);
  } catch (err) {
    throw new Error(`Failed to resolve token ref '${tokenRef}': ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Logs one warning for a fetch that reported errors; does nothing otherwise.
 *
 * The first few messages are included so the warning says what failed, not
 * just how many errors there were.
 */
function logErrors(
  sourceId: string,
  repo: string,
  type: string,
  count: number,
  errors: readonly { message: string }[],
): void {
  if (errors.length === 0) return;
  logger.warn("github-source", `Errors during ${type} fetch`, {
    sourceId,
    repo,
    type,
    fetched: count,
    errors: errors.length,
    messages: errors.slice(0, 5).map((e) => e.message),
  });
}

/**
 * Repo-local key of a resource: `docs:<path>` for a doc that has a path,
 * otherwise `<type>:<number>`.
 */
function resourceKey(resource: GitHubResource): string {
  if (resource.type === "doc" && resource.path) return `docs:${resource.path}`;
  return `${resource.type}:${resource.number}`;
}

/** Singular resource type name to the plural used in settings; unknown names pass through. */
const GITHUB_RESOURCE_TYPE_PLURALS: ReadonlyMap<string, string> = new Map([
  ["issue", "issues"],
  ["pull", "pulls"],
  ["discussion", "discussions"],
  ["doc", "docs"],
  ["docs", "docs"],
]);

/** Maps `issue`/`pull`/`discussion`/`doc`/`docs` to the plural form; returns any other value unchanged. */
function resourceTypePlural(type: string): string {
  return GITHUB_RESOURCE_TYPE_PLURALS.get(type) ?? type;
}

/**
 * Splits a stored source path into its repo-local key and leading type segment.
 *
 * An optional `github:` prefix is removed first.
 *
 * @returns `null` when the path does not belong to `repo`
 */
function sourcePathToLocalKey(sourcePath: string, repo: string): { localKey: string; rawType: string } | null {
  const key = sourcePath.startsWith("github:") ? sourcePath.slice("github:".length) : sourcePath;
  const repoPrefix = `${repo}:`;
  if (!key.startsWith(repoPrefix)) return null;
  const localKey = key.slice(repoPrefix.length);
  const rawType = localKey.split(":")[0] ?? "";
  return { localKey, rawType };
}

/**
 * Purges indexed resources of `repo` that the latest sync no longer returned.
 *
 * Only rows whose type is in `completeTypes` are considered, so a partial or
 * failed fetch never causes deletions. Returns immediately when no type was
 * fetched completely.
 *
 * @param sourceId id of the source being synced
 * @param repo full repo name used as the key prefix
 * @param seenKeys repo-local keys returned by this sync
 * @param completeTypes plural type names that were fetched completely
 * @param agentId agent whose rows are inspected
 */
async function reconcileStaleResources(
  sourceId: string,
  repo: string,
  seenKeys: Set<string>,
  completeTypes: Set<string>,
  agentId: string,
): Promise<void> {
  if (completeTypes.size === 0) return;
  const { purgeGitHubResourceEmbeddings } = await import("./github-source-embeddings");
  const { purgeGitHubResourceStructure } = await import("./github-source-graph");
  const db = getDbAccessor();
  const rows = db.withReadDb((d) =>
    d
      .prepare(
        "SELECT source_path FROM entities WHERE source_id = ? AND agent_id = ? AND entity_type = 'source_document'",
      )
      .all(sourceId, agentId),
  ) as Array<{ source_path: string }>;

  let purged = 0;
  for (const row of rows) {
    const parsed = sourcePathToLocalKey(row.source_path, repo);
    if (!parsed) continue;
    const { localKey, rawType } = parsed;
    if (seenKeys.has(localKey)) continue;
    // resourceTypePlural already maps both "doc" and "docs" to "docs".
    if (!completeTypes.has(resourceTypePlural(rawType))) continue;

    const isDoc = rawType === "docs";
    const type = isDoc ? "doc" : rawType;
    const numOrPath = localKey.slice(rawType.length + 1);
    // Only the identifying fields come from the key; the rest is placeholder
    // data to satisfy the resource shape the purge helpers take.
    const resource: GitHubResource = {
      type: type as GitHubResource["type"],
      number: !isDoc ? Number(numOrPath) || 0 : undefined,
      path: isDoc ? numOrPath : undefined,
      title: "",
      body: "",
      state: "",
      labels: [],
      author: null,
      createdAt: "",
      updatedAt: "",
      closedAt: null,
      mergedAt: null,
      commentsCount: 0,
      extra: {},
    };
    purgeGitHubResourceEmbeddings({ sourceId, repo, agentId, resource });
    purgeGitHubResourceStructure({ sourceId, repo, agentId, resource });
    purged++;
  }
  if (purged > 0) {
    logger.info("github-source", "Reconciled stale resources", { sourceId, repo, purged });
  }
}
/**
 * Starts the GitHub source bridge: a `sync` function plus an optional polling
 * timer that calls it.
 *
 * `sync` visits every source that is enabled, of kind "github", not bound to a
 * different agent, and not already being indexed. Overlapping calls share the
 * pass that is in flight. A failure in one source is logged and does not stop
 * the remaining sources.
 *
 * @param sourcesOrLoader a fixed source list, or a function evaluated on each pass
 * @param options agent id, polling interval (300000 ms by default; 0 or less
 *                disables polling) and values forwarded to `syncGitHubSource`
 * @returns `sync`, resolving to the number of resources indexed in the pass,
 *          and `close`, which stops polling and waits for a running pass
 */
export function startGitHubSourceBridge(
  sourcesOrLoader: readonly SignetSourceEntry[] | (() => readonly SignetSourceEntry[]),
  options: GitHubSourceBridgeOptions = {},
): GitHubSourceBridgeHandle {
  const readSources = (): readonly SignetSourceEntry[] =>
    typeof sourcesOrLoader === "function" ? sourcesOrLoader() : sourcesOrLoader;
  const agentId = options.agentId ?? resolveDaemonAgentId();
  let activePass: Promise<number> | null = null;

  // One pass over every eligible source; resolves to the total indexed.
  async function runPass(): Promise<number> {
    let indexedTotal = 0;
    for (const source of readSources()) {
      const eligible =
        source.enabled &&
        source.kind === "github" &&
        (!source.agentId || source.agentId === agentId) &&
        !isSourceIndexInFlight(source.id);
      if (!eligible) continue;

      markSourceIndexInFlight(source.id);
      try {
        const { indexed } = await syncGitHubSource(source, { ...options, agentId });
        indexedTotal += indexed;
      } catch (err) {
        logger.warn("github-source", "Source sync failed", {
          sourceId: source.id,
          error: err instanceof Error ? err.message : String(err),
        });
      } finally {
        clearSourceIndexInFlight(source.id);
      }
    }
    return indexedTotal;
  }

  const sync = async (): Promise<number> => {
    if (activePass === null) {
      activePass = runPass().finally(() => {
        activePass = null;
      });
    }
    return activePass;
  };

  const intervalMs = options.pollIntervalMs ?? 300_000;
  let pollTimer: ReturnType<typeof setInterval> | null = null;
  if (intervalMs > 0) {
    pollTimer = setInterval(() => {
      sync().catch((err) => {
        logger.warn("github-source", "Polling sync failed", {
          error: err instanceof Error ? err.message : String(err),
        });
      });
    }, intervalMs);
  }
  // Call unref when the timer has it, so polling alone does not keep the process alive.
  pollTimer?.unref?.();

  const close = async (): Promise<void> => {
    if (pollTimer) clearInterval(pollTimer);
    if (activePass) await activePass.catch(() => 0);
  };

  return { sync, close };
}

/**
 * Removes everything indexed for a GitHub source: embeddings first, then graph
 * structure.
 *
 * @param sourceId id of the source to purge
 * @param agentId agent whose data is purged; defaults to the daemon agent
 * @returns the sum of the two purge counts
 */
export function purgeGitHubSource(sourceId: string, agentId?: string): number {
  const owner = agentId ?? resolveDaemonAgentId();
  const removedEmbeddings = purgeGitHubSourceEmbeddings({ sourceId, agentId: owner });
  const removedStructure = purgeGitHubSourceStructure({ sourceId, agentId: owner });
  return removedEmbeddings + removedStructure;
}
resolveDaemonAgentId(); + const embeddings = purgeGitHubSourceEmbeddings({ sourceId, agentId: id }); + const structure = purgeGitHubSourceStructure({ sourceId, agentId: id }); + return embeddings + structure; +} diff --git a/platform/daemon/src/github-source-embeddings.test.ts b/platform/daemon/src/github-source-embeddings.test.ts new file mode 100644 index 000000000..14ead69f6 --- /dev/null +++ b/platform/daemon/src/github-source-embeddings.test.ts @@ -0,0 +1,104 @@ +import { describe, expect, test } from "bun:test"; +import { buildGitHubSourceChunks } from "./github-source-embeddings"; +import type { GitHubResource } from "./github-source-fetch"; + +function makeIssue(overrides: Partial = {}): GitHubResource { + return { + type: "issue", + number: 42, + title: "Fix native bundle installer 404", + body: "The install.sh script fetches from `bundle-latest` tag which does not exist yet.", + state: "open", + labels: ["bug", "priority:high"], + author: "nicholai", + createdAt: "2026-05-10T00:00:00Z", + updatedAt: "2026-05-17T00:00:00Z", + closedAt: null, + mergedAt: null, + commentsCount: 2, + extra: { html_url: "https://github.com/Signet-AI/signetai/issues/42" }, + ...overrides, + }; +} + +describe("buildGitHubSourceChunks", () => { + test("produces chunks from an issue", () => { + const resource = makeIssue(); + const chunks = buildGitHubSourceChunks({ + sourceId: "github:abc123", + repo: "Signet-AI/signetai", + resource, + }); + expect(chunks.length).toBeGreaterThan(0); + expect(chunks[0]?.id).toContain("github:abc123"); + expect(chunks[0]?.id).toContain("Signet-AI/signetai"); + expect(chunks[0]?.id).toContain("issue:42"); + expect(chunks[0]?.chunkText).toContain("source_id: github:abc123"); + expect(chunks[0]?.chunkText).toContain("repo: Signet-AI/signetai"); + }); + + test("produces chunks from a PR with comments", () => { + const resource = makeIssue({ type: "pull", number: 123, title: "Add GitHub source connector" }); + const comments = [ + { author: 
"alexmondello", body: "Looks good, just one nit.", createdAt: "2026-05-12T00:00:00Z" }, + { author: "nicholai", body: "Fixed, pushing now.", createdAt: "2026-05-12T01:00:00Z" }, + ]; + const chunks = buildGitHubSourceChunks({ + sourceId: "github:abc123", + repo: "Signet-AI/signetai", + resource, + comments, + }); + expect(chunks.length).toBeGreaterThan(0); + const allText = chunks.map((c) => c.chunkText).join(" "); + expect(allText).toContain("alexmondello"); + expect(allText).toContain("nicholai"); + }); + + test("produces chunks from a doc", () => { + const resource: GitHubResource = { + type: "doc", + path: "README.md", + title: "README", + body: "# Signet AI\n\nThis is the signet project. It does stuff.\n\n## Installation\n\nRun `bun add -g signetai`.", + state: "open", + labels: [], + author: null, + createdAt: "", + updatedAt: "", + closedAt: null, + mergedAt: null, + commentsCount: 0, + extra: { path: "README.md" }, + }; + const chunks = buildGitHubSourceChunks({ + sourceId: "github:abc123", + repo: "Signet-AI/signetai", + resource, + }); + expect(chunks.length).toBeGreaterThan(0); + expect(chunks[0]?.id).toContain("docs:README.md"); + }); + + test("returns minimal chunk for empty body", () => { + const resource = makeIssue({ body: "" }); + const chunks = buildGitHubSourceChunks({ + sourceId: "github:abc123", + repo: "Signet-AI/signetai", + resource, + }); + expect(chunks.length).toBe(1); + expect(chunks[0]?.chunkText).toContain("source_id: github:abc123"); + }); + + test("splits long content into multiple chunks", () => { + const longBody = "x".repeat(5000); + const resource = makeIssue({ body: longBody }); + const chunks = buildGitHubSourceChunks({ + sourceId: "github:abc123", + repo: "Signet-AI/signetai", + resource, + }); + expect(chunks.length).toBeGreaterThan(1); + }); +}); diff --git a/platform/daemon/src/github-source-embeddings.ts b/platform/daemon/src/github-source-embeddings.ts new file mode 100644 index 000000000..7cc92cdc8 --- /dev/null +++ 
b/platform/daemon/src/github-source-embeddings.ts @@ -0,0 +1,330 @@ +import { createHash } from "node:crypto"; +import { yieldEvery } from "./async-yield"; +import { getDbAccessor } from "./db-accessor"; +import { syncVecDeleteByEmbeddingIds, syncVecInsert, vectorToBlob } from "./db-helpers"; +import type { GitHubResource } from "./github-source-fetch"; +import { resourceToMarkdown } from "./github-source-fetch"; +import type { EmbeddingConfig } from "./memory-config"; +import type { SourceEmbeddingFetch } from "./obsidian-source-embeddings"; + +export const GITHUB_CHUNK_SOURCE_TYPE = "source_github_chunk"; +const GITHUB_SOURCE_CHUNK_DELAY_MS = 100; + +export interface GitHubSourceChunk { + readonly id: string; + readonly text: string; + readonly chunkText: string; + readonly heading: string; + readonly startLine: number; + readonly endLine: number; +} + +export interface IndexGitHubSourceEmbeddingsInput { + readonly agentId: string; + readonly sourceId: string; + readonly repo: string; + readonly resource: GitHubResource; + readonly comments?: readonly { author: string | null; body: string; createdAt: string }[]; + readonly embeddingConfig: EmbeddingConfig; + readonly fetchEmbedding: SourceEmbeddingFetch; +} + +export interface IndexGitHubSourceEmbeddingsResult { + readonly chunks: number; + readonly embedded: number; + readonly skipped: number; +} + +const TARGET_CHARS = 1_600; +const MAX_CHARS = 2_200; +const MIN_CHARS = 40; + +function hash(input: string): string { + return createHash("sha256").update(input).digest("hex"); +} + +function resourceId(sourceId: string, repo: string, resource: GitHubResource): string { + if (resource.type === "doc" && resource.path) { + return `${sourceId}:${repo}:docs:${resource.path}`; + } + return `${sourceId}:${repo}:${resource.type}:${resource.number}`; +} + +export function buildGitHubSourceChunks(input: { + readonly sourceId: string; + readonly repo: string; + readonly resource: GitHubResource; + readonly comments?: readonly 
{ author: string | null; body: string; createdAt: string }[]; +}): GitHubSourceChunk[] { + const markdown = resourceToMarkdown(input.resource, input.comments); + const prefix = resourceId(input.sourceId, input.repo, input.resource); + const sections = parseMarkdownSections(markdown); + const chunks: GitHubSourceChunk[] = []; + + for (const section of sections) { + const paragraphs = splitParagraphs(section.body); + let bucket = ""; + let chunkIndex = 0; + const flush = (): void => { + const trimmed = bucket.trim(); + if (trimmed.length < MIN_CHARS) { + bucket = ""; + return; + } + for (const piece of splitLongText(trimmed)) { + const headingKey = slug(section.heading) || "overview"; + const lineKey = `${section.startLine}-${section.endLine}`; + const chunkId = `${prefix}#${headingKey}:${lineKey}:${chunkIndex}`; + const chunkText = [ + `source_id: ${input.sourceId}`, + `repo: ${input.repo}`, + `type: ${input.resource.type}`, + input.resource.number != null ? `number: ${input.resource.number}` : `path: ${input.resource.path}`, + `heading: ${section.heading}`, + "", + piece, + ].join("\n"); + chunks.push({ + id: chunkId, + text: piece, + chunkText, + heading: section.heading, + startLine: section.startLine, + endLine: section.endLine, + }); + chunkIndex++; + } + bucket = ""; + }; + for (const paragraph of paragraphs) { + if (paragraph.length > MAX_CHARS) { + flush(); + for (const piece of splitLongText(paragraph)) { + bucket = piece; + flush(); + } + continue; + } + const candidate = bucket ? 
/**
 * Splits markdown into heading-delimited sections.
 *
 * Text before the first heading is collected under the synthetic heading "Overview".
 * ATX headings (`#` … `######`) start a new section, except inside fenced code blocks
 * (``` or ~~~), where a leading `#` is ordinary content such as a shell comment.
 * Sections whose trimmed body is shorter than `MIN_CHARS` are dropped.
 *
 * @param content - Markdown text; CRLF and lone CR line endings are normalised to LF.
 * @returns Sections in document order. `startLine` is the 1-based line of the heading
 *   (1 for the overview) and `endLine` is `startLine` plus the number of body lines.
 */
function parseMarkdownSections(content: string): MarkdownSection[] {
	const lines = content.replace(/\r\n?/g, "\n").split("\n");
	const sections: MarkdownSection[] = [];
	let heading = "Overview";
	let startLine = 1;
	let bodyLines: string[] = [];
	let openFence: { marker: string; length: number } | null = null;

	// Emits the section being accumulated, unless its body is too short to be useful.
	const closeSection = (): void => {
		const body = bodyLines.join("\n").trim();
		if (body.length < MIN_CHARS) return;
		sections.push({ heading, startLine, endLine: startLine + bodyLines.length, body });
	};

	for (let idx = 0; idx < lines.length; idx++) {
		const line = lines[idx] ?? "";

		const fenceMatch = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);
		if (fenceMatch) {
			const run = fenceMatch[1] ?? "";
			const rest = fenceMatch[2] ?? "";
			const marker = run.charAt(0);
			if (openFence === null) {
				// A backtick fence may not carry backticks in its info string; such a line
				// is an inline code span, not a fence opener.
				if (!(marker === "`" && rest.includes("`"))) {
					openFence = { marker, length: run.length };
				}
			} else if (marker === openFence.marker && run.length >= openFence.length && rest.trim() === "") {
				openFence = null;
			}
		}

		const match = openFence === null ? /^(#{1,6})\s+(.+?)\s*$/.exec(line) : null;
		if (match) {
			closeSection();
			heading = match[2]?.trim() || "Untitled";
			startLine = idx + 1;
			bodyLines = [];
			continue;
		}
		bodyLines.push(line);
	}
	closeSection();
	return sections;
}
/**
 * Builds a lowercase, dash-separated key from free text.
 *
 * Every run of characters outside `a-z0-9` becomes a single dash, leading and trailing
 * dashes are removed, and the result is cut to at most 80 characters. The cut happens
 * last, so a truncated result may end in a dash. Input with no ASCII alphanumerics
 * yields an empty string.
 */
function slug(input: string): string {
	const lowered = input.toLowerCase();
	const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
	const trimmed = dashed.replace(/^-+|-+$/g, "");
	return trimmed.slice(0, 80);
}
+ ON CONFLICT(content_hash) DO UPDATE SET + vector = excluded.vector, + dimensions = excluded.dimensions, + source_type = excluded.source_type, + source_id = excluded.source_id, + chunk_text = excluded.chunk_text, + created_at = excluded.created_at, + agent_id = excluded.agent_id`, + ).run( + embId, + contentHash, + vectorToBlob(vector), + vector.length, + GITHUB_CHUNK_SOURCE_TYPE, + chunk.id, + chunk.chunkText, + now, + input.agentId, + ); + const stored = db.prepare("SELECT id FROM embeddings WHERE content_hash = ?").get(contentHash) as + | { id: string } + | undefined; + syncVecInsert(db, stored?.id ?? embId, vector); + }); + embedded++; + await yielder(); + await sleep(GITHUB_SOURCE_CHUNK_DELAY_MS); + } + + const prefix = `${resourceId(input.sourceId, input.repo, input.resource)}#`; + getDbAccessor().withWriteTx((db) => { + const stale = db + .prepare( + "SELECT id, content_hash FROM embeddings WHERE source_type = ? AND source_id >= ? AND source_id < ? AND agent_id = ?", + ) + .all(GITHUB_CHUNK_SOURCE_TYPE, prefix, `${prefix}\uffff`, input.agentId) as Array<{ + id: string; + content_hash: string; + }>; + const staleIds = stale.filter((row) => !currentHashes.has(row.content_hash)).map((row) => row.id); + if (staleIds.length > 0) { + syncVecDeleteByEmbeddingIds(db, staleIds); + const stmt = db.prepare("DELETE FROM embeddings WHERE id = ?"); + for (const id of staleIds) stmt.run(id); + } + }); + + return { chunks: chunks.length, embedded, skipped }; +} + +function existingChunkEmbeddingContentHash(agentId: string, chunkId: string): string | null { + const row = getDbAccessor().withReadDb((db) => + db + .prepare("SELECT content_hash FROM embeddings WHERE source_type = ? AND source_id = ? AND agent_id = ? LIMIT 1") + .get(GITHUB_CHUNK_SOURCE_TYPE, chunkId, agentId), + ) as { content_hash: string } | undefined; + return row?.content_hash ?? 
null; +} + +export function purgeGitHubSourceEmbeddings(input: { readonly sourceId: string; readonly agentId?: string }): number { + const prefix = `${input.sourceId}:`; + return getDbAccessor().withWriteTx((db) => { + const agentWhere = input.agentId ? " AND agent_id = ?" : ""; + const upper = `${prefix}\uffff`; + const args = input.agentId + ? [GITHUB_CHUNK_SOURCE_TYPE, prefix, upper, input.agentId] + : [GITHUB_CHUNK_SOURCE_TYPE, prefix, upper]; + const rows = db + .prepare(`SELECT id FROM embeddings WHERE source_type = ? AND source_id >= ? AND source_id < ?${agentWhere}`) + .all(...args) as Array<{ id: string }>; + const ids = rows.map((row) => row.id); + syncVecDeleteByEmbeddingIds(db, ids); + return db + .prepare(`DELETE FROM embeddings WHERE source_type = ? AND source_id >= ? AND source_id < ?${agentWhere}`) + .run(...args).changes; + }); +} + +export function purgeGitHubResourceEmbeddings(input: { + readonly sourceId: string; + readonly repo: string; + readonly agentId: string; + readonly resource: GitHubResource; +}): number { + const prefix = `${resourceId(input.sourceId, input.repo, input.resource)}#`; + return getDbAccessor().withWriteTx((db) => { + const rows = db + .prepare("SELECT id FROM embeddings WHERE source_type = ? AND source_id >= ? AND source_id < ? AND agent_id = ?") + .all(GITHUB_CHUNK_SOURCE_TYPE, prefix, `${prefix}\uffff`, input.agentId) as Array<{ id: string }>; + const ids = rows.map((row) => row.id); + syncVecDeleteByEmbeddingIds(db, ids); + return db + .prepare("DELETE FROM embeddings WHERE source_type = ? AND source_id >= ? AND source_id < ? 
AND agent_id = ?") + .run(GITHUB_CHUNK_SOURCE_TYPE, prefix, `${prefix}\uffff`, input.agentId).changes; + }); +} diff --git a/platform/daemon/src/github-source-fetch.ts b/platform/daemon/src/github-source-fetch.ts new file mode 100644 index 000000000..c5642d3d4 --- /dev/null +++ b/platform/daemon/src/github-source-fetch.ts @@ -0,0 +1,790 @@ +import { logger } from "./logger"; + +export interface GitHubFetchConfig { + readonly token?: string; + readonly owner: string; + readonly repo: string; +} + +export interface GitHubIssue { + readonly number: number; + readonly title: string; + readonly body: string | null; + readonly state: string; + readonly html_url: string; + readonly user: { readonly login: string } | null; + readonly labels: readonly { readonly name: string; readonly color: string }[]; + readonly assignees: readonly { readonly login: string }[]; + readonly milestone: { readonly title: string } | null; + readonly created_at: string; + readonly updated_at: string; + readonly closed_at: string | null; + readonly pull_request?: { readonly url: string }; + readonly comments: number; +} + +export interface GitHubComment { + readonly id: number; + readonly body: string; + readonly user: { readonly login: string } | null; + readonly created_at: string; + readonly updated_at: string; +} + +export interface GitHubPullRequest { + readonly number: number; + readonly title: string; + readonly body: string | null; + readonly state: string; + readonly html_url: string; + readonly user: { readonly login: string } | null; + readonly labels: readonly { readonly name: string; readonly color: string }[]; + readonly assignees: readonly { readonly login: string }[]; + readonly milestone: { readonly title: string } | null; + readonly created_at: string; + readonly updated_at: string; + readonly closed_at: string | null; + readonly merged_at: string | null; + readonly draft: boolean; + readonly base: { readonly ref: string }; + readonly head: { readonly ref: string }; + readonly 
comments: number; + readonly review_comments: number; + readonly commits: number; + readonly changed_files: number; +} + +export interface GitHubSearchIssue { + readonly number: number; + readonly title: string; + readonly body: string | null; + readonly state: string; + readonly html_url: string; + readonly user: { readonly login: string } | null; + readonly labels: readonly ({ readonly name: string } | string)[]; + readonly created_at: string; + readonly updated_at: string; + readonly closed_at: string | null; + readonly comments: number; +} + +export interface GitHubDiscussion { + readonly number: number; + readonly title: string; + readonly body: string; + readonly state: string; + readonly url: string; + readonly author: { readonly login: string } | null; + readonly labels: readonly { readonly name: string }[]; + readonly created_at: string; + readonly updated_at: string; + readonly answer_id: number | null; + readonly comments_count: number; +} + +export interface GitHubDiscussionComment { + readonly id: number; + readonly body: string; + readonly author: { readonly login: string } | null; + readonly created_at: string; + readonly updated_at: string; + readonly is_answer: boolean; +} + +export interface GitHubRepoDoc { + readonly path: string; + readonly content: string; + readonly sha: string; +} + +export interface GitHubResource { + readonly type: "issue" | "pull" | "discussion" | "doc"; + readonly number?: number; + readonly path?: string; + readonly title: string; + readonly body: string; + readonly state: string; + readonly labels: readonly string[]; + readonly author: string | null; + readonly createdAt: string; + readonly updatedAt: string; + readonly closedAt: string | null; + readonly mergedAt: string | null; + readonly commentsCount: number; + readonly extra: Readonly>; +} + +export interface GitHubFetchResult { + readonly resources: readonly GitHubResource[]; + readonly rateLimitRemaining: number; + readonly rateLimitReset: number; + readonly 
/**
 * Performs one GitHub API call with a per-attempt timeout, rate-limit backoff and
 * a bounded number of retries.
 *
 * Retried conditions: network/abort errors, 5xx responses, and 403/429 responses whose
 * `x-ratelimit-remaining` is 0. Any other response is returned to the caller as-is.
 *
 * @param url - Absolute URL to request.
 * @param token - Optional token, sent as a `Bearer` credential.
 * @param method - HTTP method; defaults to `GET`.
 * @param body - Optional payload; JSON-encoded when truthy.
 * @returns Status, headers and the parsed JSON body (`null` for a 204 response).
 * @throws The most recent failure once `MAX_RETRIES` attempts have been used.
 */
async function githubRequest(url: string, token?: string, method = "GET", body?: unknown): Promise<GitHubApiResponse> {
	const headers: Record<string, string> = {
		Accept: "application/vnd.github.v3+json",
		"User-Agent": "signet-daemon",
	};
	if (token) headers.Authorization = `Bearer ${token}`;
	if (body) headers["Content-Type"] = "application/json";
	const payload = body ? JSON.stringify(body) : undefined;

	const pause = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));
	// Time until the rate-limit window resets (plus 1 s slack), never negative.
	const untilReset = (reset: number): number => Math.max(reset - Date.now() + 1000, 0);

	let lastError: Error | null = null;
	for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
		const isLastAttempt = attempt === MAX_RETRIES - 1;
		try {
			const controller = new AbortController();
			const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
			// Clear the abort timer on rejection too; otherwise every failed attempt
			// leaves a live timer behind for REQUEST_TIMEOUT_MS.
			const response = await fetch(url, {
				method,
				headers,
				body: payload,
				signal: controller.signal,
			}).finally(() => clearTimeout(timeout));

			const rateLimit = parseRateLimit(response.headers);
			const exhausted = (response.status === 403 || response.status === 429) && rateLimit.remaining === 0;
			if (exhausted) {
				const waitMs = untilReset(rateLimit.reset);
				lastError = new Error(`GitHub API rate limit exhausted (${response.status})`);
				logger.warn("github-source", "Rate limit exhausted, waiting", { waitMs });
				// Waiting after the final attempt would only delay the throw.
				if (!isLastAttempt) await pause(Math.min(waitMs, 60_000));
				continue;
			}
			// Pre-emptive backoff; kept separate from the exhausted case so one
			// response never triggers two consecutive waits.
			if (rateLimit.remaining < 10 && rateLimit.reset > Date.now()) {
				const waitMs = untilReset(rateLimit.reset);
				logger.warn("github-source", "Approaching rate limit, backing off", {
					remaining: rateLimit.remaining,
					waitMs,
				});
				await pause(Math.min(waitMs, 60_000));
			}
			if (response.status >= 500) {
				lastError = new Error(`GitHub API ${response.status}: ${await response.text()}`);
				if (!isLastAttempt) await pause(RETRY_BASE_DELAY_MS * (attempt + 1));
				continue;
			}
			return {
				status: response.status,
				headers: response.headers,
				body: response.status === 204 ? null : await response.json(),
			};
		} catch (err) {
			lastError = err instanceof Error ? err : new Error(String(err));
			if (!isLastAttempt) {
				await pause(RETRY_BASE_DELAY_MS * (attempt + 1));
			}
		}
	}
	throw lastError ?? new Error("GitHub API request failed after retries");
}
/**
 * Expands a repo-name glob for one owner into `owner/repo` full names.
 *
 * `*` matches any run of characters and `?` matches a single character; every other
 * character is matched literally (so `.` in a repo name does not act as a regex
 * wildcard). A pattern without `*` or `?` is returned as-is without any API call.
 *
 * The owner's repositories are listed first via the organisation endpoint, then via the
 * user endpoint; the first listing that yields any repository is filtered and returned.
 *
 * @param owner - Organisation or user login.
 * @param pattern - Repository name or glob.
 * @param token - Optional API token.
 * @returns Matching full names; an empty list (with a warning logged) when neither
 *   endpoint returned repositories.
 */
export async function expandRepoGlob(owner: string, pattern: string, token?: string): Promise<string[]> {
	if (!/[*?]/.test(pattern)) return [`${owner}/${pattern}`];

	const regexSource = Array.from(pattern, (ch) => {
		if (ch === "*") return ".*";
		if (ch === "?") return ".";
		// Escape regex metacharacters so they only match themselves.
		return ch.replace(/[\\^$.+()[\]{}|]/g, "\\$&");
	}).join("");
	const regex = new RegExp(`^${regexSource}$`);

	for (const prefix of [`/orgs/${owner}/repos`, `/users/${owner}/repos`]) {
		const repos: Array<{ full_name: string; name: string }> = [];
		let page = 1;
		while (true) {
			const url = `${GITHUB_API_BASE}${prefix}?per_page=${PER_PAGE}&page=${page}&type=all`;
			const response = await githubRequest(url, token);
			if (response.status !== 200) break;
			const batch = response.body as Array<{ full_name: string; name: string }>;
			repos.push(...batch);
			if (batch.length < PER_PAGE) break;
			page++;
		}
		if (repos.length > 0) return repos.filter((r) => regex.test(r.name)).map((r) => r.full_name);
	}
	logger.warn("github-source", "Failed to expand repo glob", { owner });
	return [];
}
/**
 * Fetches the comments of one issue, page by page, up to `MAX_COMMENTS_PER_RESOURCE`.
 *
 * Fetching is best-effort: a failed or malformed page stops pagination and the comments
 * collected so far are returned, but the failure is logged so that a truncated comment
 * list can be told apart from an issue that really has no further comments.
 *
 * @param config - Repository coordinates and optional token.
 * @param issueNumber - Issue number within the repository.
 * @returns At most `MAX_COMMENTS_PER_RESOURCE` comments in API order.
 */
export async function fetchIssueComments(config: GitHubFetchConfig, issueNumber: number): Promise<GitHubComment[]> {
	const comments: GitHubComment[] = [];
	let page = 1;
	while (comments.length < MAX_COMMENTS_PER_RESOURCE) {
		const url = `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/issues/${issueNumber}/comments?per_page=${PER_PAGE}&page=${page}`;
		const response = await githubRequest(url, config.token);
		if (response.status !== 200 || !Array.isArray(response.body)) {
			logger.warn("github-source", "Issue comments fetch failed", {
				repo: `${config.owner}/${config.repo}`,
				issueNumber,
				page,
				status: response.status,
			});
			break;
		}
		const batch = response.body as GitHubComment[];
		comments.push(...batch);
		if (batch.length < PER_PAGE) break;
		page++;
	}
	return comments.slice(0, MAX_COMMENTS_PER_RESOURCE);
}
/**
 * Lists pull requests of a repository, most recently updated first.
 *
 * Pagination uses one fixed page size for the whole run. Shrinking `per_page` between
 * pages while still incrementing `page` shifts the page window, which re-reads some
 * items and skips others whenever `maxItems` is not a multiple of the page size.
 *
 * @param config - Repository coordinates and optional token.
 * @param since - Optional ISO timestamp; listing stops at the first pull request whose
 *   `updated_at` sorts before it.
 * @param state - Pull request state filter passed to the API; defaults to `"all"`.
 * @param maxItems - Upper bound on returned resources; defaults to 500.
 * @returns The collected resources, the last observed rate-limit values, and any
 *   non-200 failure recorded in `errors` (5xx failures are marked retryable).
 */
export async function fetchPullRequests(
	config: GitHubFetchConfig,
	since?: string,
	state = "all",
	maxItems = 500,
): Promise<GitHubFetchResult> {
	const resources: GitHubResource[] = [];
	const errors: { message: string; retryable: boolean }[] = [];
	let rateLimitRemaining = 5000;
	let rateLimitReset = 0;
	const perPage = Math.max(1, Math.min(PER_PAGE, maxItems));
	let page = 1;
	let finished = false;

	while (!finished && resources.length < maxItems) {
		const params = new URLSearchParams({
			state,
			per_page: String(perPage),
			sort: "updated",
			direction: "desc",
			page: String(page),
		});
		if (since) params.set("since", since);
		const url = `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/pulls?${params}`;
		const response = await githubRequest(url, config.token);
		const rl = parseRateLimit(response.headers);
		rateLimitRemaining = rl.remaining;
		rateLimitReset = rl.reset;
		if (response.status !== 200) {
			errors.push({ message: `PRs fetch failed: ${response.status}`, retryable: response.status >= 500 });
			break;
		}
		const pulls = response.body as GitHubPullRequest[];
		if (pulls.length === 0) break;
		for (const pull of pulls) {
			// Results are sorted by update time, so everything after the first stale
			// item (on this page and later ones) is stale as well.
			if (since && pull.updated_at < since) {
				finished = true;
				break;
			}
			if (resources.length >= maxItems) {
				finished = true;
				break;
			}
			resources.push({
				type: "pull",
				number: pull.number,
				title: pull.title,
				body: pull.body ?? "",
				state: pull.state,
				labels: pull.labels.map((l) => l.name),
				author: pull.user?.login ?? null,
				createdAt: pull.created_at,
				updatedAt: pull.updated_at,
				closedAt: pull.closed_at,
				mergedAt: pull.merged_at,
				// List payloads may omit the comment counters; missing values count as 0
				// so the sum never becomes NaN.
				commentsCount: (pull.comments ?? 0) + (pull.review_comments ?? 0),
				extra: {
					draft: pull.draft,
					base: pull.base.ref,
					head: pull.head.ref,
					commits: pull.commits,
					changed_files: pull.changed_files,
					milestone: pull.milestone?.title ?? null,
					assignees: pull.assignees.map((a) => a.login),
					html_url: pull.html_url,
				},
			});
		}
		if (pulls.length < perPage) break;
		page++;
	}
	return { resources, rateLimitRemaining, rateLimitReset, errors };
}
"" : ` is:${state}`; + const q = `repo:${config.owner}/${config.repo} type:pr${stateQuery} ${labelQuery}`; + + while (resources.length < maxItems) { + const params = new URLSearchParams({ + q: q.trim(), + per_page: String(Math.min(PER_PAGE, maxItems - resources.length)), + sort: "updated", + order: "desc", + page: String(page), + }); + const url = `${GITHUB_API_BASE}/search/issues?${params}`; + const response = await githubRequest(url, config.token); + const rl = parseRateLimit(response.headers); + rateLimitRemaining = rl.remaining; + rateLimitReset = rl.reset; + if (response.status !== 200) { + errors.push({ message: `PR search fetch failed: ${response.status}`, retryable: response.status >= 500 }); + break; + } + const data = response.body as { items: GitHubSearchIssue[]; total_count: number }; + if (!data.items || data.items.length === 0) break; + for (const item of data.items) { + if (since && item.updated_at < since) break; + resources.push({ + type: "pull", + number: item.number, + title: item.title, + body: item.body ?? "", + state: item.state === "open" ? "open" : "closed", + labels: item.labels.map((l) => (typeof l === "string" ? l : l.name)), + author: item.user?.login ?? null, + createdAt: item.created_at, + updatedAt: item.updated_at, + closedAt: item.closed_at ?? 
/**
 * Fetch a repository's discussions through the GitHub GraphQL API, most recently
 * updated first.
 *
 * Paging stops when `maxItems` discussions have been collected, when the API
 * reports no further page, or as soon as a discussion older than `since` is seen.
 *
 * @param config Owner, repo and token used for the request.
 * @param since Optional lower bound; a discussion whose `updatedAt` compares
 *   (as a string) below it ends the fetch.
 * @param maxItems Upper bound on the number of discussions returned.
 * @returns The collected resources, the rate-limit values parsed from the last
 *   response, and any HTTP-status or GraphQL errors that ended the fetch.
 */
export async function fetchDiscussions(
  config: GitHubFetchConfig,
  since?: string,
  maxItems = 500,
  // NOTE(review): the named result type was not visible in this chunk; the
  // structural type below mirrors the returned object — swap in the module's alias.
): Promise<{
  resources: GitHubResource[];
  rateLimitRemaining: number;
  rateLimitReset: number;
  errors: { message: string; retryable: boolean }[];
}> {
  interface DiscussionNode {
    number: number;
    title: string;
    body: string;
    url: string;
    author: { login: string } | null;
    labels: { nodes: Array<{ name: string }> };
    createdAt: string;
    updatedAt: string;
    answerId: { id: string } | null;
    comments: { totalCount: number };
  }
  interface DiscussionsPayload {
    errors?: Array<{ message: string }>;
    data?: {
      repository?: {
        discussions?: {
          pageInfo: { hasNextPage: boolean; endCursor: string | null };
          nodes: DiscussionNode[];
        };
      };
    };
  }

  const resources: GitHubResource[] = [];
  const errors: { message: string; retryable: boolean }[] = [];
  let rateLimitRemaining = 5000;
  let rateLimitReset = 0;

  const query = `
    query($owner: String!, $repo: String!, $first: Int!, $after: String) {
      repository(owner: $owner, name: $repo) {
        discussions(first: $first, after: $after, orderBy: {field: UPDATED_AT, direction: DESC}) {
          pageInfo { hasNextPage endCursor }
          nodes {
            number title body url
            author { login }
            labels(first: 20) { nodes { name } }
            createdAt updatedAt
            answerId: answer { id }
            comments { totalCount }
          }
        }
      }
    }`;
  let cursor: string | null = null;
  let fetched = 0;
  // Results are ordered by updatedAt DESC, so the first stale node means every
  // later node (and page) is stale too.
  let reachedCutoff = false;

  while (fetched < maxItems && !reachedCutoff) {
    const variables = {
      owner: config.owner,
      repo: config.repo,
      first: Math.min(100, maxItems - fetched),
      after: cursor,
    };
    const response = await githubRequest(GRAPHQL_URL, config.token, "POST", { query, variables });
    const rl = parseRateLimit(response.headers);
    rateLimitRemaining = rl.remaining;
    rateLimitReset = rl.reset;
    if (response.status !== 200) {
      const failure = response.body as { message?: string } | null;
      errors.push({
        message: `Discussions fetch failed: ${response.status} ${failure?.message ?? ""}`,
        retryable: response.status >= 500,
      });
      break;
    }

    const payload = response.body as DiscussionsPayload;
    if (payload.errors && payload.errors.length > 0) {
      for (const gqlErr of payload.errors) {
        errors.push({ message: `GraphQL: ${gqlErr.message}`, retryable: false });
      }
      break;
    }

    const page = payload.data?.repository?.discussions;
    if (!page?.nodes?.length) break;

    for (const node of page.nodes) {
      if (since && node.updatedAt < since) {
        // Previously this only left the inner loop and paging carried on through
        // every remaining page; now the whole fetch stops here.
        reachedCutoff = true;
        break;
      }
      resources.push({
        type: "discussion",
        number: node.number,
        title: node.title,
        body: node.body ?? "",
        state: "open",
        labels: node.labels?.nodes?.map((label) => label.name) ?? [],
        author: node.author?.login ?? null,
        createdAt: node.createdAt,
        updatedAt: node.updatedAt,
        closedAt: null,
        mergedAt: null,
        commentsCount: node.comments?.totalCount ?? 0,
        extra: { url: node.url, answer_id: node.answerId?.id ?? null },
      });
      fetched++;
    }

    if (reachedCutoff || !page.pageInfo.hasNextPage) break;
    cursor = page.pageInfo.endCursor;
    if (!cursor) break;
  }
  return { resources, rateLimitRemaining, rateLimitReset, errors };
}
/**
 * Fetch the comments of one discussion through the GitHub GraphQL API.
 *
 * Pages 100 comments at a time and returns at most `MAX_COMMENTS_PER_RESOURCE`
 * entries. A non-200 response ends the fetch and whatever was collected so far
 * is returned; no error is reported to the caller.
 *
 * @param config Owner, repo and token used for the request.
 * @param discussionNumber Number of the discussion whose comments are wanted.
 * @returns Comments in the order the API returned them.
 */
export async function fetchDiscussionComments(
  config: GitHubFetchConfig,
  discussionNumber: number,
): Promise<GitHubDiscussionComment[]> {
  interface CommentsPayload {
    data?: {
      repository?: {
        discussion?: {
          comments?: {
            pageInfo: { hasNextPage: boolean; endCursor: string | null };
            nodes: Array<{
              id: string;
              body: string;
              isAnswer: boolean;
              author: { login: string } | null;
              createdAt: string;
              updatedAt: string;
            }>;
          };
        };
      };
    };
  }

  const query = `
    query($owner: String!, $repo: String!, $number: Int!, $after: String) {
      repository(owner: $owner, name: $repo) {
        discussion(number: $number) {
          comments(first: 100, after: $after) {
            pageInfo { hasNextPage endCursor }
            nodes {
              id body isAnswer
              author { login }
              createdAt updatedAt
            }
          }
        }
      }
    }`;
  const comments: GitHubDiscussionComment[] = [];
  let cursor: string | null = null;

  while (true) {
    const variables = { owner: config.owner, repo: config.repo, number: discussionNumber, after: cursor };
    const response = await githubRequest(GRAPHQL_URL, config.token, "POST", { query, variables });
    if (response.status !== 200) break;

    const payload = response.body as CommentsPayload;
    const connection = payload.data?.repository?.discussion?.comments;
    for (const node of connection?.nodes ?? []) {
      comments.push({
        // The id is reduced to a number by stripping a "DIC_" prefix; ids that do
        // not parse as an integer collapse to 0.
        id: Number.parseInt(node.id.replace(/^DIC_/, ""), 10) || 0,
        body: node.body,
        author: node.author,
        created_at: node.createdAt,
        updated_at: node.updatedAt,
        is_answer: node.isAnswer,
      });
    }

    const pageInfo = connection?.pageInfo;
    if (!pageInfo?.hasNextPage || comments.length >= MAX_COMMENTS_PER_RESOURCE) break;
    // Without a cursor the next request would return the first page again and
    // duplicate its comments, so stop — the same guard fetchDiscussions applies.
    if (!pageInfo.endCursor) break;
    cursor = pageInfo.endCursor;
  }
  return comments.slice(0, MAX_COMMENTS_PER_RESOURCE);
}
/**
 * Fetch documentation files from a repository via the GitHub contents API.
 *
 * Entries containing `*` are delegated to `fetchTreeDocs`; every other entry is
 * requested as a single file. A 404 is skipped silently, any other non-200
 * status is recorded in `errors`, and responses without `content` are skipped.
 *
 * @param config Owner, repo and token used for the request.
 * @param docPaths File paths or glob patterns to fetch.
 * @param branch Optional ref passed as `?ref=`.
 * @returns One `doc` resource per file fetched, the rate-limit values parsed
 *   from the last single-file response, and the collected errors.
 */
export async function fetchRepoDocs(
  config: GitHubFetchConfig,
  docPaths: readonly string[],
  branch?: string,
  // NOTE(review): the named result type was not visible in this chunk; the
  // structural type below mirrors the returned object — swap in the module's alias.
): Promise<{
  resources: GitHubResource[];
  rateLimitRemaining: number;
  rateLimitReset: number;
  errors: { message: string; retryable: boolean }[];
}> {
  const resources: GitHubResource[] = [];
  const errors: { message: string; retryable: boolean }[] = [];
  let rateLimitRemaining = 5000;
  let rateLimitReset = 0;
  // Escape the ref so names containing `&`, `#` or `?` cannot alter the query string.
  const refQuery = branch ? `?ref=${encodeURIComponent(branch)}` : "";

  for (const docPath of docPaths) {
    if (docPath.includes("*")) {
      const globbed = await fetchTreeDocs(config, docPath, branch);
      resources.push(...globbed.resources);
      errors.push(...globbed.errors);
      continue;
    }

    // Escape each segment separately so the `/` separators survive while spaces,
    // `#`, `?` and `%` inside file names do not break the URL.
    const encodedPath = docPath.split("/").map(encodeURIComponent).join("/");
    const url = `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/contents/${encodedPath}${refQuery}`;
    const response = await githubRequest(url, config.token);
    const rl = parseRateLimit(response.headers);
    rateLimitRemaining = rl.remaining;
    rateLimitReset = rl.reset;

    if (response.status === 404) continue;
    if (response.status !== 200) {
      errors.push({
        message: `Doc fetch failed for ${docPath}: ${response.status}`,
        retryable: response.status >= 500,
      });
      continue;
    }

    const file = response.body as { content?: string; sha?: string; name?: string };
    if (!file.content) continue;
    resources.push({
      type: "doc",
      path: docPath,
      // Title is the file name without its last extension.
      title: (file.name ?? docPath).replace(/\.[^.]+$/, ""),
      body: Buffer.from(file.content, "base64").toString("utf-8"),
      state: "open",
      labels: [],
      author: null,
      createdAt: "",
      updatedAt: "",
      closedAt: null,
      mergedAt: null,
      commentsCount: 0,
      extra: { sha: file.sha, path: docPath },
    });
  }
  return { resources, rateLimitRemaining, rateLimitReset, errors };
}
/**
 * Directory prefix of a glob: every path segment before the first one that
 * contains `*`. Returns "" when the very first segment is a wildcard.
 */
function docGlobBaseDir(globPath: string): string {
  const segments = globPath.split("/");
  const firstWildcard = segments.findIndex((segment) => segment.includes("*"));
  return (firstWildcard === -1 ? segments : segments.slice(0, firstWildcard)).join("/");
}

/**
 * Resolve a Markdown glob (`dir/*.md` or `dir/**\/*.md`) against the repository
 * tree and fetch each matching file through the contents API.
 *
 * Only `.md` globs are understood; any other pattern matches nothing. At most
 * 100 matching files are fetched.
 *
 * @param config Owner, repo and token used for the request.
 * @param globPath Glob pattern taken from the configured doc paths.
 * @param branch Optional ref; `HEAD` is used for the tree lookup when omitted.
 * @returns Fetched `doc` resources and errors. The rate-limit fields are fixed
 *   placeholders (5000 / 0) — this function does not parse response headers.
 */
async function fetchTreeDocs(
  config: GitHubFetchConfig,
  globPath: string,
  branch?: string,
  // NOTE(review): the named result type was not visible in this chunk; the
  // structural type below mirrors the returned object — swap in the module's alias.
): Promise<{
  resources: GitHubResource[];
  rateLimitRemaining: number;
  rateLimitReset: number;
  errors: { message: string; retryable: boolean }[];
}> {
  const resources: GitHubResource[] = [];
  const errors: { message: string; retryable: boolean }[] = [];
  // The previous regex-based derivation left root-level globs untouched
  // ("*.md" -> "*.md", "**/*.md" -> "**"), producing a tree URL that cannot resolve.
  const dir = docGlobBaseDir(globPath);
  const matcher = globPath.includes("**/*.md")
    ? (p: string) => p.endsWith(".md")
    : globPath.includes("*.md")
      ? (p: string) => p.endsWith(".md") && !p.includes("/")
      : () => false;

  const sha = branch || "HEAD";
  const treeUrl = `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/git/trees/${sha}${dir ? `:${dir}` : ""}?recursive=1`;
  const treeResponse = await githubRequest(treeUrl, config.token);
  if (treeResponse.status !== 200) {
    errors.push({ message: `Tree fetch failed for ${dir}: ${treeResponse.status}`, retryable: treeResponse.status >= 500 });
    return { resources, rateLimitRemaining: 5000, rateLimitReset: 0, errors };
  }

  const treeData = treeResponse.body as { tree?: Array<{ path: string; type: string }> };
  const entries = (treeData.tree ?? []).filter((entry) => entry.type === "blob" && matcher(entry.path));
  const refParam = branch ? `?ref=${encodeURIComponent(branch)}` : "";

  for (const entry of entries.slice(0, 100)) {
    // Tree paths are relative to `dir`; rebuild the repo-relative path and escape
    // each segment so unusual file names cannot break the URL.
    const repoPath = dir ? `${dir}/${entry.path}` : entry.path;
    const encodedPath = repoPath.split("/").map(encodeURIComponent).join("/");
    const fileUrl = `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/contents/${encodedPath}${refParam}`;
    const fileResponse = await githubRequest(fileUrl, config.token);
    if (fileResponse.status !== 200) {
      errors.push({ message: `File fetch failed: ${entry.path}`, retryable: true });
      continue;
    }
    const file = fileResponse.body as { content?: string; sha?: string; name?: string };
    if (!file.content) continue;
    resources.push({
      type: "doc",
      path: repoPath,
      title: (file.name ?? entry.path).replace(/\.[^.]+$/, ""),
      body: Buffer.from(file.content, "base64").toString("utf-8"),
      state: "open",
      labels: [],
      author: null,
      createdAt: "",
      updatedAt: "",
      closedAt: null,
      mergedAt: null,
      commentsCount: 0,
      extra: { sha: file.sha, path: entry.path },
    });
  }
  return { resources, rateLimitRemaining: 5000, rateLimitReset: 0, errors };
}
/**
 * Render a GitHub resource, and optionally its comments, as a Markdown document.
 *
 * Layout: an `# title` heading, one ` | `-separated metadata line (fields that
 * are absent or empty are omitted), the trimmed body when non-blank, then a
 * `## Comments` section with one entry per comment.
 *
 * @param resource Resource to render.
 * @param comments Optional comments, rendered in the given order.
 * @returns The Markdown text; lines are joined with `\n`.
 */
export function resourceToMarkdown(
  resource: GitHubResource,
  comments?: readonly { author: string | null; body: string; createdAt: string }[],
): string {
  const lines: string[] = [`# ${resource.title}`, ""];

  // A type predicate is used instead of `filter(Boolean)`, which does not narrow
  // `(string | null)[]` to `string[]` under strictNullChecks. Every non-null entry
  // is a non-empty string, so the rendered output is the same.
  const meta = [
    `**Type:** ${resource.type}`,
    resource.number != null ? `**Number:** #${resource.number}` : null,
    `**State:** ${resource.state}`,
    resource.labels.length > 0 ? `**Labels:** ${resource.labels.join(", ")}` : null,
    resource.author ? `**Author:** @${resource.author}` : null,
    resource.createdAt ? `**Created:** ${resource.createdAt}` : null,
    resource.updatedAt ? `**Updated:** ${resource.updatedAt}` : null,
    resource.closedAt ? `**Closed:** ${resource.closedAt}` : null,
    resource.mergedAt ? `**Merged:** ${resource.mergedAt}` : null,
    resource.extra.draft != null ? `**Draft:** ${resource.extra.draft ? "yes" : "no"}` : null,
    resource.extra.base && resource.extra.head ? `**Branch:** ${resource.extra.head} → ${resource.extra.base}` : null,
  ].filter((entry): entry is string => entry !== null);
  lines.push(meta.join(" | "), "");

  const body = resource.body.trim();
  if (body) lines.push(body, "");

  if (comments && comments.length > 0) {
    lines.push("## Comments", "");
    for (const comment of comments) {
      const author = comment.author ? `**@${comment.author}**` : "*unknown*";
      lines.push(`${author} (${comment.createdAt}):`, comment.body.trim(), "");
    }
  }
  return lines.join("\n");
}
// Behavioural checks for extractGitHubRefs: shorthand refs, full URLs,
// de-duplication, and the empty-body case.
describe("extractGitHubRefs", () => {
  const REPO = "owner/repo";

  test("extracts #123 references", () => {
    const found = extractGitHubRefs("Fixes #42 and closes #100", REPO);
    expect(found.length).toBeGreaterThanOrEqual(2);
    const numbers = found.map((ref) => ref.number);
    expect(numbers).toContain(42);
    expect(numbers).toContain(100);
  });

  test("extracts GitHub URLs", () => {
    const text = "See https://github.com/owner/repo/pull/55 and https://github.com/owner/repo/issues/77";
    const found = extractGitHubRefs(text, REPO);
    expect(found.length).toBeGreaterThanOrEqual(2);
    const types = found.map((ref) => ref.type);
    expect(types).toContain("pull");
    expect(types).toContain("issue");
  });

  test("deduplicates references", () => {
    const numbers = extractGitHubRefs("Fixes #42. See also #42.", REPO).map((ref) => ref.number);
    // A set of the numbers is the same size only when no number repeats.
    expect(new Set(numbers).size).toBe(numbers.length);
  });

  test("handles empty body", () => {
    expect(extractGitHubRefs("", REPO).length).toBe(0);
  });
});
// Value written to the `source_kind` column of every row this module creates.
const GITHUB_SOURCE_KIND = "source_github_resource";

/**
 * Build a deterministic row id: "ghsrc_" followed by the first 28 hex characters
 * of the SHA-256 of the parts joined with NUL bytes.
 */
function idFor(...parts: readonly string[]): string {
  const digest = createHash("sha256").update(parts.join("\0")).digest("hex");
  return `ghsrc_${digest.slice(0, 28)}`;
}

/**
 * Insert an entity row, or refresh the existing row that has the same
 * `canonical_name` and `agent_id`.
 *
 * @returns The id of the row now holding the entity: the existing row's id when
 *   one was found, otherwise `input.id`.
 */
function upsertEntity(
  db: WriteDb,
  input: {
    readonly id: string;
    readonly name: string;
    readonly canonicalName: string;
    readonly entityType: string;
    readonly agentId: string;
    readonly sourceId: string;
    readonly sourcePath: string;
    readonly now: string;
  },
): string {
  const { id, name, canonicalName, entityType, agentId, sourceId, sourcePath, now } = input;
  // Stored display name combines name, canonical name and agent id.
  const uniqueName = `${name} — ${canonicalName} — ${agentId}`;

  const found = db
    .prepare("SELECT id FROM entities WHERE canonical_name = ? AND agent_id = ? LIMIT 1")
    .get(canonicalName, agentId) as { id: string } | undefined;

  if (!found) {
    db.prepare(
      `INSERT INTO entities
         (id, name, canonical_name, entity_type, agent_id, mentions, created_at, updated_at,
          source_id, source_kind, source_path, source_root)
       VALUES (?, ?, ?, ?, ?, 1, ?, ?, ?, ?, ?, ?)`,
    ).run(id, uniqueName, canonicalName, entityType, agentId, now, now, sourceId, GITHUB_SOURCE_KIND, sourcePath, sourceId);
    return id;
  }

  db.prepare(
    `UPDATE entities
       SET name = ?, entity_type = ?, mentions = MAX(COALESCE(mentions, 0), 1), updated_at = ?,
           source_id = ?, source_kind = ?, source_path = ?, source_root = ?
       WHERE id = ?`,
  ).run(uniqueName, entityType, now, sourceId, GITHUB_SOURCE_KIND, sourcePath, sourceId, found.id);
  return found.id;
}
/**
 * Insert a community row or, when a row with the same id already exists,
 * refresh its name, `updated_at` and `source_id`.
 *
 * New rows start with cohesion 1.0 and a member count of 0.
 */
function upsertCommunity(
  db: WriteDb,
  input: {
    readonly id: string;
    readonly name: string;
    readonly agentId: string;
    readonly sourceId: string;
    readonly now: string;
  },
): void {
  const { id, agentId, name, now, sourceId } = input;
  const statement = db.prepare(
    `INSERT INTO entity_communities
       (id, agent_id, name, cohesion, member_count, created_at, updated_at, source_id, source_kind, source_path, source_root)
     VALUES (?, ?, ?, 1.0, 0, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(id) DO UPDATE SET
       name = excluded.name,
       updated_at = excluded.updated_at,
       source_id = excluded.source_id`,
  );
  // created_at and updated_at both receive `now`; source_path is stored empty and
  // source_root repeats the source id.
  statement.run(id, agentId, name, now, now, sourceId, GITHUB_SOURCE_KIND, "", sourceId);
}
/**
 * Insert a dependency edge, or strengthen the existing edge that has the same
 * source entity, target entity, dependency type and agent.
 *
 * On update, strength and confidence only ever move up (SQL `MAX`), while the
 * reason and source columns are overwritten.
 *
 * @returns `true` when a new row was inserted, `false` when an existing row was updated.
 */
function upsertDependency(
  db: WriteDb,
  input: {
    readonly sourceEntityId: string;
    readonly targetEntityId: string;
    readonly agentId: string;
    readonly type: string;
    readonly strength: number;
    readonly confidence: number;
    readonly reason: string;
    readonly sourceId: string;
    readonly now: string;
  },
): boolean {
  const { sourceEntityId, targetEntityId, agentId, type, strength, confidence, reason, sourceId, now } = input;

  const current = db
    .prepare(
      `SELECT id FROM entity_dependencies
       WHERE source_entity_id = ? AND target_entity_id = ? AND dependency_type = ? AND agent_id = ?
       LIMIT 1`,
    )
    .get(sourceEntityId, targetEntityId, type, agentId) as { id: string } | undefined;

  if (!current) {
    const newId = idFor("dep", agentId, type, sourceEntityId, targetEntityId);
    db.prepare(
      `INSERT INTO entity_dependencies
         (id, source_entity_id, target_entity_id, agent_id, dependency_type, strength, confidence, reason,
          created_at, updated_at, source_id, source_kind, source_path, source_root)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    ).run(
      newId,
      sourceEntityId,
      targetEntityId,
      agentId,
      type,
      strength,
      confidence,
      reason,
      now,
      now,
      sourceId,
      GITHUB_SOURCE_KIND,
      "",
      sourceId,
    );
    return true;
  }

  db.prepare(
    `UPDATE entity_dependencies
       SET strength = MAX(strength, ?), confidence = MAX(COALESCE(confidence, 0), ?),
           reason = ?, updated_at = ?, source_id = ?, source_kind = ?, source_path = ?, source_root = ?
       WHERE id = ?`,
  ).run(strength, confidence, reason, now, sourceId, GITHUB_SOURCE_KIND, "", sourceId, current.id);
  return false;
}
/**
 * Record one GitHub resource in the knowledge graph, inside a single write
 * transaction.
 *
 * Creates or refreshes the source, repo and resource entities, the repo
 * community, the `contains` edges between them, one edge per label, and one
 * edge per issue/PR reference found in the resource body. Existing
 * `tagged_with` / `wiki_link` edges leaving the resource are deleted first so
 * removed labels and references disappear. Finally the member counts of the
 * source's communities are recomputed.
 *
 * @param input Agent, source, repo and the resource to index.
 */
export function indexGitHubSourceStructure(input: IndexGitHubSourceStructureInput): void {
  const { agentId, sourceId, sourceName, repo, resource } = input;
  const now = new Date().toISOString();
  const sourcePath = resourceSourcePath(repo, resource);
  const resourceCanonical = `github:${sourceId}:${sourcePath}`;
  const resourceKey = String(resource.number ?? resource.path);

  getDbAccessor().withWriteTx((db) => {
    // Edges below use the id returned by upsertEntity: when a row with the same
    // canonical name already exists it keeps its own id, which may differ from
    // the freshly computed one.
    const sourceEntityId = upsertEntity(db, {
      id: idFor(agentId, sourceId, "source"),
      name: sourceName,
      canonicalName: `github:${sourceId}`,
      entityType: "source",
      agentId,
      sourceId,
      sourcePath: sourceId,
      now,
    });

    const repoEntityId = upsertEntity(db, {
      id: idFor(agentId, sourceId, "repo", repo),
      name: repo,
      canonicalName: `github:${sourceId}:${repo}`,
      entityType: "source_folder",
      agentId,
      sourceId,
      sourcePath: `github:${repo}`,
      now,
    });
    const repoCommunityId = idFor(agentId, sourceId, "community", repo);
    upsertCommunity(db, { id: repoCommunityId, name: repo, agentId, sourceId, now });
    db.prepare("UPDATE entities SET community_id = ? WHERE id = ?").run(repoCommunityId, repoEntityId);
    upsertDependency(db, {
      sourceEntityId,
      targetEntityId: repoEntityId,
      agentId,
      type: "contains",
      strength: 1,
      confidence: 1,
      reason: requireDependencyReason("related_to", `GitHub source contains repo ${repo}`),
      sourceId,
      now,
    });

    const resourceEntityId = upsertEntity(db, {
      id: idFor(agentId, sourceId, "resource", repo, resource.type, resourceKey),
      name: resourceDisplayName(resource),
      canonicalName: resourceCanonical,
      entityType: "source_document",
      agentId,
      sourceId,
      sourcePath,
      now,
    });
    db.prepare("UPDATE entities SET community_id = ? WHERE id = ?").run(repoCommunityId, resourceEntityId);
    upsertDependency(db, {
      sourceEntityId: repoEntityId,
      targetEntityId: resourceEntityId,
      agentId,
      type: "contains",
      strength: 1,
      confidence: 1,
      reason: requireDependencyReason(
        "related_to",
        `GitHub repo ${repo} contains ${resource.type} ${resource.number ?? resource.path}`,
      ),
      sourceId,
      now,
    });

    // Drop the resource's previous label/reference edges before re-creating them.
    db.prepare(
      "DELETE FROM entity_dependencies WHERE source_entity_id = ? AND agent_id = ? AND source_id = ? AND dependency_type IN ('tagged_with', 'wiki_link')",
    ).run(resourceEntityId, agentId, sourceId);

    for (const label of resource.labels) {
      const labelEntityId = upsertEntity(db, {
        // The repo is part of the id because it is part of the canonical name:
        // without it the same label in two repos of one source produced two
        // canonical names with one id, and the second INSERT collided.
        id: idFor(agentId, sourceId, "label", repo, label),
        name: label,
        canonicalName: `github:${sourceId}:${repo}:label:${label}`,
        entityType: "source_document_reference",
        agentId,
        sourceId,
        sourcePath: `github:${repo}:label:${label}`,
        now,
      });
      upsertDependency(db, {
        sourceEntityId: resourceEntityId,
        targetEntityId: labelEntityId,
        agentId,
        type: "wiki_link",
        strength: 0.8,
        confidence: 1,
        reason: requireDependencyReason("related_to", `GitHub ${resource.type} labeled ${label}`),
        sourceId,
        now,
      });
    }

    for (const ref of extractGitHubRefs(resource.body ?? "", repo)) {
      const refSourcePath = `github:${repo}:${ref.type}:${ref.number}`;
      const refCanonical = `github:${sourceId}:${refSourcePath}`;
      // A resource that mentions its own number must not link to itself.
      if (refCanonical === resourceCanonical) continue;

      // A reference shares its canonical name (and id) with the real resource it
      // points at. Only create a placeholder when nothing is there yet, so an
      // already-indexed resource keeps its name and entity type.
      const known = db
        .prepare("SELECT id FROM entities WHERE canonical_name = ? AND agent_id = ? LIMIT 1")
        .get(refCanonical, agentId) as { id: string } | undefined;
      const refEntityId =
        known?.id ??
        upsertEntity(db, {
          id: idFor(agentId, sourceId, "resource", repo, ref.type, String(ref.number)),
          name: `${ref.type} #${ref.number}`,
          canonicalName: refCanonical,
          entityType: "source_document_reference",
          agentId,
          sourceId,
          sourcePath: refSourcePath,
          now,
        });
      upsertDependency(db, {
        sourceEntityId: resourceEntityId,
        targetEntityId: refEntityId,
        agentId,
        type: "wiki_link",
        strength: ref.type === "pull" ? 0.9 : 0.7,
        confidence: 0.8,
        reason: requireDependencyReason(
          "related_to",
          `GitHub ${resource.type} references ${ref.type} #${ref.number}`,
        ),
        sourceId,
        now,
      });
    }

    // Recount members for every community of this agent and source.
    db.prepare(
      `UPDATE entity_communities
         SET member_count = (
           SELECT COUNT(*) FROM entities e WHERE e.community_id = entity_communities.id
         ), updated_at = ?
       WHERE agent_id = ? AND source_id = ?`,
    ).run(now, agentId, sourceId);
  });
}
/** Graph source path of a resource: docs are keyed by file path, everything else by number. */
function resourceSourcePath(repo: string, resource: GitHubResource): string {
  const isDocWithPath = resource.type === "doc" && Boolean(resource.path);
  return isDocWithPath
    ? `github:${repo}:docs:${resource.path}`
    : `github:${repo}:${resource.type}:${resource.number}`;
}

/** Display name of a resource: the file name for docs, otherwise "<type> #<number>: <title>". */
function resourceDisplayName(resource: GitHubResource): string {
  if (resource.type === "doc" && resource.path) return resource.path.split("/").pop() ?? resource.path;
  return `${resource.type} #${resource.number}: ${resource.title}`;
}

/** A reference to an issue or pull request found in free text. */
interface GitHubRef {
  readonly type: string;
  readonly number: number;
}

/**
 * Extract issue and pull-request references from Markdown text.
 *
 * Recognised forms are `#123` (optionally after a closing keyword such as
 * "fixes"), `owner/repo#123`, and `https://github.com/owner/repo/(issues|pull)/123`.
 * Shorthand references are reported with type "issue"; URLs carry their own type.
 * Each `type:number` pair is returned once.
 *
 * @param body Text to scan.
 * @param _repo `owner/repo` the text belongs to. URLs and `owner/repo#N`
 *   shorthand that name a different repository are ignored (compared
 *   case-insensitively). Pass "" to accept references to any repository.
 * @returns Unique references in order of first discovery.
 */
export function extractGitHubRefs(body: string, _repo: string): GitHubRef[] {
  const refs = new Map<string, GitHubRef>();
  const ownRepo = _repo.trim().toLowerCase();
  const isOwnRepo = (slug: string): boolean => ownRepo === "" || slug.toLowerCase() === ownRepo;
  const record = (type: string, number: number): void => {
    const key = `${type}:${number}`;
    if (!refs.has(key)) refs.set(key, { type, number });
  };
  // Shorthand numbers are capped to avoid treating long digit runs as issues.
  const isShorthandNumber = (n: number): boolean => n > 0 && n < 1_000_000;

  const keyword = "(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?|ref[s]?|see)";
  const url = "https:\\/\\/github\\.com\\/([^/\\s]+\\/[^/\\s]+)\\/(issues|pull)\\/(\\d+)";

  // Pass 1: keyword + "#N". The whitespace before "#" rules out a repo prefix.
  for (const match of body.matchAll(new RegExp(`${keyword}\\s+#(\\d+)`, "gi"))) {
    const num = Number(match[1]);
    if (isShorthandNumber(num)) record("issue", num);
  }

  // Passes 2 and 3: URLs, keyword-prefixed ones first (keeps the discovery order
  // of the earlier implementation). URLs for other repositories are skipped.
  for (const pattern of [new RegExp(`${keyword}\\s+${url}`, "gi"), new RegExp(url, "gi")]) {
    for (const match of body.matchAll(pattern)) {
      if (!isOwnRepo(match[1])) continue;
      const num = Number(match[3]);
      if (num > 0 && Number.isSafeInteger(num)) record(match[2] === "pull" ? "pull" : "issue", num);
    }
  }

  // Pass 4: bare "#N". Look a bounded distance backwards for an "owner/repo"
  // prefix so cross-repository shorthand is not attributed to this repo.
  for (const match of body.matchAll(/#(\d+)/g)) {
    const start = match.index ?? 0;
    const prefix = /([\w.-]+\/[\w.-]+)$/.exec(body.slice(Math.max(0, start - 140), start));
    if (prefix && !isOwnRepo(prefix[1])) continue;
    const num = Number(match[1]);
    if (isShorthandNumber(num)) record("issue", num);
  }

  return [...refs.values()];
}
/**
 * Delete every graph row recorded for a source: attributes, dependencies,
 * entities and communities, in that order, inside one write transaction.
 *
 * @param input Source id, plus an optional agent id that narrows the delete to
 *   that agent's rows.
 * @returns Total number of rows deleted across the four tables.
 */
export function purgeGitHubSourceStructure(input: PurgeGitHubSourceStructureInput): number {
  const agentClause = input.agentId ? "agent_id = ? AND " : "";
  const bindings = input.agentId ? [input.agentId, input.sourceId] : [input.sourceId];

  return getDbAccessor().withWriteTx((db) => {
    const deleteFrom = (table: string): number =>
      db.prepare(`DELETE FROM ${table} WHERE ${agentClause}source_id = ?`).run(...bindings).changes;

    const attributeRows = deleteFrom("entity_attributes");
    const dependencyRows = deleteFrom("entity_dependencies");
    const entityRows = deleteFrom("entities");
    const communityRows = deleteFrom("entity_communities");
    return entityRows + attributeRows + dependencyRows + communityRows;
  });
}

/** Identifies a single indexed resource whose graph rows should be removed. */
export interface PurgeGitHubResourceStructureInput {
  readonly sourceId: string;
  readonly repo: string;
  readonly agentId: string;
  readonly resource: GitHubResource;
}

/**
 * Delete the entity recorded for one resource together with its attributes and
 * every dependency edge that starts or ends at it.
 *
 * The entity is located by the canonical name derived from source id, repo and
 * resource; when no such entity exists nothing is deleted.
 *
 * @returns Number of rows deleted (0 when the entity was not found).
 */
export function purgeGitHubResourceStructure(input: PurgeGitHubResourceStructureInput): number {
  const { agentId, sourceId, repo, resource } = input;
  const canonicalName = `github:${sourceId}:${resourceSourcePath(repo, resource)}`;

  return getDbAccessor().withWriteTx((db) => {
    const row = db
      .prepare("SELECT id FROM entities WHERE canonical_name = ? AND agent_id = ? LIMIT 1")
      .get(canonicalName, agentId) as { id: string } | undefined;
    if (row === undefined) return 0;

    const attributeRows = db
      .prepare("DELETE FROM entity_attributes WHERE entity_id = ? AND agent_id = ?")
      .run(row.id, agentId).changes;
    const dependencyRows = db
      .prepare("DELETE FROM entity_dependencies WHERE (source_entity_id = ? OR target_entity_id = ?) AND agent_id = ?")
      .run(row.id, row.id, agentId).changes;
    const entityRows = db
      .prepare("DELETE FROM entities WHERE id = ? AND agent_id = ?")
      .run(row.id, agentId).changes;
    return entityRows + attributeRows + dependencyRows;
  });
}
AND agent_id = ?").run(entity.id, input.agentId).changes; + return entities + attrs + deps; + }); +} diff --git a/platform/daemon/src/routes/sources-routes.ts b/platform/daemon/src/routes/sources-routes.ts index 7d2fb5223..196efc830 100644 --- a/platform/daemon/src/routes/sources-routes.ts +++ b/platform/daemon/src/routes/sources-routes.ts @@ -6,14 +6,20 @@ import { dirname } from "node:path"; import { promisify } from "node:util"; import { type SignetSourceEntry, + addGitHubSource, addObsidianSource, loadSourcesConfig, markSourceIndexed, removeSource, } from "@signet/core"; +import { requirePermission } from "../auth/middleware"; import type { Hono } from "hono"; +import { authConfig } from "./state.js"; import { resolveDaemonAgentId } from "../agent-id"; import { getDbAccessor } from "../db-accessor"; +import { fetchEmbedding } from "../embedding-fetch"; +import { type GitHubSourceBridgeOptions, purgeGitHubSource, syncGitHubSource } from "../github-source-bridge"; +import { loadMemoryConfig } from "../memory-config"; import { type NativeMemoryBridgeHandle, obsidianNativeMemorySource, @@ -59,6 +65,18 @@ interface AddObsidianSourceBody { readonly excludeGlobs?: readonly string[]; } +interface AddGitHubSourceBody { + readonly repos?: readonly string[]; + readonly name?: string; + readonly tokenRef?: string; + readonly resourceTypes?: readonly ("issues" | "pulls" | "discussions" | "docs")[]; + readonly state?: "open" | "closed" | "all"; + readonly includeComments?: boolean; + readonly labels?: readonly string[]; + readonly docPaths?: readonly string[]; + readonly maxItemsPerRepo?: number; +} + interface PickDirectoryBody { readonly title?: string; } @@ -125,21 +143,66 @@ export function registerSourcesRoutes(app: Hono, deps: RegisterSourcesRoutesDeps return c.json({ source: result.source, created: result.created, indexed: 0, queued: true, job }, 202); }); - app.delete("/api/sources/:sourceId", (c) => { + app.post("/api/sources/github", requirePermission("admin", 
authConfig), async (c) => { + let body: AddGitHubSourceBody = {}; + try { + body = (await c.req.json()) as AddGitHubSourceBody; + } catch { + return c.json({ error: "Invalid JSON body" }, 400); + } + + const repos = Array.isArray(body.repos) ? body.repos.filter((r) => typeof r === "string") : []; + if (repos.length === 0) return c.json({ error: "repos is required (e.g. ['owner/repo', 'owner/*'])" }, 400); + + const result = addGitHubSource( + { + repos, + name: body.name, + tokenRef: body.tokenRef, + resourceTypes: body.resourceTypes, + state: body.state, + includeComments: body.includeComments, + labels: body.labels, + docPaths: body.docPaths, + maxItemsPerRepo: body.maxItemsPerRepo, + agentId: resolveDaemonAgentId(), + }, + agentsDir, + ); + if (result.ok === false) return c.json({ error: result.error }, 400); + + const embeddingCfg = loadMemoryConfig(agentsDir); + const ec = embeddingCfg.embedding.provider !== "none" ? embeddingCfg.embedding : undefined; + const job = enqueueGitHubSourceIndexJob(result.source, { + agentsDir, + embeddingConfig: ec, + fetchEmbedding: ec ? fetchEmbedding : undefined, + }); + + return c.json({ source: result.source, created: result.created, queued: true, job }, 202); + }); + + app.delete("/api/sources/:sourceId", requirePermission("admin", authConfig), async (c) => { const sourceId = c.req.param("sourceId"); const result = removeSource(sourceId, agentsDir); if (result.ok === false) return c.json({ error: result.error }, 404); + const currentAgentId = resolveDaemonAgentId(); + if (result.source.agentId && result.source.agentId !== currentAgentId) { + return c.json({ error: "Source is owned by a different agent" }, 403); + } cancelSourceIndexJob(result.source.id); - const sourceAgentId = resolveDaemonAgentId(); + const sourceAgentId = currentAgentId; recordSourceDeletionTombstone(result.source, sourceAgentId, agentsDir); - const purged = - result.source.kind === "obsidian" - ? 
/**
 * Register a GitHub index job for a source and start it asynchronously.
 *
 * The job starts on the next timer tick. While another index run for the same
 * source is in flight, the start is retried every 50 ms; the retry stops as soon
 * as this job is no longer the current job for the source.
 *
 * @param source Source entry to sync.
 * @param options Bridge options forwarded to the sync.
 * @returns The job descriptor created for this request.
 */
function enqueueGitHubSourceIndexJob(
  source: SignetSourceEntry,
  options: GitHubSourceBridgeOptions,
): SourceIndexJob {
  const job = beginSourceIndexJob(source.id, "github-source-index");

  const tryStart = (): void => {
    if (!isCurrentSourceIndexJob(source.id, job.id)) return;
    if (isSourceIndexInFlight(source.id)) {
      // Re-run the full guard sequence later. The earlier version ran the job
      // directly after 50 ms, which could overlap the run still in flight and
      // then clear that run's in-flight flag when it finished first.
      setTimeout(tryStart, 50).unref?.();
      return;
    }
    markSourceIndexInFlight(source.id);
    if (!markSourceIndexJobRunning(source.id, job.id)) {
      clearSourceIndexInFlight(source.id);
      return;
    }
    void runGitHubSourceIndexJob(source, options, job);
  };

  setTimeout(tryStart, 0).unref?.();
  return job;
}

/**
 * Run one GitHub sync and record its outcome on the job.
 *
 * The source is marked as indexed only when the sync reported no errors. If the
 * job was superseded while the sync ran, its result (or failure) is discarded.
 * The in-flight flag for the source is always cleared at the end.
 */
async function runGitHubSourceIndexJob(
  source: SignetSourceEntry,
  options: GitHubSourceBridgeOptions,
  job: SourceIndexJob,
): Promise<void> {
  try {
    const result = await syncGitHubSource(source, options);
    if (!isCurrentSourceIndexJob(source.id, job.id)) return;
    if (!result.hadErrors) markSourceIndexed(source.id, undefined, options.agentsDir);
    completeSourceIndexJob(source.id, job.id, result.indexed);
  } catch (err) {
    if (!isCurrentSourceIndexJob(source.id, job.id)) return;
    failSourceIndexJob(source.id, job.id, err);
  } finally {
    clearSourceIndexInFlight(source.id);
  }
}
clearSourceIndexInFlight(source.id); + } +} + function cleanupSourceDeletionTombstones( agentsDir: string, purgeNativeSource: typeof purgeNativeMemorySourceArtifacts, diff --git a/surfaces/cli/src/commands/sources.ts b/surfaces/cli/src/commands/sources.ts index c4c38a533..114c6ecb2 100644 --- a/surfaces/cli/src/commands/sources.ts +++ b/surfaces/cli/src/commands/sources.ts @@ -1,5 +1,11 @@ import type { Command } from "commander"; -import { type SourcesDeps, addObsidianVaultSource, listSources, removeConfiguredSource } from "../features/sources.js"; +import { + type SourcesDeps, + addGitHubRepoSource, + addObsidianVaultSource, + listSources, + removeConfiguredSource, +} from "../features/sources.js"; import type { DaemonApiCall } from "../lib/daemon.js"; export interface RegisterSourcesCommandsDeps extends SourcesDeps { @@ -62,4 +68,17 @@ export function registerSourcesCommands(program: Command, deps: RegisterSourcesC .action((path: string, options: { name?: string; exclude?: string[] }) => addObsidianVaultSource(path, options, deps), ); + + add + .command("github") + .description("Index GitHub repos (issues, PRs, discussions, docs) as a recall source") + .requiredOption("--repos ", "Repo patterns (owner/repo or owner/*)") + .option("--name ", "Display name for this source") + .option("--token-ref ", "Signet secret reference for GitHub PAT") + .option("--types ", "Resource types: issues,pulls,discussions,docs", "issues,pulls,discussions,docs") + .option("--state ", "Filter by state: open, closed, all", "all") + .option("--no-comments", "Skip fetching comments") + .option("--doc-paths ", "Doc file paths to index", ["README.md", "CHANGELOG.md"]) + .option("--max-items ", "Max items per repo", "500") + .action((options) => addGitHubRepoSource(options, deps)); } diff --git a/surfaces/cli/src/features/sources.ts b/surfaces/cli/src/features/sources.ts index 0b27b39dd..9f9b7fd33 100644 --- a/surfaces/cli/src/features/sources.ts +++ b/surfaces/cli/src/features/sources.ts 
@@ -1,4 +1,4 @@ -import { addObsidianSource, loadSourcesConfig, removeSource } from "@signet/core"; +import { addGitHubSource, addObsidianSource, loadSourcesConfig, removeSource } from "@signet/core"; import chalk from "chalk"; export interface SourcesDeps { @@ -40,6 +40,77 @@ export async function addObsidianVaultSource( console.log(chalk.dim("Run `signet daemon restart` if the daemon is already running.")); } +export async function addGitHubRepoSource( + options: { + readonly repos?: readonly string[]; + readonly name?: string; + readonly tokenRef?: string; + readonly types?: string; + readonly state?: string; + readonly comments?: boolean; + readonly docPaths?: readonly string[]; + readonly maxItems?: string; + }, + deps: SourcesDeps, +): Promise { + const repos = options.repos ?? []; + if (repos.length === 0) { + console.error(chalk.red("✗ --repos is required (e.g. --repos owner/repo owner/*)")); + process.exitCode = 1; + return; + } + const validTypes = new Set(["issues", "pulls", "discussions", "docs"]); + const rawTypes = options.types + ? options.types + .split(",") + .map((t) => t.trim()) + .filter(Boolean) + : undefined; + if (rawTypes && rawTypes.length > 0) { + const invalid = rawTypes.filter((t) => !validTypes.has(t)); + if (invalid.length > 0) { + console.error(chalk.red(`✗ Invalid resource types: ${invalid.join(", ")}. Must be one of: issues, pulls, discussions, docs`)); + process.exitCode = 1; + return; + } + } + const resourceTypes = rawTypes as ("issues" | "pulls" | "discussions" | "docs")[] | undefined; + if (options.state && !["open", "closed", "all"].includes(options.state)) { + console.error(chalk.red(`✗ Invalid state: ${options.state}. Must be one of: open, closed, all`)); + process.exitCode = 1; + return; + } + const maxItems = options.maxItems ? 
Number(options.maxItems) : undefined; + + const result = addGitHubSource( + { + repos, + name: options.name, + tokenRef: options.tokenRef, + resourceTypes, + state: options.state as "open" | "closed" | "all" | undefined, + includeComments: options.comments, + docPaths: options.docPaths, + maxItemsPerRepo: maxItems, + }, + deps.agentsDir, + ); + if (result.ok === false) { + console.error(chalk.red(`✗ ${result.error}`)); + process.exitCode = 1; + return; + } + + const verb = result.created ? "Added" : "Updated"; + console.log(chalk.green(`✓ ${verb} GitHub source: ${result.source.name}`)); + console.log(chalk.dim(` repos: ${repos.join(", ")}`)); + if (options.tokenRef) console.log(chalk.dim(` token: ${options.tokenRef}`)); + console.log(chalk.dim(` types: ${(resourceTypes ?? ["issues", "pulls", "discussions", "docs"]).join(", ")}`)); + console.log(); + console.log(chalk.dim("The daemon indexes GitHub sources on startup and polls every 5 minutes.")); + console.log(chalk.dim("Run `signet daemon restart` if the daemon is already running.")); +} + export async function listSources(deps: SourcesDeps): Promise { const config = loadSourcesConfig(deps.agentsDir); if (config.sources.length === 0) {