From 49d55d94a67dcdecb0d17428b7306d05b0a161d3 Mon Sep 17 00:00:00 2001 From: Nicholai Date: Sun, 24 May 2026 11:37:52 -0600 Subject: [PATCH 01/21] feat(sources): add GitHub source provider --- docs/SOURCES.md | 31 +- docs/api/documents-sources.md | 47 +- platform/core/src/index.ts | 11 + platform/core/src/sources-config.test.ts | 106 ++++ platform/core/src/sources-config.ts | 250 ++++++++ .../daemon/src/github-source-fetch.test.ts | 137 ++++ platform/daemon/src/github-source-fetch.ts | 599 ++++++++++++++++++ .../daemon/src/github-source-provider.test.ts | 194 ++++++ platform/daemon/src/github-source-provider.ts | 406 ++++++++++++ .../daemon/src/routes/sources-routes.test.ts | 40 +- platform/daemon/src/routes/sources-routes.ts | 69 +- platform/daemon/src/source-providers.ts | 4 +- surfaces/cli/src/commands/sources.ts | 15 + surfaces/cli/src/features/sources.test.ts | 22 + surfaces/cli/src/features/sources.ts | 65 +- 15 files changed, 1989 insertions(+), 7 deletions(-) create mode 100644 platform/daemon/src/github-source-fetch.test.ts create mode 100644 platform/daemon/src/github-source-fetch.ts create mode 100644 platform/daemon/src/github-source-provider.test.ts create mode 100644 platform/daemon/src/github-source-provider.ts diff --git a/docs/SOURCES.md b/docs/SOURCES.md index b77dfcf54..21cdf84a2 100644 --- a/docs/SOURCES.md +++ b/docs/SOURCES.md @@ -10,7 +10,7 @@ Sources Sources are external knowledge bases that Signet can read, index, and recall from without turning them into ordinary saved memories. -Sources currently support **Obsidian** vaults and **Discord** guilds. Point Signet at an Obsidian vault and the daemon mounts that vault as a read-only knowledge base: Markdown files become searchable artifacts, the vault structure becomes graph topology, and heading-aware chunks participate in semantic recall. 
Add Discord with a bot-token secret reference and Signet indexes guild topology, channels, threads, members, message windows, and Discord metadata through the same source-owned artifact lifecycle. +Sources currently support **Obsidian** vaults, **Discord** guilds, and **GitHub** repositories. Point Signet at an Obsidian vault and the daemon mounts that vault as a read-only knowledge base: Markdown files become searchable artifacts, the vault structure becomes graph topology, and heading-aware chunks participate in semantic recall. Add Discord with a bot-token secret reference and Signet indexes guild topology, channels, threads, members, message windows, and Discord metadata through the same source-owned artifact lifecycle. Add GitHub repositories to index issues, pull requests, discussions, selected Markdown docs, comments, and source failure artifacts through the shared source provider pipeline. The important rule is simple: **the source stays canonical**. Signet reads from the vault. It does not edit notes, rewrite frontmatter, create files, or move anything inside the source directory. @@ -94,6 +94,34 @@ artifacts under the synthetic `@me` guild by default; use `--include-local-discord` only when intentionally moving that private local cache data. +GitHub v1 +--------- + +GitHub Sources v1 indexes configured repositories through the shared Sources job pipeline: + +```bash +signet sources add github --repo Signet-AI/signetai --name "Signet GitHub" +signet sources add github --repo Signet-AI/signetai --token-ref GITHUB_TOKEN --resource-type issues --resource-type discussions +signet sources add github --repo Signet-AI/* --resource-type docs --doc-path "docs/**/*.md" --max-items 50 +signet sources list +signet sources remove github:... +``` + +Without `--token-ref`, GitHub sources default to REST-fetchable resources: +issues, pull requests, and selected Markdown docs. Discussions use the GitHub +GraphQL API and require a token reference. 
Tokens must be stored in Signet +Secrets or an external secret reference; Signet does not store raw GitHub +tokens in source config. + +GitHub source config is bounded by `maxItemsPerRepo`. Repo globs, issue/PR +fetches, discussion fetches, and wildcard docs paths all honor configured caps. +Direct docs paths are limited to Markdown paths or Markdown globs, so GitHub v1 +does not become arbitrary source-code indexing by accident. + +Partial GitHub failures are written as source-owned failure artifacts and cause +the shared source job to report failure instead of silently marking incomplete +data as fully indexed. + Obsidian v1 ----------- @@ -228,6 +256,7 @@ The daemon exposes the Sources lifecycle under `/api/sources`: | `GET` | `/api/sources` | List configured sources. | | `POST` | `/api/sources/obsidian` | Add/update an Obsidian vault source and index it. | | `POST` | `/api/sources/discord` | Add/update a Discord source and queue a shared source index job. | +| `POST` | `/api/sources/github` | Add/update a GitHub source and queue a shared source index job. | | `DELETE` | `/api/sources/:sourceId` | Remove a source config and purge Signet-owned source rows. | | `POST` | `/api/sources/pick-directory` | Development/browser fallback for choosing a local directory. | diff --git a/docs/api/documents-sources.md b/docs/api/documents-sources.md index e1352df84..33247dab6 100644 --- a/docs/api/documents-sources.md +++ b/docs/api/documents-sources.md @@ -135,7 +135,7 @@ the document are soft-deleted one at a time with audit history. Sources connect read-only external knowledge bases to Signet recall without turning them into ordinary saved memories. Supported source kinds are -`obsidian` and `discord`. +`obsidian`, `discord`, and `github`. ### GET /api/sources @@ -267,6 +267,51 @@ windows, attachments, mentions, embeds, polls, checkpoints, and import stats. Cache imports are observational and never reconcile deletes from missing or evicted local cache files. 
+### POST /api/sources/github + +Add or update a GitHub source and queue a shared source index job. Without a +token reference, GitHub sources default to issues, pull requests, and selected +Markdown docs. Discussions require `tokenRef` because they use the GitHub +GraphQL API. Raw GitHub tokens are rejected; pass a Signet secret name or +external secret reference instead. + +**Request body** + +```json +{ + "repos": ["Signet-AI/signetai"], + "tokenRef": "GITHUB_TOKEN", + "name": "Signet GitHub", + "resourceTypes": ["issues", "pulls", "discussions", "docs"], + "state": "all", + "includeComments": true, + "labels": ["bug", "needs review"], + "docPaths": ["README.md", "docs/**/*.md"], + "maxItemsPerRepo": 500 +} +``` + +`repo` is accepted as a single-repository alias. `docPaths` are limited to +Markdown files or Markdown globs so GitHub source indexing stays focused on +chosen docs instead of broad source-code ingestion. + +**Response** + +```json +{ + "source": { "id": "github:abc123", "kind": "github" }, + "created": true, + "indexed": 0, + "queued": true, + "job": { "status": "queued", "sourceId": "github:abc123" } +} +``` + +The sync path indexes source-owned artifacts for issues, pull requests, +discussions, selected Markdown docs, comments, and partial-failure artifacts. +Partial GitHub failures cause the shared source job to report failure while +preserving source-owned rows that were indexed successfully. 
+ ### DELETE /api/sources/:sourceId Remove a source config and purge Signet-owned source artifacts, graph rows, diff --git a/platform/core/src/index.ts b/platform/core/src/index.ts index 182453874..73c613a54 100644 --- a/platform/core/src/index.ts +++ b/platform/core/src/index.ts @@ -224,25 +224,36 @@ export type { } from "./workspace-source-repo"; export { addDiscordSource, + addGitHubSource, addObsidianSource, DEFAULT_DISCORD_DESKTOP_CACHE_PATH, DEFAULT_DISCORD_MAX_MESSAGES_PER_CHANNEL, + DEFAULT_GITHUB_DOC_PATHS, + DEFAULT_GITHUB_MAX_ITEMS_PER_REPO, + DEFAULT_GITHUB_RESOURCE_TYPES, + DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN, DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, MAX_DISCORD_MAX_MESSAGES_PER_CHANNEL, + MAX_GITHUB_MAX_ITEMS_PER_REPO, getAgentsDir, getSourcesConfigPath, loadSourcesConfig, markSourceIndexed, parseDiscordSettings, + parseGitHubSettings, removeSource, saveSourcesConfig, } from "./sources-config"; export type { AddDiscordSourceInput, + AddGitHubSourceInput, AddObsidianSourceInput, AddSourceResult, DiscordSourceSettings, DiscordSourceSyncMode, + GitHubSourceResourceType, + GitHubSourceSettings, + GitHubSourceState, RemoveSourceResult, SignetSourceEntry, SignetSourceKind, diff --git a/platform/core/src/sources-config.test.ts b/platform/core/src/sources-config.test.ts index 4a222aee2..dfe1904f8 100644 --- a/platform/core/src/sources-config.test.ts +++ b/platform/core/src/sources-config.test.ts @@ -4,13 +4,16 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { DEFAULT_DISCORD_MAX_MESSAGES_PER_CHANNEL, + DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN, DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, addDiscordSource, + addGitHubSource, addObsidianSource, getSourcesConfigPath, loadSourcesConfig, markSourceIndexed, parseDiscordSettings, + parseGitHubSettings, removeSource, } from "./sources-config"; @@ -278,6 +281,109 @@ describe("sources-config", () => { }); }); + it("adds a GitHub source with validated provider settings", () => { + const agentsDir = tmp(); + + 
const result = addGitHubSource( + { + repos: ["Signet-AI/signetai", "Signet-AI/signetai"], + tokenRef: "GITHUB_TOKEN", + name: "Signet GitHub", + resourceTypes: ["issues", "pulls", "discussions", "docs"], + state: "open", + labels: ["bug", "needs review", "bug"], + docPaths: ["README.md", "docs/**/*.md"], + maxItemsPerRepo: 25, + now: "2026-01-02T00:00:00.000Z", + }, + agentsDir, + ); + + expect(result.ok).toBe(true); + if (result.ok === false) throw new Error(result.error); + expect(result.source.kind).toBe("github"); + expect(result.source.root).toBe("github://repos/Signet-AI/signetai"); + expect(result.source.providerSettings).toEqual({ + repos: ["Signet-AI/signetai"], + tokenRef: "GITHUB_TOKEN", + resourceTypes: ["issues", "pulls", "discussions", "docs"], + state: "open", + includeComments: true, + labels: ["bug", "needs review"], + docPaths: ["README.md", "docs/**/*.md"], + maxItemsPerRepo: 25, + }); + }); + + it("defaults GitHub sources without tokenRef to REST-fetchable resources", () => { + const result = addGitHubSource({ repos: ["Signet-AI/signetai"] }, tmp()); + + expect(result.ok).toBe(true); + if (result.ok === false) throw new Error(result.error); + expect(parseGitHubSettings(result.source.providerSettings).resourceTypes).toEqual([ + ...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN, + ]); + }); + + it("preserves GitHub settings on partial update", () => { + const agentsDir = tmp(); + const first = addGitHubSource( + { + repos: ["Signet-AI/signetai"], + tokenRef: "GITHUB_TOKEN", + resourceTypes: ["issues", "discussions"], + labels: ["reviewed"], + docPaths: ["docs/API.md"], + maxItemsPerRepo: 12, + now: "2026-01-01T00:00:00.000Z", + }, + agentsDir, + ); + const second = addGitHubSource( + { repos: ["Signet-AI/signetai"], name: "Renamed", now: "2026-01-02T00:00:00.000Z" }, + agentsDir, + ); + + expect(first.ok).toBe(true); + expect(second.ok).toBe(true); + if (second.ok === false) throw new Error(second.error); + expect(second.created).toBe(false); + 
expect(second.source.name).toBe("Renamed"); + expect(parseGitHubSettings(second.source.providerSettings)).toMatchObject({ + tokenRef: "GITHUB_TOKEN", + resourceTypes: ["issues", "discussions"], + labels: ["reviewed"], + docPaths: ["docs/API.md"], + maxItemsPerRepo: 12, + }); + expect(loadSourcesConfig(agentsDir).sources).toHaveLength(1); + }); + + it("rejects invalid GitHub source boundaries", () => { + const agentsDir = tmp(); + + expect(addGitHubSource({ repos: [] }, agentsDir)).toEqual({ + ok: false, + error: "At least one GitHub repo pattern is required", + }); + expect(addGitHubSource({ repos: ["not-a-repo"] }, agentsDir)).toEqual({ + ok: false, + error: "Invalid GitHub repo pattern: not-a-repo. Expected owner/repo or owner/*", + }); + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], resourceTypes: ["discussions"] }, agentsDir)).toEqual({ + ok: false, + error: "GitHub discussions require tokenRef because they use the GitHub GraphQL API", + }); + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], docPaths: ["src/daemon.ts"] }, agentsDir)).toEqual({ + ok: false, + error: "Invalid GitHub docPaths: src/daemon.ts", + }); + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], maxItemsPerRepo: 0 }, agentsDir)).toEqual({ + ok: false, + error: "GitHub maxItemsPerRepo must be an integer between 1 and 10000", + }); + }); + it("round-trips provider-neutral source settings for future adapters", () => { const agentsDir = tmp(); const source = { diff --git a/platform/core/src/sources-config.ts b/platform/core/src/sources-config.ts index 65e2bb62f..3bd741e8f 100644 --- a/platform/core/src/sources-config.ts +++ b/platform/core/src/sources-config.ts @@ -42,6 +42,8 @@ export interface AddObsidianSourceInput { } export type DiscordSourceSyncMode = "rest" | "gateway-tail" | "desktop-cache"; +export type GitHubSourceResourceType = "issues" | "pulls" | "discussions" | "docs"; +export type GitHubSourceState = "open" | "closed" | "all"; export interface 
DiscordSourceSettings { readonly guildIds: readonly string[]; @@ -83,6 +85,30 @@ export interface AddDiscordSourceInput { readonly now?: string; } +export interface GitHubSourceSettings { + readonly repos: readonly string[]; + readonly tokenRef?: string; + readonly resourceTypes: readonly GitHubSourceResourceType[]; + readonly state: GitHubSourceState; + readonly includeComments: boolean; + readonly labels?: readonly string[]; + readonly docPaths: readonly string[]; + readonly maxItemsPerRepo: number; +} + +export interface AddGitHubSourceInput { + readonly repos: readonly string[]; + readonly tokenRef?: string; + readonly name?: string; + readonly resourceTypes?: readonly GitHubSourceResourceType[]; + readonly state?: GitHubSourceState; + readonly includeComments?: boolean; + readonly labels?: readonly string[]; + readonly docPaths?: readonly string[]; + readonly maxItemsPerRepo?: number; + readonly now?: string; +} + export type AddSourceResult = | { readonly ok: true; readonly source: SignetSourceEntry; readonly created: boolean } | { readonly ok: false; readonly error: string }; @@ -95,6 +121,12 @@ const SOURCES_CONFIG_VERSION = 1; export const DEFAULT_DISCORD_MAX_MESSAGES_PER_CHANNEL = 1000; export const MAX_DISCORD_MAX_MESSAGES_PER_CHANNEL = 10_000; export const DEFAULT_DISCORD_DESKTOP_CACHE_PATH = defaultDiscordDesktopCachePath(); +export const DEFAULT_GITHUB_RESOURCE_TYPES = ["issues", "pulls", "discussions", "docs"] as const; +export const DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN = ["issues", "pulls", "docs"] as const; +export const DEFAULT_GITHUB_DOC_PATHS = ["README.md", "CHANGELOG.md"] as const; +export const DEFAULT_GITHUB_MAX_ITEMS_PER_REPO = 500; +export const MAX_GITHUB_MAX_ITEMS_PER_REPO = 10_000; +const VALID_GITHUB_RESOURCE_TYPES = new Set<string>(DEFAULT_GITHUB_RESOURCE_TYPES); export function getAgentsDir(): string { return process.env.SIGNET_PATH || `${homedir()}/.agents`; @@ -156,6 +188,10 @@ export function addDiscordSource(input: 
AddDiscordSourceInput, agentsDir = getAg return withSourcesConfigLock(agentsDir, () => addDiscordSourceUnlocked(input, agentsDir)); } +export function addGitHubSource(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult { + return withSourcesConfigLock(agentsDir, () => addGitHubSourceUnlocked(input, agentsDir)); +} + function addDiscordSourceUnlocked(input: AddDiscordSourceInput, agentsDir = getAgentsDir()): AddSourceResult { try { return addDiscordSourceChecked(input, agentsDir); @@ -214,6 +250,62 @@ function addDiscordSourceChecked(input: AddDiscordSourceInput, agentsDir = getAg return { ok: true, source, created: true }; } +function addGitHubSourceUnlocked(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult { + try { + return addGitHubSourceChecked(input, agentsDir); + } catch (err) { + const detail = err instanceof Error ? err.message : String(err); + return { ok: false, error: detail }; + } +} + +function addGitHubSourceChecked(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult { + const settings = buildGitHubSettings(input); + if ("error" in settings) return { ok: false, error: settings.error }; + + const now = input.now ?? new Date().toISOString(); + const cfg = loadSourcesConfigForWrite(agentsDir); + const settingsKey = settings.repos.slice().sort().join(","); + const sourceId = `github:${createHash("sha256").update(settingsKey).digest("hex").slice(0, 16)}`; + const root = `github://repos/${settings.repos.slice().sort().join(",")}`; + const existing = cfg.sources.find((source) => source.id === sourceId); + if (existing) { + const existingSettings = parseGitHubSettings(existing.providerSettings); + const updatedSettings = buildGitHubSettings(input, existingSettings); + if ("error" in updatedSettings) return { ok: false, error: updatedSettings.error }; + const updated: SignetSourceEntry = { + ...existing, + name: cleanName(input.name) ?? 
existing.name, + root, + enabled: true, + providerSettings: githubSettingsProviderSettings(updatedSettings), + updatedAt: now, + }; + saveSourcesConfig( + { + version: SOURCES_CONFIG_VERSION, + sources: cfg.sources.map((source) => (source.id === existing.id ? updated : source)), + }, + agentsDir, + ); + return { ok: true, source: updated, created: false }; + } + + const source: SignetSourceEntry = { + id: sourceId, + kind: "github", + name: cleanName(input.name) ?? settings.repos[0] ?? "GitHub Source", + root, + enabled: true, + mode: "read-only", + createdAt: now, + updatedAt: now, + providerSettings: githubSettingsProviderSettings(settings), + }; + saveSourcesConfig({ version: SOURCES_CONFIG_VERSION, sources: [...cfg.sources, source] }, agentsDir); + return { ok: true, source, created: true }; +} + export function parseDiscordSettings(raw?: SignetSourceProviderSettings): DiscordSourceSettings { const guildIds = Array.isArray(raw?.guildIds) ? cleanDiscordIds(raw.guildIds) : []; const tokenRef = typeof raw?.tokenRef === "string" ? raw.tokenRef.trim() : ""; @@ -243,6 +335,32 @@ export function parseDiscordSettings(raw?: SignetSourceProviderSettings): Discor }; } +export function parseGitHubSettings(raw?: SignetSourceProviderSettings): GitHubSourceSettings { + const repos = Array.isArray(raw?.repos) ? cleanGitHubRepos(raw.repos) : []; + const tokenRef = typeof raw?.tokenRef === "string" ? raw.tokenRef.trim() || undefined : undefined; + const resourceTypes = + Array.isArray(raw?.resourceTypes) && raw.resourceTypes.every((type) => typeof type === "string") + ? raw.resourceTypes.filter((type): type is GitHubSourceResourceType => isGitHubResourceType(type)) + : tokenRef + ? [...DEFAULT_GITHUB_RESOURCE_TYPES] + : [...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN]; + const labels = Array.isArray(raw?.labels) ? cleanStringArray(raw.labels) : undefined; + const docPaths = Array.isArray(raw?.docPaths) + ? 
cleanStringArray(raw.docPaths).filter(isSafeGitHubDocPath) + : [...DEFAULT_GITHUB_DOC_PATHS]; + return { + repos, + ...(tokenRef ? { tokenRef } : {}), + resourceTypes: resourceTypes.length > 0 ? resourceTypes : [...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN], + state: isGitHubState(raw?.state) ? raw.state : "all", + includeComments: raw?.includeComments !== false, + ...(labels && labels.length > 0 ? { labels } : {}), + docPaths: docPaths.length > 0 ? docPaths : [...DEFAULT_GITHUB_DOC_PATHS], + maxItemsPerRepo: + cleanPositiveInteger(raw?.maxItemsPerRepo, MAX_GITHUB_MAX_ITEMS_PER_REPO) ?? DEFAULT_GITHUB_MAX_ITEMS_PER_REPO, + }; +} + function buildDiscordSettings(input: AddDiscordSourceInput): DiscordSourceSettings | { readonly error: string } { if (input.syncMode && !isDiscordSyncMode(input.syncMode)) return { error: `Unsupported Discord sync mode: ${input.syncMode}` }; @@ -293,6 +411,74 @@ function buildDiscordSettings(input: AddDiscordSourceInput): DiscordSourceSettin }; } +function buildGitHubSettings( + input: AddGitHubSourceInput, + existing?: GitHubSourceSettings, +): GitHubSourceSettings | { readonly error: string } { + const repos = input.repos !== undefined ? cleanGitHubRepos(input.repos) : (existing?.repos ?? []); + if (repos.length === 0) return { error: "At least one GitHub repo pattern is required" }; + for (const repo of repos) { + if (!/^[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_*.-]+$/.test(repo)) { + return { error: `Invalid GitHub repo pattern: ${repo}. Expected owner/repo or owner/*` }; + } + } + const tokenRef = input.tokenRef !== undefined ? input.tokenRef.trim() || undefined : existing?.tokenRef; + const resourceTypes = input.resourceTypes + ? [...input.resourceTypes] + : existing?.resourceTypes?.length + ? [...existing.resourceTypes] + : tokenRef + ? 
[...DEFAULT_GITHUB_RESOURCE_TYPES] + : [...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN]; + if (resourceTypes.length === 0) return { error: "GitHub resourceTypes must include at least one resource type" }; + const invalidTypes = resourceTypes.filter((type) => !isGitHubResourceType(type)); + if (invalidTypes.length > 0) { + return { + error: `Invalid GitHub resource types: ${invalidTypes.join(", ")}. Must be one of: ${[...DEFAULT_GITHUB_RESOURCE_TYPES].join(", ")}`, + }; + } + if (!tokenRef && resourceTypes.includes("discussions")) { + return { error: "GitHub discussions require tokenRef because they use the GitHub GraphQL API" }; + } + if (input.state !== undefined && !isGitHubState(input.state)) { + return { error: "GitHub state must be one of: open, closed, all" }; + } + if (input.includeComments !== undefined && typeof input.includeComments !== "boolean") { + return { error: "GitHub includeComments must be a boolean" }; + } + if (input.labels !== undefined && !isStringArray(input.labels)) { + return { error: "GitHub labels must be an array of strings" }; + } + if (input.docPaths !== undefined) { + if (!isStringArray(input.docPaths)) return { error: "GitHub docPaths must be an array of strings" }; + const invalid = cleanStringArray(input.docPaths).filter((path) => !isSafeGitHubDocPath(path)); + if (invalid.length > 0) return { error: `Invalid GitHub docPaths: ${invalid.join(", ")}` }; + } + if (input.maxItemsPerRepo !== undefined) { + const maxItemsPerRepo = cleanPositiveInteger(input.maxItemsPerRepo, MAX_GITHUB_MAX_ITEMS_PER_REPO); + if (maxItemsPerRepo !== input.maxItemsPerRepo) { + return { + error: `GitHub maxItemsPerRepo must be an integer between 1 and ${MAX_GITHUB_MAX_ITEMS_PER_REPO}`, + }; + } + } + const labels = input.labels !== undefined ? cleanStringArray(input.labels) : existing?.labels; + const docPaths = + input.docPaths !== undefined + ? cleanStringArray(input.docPaths) + : (existing?.docPaths ?? 
[...DEFAULT_GITHUB_DOC_PATHS]); + return { + repos, + ...(tokenRef ? { tokenRef } : {}), + resourceTypes, + state: input.state ?? existing?.state ?? "all", + includeComments: input.includeComments ?? existing?.includeComments ?? true, + ...(labels && labels.length > 0 ? { labels } : {}), + docPaths, + maxItemsPerRepo: input.maxItemsPerRepo ?? existing?.maxItemsPerRepo ?? DEFAULT_GITHUB_MAX_ITEMS_PER_REPO, + }; +} + function discordSettingsProviderSettings(settings: DiscordSourceSettings): SignetSourceProviderSettings { return { guildIds: settings.guildIds, @@ -316,6 +502,19 @@ function discordSettingsProviderSettings(settings: DiscordSourceSettings): Signe }; } +function githubSettingsProviderSettings(settings: GitHubSourceSettings): SignetSourceProviderSettings { + return { + repos: settings.repos, + ...(settings.tokenRef ? { tokenRef: settings.tokenRef } : {}), + resourceTypes: settings.resourceTypes, + state: settings.state, + includeComments: settings.includeComments, + ...(settings.labels ? { labels: settings.labels } : {}), + docPaths: settings.docPaths, + maxItemsPerRepo: settings.maxItemsPerRepo, + }; +} + function addObsidianSourceUnlocked(input: AddObsidianSourceInput, agentsDir = getAgentsDir()): AddSourceResult { try { return addObsidianSourceChecked(input, agentsDir); @@ -497,6 +696,32 @@ function cleanLocalPath(value: string | undefined): string | undefined { return trimmed ? 
resolve(trimmed.replace(/^~(?=$|\/|\\)/, homedir())) : undefined; } +function cleanGitHubRepos(values: readonly unknown[]): readonly string[] { + return Array.from( + new Set( + values + .filter((value): value is string => typeof value === "string") + .map((value) => value.trim()) + .filter(Boolean), + ), + ); +} + +function cleanStringArray(values: readonly unknown[]): readonly string[] { + return Array.from( + new Set( + values + .filter((value): value is string => typeof value === "string") + .map((value) => value.trim()) + .filter(Boolean), + ), + ); +} + +function isStringArray(value: unknown): value is readonly string[] { + return Array.isArray(value) && value.every((entry) => typeof entry === "string"); +} + function isDiscordSnowflake(value: string): boolean { return /^\d{17,20}$/.test(value); } @@ -546,6 +771,31 @@ function looksLikeDiscordDesktopCacheRoot(value: string): boolean { return ["discord", "discordcanary", "discordptb", "discorddevelopment", "vesktop"].includes(base); } +function isGitHubResourceType(value: unknown): value is GitHubSourceResourceType { + return typeof value === "string" && VALID_GITHUB_RESOURCE_TYPES.has(value); +} + +function isGitHubState(value: unknown): value is GitHubSourceState { + return value === "open" || value === "closed" || value === "all"; +} + +function isMarkdownDocPath(path: string): boolean { + return path.toLowerCase().endsWith(".md"); +} + +function isMarkdownDocGlob(path: string): boolean { + const lowered = path.toLowerCase(); + return lowered.endsWith("/*.md") || lowered.endsWith("/**/*.md"); +} + +function isSafeGitHubDocPath(value: string): boolean { + const path = value.trim(); + if (!path) return false; + if (path.startsWith("/") || path.includes("\\") || path.includes("?") || path.includes("#")) return false; + if (path.split("/").some((segment) => segment === "" || segment === "." 
|| segment === "..")) return false; + return isMarkdownDocPath(path) || isMarkdownDocGlob(path); +} + function mergeDefaultObsidianExcludeGlobs(values: readonly string[] | undefined): readonly string[] { return [...DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, ...(cleanExcludeGlobs(values) ?? [])].filter( (value, index, all) => all.indexOf(value) === index, diff --git a/platform/daemon/src/github-source-fetch.test.ts b/platform/daemon/src/github-source-fetch.test.ts new file mode 100644 index 000000000..0e788654c --- /dev/null +++ b/platform/daemon/src/github-source-fetch.test.ts @@ -0,0 +1,137 @@ +import { afterEach, describe, expect, it, mock } from "bun:test"; +import { + expandRepoGlob, + fetchDiscussions, + fetchIssues, + fetchPullRequestsBySearch, + fetchRepoDocs, +} from "./github-source-fetch"; + +const originalFetch = globalThis.fetch; + +afterEach(() => { + globalThis.fetch = originalFetch; +}); + +describe("github-source-fetch", () => { + it("escapes wildcard repo glob literals and caps expansion", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + expect(String(url)).toContain("per_page=2"); + return Promise.resolve( + Response.json([ + { full_name: "owner/private.*", name: "private.*" }, + { full_name: "owner/privateXarchive", name: "privateXarchive" }, + ]), + ); + }) as typeof fetch; + + const result = await expandRepoGlob("owner", "private.*", undefined, 2); + + expect(result.repos).toEqual(["owner/private.*"]); + expect(result.truncated).toBe(true); + }); + + it("bounds issue scanning separately from indexed issue count on PR-heavy repos", async () => { + let calls = 0; + globalThis.fetch = mock(() => { + calls++; + return Promise.resolve( + Response.json([ + { + number: calls, + pull_request: { url: "x" }, + title: "PR", + body: "", + state: "open", + html_url: "", + user: null, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-01T00:00:00.000Z", + closed_at: null, + comments: 0, + }, + ]), + ); + }) as 
typeof fetch; + + const result = await fetchIssues({ owner: "o", repo: "r" }, undefined, "all", 1); + + expect(result.resources).toEqual([]); + expect(calls).toBeLessThanOrEqual(5); + }); + + it("escapes PR label search values", async () => { + let requested = ""; + globalThis.fetch = mock((url: string | URL | Request) => { + requested = String(url); + return Promise.resolve(Response.json({ items: [] })); + }) as typeof fetch; + + await fetchPullRequestsBySearch({ owner: "o", repo: "r" }, ['quoted"label'], undefined, "open", 10); + + expect(decodeURIComponent(requested)).toContain('label:"quoted\\"label"'); + }); + + it("maps GraphQL discussion closed state without requiring a state string field", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + Response.json({ + data: { + repository: { + discussions: { + nodes: [ + { + number: 7, + title: "Closed discussion", + body: "body", + url: "https://github.com/o/r/discussions/7", + closed: true, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "alice" }, + labels: { nodes: [{ name: "roadmap" }] }, + comments: { totalCount: 0 }, + }, + ], + }, + }, + }, + }), + ), + ) as typeof fetch; + + const result = await fetchDiscussions({ owner: "o", repo: "r", token: "token" }, undefined, "closed", 10); + + expect(result.resources[0]?.state).toBe("closed"); + expect(result.resources[0]?.labels).toEqual(["roadmap"]); + }); + + it("applies maxItems to wildcard docs", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.includes("/git/trees/")) { + return Promise.resolve( + Response.json({ + tree: [ + { type: "blob", path: "docs/a.md" }, + { type: "blob", path: "docs/b.md" }, + ], + }), + ); + } + return Promise.resolve( + Response.json({ + content: Buffer.from("# doc").toString("base64"), + encoding: "base64", + sha: "abc", + }), + ); + }) as typeof fetch; + + const result = await fetchRepoDocs({ owner: 
"o", repo: "r" }, ["docs/*.md"], "main", 1); + + expect(result.resources).toHaveLength(1); + expect(result.resources[0]?.path).toBe("docs/a.md"); + }); +}); diff --git a/platform/daemon/src/github-source-fetch.ts b/platform/daemon/src/github-source-fetch.ts new file mode 100644 index 000000000..0aea50049 --- /dev/null +++ b/platform/daemon/src/github-source-fetch.ts @@ -0,0 +1,599 @@ +import type { GitHubSourceState } from "@signet/core"; +import { logger } from "./logger"; + +export interface GitHubFetchConfig { + readonly token?: string; + readonly owner: string; + readonly repo: string; +} + +export interface GitHubLabel { + readonly name: string; + readonly color?: string; +} + +export interface GitHubIssue { + readonly number: number; + readonly title: string; + readonly body: string | null; + readonly state: string; + readonly html_url: string; + readonly user: { readonly login: string } | null; + readonly labels: readonly GitHubLabel[]; + readonly created_at: string; + readonly updated_at: string; + readonly closed_at: string | null; + readonly pull_request?: { readonly url: string }; + readonly comments: number; +} + +export interface GitHubPullRequest { + readonly number: number; + readonly title: string; + readonly body: string | null; + readonly state: string; + readonly html_url: string; + readonly user: { readonly login: string } | null; + readonly labels: readonly GitHubLabel[]; + readonly created_at: string; + readonly updated_at: string; + readonly closed_at: string | null; + readonly merged_at: string | null; + readonly draft: boolean; + readonly base: { readonly ref: string }; + readonly head: { readonly ref: string }; + readonly comments: number; + readonly review_comments: number; +} + +export interface GitHubComment { + readonly id: number; + readonly body: string; + readonly user?: { readonly login?: string } | null; + readonly author?: { readonly login?: string } | string | null; + readonly created_at: string; + readonly updated_at: string; 
/**
 * Performs a single GitHub API call with a per-attempt timeout, bounded
 * retries, and rate-limit handling.
 *
 * Retries (up to `MAX_RETRIES` attempts) happen on network errors, timeouts,
 * 5xx responses, and 403 responses whose rate-limit headers show an exhausted
 * quota. Any other response — including 4xx — is returned to the caller, which
 * is expected to inspect `status`.
 *
 * @param url - Absolute request URL.
 * @param token - Optional token sent as a Bearer credential.
 * @param method - HTTP method; defaults to `GET`.
 * @param body - Optional payload, serialized as JSON when truthy.
 * @returns The status, headers, and parsed JSON body (`null` for 204).
 * @throws The last observed error once all attempts are used up.
 */
async function githubRequest(url: string, token?: string, method = "GET", body?: unknown): Promise<GitHubApiResponse> {
	const headers: Record<string, string> = {
		Accept: "application/vnd.github.v3+json",
		"User-Agent": "signet-daemon",
	};
	if (token) headers.Authorization = `Bearer ${token}`;
	if (body) headers["Content-Type"] = "application/json";

	const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));
	// Wait until shortly after the advertised reset, but never longer than a minute.
	const untilReset = (resetAtMs: number): number => Math.min(resetAtMs - Date.now() + 1000, 60_000);

	let lastError: Error | null = null;
	for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
		const controller = new AbortController();
		const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
		let retryDelayMs = RETRY_BASE_DELAY_MS * (attempt + 1);
		try {
			const response = await fetch(url, {
				method,
				headers,
				body: body ? JSON.stringify(body) : undefined,
				signal: controller.signal,
			});
			const remaining = Number(response.headers.get("x-ratelimit-remaining") ?? "5000");
			const reset = Number(response.headers.get("x-ratelimit-reset") ?? "0") * 1000;
			const resetPending = reset > Date.now();

			if (response.status === 403 && remaining === 0 && resetPending) {
				// Quota exhausted: wait once for the reset window, then retry.
				lastError = new Error(`GitHub API rate limit exhausted: ${response.status}`);
				retryDelayMs = untilReset(reset);
			} else if (response.status >= 500) {
				lastError = new Error(`GitHub API ${response.status}: ${await response.text()}`);
			} else {
				// The abort timer stays armed while the body is read so a stalled
				// download is still bounded by REQUEST_TIMEOUT_MS.
				const parsed: unknown = response.status === 204 ? null : await response.json();
				clearTimeout(timeout);
				// Pre-emptive throttle when the quota is nearly used up.
				if (remaining < 10 && resetPending) await sleep(untilReset(reset));
				return { status: response.status, headers: response.headers, body: parsed };
			}
		} catch (err) {
			lastError = err instanceof Error ? err : new Error(String(err));
		} finally {
			// Always release the timer, including when fetch itself rejects.
			clearTimeout(timeout);
		}
		// No point in sleeping after the final attempt; fall through to the throw.
		if (attempt < MAX_RETRIES - 1) await sleep(retryDelayMs);
	}
	throw lastError ?? new Error("GitHub API request failed after retries");
}
/**
 * Expands a repository-name glob (for example `signet-*`) into full repository names.
 *
 * The owner is tried first as an organization and then as a user; the first
 * listing that yields matches (or hits the cap) wins.
 *
 * @param owner - Organization or user login.
 * @param pattern - Repository name or glob. Without `*` it is returned as-is.
 * @param token - Optional API token.
 * @param maxRepos - Maximum number of repositories to list before giving up.
 * @returns Matching `owner/name` strings and whether the listing was cut short.
 */
export async function expandRepoGlob(
	owner: string,
	pattern: string,
	token?: string,
	maxRepos = 500,
): Promise<RepoGlobExpansion> {
	if (!pattern.includes("*")) return { repos: [`${owner}/${pattern}`], truncated: false };
	const regex = new RegExp(`^${globToRegexSource(pattern)}$`);
	// The page size must stay constant across pages: with page-number pagination,
	// changing `per_page` between requests shifts the window and re-reads or
	// skips rows.
	const pageSize = Math.max(1, Math.min(PER_PAGE, maxRepos));
	for (const prefix of [`/orgs/${owner}/repos`, `/users/${owner}/repos`]) {
		const repos: Array<{ full_name: string; name: string }> = [];
		let truncated = false;
		for (let page = 1; repos.length < maxRepos; page++) {
			const response = await githubRequest(
				`${GITHUB_API_BASE}${prefix}?per_page=${pageSize}&page=${page}&type=all`,
				token,
			);
			if (response.status !== 200) break;
			const batch = Array.isArray(response.body)
				? (response.body as Array<{ full_name: string; name: string }>)
				: [];
			const room = maxRepos - repos.length;
			repos.push(...batch.slice(0, room));
			const pageWasFull = batch.length >= pageSize;
			// Truncated when rows were dropped, or when the cap was reached on a
			// full page (more rows may exist beyond it).
			if (batch.length > room || (pageWasFull && repos.length >= maxRepos)) truncated = true;
			if (!pageWasFull) break;
		}
		const matches = repos.filter((repo) => regex.test(repo.name)).map((repo) => repo.full_name);
		if (matches.length > 0 || truncated) return { repos: matches.slice(0, maxRepos), truncated };
	}
	return { repos: [], truncated: false };
}
/**
 * Lists pull requests for a repository, newest page first, up to `maxItems`.
 *
 * @param config - Repository coordinates and optional token.
 * @param _since - Currently unused; kept for signature parity with `fetchIssues`.
 * @param state - State filter forwarded to the API.
 * @param maxItems - Upper bound on returned resources.
 * @returns Mapped resources plus any non-200 fetch error (the listing stops at
 *   the first failing page).
 */
export async function fetchPullRequests(
	config: GitHubFetchConfig,
	_since?: string,
	state: GitHubSourceState = "all",
	maxItems = 500,
): Promise<GitHubFetchResult> {
	const resources: GitHubResource[] = [];
	const errors: Array<GitHubFetchResult["errors"][number]> = [];
	// Keep `per_page` fixed for every page. Shrinking it on the last page while
	// still incrementing `page` makes the API return an overlapping window
	// (e.g. 100 + 50 would re-read rows 50-99 and never see rows 100-149).
	const pageSize = Math.min(PER_PAGE, maxItems);
	for (let page = 1; resources.length < maxItems; page++) {
		const url = new URL(`${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/pulls`);
		url.searchParams.set("state", state === "all" ? "all" : state);
		url.searchParams.set("per_page", String(pageSize));
		url.searchParams.set("page", String(page));
		const response = await githubRequest(url.toString(), config.token);
		if (response.status !== 200) {
			errors.push({ message: `Pull requests fetch failed: ${response.status}`, retryable: response.status >= 500 });
			break;
		}
		const batch = response.body as GitHubPullRequest[];
		resources.push(...batch.map(pullResource));
		// A short page means there is nothing further to read.
		if (batch.length < pageSize) break;
	}
	// The final page may overshoot the cap; trim it here.
	return { resources: resources.slice(0, maxItems), errors };
}
/**
 * Pages through a REST comments endpoint until it is drained or
 * `MAX_COMMENTS_PER_RESOURCE` comments have been collected.
 *
 * @param baseUrl - Comments collection URL without query parameters.
 * @param token - Optional API token.
 * @returns At most `MAX_COMMENTS_PER_RESOURCE` comments.
 * @throws Error when any page responds with a non-200 status.
 */
async function fetchComments(baseUrl: string, token?: string): Promise<GitHubComment[]> {
	const collected: GitHubComment[] = [];
	for (let page = 1; collected.length < MAX_COMMENTS_PER_RESOURCE; page++) {
		// Ask only for what is still missing, capped at the page size.
		const requested = Math.min(PER_PAGE, MAX_COMMENTS_PER_RESOURCE - collected.length);
		const response = await githubRequest(`${baseUrl}?per_page=${requested}&page=${page}`, token);
		if (response.status !== 200) throw new Error(`GitHub comments fetch failed: ${response.status}`);
		const batch = response.body as GitHubComment[];
		collected.push(...batch);
		// Fewer rows than requested means this was the last page.
		if (batch.length < requested) break;
	}
	return collected.slice(0, MAX_COMMENTS_PER_RESOURCE);
}
/**
 * Fetches top-level comments of a discussion through the GraphQL API.
 *
 * Pages with a cursor in chunks of at most 100 (the connection limit for
 * `first`) until `MAX_COMMENTS_PER_RESOURCE` comments are collected or the
 * connection is exhausted.
 *
 * @param config - Repository coordinates and token.
 * @param number - Discussion number.
 * @returns Comments normalized to the REST-like `GitHubComment` shape.
 * @throws Error on a non-200 response or when the payload carries GraphQL errors.
 */
export async function fetchDiscussionComments(config: GitHubFetchConfig, number: number): Promise<GitHubComment[]> {
	const GRAPHQL_MAX_PAGE_SIZE = 100;
	type CommentNode = DiscussionCommentNode & { readonly databaseId?: number | null };
	interface CommentsPayload {
		readonly data?: {
			readonly repository?: {
				readonly discussion?: {
					readonly comments?: {
						readonly pageInfo?: { readonly hasNextPage?: boolean; readonly endCursor?: string | null };
						readonly nodes?: CommentNode[];
					};
				} | null;
			} | null;
		} | null;
		readonly errors?: Array<{ readonly message?: string }>;
	}

	const query = `
		query($owner:String!, $name:String!, $number:Int!, $first:Int!, $after:String) {
			repository(owner:$owner, name:$name) {
				discussion(number:$number) {
					comments(first:$first, after:$after) {
						pageInfo { hasNextPage endCursor }
						nodes { id databaseId body createdAt updatedAt author { login } }
					}
				}
			}
		}`;

	const comments: GitHubComment[] = [];
	let skipped = 0;
	let after: string | null = null;
	while (comments.length < MAX_COMMENTS_PER_RESOURCE) {
		const first = Math.min(GRAPHQL_MAX_PAGE_SIZE, MAX_COMMENTS_PER_RESOURCE - comments.length);
		const response = await githubRequest(GRAPHQL_URL, config.token, "POST", {
			query,
			variables: { owner: config.owner, name: config.repo, number, first, after },
		});
		if (response.status !== 200) throw new Error(`Discussion comments fetch failed: ${response.status}`);
		const payload = response.body as CommentsPayload;
		// GraphQL reports failures with HTTP 200 plus an `errors` array; surface
		// them instead of returning an empty list.
		if (payload.errors?.length) {
			const detail = payload.errors.map((error) => error.message ?? "GraphQL error").join("; ");
			throw new Error(`Discussion comments fetch failed: ${detail}`);
		}
		const connection = payload.data?.repository?.discussion?.comments;
		for (const node of connection?.nodes ?? []) {
			// `id` is an opaque node ID string, so Number(id) is NaN; the numeric
			// identifier is `databaseId`.
			if (typeof node.databaseId !== "number") {
				skipped++;
				continue;
			}
			comments.push({
				id: node.databaseId,
				body: node.body,
				author: node.author,
				user: node.author,
				created_at: node.createdAt,
				updated_at: node.updatedAt,
			});
		}
		const nextCursor = connection?.pageInfo?.hasNextPage ? connection.pageInfo.endCursor : null;
		if (!nextCursor) break;
		after = nextCursor;
	}
	if (skipped > 0) {
		logger.warn("github-source", "Skipped discussion comments without a numeric databaseId", {
			repo: `${config.owner}/${config.repo}`,
			discussion: number,
			skipped,
		});
	}
	return comments.slice(0, MAX_COMMENTS_PER_RESOURCE);
}
/**
 * Downloads one file through the repository contents endpoint and wraps it as a doc resource.
 *
 * @param config - Repository coordinates and optional token.
 * @param path - Repository-relative file path.
 * @param ref - Branch, tag, or commit to read from.
 * @returns The doc resource, or `null` when the file is missing (404) or the
 *   payload is not non-empty base64 content.
 * @throws Error for any other non-200 status.
 */
async function fetchDoc(config: GitHubFetchConfig, path: string, ref: string): Promise<GitHubResource | null> {
	const repoUrl = `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}`;
	const requestUrl = `${repoUrl}/contents/${encodeURIComponent(path)}?ref=${encodeURIComponent(ref)}`;
	const { status, body } = await githubRequest(requestUrl, config.token);

	if (status === 404) return null;
	if (status !== 200) throw new Error(`Doc fetch failed for ${path}: ${status}`);

	const file = body as { content?: string; encoding?: string; sha?: string; html_url?: string };
	const hasBase64Content = file.encoding === "base64" && Boolean(file.content);
	if (!hasBase64Content || !file.content) return null;

	const text = Buffer.from(file.content, "base64").toString("utf8");
	return docResource(path, text, file.sha ?? "", file.html_url);
}
/**
 * Converts a REST issue payload into the provider-neutral resource shape.
 * A missing body becomes an empty string and a missing author becomes `null`;
 * issues never carry a merge timestamp or extra metadata.
 */
function issueResource(issue: GitHubIssue): GitHubResource {
	const { number, title, state } = issue;
	const labelNames = issue.labels.map(({ name }) => name);
	const authorLogin = issue.user?.login ?? null;
	return {
		type: "issue",
		number,
		title,
		body: issue.body ?? "",
		state,
		url: issue.html_url,
		labels: labelNames,
		author: authorLogin,
		createdAt: issue.created_at,
		updatedAt: issue.updated_at,
		closedAt: issue.closed_at,
		mergedAt: null,
		commentsCount: issue.comments,
		extra: {},
	};
}
/**
 * Converts a GraphQL discussion node into the provider-neutral resource shape.
 * State is derived from the `closed` flag. A closed discussion reuses
 * `updatedAt` as its close time because the node carries no dedicated field.
 */
function discussionResource(node: DiscussionNode): GitHubResource {
	const isClosed = Boolean(node.closed);
	const labelNodes = node.labels?.nodes;
	let labelNames: string[] = [];
	if (labelNodes) {
		labelNames = [];
		for (const label of labelNodes) {
			// Drop labels whose name is missing or empty.
			if (label.name) labelNames.push(label.name);
		}
	}
	return {
		type: "discussion",
		number: node.number,
		title: node.title,
		body: node.body,
		state: isClosed ? "closed" : "open",
		url: node.url,
		labels: labelNames,
		author: node.author?.login ?? null,
		createdAt: node.createdAt,
		updatedAt: node.updatedAt,
		closedAt: isClosed ? node.updatedAt : null,
		mergedAt: null,
		commentsCount: node.comments?.totalCount ?? 0,
		extra: {},
	};
}
/**
 * Translates a simple glob into regular-expression source (without anchors).
 *
 * Supported wildcards:
 * - `*` matches any run of characters (including `/`).
 * - `?` matches exactly one character.
 * - a leading-of-segment `**` followed by `/` matches zero or more whole directories, so
 *   `docs/**` + `/*.md` style patterns also match files directly inside `docs/`.
 *
 * Every other character is matched literally.
 */
function globToRegexSource(pattern: string): string {
	let source = "";
	let index = 0;
	while (index < pattern.length) {
		const char = pattern.charAt(index);
		if (char === "*") {
			const atSegmentStart = index === 0 || pattern.charAt(index - 1) === "/";
			let end = index;
			while (pattern.charAt(end + 1) === "*") end++;
			const isDoubleStar = end > index;
			if (isDoubleStar && atSegmentStart && pattern.charAt(end + 1) === "/") {
				// `**/` — optional directory prefix, so zero directories also match.
				source += "(?:.*/)?";
				index = end + 2;
			} else {
				// A single star, or a star run elsewhere, behaves like one `.*`.
				source += ".*";
				index = end + 1;
			}
			continue;
		}
		source += char === "?" ? "." : escapeRegex(char);
		index++;
	}
	return source;
}

/** Backslash-escapes a single character when it is a regex metacharacter. */
function escapeRegex(char: string): string {
	return /[\\^$+?.()|[\]{}]/.test(char) ? `\\${char}` : char;
}
mkdirSync(join(dir, "memory"), { recursive: true }); + closeDbAccessor(); + initDbAccessor(join(dir, "memory", "memories.db")); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + closeDbAccessor(); + if (previousSignetPath === undefined) Reflect.deleteProperty(process.env, "SIGNET_PATH"); + else process.env.SIGNET_PATH = previousSignetPath; + rmSync(dir, { recursive: true, force: true }); + }); + + it("indexes GitHub issue and comment artifacts with source provenance", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ + name: "signetai", + full_name: "Signet-AI/signetai", + default_branch: "main", + html_url: "https://github.com/Signet-AI/signetai", + owner: { login: "Signet-AI" }, + }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Index GitHub", + body: "issue body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [{ name: "sources" }], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 1, + }, + ]), + ); + } + if (text.includes("/issues/12/comments")) { + return Promise.resolve( + Response.json([ + { + id: 99, + body: "comment body", + user: { login: "bob" }, + created_at: "2026-01-03T00:00:00.000Z", + updated_at: "2026-01-03T00:00:00.000Z", + }, + ]), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + const added = addGitHubSource( + { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues"], + maxItemsPerRepo: 5, + now: "2026-01-01T00:00:00.000Z", + }, + dir, + ); + expect(added.ok).toBe(true); + if (added.ok === false) throw new Error(added.error); + + const result = await githubSourceProvider.sync?.({ + source: added.source, + agentsDir: dir, + agentId: "default", + 
shouldContinue: () => true, + }); + + expect(result?.failures).toEqual([]); + const rows = sourceRows(added.source.id); + expect(rows.map((row) => row.source_kind)).toContain("source_github_issue"); + expect(rows.map((row) => row.source_kind)).toContain("source_github_comment"); + expect(rows.find((row) => row.source_kind === "source_github_issue")?.source_external_id).toBe( + "Signet-AI/signetai:issue:12", + ); + expect(rows.find((row) => row.source_kind === "source_github_comment")?.content).toContain("comment body"); + }); + + it("records requested discussion failures when no token is available", async () => { + const source: SignetSourceEntry = { + id: "github:test", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/signetai"], + resourceTypes: ["discussions"], + state: "all", + includeComments: true, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + globalThis.fetch = mock((url: string | URL | Request) => { + if (String(url).endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + expect(result?.failures[0]?.message).toContain("discussions require tokenRef"); + expect(sourceRows(source.id).map((row) => row.source_kind)).toContain("source_github_failure"); + }); + + it("purges source-owned GitHub artifacts through the provider", () => { + indexExternalMemoryArtifact({ + agentId: "default", + harness: "github", + sourceId: "github:test", + sourceRoot: "github://repos/Signet-AI/signetai", + sourceExternalId: "Signet-AI/signetai:issue:1", + 
/**
 * Test helper: reads every non-deleted artifact row owned by `sourceId`,
 * ordered by source path.
 */
function sourceRows(sourceId: string): Array<{
	source_kind: string;
	source_path: string;
	source_external_id: string | null;
	source_meta_json: string | null;
	content: string;
}> {
	const sql = `SELECT source_kind, source_path, source_external_id, source_meta_json, content
				 FROM memory_artifacts
				 WHERE source_id = ?
				   AND COALESCE(is_deleted, 0) = 0
				 ORDER BY source_path`;
	return getDbAccessor().withReadDb((db) => {
		const statement = db.prepare(sql);
		return statement.all(sourceId) as ReturnType<typeof sourceRows>;
	});
}
/**
 * Synchronizes one configured GitHub source: resolves its repositories,
 * fetches the requested resource types, indexes each resource (and its
 * comments), purges stale artifacts, and records failures as artifacts.
 *
 * @param context - Source entry, agent information, cancellation probe, and
 *   optional progress callback.
 * @returns Counts of indexed artifacts and scanned repositories plus the
 *   failures collected while fetching.
 * @throws Error when the source has no repositories configured.
 */
async function syncGitHubSource(context: SourceProviderSyncContext): Promise<SourceProviderSyncResult> {
	const settings = parseGitHubSettings(context.source.providerSettings);
	if (settings.repos.length === 0) throw new Error("GitHub source has no repositories");

	const failures: SourceFailureState[] = [];
	const syncStartedAt = new Date().toISOString();
	const agentId = context.agentId || resolveDaemonAgentId();
	const token = settings.tokenRef ? await resolveToken(settings.tokenRef) : undefined;
	const repos = await resolveRepos(settings, token);
	let indexed = 0;
	let scanned = 0;

	for (const repo of repos) {
		if (!context.shouldContinue()) break;
		context.onProgress?.({ scanned, total: repos.length, indexed, currentPath: `github://${repo.fullName}` });
		const config: GitHubFetchConfig = { owner: repo.owner, repo: repo.repo, token };
		const seenPaths = new Set<string>();
		const yielder = yieldEvery(5);
		// Set when cancellation stops the resource loop before every fetched
		// resource has been written.
		let interrupted = false;

		for (const resource of await fetchRepoResources(context.source, settings, config, repo, failures)) {
			if (!context.shouldContinue()) {
				interrupted = true;
				break;
			}
			const written = await writeResourceWithComments(
				context.source,
				agentId,
				repo.fullName,
				config,
				resource,
				settings,
			);
			indexed += written;
			seenPaths.add(resourcePath(repo.fullName, resource));
			await yielder();
		}
		scanned++;
		context.onProgress?.({ scanned, total: repos.length, indexed, currentPath: `github://${repo.fullName}` });
		// Only purge after a complete, failure-free pass. With a partial
		// `seenPaths` (cancelled run) the purge would presumably treat unvisited
		// resources as stale. `failures` is cumulative across repositories, so one
		// failure also suppresses purging for the repositories that follow.
		if (!interrupted && failures.length === 0)
			purgeStaleGitHubArtifacts(context.source.id, agentId, syncStartedAt, seenPaths, repo.fullName);
	}
	for (const failure of failures) {
		indexed += writeFailureArtifact(context.source, agentId, failure);
	}

	return { indexed, scanned, total: repos.length, failures };
}
/**
 * Indexes a resource and, when enabled, its comments.
 *
 * Comment fetching is skipped for docs, when `includeComments` is off, or when
 * the resource reports a comment count of zero or less. A failed comment fetch
 * is logged and stored as a failure artifact instead of being rethrown.
 *
 * @returns Number of artifacts written (resource, comments, and any failure artifact).
 */
async function writeResourceWithComments(
	source: SignetSourceEntry,
	agentId: string,
	repo: string,
	config: GitHubFetchConfig,
	resource: GitHubResource,
	settings: GitHubSourceSettings,
): Promise<number> {
	const resourceWrites = writeResourceArtifact(source, agentId, repo, resource);
	// Keep the `<= 0` comparison as written: a NaN count does not satisfy it,
	// so comments are still fetched for such resources.
	const skipComments = !settings.includeComments || resource.commentsCount <= 0 || resource.type === "doc";
	if (skipComments) return resourceWrites;

	let extraWrites = 0;
	try {
		for (const comment of await fetchCommentsForResource(config, resource)) {
			extraWrites += writeCommentArtifact(source, agentId, repo, resource, comment);
		}
	} catch (err) {
		logGitHubFetchError(source.id, repo, `${resource.type}_comments`, err);
		const failure = failureState(source, `GitHub ${resource.type} comment fetch failed: ${errorMessage(err)}`, {
			repo,
			type: resource.type,
			number: resource.number,
			path: resource.path,
		});
		extraWrites += writeFailureArtifact(source, agentId, failure);
	}
	return resourceWrites + extraWrites;
}
author = + typeof comment.author === "string" ? comment.author : (comment.author?.login ?? comment.user?.login ?? null); + const commentId = String(comment.id); + indexExternalMemoryArtifact({ + agentId, + harness: GITHUB_HARNESS, + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: `${resourceExternalId(repo, resource)}#comment:${commentId}`, + sourceParentPath: resourcePath(repo, resource), + sourcePath: `${resourcePath(repo, resource)}#comment-${commentId}`, + sourceKind: "source_github_comment", + sourceMtimeMs: Date.parse(comment.updated_at) || Date.now(), + capturedAt: comment.updated_at, + content: [`# Comment on ${resource.title}`, "", `Author: ${author ?? "unknown"}`, "", comment.body].join("\n"), + sourceMeta: { + provider: GITHUB_PROVIDER_KIND, + repo, + parentType: resource.type, + parentNumber: resource.number, + parentPath: resource.path, + commentId, + author, + createdAt: comment.created_at, + updatedAt: comment.updated_at, + }, + }); + return 1; +} + +function writeFetchFailures( + source: SignetSourceEntry, + failures: SourceFailureState[], + repo: string, + phase: string, + errors: readonly { readonly message: string; readonly retryable: boolean }[], +): void { + for (const error of errors) { + failures.push(failureState(source, error.message, { repo, phase, retryable: error.retryable })); + } +} + +function writeFailureArtifact(source: SignetSourceEntry, agentId: string, failure: SourceFailureState): number { + indexExternalMemoryArtifact({ + agentId, + harness: GITHUB_HARNESS, + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: `failure:${failure.failedAt}:${failure.message}`, + sourcePath: `github://source/${source.id}/failures/${encodeURIComponent(failure.failedAt)}`, + sourceKind: "source_github_failure", + sourceMtimeMs: Date.parse(failure.failedAt) || Date.now(), + capturedAt: failure.failedAt, + content: failure.message, + sourceMeta: failure.metadata, + }); + return 1; +} + +async function 
resolveRepos(settings: GitHubSourceSettings, token?: string): Promise { + const resolved: ResolvedRepo[] = []; + for (const pattern of settings.repos) { + const [owner, repoPart] = pattern.split("/"); + if (!owner || !repoPart) continue; + if (repoPart.includes("*")) { + const expanded = await expandRepoGlob(owner, repoPart, token, settings.maxItemsPerRepo); + if (expanded.truncated) { + logger.warn("github-source", "Wildcard repo source expansion hit configured cap", { + owner, + pattern: repoPart, + limit: settings.maxItemsPerRepo, + }); + } + for (const fullName of expanded.repos) { + const [expandedOwner, expandedRepo] = fullName.split("/"); + if (expandedOwner && expandedRepo) { + resolved.push({ owner: expandedOwner, repo: expandedRepo, fullName, defaultBranch: "main" }); + } + } + } else { + resolved.push({ owner, repo: repoPart, fullName: `${owner}/${repoPart}`, defaultBranch: "main" }); + } + } + const withDefaultBranches: ResolvedRepo[] = []; + for (const repo of resolved) { + const info = await fetchRepoInfo({ owner: repo.owner, repo: repo.repo, token }).catch(() => null); + withDefaultBranches.push({ ...repo, defaultBranch: info?.defaultBranch ?? 
repo.defaultBranch }); + } + return withDefaultBranches; +} + +async function resolveToken(tokenRef: string): Promise { + try { + return await getSecret(tokenRef); + } catch (err) { + throw new Error(`Failed to resolve GitHub token ref '${tokenRef}': ${errorMessage(err)}`); + } +} + +function failureState( + source: SignetSourceEntry, + message: string, + metadata?: Readonly>, +): SourceFailureState { + return { + sourceId: source.id, + providerKind: GITHUB_PROVIDER_KIND, + failedAt: new Date().toISOString(), + recoverable: true, + message, + metadata, + }; +} + +function purgeStaleGitHubArtifacts( + sourceId: string, + agentId: string, + syncStartedAt: string, + seenPaths: ReadonlySet, + repo: string, +): void { + getDbAccessor().withWriteTx((db) => { + const rows = db + .prepare( + `SELECT rowid, source_path FROM memory_artifacts + WHERE agent_id = ? + AND source_id = ? + AND source_path >= ? + AND source_path < ? + AND updated_at < ? + AND COALESCE(is_deleted, 0) = 0`, + ) + .all(agentId, sourceId, `github://${repo}`, `github://${repo}\uffff`, syncStartedAt) as Array<{ + rowid: number; + source_path: string; + }>; + for (const row of rows) { + if (seenPaths.has(row.source_path)) continue; + countChanges( + db + .prepare("UPDATE memory_artifacts SET is_deleted = 1, updated_at = ? WHERE rowid = ?") + .run(syncStartedAt, row.rowid), + ); + } + }); +} + +function resourceExternalId(repo: string, resource: GitHubResource): string { + if (resource.type === "doc") return `${repo}:docs:${resource.path ?? ""}`; + return `${repo}:${resource.type}:${resource.number ?? 0}`; +} + +function resourcePath(repo: string, resource: GitHubResource): string { + if (resource.type === "doc") return `github://${repo}/docs/${resource.path ?? ""}`; + return `github://${repo}/${resource.type}s/${resource.number ?? 0}`; +} + +function resourceContent(repo: string, resource: GitHubResource): string { + const title = + resource.type === "doc" ? 
resource.title : `${repo} ${resource.type} #${resource.number}: ${resource.title}`; + return [ + `# ${title}`, + "", + `URL: ${resource.url || `https://github.com/${repo}`}`, + `State: ${resource.state}`, + resource.author ? `Author: ${resource.author}` : undefined, + resource.labels.length > 0 ? `Labels: ${resource.labels.join(", ")}` : undefined, + "", + resource.body, + ] + .filter((line): line is string => line !== undefined) + .join("\n"); +} + +function errorMessage(err: unknown): string { + return err instanceof Error ? err.message : String(err); +} diff --git a/platform/daemon/src/routes/sources-routes.test.ts b/platform/daemon/src/routes/sources-routes.test.ts index 477cd938d..307cb0b68 100644 --- a/platform/daemon/src/routes/sources-routes.test.ts +++ b/platform/daemon/src/routes/sources-routes.test.ts @@ -1,4 +1,4 @@ -import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -19,6 +19,8 @@ import { } from "../source-index-progress"; import { registerSourcesRoutes } from "./sources-routes"; +const originalFetch = globalThis.fetch; + describe("Sources routes", () => { let dir = ""; let vault = ""; @@ -41,6 +43,7 @@ describe("Sources routes", () => { }); afterEach(() => { + globalThis.fetch = originalFetch; clearSourceIndexProgressForTests(); closeDbAccessor(); if (previousSignetPath === undefined) Reflect.deleteProperty(process.env, "SIGNET_PATH"); @@ -199,6 +202,41 @@ describe("Sources routes", () => { ).toBeGreaterThan(0); }); + it("connects a GitHub source through provider-neutral source config", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: 
"Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?") || text.includes("/pulls?")) return Promise.resolve(Response.json([])); + if (text.includes("/contents/")) return Promise.resolve(new Response("missing", { status: 404 })); + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const res = await makeApp().request("/api/sources/github", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + repos: ["Signet-AI/signetai"], + name: "Route GitHub", + resourceTypes: ["issues", "docs"], + maxItemsPerRepo: 5, + }), + }); + + expect(res.status).toBe(202); + const body = (await res.json()) as { + source: { kind: string; providerSettings?: { repos?: string[] } }; + queued: boolean; + }; + expect(body.queued).toBe(true); + expect(body.source.kind).toBe("github"); + expect(body.source.providerSettings?.repos).toEqual(["Signet-AI/signetai"]); + expect(loadSourcesConfig(dir).sources[0]?.kind).toBe("github"); + }); + it("rejects raw Discord tokens at the route boundary", async () => { const res = await makeApp().request("/api/sources/discord", { method: "POST", diff --git a/platform/daemon/src/routes/sources-routes.ts b/platform/daemon/src/routes/sources-routes.ts index 3b7d71acd..95b068c6a 100644 --- a/platform/daemon/src/routes/sources-routes.ts +++ b/platform/daemon/src/routes/sources-routes.ts @@ -9,6 +9,7 @@ import { SOURCE_CHUNK_SOURCE_TYPE, type SignetSourceEntry, addDiscordSource, + addGitHubSource, addObsidianSource, loadSourcesConfig, markSourceIndexed, @@ -85,6 +86,19 @@ interface AddDiscordSourceBody { readonly syncMode?: "rest" | "gateway-tail" | "desktop-cache"; } +interface AddGitHubSourceBody { + readonly repos?: readonly string[]; + readonly repo?: string; + readonly tokenRef?: string; + readonly name?: string; + readonly resourceTypes?: readonly ("issues" | "pulls" | "discussions" | "docs")[]; + readonly state?: "open" | "closed" | "all"; + readonly includeComments?: 
boolean; + readonly labels?: readonly string[]; + readonly docPaths?: readonly string[]; + readonly maxItemsPerRepo?: number; +} + interface PickDirectoryBody { readonly title?: string; } @@ -245,6 +259,49 @@ export function registerSourcesRoutes(app: Hono, deps: RegisterSourcesRoutesDeps } }); + app.post("/api/sources/github", async (c) => { + let body: AddGitHubSourceBody = {}; + try { + body = (await c.req.json()) as AddGitHubSourceBody; + } catch { + return c.json({ error: "Invalid JSON body" }, 400); + } + + const repos = Array.isArray(body.repos) + ? body.repos.filter((entry): entry is string => typeof entry === "string") + : typeof body.repo === "string" + ? [body.repo] + : []; + const result = addGitHubSource( + { + repos, + tokenRef: typeof body.tokenRef === "string" ? body.tokenRef : undefined, + name: body.name, + resourceTypes: body.resourceTypes, + state: body.state, + includeComments: body.includeComments, + labels: Array.isArray(body.labels) + ? body.labels.filter((entry): entry is string => typeof entry === "string") + : undefined, + docPaths: Array.isArray(body.docPaths) + ? 
body.docPaths.filter((entry): entry is string => typeof entry === "string") + : undefined, + maxItemsPerRepo: body.maxItemsPerRepo, + }, + agentsDir, + ); + if (result.ok === false) return c.json({ error: result.error }, 400); + + const job = enqueueSourceIndexJob({ + source: result.source, + agentsDir, + startBridge, + purgeNativeSource, + }); + + return c.json({ source: result.source, created: result.created, indexed: 0, queued: true, job }, 202); + }); + app.delete("/api/sources/:sourceId", (c) => { const sourceId = c.req.param("sourceId"); const result = removeSource(sourceId, agentsDir); @@ -304,8 +361,16 @@ async function runSourceIndexJob(input: SourceIndexJobInput, job: SourceIndexJob }, }); if (!isCurrentSourceIndexJob(input.source.id, job.id)) return; - markSourceIndexed(input.source.id, undefined, input.agentsDir); - completeSourceIndexJob(input.source.id, job.id, result.indexed); + if (result.failures.length > 0) { + failSourceIndexJob( + input.source.id, + job.id, + `${input.source.kind} source sync completed with ${result.failures.length} failure(s)`, + ); + } else { + markSourceIndexed(input.source.id, undefined, input.agentsDir); + completeSourceIndexJob(input.source.id, job.id, result.indexed); + } return; } if (!provider.toNativeSource) throw new Error(`Source provider has no sync implementation: ${input.source.kind}`); diff --git a/platform/daemon/src/source-providers.ts b/platform/daemon/src/source-providers.ts index 11b9a6b3e..b8ae1f6f5 100644 --- a/platform/daemon/src/source-providers.ts +++ b/platform/daemon/src/source-providers.ts @@ -1,5 +1,6 @@ import type { SignetSourceEntry, SignetSourceKind, SourceFailureState } from "@signet/core"; import { discordSourceProvider } from "./discord-source-provider"; +import { githubSourceProvider } from "./github-source-provider"; import { type NativeMemorySource, obsidianNativeMemorySource, @@ -54,9 +55,10 @@ export function registerSourceProvider(provider: SourceProviderAdapter): void { export function 
getSourceProvider(kind: SignetSourceKind): SourceProviderAdapter | undefined { if (kind === obsidianSourceProvider.kind) return obsidianSourceProvider; if (kind === discordSourceProvider.kind) return discordSourceProvider; + if (kind === githubSourceProvider.kind) return githubSourceProvider; return additionalProviders.get(kind); } export function configuredSourceProviders(): readonly SourceProviderAdapter[] { - return [obsidianSourceProvider, discordSourceProvider, ...additionalProviders.values()]; + return [obsidianSourceProvider, discordSourceProvider, githubSourceProvider, ...additionalProviders.values()]; } diff --git a/surfaces/cli/src/commands/sources.ts b/surfaces/cli/src/commands/sources.ts index bc417e9c3..d4043775e 100644 --- a/surfaces/cli/src/commands/sources.ts +++ b/surfaces/cli/src/commands/sources.ts @@ -2,6 +2,7 @@ import type { Command } from "commander"; import { type SourcesDeps, addDiscordSourceFromCli, + addGitHubSourceFromCli, addObsidianVaultSource, exportConfiguredSourceSnapshot, importConfiguredSourceSnapshot, @@ -140,6 +141,20 @@ export function registerSourcesCommands(program: Command, deps: RegisterSourcesC addObsidianVaultSource(path, options, deps), ); + add + .command("github") + .description("Index GitHub repositories as read-only recall sources") + .requiredOption("--repo ", "GitHub repo pattern (repeatable, supports owner/*)", collect, []) + .option("--token-ref ", "Signet secret name or external secret reference for a GitHub token") + .option("--name ", "Display name for the GitHub source") + .option("--resource-type ", "Resource type: issues, pulls, discussions, docs (repeatable)", collect, []) + .option("--state ", "Resource state: open, closed, or all", "all") + .option("--no-include-comments", "Skip issue, PR, and discussion comments") + .option("--label