diff --git a/docs/SOURCES.md b/docs/SOURCES.md index b77dfcf54..21cdf84a2 100644 --- a/docs/SOURCES.md +++ b/docs/SOURCES.md @@ -10,7 +10,7 @@ Sources Sources are external knowledge bases that Signet can read, index, and recall from without turning them into ordinary saved memories. -Sources currently support **Obsidian** vaults and **Discord** guilds. Point Signet at an Obsidian vault and the daemon mounts that vault as a read-only knowledge base: Markdown files become searchable artifacts, the vault structure becomes graph topology, and heading-aware chunks participate in semantic recall. Add Discord with a bot-token secret reference and Signet indexes guild topology, channels, threads, members, message windows, and Discord metadata through the same source-owned artifact lifecycle. +Sources currently support **Obsidian** vaults, **Discord** guilds, and **GitHub** repositories. Point Signet at an Obsidian vault and the daemon mounts that vault as a read-only knowledge base: Markdown files become searchable artifacts, the vault structure becomes graph topology, and heading-aware chunks participate in semantic recall. Add Discord with a bot-token secret reference and Signet indexes guild topology, channels, threads, members, message windows, and Discord metadata through the same source-owned artifact lifecycle. Add GitHub repositories to index issues, pull requests, discussions, selected Markdown docs, comments, and source failure artifacts through the shared source provider pipeline. The important rule is simple: **the source stays canonical**. Signet reads from the vault. It does not edit notes, rewrite frontmatter, create files, or move anything inside the source directory. @@ -94,6 +94,34 @@ artifacts under the synthetic `@me` guild by default; use `--include-local-discord` only when intentionally moving that private local cache data. 
+GitHub v1 +--------- + +GitHub Sources v1 indexes configured repositories through the shared Sources job pipeline: + +```bash +signet sources add github --repo Signet-AI/signetai --name "Signet GitHub" +signet sources add github --repo Signet-AI/signetai --token-ref GITHUB_TOKEN --resource-type issues --resource-type discussions +signet sources add github --repo Signet-AI/* --resource-type docs --doc-path "docs/**/*.md" --max-items 50 +signet sources list +signet sources remove github:... +``` + +Without `--token-ref`, GitHub sources default to REST-fetchable resources: +issues, pull requests, and selected Markdown docs. Discussions use the GitHub +GraphQL API and require a token reference. Tokens must be stored in Signet +Secrets or an external secret reference; Signet does not store raw GitHub +tokens in source config. + +GitHub source config is bounded by `maxItemsPerRepo`. Repo globs, issue/PR +fetches, discussion fetches, and wildcard docs paths all honor configured caps. +Direct docs paths are limited to Markdown paths or Markdown globs, so GitHub v1 +does not become arbitrary source-code indexing by accident. + +Partial GitHub failures are written as source-owned failure artifacts and cause +the shared source job to report failure instead of silently marking incomplete +data as fully indexed. + Obsidian v1 ----------- @@ -228,6 +256,7 @@ The daemon exposes the Sources lifecycle under `/api/sources`: | `GET` | `/api/sources` | List configured sources. | | `POST` | `/api/sources/obsidian` | Add/update an Obsidian vault source and index it. | | `POST` | `/api/sources/discord` | Add/update a Discord source and queue a shared source index job. | +| `POST` | `/api/sources/github` | Add/update a GitHub source and queue a shared source index job. | | `DELETE` | `/api/sources/:sourceId` | Remove a source config and purge Signet-owned source rows. | | `POST` | `/api/sources/pick-directory` | Development/browser fallback for choosing a local directory. 
| diff --git a/docs/api/documents-sources.md b/docs/api/documents-sources.md index e1352df84..33247dab6 100644 --- a/docs/api/documents-sources.md +++ b/docs/api/documents-sources.md @@ -135,7 +135,7 @@ the document are soft-deleted one at a time with audit history. Sources connect read-only external knowledge bases to Signet recall without turning them into ordinary saved memories. Supported source kinds are -`obsidian` and `discord`. +`obsidian`, `discord`, and `github`. ### GET /api/sources @@ -267,6 +267,51 @@ windows, attachments, mentions, embeds, polls, checkpoints, and import stats. Cache imports are observational and never reconcile deletes from missing or evicted local cache files. +### POST /api/sources/github + +Add or update a GitHub source and queue a shared source index job. Without a +token reference, GitHub sources default to issues, pull requests, and selected +Markdown docs. Discussions require `tokenRef` because they use the GitHub +GraphQL API. Raw GitHub tokens are rejected; pass a Signet secret name or +external secret reference instead. + +**Request body** + +```json +{ + "repos": ["Signet-AI/signetai"], + "tokenRef": "GITHUB_TOKEN", + "name": "Signet GitHub", + "resourceTypes": ["issues", "pulls", "discussions", "docs"], + "state": "all", + "includeComments": true, + "labels": ["bug", "needs review"], + "docPaths": ["README.md", "docs/**/*.md"], + "maxItemsPerRepo": 500 +} +``` + +`repo` is accepted as a single-repository alias. `docPaths` are limited to +Markdown files or Markdown globs so GitHub source indexing stays focused on +chosen docs instead of broad source-code ingestion. + +**Response** + +```json +{ + "source": { "id": "github:abc123", "kind": "github" }, + "created": true, + "indexed": 0, + "queued": true, + "job": { "status": "queued", "sourceId": "github:abc123" } +} +``` + +The sync path indexes source-owned artifacts for issues, pull requests, +discussions, selected Markdown docs, comments, and partial-failure artifacts. 
+Partial GitHub failures cause the shared source job to report failure while +preserving source-owned rows that were indexed successfully. + ### DELETE /api/sources/:sourceId Remove a source config and purge Signet-owned source artifacts, graph rows, diff --git a/platform/core/src/index.ts b/platform/core/src/index.ts index 182453874..73c613a54 100644 --- a/platform/core/src/index.ts +++ b/platform/core/src/index.ts @@ -224,25 +224,36 @@ export type { } from "./workspace-source-repo"; export { addDiscordSource, + addGitHubSource, addObsidianSource, DEFAULT_DISCORD_DESKTOP_CACHE_PATH, DEFAULT_DISCORD_MAX_MESSAGES_PER_CHANNEL, + DEFAULT_GITHUB_DOC_PATHS, + DEFAULT_GITHUB_MAX_ITEMS_PER_REPO, + DEFAULT_GITHUB_RESOURCE_TYPES, + DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN, DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, MAX_DISCORD_MAX_MESSAGES_PER_CHANNEL, + MAX_GITHUB_MAX_ITEMS_PER_REPO, getAgentsDir, getSourcesConfigPath, loadSourcesConfig, markSourceIndexed, parseDiscordSettings, + parseGitHubSettings, removeSource, saveSourcesConfig, } from "./sources-config"; export type { AddDiscordSourceInput, + AddGitHubSourceInput, AddObsidianSourceInput, AddSourceResult, DiscordSourceSettings, DiscordSourceSyncMode, + GitHubSourceResourceType, + GitHubSourceSettings, + GitHubSourceState, RemoveSourceResult, SignetSourceEntry, SignetSourceKind, diff --git a/platform/core/src/sources-config.test.ts b/platform/core/src/sources-config.test.ts index 4a222aee2..6e4ece099 100644 --- a/platform/core/src/sources-config.test.ts +++ b/platform/core/src/sources-config.test.ts @@ -4,13 +4,16 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { DEFAULT_DISCORD_MAX_MESSAGES_PER_CHANNEL, + DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN, DEFAULT_OBSIDIAN_EXCLUDE_GLOBS, addDiscordSource, + addGitHubSource, addObsidianSource, getSourcesConfigPath, loadSourcesConfig, markSourceIndexed, parseDiscordSettings, + parseGitHubSettings, removeSource, } from "./sources-config"; @@ -278,6 +281,120 @@ 
describe("sources-config", () => { }); }); + it("adds a GitHub source with validated provider settings", () => { + const agentsDir = tmp(); + + const result = addGitHubSource( + { + repos: ["Signet-AI/signetai", "Signet-AI/signetai"], + tokenRef: "GITHUB_TOKEN", + name: "Signet GitHub", + resourceTypes: ["issues", "pulls", "discussions", "docs"], + state: "open", + labels: ["bug", "needs review", "bug"], + docPaths: ["README.md", "docs/**/*.md"], + maxItemsPerRepo: 25, + now: "2026-01-02T00:00:00.000Z", + }, + agentsDir, + ); + + expect(result.ok).toBe(true); + if (result.ok === false) throw new Error(result.error); + expect(result.source.kind).toBe("github"); + expect(result.source.root).toBe("github://repos/Signet-AI/signetai"); + expect(result.source.providerSettings).toEqual({ + repos: ["Signet-AI/signetai"], + tokenRef: "GITHUB_TOKEN", + resourceTypes: ["issues", "pulls", "discussions", "docs"], + state: "open", + includeComments: true, + labels: ["bug", "needs review"], + docPaths: ["README.md", "docs/**/*.md"], + maxItemsPerRepo: 25, + }); + }); + + it("defaults GitHub sources without tokenRef to REST-fetchable resources", () => { + const result = addGitHubSource({ repos: ["Signet-AI/signetai"] }, tmp()); + + expect(result.ok).toBe(true); + if (result.ok === false) throw new Error(result.error); + expect(parseGitHubSettings(result.source.providerSettings).resourceTypes).toEqual([ + ...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN, + ]); + }); + + it("preserves GitHub settings on partial update", () => { + const agentsDir = tmp(); + const first = addGitHubSource( + { + repos: ["Signet-AI/signetai"], + tokenRef: "GITHUB_TOKEN", + resourceTypes: ["issues", "discussions"], + labels: ["reviewed"], + docPaths: ["docs/API.md"], + maxItemsPerRepo: 12, + now: "2026-01-01T00:00:00.000Z", + }, + agentsDir, + ); + const second = addGitHubSource( + { repos: ["Signet-AI/signetai"], name: "Renamed", now: "2026-01-02T00:00:00.000Z" }, + agentsDir, + ); + + 
expect(first.ok).toBe(true); + expect(second.ok).toBe(true); + if (second.ok === false) throw new Error(second.error); + expect(second.created).toBe(false); + expect(second.source.name).toBe("Renamed"); + expect(parseGitHubSettings(second.source.providerSettings)).toMatchObject({ + tokenRef: "GITHUB_TOKEN", + resourceTypes: ["issues", "discussions"], + labels: ["reviewed"], + docPaths: ["docs/API.md"], + maxItemsPerRepo: 12, + }); + expect(loadSourcesConfig(agentsDir).sources).toHaveLength(1); + }); + + it("rejects invalid GitHub source boundaries", () => { + const agentsDir = tmp(); + + expect(addGitHubSource({ repos: [] }, agentsDir)).toEqual({ + ok: false, + error: "At least one GitHub repo pattern is required", + }); + expect(addGitHubSource({ repos: ["not-a-repo"] }, agentsDir)).toEqual({ + ok: false, + error: "Invalid GitHub repo pattern: not-a-repo. Expected owner/repo or owner/*", + }); + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], resourceTypes: ["discussions"] }, agentsDir)).toEqual({ + ok: false, + error: "GitHub discussions require tokenRef because they use the GitHub GraphQL API", + }); + for (const tokenRef of [ + `ghp_${"a".repeat(36)}`, + `github_pat_${"b".repeat(60)}`, + `Bearer ghp_${"c".repeat(36)}`, + `Authorization: token ghp_${"d".repeat(36)}`, + ]) { + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], tokenRef }, agentsDir)).toEqual({ + ok: false, + error: "GitHub tokenRef must be a secret reference, not a raw token", + }); + } + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], docPaths: ["src/daemon.ts"] }, agentsDir)).toEqual({ + ok: false, + error: "Invalid GitHub docPaths: src/daemon.ts", + }); + expect(addGitHubSource({ repos: ["Signet-AI/signetai"], maxItemsPerRepo: 0 }, agentsDir)).toEqual({ + ok: false, + error: "GitHub maxItemsPerRepo must be an integer between 1 and 10000", + }); + }); + it("round-trips provider-neutral source settings for future adapters", () => { const agentsDir = tmp(); const source = { 
diff --git a/platform/core/src/sources-config.ts b/platform/core/src/sources-config.ts index 65e2bb62f..bf3574f41 100644 --- a/platform/core/src/sources-config.ts +++ b/platform/core/src/sources-config.ts @@ -42,6 +42,8 @@ export interface AddObsidianSourceInput { } export type DiscordSourceSyncMode = "rest" | "gateway-tail" | "desktop-cache"; +export type GitHubSourceResourceType = "issues" | "pulls" | "discussions" | "docs"; +export type GitHubSourceState = "open" | "closed" | "all"; export interface DiscordSourceSettings { readonly guildIds: readonly string[]; @@ -83,6 +85,30 @@ export interface AddDiscordSourceInput { readonly now?: string; } +export interface GitHubSourceSettings { + readonly repos: readonly string[]; + readonly tokenRef?: string; + readonly resourceTypes: readonly GitHubSourceResourceType[]; + readonly state: GitHubSourceState; + readonly includeComments: boolean; + readonly labels?: readonly string[]; + readonly docPaths: readonly string[]; + readonly maxItemsPerRepo: number; +} + +export interface AddGitHubSourceInput { + readonly repos: readonly string[]; + readonly tokenRef?: string; + readonly name?: string; + readonly resourceTypes?: readonly GitHubSourceResourceType[]; + readonly state?: GitHubSourceState; + readonly includeComments?: boolean; + readonly labels?: readonly string[]; + readonly docPaths?: readonly string[]; + readonly maxItemsPerRepo?: number; + readonly now?: string; +} + export type AddSourceResult = | { readonly ok: true; readonly source: SignetSourceEntry; readonly created: boolean } | { readonly ok: false; readonly error: string }; @@ -95,6 +121,12 @@ const SOURCES_CONFIG_VERSION = 1; export const DEFAULT_DISCORD_MAX_MESSAGES_PER_CHANNEL = 1000; export const MAX_DISCORD_MAX_MESSAGES_PER_CHANNEL = 10_000; export const DEFAULT_DISCORD_DESKTOP_CACHE_PATH = defaultDiscordDesktopCachePath(); +export const DEFAULT_GITHUB_RESOURCE_TYPES = ["issues", "pulls", "discussions", "docs"] as const; +export const 
/**
 * Adds a GitHub source to the sources config, or updates the stored entry that
 * targets the same set of repo patterns.
 *
 * The source id is derived from a SHA-256 of the cleaned, sorted repo list, so
 * the same repos in a different order (or with duplicates) resolve to the same
 * source. When a source with that id already exists, the request is treated as
 * a partial update: fields omitted from `input` fall back to the stored
 * settings.
 *
 * Validation runs once, against the merged (request + stored) settings. The
 * request is not validated on its own first, because a partial update may
 * legitimately rely on stored values — e.g. enabling `discussions` on a source
 * whose `tokenRef` was saved earlier.
 *
 * @param input - Requested GitHub source settings; `now` overrides the timestamp.
 * @param agentsDir - Directory that holds the sources config.
 * @returns `{ ok: true, source, created }` on success, or `{ ok: false, error }`
 *   when the merged settings fail validation.
 */
function addGitHubSourceChecked(input: AddGitHubSourceInput, agentsDir = getAgentsDir()): AddSourceResult {
	// Identity only depends on the cleaned repo list, so it can be computed before
	// validation. Non-array input yields an empty key here and is rejected by
	// buildGitHubSettings below.
	const requestedRepos = Array.isArray(input.repos) ? cleanGitHubRepos(input.repos) : [];
	const repoKey = [...requestedRepos].sort().join(",");
	const sourceId = `github:${createHash("sha256").update(repoKey).digest("hex").slice(0, 16)}`;
	const root = `github://repos/${repoKey}`;

	const cfg = loadSourcesConfigForWrite(agentsDir);
	const existing = cfg.sources.find((source) => source.id === sourceId);

	// Merge with stored settings (if any) before validating, so cross-field rules
	// such as "discussions require tokenRef" see the effective configuration.
	const settings = buildGitHubSettings(
		input,
		existing ? parseGitHubSettings(existing.providerSettings) : undefined,
	);
	if ("error" in settings) return { ok: false, error: settings.error };

	const now = input.now ?? new Date().toISOString();
	const providerSettings = githubSettingsProviderSettings(settings);

	if (existing) {
		const updated: SignetSourceEntry = {
			...existing,
			name: cleanName(input.name) ?? existing.name,
			root,
			enabled: true,
			providerSettings,
			updatedAt: now,
		};
		saveSourcesConfig(
			{
				version: SOURCES_CONFIG_VERSION,
				sources: cfg.sources.map((source) => (source.id === existing.id ? updated : source)),
			},
			agentsDir,
		);
		return { ok: true, source: updated, created: false };
	}

	const source: SignetSourceEntry = {
		id: sourceId,
		kind: "github",
		name: cleanName(input.name) ?? settings.repos[0] ?? "GitHub Source",
		root,
		enabled: true,
		mode: "read-only",
		createdAt: now,
		updatedAt: now,
		providerSettings,
	};
	saveSourcesConfig({ version: SOURCES_CONFIG_VERSION, sources: [...cfg.sources, source] }, agentsDir);
	return { ok: true, source, created: true };
}
raw.tokenRef.trim() : ""; @@ -243,6 +335,32 @@ export function parseDiscordSettings(raw?: SignetSourceProviderSettings): Discor }; } +export function parseGitHubSettings(raw?: SignetSourceProviderSettings): GitHubSourceSettings { + const repos = Array.isArray(raw?.repos) ? cleanGitHubRepos(raw.repos) : []; + const tokenRef = typeof raw?.tokenRef === "string" ? raw.tokenRef.trim() || undefined : undefined; + const resourceTypes = + Array.isArray(raw?.resourceTypes) && raw.resourceTypes.every((type) => typeof type === "string") + ? raw.resourceTypes.filter((type): type is GitHubSourceResourceType => isGitHubResourceType(type)) + : tokenRef + ? [...DEFAULT_GITHUB_RESOURCE_TYPES] + : [...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN]; + const labels = Array.isArray(raw?.labels) ? cleanStringArray(raw.labels) : undefined; + const docPaths = Array.isArray(raw?.docPaths) + ? cleanStringArray(raw.docPaths).filter(isSafeGitHubDocPath) + : [...DEFAULT_GITHUB_DOC_PATHS]; + return { + repos, + ...(tokenRef ? { tokenRef } : {}), + resourceTypes: resourceTypes.length > 0 ? resourceTypes : [...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN], + state: isGitHubState(raw?.state) ? raw.state : "all", + includeComments: raw?.includeComments !== false, + ...(labels && labels.length > 0 ? { labels } : {}), + docPaths: docPaths.length > 0 ? docPaths : [...DEFAULT_GITHUB_DOC_PATHS], + maxItemsPerRepo: + cleanPositiveInteger(raw?.maxItemsPerRepo, MAX_GITHUB_MAX_ITEMS_PER_REPO) ?? 
/**
 * Validates a GitHub source request and merges it with previously stored
 * settings.
 *
 * Every field present on `input` wins; omitted fields fall back to `existing`,
 * then to the module defaults. List inputs (repos, resource types, labels, doc
 * paths) are trimmed where applicable and de-duplicated.
 *
 * Rules enforced here:
 * - at least one repo pattern, each matching `owner/repo` (the repo part may
 *   contain `*`);
 * - `tokenRef` must not look like a raw GitHub token;
 * - `discussions` requires a `tokenRef`;
 * - `docPaths` must pass `isSafeGitHubDocPath`;
 * - `maxItemsPerRepo` must be an integer in `1..MAX_GITHUB_MAX_ITEMS_PER_REPO`.
 *
 * @param input - The add/update request.
 * @param existing - Stored settings of the source being updated, if any.
 * @returns The effective settings, or `{ error }` describing the first violation.
 */
function buildGitHubSettings(
	input: AddGitHubSourceInput,
	existing?: GitHubSourceSettings,
): GitHubSourceSettings | { readonly error: string } {
	const repos = input.repos !== undefined ? cleanGitHubRepos(input.repos) : (existing?.repos ?? []);
	if (repos.length === 0) return { error: "At least one GitHub repo pattern is required" };
	// Compiled once rather than per iteration.
	const repoPattern = /^[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_*.-]+$/;
	for (const repo of repos) {
		if (!repoPattern.test(repo)) {
			return { error: `Invalid GitHub repo pattern: ${repo}. Expected owner/repo or owner/*` };
		}
	}

	// An explicit empty/blank tokenRef clears the stored reference.
	const tokenRef = input.tokenRef !== undefined ? input.tokenRef.trim() || undefined : existing?.tokenRef;
	if (tokenRef && looksLikeRawGitHubToken(tokenRef)) {
		return { error: "GitHub tokenRef must be a secret reference, not a raw token" };
	}

	// Runtime callers (e.g. JSON bodies) can pass a non-array; spreading a string
	// would yield per-character "types" and spreading a number would throw.
	if (input.resourceTypes && !Array.isArray(input.resourceTypes)) {
		return { error: "GitHub resourceTypes must be an array of resource type names" };
	}
	// De-duplicated like every other list setting so repeated entries are not persisted.
	const resourceTypes: GitHubSourceResourceType[] = input.resourceTypes
		? Array.from(new Set(input.resourceTypes))
		: existing?.resourceTypes?.length
			? [...existing.resourceTypes]
			: tokenRef
				? [...DEFAULT_GITHUB_RESOURCE_TYPES]
				: [...DEFAULT_GITHUB_RESOURCE_TYPES_NO_TOKEN];
	if (resourceTypes.length === 0) return { error: "GitHub resourceTypes must include at least one resource type" };
	const invalidTypes = resourceTypes.filter((type) => !isGitHubResourceType(type));
	if (invalidTypes.length > 0) {
		return {
			error: `Invalid GitHub resource types: ${invalidTypes.join(", ")}. Must be one of: ${[...DEFAULT_GITHUB_RESOURCE_TYPES].join(", ")}`,
		};
	}
	if (!tokenRef && resourceTypes.includes("discussions")) {
		return { error: "GitHub discussions require tokenRef because they use the GitHub GraphQL API" };
	}

	if (input.state !== undefined && !isGitHubState(input.state)) {
		return { error: "GitHub state must be one of: open, closed, all" };
	}
	if (input.includeComments !== undefined && typeof input.includeComments !== "boolean") {
		return { error: "GitHub includeComments must be a boolean" };
	}
	if (input.labels !== undefined && !isStringArray(input.labels)) {
		return { error: "GitHub labels must be an array of strings" };
	}

	let requestedDocPaths: readonly string[] | undefined;
	if (input.docPaths !== undefined) {
		if (!isStringArray(input.docPaths)) return { error: "GitHub docPaths must be an array of strings" };
		requestedDocPaths = cleanStringArray(input.docPaths);
		const invalid = requestedDocPaths.filter((path) => !isSafeGitHubDocPath(path));
		if (invalid.length > 0) return { error: `Invalid GitHub docPaths: ${invalid.join(", ")}` };
	}

	if (input.maxItemsPerRepo !== undefined) {
		const maxItemsPerRepo = cleanPositiveInteger(input.maxItemsPerRepo, MAX_GITHUB_MAX_ITEMS_PER_REPO);
		if (maxItemsPerRepo !== input.maxItemsPerRepo) {
			return {
				error: `GitHub maxItemsPerRepo must be an integer between 1 and ${MAX_GITHUB_MAX_ITEMS_PER_REPO}`,
			};
		}
	}

	const labels = input.labels !== undefined ? cleanStringArray(input.labels) : existing?.labels;
	// parseGitHubSettings treats an empty stored list as "use the defaults", so an
	// explicit empty request is resolved to the defaults here as well; that way the
	// settings returned to the caller match what is read back later.
	const docPaths =
		requestedDocPaths !== undefined
			? requestedDocPaths.length > 0
				? requestedDocPaths
				: [...DEFAULT_GITHUB_DOC_PATHS]
			: (existing?.docPaths ?? [...DEFAULT_GITHUB_DOC_PATHS]);

	return {
		repos,
		...(tokenRef ? { tokenRef } : {}),
		resourceTypes,
		state: input.state ?? existing?.state ?? "all",
		includeComments: input.includeComments ?? existing?.includeComments ?? true,
		// An explicit empty labels list clears stored labels (the key is omitted).
		...(labels && labels.length > 0 ? { labels } : {}),
		docPaths,
		maxItemsPerRepo: input.maxItemsPerRepo ?? existing?.maxItemsPerRepo ?? DEFAULT_GITHUB_MAX_ITEMS_PER_REPO,
	};
}
/**
 * Heuristic guard that rejects values which look like a GitHub credential
 * rather than the name of a stored secret.
 *
 * A value is flagged when either:
 * - it carries an HTTP auth wrapper (`Authorization:` prefix and/or a
 *   `Bearer `/`token ` scheme), regardless of what follows; or
 * - it matches a GitHub token shape: `github_pat_…` or `gho_`/`ghp_`/`ghs_`/
 *   `ghr_`/`ghu_` followed by at least 20 word characters.
 *
 * @param value - Candidate `tokenRef` supplied by the caller.
 * @returns `true` when the value should be treated as a raw token.
 */
function looksLikeRawGitHubToken(value: string): boolean {
	const candidate = value.trim();
	const unwrapped = candidate
		.replace(/^authorization:\s*/i, "")
		.trim()
		.replace(/^(bearer|token)\s+/i, "")
		.trim();

	// Anything that had header/scheme decoration stripped was pasted from a
	// request header, so it is rejected without inspecting the remainder.
	if (unwrapped !== candidate) return true;

	const tokenShapes = [/^github_pat_[A-Za-z0-9_]{20,}$/, /^gh[opsru]_[A-Za-z0-9_]{20,}$/];
	return tokenShapes.some((shape) => shape.test(unwrapped));
}
/** Returns true when the path ends with `.md`, compared case-insensitively. */
function isMarkdownDocPath(path: string): boolean {
	const normalized = path.toLowerCase();
	return normalized.endsWith(".md");
}

/** Returns true when the path ends with a `/*.md` or `/**​/*.md` glob suffix (case-insensitive). */
function isMarkdownDocGlob(path: string): boolean {
	const normalized = path.toLowerCase();
	return ["/*.md", "/**/*.md"].some((suffix) => normalized.endsWith(suffix));
}

/**
 * Decides whether a configured docs path is acceptable for GitHub indexing.
 *
 * Accepts only repo-relative Markdown paths or Markdown globs. Rejects blank
 * values, absolute paths, backslashes, `?`/`#` characters, and any empty, `.`
 * or `..` path segment.
 *
 * @param value - Raw doc path from source settings; surrounding whitespace is ignored.
 * @returns `true` when the path may be stored and fetched.
 */
function isSafeGitHubDocPath(value: string): boolean {
	const candidate = value.trim();
	if (candidate.length === 0) return false;
	if (candidate.startsWith("/")) return false;

	const forbiddenChars = ["\\", "?", "#"];
	if (forbiddenChars.some((ch) => candidate.includes(ch))) return false;

	// Empty segments come from doubled or trailing slashes; "." and ".." would
	// allow relative traversal.
	const unsafeSegments = new Set(["", ".", ".."]);
	for (const segment of candidate.split("/")) {
		if (unsafeSegments.has(segment)) return false;
	}

	// NOTE(review): every string accepted by isMarkdownDocGlob also ends with
	// ".md", so the second check never changes the result; kept for parity.
	return isMarkdownDocPath(candidate) || isMarkdownDocGlob(candidate);
}
typeof fetch; + + const result = await expandRepoGlob("owner", "private.*", undefined, 2); + + expect(result.repos).toEqual(["owner/private.*"]); + expect(result.truncated).toBe(true); + }); + + it("bounds issue scanning separately from indexed issue count on PR-heavy repos", async () => { + let calls = 0; + globalThis.fetch = mock(() => { + calls++; + return Promise.resolve( + Response.json([ + { + number: calls, + pull_request: { url: "x" }, + title: "PR", + body: "", + state: "open", + html_url: "", + user: null, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-01T00:00:00.000Z", + closed_at: null, + comments: 0, + }, + ]), + ); + }) as typeof fetch; + + const result = await fetchIssues({ owner: "o", repo: "r" }, undefined, "all", 1); + + expect(result.resources).toEqual([]); + expect(calls).toBeLessThanOrEqual(5); + }); + + it("clears request timeout handles when fetch attempts fail", async () => { + const originalSetTimeout = globalThis.setTimeout; + const originalClearTimeout = globalThis.clearTimeout; + const requestTimeouts: unknown[] = []; + const clearedTimeouts: unknown[] = []; + globalThis.setTimeout = ((callback: TimerHandler, delay?: number, ...args: unknown[]) => { + if (delay === 30_000) { + const handle = { id: `request-${requestTimeouts.length + 1}` }; + requestTimeouts.push(handle); + return handle as ReturnType; + } + return originalSetTimeout(callback, 0, ...args); + }) as typeof setTimeout; + globalThis.clearTimeout = ((handle?: ReturnType) => { + if (requestTimeouts.includes(handle)) { + clearedTimeouts.push(handle); + return; + } + originalClearTimeout(handle); + }) as typeof clearTimeout; + globalThis.fetch = mock(() => Promise.reject(new Error("network down"))) as typeof fetch; + + try { + await expect(fetchRepoInfo({ owner: "o", repo: "r" })).rejects.toThrow("network down"); + expect(clearedTimeouts).toEqual(requestTimeouts); + } finally { + globalThis.setTimeout = originalSetTimeout; + 
globalThis.clearTimeout = originalClearTimeout; + } + }); + + it("escapes PR label search values", async () => { + let requested = ""; + globalThis.fetch = mock((url: string | URL | Request) => { + requested = String(url); + return Promise.resolve(Response.json({ items: [] })); + }) as typeof fetch; + + await fetchPullRequestsBySearch({ owner: "o", repo: "r" }, ['quoted"label'], undefined, "open", 10); + + expect(decodeURIComponent(requested)).toContain('label:"quoted\\"label"'); + }); + + it("paginates label-filtered pull request search up to maxItems", async () => { + const requested: string[] = []; + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + requested.push(text); + const pullMatch = text.match(/\/pulls\/(\d+)$/); + if (pullMatch) { + const number = Number.parseInt(pullMatch[1] ?? "0", 10); + return Promise.resolve( + Response.json({ + number, + title: `PR ${number}`, + body: "", + state: "open", + html_url: `https://github.com/o/r/pull/${number}`, + user: null, + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-01T00:00:00.000Z", + closed_at: null, + merged_at: null, + draft: false, + base: { ref: "main" }, + head: { ref: `feature-${number}` }, + comments: 0, + review_comments: 0, + }), + ); + } + const page = new URL(text).searchParams.get("page"); + const makePull = (number: number) => ({ + number, + title: `PR ${number}`, + body: "", + state: "open", + html_url: `https://github.com/o/r/pull/${number}`, + user: null, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-01T00:00:00.000Z", + closed_at: null, + comments: 0, + }); + return Promise.resolve( + Response.json({ + items: page === "1" ? 
Array.from({ length: 100 }, (_, index) => makePull(index + 1)) : [makePull(101)], + }), + ); + }) as typeof fetch; + + const result = await fetchPullRequestsBySearch({ owner: "o", repo: "r" }, ["bug"], undefined, "open", 101); + + expect(result.resources).toHaveLength(101); + const searchRequests = requested.filter((entry) => entry.includes("/search/issues")); + expect(new URL(searchRequests[0] ?? "").searchParams.get("page")).toBe("1"); + expect(new URL(searchRequests[1] ?? "").searchParams.get("page")).toBe("2"); + expect(requested.filter((entry) => entry.includes("/pulls/"))).toHaveLength(101); + }); + + it("hydrates label-filtered pull request metadata while preserving search labels", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.includes("/search/issues")) { + return Promise.resolve( + Response.json({ + items: [ + { + number: 17, + title: "Search PR", + body: "search body", + state: "open", + html_url: "https://github.com/o/r/pull/17", + user: { login: "alice" }, + labels: [{ name: "sources" }], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-01T00:00:00.000Z", + closed_at: null, + comments: 4, + }, + ], + }), + ); + } + return Promise.resolve( + Response.json({ + number: 17, + title: "Hydrated PR", + body: "pull body", + state: "closed", + html_url: "https://github.com/o/r/pull/17", + user: { login: "alice" }, + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-03T00:00:00.000Z", + closed_at: "2026-01-04T00:00:00.000Z", + merged_at: "2026-01-04T00:00:00.000Z", + draft: true, + base: { ref: "main" }, + head: { ref: "feature" }, + comments: 2, + review_comments: 3, + }), + ); + }) as typeof fetch; + + const result = await fetchPullRequestsBySearch({ owner: "o", repo: "r" }, ["sources"], undefined, "all", 1); + + expect(result.errors).toEqual([]); + expect(result.resources[0]).toMatchObject({ + type: "pull", + title: "Hydrated PR", + body: "pull body", + state: 
"closed", + labels: ["sources"], + mergedAt: "2026-01-04T00:00:00.000Z", + commentsCount: 5, + extra: { draft: true, base: "main", head: "feature" }, + }); + }); + + it("maps pull request list responses without issue labels", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + Response.json([ + { + number: 17, + title: "Pull request", + body: "body", + state: "open", + html_url: "https://github.com/o/r/pull/17", + user: { login: "alice" }, + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + merged_at: null, + draft: false, + base: { ref: "main" }, + head: { ref: "feature" }, + }, + ]), + ), + ) as typeof fetch; + + const result = await fetchPullRequests({ owner: "o", repo: "r" }, undefined, "open", 1); + + expect(result.resources[0]?.number).toBe(17); + expect(result.resources[0]?.labels).toEqual([]); + expect(result.resources[0]?.commentsCount).toBe(0); + }); + + it("maps GraphQL discussion closed state without requiring a state string field", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + Response.json({ + data: { + repository: { + discussions: { + nodes: [ + { + number: 7, + title: "Closed discussion", + body: "body", + url: "https://github.com/o/r/discussions/7", + closed: true, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "alice" }, + labels: { nodes: [{ name: "roadmap" }] }, + comments: { totalCount: 0 }, + }, + ], + }, + }, + }, + }), + ), + ) as typeof fetch; + + const result = await fetchDiscussions({ owner: "o", repo: "r", token: "token" }, undefined, "closed", 10); + + expect(result.resources[0]?.state).toBe("closed"); + expect(result.resources[0]?.labels).toEqual(["roadmap"]); + }); + + it("paginates discussions until maxItems or the final GraphQL page", async () => { + const afterValues: Array = []; + globalThis.fetch = mock((_url: string | URL | Request, init?: RequestInit) => { + const variables = 
JSON.parse(String(init?.body)).variables as { after?: string | null }; + afterValues.push(variables.after ?? null); + return Promise.resolve( + Response.json({ + data: { + repository: { + discussions: { + nodes: [ + { + number: variables.after ? 2 : 1, + title: variables.after ? "Second discussion" : "First discussion", + body: "body", + url: `https://github.com/o/r/discussions/${variables.after ? 2 : 1}`, + closed: false, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "alice" }, + labels: { nodes: [] }, + comments: { totalCount: 0 }, + }, + ], + pageInfo: variables.after + ? { hasNextPage: false, endCursor: null } + : { hasNextPage: true, endCursor: "cursor-1" }, + }, + }, + }, + }), + ); + }) as typeof fetch; + + const result = await fetchDiscussions({ owner: "o", repo: "r", token: "token" }, undefined, "all", 2); + + expect(result.resources.map((resource) => resource.number)).toEqual([1, 2]); + expect(afterValues).toEqual([null, "cursor-1"]); + }); + + it("continues scanning discussions past state-filtered pages", async () => { + const afterValues: Array = []; + globalThis.fetch = mock((_url: string | URL | Request, init?: RequestInit) => { + const variables = JSON.parse(String(init?.body)).variables as { after?: string | null; first?: number }; + afterValues.push(variables.after ?? null); + expect(variables.first).toBe(100); + const closedNodes = Array.from({ length: 100 }, (_, index) => ({ + number: index + 1, + title: "Closed discussion", + body: "body", + url: `https://github.com/o/r/discussions/${index + 1}`, + closed: true, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "alice" }, + labels: { nodes: [] }, + comments: { totalCount: 0 }, + })); + return Promise.resolve( + Response.json({ + data: { + repository: { + discussions: variables.after + ? 
{ + nodes: [ + { + number: 101, + title: "Open discussion", + body: "body", + url: "https://github.com/o/r/discussions/101", + closed: false, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "alice" }, + labels: { nodes: [] }, + comments: { totalCount: 0 }, + }, + ], + pageInfo: { hasNextPage: false, endCursor: null }, + } + : { + nodes: closedNodes, + pageInfo: { hasNextPage: true, endCursor: "cursor-1" }, + }, + }, + }, + }), + ); + }) as typeof fetch; + + const result = await fetchDiscussions({ owner: "o", repo: "r", token: "token" }, undefined, "open", 1); + + expect(result.resources.map((resource) => resource.number)).toEqual([101]); + expect(afterValues).toEqual([null, "cursor-1"]); + }); + + it("bounds discussion scanning when state filters reject fetched nodes", async () => { + const afterValues: Array = []; + globalThis.fetch = mock((_url: string | URL | Request, init?: RequestInit) => { + const variables = JSON.parse(String(init?.body)).variables as { after?: string | null; first?: number }; + afterValues.push(variables.after ?? 
null); + expect(variables.first).toBe(100); + const pageIndex = afterValues.length; + return Promise.resolve( + Response.json({ + data: { + repository: { + discussions: { + nodes: Array.from({ length: 100 }, (_, index) => ({ + number: (pageIndex - 1) * 100 + index + 1, + title: "Closed discussion", + body: "body", + url: `https://github.com/o/r/discussions/${(pageIndex - 1) * 100 + index + 1}`, + closed: true, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "alice" }, + labels: { nodes: [] }, + comments: { totalCount: 0 }, + })), + pageInfo: { hasNextPage: true, endCursor: `cursor-${pageIndex}` }, + }, + }, + }, + }), + ); + }) as typeof fetch; + + const result = await fetchDiscussions({ owner: "o", repo: "r", token: "token" }, undefined, "open", 1); + + expect(result.resources).toEqual([]); + expect(afterValues).toEqual([null, "cursor-1", "cursor-2", "cursor-3", "cursor-4"]); + }); + + it("preserves opaque GraphQL discussion comment ids", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + Response.json({ + data: { + repository: { + discussion: { + comments: { + nodes: [ + { + id: "DC_kwDOOpaqueOne", + body: "first", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + author: { login: "alice" }, + }, + { + id: "DC_kwDOOpaqueTwo", + body: "second", + createdAt: "2026-01-02T00:00:00.000Z", + updatedAt: "2026-01-02T00:00:00.000Z", + author: { login: "bob" }, + }, + ], + }, + }, + }, + }, + }), + ), + ) as typeof fetch; + + const comments = await fetchDiscussionComments({ owner: "o", repo: "r", token: "token" }, 7); + + expect(comments.map((comment) => comment.id)).toEqual(["DC_kwDOOpaqueOne", "DC_kwDOOpaqueTwo"]); + }); + + it("paginates discussion comments with GraphQL-safe page sizes", async () => { + const requests: Array<{ first?: number; after?: string | null }> = []; + globalThis.fetch = mock((_url: string | URL | Request, init?: RequestInit) => { + const body = 
JSON.parse(String(init?.body ?? "{}")) as { variables?: { first?: number; after?: string | null } }; + requests.push({ first: body.variables?.first, after: body.variables?.after }); + return Promise.resolve( + Response.json({ + data: { + repository: { + discussion: { + comments: { + nodes: [ + { + id: requests.length === 1 ? "DC_first" : "DC_second", + body: requests.length === 1 ? "first" : "second", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + author: { login: "alice" }, + }, + ], + pageInfo: { + hasNextPage: requests.length === 1, + endCursor: requests.length === 1 ? "cursor-1" : null, + }, + }, + }, + }, + }, + }), + ); + }) as typeof fetch; + + const comments = await fetchDiscussionComments({ owner: "o", repo: "r", token: "token" }, 7); + + expect(requests).toEqual([ + { first: 100, after: null }, + { first: 100, after: "cursor-1" }, + ]); + expect(comments.map((comment) => comment.id)).toEqual(["DC_first", "DC_second"]); + }); + + it("throws on discussion comment GraphQL errors", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + Response.json({ + errors: [{ message: "discussion comments unavailable" }], + data: { repository: { discussion: null } }, + }), + ), + ) as typeof fetch; + + await expect(fetchDiscussionComments({ owner: "o", repo: "r", token: "token" }, 7)).rejects.toThrow( + "Discussion comments GraphQL error: discussion comments unavailable", + ); + }); + + it("preserves nested path separators when fetching docs", async () => { + let requested = ""; + globalThis.fetch = mock((url: string | URL | Request) => { + requested = String(url); + return Promise.resolve( + Response.json({ + content: Buffer.from("# api").toString("base64"), + encoding: "base64", + sha: "abc", + }), + ); + }) as typeof fetch; + + const result = await fetchRepoDocs({ owner: "o", repo: "r" }, ["docs/API.md"], "main", 1); + + expect(result.resources[0]?.path).toBe("docs/API.md"); + 
expect(requested).toContain("/contents/docs/API.md?"); + expect(requested).not.toContain("docs%2FAPI.md"); + }); + + it("keeps single-star doc globs within one path segment", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.includes("/git/trees/")) { + return Promise.resolve( + Response.json({ + tree: [ + { type: "blob", path: "docs/API.md" }, + { type: "blob", path: "docs/private/notes.md" }, + ], + }), + ); + } + return Promise.resolve( + Response.json({ + content: Buffer.from("# doc").toString("base64"), + encoding: "base64", + sha: "abc", + }), + ); + }) as typeof fetch; + + const direct = await fetchRepoDocs({ owner: "o", repo: "r" }, ["docs/*.md"], "main", 10); + const recursive = await fetchRepoDocs({ owner: "o", repo: "r" }, ["docs/**/*.md"], "main", 10); + + expect(direct.resources.map((resource) => resource.path)).toEqual(["docs/API.md"]); + expect(recursive.resources.map((resource) => resource.path)).toEqual(["docs/API.md", "docs/private/notes.md"]); + }); + + it("applies maxItems to wildcard docs", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.includes("/git/trees/")) { + return Promise.resolve( + Response.json({ + tree: [ + { type: "blob", path: "docs/a.md" }, + { type: "blob", path: "docs/b.md" }, + ], + }), + ); + } + return Promise.resolve( + Response.json({ + content: Buffer.from("# doc").toString("base64"), + encoding: "base64", + sha: "abc", + }), + ); + }) as typeof fetch; + + const result = await fetchRepoDocs({ owner: "o", repo: "r" }, ["docs/*.md"], "main", 1); + + expect(result.resources).toHaveLength(1); + expect(result.resources[0]?.path).toBe("docs/a.md"); + }); +}); diff --git a/platform/daemon/src/github-source-fetch.ts b/platform/daemon/src/github-source-fetch.ts new file mode 100644 index 000000000..907afd9af --- /dev/null +++ b/platform/daemon/src/github-source-fetch.ts @@ -0,0 +1,711 @@ +import type { 
/** Identifies the repository to query and, optionally, the token sent with each request. */
export interface GitHubFetchConfig {
  readonly token?: string;
  readonly owner: string;
  readonly repo: string;
}

/** Label object as embedded in issue and pull request payloads. */
export interface GitHubLabel {
  readonly name: string;
  readonly color?: string;
}

/** Subset of the REST issue payload read by this module. */
export interface GitHubIssue {
  readonly number: number;
  readonly title: string;
  readonly body: string | null;
  readonly state: string;
  readonly html_url: string;
  readonly user: { readonly login: string } | null;
  readonly labels: readonly GitHubLabel[];
  readonly created_at: string;
  readonly updated_at: string;
  readonly closed_at: string | null;
  /** Present when the "issue" is really a pull request; the issue fetcher skips such entries. */
  readonly pull_request?: { readonly url: string };
  readonly comments: number;
}

/** Subset of the REST pull request payload read by this module. */
export interface GitHubPullRequest {
  readonly number: number;
  readonly title: string;
  readonly body: string | null;
  readonly state: string;
  readonly html_url: string;
  readonly user: { readonly login: string } | null;
  /** Optional: treated as an empty list when absent. */
  readonly labels?: readonly GitHubLabel[];
  readonly created_at: string;
  readonly updated_at: string;
  readonly closed_at: string | null;
  readonly merged_at: string | null;
  readonly draft: boolean;
  readonly base: { readonly ref: string };
  readonly head: { readonly ref: string };
  /** Optional counters: treated as 0 when absent. */
  readonly comments?: number;
  readonly review_comments?: number;
}

/**
 * Comment shape shared by REST comments (numeric `id`, `user`) and GraphQL
 * discussion comments (string `id`, `author`).
 */
export interface GitHubComment {
  readonly id: number | string;
  readonly body: string;
  readonly user?: { readonly login?: string } | null;
  readonly author?: { readonly login?: string } | string | null;
  readonly created_at: string;
  readonly updated_at: string;
}

/** Normalized record produced for every fetched issue, pull request, discussion, or doc. */
export interface GitHubResource {
  readonly type: "issue" | "pull" | "discussion" | "doc";
  /** Set for issues, pull requests, and discussions. */
  readonly number?: number;
  /** Set for docs. */
  readonly path?: string;
  readonly title: string;
  readonly body: string;
  readonly state: string;
  readonly url: string;
  readonly labels: readonly string[];
  readonly author: string | null;
  readonly createdAt: string;
  readonly updatedAt: string;
  readonly closedAt: string | null;
  readonly mergedAt: string | null;
  readonly commentsCount: number;
  /** Type-specific extras, e.g. `{ draft, base, head }` for pulls or `{ sha }` for docs. */
  readonly extra: Readonly<Record<string, unknown>>;
}

/** Result of a fetch pass: the resources gathered plus any non-fatal errors met on the way. */
export interface GitHubFetchResult {
  readonly resources: readonly GitHubResource[];
  readonly errors: readonly { readonly message: string; readonly retryable: boolean }[];
}

/** Repository metadata returned by the repository lookup. */
export interface GitHubRepoInfo {
  readonly owner: string;
  readonly repo: string;
  readonly fullName: string;
  readonly defaultBranch: string;
  readonly htmlUrl: string;
}

/** Outcome of expanding a repository glob; `truncated` signals the listing cap was hit. */
export interface RepoGlobExpansion {
  readonly repos: readonly string[];
  readonly truncated: boolean;
}
/** Raw HTTP outcome returned by `githubRequest`: status, response headers, and the parsed JSON body. */
interface GitHubApiResponse {
  readonly status: number;
  readonly headers: Headers;
  readonly body: unknown;
}

const GITHUB_API_BASE = "https://api.github.com";
const GRAPHQL_URL = "https://api.github.com/graphql";
const PER_PAGE = 100;
const REQUEST_TIMEOUT_MS = 30_000;
const MAX_RETRIES = 3;
const RETRY_BASE_DELAY_MS = 1_000;
const MAX_FILTERED_SCAN_MULTIPLIER = 5;
const MAX_FILTERED_SCAN_FLOOR = PER_PAGE * 5;
const MAX_FILTERED_SCAN_CEILING = PER_PAGE * 20;
const MAX_COMMENTS_PER_RESOURCE = 200;
/** Longest single pause taken while waiting for a rate-limit window to reset. */
const MAX_RATE_LIMIT_WAIT_MS = 60_000;

/**
 * Sends one GitHub API request with a per-attempt timeout and up to `MAX_RETRIES` attempts.
 *
 * Retried: network/abort/JSON-parse failures, 5xx responses, and 403/429 responses whose
 * rate-limit headers say the quota is exhausted and not yet reset. Every other status
 * (including other 4xx) is returned to the caller untouched.
 *
 * @param url    Absolute request URL.
 * @param token  Optional bearer token.
 * @param method HTTP method, `GET` by default.
 * @param body   Optional JSON payload; sent only when truthy.
 * @returns Status, headers, and parsed JSON body (`null` for 204).
 * @throws The last recorded error once every attempt has failed.
 */
async function githubRequest(url: string, token?: string, method = "GET", body?: unknown): Promise<GitHubApiResponse> {
  const headers: Record<string, string> = {
    Accept: "application/vnd.github.v3+json",
    "User-Agent": "signet-daemon",
  };
  if (token) headers.Authorization = `Bearer ${token}`;
  if (body) headers["Content-Type"] = "application/json";
  const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));

  let lastError: Error | null = null;
  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
    let retryDelayMs = RETRY_BASE_DELAY_MS * (attempt + 1);
    const controller = new AbortController();
    // The watchdog stays armed until the body has been read, not just until headers arrive.
    const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    try {
      const response = await fetch(url, {
        method,
        headers,
        body: body ? JSON.stringify(body) : undefined,
        signal: controller.signal,
      });
      const remaining = Number(response.headers.get("x-ratelimit-remaining") ?? "5000");
      const resetAt = Number(response.headers.get("x-ratelimit-reset") ?? "0") * 1000;
      const untilReset = (): number => Math.min(resetAt - Date.now() + 1000, MAX_RATE_LIMIT_WAIT_MS);
      const quotaExhausted = (response.status === 403 || response.status === 429) && remaining === 0;

      if (quotaExhausted && resetAt > Date.now()) {
        // Decide to retry BEFORE any waiting happens: sleeping first would move the clock past
        // the reset time and make this check fail, handing the 403 back to the caller.
        lastError = new Error(`GitHub API rate limit exhausted (HTTP ${response.status})`);
        retryDelayMs = untilReset();
      } else if (response.status >= 500) {
        lastError = new Error(`GitHub API ${response.status}: ${await response.text()}`);
      } else {
        const parsed: unknown = response.status === 204 ? null : await response.json();
        clearTimeout(timeout);
        // Proactive throttle: the request succeeded but the quota is nearly gone.
        if (remaining < 10 && resetAt > Date.now()) await sleep(untilReset());
        return { status: response.status, headers: response.headers, body: parsed };
      }
    } catch (err) {
      lastError = err instanceof Error ? err : new Error(String(err));
    } finally {
      clearTimeout(timeout);
    }
    // No point in waiting when there is no attempt left to benefit from it.
    if (attempt < MAX_RETRIES - 1) await sleep(retryDelayMs);
  }
  throw lastError ?? new Error("GitHub API request failed after retries");
}
/**
 * Looks up repository metadata via `GET /repos/{owner}/{repo}`.
 *
 * @returns The repository details, or `null` when GitHub answers 404. Fields missing from the
 * response fall back to the configured owner/repo, `"main"` for the default branch, and a
 * `https://github.com/{owner}/{repo}` URL.
 * @throws Error for any status other than 200 or 404.
 */
export async function fetchRepoInfo(config: GitHubFetchConfig): Promise<GitHubRepoInfo | null> {
  const { owner, repo, token } = config;
  const { status, body } = await githubRequest(`${GITHUB_API_BASE}/repos/${owner}/${repo}`, token);
  if (status === 404) return null;
  if (status !== 200) throw new Error(`Failed to fetch repo info: ${status}`);

  const payload = body as Record<string, unknown>;
  const ownerRecord = payload.owner as Record<string, unknown> | undefined;
  // Mirrors `(value as string | undefined) ?? fallback`: only null/undefined trigger the fallback.
  const orElse = (value: unknown, fallback: string): string => (value as string | undefined) ?? fallback;

  return {
    owner: orElse(ownerRecord?.login, owner),
    repo: orElse(payload.name, repo),
    fullName: orElse(payload.full_name, `${owner}/${repo}`),
    defaultBranch: orElse(payload.default_branch, "main"),
    htmlUrl: orElse(payload.html_url, `https://github.com/${owner}/${repo}`),
  };
}
/**
 * Resolves a repository name or glob under `owner` into `owner/repo` full names.
 *
 * A pattern without `*` is returned as-is without any request. Otherwise the organization
 * listing is tried first, then the user listing; the first listing that yields matches (or that
 * hit the cap) wins.
 *
 * @param maxRepos Upper bound on repositories listed per endpoint before matching.
 * @returns Matching full names and whether the listing was cut off at `maxRepos`.
 */
export async function expandRepoGlob(
  owner: string,
  pattern: string,
  token?: string,
  maxRepos = 500,
): Promise<RepoGlobExpansion> {
  if (!pattern.includes("*")) return { repos: [`${owner}/${pattern}`], truncated: false };
  const regex = new RegExp(`^${globToRegexSource(pattern)}$`);
  // GitHub addresses REST pages as (page - 1) * per_page, so per_page must stay constant for
  // the whole walk; shrinking it on later pages would re-read or skip repositories.
  const pageSize = Math.min(PER_PAGE, Math.max(1, maxRepos));
  for (const prefix of [`/orgs/${owner}/repos`, `/users/${owner}/repos`]) {
    const listed: Array<{ full_name: string; name: string }> = [];
    let truncated = false;
    for (let page = 1; listed.length < maxRepos; page++) {
      const response = await githubRequest(
        `${GITHUB_API_BASE}${prefix}?per_page=${pageSize}&page=${page}&type=all`,
        token,
      );
      if (response.status !== 200) break;
      const batch = Array.isArray(response.body)
        ? (response.body as Array<{ full_name: string; name: string }>)
        : [];
      listed.push(...batch);
      const fullPage = batch.length >= pageSize;
      if (listed.length >= maxRepos) {
        // Either rows are about to be dropped, or a full page suggests more exist upstream.
        truncated = fullPage || listed.length > maxRepos;
        break;
      }
      if (!fullPage) break;
    }
    const matches = listed
      .slice(0, maxRepos)
      .filter((repo) => regex.test(repo.name))
      .map((repo) => repo.full_name);
    if (matches.length > 0 || truncated) return { repos: matches, truncated };
  }
  return { repos: [], truncated: false };
}

/**
 * Fetches issues (most recently updated first) for one repository.
 *
 * The issues endpoint also returns pull requests; those are skipped but still count toward the
 * scan budget of `min(max(maxItems * 5, 500), 2000)` rows. Because pages are always requested
 * at full size, the budget can be overshot by less than one page.
 *
 * @param since    Optional ISO timestamp forwarded as the `since` filter.
 * @param state    Issue state filter.
 * @param maxItems Maximum number of issues returned.
 * @param labels   Optional label names, sent as a comma-separated `labels` filter.
 * @returns Collected issues plus an error entry when a page request did not return 200.
 */
export async function fetchIssues(
  config: GitHubFetchConfig,
  since?: string,
  state: GitHubSourceState = "all",
  maxItems = 500,
  labels?: readonly string[],
): Promise<GitHubFetchResult> {
  const resources: GitHubResource[] = [];
  // Mutable element-typed array: the result's `errors` field is readonly and has no `push`.
  const errors: Array<GitHubFetchResult["errors"][number]> = [];
  const scanLimit = Math.min(
    Math.max(maxItems * MAX_FILTERED_SCAN_MULTIPLIER, MAX_FILTERED_SCAN_FLOOR),
    MAX_FILTERED_SCAN_CEILING,
  );
  let scanned = 0;
  for (let page = 1; resources.length < maxItems && scanned < scanLimit; page++) {
    const url = new URL(`${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/issues`);
    url.searchParams.set("state", state);
    // Constant page size: changing per_page while incrementing page shifts the offset.
    url.searchParams.set("per_page", String(PER_PAGE));
    url.searchParams.set("page", String(page));
    url.searchParams.set("sort", "updated");
    url.searchParams.set("direction", "desc");
    if (since) url.searchParams.set("since", since);
    if (labels?.length) url.searchParams.set("labels", labels.join(","));
    const response = await githubRequest(url.toString(), config.token);
    if (response.status !== 200) {
      errors.push({ message: `Issues fetch failed: ${response.status}`, retryable: response.status >= 500 });
      break;
    }
    const batch = response.body as GitHubIssue[];
    scanned += batch.length;
    for (const issue of batch) {
      if (resources.length >= maxItems) break;
      if (issue.pull_request) continue;
      resources.push(issueResource(issue));
    }
    if (batch.length < PER_PAGE) break;
  }
  return { resources, errors };
}
/**
 * Lists pull requests for one repository through `GET /repos/{owner}/{repo}/pulls`.
 *
 * @param _since   Accepted for signature parity with the other fetchers; not used here.
 * @param state    Pull request state filter.
 * @param maxItems Maximum number of pull requests returned.
 * @returns Collected pull requests plus an error entry when a page request did not return 200.
 */
export async function fetchPullRequests(
  config: GitHubFetchConfig,
  _since?: string,
  state: GitHubSourceState = "all",
  maxItems = 500,
): Promise<GitHubFetchResult> {
  const resources: GitHubResource[] = [];
  // Mutable element-typed array: the result's `errors` field is readonly and has no `push`.
  const errors: Array<GitHubFetchResult["errors"][number]> = [];
  // GitHub computes the offset as (page - 1) * per_page. The page size therefore has to stay
  // fixed: asking for 100 rows on page 1 and 50 on page 2 would re-read rows 51-100.
  const pageSize = Math.min(PER_PAGE, Math.max(1, maxItems));
  for (let page = 1; resources.length < maxItems; page++) {
    const url = new URL(`${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/pulls`);
    url.searchParams.set("state", state);
    url.searchParams.set("per_page", String(pageSize));
    url.searchParams.set("page", String(page));
    const response = await githubRequest(url.toString(), config.token);
    if (response.status !== 200) {
      errors.push({ message: `Pull requests fetch failed: ${response.status}`, retryable: response.status >= 500 });
      break;
    }
    const batch = response.body as GitHubPullRequest[];
    resources.push(...batch.map(pullResource));
    if (batch.length < pageSize) break; // short page: nothing left upstream
  }
  // The final page may overshoot the cap; trim to exactly maxItems.
  return { resources: resources.slice(0, maxItems), errors };
}
/**
 * Finds label-filtered pull requests through the issue search endpoint, then loads each hit's
 * full pull request record.
 *
 * The labels reported by search are kept on the hydrated record. When a detail request does not
 * return 200, the search hit itself is used as a fallback and a retryable error is recorded.
 *
 * @param labels   Label names; each becomes a quoted `label:` search qualifier.
 * @param _since   Accepted for signature parity with the other fetchers; not used here.
 * @param state    Adds a `state:` qualifier unless `"all"`.
 * @param maxItems Maximum number of pull requests returned.
 */
export async function fetchPullRequestsBySearch(
  config: GitHubFetchConfig,
  labels: readonly string[],
  _since?: string,
  state: GitHubSourceState = "all",
  maxItems = 500,
): Promise<GitHubFetchResult> {
  const resources: GitHubResource[] = [];
  // Mutable element-typed array: the result's `errors` field is readonly and has no `push`.
  const errors: Array<GitHubFetchResult["errors"][number]> = [];
  const statePart = state === "all" ? "" : ` state:${state}`;
  const labelPart = labels.map((label) => ` label:${quoteSearchValue(label)}`).join("");
  const q = `repo:${config.owner}/${config.repo} is:pr${statePart}${labelPart}`;
  // Fixed page size: the offset is (page - 1) * per_page, so shrinking per_page on a later
  // page would return rows that were already seen instead of the next ones.
  const pageSize = Math.min(PER_PAGE, Math.max(1, maxItems));
  for (let page = 1; resources.length < maxItems; page++) {
    const response = await githubRequest(
      `${GITHUB_API_BASE}/search/issues?q=${encodeURIComponent(q)}&per_page=${pageSize}&page=${page}`,
      config.token,
    );
    if (response.status !== 200) {
      errors.push({ message: `Pull request search failed: ${response.status}`, retryable: false });
      break;
    }
    const body = response.body as { items?: GitHubIssue[]; incomplete_results?: boolean };
    const batch = body.items ?? [];
    for (const issue of batch) {
      if (resources.length >= maxItems) break;
      const pull = await fetchPullRequestDetail(config, issue.number);
      if (pull) {
        resources.push(pullResource({ ...pull, labels: issue.labels }));
      } else {
        errors.push({
          message: `Pull request detail fetch failed for #${issue.number}`,
          retryable: true,
        });
        resources.push(searchPullResource(issue));
      }
    }
    if (body.incomplete_results) {
      errors.push({ message: "Pull request search returned incomplete GitHub results", retryable: true });
    }
    if (batch.length < pageSize) break; // short page: no further results
  }
  return { resources: resources.slice(0, maxItems), errors };
}
/** Loads one pull request's full record; resolves to `undefined` when the status is not 200. */
async function fetchPullRequestDetail(
  config: GitHubFetchConfig,
  number: number,
): Promise<GitHubPullRequest | undefined> {
  const response = await githubRequest(
    `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/pulls/${number}`,
    config.token,
  );
  if (response.status !== 200) return undefined;
  return response.body as GitHubPullRequest;
}

/** Fetches comments from `/issues/{number}/comments`, capped at `MAX_COMMENTS_PER_RESOURCE`. */
export async function fetchIssueComments(config: GitHubFetchConfig, number: number): Promise<GitHubComment[]> {
  return fetchComments(
    `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/issues/${number}/comments`,
    config.token,
  );
}

/** Fetches comments from `/pulls/{number}/comments`, capped at `MAX_COMMENTS_PER_RESOURCE`. */
export async function fetchPullRequestComments(config: GitHubFetchConfig, number: number): Promise<GitHubComment[]> {
  return fetchComments(
    `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/pulls/${number}/comments`,
    config.token,
  );
}

/**
 * Walks a paginated REST comment listing until it ends or the per-resource cap is reached.
 *
 * @throws Error when any page does not return 200.
 */
async function fetchComments(baseUrl: string, token?: string): Promise<GitHubComment[]> {
  const comments: GitHubComment[] = [];
  // Kept constant across pages: the offset is (page - 1) * per_page, so a per-page size that
  // shrinks with the remaining budget is only correct while the cap is a multiple of PER_PAGE.
  const pageSize = Math.min(PER_PAGE, MAX_COMMENTS_PER_RESOURCE);
  for (let page = 1; comments.length < MAX_COMMENTS_PER_RESOURCE; page++) {
    const response = await githubRequest(`${baseUrl}?per_page=${pageSize}&page=${page}`, token);
    if (response.status !== 200) throw new Error(`GitHub comments fetch failed: ${response.status}`);
    const batch = response.body as GitHubComment[];
    comments.push(...batch);
    if (batch.length < pageSize) break;
  }
  return comments.slice(0, MAX_COMMENTS_PER_RESOURCE);
}
/**
 * Fetches discussions (most recently updated first) through the GraphQL API.
 *
 * The state filter is applied client-side to each fetched node, so scanning continues past
 * pages whose nodes are all rejected, bounded by `min(max(maxItems * 5, 500), 2000)` nodes.
 *
 * @param _after   Accepted for signature parity with the other fetchers; not used here.
 * @param state    `"all"`, or the state a discussion must have to be kept.
 * @param maxItems Maximum number of discussions returned.
 * @returns Collected discussions plus non-retryable errors for HTTP or GraphQL failures.
 */
export async function fetchDiscussions(
  config: GitHubFetchConfig,
  _after?: string,
  state: GitHubSourceState = "all",
  maxItems = 500,
): Promise<GitHubFetchResult> {
  const resources: GitHubResource[] = [];
  // Mutable element-typed array: the result's `errors` field is readonly and has no `push`.
  const errors: Array<GitHubFetchResult["errors"][number]> = [];
  const scanLimit = Math.min(
    Math.max(maxItems * MAX_FILTERED_SCAN_MULTIPLIER, MAX_FILTERED_SCAN_FLOOR),
    MAX_FILTERED_SCAN_CEILING,
  );
  const query = `
    query($owner:String!, $name:String!, $first:Int!, $after:String) {
      repository(owner:$owner, name:$name) {
        discussions(first:$first, after:$after, orderBy:{field:UPDATED_AT, direction:DESC}) {
          nodes {
            number title body url closed createdAt updatedAt
            author { login }
            labels(first:20) { nodes { name } }
            comments { totalCount }
          }
          pageInfo { hasNextPage endCursor }
        }
      }
    }`;
  let cursor: string | null = null;
  let scanned = 0;
  while (resources.length < maxItems && scanned < scanLimit) {
    const remainingScan = scanLimit - scanned;
    const response = await githubRequest(GRAPHQL_URL, config.token, "POST", {
      query,
      variables: {
        owner: config.owner,
        name: config.repo,
        // Cursor pagination is unaffected by a varying page size, unlike page-number pagination.
        first: Math.min(remainingScan, PER_PAGE),
        after: cursor,
      },
    });
    if (response.status !== 200) {
      errors.push({ message: `Discussions fetch failed: ${response.status}`, retryable: false });
      break;
    }
    const data = response.body as {
      data?: {
        repository?: {
          discussions?: { nodes?: DiscussionNode[]; pageInfo?: DiscussionPageInfo };
        };
      };
      errors?: Array<{ message?: string }>;
    };
    if (data.errors?.length) {
      errors.push(...data.errors.map((error) => ({ message: error.message ?? "GraphQL error", retryable: false })));
      break;
    }
    const discussions = data.data?.repository?.discussions;
    const nodes = discussions?.nodes ?? [];
    scanned += nodes.length;
    for (const resource of nodes.map(discussionResource)) {
      if (resources.length >= maxItems) break;
      if (state === "all" || resource.state === state) resources.push(resource);
    }
    if (nodes.length === 0) break;
    if (!discussions?.pageInfo?.hasNextPage) break;
    cursor = discussions.pageInfo.endCursor ?? null;
    if (!cursor) break; // a "next page" without a cursor cannot be followed
  }
  return { resources, errors };
}
/**
 * Collects the comments of one discussion through the GraphQL API, following cursors until the
 * listing ends or `MAX_COMMENTS_PER_RESOURCE` comments have been gathered.
 *
 * Each node is mapped onto the shared comment shape; the GraphQL `id` is passed through
 * unchanged and the author is exposed under both `author` and `user`.
 *
 * @throws Error when a request does not return 200 or the response carries GraphQL errors.
 */
export async function fetchDiscussionComments(config: GitHubFetchConfig, number: number): Promise<GitHubComment[]> {
  const query = `
    query($owner:String!, $name:String!, $number:Int!, $first:Int!, $after:String) {
      repository(owner:$owner, name:$name) {
        discussion(number:$number) {
          comments(first:$first, after:$after) {
            nodes { id body createdAt updatedAt author { login } }
            pageInfo { hasNextPage endCursor }
          }
        }
      }
    }`;
  const collected: GitHubComment[] = [];
  let after: string | null = null;
  while (collected.length < MAX_COMMENTS_PER_RESOURCE) {
    const budget = MAX_COMMENTS_PER_RESOURCE - collected.length;
    const variables = {
      owner: config.owner,
      name: config.repo,
      number,
      first: Math.min(PER_PAGE, budget),
      after,
    };
    const { status, body: raw } = await githubRequest(GRAPHQL_URL, config.token, "POST", { query, variables });
    if (status !== 200) throw new Error(`Discussion comments fetch failed: ${status}`);

    const payload = raw as {
      data?: {
        repository?: { discussion?: { comments?: { nodes?: DiscussionCommentNode[]; pageInfo?: DiscussionPageInfo } } };
      };
      errors?: Array<{ message?: string }>;
    };
    if (payload.errors?.length) {
      const details = payload.errors.map((error) => error.message ?? "GraphQL error").join("; ");
      throw new Error(`Discussion comments GraphQL error: ${details}`);
    }

    const connection = payload.data?.repository?.discussion?.comments;
    const nodes = connection?.nodes ?? [];
    for (const node of nodes) {
      collected.push({
        id: node.id,
        body: node.body,
        author: node.author,
        user: node.author,
        created_at: node.createdAt,
        updated_at: node.updatedAt,
      });
    }

    // Stop on an empty page, on the last page, or when no cursor is available to continue.
    const exhausted = nodes.length === 0 || !connection?.pageInfo?.hasNextPage;
    if (exhausted) break;
    after = connection?.pageInfo?.endCursor ?? null;
    if (!after) break;
  }
  return collected.slice(0, MAX_COMMENTS_PER_RESOURCE);
}
/**
 * Fetches documentation files for the given paths at `ref`.
 *
 * Paths containing `*` are expanded against the repository tree; all other paths are fetched
 * directly. A failure on one path is recorded as a non-retryable error and the remaining paths
 * are still processed.
 *
 * @param paths    Literal paths or glob patterns.
 * @param ref      Branch, tag, or commit to read from.
 * @param maxItems Maximum number of docs returned across all paths.
 */
export async function fetchRepoDocs(
  config: GitHubFetchConfig,
  paths: readonly string[],
  ref: string,
  maxItems = 500,
): Promise<GitHubFetchResult> {
  const resources: GitHubResource[] = [];
  // Mutable element-typed array: the result's `errors` field is readonly and has no `push`.
  const errors: Array<GitHubFetchResult["errors"][number]> = [];
  for (const path of paths) {
    if (resources.length >= maxItems) break;
    try {
      if (path.includes("*")) {
        // Only the remaining budget is handed to the glob expansion.
        const result = await fetchTreeDocs(config, path, ref, maxItems - resources.length);
        resources.push(...result.resources);
        errors.push(...result.errors);
      } else {
        const resource = await fetchDoc(config, path, ref);
        if (resource) resources.push(resource);
      }
    } catch (err) {
      errors.push({ message: err instanceof Error ? err.message : String(err), retryable: false });
    }
  }
  return { resources: resources.slice(0, maxItems), errors };
}

/**
 * Fetches a single file through the contents endpoint and decodes it as UTF-8.
 *
 * Path segments are percent-encoded individually so `/` separators survive.
 *
 * @returns The doc resource, or `null` when the file is missing (404) or the response is not
 * base64 content.
 * @throws Error for any status other than 200 or 404.
 */
async function fetchDoc(config: GitHubFetchConfig, path: string, ref: string): Promise<GitHubResource | null> {
  const response = await githubRequest(
    `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/contents/${encodeGitHubContentPath(path)}?ref=${encodeURIComponent(ref)}`,
    config.token,
  );
  if (response.status === 404) return null;
  if (response.status !== 200) throw new Error(`Doc fetch failed for ${path}: ${response.status}`);
  const body = response.body as { content?: string; encoding?: string; sha?: string; html_url?: string };
  if (body.encoding !== "base64" || !body.content) return null;
  return docResource(path, Buffer.from(body.content, "base64").toString("utf8"), body.sha ?? "", body.html_url);
}
/**
 * Expands a glob against the recursive tree of `ref` and fetches every matching blob, up to
 * `maxItems` files.
 *
 * Failures are reported rather than thrown so that docs already fetched are kept: a file that
 * cannot be fetched adds an error entry, and so does a tree listing that GitHub truncated
 * before the match budget was filled (matches may be missing in that case).
 */
async function fetchTreeDocs(
  config: GitHubFetchConfig,
  pattern: string,
  ref: string,
  maxItems: number,
): Promise<GitHubFetchResult> {
  const response = await githubRequest(
    `${GITHUB_API_BASE}/repos/${config.owner}/${config.repo}/git/trees/${encodeURIComponent(ref)}?recursive=1`,
    config.token,
  );
  if (response.status !== 200) {
    return {
      resources: [],
      errors: [{ message: `Tree fetch failed: ${response.status}`, retryable: response.status >= 500 }],
    };
  }
  const body = response.body as { tree?: Array<{ path?: string; type?: string }>; truncated?: boolean };
  const regex = new RegExp(`^${globToRegexSource(pattern)}$`);
  const paths = (body.tree ?? [])
    .filter((entry) => entry.type === "blob" && typeof entry.path === "string" && regex.test(entry.path))
    .map((entry) => entry.path as string)
    .slice(0, maxItems);
  const resources: GitHubResource[] = [];
  const errors: Array<GitHubFetchResult["errors"][number]> = [];
  // A truncated listing only matters when the cap was not reached from what was listed.
  if (body.truncated && paths.length < maxItems) {
    errors.push({
      message: `Tree listing for ${ref} was truncated by GitHub; docs matching ${pattern} may be incomplete`,
      retryable: false,
    });
  }
  for (const path of paths) {
    try {
      const resource = await fetchDoc(config, path, ref);
      if (resource) resources.push(resource);
    } catch (err) {
      // Keep the docs fetched so far; one bad file should not discard the whole glob.
      errors.push({ message: err instanceof Error ? err.message : String(err), retryable: false });
    }
  }
  return { resources, errors };
}

/** Maps a REST issue payload onto the normalized resource shape (`null` body becomes `""`). */
function issueResource(issue: GitHubIssue): GitHubResource {
  return {
    type: "issue",
    number: issue.number,
    title: issue.title,
    body: issue.body ?? "",
    state: issue.state,
    url: issue.html_url,
    labels: issue.labels.map((label) => label.name),
    author: issue.user?.login ?? null,
    createdAt: issue.created_at,
    updatedAt: issue.updated_at,
    closedAt: issue.closed_at,
    mergedAt: null,
    commentsCount: issue.comments,
    extra: {},
  };
}

/**
 * Maps a REST pull request payload onto the normalized resource shape.
 *
 * Missing labels become an empty list, and `commentsCount` is the sum of `comments` and
 * `review_comments`, each counted as 0 when absent.
 */
function pullResource(pull: GitHubPullRequest): GitHubResource {
  return {
    type: "pull",
    number: pull.number,
    title: pull.title,
    body: pull.body ?? "",
    state: pull.state,
    url: pull.html_url,
    labels: (pull.labels ?? []).map((label) => label.name),
    author: pull.user?.login ?? null,
    createdAt: pull.created_at,
    updatedAt: pull.updated_at,
    closedAt: pull.closed_at,
    mergedAt: pull.merged_at,
    commentsCount: (pull.comments ?? 0) + (pull.review_comments ?? 0),
    extra: { draft: pull.draft, base: pull.base.ref, head: pull.head.ref },
  };
}
/** Fallback used when a pull request's detail record is unavailable: the search hit, retyped as a pull. */
function searchPullResource(issue: GitHubIssue): GitHubResource {
  return { ...issueResource(issue), type: "pull", mergedAt: null };
}

/** Discussion fields selected by the discussions GraphQL query. */
interface DiscussionNode {
  readonly number: number;
  readonly title: string;
  readonly body: string;
  readonly url: string;
  readonly closed?: boolean;
  readonly createdAt: string;
  readonly updatedAt: string;
  readonly author?: { readonly login?: string } | null;
  readonly labels?: { readonly nodes?: Array<{ readonly name?: string }> };
  readonly comments?: { readonly totalCount?: number };
}

/** GraphQL connection paging info. */
interface DiscussionPageInfo {
  readonly hasNextPage?: boolean;
  readonly endCursor?: string | null;
}

/** Discussion comment fields selected by the discussion comments GraphQL query. */
interface DiscussionCommentNode {
  readonly id: string;
  readonly body: string;
  readonly createdAt: string;
  readonly updatedAt: string;
  readonly author?: { readonly login?: string } | null;
}

/**
 * Maps a GraphQL discussion node onto the normalized resource shape.
 *
 * State is derived from the boolean `closed` flag. The query selects no close timestamp, so a
 * closed discussion reports its `updatedAt` as `closedAt`. Labels without a name are dropped.
 */
function discussionResource(node: DiscussionNode): GitHubResource {
  return {
    type: "discussion",
    number: node.number,
    title: node.title,
    body: node.body,
    state: node.closed ? "closed" : "open",
    url: node.url,
    labels: node.labels?.nodes?.map((label) => label.name).filter((name): name is string => !!name) ?? [],
    author: node.author?.login ?? null,
    createdAt: node.createdAt,
    updatedAt: node.updatedAt,
    closedAt: node.closed ? node.updatedAt : null,
    mergedAt: null,
    commentsCount: node.comments?.totalCount ?? 0,
    extra: {},
  };
}

/**
 * Wraps a fetched file as a doc resource titled by its path.
 *
 * Both timestamps are the time of this call (the contents response carries no dates); the blob
 * `sha` is kept in `extra`, and a missing URL becomes `""`.
 */
function docResource(path: string, content: string, sha: string, url?: string): GitHubResource {
  // Read the clock once so createdAt and updatedAt cannot straddle a millisecond boundary.
  const fetchedAt = new Date().toISOString();
  return {
    type: "doc",
    path,
    title: path,
    body: content,
    state: "current",
    url: url ?? "",
    labels: [],
    author: null,
    createdAt: fetchedAt,
    updatedAt: fetchedAt,
    closedAt: null,
    mergedAt: null,
    commentsCount: 0,
    extra: { sha },
  };
}
"", + labels: [], + author: null, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + closedAt: null, + mergedAt: null, + commentsCount: 0, + extra: { sha }, + }; +} + +function quoteSearchValue(value: string): string { + return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`; +} + +function encodeGitHubContentPath(path: string): string { + return path.split("/").map(encodeURIComponent).join("/"); +} + +function globToRegexSource(pattern: string): string { + let source = ""; + let index = 0; + while (index < pattern.length) { + if (pattern.startsWith("**/", index)) { + source += "(?:.*/)?"; + index += 3; + continue; + } + if (pattern.startsWith("**", index)) { + source += ".*"; + index += 2; + continue; + } + const char = pattern[index] ?? ""; + if (char === "*") source += "[^/]*"; + else if (char === "?") source += "[^/]"; + else source += escapeRegex(char); + index++; + } + return source; +} + +function escapeRegex(char: string): string { + return /[\\^$+?.()|[\]{}]/.test(char) ? `\\${char}` : char; +} + +export function logGitHubFetchError(sourceId: string, repo: string, phase: string, err: unknown): void { + logger.warn("github-source", "GitHub source fetch failed", { + sourceId, + repo, + phase, + error: err instanceof Error ? 
err.message : String(err), + }); +} diff --git a/platform/daemon/src/github-source-provider.test.ts b/platform/daemon/src/github-source-provider.test.ts new file mode 100644 index 000000000..d8807cb4e --- /dev/null +++ b/platform/daemon/src/github-source-provider.test.ts @@ -0,0 +1,861 @@ +import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; +import { mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { type SignetSourceEntry, addGitHubSource } from "@signet/core"; +import { closeDbAccessor, getDbAccessor, initDbAccessor } from "./db-accessor"; +import { githubSourceProvider } from "./github-source-provider"; +import { indexExternalMemoryArtifact } from "./memory-lineage"; + +/* Kept so afterEach can undo the fetch mock each test installs. */ const originalFetch = globalThis.fetch; + +describe("github-source-provider", () => { + let dir = ""; + let previousSignetPath: string | undefined; + + /* Every test runs against a fresh temp directory: SIGNET_PATH points at it and a new memories.db is opened inside its memory folder. */ beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "signet-github-source-")); + previousSignetPath = process.env.SIGNET_PATH; + process.env.SIGNET_PATH = dir; + mkdirSync(join(dir, "memory"), { recursive: true }); + closeDbAccessor(); + initDbAccessor(join(dir, "memory", "memories.db")); + }); + + /* Restore fetch and SIGNET_PATH (deleting the variable when it was unset before), close the DB accessor and remove the temp directory. */ afterEach(() => { + globalThis.fetch = originalFetch; + closeDbAccessor(); + if (previousSignetPath === undefined) Reflect.deleteProperty(process.env, "SIGNET_PATH"); + else process.env.SIGNET_PATH = previousSignetPath; + rmSync(dir, { recursive: true, force: true }); + }); + + it("indexes GitHub issue and comment artifacts with source provenance", async () => { + /* The mock answers by URL shape: repo info first, then the issue list, then the comments of issue 12. */ globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ + name: "signetai", + full_name: "Signet-AI/signetai", + default_branch: "main", + html_url: "https://github.com/Signet-AI/signetai", + owner: { login: "Signet-AI" }, + }), + ); + } + if 
(text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Index GitHub", + body: "issue body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [{ name: "sources" }], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 1, + }, + ]), + ); + } + if (text.includes("/issues/12/comments")) { + return Promise.resolve( + Response.json([ + { + id: 99, + body: "comment body", + user: { login: "bob" }, + created_at: "2026-01-03T00:00:00.000Z", + updated_at: "2026-01-03T00:00:00.000Z", + }, + ]), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + /* This test creates its source entry through addGitHubSource instead of a hand-written literal. */ const added = addGitHubSource( + { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues"], + maxItemsPerRepo: 5, + now: "2026-01-01T00:00:00.000Z", + }, + dir, + ); + expect(added.ok).toBe(true); + if (added.ok === false) throw new Error(added.error); + + const result = await githubSourceProvider.sync?.({ + source: added.source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + expect(result?.failures).toEqual([]); + const rows = sourceRows(added.source.id); + expect(rows.map((row) => row.source_kind)).toContain("source_github_issue"); + expect(rows.map((row) => row.source_kind)).toContain("source_github_comment"); + expect(rows.find((row) => row.source_kind === "source_github_issue")?.source_external_id).toBe( + "Signet-AI/signetai:issue:12", + ); + expect(rows.find((row) => row.source_kind === "source_github_comment")?.content).toContain("comment body"); + }); + + it("records requested discussion failures when no token is available", async () => { + /* The settings below ask for discussions only and carry no tokenRef. */ const source: SignetSourceEntry = { + id: "github:test", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: 
"2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/signetai"], + resourceTypes: ["discussions"], + state: "all", + includeComments: true, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + globalThis.fetch = mock((url: string | URL | Request) => { + if (String(url).endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* The failure has to show up both in the sync result and as a stored failure artifact. */ expect(result?.failures[0]?.message).toContain("discussions require tokenRef"); + expect(sourceRows(source.id).map((row) => row.source_kind)).toContain("source_github_failure"); + }); + + it("records a failure when a wildcard repo pattern matches nothing", async () => { + const source: SignetSourceEntry = { + id: "github:wildcard", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/no-match-*", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/no-match-*"], + resourceTypes: ["issues"], + state: "all", + includeComments: true, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + /* Every request gets an empty JSON array, so there is no repository for the wildcard to match. */ globalThis.fetch = mock(() => Promise.resolve(Response.json([]))) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + expect(result?.failures[0]?.message).toContain("matched no repositories"); + expect(sourceRows(source.id).map((row) => row.source_kind)).toContain("source_github_failure"); + }); + + it("keeps same-timestamp GitHub failure artifacts distinct", async () => { + const source: SignetSourceEntry = { + id: "github:failure-collision", + kind: "github", + name: "GitHub", 
+ root: "github://repos/Signet-AI/no-match-*,Signet-AI/also-missing-*", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/no-match-*", "Signet-AI/also-missing-*"], + resourceTypes: ["issues"], + state: "all", + includeComments: false, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + /* Replace the global Date so that new Date() without an argument and Date.now() both yield fixedNow; anything written during this sync therefore sees one identical timestamp. */ const originalDate = globalThis.Date; + const fixedNow = originalDate.parse("2026-02-03T04:05:06.007Z"); + globalThis.Date = class extends originalDate { + constructor(value?: string | number | Date) { + if (value === undefined) super(fixedNow); + else super(value); + } + + static now(): number { + return fixedNow; + } + + static parse(value: string): number { + return originalDate.parse(value); + } + + static UTC( + year: number, + monthIndex: number, + date?: number, + hours?: number, + minutes?: number, + seconds?: number, + ms?: number, + ): number { + return originalDate.UTC(year, monthIndex, date, hours, minutes, seconds, ms); + } + } as DateConstructor; + globalThis.fetch = mock(() => Promise.resolve(Response.json([]))) as typeof fetch; + + try { + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* Two wildcard patterns fail at the same instant; each must still get its own source_path. */ const failureRows = sourceRows(source.id).filter((row) => row.source_kind === "source_github_failure"); + expect(result?.failures).toHaveLength(2); + expect(failureRows).toHaveLength(2); + expect(new Set(failureRows.map((row) => row.source_path)).size).toBe(2); + } finally { + /* Put the real Date back even when an expectation above throws. */ globalThis.Date = originalDate; + } + }); + + it("applies maxItemsPerRepo once across enabled primary resource types", async () => { + const source: SignetSourceEntry = { + id: "github:primary-cap", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + 
providerSettings: { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues", "pulls", "docs"], + state: "all", + includeComments: false, + docPaths: ["README.md"], + maxItemsPerRepo: 1, + }, + }; + /* Every requested URL is recorded so the test can assert which endpoints were never called. */ const requested: string[] = []; + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + requested.push(text); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Current issue", + body: "body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 0, + }, + ]), + ); + } + /* The single issue already fills the cap of 1, so any request beyond repo info and the issue list fails the test. */ throw new Error(`unexpected GitHub request after cap reached: ${text}`); + }) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + const rows = sourceRows(source.id); + expect(result?.failures).toEqual([]); + expect(rows.map((row) => row.source_kind)).toEqual(["source_github_issue"]); + expect(requested.some((entry) => entry.includes("/pulls"))).toBe(false); + expect(requested.some((entry) => entry.includes("/contents/"))).toBe(false); + }); + + it("counts GitHub comments against the per-repo artifact cap", async () => { + const source: SignetSourceEntry = { + id: "github:comment-cap", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues"], + state: "all", + includeComments: true, + docPaths: ["README.md"], + 
maxItemsPerRepo: 2, + }, + }; + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Current issue", + body: "body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 2, + }, + ]), + ); + } + if (text.includes("/issues/12/comments")) { + return Promise.resolve( + Response.json([ + { + id: 1, + body: "first comment", + user: { login: "bob" }, + created_at: "2026-01-03T00:00:00.000Z", + updated_at: "2026-01-03T00:00:00.000Z", + }, + { + id: 2, + body: "second comment", + user: { login: "carol" }, + created_at: "2026-01-04T00:00:00.000Z", + updated_at: "2026-01-04T00:00:00.000Z", + }, + ]), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* With a cap of 2 the issue takes one slot, which leaves room for only the first of the two comments. */ const rows = sourceRows(source.id); + expect(result?.indexed).toBe(2); + expect(rows.map((row) => row.source_kind).sort()).toEqual(["source_github_comment", "source_github_issue"]); + expect(rows.map((row) => row.content).join("\n")).toContain("first comment"); + expect(rows.map((row) => row.content).join("\n")).not.toContain("second comment"); + }); + + it("tracks refreshed comment paths during stale purge", async () => { + const source: SignetSourceEntry = { + id: "github:comment-seen", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: 
"2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues"], + state: "all", + includeComments: true, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + const originalDate = globalThis.Date; + /* Date stub: the first no-argument construction yields 2026-02-01 and every later one yields 2026-01-01, so timestamps taken after the first one look older than it. */ let constructedDates = 0; + globalThis.Date = class extends originalDate { + constructor(value?: string | number | Date) { + if (value === undefined) { + super(constructedDates === 0 ? "2026-02-01T00:00:00.000Z" : "2026-01-01T00:00:00.000Z"); + constructedDates++; + } else { + super(value); + } + } + + static now(): number { + return originalDate.parse("2026-01-01T00:00:00.000Z"); + } + + static parse(value: string): number { + return originalDate.parse(value); + } + + static UTC( + year: number, + monthIndex: number, + date?: number, + hours?: number, + minutes?: number, + seconds?: number, + ms?: number, + ): number { + return originalDate.UTC(year, monthIndex, date, hours, minutes, seconds, ms); + } + } as DateConstructor; + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Current issue", + body: "body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 1, + }, + ]), + ); + } + if (text.includes("/issues/12/comments")) { + return Promise.resolve( + Response.json([ + { + id: 1, + body: "current comment", + user: { login: "bob" }, + created_at: "2026-01-03T00:00:00.000Z", + updated_at: "2026-01-03T00:00:00.000Z", + }, + ]), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + try { + 
const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* Both the issue and its comment must still be stored after the sync, despite the skewed clock installed above. */ const rows = sourceRows(source.id); + expect(result?.failures).toEqual([]); + expect(rows.map((row) => row.source_path)).toContain("github://Signet-AI/signetai/issues/12"); + expect(rows.map((row) => row.source_path)).toContain("github://Signet-AI/signetai/issues/12#comment-1"); + expect(rows.find((row) => row.source_kind === "source_github_comment")?.content).toContain("current comment"); + } finally { + globalThis.Date = originalDate; + } + }); + + it("purges stale artifacts for successful repos after another repo fails", async () => { + const source: SignetSourceEntry = { + id: "github:partial", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/no-match-*,Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/no-match-*", "Signet-AI/signetai"], + resourceTypes: ["issues"], + state: "all", + includeComments: true, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + /* Seed issue 999 for the healthy repo; the mocked issue list in this test only returns issue 12. */ indexExternalMemoryArtifact({ + agentId: "default", + harness: "github", + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: "Signet-AI/signetai:issue:999", + sourceParentPath: "github://Signet-AI/signetai", + sourcePath: "github://Signet-AI/signetai/issues/999", + sourceKind: "source_github_issue", + sourceMtimeMs: Date.parse("2025-01-01T00:00:00.000Z"), + capturedAt: "2025-01-01T00:00:00.000Z", + content: "stale issue", + }); + /* Backdate the seeded row so its updated_at lies in 2025. */ getDbAccessor().withWriteTx((db) => { + db.prepare("UPDATE memory_artifacts SET updated_at = ? WHERE source_id = ? 
AND source_path = ?").run( + "2025-01-01T00:00:00.000Z", + source.id, + "github://Signet-AI/signetai/issues/999", + ); + }); + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.includes("/orgs/Signet-AI/repos") || text.includes("/users/Signet-AI/repos")) { + return Promise.resolve(Response.json([])); + } + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Current issue", + body: "body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 0, + }, + ]), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* The wildcard entry fails, yet the stale issue 999 of the repo that synced cleanly must be gone. */ const rows = sourceRows(source.id); + expect(result?.failures[0]?.message).toContain("matched no repositories"); + expect(rows.map((row) => row.source_external_id)).toContain("Signet-AI/signetai:issue:12"); + expect(rows.map((row) => row.source_external_id)).not.toContain("Signet-AI/signetai:issue:999"); + expect(rows.map((row) => row.source_kind)).toContain("source_github_failure"); + }); + + it("does not purge sibling repo paths with shared name prefixes", async () => { + const source: SignetSourceEntry = { + id: "github:sibling-prefix", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai,Signet-AI/signetai-extra", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/signetai", 
"Signet-AI/signetai-extra"], + resourceTypes: ["issues"], + state: "all", + includeComments: false, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + /* Seed a backdated issue under signetai-extra, a repo whose name starts with the name of the other configured repo. */ indexExternalMemoryArtifact({ + agentId: "default", + harness: "github", + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: "Signet-AI/signetai-extra:issue:999", + sourceParentPath: "github://Signet-AI/signetai-extra", + sourcePath: "github://Signet-AI/signetai-extra/issues/999", + sourceKind: "source_github_issue", + sourceMtimeMs: Date.parse("2025-01-01T00:00:00.000Z"), + capturedAt: "2025-01-01T00:00:00.000Z", + content: "sibling stale issue", + }); + getDbAccessor().withWriteTx((db) => { + db.prepare("UPDATE memory_artifacts SET updated_at = ? WHERE source_id = ? AND source_path = ?").run( + "2025-01-01T00:00:00.000Z", + source.id, + "github://Signet-AI/signetai-extra/issues/999", + ); + }); + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.endsWith("/repos/Signet-AI/signetai-extra")) { + return Promise.resolve( + Response.json({ + name: "signetai-extra", + full_name: "Signet-AI/signetai-extra", + default_branch: "main", + }), + ); + } + if (text.includes("/repos/Signet-AI/signetai/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Current issue", + body: "body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 0, + }, + ]), + ); + } + /* The issue listing of the sibling repo answers 404. */ if (text.includes("/repos/Signet-AI/signetai-extra/issues?")) { + return Promise.resolve(Response.json({ message: "missing" }, { status: 404 })); + } + return Promise.resolve(Response.json([])); + }) as 
typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* The sibling repo failed, so its seeded issue 999 has to survive the purge run for the repo that succeeded. */ const rows = sourceRows(source.id); + expect(result?.failures[0]?.message).toContain("Issues fetch failed: 404"); + expect(rows.map((row) => row.source_external_id)).toContain("Signet-AI/signetai:issue:12"); + expect(rows.map((row) => row.source_external_id)).toContain("Signet-AI/signetai-extra:issue:999"); + }); + + it("purges stale failure artifacts after a later successful sync", async () => { + const source: SignetSourceEntry = { + id: "github:recovered", + kind: "github", + name: "GitHub", + root: "github://repos/Signet-AI/signetai", + enabled: true, + mode: "read-only", + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:00.000Z", + providerSettings: { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues"], + state: "all", + includeComments: false, + docPaths: ["README.md"], + maxItemsPerRepo: 5, + }, + }; + /* Seed a failure artifact dated 2025, standing in for one left behind by an earlier sync. */ indexExternalMemoryArtifact({ + agentId: "default", + harness: "github", + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: "failure:2025-01-01T00:00:00.000Z:old failure", + sourcePath: `github://source/${source.id}/failures/2025-01-01T00%3A00%3A00.000Z`, + sourceKind: "source_github_failure", + sourceMtimeMs: Date.parse("2025-01-01T00:00:00.000Z"), + capturedAt: "2025-01-01T00:00:00.000Z", + content: "old failure", + }); + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Current issue", + body: "body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + 
created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 0, + }, + ]), + ); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const result = await githubSourceProvider.sync?.({ + source, + agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + /* A sync without failures must leave no failure artifact behind, including the one seeded at the start of this test. */ const rows = sourceRows(source.id); + expect(result?.failures).toEqual([]); + expect(rows.map((row) => row.source_external_id)).toContain("Signet-AI/signetai:issue:12"); + expect(rows.map((row) => row.source_kind)).not.toContain("source_github_failure"); + }); + + it("propagates comment fetch failures to the provider result", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ + name: "signetai", + full_name: "Signet-AI/signetai", + default_branch: "main", + html_url: "https://github.com/Signet-AI/signetai", + owner: { login: "Signet-AI" }, + }), + ); + } + if (text.includes("/issues?")) { + return Promise.resolve( + Response.json([ + { + number: 12, + title: "Index GitHub", + body: "issue body", + state: "open", + html_url: "https://github.com/Signet-AI/signetai/issues/12", + user: { login: "alice" }, + labels: [], + created_at: "2026-01-01T00:00:00.000Z", + updated_at: "2026-01-02T00:00:00.000Z", + closed_at: null, + comments: 1, + }, + ]), + ); + } + /* The issue above reports one comment, but the comment listing itself answers 404. */ if (text.includes("/issues/12/comments")) { + return Promise.resolve(Response.json({ message: "missing" }, { status: 404 })); + } + return Promise.resolve(Response.json([])); + }) as typeof fetch; + const added = addGitHubSource( + { + repos: ["Signet-AI/signetai"], + resourceTypes: ["issues"], + maxItemsPerRepo: 5, + now: "2026-01-01T00:00:00.000Z", + }, + dir, + ); + expect(added.ok).toBe(true); + if (added.ok === false) throw new Error(added.error); + + const result = await githubSourceProvider.sync?.({ + source: added.source, 
+ agentsDir: dir, + agentId: "default", + shouldContinue: () => true, + }); + + expect(result?.failures[0]?.message).toContain("comment fetch failed"); + expect(sourceRows(added.source.id).map((row) => row.source_kind)).toContain("source_github_failure"); + }); + + it("purges source-owned GitHub artifacts through the provider", () => { + indexExternalMemoryArtifact({ + agentId: "default", + harness: "github", + sourceId: "github:test", + sourceRoot: "github://repos/Signet-AI/signetai", + sourceExternalId: "Signet-AI/signetai:issue:1", + sourcePath: "github://Signet-AI/signetai/issues/1", + sourceKind: "source_github_issue", + sourceMtimeMs: Date.now(), + content: "old issue", + }); + + /* Only an id is supplied for the source; the object is cast to the full entry type for this call. */ const purged = githubSourceProvider.purge({ id: "github:test" } as SignetSourceEntry, "default"); + + expect(purged).toBeGreaterThanOrEqual(1); + expect(sourceRows("github:test")).toEqual([]); + }); +}); + +/** Test helper: returns the memory_artifacts rows of one source id that are not flagged deleted, ordered by source_path. */ function sourceRows(sourceId: string): Array<{ + source_kind: string; + source_path: string; + source_external_id: string | null; + source_meta_json: string | null; + content: string; +}> { + return getDbAccessor().withReadDb( + (db) => + db + .prepare( + `SELECT source_kind, source_path, source_external_id, source_meta_json, content + FROM memory_artifacts + WHERE source_id = ? 
+ AND COALESCE(is_deleted, 0) = 0 + ORDER BY source_path`, + ) + .all(sourceId) as Array<{ + source_kind: string; + source_path: string; + source_external_id: string | null; + source_meta_json: string | null; + content: string; + }>, + ); +} diff --git a/platform/daemon/src/github-source-provider.ts b/platform/daemon/src/github-source-provider.ts new file mode 100644 index 000000000..157be20dc --- /dev/null +++ b/platform/daemon/src/github-source-provider.ts @@ -0,0 +1,512 @@ +import { createHash } from "node:crypto"; +import { + type GitHubSourceSettings, + type SignetSourceEntry, + type SourceFailureState, + type SourceProviderKind, + parseGitHubSettings, +} from "@signet/core"; +import { resolveDaemonAgentId } from "./agent-id"; +import { yieldEvery } from "./async-yield"; +import { getDbAccessor } from "./db-accessor"; +import { countChanges } from "./db-helpers"; +import { + type GitHubComment, + type GitHubFetchConfig, + type GitHubResource, + expandRepoGlob, + fetchDiscussionComments, + fetchDiscussions, + fetchIssueComments, + fetchIssues, + fetchPullRequestComments, + fetchPullRequests, + fetchPullRequestsBySearch, + fetchRepoDocs, + fetchRepoInfo, + logGitHubFetchError, +} from "./github-source-fetch"; +import { logger } from "./logger"; +import { indexExternalMemoryArtifact } from "./memory-lineage"; +import { getSecret } from "./secrets"; +import type { SourceProviderAdapter, SourceProviderSyncContext, SourceProviderSyncResult } from "./source-providers"; +import { purgeSourceOwnedRows } from "./source-purge"; + +const GITHUB_PROVIDER_KIND: SourceProviderKind = "github"; +const GITHUB_HARNESS = "github"; + +/** One concrete repository to sync: owner and repo name, the combined full name, and the default branch that is later passed to the docs fetch as ref. */ interface ResolvedRepo { + readonly owner: string; + readonly repo: string; + readonly fullName: string; + readonly defaultBranch: string; +} + +/** Result of writing one resource and its comments: how many artifacts were written and their source paths. */ interface WrittenGitHubArtifacts { + readonly count: number; + readonly paths: readonly string[]; +} + +/** Source provider adapter for GitHub: sync delegates to syncGitHubSource and purge removes every row owned by the source id for the given agent. */ export const githubSourceProvider: SourceProviderAdapter = { + kind: "github", + sync: 
syncGitHubSource, + purge: (source, agentId) => purgeSourceOwnedRows({ sourceId: source.id, agentId }), +}; + +/** Syncs every resolved repository of a GitHub source. For each repo it fetches the configured resources, writes them together with their comments until maxItemsPerRepo artifacts have been written, and then purges stale artifacts of that repo, but only when the repo added no new failure and the job has not been cancelled. After the loop, old failure artifacts are purged unless the job was cancelled, and one failure artifact is written per collected failure. Returns the indexed and scanned counts, the repo total and the failures. Throws when the settings list no repositories. */ async function syncGitHubSource(context: SourceProviderSyncContext): Promise { + const settings = parseGitHubSettings(context.source.providerSettings); + if (settings.repos.length === 0) throw new Error("GitHub source has no repositories"); + + const failures: SourceFailureState[] = []; + /* Captured once up front and handed to both purge helpers as the sync start marker. */ const syncStartedAt = new Date().toISOString(); + const agentId = context.agentId || resolveDaemonAgentId(); + const token = settings.tokenRef ? await resolveToken(settings.tokenRef) : undefined; + const repos = await resolveRepos(context.source, settings, failures, token); + let indexed = 0; + let scanned = 0; + + for (const repo of repos) { + if (!context.shouldContinue()) break; + /* Remembered so the purge below can tell whether this particular repo added a failure. */ const failureCountBeforeRepo = failures.length; + context.onProgress?.({ scanned, total: repos.length, indexed, currentPath: `github://${repo.fullName}` }); + const config: GitHubFetchConfig = { owner: repo.owner, repo: repo.repo, token }; + const seenPaths = new Set(); + const yielder = yieldEvery(5); + let repoIndexed = 0; + + for (const resource of await fetchRepoResources(context.source, settings, config, repo, failures)) { + if (!context.shouldContinue()) break; + if (repoIndexed >= settings.maxItemsPerRepo) break; + const written = await writeResourceWithComments( + context.source, + agentId, + repo.fullName, + config, + resource, + settings, + failures, + settings.maxItemsPerRepo - repoIndexed, + ); + repoIndexed += written.count; + indexed += written.count; + for (const path of written.paths) { + seenPaths.add(path); + } + await yielder(); + } + scanned++; + context.onProgress?.({ scanned, total: repos.length, indexed, currentPath: `github://${repo.fullName}` }); + /* Skip the purge after a cancellation: the loop above may have stopped early, so seenPaths can be incomplete and purging against it could remove artifacts that still exist upstream. This mirrors the shouldContinue guard on the failure-artifact purge below. */ if (failures.length === failureCountBeforeRepo && context.shouldContinue()) + purgeStaleGitHubArtifacts(context.source.id, agentId, syncStartedAt, seenPaths, repo.fullName); + } + if (context.shouldContinue()) 
purgeStaleGitHubFailureArtifacts(context.source.id, agentId, syncStartedAt); + for (const failure of failures) { + indexed += writeFailureArtifact(context.source, agentId, failure); + } + + return { indexed, scanned, total: repos.length, failures }; +} + +/** Collects the resources of one repository in the fixed order issues, pulls, discussions, docs. All four share a single maxItemsPerRepo budget: each fetch is asked for at most the remaining budget and is skipped once the budget is used up. Errors returned by a fetch are recorded through writeFetchFailures. Pull requests go through the search-based fetch when labels are configured. Discussions without a token add a failure instead of being fetched. */ async function fetchRepoResources( + source: SignetSourceEntry, + settings: GitHubSourceSettings, + config: GitHubFetchConfig, + repo: ResolvedRepo, + failures: SourceFailureState[], +): Promise { + const resources: GitHubResource[] = []; + if (settings.resourceTypes.includes("issues") && hasResourceBudget(resources, settings)) { + const result = await fetchIssues( + config, + undefined, + settings.state, + remainingResourceBudget(resources, settings), + settings.labels, + ); + resources.push(...result.resources); + writeFetchFailures(source, failures, repo.fullName, "issues", result.errors); + } + if (settings.resourceTypes.includes("pulls") && hasResourceBudget(resources, settings)) { + const result = settings.labels?.length + ? await fetchPullRequestsBySearch( + config, + settings.labels, + undefined, + settings.state, + remainingResourceBudget(resources, settings), + ) + : await fetchPullRequests(config, undefined, settings.state, remainingResourceBudget(resources, settings)); + resources.push(...result.resources); + writeFetchFailures(source, failures, repo.fullName, "pulls", result.errors); + } + if (settings.resourceTypes.includes("discussions") && hasResourceBudget(resources, settings)) { + if (!config.token) { + const failure = failureState(source, "GitHub discussions require tokenRef", { + repo: repo.fullName, + phase: "discussions", + }); + failures.push(failure); + } else { + const result = await fetchDiscussions( + config, + undefined, + settings.state, + remainingResourceBudget(resources, settings), + ); + const labelSet = settings.labels?.length ? 
new Set(settings.labels) : null; + resources.push( + ...result.resources.filter((resource) => !labelSet || resource.labels.some((label) => labelSet.has(label))), + ); + writeFetchFailures(source, failures, repo.fullName, "discussions", result.errors); + } + } + if (settings.resourceTypes.includes("docs") && hasResourceBudget(resources, settings)) { + const result = await fetchRepoDocs( + config, + settings.docPaths, + repo.defaultBranch, + remainingResourceBudget(resources, settings), + ); + resources.push(...result.resources); + writeFetchFailures(source, failures, repo.fullName, "docs", result.errors); + } + return resources; +} + +function hasResourceBudget(resources: readonly GitHubResource[], settings: GitHubSourceSettings): boolean { + return remainingResourceBudget(resources, settings) > 0; +} + +function remainingResourceBudget(resources: readonly GitHubResource[], settings: GitHubSourceSettings): number { + return Math.max(0, settings.maxItemsPerRepo - resources.length); +} + +async function writeResourceWithComments( + source: SignetSourceEntry, + agentId: string, + repo: string, + config: GitHubFetchConfig, + resource: GitHubResource, + settings: GitHubSourceSettings, + failures: SourceFailureState[], + remainingArtifactBudget: number, +): Promise { + if (remainingArtifactBudget <= 0) return { count: 0, paths: [] }; + const paths = [writeResourceArtifact(source, agentId, repo, resource)]; + const remainingCommentBudget = remainingArtifactBudget - paths.length; + if ( + !settings.includeComments || + resource.commentsCount <= 0 || + resource.type === "doc" || + remainingCommentBudget <= 0 + ) { + return { count: paths.length, paths }; + } + try { + const comments = await fetchCommentsForResource(config, resource); + for (const comment of comments.slice(0, remainingCommentBudget)) { + paths.push(writeCommentArtifact(source, agentId, repo, resource, comment)); + } + } catch (err) { + logGitHubFetchError(source.id, repo, `${resource.type}_comments`, err); + 
failures.push( + failureState(source, `GitHub ${resource.type} comment fetch failed: ${errorMessage(err)}`, { + repo, + type: resource.type, + number: resource.number, + path: resource.path, + }), + ); + } + return { count: paths.length, paths }; +} + +async function fetchCommentsForResource( + config: GitHubFetchConfig, + resource: GitHubResource, +): Promise { + if (!resource.number) return []; + if (resource.type === "issue") return fetchIssueComments(config, resource.number); + if (resource.type === "pull") { + const issueComments = await fetchIssueComments(config, resource.number); + const reviewComments = await fetchPullRequestComments(config, resource.number); + return [...issueComments, ...reviewComments]; + } + if (resource.type === "discussion") return fetchDiscussionComments(config, resource.number); + return []; +} + +function writeResourceArtifact( + source: SignetSourceEntry, + agentId: string, + repo: string, + resource: GitHubResource, +): string { + const path = resourcePath(repo, resource); + indexExternalMemoryArtifact({ + agentId, + harness: GITHUB_HARNESS, + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: resourceExternalId(repo, resource), + sourceParentPath: `github://${repo}`, + sourcePath: path, + sourceKind: `source_github_${resource.type}`, + sourceMtimeMs: Date.parse(resource.updatedAt) || Date.now(), + capturedAt: resource.updatedAt, + content: resourceContent(repo, resource), + sourceMeta: { + provider: GITHUB_PROVIDER_KIND, + repo, + type: resource.type, + number: resource.number, + path: resource.path, + url: resource.url, + state: resource.state, + labels: resource.labels, + author: resource.author, + createdAt: resource.createdAt, + closedAt: resource.closedAt, + mergedAt: resource.mergedAt, + commentsCount: resource.commentsCount, + ...resource.extra, + }, + }); + return path; +} + +function writeCommentArtifact( + source: SignetSourceEntry, + agentId: string, + repo: string, + resource: GitHubResource, + 
comment: GitHubComment, +): string { + const author = + typeof comment.author === "string" ? comment.author : (comment.author?.login ?? comment.user?.login ?? null); + const commentId = String(comment.id); + const path = `${resourcePath(repo, resource)}#comment-${commentId}`; + indexExternalMemoryArtifact({ + agentId, + harness: GITHUB_HARNESS, + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: `${resourceExternalId(repo, resource)}#comment:${commentId}`, + sourceParentPath: resourcePath(repo, resource), + sourcePath: path, + sourceKind: "source_github_comment", + sourceMtimeMs: Date.parse(comment.updated_at) || Date.now(), + capturedAt: comment.updated_at, + content: [`# Comment on ${resource.title}`, "", `Author: ${author ?? "unknown"}`, "", comment.body].join("\n"), + sourceMeta: { + provider: GITHUB_PROVIDER_KIND, + repo, + parentType: resource.type, + parentNumber: resource.number, + parentPath: resource.path, + commentId, + author, + createdAt: comment.created_at, + updatedAt: comment.updated_at, + }, + }); + return path; +} + +function writeFetchFailures( + source: SignetSourceEntry, + failures: SourceFailureState[], + repo: string, + phase: string, + errors: readonly { readonly message: string; readonly retryable: boolean }[], +): void { + for (const error of errors) { + failures.push(failureState(source, error.message, { repo, phase, retryable: error.retryable })); + } +} + +function writeFailureArtifact(source: SignetSourceEntry, agentId: string, failure: SourceFailureState): number { + indexExternalMemoryArtifact({ + agentId, + harness: GITHUB_HARNESS, + sourceId: source.id, + sourceRoot: source.root, + sourceExternalId: `failure:${failure.failedAt}:${failure.message}`, + sourcePath: failureArtifactPath(source, failure), + sourceKind: "source_github_failure", + sourceMtimeMs: Date.parse(failure.failedAt) || Date.now(), + capturedAt: failure.failedAt, + content: failure.message, + sourceMeta: failure.metadata, + }); + return 1; +} + 
+function failureArtifactPath(source: SignetSourceEntry, failure: SourceFailureState): string { + const fingerprint = createHash("sha256") + .update(failure.message) + .update("\0") + .update(JSON.stringify(failure.metadata ?? {})) + .digest("hex") + .slice(0, 16); + return `github://source/${source.id}/failures/${encodeURIComponent(failure.failedAt)}-${fingerprint}`; +} + +async function resolveRepos( + source: SignetSourceEntry, + settings: GitHubSourceSettings, + failures: SourceFailureState[], + token?: string, +): Promise { + const resolved: ResolvedRepo[] = []; + for (const pattern of settings.repos) { + const [owner, repoPart] = pattern.split("/"); + if (!owner || !repoPart) continue; + if (repoPart.includes("*")) { + const expanded = await expandRepoGlob(owner, repoPart, token, settings.maxItemsPerRepo); + if (expanded.repos.length === 0) { + failures.push( + failureState(source, `GitHub wildcard repo pattern matched no repositories: ${pattern}`, { + owner, + pattern, + phase: "repo_expansion", + }), + ); + } + if (expanded.truncated) { + logger.warn("github-source", "Wildcard repo source expansion hit configured cap", { + owner, + pattern: repoPart, + limit: settings.maxItemsPerRepo, + }); + } + for (const fullName of expanded.repos) { + const [expandedOwner, expandedRepo] = fullName.split("/"); + if (expandedOwner && expandedRepo) { + resolved.push({ owner: expandedOwner, repo: expandedRepo, fullName, defaultBranch: "main" }); + } + } + } else { + resolved.push({ owner, repo: repoPart, fullName: `${owner}/${repoPart}`, defaultBranch: "main" }); + } + } + const withDefaultBranches: ResolvedRepo[] = []; + for (const repo of resolved) { + const info = await fetchRepoInfo({ owner: repo.owner, repo: repo.repo, token }).catch(() => null); + withDefaultBranches.push({ ...repo, defaultBranch: info?.defaultBranch ?? 
repo.defaultBranch }); + } + return withDefaultBranches; +} + +async function resolveToken(tokenRef: string): Promise { + try { + return await getSecret(tokenRef); + } catch (err) { + throw new Error(`Failed to resolve GitHub token ref '${tokenRef}': ${errorMessage(err)}`); + } +} + +function failureState( + source: SignetSourceEntry, + message: string, + metadata?: Readonly>, +): SourceFailureState { + return { + sourceId: source.id, + providerKind: GITHUB_PROVIDER_KIND, + failedAt: new Date().toISOString(), + recoverable: true, + message, + metadata, + }; +} + +function purgeStaleGitHubArtifacts( + sourceId: string, + agentId: string, + syncStartedAt: string, + seenPaths: ReadonlySet, + repo: string, +): void { + const repoPathPrefix = `github://${repo}/`; + getDbAccessor().withWriteTx((db) => { + const rows = db + .prepare( + `SELECT rowid, source_path FROM memory_artifacts + WHERE agent_id = ? + AND source_id = ? + AND source_path >= ? + AND source_path < ? + AND updated_at < ? + AND COALESCE(is_deleted, 0) = 0`, + ) + .all(agentId, sourceId, repoPathPrefix, `${repoPathPrefix}\uffff`, syncStartedAt) as Array<{ + rowid: number; + source_path: string; + }>; + for (const row of rows) { + if (seenPaths.has(row.source_path)) continue; + countChanges( + db + .prepare("UPDATE memory_artifacts SET is_deleted = 1, updated_at = ? WHERE rowid = ?") + .run(syncStartedAt, row.rowid), + ); + } + }); +} + +function purgeStaleGitHubFailureArtifacts(sourceId: string, agentId: string, syncStartedAt: string): void { + getDbAccessor().withWriteTx((db) => { + countChanges( + db + .prepare( + `UPDATE memory_artifacts + SET is_deleted = 1, updated_at = ? + WHERE agent_id = ? + AND source_id = ? + AND source_kind = 'source_github_failure' + AND source_path >= ? + AND source_path < ? 
+ AND COALESCE(is_deleted, 0) = 0`, + ) + .run( + syncStartedAt, + agentId, + sourceId, + `github://source/${sourceId}/failures/`, + `github://source/${sourceId}/failures/\uffff`, + ), + ); + }); +} + +function resourceExternalId(repo: string, resource: GitHubResource): string { + if (resource.type === "doc") return `${repo}:docs:${resource.path ?? ""}`; + return `${repo}:${resource.type}:${resource.number ?? 0}`; +} + +function resourcePath(repo: string, resource: GitHubResource): string { + if (resource.type === "doc") return `github://${repo}/docs/${resource.path ?? ""}`; + return `github://${repo}/${resource.type}s/${resource.number ?? 0}`; +} + +function resourceContent(repo: string, resource: GitHubResource): string { + const title = + resource.type === "doc" ? resource.title : `${repo} ${resource.type} #${resource.number}: ${resource.title}`; + return [ + `# ${title}`, + "", + `URL: ${resource.url || `https://github.com/${repo}`}`, + `State: ${resource.state}`, + resource.author ? `Author: ${resource.author}` : undefined, + resource.labels.length > 0 ? `Labels: ${resource.labels.join(", ")}` : undefined, + "", + resource.body, + ] + .filter((line): line is string => line !== undefined) + .join("\n"); +} + +function errorMessage(err: unknown): string { + return err instanceof Error ? 
err.message : String(err); +} diff --git a/platform/daemon/src/routes/sources-routes.test.ts b/platform/daemon/src/routes/sources-routes.test.ts index 477cd938d..ca187434e 100644 --- a/platform/daemon/src/routes/sources-routes.test.ts +++ b/platform/daemon/src/routes/sources-routes.test.ts @@ -1,4 +1,4 @@ -import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -19,6 +19,8 @@ import { } from "../source-index-progress"; import { registerSourcesRoutes } from "./sources-routes"; +const originalFetch = globalThis.fetch; + describe("Sources routes", () => { let dir = ""; let vault = ""; @@ -41,6 +43,7 @@ describe("Sources routes", () => { }); afterEach(() => { + globalThis.fetch = originalFetch; clearSourceIndexProgressForTests(); closeDbAccessor(); if (previousSignetPath === undefined) Reflect.deleteProperty(process.env, "SIGNET_PATH"); @@ -199,6 +202,41 @@ describe("Sources routes", () => { ).toBeGreaterThan(0); }); + it("connects a GitHub source through provider-neutral source config", async () => { + globalThis.fetch = mock((url: string | URL | Request) => { + const text = String(url); + if (text.endsWith("/repos/Signet-AI/signetai")) { + return Promise.resolve( + Response.json({ name: "signetai", full_name: "Signet-AI/signetai", default_branch: "main" }), + ); + } + if (text.includes("/issues?") || text.includes("/pulls?")) return Promise.resolve(Response.json([])); + if (text.includes("/contents/")) return Promise.resolve(new Response("missing", { status: 404 })); + return Promise.resolve(Response.json([])); + }) as typeof fetch; + + const res = await makeApp().request("/api/sources/github", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + repos: ["Signet-AI/signetai"], + name: 
"Route GitHub", + resourceTypes: ["issues", "docs"], + maxItemsPerRepo: 5, + }), + }); + + expect(res.status).toBe(202); + const body = (await res.json()) as { + source: { kind: string; providerSettings?: { repos?: string[] } }; + queued: boolean; + }; + expect(body.queued).toBe(true); + expect(body.source.kind).toBe("github"); + expect(body.source.providerSettings?.repos).toEqual(["Signet-AI/signetai"]); + expect(loadSourcesConfig(dir).sources[0]?.kind).toBe("github"); + }); + it("rejects raw Discord tokens at the route boundary", async () => { const res = await makeApp().request("/api/sources/discord", { method: "POST", @@ -213,6 +251,21 @@ describe("Sources routes", () => { expect(((await res.json()) as { error: string }).error).toContain("not a raw token"); }); + it("rejects raw GitHub tokens at the route boundary", async () => { + const res = await makeApp().request("/api/sources/github", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + repos: ["Signet-AI/signetai"], + tokenRef: `github_pat_${"a".repeat(60)}`, + }), + }); + + expect(res.status).toBe(400); + expect(((await res.json()) as { error: string }).error).toContain("not a raw token"); + expect(loadSourcesConfig(dir).sources).toHaveLength(0); + }); + it("does not block the connect response on a slow Obsidian source scan", async () => { let releaseScan = () => {}; const syncGate = new Promise((resolve) => { diff --git a/platform/daemon/src/routes/sources-routes.ts b/platform/daemon/src/routes/sources-routes.ts index 3b7d71acd..95b068c6a 100644 --- a/platform/daemon/src/routes/sources-routes.ts +++ b/platform/daemon/src/routes/sources-routes.ts @@ -9,6 +9,7 @@ import { SOURCE_CHUNK_SOURCE_TYPE, type SignetSourceEntry, addDiscordSource, + addGitHubSource, addObsidianSource, loadSourcesConfig, markSourceIndexed, @@ -85,6 +86,19 @@ interface AddDiscordSourceBody { readonly syncMode?: "rest" | "gateway-tail" | "desktop-cache"; } +interface AddGitHubSourceBody { 
+ readonly repos?: readonly string[]; + readonly repo?: string; + readonly tokenRef?: string; + readonly name?: string; + readonly resourceTypes?: readonly ("issues" | "pulls" | "discussions" | "docs")[]; + readonly state?: "open" | "closed" | "all"; + readonly includeComments?: boolean; + readonly labels?: readonly string[]; + readonly docPaths?: readonly string[]; + readonly maxItemsPerRepo?: number; +} + interface PickDirectoryBody { readonly title?: string; } @@ -245,6 +259,49 @@ export function registerSourcesRoutes(app: Hono, deps: RegisterSourcesRoutesDeps } }); + app.post("/api/sources/github", async (c) => { + let body: AddGitHubSourceBody = {}; + try { + body = (await c.req.json()) as AddGitHubSourceBody; + } catch { + return c.json({ error: "Invalid JSON body" }, 400); + } + + const repos = Array.isArray(body.repos) + ? body.repos.filter((entry): entry is string => typeof entry === "string") + : typeof body.repo === "string" + ? [body.repo] + : []; + const result = addGitHubSource( + { + repos, + tokenRef: typeof body.tokenRef === "string" ? body.tokenRef : undefined, + name: body.name, + resourceTypes: body.resourceTypes, + state: body.state, + includeComments: body.includeComments, + labels: Array.isArray(body.labels) + ? body.labels.filter((entry): entry is string => typeof entry === "string") + : undefined, + docPaths: Array.isArray(body.docPaths) + ? 
body.docPaths.filter((entry): entry is string => typeof entry === "string") + : undefined, + maxItemsPerRepo: body.maxItemsPerRepo, + }, + agentsDir, + ); + if (result.ok === false) return c.json({ error: result.error }, 400); + + const job = enqueueSourceIndexJob({ + source: result.source, + agentsDir, + startBridge, + purgeNativeSource, + }); + + return c.json({ source: result.source, created: result.created, indexed: 0, queued: true, job }, 202); + }); + app.delete("/api/sources/:sourceId", (c) => { const sourceId = c.req.param("sourceId"); const result = removeSource(sourceId, agentsDir); @@ -304,8 +361,16 @@ async function runSourceIndexJob(input: SourceIndexJobInput, job: SourceIndexJob }, }); if (!isCurrentSourceIndexJob(input.source.id, job.id)) return; - markSourceIndexed(input.source.id, undefined, input.agentsDir); - completeSourceIndexJob(input.source.id, job.id, result.indexed); + if (result.failures.length > 0) { + failSourceIndexJob( + input.source.id, + job.id, + `${input.source.kind} source sync completed with ${result.failures.length} failure(s)`, + ); + } else { + markSourceIndexed(input.source.id, undefined, input.agentsDir); + completeSourceIndexJob(input.source.id, job.id, result.indexed); + } return; } if (!provider.toNativeSource) throw new Error(`Source provider has no sync implementation: ${input.source.kind}`); diff --git a/platform/daemon/src/source-providers.ts b/platform/daemon/src/source-providers.ts index 11b9a6b3e..b8ae1f6f5 100644 --- a/platform/daemon/src/source-providers.ts +++ b/platform/daemon/src/source-providers.ts @@ -1,5 +1,6 @@ import type { SignetSourceEntry, SignetSourceKind, SourceFailureState } from "@signet/core"; import { discordSourceProvider } from "./discord-source-provider"; +import { githubSourceProvider } from "./github-source-provider"; import { type NativeMemorySource, obsidianNativeMemorySource, @@ -54,9 +55,10 @@ export function registerSourceProvider(provider: SourceProviderAdapter): void { export function 
getSourceProvider(kind: SignetSourceKind): SourceProviderAdapter | undefined { if (kind === obsidianSourceProvider.kind) return obsidianSourceProvider; if (kind === discordSourceProvider.kind) return discordSourceProvider; + if (kind === githubSourceProvider.kind) return githubSourceProvider; return additionalProviders.get(kind); } export function configuredSourceProviders(): readonly SourceProviderAdapter[] { - return [obsidianSourceProvider, discordSourceProvider, ...additionalProviders.values()]; + return [obsidianSourceProvider, discordSourceProvider, githubSourceProvider, ...additionalProviders.values()]; } diff --git a/surfaces/cli/src/commands/sources.ts b/surfaces/cli/src/commands/sources.ts index bc417e9c3..d4043775e 100644 --- a/surfaces/cli/src/commands/sources.ts +++ b/surfaces/cli/src/commands/sources.ts @@ -2,6 +2,7 @@ import type { Command } from "commander"; import { type SourcesDeps, addDiscordSourceFromCli, + addGitHubSourceFromCli, addObsidianVaultSource, exportConfiguredSourceSnapshot, importConfiguredSourceSnapshot, @@ -140,6 +141,20 @@ export function registerSourcesCommands(program: Command, deps: RegisterSourcesC addObsidianVaultSource(path, options, deps), ); + add + .command("github") + .description("Index GitHub repositories as read-only recall sources") + .requiredOption("--repo ", "GitHub repo pattern (repeatable, supports owner/*)", collect, []) + .option("--token-ref ", "Signet secret name or external secret reference for a GitHub token") + .option("--name ", "Display name for the GitHub source") + .option("--resource-type ", "Resource type: issues, pulls, discussions, docs (repeatable)", collect, []) + .option("--state ", "Resource state: open, closed, or all", "all") + .option("--no-include-comments", "Skip issue, PR, and discussion comments") + .option("--label