diff --git a/apps/backend/cmd/kandev/main.go b/apps/backend/cmd/kandev/main.go index 04b8ce2f1..41a771a43 100644 --- a/apps/backend/cmd/kandev/main.go +++ b/apps/backend/cmd/kandev/main.go @@ -422,6 +422,22 @@ func startAgentInfrastructure( return false } + // Wire the soft-deleted-profile pre-flight into the watcher dispatch. + // Orphan watchers (their agent profile was soft-deleted by the + // reconciler when its agent type left the registry) self-heal on the + // next poll instead of looping on "profile not found" forever. + orchestratorSvc.SetProfileLookup(&profileLookupAdapter{store: repos.AgentSettings}) + + // Wire the watcher-dependency enumerator into the agent settings + // controller so the profile-delete UI can surface "this will also + // disable N watchers" before the user confirms. + agentSettingsController.SetWatcherDependencyChecker(&watcherDepsAdapter{ + linear: services.Linear, + jira: services.Jira, + github: services.GitHub, + log: log, + }) + // Wire GitHub service into orchestrator for PR auto-detection on push if services.GitHub != nil { orchestratorSvc.SetGitHubService(services.GitHub) diff --git a/apps/backend/cmd/kandev/orchestrator.go b/apps/backend/cmd/kandev/orchestrator.go index 228f3e3f7..1aa1b3a1d 100644 --- a/apps/backend/cmd/kandev/orchestrator.go +++ b/apps/backend/cmd/kandev/orchestrator.go @@ -3,6 +3,7 @@ package main import ( "context" "crypto/sha1" + "database/sql" "errors" "fmt" "os" @@ -14,10 +15,13 @@ import ( "github.com/kandev/kandev/internal/agent/registry" "github.com/kandev/kandev/internal/agent/runtime/lifecycle" + agentsettingscontroller "github.com/kandev/kandev/internal/agent/settings/controller" + settingsstore "github.com/kandev/kandev/internal/agent/settings/store" "github.com/kandev/kandev/internal/common/config" "github.com/kandev/kandev/internal/common/logger" "github.com/kandev/kandev/internal/db" "github.com/kandev/kandev/internal/events/bus" + githubpkg "github.com/kandev/kandev/internal/github" jirapkg "github.com/kandev/kandev/internal/jira" linearpkg "github.com/kandev/kandev/internal/linear" "github.com/kandev/kandev/internal/orchestrator" @@ -310,6 +314,10 @@ func (a *jiraServiceAdapter) ReleaseIssueWatchTask(ctx context.Context, watchID, return a.svc.Store().ReleaseIssueWatchTask(ctx, watchID, issueKey) } +func (a *jiraServiceAdapter) DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error { + return a.svc.Store().DisableIssueWatchWithError(ctx, watchID, cause) +} + // linearServiceAdapter exposes the Linear service's issue-watch dedup methods // to the orchestrator without leaking the rest of the package surface area. type linearServiceAdapter struct { @@ -328,6 +336,261 @@ func (a *linearServiceAdapter) ReleaseIssueWatchTask(ctx context.Context, watchI return a.svc.Store().ReleaseIssueWatchTask(ctx, watchID, identifier) } +func (a *linearServiceAdapter) DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error { + return a.svc.Store().DisableIssueWatchWithError(ctx, watchID, cause) +} + +// profileLookupAdapter satisfies orchestrator.ProfileLookup by delegating to +// the agent settings store. Returns (true, name, nil) when the row exists +// but is soft-deleted, which the orchestrator treats as a signal to self-heal +// the bound watcher (set enabled=0 + last_error). All other shapes — live +// row, missing row, driver error — are returned verbatim so the dispatch +// pipeline fails open on transient failures. +type profileLookupAdapter struct { + store settingsstore.Repository +} + +func (a *profileLookupAdapter) LookupProfile(ctx context.Context, profileID string) (bool, string, error) { + if _, err := a.store.GetAgentProfile(ctx, profileID); err == nil { + return false, "", nil + } else if !errors.Is(err, sql.ErrNoRows) { + return false, "", err + } + profile, err := a.store.GetAgentProfileIncludingDeleted(ctx, profileID) + if err != nil || profile == nil || profile.DeletedAt == nil { + return false, "", err + } + return true, profile.Name, nil +} + +// watcherDepsAdapter enumerates linear / jira / github_issue / github_review +// watcher rows that reference an agent profile. The profile-delete confirm +// dialog uses the list to render "this will also disable N watchers — are +// you sure?". +// +// Each integration's package is optional in dev mode; nil-safe so the +// adapter degrades gracefully when one isn't wired. +type watcherDepsAdapter struct { + linear *linearpkg.Service + jira *jirapkg.Service + github githubWatcherLister + log *logger.Logger +} + +// githubWatcherLister is the slice of github.Service the adapter needs. +// Local alias so the adapter file stays free of the wider GitHubService +// orchestrator interface, which carries many unrelated methods. Uses the +// enabled-only listers so already-disabled (self-healed) watchers do not +// inflate the dependency count surfaced in ErrProfileInUseDetail. Disable +// methods are the same store-level helpers the dispatch coordinator's +// self-heal path uses. +type githubWatcherLister interface { + ListEnabledIssueWatches(ctx context.Context) ([]*githubpkg.IssueWatch, error) + ListEnabledReviewWatches(ctx context.Context) ([]*githubpkg.ReviewWatch, error) + DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error + DisableReviewWatchWithError(ctx context.Context, watchID, cause string) error +} + +func (a *watcherDepsAdapter) ListWatchersByAgentProfile(ctx context.Context, profileID string) ([]agentsettingscontroller.WatcherReference, error) { + if profileID == "" { + return nil, nil + } + var refs []agentsettingscontroller.WatcherReference + more, err := a.listLinearRefs(ctx, profileID) + if err != nil { + return nil, err + } + refs = append(refs, more...) + more, err = a.listJiraRefs(ctx, profileID) + if err != nil { + return nil, err + } + refs = append(refs, more...) + more, err = a.listGitHubRefs(ctx, profileID) + if err != nil { + return nil, err + } + refs = append(refs, more...) + return refs, nil +} + +// DisableWatchersByAgentProfile enumerates the enabled watcher rows that +// reference profileID and flips each to enabled=0 with the supplied cause. +// Returns the list it disabled so the caller can log. Best-effort across +// integrations: a failure for one integration logs and continues to the +// next so a single broken store doesn't strand the rest of the user's +// orphaned watchers. +func (a *watcherDepsAdapter) DisableWatchersByAgentProfile(ctx context.Context, profileID, cause string) ([]agentsettingscontroller.WatcherReference, error) { + refs, err := a.ListWatchersByAgentProfile(ctx, profileID) + if err != nil { + return nil, err + } + disabled := make([]agentsettingscontroller.WatcherReference, 0, len(refs)) + for _, ref := range refs { + if err := a.disableByKind(ctx, ref.Kind, ref.ID, cause); err != nil { + a.log.Warn("failed to disable referencing watcher", + zap.String("profile_id", profileID), + zap.String("watcher_kind", ref.Kind), + zap.String("watcher_id", ref.ID), + zap.Error(err)) + continue + } + disabled = append(disabled, ref) + } + return disabled, nil +} + +func (a *watcherDepsAdapter) disableByKind(ctx context.Context, kind, watchID, cause string) error { + switch kind { + case "linear": + if a.linear == nil { + return nil + } + return a.linear.Store().DisableIssueWatchWithError(ctx, watchID, cause) + case "jira": + if a.jira == nil { + return nil + } + return a.jira.Store().DisableIssueWatchWithError(ctx, watchID, cause) + case "github_issue": + if a.github == nil { + return nil + } + return a.github.DisableIssueWatchWithError(ctx, watchID, cause) + case "github_review": + if a.github == nil { + return nil + } + return a.github.DisableReviewWatchWithError(ctx, watchID, cause) + default: + return fmt.Errorf("unknown watcher kind: %s", kind) + } +} + +func (a *watcherDepsAdapter) listLinearRefs(ctx context.Context, profileID string) ([]agentsettingscontroller.WatcherReference, error) { + if a.linear == nil { + return nil, nil + } + watches, err := a.linear.Store().ListEnabledIssueWatches(ctx) + if err != nil { + return nil, fmt.Errorf("linear watchers: %w", err) + } + var out []agentsettingscontroller.WatcherReference + for _, w := range watches { + if w.AgentProfileID == profileID { + out = append(out, agentsettingscontroller.WatcherReference{ + ID: w.ID, Kind: "linear", Label: linearWatchLabel(w), + }) + } + } + return out, nil +} + +func (a *watcherDepsAdapter) listJiraRefs(ctx context.Context, profileID string) ([]agentsettingscontroller.WatcherReference, error) { + if a.jira == nil { + return nil, nil + } + watches, err := a.jira.Store().ListEnabledIssueWatches(ctx) + if err != nil { + return nil, fmt.Errorf("jira watchers: %w", err) + } + var out []agentsettingscontroller.WatcherReference + for _, w := range watches { + if w.AgentProfileID == profileID { + out = append(out, agentsettingscontroller.WatcherReference{ + ID: w.ID, Kind: "jira", Label: truncateWatcherLabel(w.JQL), + }) + } + } + return out, nil +} + +func (a *watcherDepsAdapter) listGitHubRefs(ctx context.Context, profileID string) ([]agentsettingscontroller.WatcherReference, error) { + if a.github == nil { + return nil, nil + } + issues, err := a.github.ListEnabledIssueWatches(ctx) + if err != nil { + return nil, fmt.Errorf("github issue watchers: %w", err) + } + var out []agentsettingscontroller.WatcherReference + for _, w := range issues { + if w.AgentProfileID == profileID { + out = append(out, agentsettingscontroller.WatcherReference{ + ID: w.ID, Kind: "github_issue", Label: githubIssueWatchLabel(w), + }) + } + } + reviews, err := a.github.ListEnabledReviewWatches(ctx) + if err != nil { + return nil, fmt.Errorf("github review watchers: %w", err) + } + for _, w := range reviews { + if w.AgentProfileID == profileID { + out = append(out, agentsettingscontroller.WatcherReference{ + ID: w.ID, Kind: "github_review", Label: githubReviewWatchLabel(w), + }) + } + } + return out, nil +} + +// linearWatchLabel renders a Linear filter into a short human-readable label +// for the confirmation dialog. The team key is the most-recognisable scoping +// fact; the query string is a fallback. Capped at watcherLabelMaxLen. +func linearWatchLabel(w *linearpkg.IssueWatch) string { + switch { + case w.Filter.TeamKey != "": + return truncateWatcherLabel("team " + w.Filter.TeamKey) + case w.Filter.Query != "": + return truncateWatcherLabel(w.Filter.Query) + default: + return "all teams" + } +} + +func githubIssueWatchLabel(w *githubpkg.IssueWatch) string { + if w.CustomQuery != "" { + return truncateWatcherLabel(w.CustomQuery) + } + return truncateWatcherLabel(githubReposJoin(w.Repos)) +} + +func githubReviewWatchLabel(w *githubpkg.ReviewWatch) string { + if w.CustomQuery != "" { + return truncateWatcherLabel(w.CustomQuery) + } + return truncateWatcherLabel(githubReposJoin(w.Repos)) +} + +func githubReposJoin(repos []githubpkg.RepoFilter) string { + if len(repos) == 0 { + return "all repos" + } + parts := make([]string, 0, len(repos)) + for _, r := range repos { + parts = append(parts, r.Owner+"/"+r.Name) + } + return strings.Join(parts, ", ") +} + +const watcherLabelMaxLen = 80 + +// truncateWatcherLabel caps a watcher's human-readable label at +// watcherLabelMaxLen runes, appending "…" when truncation happens. Counted +// in runes (not bytes) so a JQL or filter ending in multi-byte UTF-8 +// (Japanese, emoji) is not split mid-codepoint — slicing a byte string +// would emit invalid UTF-8 and the UI would render the replacement +// character. +func truncateWatcherLabel(s string) string { + s = strings.TrimSpace(s) + runes := []rune(s) + if len(runes) <= watcherLabelMaxLen { + return s + } + return string(runes[:watcherLabelMaxLen-1]) + "…" +} + // repoLocalPathUpdater adapts the task service's UpdateRepository to the executor.RepoUpdater interface. type repoLocalPathUpdater struct { svc *taskservice.Service diff --git a/apps/backend/internal/agent/runtime/lifecycle/profile_resolver.go b/apps/backend/internal/agent/runtime/lifecycle/profile_resolver.go index df3355edf..9a6b02983 100644 --- a/apps/backend/internal/agent/runtime/lifecycle/profile_resolver.go +++ b/apps/backend/internal/agent/runtime/lifecycle/profile_resolver.go @@ -2,12 +2,31 @@ package lifecycle import ( "context" + "database/sql" + "errors" "fmt" "github.com/kandev/kandev/internal/agent/registry" "github.com/kandev/kandev/internal/agent/settings/store" ) +// DeletedProfileError carries the soft-deleted profile's ID and name so the +// caller (watcher self-heal) can surface a human-readable cause. Wraps +// store.ErrAgentProfileDeleted — use errors.Is / errors.As. +type DeletedProfileError struct { + ProfileID string + ProfileName string +} + +func (e *DeletedProfileError) Error() string { + if e.ProfileName != "" { + return fmt.Sprintf("agent profile %q (%s) was removed", e.ProfileName, e.ProfileID) + } + return fmt.Sprintf("agent profile %s was removed", e.ProfileID) +} + +func (e *DeletedProfileError) Unwrap() error { return store.ErrAgentProfileDeleted } + // StoreProfileResolver implements ProfileResolver using the agent settings store type StoreProfileResolver struct { store store.Repository @@ -20,11 +39,19 @@ func NewStoreProfileResolver(store store.Repository, reg *registry.Registry) *St return &StoreProfileResolver{store: store, registry: reg} } -// ResolveProfile looks up an agent profile by ID and returns the profile info +// ResolveProfile looks up an agent profile by ID and returns the profile info. +// +// When the row is missing, ResolveProfile retries via +// GetAgentProfileIncludingDeleted to disambiguate "never existed" (returns +// the original "profile not found" error) from "soft-deleted" (returns a +// *DeletedProfileError wrapping store.ErrAgentProfileDeleted so callers can +// trigger watcher self-heal). func (r *StoreProfileResolver) ResolveProfile(ctx context.Context, profileID string) (*AgentProfileInfo, error) { - // Get the profile from the store profile, err := r.store.GetAgentProfile(ctx, profileID) if err != nil { + if deletedErr := r.checkSoftDeleted(ctx, profileID, err); deletedErr != nil { + return nil, deletedErr + } return nil, fmt.Errorf("profile not found: %w", err) } @@ -55,6 +82,26 @@ func (r *StoreProfileResolver) ResolveProfile(ctx context.Context, profileID str }, nil } +// checkSoftDeleted returns a *DeletedProfileError when the missing-row error +// from GetAgentProfile resolves to an existing-but-soft-deleted row. Returns +// nil when the row really doesn't exist or the secondary lookup fails — the +// caller falls back to the original "profile not found" error. +func (r *StoreProfileResolver) checkSoftDeleted(ctx context.Context, profileID string, primaryErr error) error { + // Only retry on the well-known "missing row" signal. The store layer + // returns sql.ErrNoRows from QueryRow.Scan when the filtered SELECT + // matches nothing — that includes both "no row at all" and + // "row exists but deleted_at IS NOT NULL". Other errors (driver, + // permissions) are returned as-is by the caller. + if !errors.Is(primaryErr, sql.ErrNoRows) { + return nil + } + profile, err := r.store.GetAgentProfileIncludingDeleted(ctx, profileID) + if err != nil || profile == nil || profile.DeletedAt == nil { + return nil + } + return &DeletedProfileError{ProfileID: profile.ID, ProfileName: profile.Name} +} + // resolveAgentCapabilities looks up the agent in the registry and returns the // effective model and whether the agent supports native session resume. // The model comes straight from the profile; static per-agent defaults have diff --git a/apps/backend/internal/agent/runtime/lifecycle/profile_resolver_test.go b/apps/backend/internal/agent/runtime/lifecycle/profile_resolver_test.go index ab19386b0..7d5d7224a 100644 --- a/apps/backend/internal/agent/runtime/lifecycle/profile_resolver_test.go +++ b/apps/backend/internal/agent/runtime/lifecycle/profile_resolver_test.go @@ -2,8 +2,10 @@ package lifecycle import ( "context" + "database/sql" "errors" "testing" + "time" "github.com/kandev/kandev/internal/agent/registry" "github.com/kandev/kandev/internal/agent/settings/models" @@ -15,11 +17,12 @@ import ( // MockRepository implements store.Repository for testing type MockRepository struct { - GetAgentFn func(ctx context.Context, id string) (*models.Agent, error) - GetAgentByNameFn func(ctx context.Context, name string) (*models.Agent, error) - GetAgentProfileFn func(ctx context.Context, id string) (*models.AgentProfile, error) - ListAgentsFn func(ctx context.Context) ([]*models.Agent, error) - ListAgentProfilesFn func(ctx context.Context, agentID string) ([]*models.AgentProfile, error) + GetAgentFn func(ctx context.Context, id string) (*models.Agent, error) + GetAgentByNameFn func(ctx context.Context, name string) (*models.Agent, error) + GetAgentProfileFn func(ctx context.Context, id string) (*models.AgentProfile, error) + GetAgentProfileIncludingDeletedFn func(ctx context.Context, id string) (*models.AgentProfile, error) + ListAgentsFn func(ctx context.Context) ([]*models.Agent, error) + ListAgentProfilesFn func(ctx context.Context, agentID string) ([]*models.AgentProfile, error) } var _ store.Repository = (*MockRepository)(nil) @@ -84,6 +87,13 @@ func (m *MockRepository) GetAgentProfile(ctx context.Context, id string) (*model return nil, errors.New("profile not found") } +func (m *MockRepository) GetAgentProfileIncludingDeleted(ctx context.Context, id string) (*models.AgentProfile, error) { + if m.GetAgentProfileIncludingDeletedFn != nil { + return m.GetAgentProfileIncludingDeletedFn(ctx, id) + } + return nil, errors.New("profile not found") +} + func (m *MockRepository) ListAgentProfiles(ctx context.Context, agentID string) ([]*models.AgentProfile, error) { if m.ListAgentProfilesFn != nil { return m.ListAgentProfilesFn(ctx, agentID) @@ -166,6 +176,44 @@ func TestStoreProfileResolver_ResolveProfile_Success(t *testing.T) { } } +func TestStoreProfileResolver_ResolveProfile_SoftDeletedReturnsTypedError(t *testing.T) { + deletedAt := time.Date(2026, 5, 22, 21, 28, 12, 0, time.UTC) + mockRepo := &MockRepository{ + GetAgentProfileFn: func(ctx context.Context, id string) (*models.AgentProfile, error) { + return nil, sql.ErrNoRows + }, + GetAgentProfileIncludingDeletedFn: func(ctx context.Context, id string) (*models.AgentProfile, error) { + return &models.AgentProfile{ + ID: "deleted-profile", + AgentID: "agent-456", + Name: "Removed Kilo Profile", + DeletedAt: &deletedAt, + }, nil + }, + } + + resolver := NewStoreProfileResolver(mockRepo, nil) + + info, err := resolver.ResolveProfile(context.Background(), "deleted-profile") + + if info != nil { + t.Fatalf("expected nil profile info, got %+v", info) + } + if !errors.Is(err, store.ErrAgentProfileDeleted) { + t.Fatalf("expected ErrAgentProfileDeleted, got %v", err) + } + var detail *DeletedProfileError + if !errors.As(err, &detail) { + t.Fatalf("expected DeletedProfileError detail, got %T: %v", err, err) + } + if detail.ProfileID != "deleted-profile" { + t.Errorf("expected ProfileID 'deleted-profile', got %q", detail.ProfileID) + } + if detail.ProfileName != "Removed Kilo Profile" { + t.Errorf("expected ProfileName 'Removed Kilo Profile', got %q", detail.ProfileName) + } +} + func TestStoreProfileResolver_ResolveProfile_ProfileNotFound(t *testing.T) { mockRepo := &MockRepository{ GetAgentProfileFn: func(ctx context.Context, id string) (*models.AgentProfile, error) { diff --git a/apps/backend/internal/agent/settings/controller/controller.go b/apps/backend/internal/agent/settings/controller/controller.go index 0251ebba7..509531a23 100644 --- a/apps/backend/internal/agent/settings/controller/controller.go +++ b/apps/backend/internal/agent/settings/controller/controller.go @@ -50,6 +50,7 @@ type Controller struct { discovery *discovery.Registry agentRegistry *registry.Registry sessionChecker SessionChecker + watcherDeps WatcherDependencyChecker mcpService *mcpconfig.Service modelCache *modelfetcher.Cache hostUtility *hostutility.Manager @@ -58,13 +59,51 @@ type Controller struct { logger *logger.Logger } -// ErrProfileInUseDetail is returned when a profile cannot be deleted because active sessions exist. +// SetWatcherDependencyChecker wires in the watcher dependency enumerator so +// DeleteProfile can include referencing watchers in ErrProfileInUseDetail. +// Optional; when unset the delete path keeps its pre-watcher behaviour. +func (c *Controller) SetWatcherDependencyChecker(w WatcherDependencyChecker) { + c.watcherDeps = w +} + +// ErrProfileInUseDetail is returned when a profile cannot be deleted because +// active sessions or external integration watchers reference it. The UI uses +// the breakdown to render a "this will also disable N watchers — continue?" +// confirmation dialog before re-issuing the request with force=true. type ErrProfileInUseDetail struct { ActiveSessions []agentdto.ActiveTaskInfo + Watchers []WatcherReference } func (e *ErrProfileInUseDetail) Error() string { - return fmt.Sprintf("agent profile is used by %d active session(s)", len(e.ActiveSessions)) + return fmt.Sprintf("agent profile is used by %d active session(s) and %d watcher(s)", + len(e.ActiveSessions), len(e.Watchers)) +} + +// WatcherReference points at one issue/PR watcher row that uses the profile +// being deleted. Kind is the integration name ("linear", "jira", +// "github_issue", "github_review"). Label is a short human-friendly string +// (the filter, repo list, or JQL clipped to a UI-safe length by the producer). +type WatcherReference struct { + ID string `json:"id"` + Kind string `json:"kind"` + Label string `json:"label"` +} + +// WatcherDependencyChecker enumerates watcher rows that reference an agent +// profile and disables them on force-delete. Implementations live in +// cmd/kandev (one per integration store); the controller stays decoupled +// from linear/jira/github packages. +// +// ListWatchersByAgentProfile feeds the confirmation dialog; the user sees +// the list and confirms. DisableWatchersByAgentProfile fires on force-delete +// so the watcher row reflects the deletion immediately — without it, the +// watcher stays enabled-but-orphaned until its next external trigger fires +// the lazy preflight, which never happens for filters that match nothing +// new after the profile is deleted. +type WatcherDependencyChecker interface { + ListWatchersByAgentProfile(ctx context.Context, agentProfileID string) ([]WatcherReference, error) + DisableWatchersByAgentProfile(ctx context.Context, agentProfileID, cause string) ([]WatcherReference, error) } type SessionChecker interface { diff --git a/apps/backend/internal/agent/settings/controller/profile_crud.go b/apps/backend/internal/agent/settings/controller/profile_crud.go index 1c7657cff..05e21ed34 100644 --- a/apps/backend/internal/agent/settings/controller/profile_crud.go +++ b/apps/backend/internal/agent/settings/controller/profile_crud.go @@ -209,32 +209,103 @@ func (c *Controller) DeleteProfile(ctx context.Context, id string, force bool) ( } return nil, err } + // Eagerly disable referencing watchers only AFTER the row is gone, so a + // failed delete never strands watchers disabled against a still-live + // profile. If this disable itself fails, the dispatch coordinator's + // preflight self-heals the watchers on their next poll. + if force { + c.disableReferencingWatchers(ctx, id, profile.Name) + } result := toProfileDTO(profile) return &result, nil } -// prepareProfileDeletion checks for active sessions and cleans up ephemeral tasks before deletion. +// prepareProfileDeletion checks for active sessions and referencing watchers, +// then cleans up ephemeral tasks. Returns *ErrProfileInUseDetail when force +// is false and any active session OR referencing watcher exists — the UI uses +// the breakdown to render a confirmation dialog. force=true skips both checks; +// the eager disable of referencing watchers runs in DeleteProfile after the +// row is actually gone (the dispatch coordinator's preflight stays as the +// safety net for profiles deleted by other paths, e.g. reconciler cleanup of +// disabled agent types). func (c *Controller) prepareProfileDeletion(ctx context.Context, profileID string, force bool) error { if c.sessionChecker == nil { return nil } - // Check for active non-ephemeral sessions (unless force is true) - // This allows the UI to show a confirmation dialog with affected tasks if !force { activeTasks, err := c.sessionChecker.GetActiveTaskInfoByAgentProfile(ctx, profileID) if err != nil { return err } - if len(activeTasks) > 0 { - return &ErrProfileInUseDetail{ActiveSessions: activeTasks} + var watcherRefs []WatcherReference + if c.watcherDeps != nil { + refs, err := c.watcherDeps.ListWatchersByAgentProfile(ctx, profileID) + if err != nil { + c.logger.Warn("watcher deps lookup failed; proceeding without watcher info", + zap.String("profile_id", profileID), zap.Error(err)) + } else { + watcherRefs = refs + } + } + if len(activeTasks) > 0 || len(watcherRefs) > 0 { + return &ErrProfileInUseDetail{ActiveSessions: activeTasks, Watchers: watcherRefs} } } - // Clean up ephemeral tasks (quick chat, config chat) using this profile - // Done after the force check since these don't need user confirmation + // Clean up ephemeral tasks (quick chat, config chat) using this profile. + // Done after the force check since these don't need user confirmation. c.cleanupEphemeralTasks(ctx, profileID) return nil } +// disableReferencingWatchers stamps the deletion cause onto every watcher +// row that referenced this profile so the UI shows "disabled because the +// agent profile was deleted" the moment the request returns. Without this +// eager disable, watchers whose filter no longer matches anything after the +// profile is gone would stay enabled-but-orphaned indefinitely — the +// dispatch coordinator's preflight only runs when a new external event +// fires the watcher. +// +// Best-effort: a failure is logged and ignored so the delete still proceeds. +// The preflight remains as a safety net for reconciler-driven deletes that +// don't pass through this path. +func (c *Controller) disableReferencingWatchers(ctx context.Context, profileID, profileName string) { + if c.watcherDeps == nil { + return + } + cause := formatDeletedProfileCause(profileID, profileName) + disabled, err := c.watcherDeps.DisableWatchersByAgentProfile(ctx, profileID, cause) + if err != nil { + c.logger.Warn("failed to disable referencing watchers on force-delete", + zap.String("profile_id", profileID), zap.Error(err)) + return + } + if len(disabled) > 0 { + c.logger.Info("disabled referencing watchers on profile force-delete", + zap.String("profile_id", profileID), zap.Int("count", len(disabled))) + } +} + +// profileNameCauseMaxLen caps the rendered profile name in the deletion +// cause. Mirrors the orchestrator preflight's cap (80 runes); both strings +// land in the same settings-page watcher banner, and the name is user-typed +// with no DB-level length constraint. +const profileNameCauseMaxLen = 80 + +// formatDeletedProfileCause renders the human-readable string stamped onto a +// watcher's last_error when its profile is force-deleted. Includes the profile +// name (truncated) so the settings banner shows "Kilo Profile" rather than a +// bare UUID — matching the shape of the orchestrator preflight's cause. +func formatDeletedProfileCause(profileID, profileName string) string { + name := profileName + if runes := []rune(name); len(runes) > profileNameCauseMaxLen { + name = string(runes[:profileNameCauseMaxLen-1]) + "…" + } + if name != "" { + return fmt.Sprintf("agent profile %q (%s) was deleted", name, profileID) + } + return fmt.Sprintf("agent profile %s was deleted", profileID) +} + // cleanupEphemeralTasks removes ephemeral tasks (quick chat, config chat) associated with a profile. func (c *Controller) cleanupEphemeralTasks(ctx context.Context, profileID string) { if c.sessionChecker == nil { diff --git a/apps/backend/internal/agent/settings/controller/profile_crud_watcher_deps_test.go b/apps/backend/internal/agent/settings/controller/profile_crud_watcher_deps_test.go new file mode 100644 index 000000000..bbab18e5c --- /dev/null +++ b/apps/backend/internal/agent/settings/controller/profile_crud_watcher_deps_test.go @@ -0,0 +1,138 @@ +package controller + +import ( + "context" + "errors" + "strings" + "testing" + + "github.com/kandev/kandev/internal/agent/agents" + agentdto "github.com/kandev/kandev/internal/agent/dto" + "github.com/kandev/kandev/internal/agent/settings/models" +) + +// fakeSessionChecker stubs the active-session lookup so the watcher-deps +// path is exercised independently of the existing active-session guard. +type fakeSessionChecker struct { + activeTasks []agentdto.ActiveTaskInfo +} + +func (f *fakeSessionChecker) HasActiveTaskSessionsByAgentProfile(context.Context, string) (bool, error) { + return len(f.activeTasks) > 0, nil +} + +func (f *fakeSessionChecker) DeleteEphemeralTasksByAgentProfile(context.Context, string) (int64, error) { + return 0, nil +} + +func (f *fakeSessionChecker) GetActiveTaskInfoByAgentProfile(context.Context, string) ([]agentdto.ActiveTaskInfo, error) { + return f.activeTasks, nil +} + +// fakeWatcherDependencyChecker returns a canned list of referencing +// watchers and records disable invocations so tests can assert on the +// force-delete eager-disable contract. +type fakeWatcherDependencyChecker struct { + refs []WatcherReference + err error + disableCalls int + disabledProfile string + disabledCause string +} + +func (f *fakeWatcherDependencyChecker) ListWatchersByAgentProfile(context.Context, string) ([]WatcherReference, error) { + return f.refs, f.err +} + +func (f *fakeWatcherDependencyChecker) DisableWatchersByAgentProfile(_ context.Context, profileID, cause string) ([]WatcherReference, error) { + f.disableCalls++ + f.disabledProfile = profileID + f.disabledCause = cause + return f.refs, f.err +} + +// TestDeleteProfile_BlocksOnReferencingWatchers is the regression guard for +// the UX hole: deleting a profile that linear/jira/github_issue/github_review +// watchers point at must surface a confirmation-detail error so the UI can +// say "this disables N watchers — continue?" instead of silently orphaning +// them. +func TestDeleteProfile_BlocksOnReferencingWatchers(t *testing.T) { + ctrl := newTestController(map[string]agents.Agent{"test-agent": &testAgent{id: "test-agent", name: "test-agent", enabled: true}}) + st := newFakeStore() + agent := &models.Agent{ID: "agent-1", Name: "test-agent"} + st.agents[agent.ID] = agent + st.byName[agent.Name] = agent + st.profiles[agent.ID] = []*models.AgentProfile{{ID: "prof-1", AgentID: agent.ID, Name: "Kilo Profile"}} + ctrl.repo = st + ctrl.sessionChecker = &fakeSessionChecker{} + ctrl.watcherDeps = &fakeWatcherDependencyChecker{refs: []WatcherReference{ + {ID: "linear-w1", Kind: "linear", Label: "ENG team backlog"}, + {ID: "github-w7", Kind: "github_issue", Label: "kdlbs/kandev bugs"}, + }} + + _, err := ctrl.DeleteProfile(context.Background(), "prof-1", false) + + var detail *ErrProfileInUseDetail + if !errors.As(err, &detail) { + t.Fatalf("expected ErrProfileInUseDetail, got %v", err) + } + if len(detail.Watchers) != 2 { + t.Fatalf("expected 2 watcher refs, got %d: %+v", len(detail.Watchers), detail.Watchers) + } + if detail.Watchers[0].Kind != "linear" || detail.Watchers[1].Kind != "github_issue" { + t.Errorf("unexpected watcher refs: %+v", detail.Watchers) + } +} + +// TestDeleteProfile_ForceBypassesWatcherCheck pins the override knob: when +// the user has already confirmed in the UI (force=true), DeleteProfile +// proceeds even though watchers reference the profile. The watchers will +// self-heal on their next poll via the dispatch coordinator's pre-flight. +func TestDeleteProfile_ForceBypassesWatcherCheck(t *testing.T) { + ctrl := newTestController(map[string]agents.Agent{"test-agent": &testAgent{id: "test-agent", name: "test-agent", enabled: true}}) + st := newFakeStore() + agent := &models.Agent{ID: "agent-1", Name: "test-agent"} + st.agents[agent.ID] = agent + st.byName[agent.Name] = agent + st.profiles[agent.ID] = []*models.AgentProfile{{ID: "prof-1", AgentID: agent.ID, Name: "Kilo Profile"}} + ctrl.repo = st + ctrl.sessionChecker = &fakeSessionChecker{} + deps := &fakeWatcherDependencyChecker{refs: []WatcherReference{{ID: "linear-w1", Kind: "linear"}}} + ctrl.watcherDeps = deps + + if _, err := ctrl.DeleteProfile(context.Background(), "prof-1", true); err != nil { + t.Fatalf("force=true must bypass the watcher check, got %v", err) + } + // Force-delete must NOT rely on the lazy preflight — it must eagerly + // disable each referencing watcher with the deletion cause so the UI + // reflects the change immediately, before any external event fires. + if deps.disableCalls != 1 { + t.Errorf("expected DisableWatchersByAgentProfile to fire once, got %d", deps.disableCalls) + } + if deps.disabledProfile != "prof-1" { + t.Errorf("disabled profile = %q, want %q", deps.disabledProfile, "prof-1") + } + // The cause must carry the human-readable profile name (not just the UUID) + // so the settings banner is legible and matches the lazy preflight's cause. + if !strings.Contains(deps.disabledCause, "Kilo Profile") { + t.Errorf("disable cause %q must include the profile name", deps.disabledCause) + } +} + +// TestDeleteProfile_NoWatchersStillSucceeds pins the negative case: when no +// watchers reference the profile, the existing happy path is preserved. +func TestDeleteProfile_NoWatchersStillSucceeds(t *testing.T) { + ctrl := newTestController(map[string]agents.Agent{"test-agent": &testAgent{id: "test-agent", name: "test-agent", enabled: true}}) + st := newFakeStore() + agent := &models.Agent{ID: "agent-1", Name: "test-agent"} + st.agents[agent.ID] = agent + st.byName[agent.Name] = agent + st.profiles[agent.ID] = []*models.AgentProfile{{ID: "prof-1", AgentID: agent.ID, Name: "Kilo Profile"}} + ctrl.repo = st + ctrl.sessionChecker = &fakeSessionChecker{} + ctrl.watcherDeps = &fakeWatcherDependencyChecker{} // empty refs + + if _, err := ctrl.DeleteProfile(context.Background(), "prof-1", false); err != nil { + t.Fatalf("no watchers should not block delete, got %v", err) + } +} diff --git a/apps/backend/internal/agent/settings/controller/reconciler_test.go b/apps/backend/internal/agent/settings/controller/reconciler_test.go index 892908995..645464d71 100644 --- a/apps/backend/internal/agent/settings/controller/reconciler_test.go +++ b/apps/backend/internal/agent/settings/controller/reconciler_test.go @@ -106,8 +106,22 @@ func (f *fakeStore) DeleteAgentProfile(_ context.Context, id string) error { return nil } -func (f *fakeStore) GetAgentProfile(context.Context, string) (*models.AgentProfile, error) { - return nil, nil +func (f *fakeStore) GetAgentProfile(_ context.Context, id string) (*models.AgentProfile, error) { + for _, list := range f.profiles { + for _, p := range list { + if p.ID == id { + return p, nil + } + } + } + return nil, sql.ErrNoRows +} + +func (f *fakeStore) GetAgentProfileIncludingDeleted(ctx context.Context, id string) (*models.AgentProfile, error) { + // Delegate to the deleted_at-aware lookup; the fake does not model + // soft-delete state, so both methods see the same rows. This keeps + // nil dereferences out of any test that calls the new method. + return f.GetAgentProfile(ctx, id) } func (f *fakeStore) ListAgentProfiles(_ context.Context, agentID string) ([]*models.AgentProfile, error) { diff --git a/apps/backend/internal/agent/settings/handlers/handlers.go b/apps/backend/internal/agent/settings/handlers/handlers.go index f44ff068b..497a0e88a 100644 --- a/apps/backend/internal/agent/settings/handlers/handlers.go +++ b/apps/backend/internal/agent/settings/handlers/handlers.go @@ -466,8 +466,9 @@ func (h *Handlers) httpDeleteProfile(c *gin.Context) { var inUseErr *controller.ErrProfileInUseDetail if errors.As(err, &inUseErr) { c.JSON(http.StatusConflict, gin.H{ - "error": "agent profile is used by active session(s)", + "error": "agent profile is used by active session(s) or watcher(s)", "active_sessions": inUseErr.ActiveSessions, + "watchers": inUseErr.Watchers, }) return } diff --git a/apps/backend/internal/agent/settings/store/errors.go b/apps/backend/internal/agent/settings/store/errors.go new file mode 100644 index 000000000..85c31fdaf --- /dev/null +++ b/apps/backend/internal/agent/settings/store/errors.go @@ -0,0 +1,9 @@ +package store + +import "errors" + +// ErrAgentProfileDeleted is returned by lookups that find a soft-deleted row. +// Distinguishes "the profile was removed" (recoverable: pick a new one) from +// "the profile never existed" (caller passed a bad ID). Wrapped — caller uses +// errors.Is. +var ErrAgentProfileDeleted = errors.New("agent profile soft-deleted") diff --git a/apps/backend/internal/agent/settings/store/sqlite.go b/apps/backend/internal/agent/settings/store/sqlite.go index aaa9407e7..20315040a 100644 --- a/apps/backend/internal/agent/settings/store/sqlite.go +++ b/apps/backend/internal/agent/settings/store/sqlite.go @@ -729,25 +729,47 @@ func (r *sqliteRepository) DeleteAgentProfile(ctx context.Context, id string) er return nil } +// agentProfileSelectColumns is the SELECT projection used by every +// AgentProfile read path. Extracted once so the next column added to +// agent_profiles only has to land here — duplicating the list across +// GetAgentProfile / GetAgentProfileIncludingDeleted / ListAgentProfiles +// risks the soft-delete path silently scanning zero values for a freshly +// added column, which is the same shape of bug this package fixes in +// another layer (orphaned watchers vs. stale projection drift). +const agentProfileSelectColumns = ` + SELECT id, agent_id, name, agent_display_name, model, mode, migrated_from, + auto_approve, dangerously_skip_permissions, allow_indexing, + cli_passthrough, user_modified, plan, cli_flags, + COALESCE(env_vars, '[]'), + created_at, updated_at, deleted_at, + COALESCE(workspace_id, ''), COALESCE(role, ''), COALESCE(icon, ''), + COALESCE(reports_to, ''), COALESCE(skill_ids, '[]'), + COALESCE(desired_skills, '[]'), COALESCE(custom_prompt, ''), + COALESCE(status, 'idle'), COALESCE(pause_reason, ''), + last_run_finished_at, + COALESCE(max_concurrent_sessions, 1), COALESCE(cooldown_sec, 0), + COALESCE(skip_idle_runs, 0), COALESCE(consecutive_failures, 0), + COALESCE(failure_threshold, 3), COALESCE(executor_preference, ''), + COALESCE(budget_monthly_cents, 0), + COALESCE(settings, '{}'), COALESCE(permissions, '{}') + FROM agent_profiles` + func (r *sqliteRepository) GetAgentProfile(ctx context.Context, id string) (*models.AgentProfile, error) { - row := r.ro.QueryRowContext(ctx, r.ro.Rebind(` - SELECT id, agent_id, name, agent_display_name, model, mode, migrated_from, - auto_approve, dangerously_skip_permissions, allow_indexing, - cli_passthrough, user_modified, plan, cli_flags, - COALESCE(env_vars, '[]'), - created_at, updated_at, deleted_at, - COALESCE(workspace_id, ''), COALESCE(role, ''), COALESCE(icon, ''), - COALESCE(reports_to, ''), COALESCE(skill_ids, '[]'), - COALESCE(desired_skills, '[]'), COALESCE(custom_prompt, ''), - COALESCE(status, 'idle'), COALESCE(pause_reason, ''), - last_run_finished_at, - COALESCE(max_concurrent_sessions, 1), COALESCE(cooldown_sec, 0), - COALESCE(skip_idle_runs, 0), COALESCE(consecutive_failures, 0), - COALESCE(failure_threshold, 3), COALESCE(executor_preference, ''), - COALESCE(budget_monthly_cents, 0), - COALESCE(settings, '{}'), COALESCE(permissions, '{}') - FROM agent_profiles WHERE id = ? AND deleted_at IS NULL - `), id) + row := r.ro.QueryRowContext(ctx, + r.ro.Rebind(agentProfileSelectColumns+` WHERE id = ? AND deleted_at IS NULL`), id) + profile, err := scanAgentProfile(row) + if err != nil { + return nil, err + } + return r.applyLegacyBackfill(ctx, profile), nil +} + +// GetAgentProfileIncludingDeleted returns the row even when soft-deleted. +// Resolver and watcher self-heal callers use this to disambiguate +// "row removed" (recoverable: orphan reference) from "row never existed". +func (r *sqliteRepository) GetAgentProfileIncludingDeleted(ctx context.Context, id string) (*models.AgentProfile, error) { + row := r.ro.QueryRowContext(ctx, + r.ro.Rebind(agentProfileSelectColumns+` WHERE id = ?`), id) profile, err := scanAgentProfile(row) if err != nil { return nil, err @@ -756,24 +778,9 @@ func (r *sqliteRepository) GetAgentProfile(ctx context.Context, id string) (*mod } func (r *sqliteRepository) ListAgentProfiles(ctx context.Context, agentID string) ([]*models.AgentProfile, error) { - rows, err := r.ro.QueryContext(ctx, r.ro.Rebind(` - SELECT id, agent_id, name, agent_display_name, model, mode, migrated_from, - auto_approve, dangerously_skip_permissions, allow_indexing, - cli_passthrough, user_modified, plan, cli_flags, - COALESCE(env_vars, '[]'), - created_at, updated_at, deleted_at, - COALESCE(workspace_id, ''), COALESCE(role, ''), COALESCE(icon, ''), - COALESCE(reports_to, ''), COALESCE(skill_ids, '[]'), - COALESCE(desired_skills, '[]'), COALESCE(custom_prompt, ''), - COALESCE(status, 'idle'), COALESCE(pause_reason, ''), - last_run_finished_at, - COALESCE(max_concurrent_sessions, 1), COALESCE(cooldown_sec, 0), - COALESCE(skip_idle_runs, 0), COALESCE(consecutive_failures, 0), - COALESCE(failure_threshold, 3), COALESCE(executor_preference, ''), - COALESCE(budget_monthly_cents, 0), - COALESCE(settings, '{}'), COALESCE(permissions, '{}') - FROM agent_profiles WHERE agent_id = ? AND deleted_at IS NULL ORDER BY created_at DESC - `), agentID) + rows, err := r.ro.QueryContext(ctx, + r.ro.Rebind(agentProfileSelectColumns+` WHERE agent_id = ? AND deleted_at IS NULL ORDER BY created_at DESC`), + agentID) if err != nil { return nil, err } diff --git a/apps/backend/internal/agent/settings/store/sqlite_soft_delete_test.go b/apps/backend/internal/agent/settings/store/sqlite_soft_delete_test.go new file mode 100644 index 000000000..e637a0270 --- /dev/null +++ b/apps/backend/internal/agent/settings/store/sqlite_soft_delete_test.go @@ -0,0 +1,92 @@ +package store + +import ( + "context" + "database/sql" + "errors" + "testing" + + "github.com/kandev/kandev/internal/agent/settings/models" +) + +// TestGetAgentProfile_DeletedRowIsHidden pins the existing semantic: the +// deleted_at-aware lookup keeps returning sql.ErrNoRows for a soft-deleted +// row. This is the trap that orphaned the watchers in the first place — if +// this assertion ever changes, the watcher self-heal flow needs to be +// rethought. +func TestGetAgentProfile_DeletedRowIsHidden(t *testing.T) { + repo := newTestRepo(t) + ctx := context.Background() + id := seedAgentProfile(t, repo, "deleted-profile", "removed-kilo") + + if err := repo.DeleteAgentProfile(ctx, id); err != nil { + t.Fatalf("soft-delete failed: %v", err) + } + + _, err := repo.GetAgentProfile(ctx, id) + if !errors.Is(err, sql.ErrNoRows) { + t.Fatalf("expected sql.ErrNoRows, got %v", err) + } +} + +// TestGetAgentProfileIncludingDeleted_ReturnsRowWithDeletedAtSet is the +// counterpart: the new method MUST surface the soft-deleted row so the +// resolver can disambiguate "removed" from "never existed". +func TestGetAgentProfileIncludingDeleted_ReturnsRowWithDeletedAtSet(t *testing.T) { + repo := newTestRepo(t) + ctx := context.Background() + id := seedAgentProfile(t, repo, "deleted-profile", "removed-kilo") + + if err := repo.DeleteAgentProfile(ctx, id); err != nil { + t.Fatalf("soft-delete failed: %v", err) + } + + got, err := repo.GetAgentProfileIncludingDeleted(ctx, id) + if err != nil { + t.Fatalf("expected row to be returned, got error %v", err) + } + if got == nil { + t.Fatal("expected non-nil profile") + } + if got.DeletedAt == nil { + t.Fatal("expected DeletedAt to be set on the returned row") + } + if got.Name != "deleted-profile" { + t.Errorf("Name = %q, want %q", got.Name, "deleted-profile") + } +} + +// TestGetAgentProfileIncludingDeleted_MissingRowStillErrors guards against +// regressions where the includes-deleted variant accidentally silences a +// genuine "row never existed" lookup. +func TestGetAgentProfileIncludingDeleted_MissingRowStillErrors(t *testing.T) { + repo := newTestRepo(t) + ctx := context.Background() + + _, err := repo.GetAgentProfileIncludingDeleted(ctx, "this-id-was-never-created") + if !errors.Is(err, sql.ErrNoRows) { + t.Fatalf("expected sql.ErrNoRows, got %v", err) + } +} + +// seedAgentProfile creates a parent agent + a profile referencing it and +// returns the profile id. Centralised so the table+FK setup stays in one +// place even if the schema grows new required columns. +func seedAgentProfile(t *testing.T, repo Repository, profileName, agentName string) string { + t.Helper() + ctx := context.Background() + parent := &models.Agent{Name: agentName} + if err := repo.CreateAgent(ctx, parent); err != nil { + t.Fatalf("create parent agent: %v", err) + } + profile := &models.AgentProfile{ + AgentID: parent.ID, + Name: profileName, + AgentDisplayName: agentName, + Model: "test-model", + } + if err := repo.CreateAgentProfile(ctx, profile); err != nil { + t.Fatalf("create profile: %v", err) + } + return profile.ID +} diff --git a/apps/backend/internal/agent/settings/store/store.go b/apps/backend/internal/agent/settings/store/store.go index 52e091042..ff974f2b3 100644 --- a/apps/backend/internal/agent/settings/store/store.go +++ b/apps/backend/internal/agent/settings/store/store.go @@ -22,6 +22,11 @@ type Repository interface { UpdateAgentProfile(ctx context.Context, profile *models.AgentProfile) error DeleteAgentProfile(ctx context.Context, id string) error GetAgentProfile(ctx context.Context, id string) (*models.AgentProfile, error) + // GetAgentProfileIncludingDeleted returns the row even when soft-deleted. + // Check profile.DeletedAt != nil to detect orphaned references (watchers, + // automations) pointing at removed profiles. ErrAgentProfileDeleted is + // only used by callers of ProfileResolver, which wraps this method. + GetAgentProfileIncludingDeleted(ctx context.Context, id string) (*models.AgentProfile, error) ListAgentProfiles(ctx context.Context, agentID string) ([]*models.AgentProfile, error) Close() error diff --git a/apps/backend/internal/github/models.go b/apps/backend/internal/github/models.go index 3388ec11a..47ec34e48 100644 --- a/apps/backend/internal/github/models.go +++ b/apps/backend/internal/github/models.go @@ -261,8 +261,12 @@ type ReviewWatch struct { PollIntervalSeconds int `json:"poll_interval_seconds" db:"poll_interval_seconds"` CleanupPolicy string `json:"cleanup_policy" db:"cleanup_policy"` LastPolledAt *time.Time `json:"last_polled_at,omitempty" db:"last_polled_at"` - CreatedAt time.Time `json:"created_at" db:"created_at"` - UpdatedAt time.Time `json:"updated_at" db:"updated_at"` + // LastError / LastErrorAt are stamped when the dispatch pipeline self- + // heals the watcher (e.g. the bound agent profile was soft-deleted). + LastError string `json:"last_error,omitempty" db:"last_error"` + LastErrorAt *time.Time `json:"last_error_at,omitempty" db:"last_error_at"` + CreatedAt time.Time `json:"created_at" db:"created_at"` + UpdatedAt time.Time `json:"updated_at" db:"updated_at"` } // ReviewPRTask records which PRs have already had tasks created (deduplication). @@ -485,8 +489,12 @@ type IssueWatch struct { PollIntervalSeconds int `json:"poll_interval_seconds" db:"poll_interval_seconds"` CleanupPolicy string `json:"cleanup_policy" db:"cleanup_policy"` LastPolledAt *time.Time `json:"last_polled_at,omitempty" db:"last_polled_at"` - CreatedAt time.Time `json:"created_at" db:"created_at"` - UpdatedAt time.Time `json:"updated_at" db:"updated_at"` + // LastError / LastErrorAt are stamped when the dispatch pipeline self- + // heals the watcher (e.g. the bound agent profile was soft-deleted). + LastError string `json:"last_error,omitempty" db:"last_error"` + LastErrorAt *time.Time `json:"last_error_at,omitempty" db:"last_error_at"` + CreatedAt time.Time `json:"created_at" db:"created_at"` + UpdatedAt time.Time `json:"updated_at" db:"updated_at"` } // IssueWatchTask records which issues have already had tasks created (deduplication). diff --git a/apps/backend/internal/github/service_issues.go b/apps/backend/internal/github/service_issues.go index 3c783da80..b47d4934d 100644 --- a/apps/backend/internal/github/service_issues.go +++ b/apps/backend/internal/github/service_issues.go @@ -87,6 +87,13 @@ func (s *Service) ListAllIssueWatches(ctx context.Context) ([]*IssueWatch, error return s.store.ListAllIssueWatches(ctx) } +// ListEnabledIssueWatches returns the live (enabled = 1) subset. Used by +// the profile-delete dependency check so self-healed (already-disabled) +// watchers do not inflate the count and trigger spurious 409 confirmations. +func (s *Service) ListEnabledIssueWatches(ctx context.Context) ([]*IssueWatch, error) { + return s.store.ListEnabledIssueWatches(ctx) +} + // UpdateIssueWatch updates an issue watch. // //nolint:dupl,cyclop // mirrors UpdateReviewWatch — different types, same structure; field-by-field nil-pointer apply @@ -296,6 +303,12 @@ func (s *Service) ReleaseIssueWatchTask(ctx context.Context, watchID, repoOwner, return s.store.ReleaseIssueWatchTask(ctx, watchID, repoOwner, repoName, issueNumber) } +// DisableIssueWatchWithError is invoked by the orchestrator's self-heal flow +// when the watcher's bound agent profile has been soft-deleted. +func (s *Service) DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error { + return s.store.DisableIssueWatchWithError(ctx, watchID, cause) +} + // TriggerAllIssueChecks triggers all issue watches for a workspace. // //nolint:dupl // mirrors TriggerAllReviewChecks — different types, same structure diff --git a/apps/backend/internal/github/service_reviews.go b/apps/backend/internal/github/service_reviews.go index f6b171d0a..6a7cb98be 100644 --- a/apps/backend/internal/github/service_reviews.go +++ b/apps/backend/internal/github/service_reviews.go @@ -96,6 +96,13 @@ func (s *Service) ListAllReviewWatches(ctx context.Context) ([]*ReviewWatch, err return s.store.ListAllReviewWatches(ctx) } +// ListEnabledReviewWatches returns the live (enabled = 1) subset. Used by +// the profile-delete dependency check so self-healed (already-disabled) +// watchers do not inflate the count and trigger spurious 409 confirmations. +func (s *Service) ListEnabledReviewWatches(ctx context.Context) ([]*ReviewWatch, error) { + return s.store.ListEnabledReviewWatches(ctx) +} + // UpdateReviewWatch updates a review watch. func (s *Service) UpdateReviewWatch(ctx context.Context, id string, req *UpdateReviewWatchRequest) error { rw, err := s.store.GetReviewWatch(ctx, id) @@ -599,6 +606,12 @@ func (s *Service) ReleaseReviewPRTask(ctx context.Context, watchID, repoOwner, r return s.store.ReleaseReviewPRTask(ctx, watchID, repoOwner, repoName, prNumber) } +// DisableReviewWatchWithError mirrors DisableIssueWatchWithError for the +// PR review watcher. +func (s *Service) DisableReviewWatchWithError(ctx context.Context, watchID, cause string) error { + return s.store.DisableReviewWatchWithError(ctx, watchID, cause) +} + // TriggerAllReviewChecks triggers all review watches for a workspace. func (s *Service) TriggerAllReviewChecks(ctx context.Context, workspaceID string) (int, error) { watches, err := s.store.ListReviewWatches(ctx, workspaceID) diff --git a/apps/backend/internal/github/store.go b/apps/backend/internal/github/store.go index 3f283723d..cf12e80a5 100644 --- a/apps/backend/internal/github/store.go +++ b/apps/backend/internal/github/store.go @@ -104,6 +104,8 @@ const createTablesSQL = ` poll_interval_seconds INTEGER DEFAULT 300, cleanup_policy TEXT NOT NULL DEFAULT 'auto', last_polled_at DATETIME, + last_error TEXT NOT NULL DEFAULT '', + last_error_at DATETIME, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL ); @@ -135,6 +137,8 @@ const createTablesSQL = ` poll_interval_seconds INTEGER DEFAULT 300, cleanup_policy TEXT NOT NULL DEFAULT 'auto', last_polled_at DATETIME, + last_error TEXT NOT NULL DEFAULT '', + last_error_at DATETIME, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL ); @@ -182,6 +186,18 @@ func (s *Store) initSchema() error { // engaged), 'always' (delete on terminal state), 'never' (manual only). _, _ = s.db.Exec(`ALTER TABLE github_review_watches ADD COLUMN cleanup_policy TEXT NOT NULL DEFAULT 'auto'`) _, _ = s.db.Exec(`ALTER TABLE github_issue_watches ADD COLUMN cleanup_policy TEXT NOT NULL DEFAULT 'auto'`) + // Watcher self-heal columns: when the dispatch pipeline detects an + // orphaned watcher (e.g. its agent profile has been soft-deleted), it + // disables the row and stamps a human-readable cause + timestamp here + // for the settings page to surface. Unlike the cleanup_policy column + // above, the readers (IssueWatch.LastError / LastErrorAt) scan these + // columns unconditionally — a driver-level ALTER failure here would + // turn into a confusing scan panic on the next poll instead of a + // clear boot error. Use the same fail-loud column-precheck idiom the + // sibling jira/linear stores already use. + if err := s.addWatchSelfHealColumns(); err != nil { + return err + } if err := s.migratePRTablesForMultiRepo(); err != nil { return fmt.Errorf("migrate PR tables for multi-repo: %w", err) } @@ -309,6 +325,60 @@ func (s *Store) backfillPRWatchesRepositoryID() error { return nil } +// addWatchSelfHealColumns adds last_error / last_error_at to the issue and +// review watch tables using a column-precheck (mirroring the jira and linear +// stores). Unlike the cleanup_policy ALTER above, the readers +// (IssueWatch.LastError / LastErrorAt) scan these columns unconditionally, +// so a driver-level failure must bubble up at boot rather than turn into +// a scan panic on the next poll. +func (s *Store) addWatchSelfHealColumns() error { + for _, table := range []string{"github_review_watches", "github_issue_watches"} { + cols, err := s.tableColumns(table) + if err != nil { + return fmt.Errorf("read %s columns: %w", table, err) + } + if _, ok := cols["last_error"]; !ok { + if _, err := s.db.Exec("ALTER TABLE " + table + " ADD COLUMN last_error TEXT NOT NULL DEFAULT ''"); err != nil { + return fmt.Errorf("add %s.last_error: %w", table, err) + } + } + if _, ok := cols["last_error_at"]; !ok { + if _, err := s.db.Exec("ALTER TABLE " + table + " ADD COLUMN last_error_at DATETIME"); err != nil { + return fmt.Errorf("add %s.last_error_at: %w", table, err) + } + } + } + return nil +} + +// tableColumns returns the set of column names declared on `table`. Cheap +// SQLite PRAGMA lookup; used by addWatchSelfHealColumns to skip ALTERs on a +// fresh install whose createTablesSQL already includes the columns. Mirrors +// the helper in jira/store.go. +func (s *Store) tableColumns(table string) (map[string]struct{}, error) { + rows, err := s.db.Query(fmt.Sprintf("PRAGMA table_info(%s)", table)) + if err != nil { + return nil, err + } + defer func() { _ = rows.Close() }() + cols := make(map[string]struct{}) + for rows.Next() { + var ( + cid int + name string + ctype string + notnull int + dflt sql.NullString + pk int + ) + if err := rows.Scan(&cid, &name, &ctype, ¬null, &dflt, &pk); err != nil { + return nil, err + } + cols[name] = struct{}{} + } + return cols, rows.Err() +} + // tableExists returns true when the named table is present in sqlite_master. // Used by the multi-repo backfill to skip cross-package healing in unit // tests that don't bring up the task schema. @@ -967,6 +1037,20 @@ func (s *Store) DeleteReviewWatch(ctx context.Context, id string) error { return tx.Commit() } +// DisableReviewWatchWithError is the self-heal write: it disables the watch +// and stamps a human-readable cause + timestamp so the settings UI can show +// a "disabled because ..." banner. Called by the orchestrator when the +// watcher's bound agent profile is detected as soft-deleted. +func (s *Store) DisableReviewWatchWithError(ctx context.Context, id, cause string) error { + now := time.Now().UTC() + _, err := s.db.ExecContext(ctx, + `UPDATE github_review_watches + SET enabled = 0, last_error = ?, last_error_at = ?, updated_at = ? + WHERE id = ?`, + cause, now, now, id) + return err +} + // --- Review PR Task deduplication --- // CreateReviewPRTask records that a task was created for a review PR. @@ -1336,6 +1420,20 @@ func (s *Store) DeleteIssueWatch(ctx context.Context, id string) error { return tx.Commit() } +// DisableIssueWatchWithError is the self-heal write: disables the watch and +// stamps a human-readable cause + timestamp. Symmetric with +// DisableReviewWatchWithError; called by the orchestrator when the +// watcher's bound agent profile is detected as soft-deleted. +func (s *Store) DisableIssueWatchWithError(ctx context.Context, id, cause string) error { + now := time.Now().UTC() + _, err := s.db.ExecContext(ctx, + `UPDATE github_issue_watches + SET enabled = 0, last_error = ?, last_error_at = ?, updated_at = ? + WHERE id = ?`, + cause, now, now, id) + return err +} + // --- Issue Watch Task deduplication --- // ReserveIssueWatchTask atomically claims a slot for a (watch, repo, issue) tuple. diff --git a/apps/backend/internal/github/store_watch_disable_test.go b/apps/backend/internal/github/store_watch_disable_test.go new file mode 100644 index 000000000..abedfb5c9 --- /dev/null +++ b/apps/backend/internal/github/store_watch_disable_test.go @@ -0,0 +1,103 @@ +package github + +import ( + "context" + "testing" + "time" +) + +// TestDisableIssueWatchWithError_StampsCauseAndDisables pins the self-heal +// contract for github_issue_watches: orphaned watcher is flipped to +// enabled=0 with a human-readable LastError + LastErrorAt timestamp. +func TestDisableIssueWatchWithError_StampsCauseAndDisables(t *testing.T) { + store := newTestStore(t) + ctx := context.Background() + + iw := &IssueWatch{ + WorkspaceID: "ws-1", + WorkflowID: "wf-1", + WorkflowStepID: "step-1", + AgentProfileID: "deleted-profile", + ExecutorProfileID: "exec-1", + Enabled: true, + } + if err := store.CreateIssueWatch(ctx, iw); err != nil { + t.Fatalf("create: %v", err) + } + + const cause = `agent profile "Removed Kilo" (deleted-profile) was removed` + // Widen the window by 1s on each side to absorb SQLite second-precision + // timestamp rounding. + before := time.Now().UTC().Add(-time.Second) + if err := store.DisableIssueWatchWithError(ctx, iw.ID, cause); err != nil { + t.Fatalf("disable: %v", err) + } + after := time.Now().UTC().Add(time.Second) + + got, err := store.GetIssueWatch(ctx, iw.ID) + if err != nil { + t.Fatalf("get: %v", err) + } + if got == nil { + t.Fatal("expected row, got nil") + } + if got.Enabled { + t.Error("Enabled should be false after self-heal") + } + if got.LastError != cause { + t.Errorf("LastError = %q, want %q", got.LastError, cause) + } + if got.LastErrorAt == nil { + t.Fatal("LastErrorAt should be set") + } + if got.LastErrorAt.Before(before) || got.LastErrorAt.After(after) { + t.Errorf("LastErrorAt %v outside [%v, %v]", got.LastErrorAt, before, after) + } +} + +// TestDisableReviewWatchWithError_StampsCauseAndDisables mirrors the issue +// test for review watches. +func TestDisableReviewWatchWithError_StampsCauseAndDisables(t *testing.T) { + store := newTestStore(t) + ctx := context.Background() + + rw := &ReviewWatch{ + WorkspaceID: "ws-1", + WorkflowID: "wf-1", + WorkflowStepID: "step-1", + AgentProfileID: "deleted-profile", + ExecutorProfileID: "exec-1", + ReviewScope: "user_and_teams", + Enabled: true, + } + if err := store.CreateReviewWatch(ctx, rw); err != nil { + t.Fatalf("create: %v", err) + } + + const cause = `agent profile "Removed Opencode" (deleted-profile) was removed` + before := time.Now().UTC().Add(-time.Second) + if err := store.DisableReviewWatchWithError(ctx, rw.ID, cause); err != nil { + t.Fatalf("disable: %v", err) + } + after := time.Now().UTC().Add(time.Second) + + got, err := store.GetReviewWatch(ctx, rw.ID) + if err != nil { + t.Fatalf("get: %v", err) + } + if got == nil { + t.Fatal("expected row, got nil") + } + if got.Enabled { + t.Error("Enabled should be false after self-heal") + } + if got.LastError != cause { + t.Errorf("LastError = %q, want %q", got.LastError, cause) + } + if got.LastErrorAt == nil { + t.Fatal("LastErrorAt should be set") + } + if got.LastErrorAt.Before(before) || got.LastErrorAt.After(after) { + t.Errorf("LastErrorAt %v outside [%v, %v]", got.LastErrorAt, before, after) + } +} diff --git a/apps/backend/internal/jira/models.go b/apps/backend/internal/jira/models.go index b3465aa3d..3c9010625 100644 --- a/apps/backend/internal/jira/models.go +++ b/apps/backend/internal/jira/models.go @@ -175,8 +175,13 @@ type IssueWatch struct { // See docs/specs/throttle-watcher-fanout/spec.md for the open-task definition. MaxInflightTasks *int `json:"maxInflightTasks,omitempty" db:"max_inflight_tasks"` LastPolledAt *time.Time `json:"lastPolledAt,omitempty" db:"last_polled_at"` - CreatedAt time.Time `json:"createdAt" db:"created_at"` - UpdatedAt time.Time `json:"updatedAt" db:"updated_at"` + // LastError / LastErrorAt are stamped when the dispatch pipeline self- + // heals the watcher (e.g. the bound agent profile was soft-deleted). + // Empty for a healthy watcher. + LastError string `json:"lastError,omitempty" db:"last_error"` + LastErrorAt *time.Time `json:"lastErrorAt,omitempty" db:"last_error_at"` + CreatedAt time.Time `json:"createdAt" db:"created_at"` + UpdatedAt time.Time `json:"updatedAt" db:"updated_at"` } // IssueWatchTask deduplicates task creation per (watch, ticket) tuple. The diff --git a/apps/backend/internal/jira/store.go b/apps/backend/internal/jira/store.go index af70ca909..5955fdee1 100644 --- a/apps/backend/internal/jira/store.go +++ b/apps/backend/internal/jira/store.go @@ -72,6 +72,8 @@ const createTablesSQL = ` -- the API layer. See docs/specs/throttle-watcher-fanout/. max_inflight_tasks INTEGER DEFAULT 5, last_polled_at DATETIME, + last_error TEXT NOT NULL DEFAULT '', + last_error_at DATETIME, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL ); @@ -108,6 +110,9 @@ func (s *Store) initSchema() error { if err := s.addMaxInflightTasksColumn(); err != nil { return err } + if err := s.addIssueWatchLastErrorColumns(); err != nil { + return err + } return nil } @@ -132,6 +137,28 @@ func (s *Store) addMaxInflightTasksColumn() error { return nil } +// addIssueWatchLastErrorColumns brings older databases up to the current +// schema by appending last_error / last_error_at to jira_issue_watches when +// missing. Idempotent — column lookup before each ALTER avoids the +// "duplicate column name" error on fresh installs that already have them. +func (s *Store) addIssueWatchLastErrorColumns() error { + cols, err := s.tableColumns("jira_issue_watches") + if err != nil { + return err + } + if _, ok := cols["last_error"]; !ok { + if _, err := s.db.Exec(`ALTER TABLE jira_issue_watches ADD COLUMN last_error TEXT NOT NULL DEFAULT ''`); err != nil { + return fmt.Errorf("add last_error column: %w", err) + } + } + if _, ok := cols["last_error_at"]; !ok { + if _, err := s.db.Exec(`ALTER TABLE jira_issue_watches ADD COLUMN last_error_at DATETIME`); err != nil { + return fmt.Errorf("add last_error_at column: %w", err) + } + } + return nil +} + // addInstanceTypeColumn brings older databases up to the current schema by // adding the instance_type column to jira_configs when missing. Existing rows // were all written when the code only spoke Cloud, so 'cloud' is the safe @@ -367,9 +394,14 @@ func (s *Store) UpdateAuthHealth(ctx context.Context, ok bool, errMsg string, ch // --- Issue watch operations --- -const issueWatchColumns = `id, workspace_id, workflow_id, workflow_step_id, jql, +// issueWatchInsertColumns / issueWatchSelectColumns split insert vs read so +// the SELECT path can wrap nullable last_error in COALESCE (older databases +// pre-self-heal migration may return NULL). +const issueWatchInsertColumns = `id, workspace_id, workflow_id, workflow_step_id, jql, agent_profile_id, executor_profile_id, prompt, enabled, - poll_interval_seconds, max_inflight_tasks, last_polled_at, created_at, updated_at` + poll_interval_seconds, max_inflight_tasks, last_polled_at, + last_error, last_error_at, + created_at, updated_at` // nullableInt converts a *int into a value suitable for a nullable SQL column. // A nil pointer becomes a SQL NULL; a non-nil pointer becomes the underlying @@ -381,6 +413,12 @@ func nullableInt(v *int) interface{} { return *v } +const issueWatchSelectColumns = `id, workspace_id, workflow_id, workflow_step_id, jql, + agent_profile_id, executor_profile_id, prompt, enabled, + poll_interval_seconds, max_inflight_tasks, last_polled_at, + COALESCE(last_error, '') AS last_error, last_error_at, + created_at, updated_at` + // CreateIssueWatch persists a new issue watch row. ID and timestamps are // assigned here so callers can pass a partially-populated struct. func (s *Store) CreateIssueWatch(ctx context.Context, w *IssueWatch) error { @@ -394,12 +432,13 @@ func (s *Store) CreateIssueWatch(ctx context.Context, w *IssueWatch) error { w.PollIntervalSeconds = DefaultIssueWatchPollInterval } _, err := s.db.ExecContext(ctx, ` - INSERT INTO jira_issue_watches (`+issueWatchColumns+`) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + INSERT INTO jira_issue_watches (`+issueWatchInsertColumns+`) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, w.ID, w.WorkspaceID, w.WorkflowID, w.WorkflowStepID, w.JQL, w.AgentProfileID, w.ExecutorProfileID, w.Prompt, w.Enabled, - w.PollIntervalSeconds, nullableInt(w.MaxInflightTasks), - w.LastPolledAt, w.CreatedAt, w.UpdatedAt) + w.PollIntervalSeconds, nullableInt(w.MaxInflightTasks), w.LastPolledAt, + w.LastError, w.LastErrorAt, + w.CreatedAt, w.UpdatedAt) return err } @@ -407,7 +446,7 @@ func (s *Store) CreateIssueWatch(ctx context.Context, w *IssueWatch) error { func (s *Store) GetIssueWatch(ctx context.Context, id string) (*IssueWatch, error) { var w IssueWatch err := s.ro.GetContext(ctx, &w, - `SELECT `+issueWatchColumns+` FROM jira_issue_watches WHERE id = ?`, id) + `SELECT `+issueWatchSelectColumns+` FROM jira_issue_watches WHERE id = ?`, id) if errors.Is(err, sql.ErrNoRows) { return nil, nil } @@ -422,7 +461,7 @@ func (s *Store) GetIssueWatch(ctx context.Context, id string) (*IssueWatch, erro func (s *Store) ListIssueWatches(ctx context.Context, workspaceID string) ([]*IssueWatch, error) { var watches []*IssueWatch err := s.ro.SelectContext(ctx, &watches, - `SELECT `+issueWatchColumns+` FROM jira_issue_watches + `SELECT `+issueWatchSelectColumns+` FROM jira_issue_watches WHERE workspace_id = ? ORDER BY created_at`, workspaceID) if err != nil { return nil, err @@ -437,7 +476,7 @@ func (s *Store) ListIssueWatches(ctx context.Context, workspaceID string) ([]*Is func (s *Store) ListAllIssueWatches(ctx context.Context) ([]*IssueWatch, error) { var watches []*IssueWatch err := s.ro.SelectContext(ctx, &watches, - `SELECT `+issueWatchColumns+` FROM jira_issue_watches ORDER BY workspace_id, created_at`) + `SELECT `+issueWatchSelectColumns+` FROM jira_issue_watches ORDER BY workspace_id, created_at`) if err != nil { return nil, err } @@ -449,7 +488,7 @@ func (s *Store) ListAllIssueWatches(ctx context.Context) ([]*IssueWatch, error) func (s *Store) ListEnabledIssueWatches(ctx context.Context) ([]*IssueWatch, error) { var watches []*IssueWatch err := s.ro.SelectContext(ctx, &watches, - `SELECT `+issueWatchColumns+` FROM jira_issue_watches + `SELECT `+issueWatchSelectColumns+` FROM jira_issue_watches WHERE enabled = 1 ORDER BY created_at`) if err != nil { return nil, err @@ -488,6 +527,21 @@ func (s *Store) UpdateIssueWatchLastPolled(ctx context.Context, id string, t tim return err } +// DisableIssueWatchWithError is the self-heal write: it disables the watch +// and stamps a human-readable cause + timestamp so the settings UI can show +// a "disabled because ..." banner. Called by the orchestrator dispatch +// pipeline when the watcher's bound agent profile is detected as +// soft-deleted. +func (s *Store) DisableIssueWatchWithError(ctx context.Context, id, cause string) error { + now := time.Now().UTC() + _, err := s.db.ExecContext(ctx, + `UPDATE jira_issue_watches + SET enabled = 0, last_error = ?, last_error_at = ?, updated_at = ? + WHERE id = ?`, + cause, now, now, id) + return err +} + // DeleteIssueWatch removes a watch and (via FK ON DELETE CASCADE) its dedup // rows in a single transaction. The explicit DELETE on the child table guards // older databases where foreign_keys may not have been enabled at attach time. diff --git a/apps/backend/internal/jira/store_issue_watch_disable_test.go b/apps/backend/internal/jira/store_issue_watch_disable_test.go new file mode 100644 index 000000000..e949be118 --- /dev/null +++ b/apps/backend/internal/jira/store_issue_watch_disable_test.go @@ -0,0 +1,56 @@ +package jira + +import ( + "context" + "testing" + "time" +) + +// TestDisableIssueWatchWithError_SetsDisabledStateAndStampsError mirrors the +// Linear self-heal contract: orphaned watcher is disabled with the cause +// stamped onto last_error / last_error_at. +func TestDisableIssueWatchWithError_SetsDisabledStateAndStampsError(t *testing.T) { + store := newTestStore(t) + ctx := context.Background() + + watch := &IssueWatch{ + WorkspaceID: "ws-1", + WorkflowID: "wf-1", + WorkflowStepID: "step-1", + JQL: "project = ENG", + AgentProfileID: "deleted-profile", + Enabled: true, + } + if err := store.CreateIssueWatch(ctx, watch); err != nil { + t.Fatalf("create: %v", err) + } + + const cause = `agent profile "Removed Kilo" (deleted-profile) was removed` + // Widen the window by 1s on each side to absorb SQLite second-precision + // timestamp rounding. + before := time.Now().UTC().Add(-time.Second) + if err := store.DisableIssueWatchWithError(ctx, watch.ID, cause); err != nil { + t.Fatalf("disable: %v", err) + } + after := time.Now().UTC().Add(time.Second) + + got, err := store.GetIssueWatch(ctx, watch.ID) + if err != nil { + t.Fatalf("get: %v", err) + } + if got == nil { + t.Fatal("expected row, got nil") + } + if got.Enabled { + t.Error("Enabled should be false after self-heal") + } + if got.LastError != cause { + t.Errorf("LastError = %q, want %q", got.LastError, cause) + } + if got.LastErrorAt == nil { + t.Fatal("LastErrorAt should be set") + } + if got.LastErrorAt.Before(before) || got.LastErrorAt.After(after) { + t.Errorf("LastErrorAt %v outside expected window [%v, %v]", got.LastErrorAt, before, after) + } +} diff --git a/apps/backend/internal/linear/models.go b/apps/backend/internal/linear/models.go index bf44cf72e..4beb7d7dc 100644 --- a/apps/backend/internal/linear/models.go +++ b/apps/backend/internal/linear/models.go @@ -203,8 +203,13 @@ type IssueWatch struct { // See docs/specs/throttle-watcher-fanout/spec.md for the open-task definition. MaxInflightTasks *int `json:"maxInflightTasks,omitempty" db:"max_inflight_tasks"` LastPolledAt *time.Time `json:"lastPolledAt,omitempty" db:"last_polled_at"` - CreatedAt time.Time `json:"createdAt" db:"created_at"` - UpdatedAt time.Time `json:"updatedAt" db:"updated_at"` + // LastError / LastErrorAt are stamped when the dispatch pipeline self- + // heals the watcher (e.g. the bound agent profile was soft-deleted). + // Empty for a healthy watcher. + LastError string `json:"lastError,omitempty" db:"last_error"` + LastErrorAt *time.Time `json:"lastErrorAt,omitempty" db:"last_error_at"` + CreatedAt time.Time `json:"createdAt" db:"created_at"` + UpdatedAt time.Time `json:"updatedAt" db:"updated_at"` } // IssueWatchTask deduplicates task creation per (watch, issue) tuple. The diff --git a/apps/backend/internal/linear/store.go b/apps/backend/internal/linear/store.go index 8e9ace835..f90dd596d 100644 --- a/apps/backend/internal/linear/store.go +++ b/apps/backend/internal/linear/store.go @@ -68,6 +68,8 @@ const createTablesSQL = ` -- the API layer. See docs/specs/throttle-watcher-fanout/. max_inflight_tasks INTEGER DEFAULT 5, last_polled_at DATETIME, + last_error TEXT NOT NULL DEFAULT '', + last_error_at DATETIME, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL ); @@ -99,6 +101,9 @@ func (s *Store) initSchema() error { if err := s.addMaxInflightTasksColumn(); err != nil { return err } + if err := s.addIssueWatchLastErrorColumns(); err != nil { + return err + } return nil } @@ -123,6 +128,29 @@ func (s *Store) addMaxInflightTasksColumn() error { return nil } +// addIssueWatchLastErrorColumns brings older databases up to the current +// schema by appending last_error / last_error_at to linear_issue_watches when +// missing. Fresh installs hit the column-already-present branch since +// createTablesSQL declares both columns. Idempotent — column lookup before +// each ALTER avoids the "duplicate column name" error. +func (s *Store) addIssueWatchLastErrorColumns() error { + cols, err := s.tableColumns("linear_issue_watches") + if err != nil { + return err + } + if _, ok := cols["last_error"]; !ok { + if _, err := s.db.Exec(`ALTER TABLE linear_issue_watches ADD COLUMN last_error TEXT NOT NULL DEFAULT ''`); err != nil { + return fmt.Errorf("add last_error column: %w", err) + } + } + if _, ok := cols["last_error_at"]; !ok { + if _, err := s.db.Exec(`ALTER TABLE linear_issue_watches ADD COLUMN last_error_at DATETIME`); err != nil { + return fmt.Errorf("add last_error_at column: %w", err) + } + } + return nil +} + // migrateLegacyPerWorkspaceTable detects the pre-singleton schema (where // linear_configs was keyed by workspace_id) and rewrites it into the singleton // shape. Picks the most-recently-updated row and records the source diff --git a/apps/backend/internal/linear/store_issue_watch.go b/apps/backend/internal/linear/store_issue_watch.go index 298ae6549..e6398b2f1 100644 --- a/apps/backend/internal/linear/store_issue_watch.go +++ b/apps/backend/internal/linear/store_issue_watch.go @@ -27,6 +27,8 @@ type issueWatchRow struct { PollIntervalSeconds int `db:"poll_interval_seconds"` MaxInflightTasks sql.NullInt64 `db:"max_inflight_tasks"` LastPolledAt *time.Time `db:"last_polled_at"` + LastError string `db:"last_error"` + LastErrorAt *time.Time `db:"last_error_at"` CreatedAt time.Time `db:"created_at"` UpdatedAt time.Time `db:"updated_at"` } @@ -56,6 +58,8 @@ func (r *issueWatchRow) toIssueWatch() (*IssueWatch, error) { PollIntervalSeconds: r.PollIntervalSeconds, MaxInflightTasks: maxInflight, LastPolledAt: r.LastPolledAt, + LastError: r.LastError, + LastErrorAt: r.LastErrorAt, CreatedAt: r.CreatedAt, UpdatedAt: r.UpdatedAt, }, nil @@ -69,9 +73,21 @@ func encodeFilter(f SearchFilter) (string, error) { return string(b), nil } -const issueWatchColumns = `id, workspace_id, workflow_id, workflow_step_id, filter_json, +// issueWatchInsertColumns lists the writable column names in row-insert order. +// SELECTs use issueWatchSelectColumns which wraps the nullable last_error in +// COALESCE so older databases (pre-self-heal migration) read back as empty +// strings rather than NULL. +const issueWatchInsertColumns = `id, workspace_id, workflow_id, workflow_step_id, filter_json, agent_profile_id, executor_profile_id, prompt, enabled, - poll_interval_seconds, max_inflight_tasks, last_polled_at, created_at, updated_at` + poll_interval_seconds, max_inflight_tasks, last_polled_at, + last_error, last_error_at, + created_at, updated_at` + +const issueWatchSelectColumns = `id, workspace_id, workflow_id, workflow_step_id, filter_json, + agent_profile_id, executor_profile_id, prompt, enabled, + poll_interval_seconds, max_inflight_tasks, last_polled_at, + COALESCE(last_error, '') AS last_error, last_error_at, + created_at, updated_at` // CreateIssueWatch persists a new issue watch row. ID and timestamps are // assigned here so callers can pass a partially-populated struct. @@ -90,11 +106,13 @@ func (s *Store) CreateIssueWatch(ctx context.Context, w *IssueWatch) error { return err } _, err = s.db.ExecContext(ctx, ` - INSERT INTO linear_issue_watches (`+issueWatchColumns+`) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + INSERT INTO linear_issue_watches (`+issueWatchInsertColumns+`) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, w.ID, w.WorkspaceID, w.WorkflowID, w.WorkflowStepID, filterJSON, w.AgentProfileID, w.ExecutorProfileID, w.Prompt, w.Enabled, - w.PollIntervalSeconds, nullableInt(w.MaxInflightTasks), w.LastPolledAt, w.CreatedAt, w.UpdatedAt) + w.PollIntervalSeconds, nullableInt(w.MaxInflightTasks), w.LastPolledAt, + w.LastError, w.LastErrorAt, + w.CreatedAt, w.UpdatedAt) return err } @@ -112,7 +130,7 @@ func nullableInt(v *int) interface{} { func (s *Store) GetIssueWatch(ctx context.Context, id string) (*IssueWatch, error) { var row issueWatchRow err := s.ro.GetContext(ctx, &row, - `SELECT `+issueWatchColumns+` FROM linear_issue_watches WHERE id = ?`, id) + `SELECT `+issueWatchSelectColumns+` FROM linear_issue_watches WHERE id = ?`, id) if errors.Is(err, sql.ErrNoRows) { return nil, nil } @@ -126,7 +144,7 @@ func (s *Store) GetIssueWatch(ctx context.Context, id string) (*IssueWatch, erro func (s *Store) ListIssueWatches(ctx context.Context, workspaceID string) ([]*IssueWatch, error) { var rows []issueWatchRow err := s.ro.SelectContext(ctx, &rows, - `SELECT `+issueWatchColumns+` FROM linear_issue_watches + `SELECT `+issueWatchSelectColumns+` FROM linear_issue_watches WHERE workspace_id = ? ORDER BY created_at`, workspaceID) if err != nil { return nil, err @@ -138,7 +156,7 @@ func (s *Store) ListIssueWatches(ctx context.Context, workspaceID string) ([]*Is func (s *Store) ListAllIssueWatches(ctx context.Context) ([]*IssueWatch, error) { var rows []issueWatchRow err := s.ro.SelectContext(ctx, &rows, - `SELECT `+issueWatchColumns+` FROM linear_issue_watches ORDER BY workspace_id, created_at`) + `SELECT `+issueWatchSelectColumns+` FROM linear_issue_watches ORDER BY workspace_id, created_at`) if err != nil { return nil, err } @@ -150,7 +168,7 @@ func (s *Store) ListAllIssueWatches(ctx context.Context) ([]*IssueWatch, error) func (s *Store) ListEnabledIssueWatches(ctx context.Context) ([]*IssueWatch, error) { var rows []issueWatchRow err := s.ro.SelectContext(ctx, &rows, - `SELECT `+issueWatchColumns+` FROM linear_issue_watches + `SELECT `+issueWatchSelectColumns+` FROM linear_issue_watches WHERE enabled = 1 ORDER BY created_at`) if err != nil { return nil, err @@ -204,6 +222,21 @@ func (s *Store) UpdateIssueWatchLastPolled(ctx context.Context, id string, t tim return err } +// DisableIssueWatchWithError is the self-heal write: it disables the watch +// and stamps a human-readable cause + timestamp so the settings UI can show +// a "disabled because ..." banner. Called by the orchestrator dispatch +// pipeline when the watcher's bound agent profile is detected as +// soft-deleted (see internal/agent/runtime/lifecycle/profile_resolver.go). +func (s *Store) DisableIssueWatchWithError(ctx context.Context, id, cause string) error { + now := time.Now().UTC() + _, err := s.db.ExecContext(ctx, + `UPDATE linear_issue_watches + SET enabled = 0, last_error = ?, last_error_at = ?, updated_at = ? + WHERE id = ?`, + cause, now, now, id) + return err +} + // DeleteIssueWatch removes a watch and its dedup rows in a single transaction. // The explicit child DELETE guards older databases where foreign_keys may not // have been enabled at attach time. diff --git a/apps/backend/internal/linear/store_issue_watch_disable_test.go b/apps/backend/internal/linear/store_issue_watch_disable_test.go new file mode 100644 index 000000000..fd0339399 --- /dev/null +++ b/apps/backend/internal/linear/store_issue_watch_disable_test.go @@ -0,0 +1,58 @@ +package linear + +import ( + "context" + "testing" + "time" +) + +// TestDisableIssueWatchWithError_SetsDisabledStateAndStampsError pins the +// self-heal contract: when an orphaned watcher is detected (its agent +// profile has been soft-deleted), DisableIssueWatchWithError flips Enabled +// to false, stamps a human-readable LastError, and records LastErrorAt so +// the settings UI can show a "disabled X ago because Y" banner. +func TestDisableIssueWatchWithError_SetsDisabledStateAndStampsError(t *testing.T) { + store := newTestStore(t) + ctx := context.Background() + + watch := &IssueWatch{ + WorkspaceID: "ws-1", + WorkflowID: "wf-1", + WorkflowStepID: "step-1", + AgentProfileID: "deleted-profile", + Enabled: true, + } + if err := store.CreateIssueWatch(ctx, watch); err != nil { + t.Fatalf("create: %v", err) + } + + const cause = `agent profile "Removed Kilo" (deleted-profile) was removed` + // Widen the window by 1s on each side: SQLite stores datetimes at second + // precision so a sub-second time.Now() reading can land outside a tighter + // bracket after round-tripping through the DB. + before := time.Now().UTC().Add(-time.Second) + if err := store.DisableIssueWatchWithError(ctx, watch.ID, cause); err != nil { + t.Fatalf("disable: %v", err) + } + after := time.Now().UTC().Add(time.Second) + + got, err := store.GetIssueWatch(ctx, watch.ID) + if err != nil { + t.Fatalf("get: %v", err) + } + if got == nil { + t.Fatal("expected watch row, got nil") + } + if got.Enabled { + t.Error("Enabled should be false after self-heal") + } + if got.LastError != cause { + t.Errorf("LastError = %q, want %q", got.LastError, cause) + } + if got.LastErrorAt == nil { + t.Fatal("LastErrorAt should be set") + } + if got.LastErrorAt.Before(before) || got.LastErrorAt.After(after) { + t.Errorf("LastErrorAt %v outside expected window [%v, %v]", got.LastErrorAt, before, after) + } +} diff --git a/apps/backend/internal/orchestrator/event_handlers_github.go b/apps/backend/internal/orchestrator/event_handlers_github.go index fc9438619..eda40b9ae 100644 --- a/apps/backend/internal/orchestrator/event_handlers_github.go +++ b/apps/backend/internal/orchestrator/event_handlers_github.go @@ -49,6 +49,12 @@ type GitHubService interface { ReserveIssueWatchTask(ctx context.Context, watchID, repoOwner, repoName string, issueNumber int, issueURL string) (bool, error) AssignIssueWatchTaskID(ctx context.Context, watchID, repoOwner, repoName string, issueNumber int, taskID string) error ReleaseIssueWatchTask(ctx context.Context, watchID, repoOwner, repoName string, issueNumber int) error + + // Self-heal operations: invoked from createIssueTask / createReviewTask + // when the watcher's bound agent profile has been soft-deleted. Symmetric + // with the Linear/Jira coordinator-driven path. + DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error + DisableReviewWatchWithError(ctx context.Context, watchID, cause string) error } // ReviewTaskCreator creates tasks from review watch events. @@ -124,10 +130,16 @@ func (s *Service) SetRepositoryResolver(rr RepositoryResolver) { s.repositoryResolver = rr } -// SetIssueTaskCreator sets the task creator for issue watch auto-task creation. +// SetIssueTaskCreator sets the task creator for issue watch auto-task +// creation. Holds s.mu across both the field write and the coordinator +// (re)init so the watcherCoordinator / issueTaskCreator pair stays +// consistent against concurrent SetProfileLookup or dispatchWatcherEvent +// goroutines — the asymmetric locking surface flagged on PR #1094 review. func (s *Service) SetIssueTaskCreator(tc IssueTaskCreator) { + s.mu.Lock() + defer s.mu.Unlock() s.issueTaskCreator = tc - s.initWatcherCoordinator() + s.initWatcherCoordinatorLocked() } // handlePRFeedback logs PR feedback events. WS broadcasting is handled in main.go. @@ -179,6 +191,10 @@ func (s *Service) createReviewTask(ctx context.Context, evt *github.NewReviewPRE zap.String("base_branch", pr.BaseBranch), zap.String("review_watch_id", evt.ReviewWatchID)) + if s.preflightDeletedProfileForGitHubReview(ctx, evt) { + return + } + if !s.reserveReviewPR(ctx, evt) { return } @@ -1113,6 +1129,10 @@ func (s *Service) createIssueTask(ctx context.Context, evt *github.NewIssueEvent issue := evt.Issue repoSlug := fmt.Sprintf("%s/%s", issue.RepoOwner, issue.RepoName) + if s.preflightDeletedProfileForGitHubIssue(ctx, evt) { + return + } + if !s.reserveIssueWatch(ctx, evt) { return } diff --git a/apps/backend/internal/orchestrator/event_handlers_github_selfheal_test.go b/apps/backend/internal/orchestrator/event_handlers_github_selfheal_test.go new file mode 100644 index 000000000..1b0592816 --- /dev/null +++ b/apps/backend/internal/orchestrator/event_handlers_github_selfheal_test.go @@ -0,0 +1,127 @@ +package orchestrator + +import ( + "context" + "strings" + "testing" + + "github.com/kandev/kandev/internal/github" + wfmodels "github.com/kandev/kandev/internal/workflow/models" +) + +// TestCreateIssueTask_SelfHealsWhenAgentProfileSoftDeleted is the regression +// guard for the GitHub side of the production bug: createIssueTask bypasses +// the WatcherDispatchCoordinator, so the soft-deleted-profile pre-flight +// lives directly on Service. When the watcher's agent profile has been +// soft-deleted, createIssueTask MUST disable the watcher and short-circuit +// before reserving a dedup slot or creating a task. +func TestCreateIssueTask_SelfHealsWhenAgentProfileSoftDeleted(t *testing.T) { + svc, _ := setupIssueTaskTest(t) + ghSvc := &mockGitHubService{issueReserveReturn: true} + svc.SetGitHubService(ghSvc) + creator := &countingIssueTaskCreator{taskID: "should-not-be-created"} + svc.SetIssueTaskCreator(creator) + svc.SetProfileLookup(&fakeProfileLookup{deleted: true, name: "Removed Kilo"}) + + evt := newIssueEvent() + evt.AgentProfileID = "deleted-profile" + evt.IssueWatchID = "github-iw-1" + + svc.createIssueTask(context.Background(), evt) + + if ghSvc.issueReserveCalls != 0 { + t.Errorf("Reserve must not run when profile is deleted, got %d calls", ghSvc.issueReserveCalls) + } + if creator.calls != 0 { + t.Errorf("CreateIssueTask must not run when profile is deleted, got %d calls", creator.calls) + } + if ghSvc.disableIssueWatchCalls != 1 { + t.Fatalf("expected DisableIssueWatchWithError to fire once, got %d calls", ghSvc.disableIssueWatchCalls) + } + if ghSvc.lastDisableIssueWatchID != "github-iw-1" { + t.Errorf("disable watch_id = %q, want %q", ghSvc.lastDisableIssueWatchID, "github-iw-1") + } + // Pin the invariant the settings UI relies on (see coordinator-level + // test for the same contract): the stamped cause must carry both the + // profile name and id so the disabled-watcher banner is actionable. + if !strings.Contains(ghSvc.lastDisableIssueCause, "Removed Kilo") || + !strings.Contains(ghSvc.lastDisableIssueCause, "deleted-profile") { + t.Errorf("disable cause missing profile name or id: %q", ghSvc.lastDisableIssueCause) + } +} + +// TestCreateIssueTask_LegacyEventWithoutProfileSkipsPreflight pins the +// no-regression contract for rows predating the agent_profile_id column: +// an empty AgentProfileID means the watcher predates self-heal wiring and +// the pre-flight must NOT run (the lookup would resolve "" to a not-found +// row and the empty-id branch must short-circuit cleanly). +func TestCreateIssueTask_LegacyEventWithoutProfileSkipsPreflight(t *testing.T) { + svc, _ := setupIssueTaskTest(t) + ghSvc := &mockGitHubService{issueReserveReturn: true} + svc.SetGitHubService(ghSvc) + creator := &countingIssueTaskCreator{taskID: "task-legacy"} + svc.SetIssueTaskCreator(creator) + lookup := &fakeProfileLookup{deleted: true, name: "Removed Kilo"} + svc.SetProfileLookup(lookup) + + evt := newIssueEvent() // AgentProfileID is "" by default + + svc.createIssueTask(context.Background(), evt) + + if lookup.calls != 0 { + t.Errorf("lookup must be skipped for empty profile id, got %d calls", lookup.calls) + } + if ghSvc.disableIssueWatchCalls != 0 { + t.Errorf("legacy watcher must not be self-healed, got %d disable calls", ghSvc.disableIssueWatchCalls) + } + if creator.calls != 1 { + t.Errorf("legacy event must still flow through pipeline, got CreateIssueTask calls %d", creator.calls) + } +} + +// TestCreateReviewTask_SelfHealsWhenAgentProfileSoftDeleted mirrors the +// issue-side regression guard for the PR review watcher path. +func TestCreateReviewTask_SelfHealsWhenAgentProfileSoftDeleted(t *testing.T) { + repo := setupTestRepo(t) + stepGetter := newMockStepGetter() + stepGetter.steps["step1"] = &wfmodels.WorkflowStep{ + ID: "step1", WorkflowID: "wf1", Name: "Step 1", Position: 0, + Events: wfmodels.StepEvents{}, + } + svc := createTestService(repo, stepGetter, newMockTaskRepo()) + + ghSvc := &mockGitHubService{reserveReturn: true} + svc.SetGitHubService(ghSvc) + reviewCreator := &countingReviewTaskCreator{taskID: "should-not-be-created"} + svc.SetReviewTaskCreator(reviewCreator) + svc.SetProfileLookup(&fakeProfileLookup{deleted: true, name: "Removed Opencode"}) + + evt := &github.NewReviewPREvent{ + ReviewWatchID: "github-rw-1", + WorkspaceID: "ws1", + WorkflowID: "wf1", + WorkflowStepID: "step1", + AgentProfileID: "deleted-profile", + PR: &github.PR{ + Number: 7, Title: "Fix login bug", + HTMLURL: "https://gh/acme/widget/pull/7", + RepoOwner: "acme", RepoName: "widget", + HeadBranch: "fix/login", BaseBranch: "main", + }, + } + + svc.createReviewTask(context.Background(), evt) + + if ghSvc.reserveCalls != 0 { + t.Errorf("ReserveReviewPRTask must not run when profile is deleted, got %d calls", ghSvc.reserveCalls) + } + if reviewCreator.calls != 0 { + t.Errorf("CreateReviewTask must not run when profile is deleted, got %d calls", reviewCreator.calls) + } + if ghSvc.disableReviewWatchCalls != 1 { + t.Fatalf("expected DisableReviewWatchWithError to fire once, got %d calls", ghSvc.disableReviewWatchCalls) + } + if ghSvc.lastDisableReviewWatchID != "github-rw-1" { + t.Errorf("disable watch_id = %q, want %q", ghSvc.lastDisableReviewWatchID, "github-rw-1") + } +} diff --git a/apps/backend/internal/orchestrator/event_handlers_github_test.go b/apps/backend/internal/orchestrator/event_handlers_github_test.go index 1f7540e61..52ab544f6 100644 --- a/apps/backend/internal/orchestrator/event_handlers_github_test.go +++ b/apps/backend/internal/orchestrator/event_handlers_github_test.go @@ -49,6 +49,14 @@ type mockGitHubService struct { issueAssignCalls int issueAssignedID string issueReleaseCalls int + + // Self-heal tracking (soft-deleted-profile pre-flight). + disableIssueWatchCalls int + lastDisableIssueWatchID string + lastDisableIssueCause string + disableReviewWatchCalls int + lastDisableReviewWatchID string + lastDisableReviewCause string } func (m *mockGitHubService) Client() github.Client { return m.client } @@ -125,6 +133,23 @@ func (m *mockGitHubService) ReleaseIssueWatchTask(_ context.Context, _, _, _ str return nil } +// disableIssueWatchCalls / disableReviewWatchCalls track self-heal invocations +// triggered by the soft-deleted-profile pre-flight in createIssueTask / +// createReviewTask. +func (m *mockGitHubService) DisableIssueWatchWithError(_ context.Context, watchID, cause string) error { + m.disableIssueWatchCalls++ + m.lastDisableIssueWatchID = watchID + m.lastDisableIssueCause = cause + return nil +} + +func (m *mockGitHubService) DisableReviewWatchWithError(_ context.Context, watchID, cause string) error { + m.disableReviewWatchCalls++ + m.lastDisableReviewWatchID = watchID + m.lastDisableReviewCause = cause + return nil +} + func TestInterpolateReviewPrompt(t *testing.T) { pr := &github.PR{ Number: 42, diff --git a/apps/backend/internal/orchestrator/event_handlers_jira.go b/apps/backend/internal/orchestrator/event_handlers_jira.go index 58222237b..9950a985e 100644 --- a/apps/backend/internal/orchestrator/event_handlers_jira.go +++ b/apps/backend/internal/orchestrator/event_handlers_jira.go @@ -19,6 +19,10 @@ type JiraService interface { ReserveIssueWatchTask(ctx context.Context, watchID, issueKey, issueURL string) (bool, error) AssignIssueWatchTaskID(ctx context.Context, watchID, issueKey, taskID string) error ReleaseIssueWatchTask(ctx context.Context, watchID, issueKey string) error + // DisableIssueWatchWithError is invoked by the dispatch coordinator's + // self-heal flow when the watcher's bound agent profile has been + // soft-deleted. + DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error } // SetJiraService wires the JIRA dedup helpers into the orchestrator so diff --git a/apps/backend/internal/orchestrator/event_handlers_jira_test.go b/apps/backend/internal/orchestrator/event_handlers_jira_test.go index 909f445b0..0d4529055 100644 --- a/apps/backend/internal/orchestrator/event_handlers_jira_test.go +++ b/apps/backend/internal/orchestrator/event_handlers_jira_test.go @@ -13,14 +13,17 @@ import ( // mockJiraService records dedup calls so tests can assert on the // reserve→assign→release contract used by handleNewJiraIssue. type mockJiraService struct { - reserveReturn bool - reserveErr error - reserveCalls int - assignCalls int - releaseCalls int - lastWatchID string - lastIssueKey string - assignedTaskID string + reserveReturn bool + reserveErr error + reserveCalls int + assignCalls int + releaseCalls int + lastWatchID string + lastIssueKey string + assignedTaskID string + disableCalls int + lastDisableWatchID string + lastDisableCause string } func (m *mockJiraService) ReserveIssueWatchTask(_ context.Context, watchID, issueKey, _ string) (bool, error) { @@ -41,6 +44,13 @@ func (m *mockJiraService) ReleaseIssueWatchTask(_ context.Context, _, _ string) return nil } +func (m *mockJiraService) DisableIssueWatchWithError(_ context.Context, watchID, cause string) error { + m.disableCalls++ + m.lastDisableWatchID = watchID + m.lastDisableCause = cause + return nil +} + func newJiraIssueEvent() *jira.NewJiraIssueEvent { return &jira.NewJiraIssueEvent{ IssueWatchID: "iw-1", diff --git a/apps/backend/internal/orchestrator/event_handlers_linear.go b/apps/backend/internal/orchestrator/event_handlers_linear.go index 83ae2361f..54e3b5ad9 100644 --- a/apps/backend/internal/orchestrator/event_handlers_linear.go +++ b/apps/backend/internal/orchestrator/event_handlers_linear.go @@ -18,6 +18,10 @@ type LinearService interface { ReserveIssueWatchTask(ctx context.Context, watchID, identifier, issueURL string) (bool, error) AssignIssueWatchTaskID(ctx context.Context, watchID, identifier, taskID string) error ReleaseIssueWatchTask(ctx context.Context, watchID, identifier string) error + // DisableIssueWatchWithError is invoked by the dispatch coordinator's + // self-heal flow when the watcher's bound agent profile has been + // soft-deleted. + DisableIssueWatchWithError(ctx context.Context, watchID, cause string) error } // SetLinearService wires the Linear dedup helpers into the orchestrator so diff --git a/apps/backend/internal/orchestrator/service.go b/apps/backend/internal/orchestrator/service.go index 0893ea97f..27bdc1bc7 100644 --- a/apps/backend/internal/orchestrator/service.go +++ b/apps/backend/internal/orchestrator/service.go @@ -281,6 +281,11 @@ type Service struct { // "cap cleared") only once per transition instead of every event. watcherSaturated map[string]bool + // profileLookup answers "is this agent profile still live?" for the + // dispatch pre-flight. Set via SetProfileLookup from main; nil-safe so + // the legacy code path (and tests without profile wiring) keep working. + profileLookup ProfileLookup + // Jira service for issue watch dedup operations jiraService JiraService // jiraSource adapts jiraService onto WatcherSource. Built once in diff --git a/apps/backend/internal/orchestrator/source_jira.go b/apps/backend/internal/orchestrator/source_jira.go index 390ec6b79..e0a8a6262 100644 --- a/apps/backend/internal/orchestrator/source_jira.go +++ b/apps/backend/internal/orchestrator/source_jira.go @@ -109,3 +109,23 @@ func (s *JiraWatcherSource) AutoStartParams(evt any) AutoStartParams { WorkflowStepID: e.WorkflowStepID, } } + +// AgentProfileID returns the watcher's bound profile id, or "" when the +// event payload is malformed. Pre-flight uses "" as the skip-check signal. +func (s *JiraWatcherSource) AgentProfileID(evt any) string { + e, ok := evt.(*jira.NewJiraIssueEvent) + if !ok || e == nil { + return "" + } + return e.AgentProfileID +} + +// SelfHeal disables the jira_issue_watches row that produced this event. +// Symmetric with LinearWatcherSource.SelfHeal; nil-safe. +func (s *JiraWatcherSource) SelfHeal(ctx context.Context, evt any, cause string) error { + e, ok := evt.(*jira.NewJiraIssueEvent) + if !ok || e == nil || s.service == nil { + return nil + } + return s.service.DisableIssueWatchWithError(ctx, e.IssueWatchID, cause) +} diff --git a/apps/backend/internal/orchestrator/source_jira_test.go b/apps/backend/internal/orchestrator/source_jira_test.go index 30de60b38..0ff4526c5 100644 --- a/apps/backend/internal/orchestrator/source_jira_test.go +++ b/apps/backend/internal/orchestrator/source_jira_test.go @@ -13,9 +13,11 @@ type fakeJiraService struct { reserveErr error assignErr error releaseErr error + disableErr error gotReserve []string gotAssign []string gotRelease []string + gotDisable []string } func (f *fakeJiraService) ReserveIssueWatchTask(_ context.Context, watchID, key, _ string) (bool, error) { @@ -33,6 +35,11 @@ func (f *fakeJiraService) ReleaseIssueWatchTask(_ context.Context, watchID, key return f.releaseErr } +func (f *fakeJiraService) DisableIssueWatchWithError(_ context.Context, watchID, cause string) error { + f.gotDisable = append(f.gotDisable, watchID+":"+cause) + return f.disableErr +} + func sampleJiraEvent() *jira.NewJiraIssueEvent { return &jira.NewJiraIssueEvent{ IssueWatchID: "watch-1", diff --git a/apps/backend/internal/orchestrator/source_linear.go b/apps/backend/internal/orchestrator/source_linear.go index 413b1c2f8..79099965e 100644 --- a/apps/backend/internal/orchestrator/source_linear.go +++ b/apps/backend/internal/orchestrator/source_linear.go @@ -110,3 +110,24 @@ func (s *LinearWatcherSource) AutoStartParams(evt any) AutoStartParams { WorkflowStepID: e.WorkflowStepID, } } + +// AgentProfileID returns the watcher's bound profile id, or "" when the +// event payload is malformed. Pre-flight uses "" as the skip-check signal. +func (s *LinearWatcherSource) AgentProfileID(evt any) string { + e, ok := evt.(*linear.NewLinearIssueEvent) + if !ok || e == nil { + return "" + } + return e.AgentProfileID +} + +// SelfHeal disables the linear_issue_watches row that produced this event. +// Nil-safe: with no LinearService wired the call is silently dropped — same +// pattern as Reserve / Release. +func (s *LinearWatcherSource) SelfHeal(ctx context.Context, evt any, cause string) error { + e, ok := evt.(*linear.NewLinearIssueEvent) + if !ok || e == nil || s.service == nil { + return nil + } + return s.service.DisableIssueWatchWithError(ctx, e.IssueWatchID, cause) +} diff --git a/apps/backend/internal/orchestrator/source_linear_test.go b/apps/backend/internal/orchestrator/source_linear_test.go index c6f390c6a..0cb7486ff 100644 --- a/apps/backend/internal/orchestrator/source_linear_test.go +++ b/apps/backend/internal/orchestrator/source_linear_test.go @@ -13,9 +13,11 @@ type fakeLinearService struct { reserveErr error assignErr error releaseErr error + disableErr error gotReserve []string gotAssign []string gotRelease []string + gotDisable []string } func (f *fakeLinearService) ReserveIssueWatchTask(_ context.Context, watchID, id, _ string) (bool, error) { @@ -33,6 +35,11 @@ func (f *fakeLinearService) ReleaseIssueWatchTask(_ context.Context, watchID, id return f.releaseErr } +func (f *fakeLinearService) DisableIssueWatchWithError(_ context.Context, watchID, cause string) error { + f.gotDisable = append(f.gotDisable, watchID+":"+cause) + return f.disableErr +} + func sampleLinearEvent() *linear.NewLinearIssueEvent { return &linear.NewLinearIssueEvent{ IssueWatchID: "watch-1", diff --git a/apps/backend/internal/orchestrator/watcher_dispatch.go b/apps/backend/internal/orchestrator/watcher_dispatch.go index 8bc3f2495..61847a42e 100644 --- a/apps/backend/internal/orchestrator/watcher_dispatch.go +++ b/apps/backend/internal/orchestrator/watcher_dispatch.go @@ -31,7 +31,41 @@ type WatcherDispatchCoordinator struct { taskCreator IssueTaskCreator startTask taskStarter shouldAutoStart func(ctx context.Context, workflowStepID string) bool - logger *logger.Logger + // profileLookup pre-flight checks the watcher's bound agent profile. + // When the profile has been soft-deleted (reconciler-driven cleanup of an + // agent type that fell off the registry), the coordinator short-circuits + // before creating any task and asks the source to self-heal the watcher + // row. nil means "skip the check" — production wires this in; tests can + // leave it unset. + profileLookup ProfileLookup + logger *logger.Logger +} + +// ProfileLookup answers "is this agent profile still live, and what was its +// display name?" — used by the watcher dispatch self-heal flow to detect +// orphaned watchers (their agent profile was removed by the orchestrator's +// reconciler when its agent type left the enabled registry). +// +// Returning (true, name, nil) means the row exists but has DeletedAt set; +// (false, _, nil) means the row is live; a non-nil err is treated as +// "couldn't tell" and the dispatch falls through (fail-open). +type ProfileLookup interface { + LookupProfile(ctx context.Context, profileID string) (deleted bool, name string, err error) +} + +// SetProfileLookup wires the pre-flight check into the coordinator. Safe to +// call before or after task-creator wiring; nil-ok and pre-flight just +// becomes a no-op until a real lookup is provided. +func (c *WatcherDispatchCoordinator) SetProfileLookup(p ProfileLookup) { + c.mu.Lock() + defer c.mu.Unlock() + c.profileLookup = p +} + +func (c *WatcherDispatchCoordinator) getProfileLookup() ProfileLookup { + c.mu.RLock() + defer c.mu.RUnlock() + return c.profileLookup } // SetTaskCreator atomically updates the task creator the coordinator @@ -75,6 +109,12 @@ type WatcherSource interface { // metrics labels and log fields. Name() string + // AgentProfileID returns the agent profile bound to the watcher that + // produced this event. The coordinator uses it for the soft-deleted- + // profile pre-flight check; empty means "no profile bound" (legacy + // rows) and the check is skipped. + AgentProfileID(evt any) string + // Reserve atomically claims the dedup slot for this event. Returns // (false, nil) when another concurrent reserver already won the race — // the coordinator treats that as "nothing to do". @@ -107,11 +147,97 @@ type WatcherSource interface { // tasks, or nil when the watch is uncapped. The orchestrator gate uses // this to decide whether to defer the event. MaxInflightTasks(evt any) *int + + // SelfHeal disables the watcher row that produced this event and stamps + // a human-readable cause so the settings UI can show "disabled because + // the bound agent profile was removed". Called by the coordinator + // (and the legacy GitHub createXTask paths) when the pre-flight check + // detects a soft-deleted profile. + SelfHeal(ctx context.Context, evt any, cause string) error +} + +// preflightDeletedProfile returns true when the watcher's bound profile has +// been soft-deleted (the production bug that orphans watchers via the +// reconciler's cleanup of disabled agent types). On a true return the +// coordinator MUST stop — SelfHeal has already been invoked. +func (c *WatcherDispatchCoordinator) preflightDeletedProfile(ctx context.Context, src WatcherSource, evt any) bool { + lookup := c.getProfileLookup() + if lookup == nil { + return false + } + profileID := src.AgentProfileID(evt) + if profileID == "" { + return false + } + deleted, name, err := lookup.LookupProfile(ctx, profileID) + if err != nil { + if c.logger != nil { + c.logger.Warn("watcher dispatch: profile lookup failed, falling through", + zap.String("source", src.Name()), + zap.String("profile_id", profileID), + zap.Error(err)) + } + return false + } + if !deleted { + return false + } + cause := formatDeletedProfileCause(profileID, name) + if c.logger != nil { + c.logger.Warn("watcher dispatch: agent profile soft-deleted, self-healing", + zap.String("source", src.Name()), + zap.String("profile_id", profileID), + zap.String("profile_name", name)) + } + if err := src.SelfHeal(ctx, evt, cause); err != nil && c.logger != nil { + c.logger.Error("watcher dispatch: self-heal failed", + zap.String("source", src.Name()), + zap.String("profile_id", profileID), + zap.Error(err)) + } + return true +} + +// formatDeletedProfileCause renders the human-readable string stamped onto +// the watcher's last_error column. Centralised so every integration uses +// the same phrasing. +// +// profileName is user-typed in the settings UI with no DB-level length +// constraint; truncate at the producer so an arbitrarily-long name does +// not pollute last_error / the settings banner. +func formatDeletedProfileCause(profileID, profileName string) string { + name := truncateProfileNameForCause(profileName) + if name != "" { + return "agent profile \"" + name + "\" (" + profileID + ") was removed" + } + return "agent profile " + profileID + " was removed" +} + +// profileNameCauseMaxLen caps the rendered profile name in +// formatDeletedProfileCause. 80 runes matches the watcher-label cap in +// cmd/kandev — both end up in the same settings UI surface. +const profileNameCauseMaxLen = 80 + +func truncateProfileNameForCause(s string) string { + runes := []rune(s) + if len(runes) <= profileNameCauseMaxLen { + return s + } + return string(runes[:profileNameCauseMaxLen-1]) + "…" } // Dispatch runs one event through the full pipeline. Safe to call from a // goroutine; callers typically do so in the bus subscriber. +// +// Pre-flight: when a ProfileLookup is wired AND the source exposes a +// non-empty AgentProfileID, the coordinator first checks that the profile is +// not soft-deleted. A deleted profile short-circuits to SelfHeal — no task +// is created, no dedup reservation is taken. A lookup error fails open: the +// existing pipeline runs and any genuine error surfaces downstream. func (c *WatcherDispatchCoordinator) Dispatch(ctx context.Context, src WatcherSource, evt any) { + if c.preflightDeletedProfile(ctx, src, evt) { + return + } reserved, err := src.Reserve(ctx, evt) if err != nil { c.logger.Error("watcher dispatch: reserve failed", diff --git a/apps/backend/internal/orchestrator/watcher_dispatch_selfheal_test.go b/apps/backend/internal/orchestrator/watcher_dispatch_selfheal_test.go new file mode 100644 index 000000000..a5d578b64 --- /dev/null +++ b/apps/backend/internal/orchestrator/watcher_dispatch_selfheal_test.go @@ -0,0 +1,227 @@ +package orchestrator + +import ( + "context" + "errors" + "strings" + "testing" + + "github.com/kandev/kandev/internal/common/logger" +) + +func newTestLogger() *logger.Logger { + l, _ := logger.NewLogger(logger.LoggingConfig{Level: "error", Format: "console"}) + return l +} + +// fakeProfileLookup answers a fixed verdict for any profileID. +type fakeProfileLookup struct { + deleted bool + name string + err error + calls int + lastID string +} + +func (f *fakeProfileLookup) LookupProfile(_ context.Context, profileID string) (deleted bool, name string, err error) { + f.calls++ + f.lastID = profileID + return f.deleted, f.name, f.err +} + +// stubWatcherSource is a minimal WatcherSource that records each pipeline +// hook the coordinator invokes. Lets tests assert which hooks ran (or +// didn't) without dragging in a real integration. +type stubWatcherSource struct { + name string + agentProfileID string + reserveCalls int + selfHealCalls int + selfHealCause string + buildCalls int +} + +func (s *stubWatcherSource) Name() string { return s.name } + +func (s *stubWatcherSource) AgentProfileID(_ any) string { return s.agentProfileID } + +func (s *stubWatcherSource) Reserve(_ context.Context, _ any) (bool, error) { + s.reserveCalls++ + return true, nil +} + +func (s *stubWatcherSource) Release(_ context.Context, _ any) {} + +func (s *stubWatcherSource) BuildTaskRequest(_ any) (*IssueTaskRequest, error) { + s.buildCalls++ + return &IssueTaskRequest{}, nil +} + +func (s *stubWatcherSource) AttachTaskID(_ context.Context, _ any, _ string) error { return nil } + +func (s *stubWatcherSource) AutoStartParams(_ any) AutoStartParams { return AutoStartParams{} } + +func (s *stubWatcherSource) WatchID(_ any) string { return "" } + +func (s *stubWatcherSource) MaxInflightTasks(_ any) *int { return nil } + +func (s *stubWatcherSource) SelfHeal(_ context.Context, _ any, cause string) error { + s.selfHealCalls++ + s.selfHealCause = cause + return nil +} + +// TestWatcherDispatchCoordinator_SkipsAndSelfHealsOnDeletedProfile is the +// regression guard for the production bug (BLA-1598): when the watcher's +// bound agent profile has been soft-deleted, the coordinator MUST short- +// circuit before creating a task and MUST invoke SelfHeal so the watcher +// disables itself and stops re-firing every poll. +func TestWatcherDispatchCoordinator_SkipsAndSelfHealsOnDeletedProfile(t *testing.T) { + src := &stubWatcherSource{name: "stub", agentProfileID: "deleted-profile"} + lookup := &fakeProfileLookup{deleted: true, name: "Removed Kilo"} + + c := &WatcherDispatchCoordinator{ + profileLookup: lookup, + logger: nil, + } + + c.Dispatch(context.Background(), src, struct{}{}) + + if lookup.calls != 1 { + t.Errorf("expected 1 profile lookup, got %d", lookup.calls) + } + if src.reserveCalls != 0 { + t.Errorf("Reserve must not run when profile is deleted, got %d calls", src.reserveCalls) + } + if src.buildCalls != 0 { + t.Errorf("BuildTaskRequest must not run when profile is deleted, got %d calls", src.buildCalls) + } + if src.selfHealCalls != 1 { + t.Fatalf("expected 1 SelfHeal call, got %d", src.selfHealCalls) + } + // Pin the invariant the settings UI relies on: the cause must carry + // both the human-readable profile name and the profile id so an + // operator can locate the removed row. A refactor that emits a bare + // "profile not found" would still pass a `!= ""` check but break the + // UI contract — assert the actual content. + if !strings.Contains(src.selfHealCause, "Removed Kilo") || + !strings.Contains(src.selfHealCause, "deleted-profile") { + t.Errorf("SelfHeal cause missing profile name or id: %q", src.selfHealCause) + } +} + +// TestFormatDeletedProfileCause_RendersBothBranches pins the two shapes the +// settings UI banner can render: a fully-named cause and the name-less +// fallback used when the row's name was cleared before deletion. +func TestFormatDeletedProfileCause_RendersBothBranches(t *testing.T) { + withName := formatDeletedProfileCause("abc-123", "Kilo Profile") + if !strings.Contains(withName, "Kilo Profile") || !strings.Contains(withName, "abc-123") { + t.Errorf("with-name branch missing fields: %q", withName) + } + noName := formatDeletedProfileCause("abc-123", "") + if strings.Contains(noName, "\"\"") { + t.Errorf("empty-name branch must not emit empty quotes: %q", noName) + } + if !strings.Contains(noName, "abc-123") { + t.Errorf("empty-name branch missing profile id: %q", noName) + } +} + +// TestFormatDeletedProfileCause_TruncatesLongName guards convention 6: a +// profile name is bounded at the producer, not by the consumer. An +// unbounded name would pollute last_error and the settings banner. +func TestFormatDeletedProfileCause_TruncatesLongName(t *testing.T) { + long := strings.Repeat("x", 500) + got := formatDeletedProfileCause("abc-123", long) + if len([]rune(got)) > profileNameCauseMaxLen+64 { + t.Errorf("cause too long (%d runes): %q", len([]rune(got)), got) + } + if !strings.Contains(got, "…") { + t.Errorf("truncated cause must end the name with the ellipsis marker: %q", got) + } +} + +// TestWatcherDispatchCoordinator_HealthyProfilePassesPreflight pins the +// negative side: a non-deleted profile lookup does NOT short-circuit and +// the existing pipeline still runs. +func TestWatcherDispatchCoordinator_HealthyProfilePassesPreflight(t *testing.T) { + src := &stubWatcherSource{name: "stub", agentProfileID: "live-profile"} + lookup := &fakeProfileLookup{deleted: false} + + c := &WatcherDispatchCoordinator{ + profileLookup: lookup, + shouldAutoStart: func(_ context.Context, _ string) bool { + return false + }, + logger: newTestLogger(), + } + c.SetTaskCreator(&countingIssueTaskCreator{taskID: "task-1"}) + + c.Dispatch(context.Background(), src, struct{}{}) + + if src.selfHealCalls != 0 { + t.Errorf("SelfHeal must NOT run for a live profile, got %d calls", src.selfHealCalls) + } + if src.reserveCalls != 1 { + t.Errorf("expected 1 Reserve call on the happy path, got %d", src.reserveCalls) + } + if src.buildCalls != 1 { + t.Errorf("expected 1 BuildTaskRequest call on the happy path, got %d", src.buildCalls) + } +} + +// TestWatcherDispatchCoordinator_LookupErrorDoesNotBlockDispatch guards +// against turning a transient lookup failure (DB hiccup) into a watcher +// outage. When the lookup returns an error, the coordinator must +// fail-open and let the existing pipeline run; the legacy StartTask +// path still surfaces any genuine problem. +func TestWatcherDispatchCoordinator_LookupErrorDoesNotBlockDispatch(t *testing.T) { + src := &stubWatcherSource{name: "stub", agentProfileID: "live-profile"} + lookup := &fakeProfileLookup{err: errors.New("db unavailable")} + + c := &WatcherDispatchCoordinator{ + profileLookup: lookup, + shouldAutoStart: func(_ context.Context, _ string) bool { + return false + }, + logger: newTestLogger(), + } + c.SetTaskCreator(&countingIssueTaskCreator{taskID: "task-1"}) + + c.Dispatch(context.Background(), src, struct{}{}) + + if src.selfHealCalls != 0 { + t.Errorf("SelfHeal must NOT run on lookup error, got %d calls", src.selfHealCalls) + } + if src.reserveCalls != 1 { + t.Errorf("expected pipeline to fall through on lookup error, got Reserve calls %d", src.reserveCalls) + } +} + +// TestWatcherDispatchCoordinator_EmptyProfileIDSkipsLookup keeps the +// legacy-zero-profile rows working: watchers created before the +// agent_profile_id column existed have an empty value, and the +// pre-flight check must not look them up (the lookup would return +// "not found" which is a different error path). +func TestWatcherDispatchCoordinator_EmptyProfileIDSkipsLookup(t *testing.T) { + src := &stubWatcherSource{name: "stub", agentProfileID: ""} + lookup := &fakeProfileLookup{} + + c := &WatcherDispatchCoordinator{ + profileLookup: lookup, + shouldAutoStart: func(_ context.Context, _ string) bool { + return false + }, + logger: newTestLogger(), + } + c.SetTaskCreator(&countingIssueTaskCreator{taskID: "task-1"}) + + c.Dispatch(context.Background(), src, struct{}{}) + + if lookup.calls != 0 { + t.Errorf("lookup must be skipped for empty profile id, got %d calls", lookup.calls) + } + if src.reserveCalls != 1 { + t.Errorf("pipeline must still run, got Reserve calls %d", src.reserveCalls) + } +} diff --git a/apps/backend/internal/orchestrator/watcher_dispatch_test.go b/apps/backend/internal/orchestrator/watcher_dispatch_test.go index 3bc16f412..74e956ecb 100644 --- a/apps/backend/internal/orchestrator/watcher_dispatch_test.go +++ b/apps/backend/internal/orchestrator/watcher_dispatch_test.go @@ -73,6 +73,12 @@ func (f *fakeWatcherSource) WatchID(_ any) string { return f.watchID } func (f *fakeWatcherSource) MaxInflightTasks(_ any) *int { return f.maxInflightTasks } +// AgentProfileID returns "" so the pre-flight check skips and behaviour +// matches pre-self-heal semantics for tests that don't wire a ProfileLookup. +func (f *fakeWatcherSource) AgentProfileID(_ any) string { return "" } + +func (f *fakeWatcherSource) SelfHeal(_ context.Context, _ any, _ string) error { return nil } + // fakeTaskCreator captures the request and returns a canned task or error. type fakeTaskCreator struct { createErr error diff --git a/apps/backend/internal/orchestrator/watcher_dispatch_wiring.go b/apps/backend/internal/orchestrator/watcher_dispatch_wiring.go index 77b785349..01488683a 100644 --- a/apps/backend/internal/orchestrator/watcher_dispatch_wiring.go +++ b/apps/backend/internal/orchestrator/watcher_dispatch_wiring.go @@ -5,6 +5,8 @@ import ( "fmt" "go.uber.org/zap" + + "github.com/kandev/kandev/internal/github" ) // serviceTaskStarter adapts Service.StartTask to the coordinator's @@ -21,13 +23,17 @@ func (s serviceTaskStarter) Start(ctx context.Context, taskID, workflowStepID, p return err } -// initWatcherCoordinator builds the coordinator (once) and (always) refreshes -// the mutable taskCreator dependency via SetTaskCreator. Called from -// SetIssueTaskCreator, which can be invoked multiple times — tests in +// initWatcherCoordinatorLocked builds the coordinator (once) and (always) +// refreshes the mutable taskCreator dependency via SetTaskCreator. Called +// from SetIssueTaskCreator, which can be invoked multiple times — tests in // particular may swap creators between scenarios. Re-running the setter MUST // update the coordinator, otherwise Dispatch silently keeps the original // creator. -func (s *Service) initWatcherCoordinator() { +// +// Locked variant: callers MUST hold s.mu (write). Reads s.profileLookup +// directly rather than via getProfileLookup so we don't re-acquire the +// read lock from inside the write-locked critical section. +func (s *Service) initWatcherCoordinatorLocked() { if s.watcherCoordinator == nil { s.watcherCoordinator = &WatcherDispatchCoordinator{ startTask: serviceTaskStarter{svc: s}, @@ -38,6 +44,126 @@ func (s *Service) initWatcherCoordinator() { } } s.watcherCoordinator.SetTaskCreator(s.issueTaskCreator) + if s.profileLookup != nil { + s.watcherCoordinator.SetProfileLookup(s.profileLookup) + } +} + +// SetProfileLookup wires the soft-deleted-profile pre-flight check into both +// the coordinator-driven Linear/Jira pipeline and the legacy GitHub +// createIssueTask / createReviewTask call sites. Safe to call before or after +// SetIssueTaskCreator — the coordinator picks the value up on its next +// initWatcherCoordinatorLocked pass. Mutex-guarded because bus handlers +// (createIssueTask, createReviewTask) read the field from background +// goroutines and the race detector flags any unsynchronised access. +func (s *Service) SetProfileLookup(p ProfileLookup) { + s.mu.Lock() + s.profileLookup = p + coord := s.watcherCoordinator + s.mu.Unlock() + if coord != nil { + coord.SetProfileLookup(p) + } +} + +// getProfileLookup is the read counterpart to SetProfileLookup. Returns the +// currently-wired ProfileLookup (or nil) under the read lock. +func (s *Service) getProfileLookup() ProfileLookup { + s.mu.RLock() + defer s.mu.RUnlock() + return s.profileLookup +} + +// getWatcherCoordinator / getIssueTaskCreator are the lock-aware reads +// paired with the write paths in SetIssueTaskCreator / SetProfileLookup. +// Used by dispatchWatcherEvent which runs from bus subscriber goroutines. +func (s *Service) getWatcherCoordinator() *WatcherDispatchCoordinator { + s.mu.RLock() + defer s.mu.RUnlock() + return s.watcherCoordinator +} + +func (s *Service) getIssueTaskCreator() IssueTaskCreator { + s.mu.RLock() + defer s.mu.RUnlock() + return s.issueTaskCreator +} + +// preflightDeletedProfileForGitHubIssue / ForGitHubReview run the same +// soft-deleted-profile check the coordinator does, for the legacy GitHub +// watcher paths that bypass the coordinator (createIssueTask / +// createReviewTask in event_handlers_github.go). Return true when the +// watcher was self-healed and the caller MUST stop. +func (s *Service) preflightDeletedProfileForGitHubIssue(ctx context.Context, evt *github.NewIssueEvent) bool { + if evt == nil { + return false + } + return s.preflightDeletedProfileForGitHub(ctx, "issue", evt.AgentProfileID, evt.IssueWatchID, s.disableGitHubIssueWatch) +} + +func (s *Service) preflightDeletedProfileForGitHubReview(ctx context.Context, evt *github.NewReviewPREvent) bool { + if evt == nil { + return false + } + return s.preflightDeletedProfileForGitHub(ctx, "review", evt.AgentProfileID, evt.ReviewWatchID, s.disableGitHubReviewWatch) +} + +// preflightDeletedProfileForGitHub is the shared body for the two GitHub +// pre-flights. kind ("issue" / "review") is passed as a zap.String field +// rather than concatenated into the log message so the aggregator can group +// "github watcher: ..." into a single filterable family with kind as an axis. +// disable is the integration-specific store write. +func (s *Service) preflightDeletedProfileForGitHub( + ctx context.Context, kind, profileID, watchID string, + disable func(ctx context.Context, watchID, cause string) error, +) bool { + lookup := s.getProfileLookup() + if lookup == nil || profileID == "" { + return false + } + deleted, name, err := lookup.LookupProfile(ctx, profileID) + if err != nil { + s.logger.Warn("github watcher: profile lookup failed, falling through", + zap.String("kind", kind), + zap.String("profile_id", profileID), + zap.Error(err)) + return false + } + if !deleted { + return false + } + cause := formatDeletedProfileCause(profileID, name) + s.logger.Warn("github watcher: agent profile soft-deleted, self-healing", + zap.String("kind", kind), + zap.String("watch_id", watchID), + zap.String("profile_id", profileID), + zap.String("profile_name", name)) + if disable != nil { + if err := disable(ctx, watchID, cause); err != nil { + s.logger.Error("github watcher: self-heal disable failed", + zap.String("kind", kind), + zap.String("watch_id", watchID), + zap.Error(err)) + } + } + return true +} + +// disableGitHubIssueWatch / disableGitHubReviewWatch are nil-safe shims +// around the github service's disable methods. nil githubService falls +// through silently — same idiom as reserveIssueWatch / releaseIssueWatch. +func (s *Service) disableGitHubIssueWatch(ctx context.Context, watchID, cause string) error { + if s.githubService == nil { + return nil + } + return s.githubService.DisableIssueWatchWithError(ctx, watchID, cause) +} + +func (s *Service) disableGitHubReviewWatch(ctx context.Context, watchID, cause string) error { + if s.githubService == nil { + return nil + } + return s.githubService.DisableReviewWatchWithError(ctx, watchID, cause) } // dispatchWatcherEvent runs the wiring guards every per-integration bus @@ -52,17 +178,15 @@ func (s *Service) initWatcherCoordinator() { // duplicate-block threshold without copy-pasting the same guards. func (s *Service) dispatchWatcherEvent(ctx context.Context, integration string, src WatcherSource, evt any, fields ...zap.Field) { s.logger.Info(fmt.Sprintf("new %s issue detected from watch", integration), fields...) - if s.issueTaskCreator == nil { + if s.getIssueTaskCreator() == nil { s.logger.Warn(fmt.Sprintf("issue task creator not configured, skipping %s task creation", integration)) return } - // Capture the coordinator pointer locally. issueTaskCreator is set before - // initWatcherCoordinator in SetIssueTaskCreator, so a concurrent bus event - // could otherwise see issueTaskCreator non-nil while watcherCoordinator is - // still nil and crash on the goroutine dispatch below. - coordinator := s.watcherCoordinator - if coordinator == nil { - s.logger.Warn(fmt.Sprintf("watcher coordinator not configured, skipping %s task creation", integration)) + // Read the coordinator through the RLock accessor (see getWatcherCoordinator): + // SetIssueTaskCreator writes issueTaskCreator and the coordinator under the + // same lock, so a concurrent bus event never sees a half-wired Service. + coord := s.getWatcherCoordinator() + if coord == nil { return } @@ -87,6 +211,6 @@ func (s *Service) dispatchWatcherEvent(ctx context.Context, integration string, // the bus delivery context may be cancelled before task creation finishes. go func() { defer release() - coordinator.Dispatch(context.WithoutCancel(ctx), src, evt) + coord.Dispatch(context.WithoutCancel(ctx), src, evt) }() } diff --git a/apps/web/app/actions/agents.ts b/apps/web/app/actions/agents.ts index 53faa7587..c506a4a66 100644 --- a/apps/web/app/actions/agents.ts +++ b/apps/web/app/actions/agents.ts @@ -141,11 +141,15 @@ export async function updateAgentProfileAction( return normalizeAgentProfile(raw); } -import type { ActiveSessionInfo } from "@/lib/types/agent-profile-errors"; +import type { ActiveSessionInfo, WatcherReference } from "@/lib/types/agent-profile-errors"; export type DeleteProfileResult = | { status: "ok" } - | { status: "conflict"; activeSessions: ActiveSessionInfo[] } + | { + status: "conflict"; + activeSessions: ActiveSessionInfo[]; + watchers: WatcherReference[]; + } | { status: "error"; message: string }; export async function deleteAgentProfileAction( @@ -160,8 +164,15 @@ export async function deleteAgentProfileAction( }); if (!response.ok) { const body = await response.json().catch(() => ({})); - if (response.status === 409 && body.active_sessions) { - return { status: "conflict", activeSessions: body.active_sessions }; + // A 409 is either active-sessions, referencing watchers, or both. + // Treat any non-empty list as the conflict signal — a watcher-only + // conflict (the new self-heal path) must still pop the dialog. + if (response.status === 409 && (body.active_sessions || body.watchers)) { + return { + status: "conflict", + activeSessions: body.active_sessions ?? [], + watchers: body.watchers ?? [], + }; } return { status: "error", diff --git a/apps/web/components/settings/agent-profile-delete-dialog.test.tsx b/apps/web/components/settings/agent-profile-delete-dialog.test.tsx new file mode 100644 index 000000000..2a48534d5 --- /dev/null +++ b/apps/web/components/settings/agent-profile-delete-dialog.test.tsx @@ -0,0 +1,83 @@ +import { describe, it, expect, afterEach } from "vitest"; +import { render, screen, cleanup } from "@testing-library/react"; + +import { AgentProfileDeleteConflictDialog } from "./agent-profile-delete-dialog"; + +afterEach(cleanup); + +describe("AgentProfileDeleteConflictDialog", () => { + it("renders the watcher list grouped by kind on a watcher-only conflict", () => { + render( + {}} + onConfirm={() => {}} + />, + ); + + // Watcher group headings render the human-friendly kind label, and + // each watcher's label string appears once. Critically: the dialog + // pops even with no active sessions — this is the bug class the + // backend self-heal pre-flight fix would have left unaddressed + // without the frontend wiring. + expect(screen.getByText(/Watchers \(will be disabled\)/)).toBeTruthy(); + expect(screen.getByText(/Linear:/)).toBeTruthy(); + expect(screen.getByText(/GitHub Issues:/)).toBeTruthy(); + expect(screen.getByText(/team ENG/)).toBeTruthy(); + expect(screen.getByText(/team WEB/)).toBeTruthy(); + expect(screen.getByText(/kdlbs\/kandev/)).toBeTruthy(); + }); + + it("does not render the watchers section when the conflict is sessions-only", () => { + render( + {}} + onConfirm={() => {}} + />, + ); + + expect(screen.getByText(/Tasks:/)).toBeTruthy(); + expect(screen.getByText("Live task")).toBeTruthy(); + expect(screen.queryByText(/Watchers \(will be disabled\)/)).toBeNull(); + }); + + it("renders both sections when sessions and watchers coexist", () => { + render( + {}} + onConfirm={() => {}} + />, + ); + + expect(screen.getByText("Live task")).toBeTruthy(); + expect(screen.getByText(/Jira:/)).toBeTruthy(); + expect(screen.getByText(/project = ENG/)).toBeTruthy(); + }); + + it("does not render the dialog when conflict is null", () => { + render( + {}} + onConfirm={() => {}} + />, + ); + + expect(screen.queryByText(/Delete agent profile/i)).toBeNull(); + }); +}); diff --git a/apps/web/components/settings/agent-profile-delete-dialog.tsx b/apps/web/components/settings/agent-profile-delete-dialog.tsx index 82682c06b..f058fc173 100644 --- a/apps/web/components/settings/agent-profile-delete-dialog.tsx +++ b/apps/web/components/settings/agent-profile-delete-dialog.tsx @@ -10,7 +10,14 @@ import { AlertDialogHeader, AlertDialogTitle, } from "@kandev/ui/alert-dialog"; -import type { ActiveSessionInfo } from "@/lib/types/agent-profile-errors"; +import type { ActiveSessionInfo, WatcherReference } from "@/lib/types/agent-profile-errors"; + +const WATCHER_KIND_LABELS: Record = { + linear: "Linear", + jira: "Jira", + github_issue: "GitHub Issues", + github_review: "GitHub PR Reviews", +}; type AgentProfileDeleteConfirmDialogProps = { open: boolean; @@ -46,22 +53,32 @@ export function AgentProfileDeleteConfirmDialog({ ); } +// AgentProfileDeleteConflict carries the structured 409 payload from the +// backend. `open` is separate from the lists so a watcher-only conflict +// (no active sessions) still pops the dialog. +export type AgentProfileDeleteConflict = { + activeSessions: ActiveSessionInfo[]; + watchers: WatcherReference[]; +}; + type AgentProfileDeleteConflictDialogProps = { - activeSessions: ActiveSessionInfo[] | null; + conflict: AgentProfileDeleteConflict | null; onOpenChange: (open: boolean) => void; onConfirm: () => void; }; export function AgentProfileDeleteConflictDialog({ - activeSessions, + conflict, onOpenChange, onConfirm, }: AgentProfileDeleteConflictDialogProps) { - const tasks = activeSessions?.filter((s) => !s.is_ephemeral) ?? []; - const quickChats = activeSessions?.filter((s) => s.is_ephemeral) ?? []; + const tasks = conflict?.activeSessions.filter((s) => !s.is_ephemeral) ?? []; + const quickChats = conflict?.activeSessions.filter((s) => s.is_ephemeral) ?? []; + const watchers = conflict?.watchers ?? []; + const watchersByKind = groupWatchersByKind(watchers); return ( - + Delete agent profile? @@ -92,9 +109,24 @@ export function AgentProfileDeleteConflictDialog({ )} + {watchers.length > 0 && ( +
+

Watchers (will be disabled):

+
    + {Object.entries(watchersByKind).map(([kind, items]) => ( +
  • + + {WATCHER_KIND_LABELS[kind as WatcherReference["kind"]] ?? kind}: + {" "} + {items.map((w) => w.label || w.id).join(", ")} +
  • + ))} +
+
+ )}

- These sessions will no longer be able to use this profile. This action cannot be - undone. + These sessions will no longer be able to use this profile and the listed watchers + will be disabled. This action cannot be undone.

@@ -112,3 +144,10 @@ export function AgentProfileDeleteConflictDialog({
); } + +function groupWatchersByKind(watchers: WatcherReference[]): Record { + return watchers.reduce>((acc, w) => { + (acc[w.kind] ??= []).push(w); + return acc; + }, {}); +} diff --git a/apps/web/components/settings/agent-profile-page.tsx b/apps/web/components/settings/agent-profile-page.tsx index a70d2750d..7e2c8a3a1 100644 --- a/apps/web/components/settings/agent-profile-page.tsx +++ b/apps/web/components/settings/agent-profile-page.tsx @@ -14,10 +14,10 @@ import { UnsavedChangesBadge, UnsavedSaveButton } from "@/components/settings/un import { ProfileFormFields, type ProfileFormData } from "@/components/settings/profile-form-fields"; import { toAgentProfilePatch } from "@/app/settings/agents/[agentId]/agent-save-helpers"; import { deleteAgentProfileAction, updateAgentProfileAction } from "@/app/actions/agents"; -import type { ActiveSessionInfo } from "@/lib/types/agent-profile-errors"; import { AgentProfileDeleteConfirmDialog, AgentProfileDeleteConflictDialog, + type AgentProfileDeleteConflict, } from "@/components/settings/agent-profile-delete-dialog"; import { ProfileEnvVarsSection, @@ -293,7 +293,7 @@ function useProfileDelete( toast: ReturnType["toast"], ) { const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); - const [conflictSessions, setConflictSessions] = useState(null); + const [conflict, setConflict] = useState(null); const removeProfileFromStore = () => { const nextAgents = settingsAgents.map((agentItem: Agent) => @@ -318,7 +318,7 @@ function useProfileDelete( if (result.status === "ok") { removeProfileFromStore(); } else if (result.status === "conflict") { - setConflictSessions(result.activeSessions); + setConflict({ activeSessions: result.activeSessions, watchers: result.watchers }); } else { toast({ title: "Failed to delete profile", description: result.message, variant: "error" }); } @@ -326,7 +326,7 @@ function useProfileDelete( const handleForceDelete = async () => { const result = await deleteAgentProfileAction(draft.id, true); - setConflictSessions(null); + setConflict(null); if (result.status === "ok") { removeProfileFromStore(); } else if (result.status === "error") { @@ -339,8 +339,8 @@ function useProfileDelete( showDeleteConfirm, setShowDeleteConfirm, handleDeleteProfile, - conflictSessions, - setConflictSessions, + conflict, + setConflict, handleForceDelete, }; } @@ -349,8 +349,8 @@ type ProfileDeleteDialogsProps = { showDeleteConfirm: boolean; setShowDeleteConfirm: (open: boolean) => void; handleDeleteProfile: () => void; - conflictSessions: ActiveSessionInfo[] | null; - setConflictSessions: (sessions: ActiveSessionInfo[] | null) => void; + conflict: AgentProfileDeleteConflict | null; + setConflict: (c: AgentProfileDeleteConflict | null) => void; handleForceDelete: () => void; }; @@ -358,8 +358,8 @@ function ProfileDeleteDialogs({ showDeleteConfirm, setShowDeleteConfirm, handleDeleteProfile, - conflictSessions, - setConflictSessions, + conflict, + setConflict, handleForceDelete, }: ProfileDeleteDialogsProps) { return ( @@ -373,9 +373,9 @@ function ProfileDeleteDialogs({ /> { - if (!open) setConflictSessions(null); + if (!open) setConflict(null); }} onConfirm={handleForceDelete} /> @@ -488,8 +488,8 @@ function ProfileEditor({ showDeleteConfirm, setShowDeleteConfirm, handleDeleteProfile, - conflictSessions, - setConflictSessions, + conflict, + setConflict, handleForceDelete, } = useProfileDelete(agent, draft, settingsAgents, syncAgentsToStore, toast); @@ -531,8 +531,8 @@ function ProfileEditor({ showDeleteConfirm={showDeleteConfirm} setShowDeleteConfirm={setShowDeleteConfirm} handleDeleteProfile={handleDeleteProfile} - conflictSessions={conflictSessions} - setConflictSessions={setConflictSessions} + conflict={conflict} + setConflict={setConflict} handleForceDelete={handleForceDelete} /> diff --git a/apps/web/e2e/helpers/api-client.ts b/apps/web/e2e/helpers/api-client.ts index e765f1a3e..2b789f89b 100644 --- a/apps/web/e2e/helpers/api-client.ts +++ b/apps/web/e2e/helpers/api-client.ts @@ -1416,6 +1416,54 @@ export class ApiClient { await this.request("PUT", "/api/v1/linear/mock/get-issue-error", args); } + // --- Linear issue watch CRUD --- + // Used by the agent-profile-delete spec to exercise the watcher dependency + // surface added in the watcher self-heal PR. Filter shape matches + // linear.CreateIssueWatchRequest (Go side). + + async createLinearIssueWatch(opts: { + workspaceId: string; + workflowId: string; + workflowStepId: string; + agentProfileId: string; + executorProfileId?: string; + filter?: { teamKey?: string }; + prompt?: string; + enabled?: boolean; + pollIntervalSeconds?: number; + }): Promise<{ id: string; enabled: boolean; lastError?: string }> { + return this.request("POST", "/api/v1/linear/watches/issue", { + workspaceId: opts.workspaceId, + workflowId: opts.workflowId, + workflowStepId: opts.workflowStepId, + agentProfileId: opts.agentProfileId, + executorProfileId: opts.executorProfileId ?? "", + filter: { teamKey: "ENG", ...(opts.filter ?? {}) }, + prompt: opts.prompt ?? "", + enabled: opts.enabled ?? true, + pollIntervalSeconds: opts.pollIntervalSeconds ?? 300, + }); + } + + async getLinearIssueWatch( + workspaceId: string, + watchId: string, + ): Promise<{ + id: string; + enabled: boolean; + lastError?: string; + lastErrorAt?: string; + } | null> { + // No single-watch GET on the route table (only POST/PATCH/DELETE/trigger); + // walk the workspace list and find by id. Scoped by workspace_id so the + // result set stays small even if the install accumulates watchers. The + // list endpoint wraps the rows in a { watches: [...] } envelope. + const { watches } = await this.request<{ + watches: Array<{ id: string; enabled: boolean; lastError?: string; lastErrorAt?: string }>; + }>("GET", `/api/v1/linear/watches/issue?workspace_id=${encodeURIComponent(workspaceId)}`); + return watches.find((w) => w.id === watchId) ?? null; + } + // --- Agent dashboard E2E seed helpers (KANDEV_E2E_MOCK=true) --- // These wrappers append rows to office_runs / office_cost_events / // office_activity_log directly so the agent dashboard E2E spec can diff --git a/apps/web/e2e/tests/settings/agent-profile-delete.spec.ts b/apps/web/e2e/tests/settings/agent-profile-delete.spec.ts index 5e1551b8f..7293e3eae 100644 --- a/apps/web/e2e/tests/settings/agent-profile-delete.spec.ts +++ b/apps/web/e2e/tests/settings/agent-profile-delete.spec.ts @@ -98,6 +98,109 @@ test.describe("Agent profile deletion", () => { ); }); + test("deleting profile referenced by a watcher shows watcher in conflict dialog", async ({ + testPage, + apiClient, + seedData, + }) => { + test.setTimeout(60_000); + + // Linear config has to be present before /api/v1/linear/watches/issue + // accepts the create POST; the auth-health probe is also what surfaces + // hasSecret on the watch row's downstream consumers. + await apiClient.setLinearConfig({ secret: "lin_api_xxx" }); + await apiClient.waitForIntegrationAuthHealthy("linear"); + + const { agents } = await apiClient.listAgents(); + const agent = agents[0]; + const profile = await apiClient.createAgentProfile(agent.id, "Watched Profile", { + model: agent.profiles[0].model, + }); + + await apiClient.createLinearIssueWatch({ + workspaceId: seedData.workspaceId, + workflowId: seedData.workflowId, + workflowStepId: seedData.startStepId, + agentProfileId: profile.id, + filter: { teamKey: "ENG" }, + }); + + await testPage.goto(`/settings/agents/${agent.name}/profiles/${profile.id}`); + await expect(testPage.getByText("Delete profile", { exact: true })).toBeVisible({ + timeout: 15_000, + }); + + await testPage.getByRole("button", { name: "Delete", exact: true }).click(); + const confirmDialog = testPage.getByRole("alertdialog"); + await expect(confirmDialog).toBeVisible({ timeout: 10_000 }); + await confirmDialog.getByRole("button", { name: "Delete", exact: true }).click(); + + // Conflict dialog pops with NO active sessions — only the watcher path. + // Without the cycle-5 frontend wiring the dialog would either not pop + // (sessions-only check) or pop empty (watchers ignored). + const conflictDialog = testPage.getByRole("alertdialog"); + await expect(conflictDialog).toBeVisible({ timeout: 10_000 }); + await expect(conflictDialog.getByText("Watchers (will be disabled):")).toBeVisible(); + await expect(conflictDialog.getByText(/Linear:/)).toBeVisible(); + await expect(conflictDialog.getByText(/team ENG/)).toBeVisible(); + + await conflictDialog.getByRole("button", { name: "Cancel" }).click(); + await expect(conflictDialog).not.toBeVisible(); + }); + + test("force-deleting profile with watcher disables the watcher row", async ({ + testPage, + apiClient, + seedData, + }) => { + test.setTimeout(60_000); + + await apiClient.setLinearConfig({ secret: "lin_api_xxx" }); + await apiClient.waitForIntegrationAuthHealthy("linear"); + + const { agents } = await apiClient.listAgents(); + const agent = agents[0]; + const profile = await apiClient.createAgentProfile(agent.id, "Watched ForceRemove", { + model: agent.profiles[0].model, + }); + + const watch = await apiClient.createLinearIssueWatch({ + workspaceId: seedData.workspaceId, + workflowId: seedData.workflowId, + workflowStepId: seedData.startStepId, + agentProfileId: profile.id, + filter: { teamKey: "ENG" }, + }); + expect(watch.enabled).toBe(true); + + await testPage.goto(`/settings/agents/${agent.name}/profiles/${profile.id}`); + await expect(testPage.getByText("Delete profile", { exact: true })).toBeVisible({ + timeout: 15_000, + }); + + await testPage.getByRole("button", { name: "Delete", exact: true }).click(); + const confirmDialog = testPage.getByRole("alertdialog"); + await expect(confirmDialog).toBeVisible({ timeout: 10_000 }); + await confirmDialog.getByRole("button", { name: "Delete", exact: true }).click(); + + const conflictDialog = testPage.getByRole("alertdialog"); + await expect(conflictDialog).toBeVisible({ timeout: 10_000 }); + await conflictDialog.getByRole("button", { name: "Delete Anyway" }).click(); + + await expect(testPage).toHaveURL(/\/settings\/agents$/, { timeout: 15_000 }); + + // The eager-disable path (DeleteProfile disables referencing watchers + // after the row delete succeeds) must have flipped the watcher row to + // enabled=0 and stamped a cause — without it the watcher would stay live, + // orphaned at the now-deleted profile, and only self-heal whenever the + // next external issue happens to match its filter (could be never for a + // narrow filter). + const after = await apiClient.getLinearIssueWatch(seedData.workspaceId, watch.id); + expect(after).not.toBeNull(); + expect(after!.enabled).toBe(false); + expect(after!.lastError ?? "").toContain(profile.id); + }); + test("force-deleting profile with active task succeeds after both confirmations", async ({ testPage, apiClient, diff --git a/apps/web/lib/types/agent-profile-errors.ts b/apps/web/lib/types/agent-profile-errors.ts index e5cfaff57..53cc440c1 100644 --- a/apps/web/lib/types/agent-profile-errors.ts +++ b/apps/web/lib/types/agent-profile-errors.ts @@ -3,3 +3,12 @@ export type ActiveSessionInfo = { task_title: string; is_ephemeral: boolean; }; + +// WatcherReference points at one issue/PR watcher row that uses the agent +// profile being deleted. Mirrors the Go shape returned from +// /api/v1/agent-profiles/:id?force=false on a 409 conflict. +export type WatcherReference = { + id: string; + kind: "linear" | "jira" | "github_issue" | "github_review"; + label: string; +};