From e416aa96da68d8d61d562047e55ca42a60b88bcf Mon Sep 17 00:00:00 2001 From: nfebe Date: Mon, 25 May 2026 20:59:35 +0100 Subject: [PATCH 1/3] feat(images): Auto-clean stale images after a deployment pulls After a successful pull (whether via the standalone pull endpoint or as part of a deploy with pull), the agent now removes images that this deployment previously used but no longer references. An image is only removed if no container references it and no other deployment's compose references it either, so cleanup never deletes anything still in use locally. The default is on; pass cleanup=false on the request to skip. Two new endpoints make the cleanup callable on demand: one scoped to a single deployment and one for host-wide pruning of dangling layers. A dry_run flag returns the would-be removals without deleting anything. Closes #124 --- internal/api/server.go | 101 +++++++++++-- internal/docker/cleanup.go | 256 ++++++++++++++++++++++++++++++++ internal/docker/cleanup_test.go | 72 +++++++++ 3 files changed, 419 insertions(+), 10 deletions(-) create mode 100644 internal/docker/cleanup.go create mode 100644 internal/docker/cleanup_test.go diff --git a/internal/api/server.go b/internal/api/server.go index 9a02076..7bd02e6 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -416,6 +416,8 @@ func (s *Server) setupRoutes() { protected.GET("/images", s.authMiddleware.RequirePermission(auth.PermImagesRead), s.listImages) protected.DELETE("/images/:id", s.authMiddleware.RequirePermission(auth.PermImagesDelete), s.removeImage) protected.POST("/images/pull", s.authMiddleware.RequirePermission(auth.PermImagesWrite), s.pullImage) + protected.POST("/images/cleanup", s.authMiddleware.RequirePermission(auth.PermImagesDelete), s.cleanupSystemImages) + protected.POST("/deployments/:name/images/cleanup", s.authMiddleware.RequirePermission(auth.PermImagesWrite), s.authMiddleware.RequireDeploymentAccess(auth.AccessLevelWrite), s.cleanupDeploymentImages) // Volume endpoints protected.GET("/volumes", s.authMiddleware.RequirePermission(auth.PermVolumesRead), s.listVolumes) @@ -1836,6 +1838,7 @@ func (s *Server) deployDeployment(c *gin.Context) { Action string `json:"action"` Pull *bool `json:"pull"` OnlyLatest bool `json:"only_latest"` + Cleanup *bool `json:"cleanup"` }{ Action: "restart", } @@ -1915,13 +1918,28 @@ func (s *Server) deployDeployment(c *gin.Context) { return } + cleanup := docker.CleanupResult{} + cleanupEnabled := pull + if req.Cleanup != nil { + cleanupEnabled = *req.Cleanup + } + if cleanupEnabled { + if r, err := s.manager.CleanupDeploymentImages(name, false); err == nil { + cleanup = r + } else { + log.Printf("Warning: post-deploy image cleanup for %s failed: %v", name, err) + } + } + c.JSON(http.StatusOK, gin.H{ - "message": "Deployment completed", - "name": name, - "action": req.Action, - "pulled": pull, - "pull_output": pullOutput, - "deploy_output": output, + "message": "Deployment completed", + "name": name, + "action": req.Action, + "pulled": pull, + "pull_output": pullOutput, + "deploy_output": output, + "cleanup_removed": cleanup.Removed, + "cleanup_freed": cleanup.FreedBytes, }) } @@ -1929,7 +1947,8 @@ func (s *Server) pullDeploymentImage(c *gin.Context) { name := c.Param("name") var req struct { - OnlyLatest bool `json:"only_latest"` + OnlyLatest bool `json:"only_latest"` + Cleanup *bool `json:"cleanup"` } _ = c.ShouldBindJSON(&req) @@ -1952,10 +1971,25 @@ func (s *Server) pullDeploymentImage(c *gin.Context) { return } + cleanup := docker.CleanupResult{} + cleanupEnabled := true + if req.Cleanup != nil { + cleanupEnabled = *req.Cleanup + } + if cleanupEnabled { + if r, err := s.manager.CleanupDeploymentImages(name, false); err == nil { + cleanup = r + } else { + log.Printf("Warning: post-pull image cleanup for %s failed: %v", name, err) + } + } + c.JSON(http.StatusOK, gin.H{ - "message": "Images pulled successfully", - "name": name, - "output": output, + "message": "Images pulled successfully", + "name": name, + "output": output, + "cleanup_removed": cleanup.Removed, + "cleanup_freed": cleanup.FreedBytes, }) } @@ -5072,6 +5106,53 @@ func (s *Server) pullImage(c *gin.Context) { }) } +func (s *Server) cleanupSystemImages(c *gin.Context) { + var req struct { + DryRun bool `json:"dry_run"` + } + _ = c.ShouldBindJSON(&req) + + result, err := s.manager.PruneDanglingImages(req.DryRun) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + c.JSON(http.StatusOK, gin.H{ + "message": "System image cleanup complete", + "removed": result.Removed, + "freed_bytes": result.FreedBytes, + "dry_run": result.DryRun, + }) +} + +func (s *Server) cleanupDeploymentImages(c *gin.Context) { + name := c.Param("name") + + var req struct { + DryRun bool `json:"dry_run"` + } + _ = c.ShouldBindJSON(&req) + + if _, err := s.manager.GetDeployment(name); err != nil { + c.JSON(http.StatusNotFound, gin.H{"error": "Deployment not found: " + err.Error()}) + return + } + + result, err := s.manager.CleanupDeploymentImages(name, req.DryRun) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + c.JSON(http.StatusOK, gin.H{ + "message": "Deployment image cleanup complete", + "name": name, + "removed": result.Removed, + "freed_bytes": result.FreedBytes, + "images_kept": result.ImagesKept, + "dry_run": result.DryRun, + }) +} + func (s *Server) listVolumes(c *gin.Context) { volumes, err := s.networksManager.ListVolumes() if err != nil { diff --git a/internal/docker/cleanup.go b/internal/docker/cleanup.go new file mode 100644 index 0000000..a3f1756 --- /dev/null +++ b/internal/docker/cleanup.go @@ -0,0 +1,256 @@ +package docker + +import ( + "context" + "strings" + "time" + + "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/filters" + "github.com/docker/docker/api/types/image" +) + +type RemovedImage struct { + ID string `json:"id"` + Tag string `json:"tag,omitempty"` + Bytes int64 `json:"bytes"` +} + +type CleanupResult struct { + Removed []RemovedImage `json:"removed"` + FreedBytes int64 `json:"freed_bytes"` + ImagesKept int `json:"images_kept"` + DryRun bool `json:"dry_run,omitempty"` +} + +type imageRecord struct { + id string + repos []string + fullRefs []string + bytes int64 +} + +func (m *Manager) CleanupDeploymentImages(name string, dryRun bool) (CleanupResult, error) { + if m.apiClient == nil { + return CleanupResult{}, nil + } + + deployment, err := m.GetDeployment(name) + if err != nil { + return CleanupResult{}, err + } + + currentRefs, currentRepos, err := composeImageRefs(m.executor, deployment.Path) + if err != nil { + return CleanupResult{}, err + } + if len(currentRefs) == 0 { + return CleanupResult{}, nil + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + hostImages, err := m.apiClient.listImageRecords(ctx) + if err != nil { + return CleanupResult{}, err + } + + inUseByContainers, err := m.apiClient.containerImageRefs(ctx) + if err != nil { + return CleanupResult{}, err + } + + otherDeploymentRefs, _ := m.imageRefsAcrossDeployments(deployment.Name) + + stale, kept := selectStaleImages(hostImages, currentRefs, currentRepos, inUseByContainers, otherDeploymentRefs) + + result := CleanupResult{DryRun: dryRun, ImagesKept: kept} + for _, img := range stale { + tagLabel := "" + if len(img.fullRefs) > 0 { + tagLabel = img.fullRefs[0] + } + if !dryRun { + if _, err := m.apiClient.cli.ImageRemove(ctx, img.id, image.RemoveOptions{}); err != nil { + continue + } + } + result.Removed = append(result.Removed, RemovedImage{ + ID: img.id, + Tag: tagLabel, + Bytes: img.bytes, + }) + result.FreedBytes += img.bytes + } + + return result, nil +} + +func (m *Manager) PruneDanglingImages(dryRun bool) (CleanupResult, error) { + if m.apiClient == nil { + return CleanupResult{}, nil + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + if dryRun { + f := filters.NewArgs(filters.Arg("dangling", "true")) + images, err := m.apiClient.cli.ImageList(ctx, image.ListOptions{Filters: f, All: true}) + if err != nil { + return CleanupResult{}, err + } + result := CleanupResult{DryRun: true} + for _, img := range images { + result.Removed = append(result.Removed, RemovedImage{ID: img.ID, Bytes: img.Size}) + result.FreedBytes += img.Size + } + return result, nil + } + + report, err := m.apiClient.cli.ImagesPrune(ctx, filters.NewArgs(filters.Arg("dangling", "true"))) + if err != nil { + return CleanupResult{}, err + } + result := CleanupResult{FreedBytes: int64(report.SpaceReclaimed)} + for _, d := range report.ImagesDeleted { + if d.Deleted != "" { + result.Removed = append(result.Removed, RemovedImage{ID: d.Deleted}) + } + } + return result, nil +} + +func composeImageRefs(executor *ComposeExecutor, deploymentPath string) (map[string]bool, map[string]bool, error) { + images, err := executor.GetImageInfo(deploymentPath) + if err != nil { + return nil, nil, err + } + refs := make(map[string]bool) + repos := make(map[string]bool) + for _, img := range images { + if img.Image == "" || img.IsBuild { + continue + } + refs[img.Image] = true + repos[splitRepo(img.Image)] = true + } + return refs, repos, nil +} + +func (m *Manager) imageRefsAcrossDeployments(excludeName string) (map[string]bool, error) { + out := make(map[string]bool) + + deployments, err := m.discovery.FindDeployments() + if err != nil { + return out, nil + } + if infra, err := m.discovery.FindInfrastructure(); err == nil { + deployments = append(deployments, infra...) + } + + for _, dep := range deployments { + if dep.Name == excludeName { + continue + } + images, err := m.executor.GetImageInfo(dep.Path) + if err != nil { + continue + } + for _, img := range images { + if img.Image != "" && !img.IsBuild { + out[img.Image] = true + } + } + } + return out, nil +} + +func (a *APIClient) listImageRecords(ctx context.Context) ([]imageRecord, error) { + images, err := a.cli.ImageList(ctx, image.ListOptions{All: false}) + if err != nil { + return nil, err + } + out := make([]imageRecord, 0, len(images)) + for _, img := range images { + rec := imageRecord{id: img.ID, bytes: img.Size} + for _, ref := range img.RepoTags { + if ref == "" || ref == ":" { + continue + } + rec.fullRefs = append(rec.fullRefs, ref) + rec.repos = append(rec.repos, splitRepo(ref)) + } + out = append(out, rec) + } + return out, nil +} + +func (a *APIClient) containerImageRefs(ctx context.Context) (map[string]bool, error) { + containers, err := a.cli.ContainerList(ctx, container.ListOptions{All: true}) + if err != nil { + return nil, err + } + out := make(map[string]bool) + for _, c := range containers { + if c.ImageID != "" { + out[c.ImageID] = true + } + if c.Image != "" { + out[c.Image] = true + } + } + return out, nil +} + +func selectStaleImages(host []imageRecord, currentRefs, currentRepos, inUse, otherDeps map[string]bool) ([]imageRecord, int) { + var stale []imageRecord + kept := 0 + for _, img := range host { + if !anyMatch(img.repos, currentRepos) { + kept++ + continue + } + if anyMatch(img.fullRefs, currentRefs) { + kept++ + continue + } + if inUse[img.id] { + kept++ + continue + } + if anyMatch(img.fullRefs, inUse) { + kept++ + continue + } + if anyMatch(img.fullRefs, otherDeps) { + kept++ + continue + } + stale = append(stale, img) + } + return stale, kept +} + +func anyMatch(values []string, set map[string]bool) bool { + for _, v := range values { + if set[v] { + return true + } + } + return false +} + +func splitRepo(ref string) string { + if i := strings.LastIndex(ref, "@"); i > 0 { + return ref[:i] + } + if i := strings.LastIndex(ref, ":"); i > 0 { + after := ref[i+1:] + if !strings.Contains(after, "/") { + return ref[:i] + } + } + return ref +} diff --git a/internal/docker/cleanup_test.go b/internal/docker/cleanup_test.go new file mode 100644 index 0000000..a8e3bce --- /dev/null +++ b/internal/docker/cleanup_test.go @@ -0,0 +1,72 @@ +package docker + +import ( + "testing" +) + +func TestSplitRepo(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"nginx:1.20", "nginx"}, + {"ghcr.io/acme/api:sha-abc", "ghcr.io/acme/api"}, + {"registry.example.com:5000/app:v1", "registry.example.com:5000/app"}, + {"nginx", "nginx"}, + {"acme/api@sha256:deadbeef", "acme/api"}, + } + for _, tt := range tests { + got := splitRepo(tt.input) + if got != tt.want { + t.Errorf("splitRepo(%q) = %q, want %q", tt.input, got, tt.want) + } + } +} + +func TestSelectStaleImages(t *testing.T) { + hostImages := []imageRecord{ + {id: "id-current", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-def"}, bytes: 500}, + {id: "id-old", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-abc"}, bytes: 480}, + {id: "id-unrelated", repos: []string{"nginx"}, fullRefs: []string{"nginx:1.20"}, bytes: 142}, + {id: "id-other-deployment", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-xyz"}, bytes: 490}, + {id: "id-still-running", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-old"}, bytes: 470}, + {id: "id-dangling", bytes: 50}, + {id: "id-multitag", repos: []string{"ghcr.io/acme/api", "ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-abc2", "ghcr.io/acme/api:keep"}, bytes: 460}, + } + currentRefs := map[string]bool{"ghcr.io/acme/api:sha-def": true, "ghcr.io/acme/api:keep": true} + currentRepos := map[string]bool{"ghcr.io/acme/api": true} + inUse := map[string]bool{"id-still-running": true} + otherDeps := map[string]bool{"ghcr.io/acme/api:sha-xyz": true} + + stale, kept := selectStaleImages(hostImages, currentRefs, currentRepos, inUse, otherDeps) + if len(stale) != 1 { + t.Fatalf("expected exactly one stale image, got %d (ids: %v)", len(stale), staleIDs(stale)) + } + if stale[0].id != "id-old" { + t.Errorf("expected id-old to be the stale one, got %s", stale[0].id) + } + if kept != 6 { + t.Errorf("expected 6 kept, got %d", kept) + } +} + +func TestSelectStaleImagesNothingToRemove(t *testing.T) { + host := []imageRecord{ + {id: "id-current", repos: []string{"nginx"}, fullRefs: []string{"nginx:1.21"}, bytes: 142}, + } + stale, kept := selectStaleImages(host, map[string]bool{"nginx:1.21": true}, map[string]bool{"nginx": true}, nil, nil) + if len(stale) != 0 { + t.Fatalf("expected no stale images, got %d", len(stale)) + } + if kept != 1 { + t.Errorf("expected 1 kept, got %d", kept) + } +} + +func staleIDs(records []imageRecord) []string { + out := make([]string, 0, len(records)) + for _, r := range records { + out = append(out, r.id) + } + return out +} From 262c8aa52fba42579e98fe50c961829bfe4bc8a0 Mon Sep 17 00:00:00 2001 From: nfebe Date: Mon, 25 May 2026 21:20:03 +0100 Subject: [PATCH 2/3] feat(config): Add a key-based config API and a default timeout fallback Clients can now list, fetch, and update the agent's YAML configuration through dotted keys instead of one bespoke endpoint per setting. Sensitive keys are masked, string inputs are coerced to the field's real type, and runtime-applicable keys take effect immediately. The new endpoints sit behind dedicated config permissions granted only to admins, so the discovery surface is admin-only by default. A top-level default timeout setting is the fallback for any block that does not specify its own, used today by the image cleanup loop. The legacy /settings endpoints are unchanged. --- internal/api/config_handlers.go | 86 ++++++++++++++++ internal/api/server.go | 4 + internal/auth/permissions.go | 4 + internal/docker/cleanup.go | 7 +- internal/docker/manager.go | 24 ++++- pkg/config/config.go | 12 +++ pkg/config/registry.go | 177 ++++++++++++++++++++++++++++++++ pkg/config/registry_test.go | 128 +++++++++++++++++++++++ 8 files changed, 434 insertions(+), 8 deletions(-) create mode 100644 internal/api/config_handlers.go create mode 100644 pkg/config/registry.go create mode 100644 pkg/config/registry_test.go diff --git a/internal/api/config_handlers.go b/internal/api/config_handlers.go new file mode 100644 index 0000000..75b2d7c --- /dev/null +++ b/internal/api/config_handlers.go @@ -0,0 +1,86 @@ +package api + +import ( + "net/http" + "strings" + + "github.com/flatrun/agent/pkg/config" + "github.com/gin-gonic/gin" +) + +func (s *Server) listConfig(c *gin.Context) { + entries := config.Walk(s.config) + c.JSON(http.StatusOK, gin.H{ + "config": entries, + "runtime": s.runtimeConfigKeys(), + }) +} + +func (s *Server) getConfigKey(c *gin.Context) { + key := normalizeConfigKey(c.Param("key")) + entry, err := config.Get(s.config, key) + if err != nil { + c.JSON(http.StatusNotFound, gin.H{"error": err.Error()}) + return + } + c.JSON(http.StatusOK, gin.H{ + "entry": entry, + "runtime": s.runtimeConfigKeys()[key], + }) +} + +func (s *Server) updateConfigKey(c *gin.Context) { + key := normalizeConfigKey(c.Param("key")) + + var req struct { + Value interface{} `json:"value"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if err := config.Set(s.config, key, req.Value); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if s.configPath != "" { + if err := config.Save(s.config, s.configPath); err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "value updated in memory but not persisted: " + err.Error()}) + return + } + } + + applied := false + if apply, ok := s.runtimeAppliers()[key]; ok { + apply(s) + applied = true + } + + entry, _ := config.Get(s.config, key) + c.JSON(http.StatusOK, gin.H{ + "entry": entry, + "applied": applied, + }) +} + +func (s *Server) runtimeAppliers() map[string]func(*Server) { + return map[string]func(*Server){ + "cleanup.timeout": func(srv *Server) { + srv.manager.SetCleanupTimeout(srv.config.Cleanup.Timeout) + }, + } +} + +func (s *Server) runtimeConfigKeys() map[string]bool { + keys := make(map[string]bool) + for k := range s.runtimeAppliers() { + keys[k] = true + } + return keys +} + +func normalizeConfigKey(raw string) string { + return strings.Trim(raw, "/") +} diff --git a/internal/api/server.go b/internal/api/server.go index 7bd02e6..020899e 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -127,6 +127,7 @@ func New(cfg *config.Config, configPath string) *Server { } manager := docker.NewManager(cfg.DeploymentsPath) + manager.SetCleanupTimeout(cfg.Cleanup.Timeout) certsDiscovery := certs.NewDiscovery(cfg.DeploymentsPath) networksManager := networks.NewManager() pluginsDir := filepath.Join(cfg.DeploymentsPath, ".flatrun", "plugins") @@ -375,6 +376,9 @@ func (s *Server) setupRoutes() { protected.GET("/settings", s.authMiddleware.RequirePermission(auth.PermSettingsRead), s.getSettings) protected.PUT("/settings", s.authMiddleware.RequirePermission(auth.PermSettingsWrite), s.updateSettings) protected.PUT("/settings/security", s.authMiddleware.RequirePermission(auth.PermSettingsWrite), s.updateSecuritySettings) + protected.GET("/config", s.authMiddleware.RequirePermission(auth.PermConfigRead), s.listConfig) + protected.GET("/config/*key", s.authMiddleware.RequirePermission(auth.PermConfigRead), s.getConfigKey) + protected.PUT("/config/*key", s.authMiddleware.RequirePermission(auth.PermConfigWrite), s.updateConfigKey) // Compose, stats, subdomain (deployment-scoped) protected.GET("/subdomain/generate", s.authMiddleware.RequirePermission(auth.PermDeploymentsRead), s.generateSubdomain) diff --git a/internal/auth/permissions.go b/internal/auth/permissions.go index 2812cbf..83e6b9e 100644 --- a/internal/auth/permissions.go +++ b/internal/auth/permissions.go @@ -33,6 +33,9 @@ const ( PermSettingsRead Permission = "settings:read" PermSettingsWrite Permission = "settings:write" + PermConfigRead Permission = "config:read" + PermConfigWrite Permission = "config:write" + PermAuditRead Permission = "audit:read" PermContainersRead Permission = "containers:read" @@ -88,6 +91,7 @@ var adminPermissions = []Permission{ PermUsersRead, PermUsersWrite, PermUsersDelete, PermAPIKeysRead, PermAPIKeysWrite, PermAPIKeysDelete, PermSettingsRead, PermSettingsWrite, + PermConfigRead, PermConfigWrite, PermAuditRead, PermContainersRead, PermContainersWrite, PermContainersDelete, PermImagesRead, PermImagesWrite, PermImagesDelete, diff --git a/internal/docker/cleanup.go b/internal/docker/cleanup.go index a3f1756..ac7d46c 100644 --- a/internal/docker/cleanup.go +++ b/internal/docker/cleanup.go @@ -2,8 +2,8 @@ package docker import ( "context" + "log" "strings" - "time" "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" @@ -48,7 +48,7 @@ func (m *Manager) CleanupDeploymentImages(name string, dryRun bool) (CleanupResu return CleanupResult{}, nil } - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), m.CleanupTimeout()) defer cancel() hostImages, err := m.apiClient.listImageRecords(ctx) @@ -73,6 +73,7 @@ func (m *Manager) CleanupDeploymentImages(name string, dryRun bool) (CleanupResu } if !dryRun { if _, err := m.apiClient.cli.ImageRemove(ctx, img.id, image.RemoveOptions{}); err != nil { + log.Printf("cleanup: failed to remove image %s (%s) for deployment %s: %v", img.id, tagLabel, name, err) continue } } @@ -92,7 +93,7 @@ func (m *Manager) PruneDanglingImages(dryRun bool) (CleanupResult, error) { return CleanupResult{}, nil } - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), m.CleanupTimeout()) defer cancel() if dryRun { diff --git a/internal/docker/manager.go b/internal/docker/manager.go index ba87c6c..e0bd9c0 100644 --- a/internal/docker/manager.go +++ b/internal/docker/manager.go @@ -23,11 +23,25 @@ type composeContainer struct { } type Manager struct { - discovery *Discovery - executor *ComposeExecutor - apiClient *APIClient - basePath string - mu sync.RWMutex + discovery *Discovery + executor *ComposeExecutor + apiClient *APIClient + basePath string + cleanupTimeout time.Duration + mu sync.RWMutex +} + +func (m *Manager) SetCleanupTimeout(d time.Duration) { + if d > 0 { + m.cleanupTimeout = d + } +} + +func (m *Manager) CleanupTimeout() time.Duration { + if m.cleanupTimeout > 0 { + return m.cleanupTimeout + } + return 2 * time.Minute } func NewManager(deploymentsPath string) *Manager { diff --git a/pkg/config/config.go b/pkg/config/config.go index 1528a64..9753c4c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -25,6 +25,7 @@ type Config struct { DeploymentsPath string `yaml:"deployments_path"` SystemFilesRoot string `yaml:"system_files_root"` DockerSocket string `yaml:"docker_socket"` + DefaultTimeout time.Duration `yaml:"default_timeout"` API APIConfig `yaml:"api"` Auth AuthConfig `yaml:"auth"` Domain DomainConfig `yaml:"domain"` @@ -37,6 +38,7 @@ type Config struct { Audit AuditConfig `yaml:"audit"` Cluster ClusterConfig `yaml:"cluster"` SystemTerminal SystemTerminalConfig `yaml:"system_terminal"` + Cleanup CleanupConfig `yaml:"cleanup"` } type DomainConfig struct { @@ -179,6 +181,10 @@ type SystemTerminalConfig struct { ProtectedMode models.ProtectedModeConfig `yaml:"protected_mode" json:"protected_mode"` } +type CleanupConfig struct { + Timeout time.Duration `yaml:"timeout" json:"timeout"` +} + func FindConfigPath(providedPath string) string { if providedPath != "" && providedPath != "config.yml" { return providedPath @@ -265,6 +271,12 @@ func setDefaults(cfg *Config) { if cfg.Health.MetricsRetention == 0 { cfg.Health.MetricsRetention = 24 * time.Hour } + if cfg.DefaultTimeout == 0 { + cfg.DefaultTimeout = 2 * time.Minute + } + if cfg.Cleanup.Timeout == 0 { + cfg.Cleanup.Timeout = cfg.DefaultTimeout + } if cfg.Auth.JWTSecret == "" { cfg.Auth.JWTSecret = "default-secret-change-me" } diff --git a/pkg/config/registry.go b/pkg/config/registry.go new file mode 100644 index 0000000..0f28556 --- /dev/null +++ b/pkg/config/registry.go @@ -0,0 +1,177 @@ +package config + +import ( + "fmt" + "reflect" + "sort" + "strings" + "time" + + "gopkg.in/yaml.v3" +) + +type Entry struct { + Key string `json:"key"` + Type string `json:"type"` + Value interface{} `json:"value"` + Default interface{} `json:"default,omitempty"` + Description string `json:"description,omitempty"` + Sensitive bool `json:"sensitive,omitempty"` +} + +var hiddenKeys = map[string]bool{ + "auth.jwt_secret": true, + "auth.api_keys": true, +} + +func Walk(cfg *Config) []Entry { + defaults := &Config{} + setDefaults(defaults) + + current := walkValue(reflect.ValueOf(cfg).Elem(), reflect.TypeOf(*cfg), "") + defaultMap := walkValueMap(reflect.ValueOf(defaults).Elem(), reflect.TypeOf(*defaults), "") + + out := make([]Entry, 0, len(current)) + for _, e := range current { + if hiddenKeys[e.Key] { + e.Sensitive = true + e.Value = nil + } + if d, ok := defaultMap[e.Key]; ok { + e.Default = d + } + out = append(out, e) + } + sort.Slice(out, func(i, j int) bool { return out[i].Key < out[j].Key }) + return out +} + +func Get(cfg *Config, key string) (Entry, error) { + for _, e := range Walk(cfg) { + if e.Key == key { + return e, nil + } + } + return Entry{}, fmt.Errorf("unknown config key %q", key) +} + +func Set(cfg *Config, key string, raw interface{}) error { + if hiddenKeys[key] { + return fmt.Errorf("config key %q is not editable through this API", key) + } + field, err := resolveField(reflect.ValueOf(cfg).Elem(), reflect.TypeOf(*cfg), key) + if err != nil { + return err + } + if !field.CanSet() { + return fmt.Errorf("config key %q is read-only", key) + } + return assignField(field, raw) +} + +func walkValue(v reflect.Value, t reflect.Type, prefix string) []Entry { + if t == reflect.TypeOf(time.Duration(0)) { + return []Entry{{Key: prefix, Type: "duration", Value: v.Interface().(time.Duration).String()}} + } + if t == reflect.TypeOf(time.Time{}) { + return []Entry{{Key: prefix, Type: "time", Value: v.Interface()}} + } + + switch t.Kind() { + case reflect.Struct: + var out []Entry + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + if !f.IsExported() { + continue + } + tag := yamlKey(f) + if tag == "-" || tag == "" { + continue + } + child := prefix + if child != "" { + child += "." + } + child += tag + out = append(out, walkValue(v.Field(i), f.Type, child)...) + } + return out + case reflect.Ptr: + if v.IsNil() { + return []Entry{{Key: prefix, Type: t.Elem().Kind().String(), Value: nil}} + } + return walkValue(v.Elem(), t.Elem(), prefix) + case reflect.Slice: + return []Entry{{Key: prefix, Type: "slice", Value: v.Interface()}} + case reflect.Map: + return []Entry{{Key: prefix, Type: "map", Value: v.Interface()}} + case reflect.String, reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64: + return []Entry{{Key: prefix, Type: t.Kind().String(), Value: v.Interface()}} + } + return nil +} + +func walkValueMap(v reflect.Value, t reflect.Type, prefix string) map[string]interface{} { + out := make(map[string]interface{}) + for _, e := range walkValue(v, t, prefix) { + out[e.Key] = e.Value + } + return out +} + +func resolveField(v reflect.Value, t reflect.Type, key string) (reflect.Value, error) { + if key == "" { + return reflect.Value{}, fmt.Errorf("empty config key") + } + parts := strings.SplitN(key, ".", 2) + head := parts[0] + + if t.Kind() != reflect.Struct { + return reflect.Value{}, fmt.Errorf("cannot resolve %q at non-struct", key) + } + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + if !f.IsExported() { + continue + } + if yamlKey(f) != head { + continue + } + if len(parts) == 1 { + return v.Field(i), nil + } + return resolveField(v.Field(i), f.Type, parts[1]) + } + return reflect.Value{}, fmt.Errorf("unknown config key %q", key) +} + +func assignField(field reflect.Value, raw interface{}) error { + var data []byte + if s, ok := raw.(string); ok { + data = []byte(s) + } else { + encoded, err := yaml.Marshal(raw) + if err != nil { + return fmt.Errorf("encode value: %w", err) + } + data = encoded + } + target := reflect.New(field.Type()) + if err := yaml.Unmarshal(data, target.Interface()); err != nil { + return fmt.Errorf("decode into %s: %w", field.Type(), err) + } + field.Set(target.Elem()) + return nil +} + +func yamlKey(f reflect.StructField) string { + tag := f.Tag.Get("yaml") + if tag == "" { + return strings.ToLower(f.Name) + } + return strings.SplitN(tag, ",", 2)[0] +} diff --git a/pkg/config/registry_test.go b/pkg/config/registry_test.go new file mode 100644 index 0000000..b3defaa --- /dev/null +++ b/pkg/config/registry_test.go @@ -0,0 +1,128 @@ +package config + +import ( + "testing" + "time" +) + +func TestWalkFlattensKnownKeys(t *testing.T) { + cfg := &Config{} + setDefaults(cfg) + cfg.API.Port = 9090 + cfg.Cleanup.Timeout = 90 * time.Second + + entries := Walk(cfg) + if len(entries) == 0 { + t.Fatal("Walk returned no entries") + } + + keys := make(map[string]Entry, len(entries)) + for _, e := range entries { + keys[e.Key] = e + } + + if _, ok := keys["api.port"]; !ok { + t.Fatalf("expected api.port in entries, got: %v", entryKeys(entries)) + } + if v, _ := keys["api.port"].Value.(int); v != 9090 { + t.Errorf("api.port = %v, want 9090", keys["api.port"].Value) + } + + if v, _ := keys["cleanup.timeout"].Value.(string); v != "1m30s" { + t.Errorf("cleanup.timeout = %v, want 1m30s", keys["cleanup.timeout"].Value) + } + + if v, _ := keys["default_timeout"].Default.(string); v != "2m0s" { + t.Errorf("default_timeout default = %v, want 2m0s", keys["default_timeout"].Default) + } +} + +func TestWalkHidesSensitiveValues(t *testing.T) { + cfg := &Config{} + setDefaults(cfg) + cfg.Auth.JWTSecret = "actual-secret" + + entries := Walk(cfg) + for _, e := range entries { + if e.Key == "auth.jwt_secret" { + if e.Value != nil { + t.Errorf("expected nil value for sensitive key, got %v", e.Value) + } + if !e.Sensitive { + t.Error("expected Sensitive=true for auth.jwt_secret") + } + return + } + } + t.Fatalf("auth.jwt_secret not found in entries: %v", entryKeys(entries)) +} + +func TestSetCoercesTypes(t *testing.T) { + cfg := &Config{} + setDefaults(cfg) + + if err := Set(cfg, "api.port", "1234"); err != nil { + t.Fatalf("Set api.port from string: %v", err) + } + if cfg.API.Port != 1234 { + t.Errorf("api.port = %d, want 1234", cfg.API.Port) + } + + if err := Set(cfg, "cleanup.timeout", "45s"); err != nil { + t.Fatalf("Set cleanup.timeout from string: %v", err) + } + if cfg.Cleanup.Timeout != 45*time.Second { + t.Errorf("cleanup.timeout = %v, want 45s", cfg.Cleanup.Timeout) + } + + if err := Set(cfg, "api.enable_cors", true); err != nil { + t.Fatalf("Set api.enable_cors from bool: %v", err) + } + if !cfg.API.EnableCORS { + t.Errorf("api.enable_cors = false, want true") + } +} + +func TestSetRejectsUnknownKey(t *testing.T) { + cfg := &Config{} + setDefaults(cfg) + if err := Set(cfg, "no.such.thing", "x"); err == nil { + t.Fatal("expected error for unknown key, got nil") + } +} + +func TestSetRejectsHiddenKey(t *testing.T) { + cfg := &Config{} + setDefaults(cfg) + if err := Set(cfg, "auth.jwt_secret", "x"); err == nil { + t.Fatal("expected hidden-key error, got nil") + } + if cfg.Auth.JWTSecret == "x" { + t.Error("hidden key was written despite rejection") + } +} + +func TestGetReturnsEntry(t *testing.T) { + cfg := &Config{} + setDefaults(cfg) + cfg.Cleanup.Timeout = 30 * time.Second + + e, err := Get(cfg, "cleanup.timeout") + if err != nil { + t.Fatalf("Get cleanup.timeout: %v", err) + } + if e.Type != "duration" { + t.Errorf("type = %q, want duration", e.Type) + } + if e.Value.(string) != "30s" { + t.Errorf("value = %v, want 30s", e.Value) + } +} + +func entryKeys(entries []Entry) []string { + out := make([]string, 0, len(entries)) + for _, e := range entries { + out = append(out, e.Key) + } + return out +} From c52a9738a7340728c12ce4f93252505cb7ad5f7d Mon Sep 17 00:00:00 2001 From: nfebe Date: Mon, 25 May 2026 21:36:38 +0100 Subject: [PATCH 3/3] refactor(images): Use official reference parsing and gate cleanup on hash tags Image refs are now parsed with the official Docker reference package, so edge cases like registries with ports and digest references are handled the same way Docker itself handles them. The agent and the daemon now normalize refs identically, which removes a class of false negatives in the cross-deployment match. The auto-cleanup is also gated by tag shape: an image is only removed if every tag on it looks like a content hash (sha-* prefixes, git shas, sha256 digests). Floating labels (latest, edge, stable, semver) are preserved even when otherwise eligible, since they often still mean something to a human operator. --- internal/docker/cleanup.go | 83 +++++++++++++++++-- internal/docker/cleanup_test.go | 142 +++++++++++++++++++++++--------- 2 files changed, 178 insertions(+), 47 deletions(-) diff --git a/internal/docker/cleanup.go b/internal/docker/cleanup.go index ac7d46c..fb90e74 100644 --- a/internal/docker/cleanup.go +++ b/internal/docker/cleanup.go @@ -3,13 +3,21 @@ package docker import ( "context" "log" + "regexp" "strings" + "github.com/distribution/reference" "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/image" ) +var ( + shaPrefixTag = regexp.MustCompile(`^sha-[0-9a-f]{4,}$`) + digestStyleTag = regexp.MustCompile(`^sha(?:256|512):[0-9a-f]{16,}$`) + gitShaTag = regexp.MustCompile(`^[0-9a-f]{7,}$`) +) + type RemovedImage struct { ID string `json:"id"` Tag string `json:"tag,omitempty"` @@ -27,6 +35,7 @@ type imageRecord struct { id string repos []string fullRefs []string + tags []string bytes int64 } @@ -134,8 +143,12 @@ func composeImageRefs(executor *ComposeExecutor, deploymentPath string) (map[str if img.Image == "" || img.IsBuild { continue } - refs[img.Image] = true - repos[splitRepo(img.Image)] = true + repo, _ := parseRef(img.Image) + if repo == "" { + continue + } + refs[normalizeRef(img.Image)] = true + repos[repo] = true } return refs, repos, nil } @@ -161,7 +174,7 @@ func (m *Manager) imageRefsAcrossDeployments(excludeName string) (map[string]boo } for _, img := range images { if img.Image != "" && !img.IsBuild { - out[img.Image] = true + out[normalizeRef(img.Image)] = true } } } @@ -180,8 +193,13 @@ func (a *APIClient) listImageRecords(ctx context.Context) ([]imageRecord, error) if ref == "" || ref == ":" { continue } - rec.fullRefs = append(rec.fullRefs, ref) - rec.repos = append(rec.repos, splitRepo(ref)) + repo, tag := parseRef(ref) + if repo == "" { + continue + } + rec.fullRefs = append(rec.fullRefs, normalizeRef(ref)) + rec.repos = append(rec.repos, repo) + rec.tags = append(rec.tags, tag) } out = append(out, rec) } @@ -229,11 +247,35 @@ func selectStaleImages(host []imageRecord, currentRefs, currentRepos, inUse, oth kept++ continue } + if !allContentHashTags(img.tags) { + kept++ + continue + } stale = append(stale, img) } return stale, kept } +func allContentHashTags(tags []string) bool { + if len(tags) == 0 { + return false + } + for _, t := range tags { + if !looksLikeContentHash(t) { + return false + } + } + return true +} + +func looksLikeContentHash(tag string) bool { + if tag == "" { + return false + } + lower := strings.ToLower(tag) + return shaPrefixTag.MatchString(lower) || digestStyleTag.MatchString(lower) || gitShaTag.MatchString(lower) +} + func anyMatch(values []string, set map[string]bool) bool { for _, v := range values { if set[v] { @@ -243,15 +285,38 @@ func anyMatch(values []string, set map[string]bool) bool { return false } -func splitRepo(ref string) string { +func parseRef(ref string) (repo, tag string) { + named, err := reference.ParseNormalizedNamed(ref) + if err != nil { + return splitRefManual(ref) + } + repo = named.Name() + if t, ok := named.(reference.Tagged); ok { + tag = t.Tag() + } + return repo, tag +} + +func normalizeRef(ref string) string { + repo, tag := parseRef(ref) + if repo == "" { + return ref + } + if tag != "" { + return repo + ":" + tag + } + return repo +} + +func splitRefManual(ref string) (repo, tag string) { if i := strings.LastIndex(ref, "@"); i > 0 { - return ref[:i] + return ref[:i], "" } if i := strings.LastIndex(ref, ":"); i > 0 { after := ref[i+1:] if !strings.Contains(after, "/") { - return ref[:i] + return ref[:i], after } } - return ref + return ref, "" } diff --git a/internal/docker/cleanup_test.go b/internal/docker/cleanup_test.go index a8e3bce..35425d4 100644 --- a/internal/docker/cleanup_test.go +++ b/internal/docker/cleanup_test.go @@ -4,62 +4,128 @@ import ( "testing" ) -func TestSplitRepo(t *testing.T) { +func TestParseRefNormalizes(t *testing.T) { tests := []struct { - input string - want string + input string + wantRepo string + wantTag string }{ - {"nginx:1.20", "nginx"}, - {"ghcr.io/acme/api:sha-abc", "ghcr.io/acme/api"}, - {"registry.example.com:5000/app:v1", "registry.example.com:5000/app"}, - {"nginx", "nginx"}, - {"acme/api@sha256:deadbeef", "acme/api"}, + {"nginx:1.20", "docker.io/library/nginx", "1.20"}, + {"ghcr.io/acme/api:sha-abc1234", "ghcr.io/acme/api", "sha-abc1234"}, + {"registry.example.com:5000/app:v1", "registry.example.com:5000/app", "v1"}, + {"nginx", "docker.io/library/nginx", ""}, + {"acme/api:1.0", "docker.io/acme/api", "1.0"}, } for _, tt := range tests { - got := splitRepo(tt.input) - if got != tt.want { - t.Errorf("splitRepo(%q) = %q, want %q", tt.input, got, tt.want) + repo, tag := parseRef(tt.input) + if repo != tt.wantRepo || tag != tt.wantTag { + t.Errorf("parseRef(%q) = (%q,%q), want (%q,%q)", tt.input, repo, tag, tt.wantRepo, tt.wantTag) } } } -func TestSelectStaleImages(t *testing.T) { - hostImages := []imageRecord{ - {id: "id-current", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-def"}, bytes: 500}, - {id: "id-old", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-abc"}, bytes: 480}, - {id: "id-unrelated", repos: []string{"nginx"}, fullRefs: []string{"nginx:1.20"}, bytes: 142}, - {id: "id-other-deployment", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-xyz"}, bytes: 490}, - {id: "id-still-running", repos: []string{"ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-old"}, bytes: 470}, - {id: "id-dangling", bytes: 50}, - {id: "id-multitag", repos: []string{"ghcr.io/acme/api", "ghcr.io/acme/api"}, fullRefs: []string{"ghcr.io/acme/api:sha-abc2", "ghcr.io/acme/api:keep"}, bytes: 460}, +func TestLooksLikeContentHash(t *testing.T) { + hashLike := []string{ + "sha-abc1234123", + "sha-0123456789abcdef", + "abc1234", + "abcdef0123456789abcdef0123456789abcdef01", + "sha256:deadbeefdeadbeef", } - currentRefs := map[string]bool{"ghcr.io/acme/api:sha-def": true, "ghcr.io/acme/api:keep": true} - currentRepos := map[string]bool{"ghcr.io/acme/api": true} - inUse := map[string]bool{"id-still-running": true} - otherDeps := map[string]bool{"ghcr.io/acme/api:sha-xyz": true} + for _, tag := range hashLike { + if !looksLikeContentHash(tag) { + t.Errorf("expected %q to look like content hash", tag) + } + } + + floating := []string{ + "latest", "stable", "edge", "main", "master", "dev", + "v1.2.3", "1.20", "production", "release-2026-05", + "", + } + for _, tag := range floating { + if looksLikeContentHash(tag) { + t.Errorf("did NOT expect %q to look like content hash", tag) + } + } +} - stale, kept := selectStaleImages(hostImages, currentRefs, currentRepos, inUse, otherDeps) - if len(stale) != 1 { - t.Fatalf("expected exactly one stale image, got %d (ids: %v)", len(stale), staleIDs(stale)) +func TestSelectStaleImagesContentHashGate(t *testing.T) { + host := []imageRecord{ + { + id: "id-old-hash", + repos: []string{"ghcr.io/acme/api"}, + fullRefs: []string{"ghcr.io/acme/api:sha-abc1234"}, + tags: []string{"sha-abc1234"}, + bytes: 480, + }, + { + id: "id-old-latest", + repos: []string{"ghcr.io/acme/api"}, + fullRefs: []string{"ghcr.io/acme/api:latest"}, + tags: []string{"latest"}, + bytes: 490, + }, + { + id: "id-old-semver", + repos: []string{"ghcr.io/acme/api"}, + fullRefs: []string{"ghcr.io/acme/api:v1.2.3"}, + tags: []string{"v1.2.3"}, + bytes: 470, + }, } - if stale[0].id != "id-old" { - t.Errorf("expected id-old to be the stale one, got %s", stale[0].id) + currentRefs := map[string]bool{"ghcr.io/acme/api:sha-def5678": true} + currentRepos := map[string]bool{"ghcr.io/acme/api": true} + + stale, kept := selectStaleImages(host, currentRefs, currentRepos, nil, nil) + if len(stale) != 1 || stale[0].id != "id-old-hash" { + t.Fatalf("expected only id-old-hash stale, got %v", staleIDs(stale)) } - if kept != 6 { - t.Errorf("expected 6 kept, got %d", kept) + if kept != 2 { + t.Errorf("expected 2 kept (latest + semver), got %d", kept) } } -func TestSelectStaleImagesNothingToRemove(t *testing.T) { +func TestSelectStaleImagesKeepsCurrentInUseAndOtherDeps(t *testing.T) { host := []imageRecord{ - {id: "id-current", repos: []string{"nginx"}, fullRefs: []string{"nginx:1.21"}, bytes: 142}, + { + id: "id-current", + repos: []string{"ghcr.io/acme/api"}, + fullRefs: []string{"ghcr.io/acme/api:sha-def5678"}, + tags: []string{"sha-def5678"}, + bytes: 500, + }, + { + id: "id-still-running", + repos: []string{"ghcr.io/acme/api"}, + fullRefs: []string{"ghcr.io/acme/api:sha-aaa1111"}, + tags: []string{"sha-aaa1111"}, + bytes: 470, + }, + { + id: "id-other-deployment", + repos: []string{"ghcr.io/acme/api"}, + fullRefs: []string{"ghcr.io/acme/api:sha-fed3210"}, + tags: []string{"sha-fed3210"}, + bytes: 490, + }, + { + id: "id-unrelated", + repos: []string{"nginx"}, + fullRefs: []string{"nginx:1.20"}, + tags: []string{"1.20"}, + bytes: 142, + }, + {id: "id-dangling", bytes: 50}, } - stale, kept := selectStaleImages(host, map[string]bool{"nginx:1.21": true}, map[string]bool{"nginx": true}, nil, nil) + currentRefs := map[string]bool{"ghcr.io/acme/api:sha-def5678": true} + currentRepos := map[string]bool{"ghcr.io/acme/api": true} + inUse := map[string]bool{"id-still-running": true} + otherDeps := map[string]bool{"ghcr.io/acme/api:sha-fed3210": true} + + stale, _ := selectStaleImages(host, currentRefs, currentRepos, inUse, otherDeps) if len(stale) != 0 { - t.Fatalf("expected no stale images, got %d", len(stale)) - } - if kept != 1 { - t.Errorf("expected 1 kept, got %d", kept) + t.Fatalf("expected no stale images, got %v", staleIDs(stale)) } }