Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions api/pkg/external-agent/hydra_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1001,9 +1001,9 @@ func (h *HydraExecutor) buildMounts(agent *types.DesktopAgent, workspaceDir stri
// The desktop's 17-start-dockerd.sh init script detects this mountpoint
// and starts dockerd automatically. No docker.sock mount needed.
mounts = append(mounts, hydra.MountConfig{
Source: fmt.Sprintf("docker-data-%s", agent.SessionID),
Source: hydra.DockerDataVolumePrefix + agent.SessionID,
Destination: "/var/lib/docker",
Type: "volume", // Docker named volume, backed by host ext4
Type: "volume", // Converted to bind mount by Hydra's buildMounts
})

// NOTE: Shared BuildKit cache mount (/buildkit-cache) and BUILDKIT_HOST env var
Expand Down
98 changes: 32 additions & 66 deletions api/pkg/hydra/devcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ type DevContainerManager struct {

// NewDevContainerManager creates a new dev container manager
func NewDevContainerManager(manager *Manager) *DevContainerManager {
if os.Getenv("CONTAINER_DOCKER_PATH") == "" {
log.Fatal().Msg("CONTAINER_DOCKER_PATH must be set — Hydra requires bind-mount-backed session storage")
}

dm := &DevContainerManager{
manager: manager,
containers: make(map[string]*DevContainer),
Expand Down Expand Up @@ -676,33 +680,24 @@ func (dm *DevContainerManager) buildHostConfig(req *CreateDevContainerRequest) *
func (dm *DevContainerManager) buildMounts(req *CreateDevContainerRequest) []mount.Mount {
var mounts []mount.Mount

// CONTAINER_DOCKER_PATH: if set, per-session inner dockerd data uses bind mounts
// from a ZFS-backed path instead of Docker named volumes. This keeps the sandbox's
// own Docker storage on the root disk (so provisioned desktop images persist)
// while inner dockerd data benefits from ZFS dedup+compression.
containerDockerPath := os.Getenv("CONTAINER_DOCKER_PATH")

for _, m := range req.Mounts {
mountType := mount.TypeBind
if m.Type == "volume" {
mountType = mount.TypeVolume
}

// Redirect inner dockerd volumes to ZFS-backed bind mounts when configured.
// The API sends docker-data-{sessionID} as a named volume for /var/lib/docker;
// we convert it to a bind mount from /container-docker/sessions/{volumeName}/docker/.
// Inner dockerd data: convert the named volume to a bind mount from
// /container-docker/sessions/{volumeName}/docker/. This keeps the
// sandbox's own Docker storage on the root disk while inner dockerd
// data lives on the CONTAINER_DOCKER_PATH filesystem.
//
// When a golden Docker cache exists for the project, copy it into the
// session's Docker data directory. This pre-populates the inner dockerd
// with cached images so builds start warm instead of cold.
if containerDockerPath != "" && m.Destination == "/var/lib/docker" && m.Type == "volume" {
// If the session dir already exists (e.g. session restart), reuse it
// instead of re-copying 30+ GB of golden cache (~28s).
if strings.HasPrefix(m.Source, DockerDataVolumePrefix) && m.Type == "volume" {
volumeName := m.Source // e.g. "docker-data-{sessionID}"
sessionDir := filepath.Join("/container-docker/sessions", volumeName, "docker")

// Check if this session already has a Docker data directory from a
// previous run (e.g. session restart). If so, reuse it — copying
// 30+ GB of golden cache on every restart is wasteful (~28s) and
// overwrites any Docker state changes made during the session.
// Reuse existing session dir on restart (skip golden copy).
// Golden builds are excluded: they need the latest golden snapshot
// for incremental rebuilds.
sessionDirExists := false
Expand All @@ -717,12 +712,9 @@ func (dm *DevContainerManager) buildMounts(req *CreateDevContainerRequest) []mou
m.Source = sessionDir
log.Info().
Str("session_dir", sessionDir).
Str("volume", volumeName).
Msg("Reusing existing session Docker data dir (skipping golden copy)")
} else if GoldenExists(req.ProjectID) {
// Golden cache available — copy to session dir (works for both
// normal sessions AND golden builds; golden builds start from the
// previous golden for incremental rebuilds)
// Golden cache available — copy to session dir
onProgress := func(copied, total int64) {
dm.setGoldenCopyProgress(req.ProjectID, copied, total, false)
}
Expand All @@ -733,7 +725,6 @@ func (dm *DevContainerManager) buildMounts(req *CreateDevContainerRequest) []mou
Str("project_id", req.ProjectID).
Str("volume", volumeName).
Msg("Failed to copy golden cache, falling back to empty dir")
// Fall back to plain directory
if mkErr := os.MkdirAll(sessionDir, 0755); mkErr == nil {
mountType = mount.TypeBind
m.Source = sessionDir
Expand All @@ -747,13 +738,12 @@ func (dm *DevContainerManager) buildMounts(req *CreateDevContainerRequest) []mou
Msg("Using golden cache copy for inner dockerd")
}
} else {
// No golden — plain bind mount (existing behavior)
// No golden cache — empty bind mount
if err := os.MkdirAll(sessionDir, 0755); err != nil {
log.Warn().Err(err).Str("path", sessionDir).Msg("Failed to create container-docker session dir, falling back to named volume")
log.Warn().Err(err).Str("path", sessionDir).Msg("Failed to create session docker dir")
} else {
mountType = mount.TypeBind
m.Source = sessionDir
log.Debug().Str("source", sessionDir).Msg("Using ZFS-backed bind mount for inner dockerd")
}
}
}
Expand Down Expand Up @@ -1011,43 +1001,23 @@ func (dm *DevContainerManager) DeleteDevContainer(ctx context.Context, sessionID
log.Warn().Err(err).Str("container_id", dc.ContainerID).Msg("Failed to remove container")
}

// Remove the per-session docker-data volume that was mounted at /var/lib/docker.
// This volume is created by hydra_executor.go when the container starts and
// accumulates docker images/layers from in-desktop Docker usage. Without cleanup
// these orphaned volumes leak tens of GB each and fill the disk over time.
dockerDataVolume := fmt.Sprintf("docker-data-%s", sessionID)
if err := dockerClient.VolumeRemove(ctx, dockerDataVolume, true); err != nil {
log.Warn().Err(err).Str("volume", dockerDataVolume).Msg("Failed to remove session docker-data volume")
// Preserve the session Docker data dir so restarts can reuse it instead
// of re-copying 30+ GB of golden cache (~28s). Write a .last-active marker
// so GCOrphanedSessions() (every 10 min) can clean dirs inactive for >7 days.
// For golden builds, monitorGoldenBuild handles promotion and cleanup.
dockerDataVolume := DockerDataVolumePrefix + sessionID
if dc.IsGoldenBuild && dc.ProjectID != "" {
_ = SetGoldenBuildRunning(dc.ProjectID, false)
log.Info().
Str("project_id", dc.ProjectID).
Str("session_id", sessionID).
Msg("Golden build session stopped, lock released (monitorGoldenBuild handles cleanup)")
} else {
log.Info().Str("volume", dockerDataVolume).Str("session_id", sessionID).Msg("Removed session docker-data volume")
}

// CONTAINER_DOCKER_PATH session directory cleanup.
// For golden builds, monitorGoldenBuild handles promotion and cleanup —
// we must NOT delete the Docker data here or it'll be gone before promotion.
// For normal sessions, we intentionally do NOT clean up the session Docker
// data directory here. This allows session restarts to reuse the existing
// Docker data instead of re-copying 30+ GB of golden cache (~28s).
// The periodic GCOrphanedSessions() (every 10 min) handles cleanup of
// session dirs that no longer have a running container.
if os.Getenv("CONTAINER_DOCKER_PATH") != "" {
if dc.IsGoldenBuild && dc.ProjectID != "" {
_ = SetGoldenBuildRunning(dc.ProjectID, false)
log.Info().
Str("project_id", dc.ProjectID).
Str("session_id", sessionID).
Msg("Golden build session stopped, lock released (monitorGoldenBuild handles cleanup)")
} else {
// Write a timestamp so GC knows when this session was last active.
// Directory mtime doesn't update when files deep inside are modified,
// so we use an explicit marker file.
sessionDir := filepath.Join("/container-docker/sessions", dockerDataVolume)
TouchSessionLastActive(sessionDir)
log.Info().
Str("session_id", sessionID).
Str("volume", dockerDataVolume).
Msg("Session Docker data dir preserved for potential restart (GC will clean orphans)")
}
sessionDir := filepath.Join("/container-docker/sessions", dockerDataVolume)
TouchSessionLastActive(sessionDir)
log.Info().
Str("session_id", sessionID).
Msg("Session Docker data dir preserved for potential restart (GC will clean orphans)")
}

// Update status
Expand All @@ -1074,10 +1044,6 @@ func (dm *DevContainerManager) DeleteDevContainer(ctx context.Context, sessionID
// running containers, and cleans up stale golden cache state.
// Should be called periodically and on startup.
func (dm *DevContainerManager) GCOrphanedSessions() {
if os.Getenv("CONTAINER_DOCKER_PATH") == "" {
return // Not using per-session docker dirs
}

dm.mu.RLock()
active := make(map[string]bool, len(dm.containers))
for sessionID := range dm.containers {
Expand Down Expand Up @@ -1411,7 +1377,7 @@ func (dm *DevContainerManager) monitorGoldenBuild(dc *DevContainer) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()

dockerDataVolume := fmt.Sprintf("docker-data-%s", dc.SessionID)
dockerDataVolume := DockerDataVolumePrefix + dc.SessionID
resultFile := filepath.Join(sessionsBaseDir, dockerDataVolume, "docker", ".golden-build-result")

// Poll for the result file. The workspace-setup script writes this after
Expand Down
4 changes: 2 additions & 2 deletions api/pkg/hydra/golden.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ func PromoteSessionToGolden(projectID, volumeName string) error {
}

// CleanupSessionDockerDir removes the per-session Docker data directory.
// Works for both golden-seeded and plain sessions that use CONTAINER_DOCKER_PATH.
// Only used for failed golden builds; normal sessions are cleaned by GC.
func CleanupSessionDockerDir(volumeName string) error {
base := sessionOverlayDir(volumeName)

Expand Down Expand Up @@ -561,7 +561,7 @@ func GCOrphanedSessionDirs(activeSessions map[string]bool) (int, int64, error) {

// Session dirs are named "docker-data-ses_xxxxx"
name := entry.Name()
sessionID := strings.TrimPrefix(name, "docker-data-")
sessionID := strings.TrimPrefix(name, DockerDataVolumePrefix)
dir := filepath.Join(sessionsBaseDir, name)

if activeSessions[sessionID] {
Expand Down
16 changes: 5 additions & 11 deletions api/pkg/hydra/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,18 +135,12 @@ func (m *Manager) setupSharedBuildKit(ctx context.Context) error {
Str("cache_dir", buildkitCacheDir).
Msg("Creating shared BuildKit container")

// BuildKit state volume: use ZFS-backed bind mount if CONTAINER_DOCKER_PATH is set,
// otherwise use a Docker named volume. ZFS provides dedup for content-addressed data.
buildkitStateMount := "buildkit_state:/var/lib/buildkit"
if containerDockerPath := os.Getenv("CONTAINER_DOCKER_PATH"); containerDockerPath != "" {
buildkitStateDir := "/container-docker/buildkit"
if err := os.MkdirAll(buildkitStateDir, 0755); err != nil {
log.Warn().Err(err).Msg("Failed to create buildkit state dir on ZFS, using named volume")
} else {
buildkitStateMount = buildkitStateDir + ":/var/lib/buildkit"
log.Info().Str("path", buildkitStateDir).Msg("Using ZFS-backed bind mount for BuildKit state")
}
// BuildKit state: bind mount from /container-docker/ filesystem (typically ZFS).
buildkitStateDir := "/container-docker/buildkit"
if err := os.MkdirAll(buildkitStateDir, 0755); err != nil {
log.Fatal().Err(err).Msg("Failed to create buildkit state dir")
}
buildkitStateMount := buildkitStateDir + ":/var/lib/buildkit"

createCmd := exec.CommandContext(ctx, "docker", "run", "-d",
"--name", SharedBuildKitContainerName,
Expand Down
5 changes: 5 additions & 0 deletions api/pkg/hydra/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ const (
DevContainerStatusError DevContainerStatus = "error"
)

// DockerDataVolumePrefix is the name prefix shared by every per-session inner
// dockerd data volume. The full name is DockerDataVolumePrefix + sessionID
// (for example "docker-data-ses_xxxxx").
//
// The API-side hydra_executor builds the /var/lib/docker mount source from it.
// Within this package, buildMounts matches on the prefix to turn that named
// volume into a bind mount under /container-docker/sessions/{name}/docker,
// DeleteDevContainer and monitorGoldenBuild use it to locate the session
// directory, and GCOrphanedSessionDirs trims it to recover the session ID.
// Keep every producer and consumer on this constant so the names cannot drift.
const DockerDataVolumePrefix = "docker-data-"

// MountConfig represents a volume mount configuration
type MountConfig struct {
Source string `json:"source"`
Expand Down
Loading