Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
8878e5f
Initial plan
Copilot May 6, 2026
414e5dc
Implement AgentUpgrade blue green daemon flow
Copilot May 6, 2026
2cdee54
Render daemon recovery asset paths
Copilot May 6, 2026
1b4ac89
Address AgentUpgrade review feedback
Copilot May 6, 2026
89489ed
Apply validation review cleanups
Copilot May 6, 2026
424c334
Address sequential AgentUpgrade comments
Copilot May 6, 2026
aee80f3
Apply final AgentUpgrade review fixes
Copilot May 6, 2026
69d5cd9
Use AgentUpgrade goal state
Copilot May 6, 2026
74db774
Clean up AgentUpgrade path resolution
Copilot May 6, 2026
762f7b1
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 6, 2026
c6176e8
Move daemon binary link bootstrap into agent
Copilot May 6, 2026
7209de0
Cover daemon binary bootstrap helpers
Copilot May 6, 2026
edb3744
Reject broken AgentUpgrade binaries before switch
Copilot May 6, 2026
74767e9
Address AgentUpgrade validation review cleanup
Copilot May 6, 2026
9eb3a2a
Publish AgentUpgrade daemon rollback failures
Copilot May 6, 2026
8086047
Address AgentUpgrade rollback review cleanup
Copilot May 6, 2026
6f71a6e
Address AgentUpgrade utilio path review
Copilot May 6, 2026
3553ff8
Clean up AgentUpgrade helper refactor
Copilot May 6, 2026
653f683
Address helper validation feedback
Copilot May 6, 2026
f423ac6
Clarify symlink helper cleanup
Copilot May 6, 2026
dbb8ffc
Move env fallback helper to utilio
Copilot May 6, 2026
07d49f1
Align resolve symlink parameter naming
Copilot May 6, 2026
8444ba2
Address final helper review nits
Copilot May 6, 2026
ae30e00
Cover empty env fallback helper
Copilot May 6, 2026
4a898a1
Keep path resolution in goalstates
Copilot May 6, 2026
fafda75
Merge main into AgentUpgrade branch
Copilot May 6, 2026
3176e0f
Merge main into AgentUpgrade branch history
Copilot May 6, 2026
131f4fb
Clean up MachineOperation merge resolution
Copilot May 6, 2026
9cd0e0a
Address AgentUpgrade signal and validation feedback
Copilot May 6, 2026
dc4f43a
Harden AgentUpgrade review updates
Copilot May 6, 2026
569d9cf
Move recovery signal writing into agent command
Copilot May 6, 2026
e3e297b
Preserve pending signal on helper failure
Copilot May 6, 2026
019d2eb
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
dfd9212
Apply suggestions from code review
bcho May 7, 2026
69dea67
Refactor AgentUpgrade signal handling
Copilot May 7, 2026
bffa14e
Document AgentUpgrade signal helpers
Copilot May 7, 2026
28e1584
Simplify AgentUpgrade state handling
Copilot May 7, 2026
057d83e
Clarify AgentUpgrade helper names
Copilot May 7, 2026
58fb60d
Document AgentUpgrade state machine
Copilot May 7, 2026
693614a
Address AgentUpgrade review feedback
Copilot May 7, 2026
6a59748
Log ignored AgentUpgrade signals
Copilot May 7, 2026
d945d0f
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
51ce736
Remove blue-green install script checks
Copilot May 7, 2026
6cc01f1
Move daemon binary link setup to agentbinary
Copilot May 7, 2026
f7cd69d
Clarify agentbinary test path setup
Copilot May 7, 2026
915a9f2
Consolidate AgentUpgrade signal publishing
Copilot May 7, 2026
d7959e2
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
5e90d10
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/agent-e2e-kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,12 @@ jobs:
- name: Validate node restart operation
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-node-reboot-operation

- name: Validate agent upgrade operation
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-agent-upgrade-operation

- name: Validate agent upgrade rollback
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-agent-upgrade-rollback

- name: Validate workload on agent node
run: python3 ./hack/agent/e2e-kind/e2e.py --verbose validate-workload

Expand Down
254 changes: 254 additions & 0 deletions cmd/agent/internal/daemon/agentupgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package daemon

import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/Azure/unbounded/pkg/agent/agentbinary"
"github.com/Azure/unbounded/pkg/agent/goalstates"
)

// Constants shared by the AgentUpgrade helpers in this file.
const (
// agentUpgradeDownloadURLParameter is the operation parameter key whose value
// is the URL handed to agentbinary.InstallFromTarGz.
agentUpgradeDownloadURLParameter = "downloadURL"
// agentUpgradeBinaryMode is the file mode passed to
// agentbinary.InstallFromTarGz for the installed daemon binary.
agentUpgradeBinaryMode = 0o755
// agentUpgradeVerifyTimeout bounds how long the `version` probe of a freshly
// installed binary may run before its context is cancelled.
agentUpgradeVerifyTimeout = 30 * time.Second
)

func agentUpgradeDownloadURL(parameters map[string]string) (string, error) {
Comment thread
bcho marked this conversation as resolved.
downloadURL := strings.TrimSpace(parameters[agentUpgradeDownloadURLParameter])
if downloadURL == "" {
return "", fmt.Errorf("missing required parameter %q", agentUpgradeDownloadURLParameter)
}

return downloadURL, nil
}

func upgradeDaemonBinary(ctx context.Context, log *slog.Logger, downloadURL string) error {
paths := daemonAgentUpgradePaths()
currentTarget, err := resolveSymlink(paths.CurrentPath)
if err != nil {
return fmt.Errorf("resolve current daemon binary symlink: %w", err)
}
upgrade := paths.ResolveAgentUpgrade(downloadURL, currentTarget)

if err := agentbinary.InstallFromTarGz(ctx, upgrade.DownloadURL, upgrade.TargetBinaryPath, upgrade.BinaryName, agentUpgradeBinaryMode); err != nil {
Comment thread
bcho marked this conversation as resolved.
Outdated
return fmt.Errorf("install upgraded daemon binary to %s: %w", upgrade.TargetBinaryPath, err)
}

if err := verifyAgentUpgradeBinary(ctx, upgrade.TargetBinaryPath); err != nil {
return err
}

if err := updateSymlink(upgrade.LastGoodLinkPath, upgrade.PreviousBinaryPath); err != nil {
return fmt.Errorf("update last-good daemon symlink: %w", err)
}

if err := updateSymlink(upgrade.CurrentLinkPath, upgrade.TargetBinaryPath); err != nil {
return fmt.Errorf("update current daemon symlink: %w", err)
}

log.Info("staged upgraded daemon binary",
"url", upgrade.DownloadURL,
"previous", upgrade.PreviousBinaryPath,
"current", upgrade.TargetBinaryPath,
)

return nil
}

func verifyAgentUpgradeBinary(ctx context.Context, path string) error {
Comment thread
bcho marked this conversation as resolved.
Outdated
verifyCtx, cancel := context.WithTimeout(ctx, agentUpgradeVerifyTimeout)
defer cancel()

output, err := exec.CommandContext(verifyCtx, path, "version").CombinedOutput()
if err != nil {
details := strings.TrimSpace(string(output))
if details != "" {
return fmt.Errorf("verify upgraded daemon binary %s: %w: %s", path, err, details)
}
return fmt.Errorf("verify upgraded daemon binary %s: %w", path, err)
}

return nil
}

func recordPendingAgentUpgradeOperation(operationName string) error {
return writeFile(agentUpgradeOperationSignalPath(), []byte(operationName+"\n"), 0o600)
}

// clearPendingAgentUpgradeOperation removes the AgentUpgrade operation signal
// file on a best-effort basis.
//
// A file that is already absent is treated as success. Any other removal
// error is logged as a warning on log and otherwise ignored.
func clearPendingAgentUpgradeOperation(log *slog.Logger) {
	removeErr := os.Remove(agentUpgradeOperationSignalPath())
	if removeErr == nil || errors.Is(removeErr, os.ErrNotExist) {
		return
	}

	log.Warn("failed to clear pending AgentUpgrade operation signal", "error", removeErr)
}

func agentUpgradeOperationSignalPath() string {
Comment thread
bcho marked this conversation as resolved.
Outdated
if path := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonAgentUpgradeOperationPath)); path != "" {
return path
}

return goalstates.DaemonAgentUpgradeOperationPath
}

// agentUpgradeFailureSignalPath returns the location used for the AgentUpgrade
// failure signal.
//
// The environment variable named by goalstates.EnvDaemonAgentUpgradeFailurePath
// takes precedence when it is non-empty after trimming whitespace; otherwise
// goalstates.DaemonAgentUpgradeFailurePath is returned.
func agentUpgradeFailureSignalPath() string {
	override := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonAgentUpgradeFailurePath))
	if override == "" {
		return goalstates.DaemonAgentUpgradeFailurePath
	}

	return override
}

// daemonAgentUpgradePaths collects the daemon binary and symlink paths into a
// goalstates.AgentUpgradePaths value. Each field comes from the matching
// daemonBinary*Path helper, so environment overrides are honored.
func daemonAgentUpgradePaths() goalstates.AgentUpgradePaths {
	var paths goalstates.AgentUpgradePaths

	paths.BinaryPath = daemonBinaryPath()
	paths.BluePath = daemonBinaryBluePath()
	paths.GreenPath = daemonBinaryGreenPath()
	paths.CurrentPath = daemonBinaryCurrentPath()
	paths.LastGoodPath = daemonBinaryLastGoodPath()

	return paths
}

func ensureDaemonBinaryLinks(log *slog.Logger) error {
Comment thread
bcho marked this conversation as resolved.
Outdated
paths := daemonAgentUpgradePaths()

if _, err := filepath.EvalSymlinks(paths.CurrentPath); err != nil {
Comment thread
bcho marked this conversation as resolved.
Outdated
if !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("resolve current daemon binary symlink: %w", err)
}
target, targetErr := initialDaemonBinaryTarget(paths)
if targetErr != nil {
return targetErr
}
if err := updateSymlink(paths.CurrentPath, target); err != nil {
return fmt.Errorf("initialize current daemon symlink: %w", err)
}
}

currentTarget, err := filepath.EvalSymlinks(paths.CurrentPath)
if err != nil {
return fmt.Errorf("resolve current daemon binary symlink: %w", err)
}

if _, err := filepath.EvalSymlinks(paths.LastGoodPath); err != nil {
if !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("resolve last-good daemon binary symlink: %w", err)
}
if err := updateSymlink(paths.LastGoodPath, currentTarget); err != nil {
return fmt.Errorf("initialize last-good daemon symlink: %w", err)
}
}

if currentTarget != paths.BinaryPath {
// Do not replace the compatibility path when the current symlink
// already resolves to that path. That preserves legacy installs and
// avoids creating a BinaryPath -> CurrentPath -> BinaryPath loop.
if err := updateSymlink(paths.BinaryPath, paths.CurrentPath); err != nil {
return fmt.Errorf("initialize daemon compatibility symlink: %w", err)
}
}

log.Info("daemon binary links initialized",
"current", paths.CurrentPath,
"last_good", paths.LastGoodPath,
)

return nil
}

// initialDaemonBinaryTarget picks the binary the current symlink should point
// at when it is first created. Candidates are checked in the order BluePath,
// GreenPath, BinaryPath, and the first one that isExecutableFile accepts is
// returned. An error is returned when none qualifies.
func initialDaemonBinaryTarget(paths goalstates.AgentUpgradePaths) (string, error) {
	candidates := [...]string{paths.BluePath, paths.GreenPath, paths.BinaryPath}

	for i := range candidates {
		if candidate := candidates[i]; isExecutableFile(candidate) {
			return candidate, nil
		}
	}

	return "", fmt.Errorf("no executable agent binary found for daemon link initialization")
}

// isExecutableFile reports whether path refers to a regular file with at least
// one execute permission bit set. The check uses os.Stat, so symlinks are
// followed; any stat error is reported as false.
func isExecutableFile(path string) bool {
	fi, statErr := os.Stat(path)

	switch {
	case statErr != nil:
		return false
	case !fi.Mode().IsRegular():
		return false
	default:
		return fi.Mode().Perm()&0o111 != 0
	}
}

// resolveSymlink returns the fully resolved target of path.
//
// When path (or the target of a dangling link) does not exist, the result of
// daemonBinaryPath is returned instead of an error. Any other resolution
// error is returned unchanged with an empty path.
func resolveSymlink(path string) (string, error) {
	targetPath, err := filepath.EvalSymlinks(path)
	switch {
	case err == nil:
		return targetPath, nil
	case errors.Is(err, os.ErrNotExist):
		// Not-exist is detected with errors.Is, matching the other helpers
		// in this file.
		return daemonBinaryPath(), nil
	default:
		return "", err
	}
}

// daemonBinaryPath returns the daemon binary (compatibility) path.
//
// The environment variable named by goalstates.EnvDaemonBinary takes
// precedence when it is non-empty after trimming whitespace; otherwise
// goalstates.DaemonBinaryPath is returned.
func daemonBinaryPath() string {
	override := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonBinary))
	if override == "" {
		return goalstates.DaemonBinaryPath
	}

	return override
}

func daemonBinaryBluePath() string {
Comment thread
bcho marked this conversation as resolved.
Outdated
if path := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonBinaryBlue)); path != "" {
return path
}

return goalstates.DaemonBinaryBluePath
}

// daemonBinaryGreenPath returns the path of the "green" daemon binary slot.
//
// The environment variable named by goalstates.EnvDaemonBinaryGreen takes
// precedence when it is non-empty after trimming whitespace; otherwise
// goalstates.DaemonBinaryGreenPath is returned.
func daemonBinaryGreenPath() string {
	override := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonBinaryGreen))
	if override == "" {
		return goalstates.DaemonBinaryGreenPath
	}

	return override
}

// daemonBinaryCurrentPath returns the path of the "current" daemon binary
// symlink.
//
// The environment variable named by goalstates.EnvDaemonBinaryCurrent takes
// precedence when it is non-empty after trimming whitespace; otherwise
// goalstates.DaemonBinaryCurrentPath is returned.
func daemonBinaryCurrentPath() string {
	override := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonBinaryCurrent))
	if override == "" {
		return goalstates.DaemonBinaryCurrentPath
	}

	return override
}

// daemonBinaryLastGoodPath returns the path of the "last-good" daemon binary
// symlink.
//
// The environment variable named by goalstates.EnvDaemonBinaryLastGood takes
// precedence when it is non-empty after trimming whitespace; otherwise
// goalstates.DaemonBinaryLastGoodPath is returned.
func daemonBinaryLastGoodPath() string {
	override := strings.TrimSpace(os.Getenv(goalstates.EnvDaemonBinaryLastGood))
	if override == "" {
		return goalstates.DaemonBinaryLastGoodPath
	}

	return override
}

// updateSymlink makes linkPath a symlink to targetPath, replacing whatever
// non-directory entry currently exists at linkPath.
//
// The parent directory of linkPath is created (mode 0o750) when missing. The
// new link is first created at "<linkPath>.tmp" and then renamed over
// linkPath, so linkPath is never observed half-written. A stale temporary
// link from an earlier attempt is removed before the new one is created, and
// the temporary link is removed again if the final rename fails so no stray
// entry is left behind.
//
// targetPath is stored verbatim; it is not required to exist.
func updateSymlink(linkPath, targetPath string) error {
	if err := os.MkdirAll(filepath.Dir(linkPath), 0o750); err != nil {
		return err
	}

	tmpPath := linkPath + ".tmp"
	if err := os.Remove(tmpPath); err != nil && !errors.Is(err, os.ErrNotExist) {
		return err
	}
	if err := os.Symlink(targetPath, tmpPath); err != nil {
		return err
	}

	if err := os.Rename(tmpPath, linkPath); err != nil {
		// Best-effort cleanup of the temporary link; report a cleanup
		// failure alongside the rename error rather than hiding it.
		if rmErr := os.Remove(tmpPath); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
			return errors.Join(err, rmErr)
		}

		return err
	}

	return nil
}
Loading
Loading