Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
8878e5f
Initial plan
Copilot May 6, 2026
414e5dc
Implement AgentUpgrade blue green daemon flow
Copilot May 6, 2026
2cdee54
Render daemon recovery asset paths
Copilot May 6, 2026
1b4ac89
Address AgentUpgrade review feedback
Copilot May 6, 2026
89489ed
Apply validation review cleanups
Copilot May 6, 2026
424c334
Address sequential AgentUpgrade comments
Copilot May 6, 2026
aee80f3
Apply final AgentUpgrade review fixes
Copilot May 6, 2026
69d5cd9
Use AgentUpgrade goal state
Copilot May 6, 2026
74db774
Clean up AgentUpgrade path resolution
Copilot May 6, 2026
762f7b1
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 6, 2026
c6176e8
Move daemon binary link bootstrap into agent
Copilot May 6, 2026
7209de0
Cover daemon binary bootstrap helpers
Copilot May 6, 2026
edb3744
Reject broken AgentUpgrade binaries before switch
Copilot May 6, 2026
74767e9
Address AgentUpgrade validation review cleanup
Copilot May 6, 2026
9eb3a2a
Publish AgentUpgrade daemon rollback failures
Copilot May 6, 2026
8086047
Address AgentUpgrade rollback review cleanup
Copilot May 6, 2026
6f71a6e
Address AgentUpgrade utilio path review
Copilot May 6, 2026
3553ff8
Clean up AgentUpgrade helper refactor
Copilot May 6, 2026
653f683
Address helper validation feedback
Copilot May 6, 2026
f423ac6
Clarify symlink helper cleanup
Copilot May 6, 2026
dbb8ffc
Move env fallback helper to utilio
Copilot May 6, 2026
07d49f1
Align resolve symlink parameter naming
Copilot May 6, 2026
8444ba2
Address final helper review nits
Copilot May 6, 2026
ae30e00
Cover empty env fallback helper
Copilot May 6, 2026
4a898a1
Keep path resolution in goalstates
Copilot May 6, 2026
fafda75
Merge main into AgentUpgrade branch
Copilot May 6, 2026
3176e0f
Merge main into AgentUpgrade branch history
Copilot May 6, 2026
131f4fb
Clean up MachineOperation merge resolution
Copilot May 6, 2026
9cd0e0a
Address AgentUpgrade signal and validation feedback
Copilot May 6, 2026
dc4f43a
Harden AgentUpgrade review updates
Copilot May 6, 2026
569d9cf
Move recovery signal writing into agent command
Copilot May 6, 2026
e3e297b
Preserve pending signal on helper failure
Copilot May 6, 2026
019d2eb
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
dfd9212
Apply suggestions from code review
bcho May 7, 2026
69dea67
Refactor AgentUpgrade signal handling
Copilot May 7, 2026
bffa14e
Document AgentUpgrade signal helpers
Copilot May 7, 2026
28e1584
Simplify AgentUpgrade state handling
Copilot May 7, 2026
057d83e
Clarify AgentUpgrade helper names
Copilot May 7, 2026
58fb60d
Document AgentUpgrade state machine
Copilot May 7, 2026
693614a
Address AgentUpgrade review feedback
Copilot May 7, 2026
6a59748
Log ignored AgentUpgrade signals
Copilot May 7, 2026
d945d0f
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
51ce736
Remove blue-green install script checks
Copilot May 7, 2026
6cc01f1
Move daemon binary link setup to agentbinary
Copilot May 7, 2026
f7cd69d
Clarify agentbinary test path setup
Copilot May 7, 2026
915a9f2
Consolidate AgentUpgrade signal publishing
Copilot May 7, 2026
d7959e2
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
5e90d10
Merge branch 'main' into copilot/agent-upgrade-blue-green-process
bcho May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions cmd/agent/internal/daemon/agentupgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package daemon

import (
"archive/tar"
"compress/gzip"
"context"
"fmt"
"io"
"log/slog"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"

"github.com/Azure/unbounded/pkg/agent/goalstates"
)

// Constants used by the AgentUpgrade download-and-stage flow.
const (
	// agentUpgradeDownloadURLParameter is the parameter key that carries the
	// URL of the agent archive to download.
	agentUpgradeDownloadURLParameter = "downloadURL"

	// agentBinaryArchiveName is the base name of the tar entry that is
	// extracted as the agent binary.
	agentBinaryArchiveName = "unbounded-agent"

	// maxAgentBinaryBytes caps the size of the extracted binary at 256 MiB.
	maxAgentBinaryBytes = 256 << 20
)

func agentUpgradeDownloadURL(parameters map[string]string) (string, error) {
Comment thread
bcho marked this conversation as resolved.
downloadURL := strings.TrimSpace(parameters[agentUpgradeDownloadURLParameter])
if downloadURL == "" {
return "", fmt.Errorf("missing required parameter %q", agentUpgradeDownloadURLParameter)
}

return downloadURL, nil
}

func upgradeDaemonBinary(ctx context.Context, log *slog.Logger, downloadURL string) error {
currentTarget, err := resolveSymlink(daemonBinaryCurrentPath())
if err != nil {
return fmt.Errorf("resolve current daemon binary symlink: %w", err)
}

inactivePath := daemonBinaryBluePath()
if currentTarget == daemonBinaryBluePath() {
inactivePath = daemonBinaryGreenPath()
}

tmpDir, err := os.MkdirTemp("", "unbounded-agent-upgrade-*")
if err != nil {
return fmt.Errorf("create temp dir for agent upgrade: %w", err)
}
defer func() { _ = os.RemoveAll(tmpDir) }()

Check failure on line 52 in cmd/agent/internal/daemon/agentupgrade.go

View workflow job for this annotation

GitHub Actions / Lint

Error return value of `os.RemoveAll` is not checked (errcheck)

extractedBinaryPath := filepath.Join(tmpDir, agentBinaryArchiveName)
if err := downloadAgentBinaryFromTarGz(ctx, downloadURL, extractedBinaryPath); err != nil {
return err
}

binaryData, err := os.ReadFile(extractedBinaryPath)
if err != nil {
return fmt.Errorf("read extracted agent binary: %w", err)
}

if len(binaryData) == 0 {
return fmt.Errorf("extracted agent binary from %q is empty", downloadURL)
}

if err := writeFile(inactivePath, binaryData, 0o755); err != nil {
return fmt.Errorf("install upgraded daemon binary to %s: %w", inactivePath, err)
}

if err := updateSymlink(daemonBinaryLastGoodPath(), currentTarget); err != nil {
return fmt.Errorf("update last-good daemon symlink: %w", err)
}

if err := updateSymlink(daemonBinaryCurrentPath(), inactivePath); err != nil {
return fmt.Errorf("update current daemon symlink: %w", err)
}

log.Info("staged upgraded daemon binary",
"url", downloadURL,
"previous", currentTarget,
"current", inactivePath,
)

return nil
}

func downloadAgentBinaryFromTarGz(ctx context.Context, downloadURL, targetPath string) error {
Comment thread
bcho marked this conversation as resolved.
Outdated
reader, err := openDownloadStream(ctx, downloadURL)
if err != nil {
return err
}
defer func() { _ = reader.Close() }()

Check failure on line 94 in cmd/agent/internal/daemon/agentupgrade.go

View workflow job for this annotation

GitHub Actions / Lint

Error return value of `reader.Close` is not checked (errcheck)

gzipReader, err := gzip.NewReader(reader)
if err != nil {
return fmt.Errorf("open gzip stream from %q: %w", downloadURL, err)
}
defer func() { _ = gzipReader.Close() }()

Check failure on line 100 in cmd/agent/internal/daemon/agentupgrade.go

View workflow job for this annotation

GitHub Actions / Lint

Error return value of `gzipReader.Close` is not checked (errcheck)

tarReader := tar.NewReader(gzipReader)
for {
header, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return fmt.Errorf("read tar archive from %q: %w", downloadURL, err)
}

if header.Typeflag != tar.TypeReg || filepath.Base(header.Name) != agentBinaryArchiveName {
continue
}
if header.Size > maxAgentBinaryBytes {
return fmt.Errorf("agent binary in %q exceeds maximum size", downloadURL)
}

return writeExtractedAgentBinary(targetPath, tarReader)
}

return fmt.Errorf("agent binary %q not found in archive %q", agentBinaryArchiveName, downloadURL)
}

// writeExtractedAgentBinary copies reader into targetPath, creating or
// truncating the file with mode 0o755.
//
// At most maxAgentBinaryBytes+1 bytes are read; if more than
// maxAgentBinaryBytes were written the payload is reported as oversized. A
// copy error takes precedence over a close error.
func writeExtractedAgentBinary(targetPath string, reader io.Reader) error {
	out, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o755)
	if err != nil {
		return fmt.Errorf("create extracted agent binary %s: %w", targetPath, err)
	}

	// Read one byte past the cap so an oversized payload can be detected.
	n, err := io.Copy(out, io.LimitReader(reader, maxAgentBinaryBytes+1))

	// The file is always closed; its error only matters if the copy succeeded.
	closeErr := out.Close()
	if err == nil {
		err = closeErr
	}

	switch {
	case err != nil:
		return fmt.Errorf("extract agent binary: %w", err)
	case n > maxAgentBinaryBytes:
		return fmt.Errorf("agent binary exceeds maximum size")
	}

	return nil
}

// openDownloadStream issues a GET for downloadURL and returns the response
// body for the caller to read and close.
//
// Only the http and https schemes are accepted; any other scheme, including
// an empty one, yields an error. A response with a status other than 200 OK
// is closed and reported as an error. The request is bound to ctx and is sent
// with http.DefaultClient, so ctx is what bounds the transfer.
func openDownloadStream(ctx context.Context, downloadURL string) (io.ReadCloser, error) {
	parsedURL, err := url.Parse(downloadURL)
	if err != nil {
		return nil, fmt.Errorf("parse download URL %q: %w", downloadURL, err)
	}

	switch parsedURL.Scheme {
	case "http", "https":
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, downloadURL, nil)
		if err != nil {
			return nil, fmt.Errorf("create download request for %q: %w", downloadURL, err)
		}

		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return nil, fmt.Errorf("download agent archive from %q: %w", downloadURL, err)
		}
		if resp.StatusCode != http.StatusOK {
			// Best-effort close: the status error below is the one worth
			// reporting to the caller.
			_ = resp.Body.Close()
			return nil, fmt.Errorf("download agent archive from %q returned status %s", downloadURL, resp.Status)
		}

		return resp.Body, nil
	default:
		return nil, fmt.Errorf("unsupported agent download URL scheme %q", parsedURL.Scheme)
	}
}

// resolveSymlink returns the path obtained by evaluating every symlink in
// path. When the path does not exist it falls back to daemonBinaryPath()
// instead of failing; any other error is returned unchanged.
func resolveSymlink(path string) (string, error) {
	resolved, err := filepath.EvalSymlinks(path)
	switch {
	case err == nil:
		return resolved, nil
	case os.IsNotExist(err):
		// No link yet: report the default daemon binary location.
		return daemonBinaryPath(), nil
	default:
		return "", err
	}
}

func daemonBinaryPath() string {
if path := strings.TrimSpace(os.Getenv("UNBOUNDED_AGENT_DAEMON_BINARY")); path != "" {
Comment thread
bcho marked this conversation as resolved.
Outdated
return path
}

return goalstates.DaemonBinaryPath
}

func daemonBinaryBluePath() string {
Comment thread
bcho marked this conversation as resolved.
Outdated
if path := strings.TrimSpace(os.Getenv("UNBOUNDED_AGENT_DAEMON_BINARY_BLUE")); path != "" {
return path
}

return goalstates.DaemonBinaryBluePath
}

// daemonBinaryGreenPath returns the location of the green binary slot. The
// UNBOUNDED_AGENT_DAEMON_BINARY_GREEN environment variable wins when it holds
// a non-blank value; otherwise goalstates.DaemonBinaryGreenPath is used.
func daemonBinaryGreenPath() string {
	override := strings.TrimSpace(os.Getenv("UNBOUNDED_AGENT_DAEMON_BINARY_GREEN"))
	if override == "" {
		return goalstates.DaemonBinaryGreenPath
	}

	return override
}

// daemonBinaryCurrentPath returns the location of the "current" daemon
// symlink. The UNBOUNDED_AGENT_DAEMON_BINARY_CURRENT environment variable wins
// when it holds a non-blank value; otherwise
// goalstates.DaemonBinaryCurrentPath is used.
func daemonBinaryCurrentPath() string {
	override := strings.TrimSpace(os.Getenv("UNBOUNDED_AGENT_DAEMON_BINARY_CURRENT"))
	if override == "" {
		return goalstates.DaemonBinaryCurrentPath
	}

	return override
}

// daemonBinaryLastGoodPath returns the location of the "last-good" daemon
// symlink. The UNBOUNDED_AGENT_DAEMON_BINARY_LAST_GOOD environment variable
// wins when it holds a non-blank value; otherwise
// goalstates.DaemonBinaryLastGoodPath is used.
func daemonBinaryLastGoodPath() string {
	override := strings.TrimSpace(os.Getenv("UNBOUNDED_AGENT_DAEMON_BINARY_LAST_GOOD"))
	if override == "" {
		return goalstates.DaemonBinaryLastGoodPath
	}

	return override
}

// updateSymlink makes linkPath a symlink to targetPath, replacing whatever
// link is already there.
//
// The parent directory of linkPath is created if needed. The new link is
// first created at linkPath+".tmp" and then renamed over linkPath, so
// linkPath is never left missing part-way through. If the rename fails the
// temporary link is removed again before the error is returned.
func updateSymlink(linkPath, targetPath string) error {
	if err := os.MkdirAll(filepath.Dir(linkPath), 0o750); err != nil {
		return err
	}

	tmpPath := linkPath + ".tmp"

	// Best-effort removal of a temp link left by an earlier interrupted run.
	// If something is still in the way, os.Symlink below reports it.
	_ = os.Remove(tmpPath)

	if err := os.Symlink(targetPath, tmpPath); err != nil {
		return err
	}

	if err := os.Rename(tmpPath, linkPath); err != nil {
		// Do not leave the temp link behind; the rename error is the one the
		// caller needs to see.
		_ = os.Remove(tmpPath)
		return err
	}

	return nil
}
133 changes: 133 additions & 0 deletions cmd/agent/internal/daemon/agentupgrade_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package daemon

import (
"archive/tar"
"bytes"
"compress/gzip"
"context"
"io"
"log/slog"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAgentUpgradeDownloadURL(t *testing.T) {
t.Parallel()

downloadURL, err := agentUpgradeDownloadURL(map[string]string{
agentUpgradeDownloadURLParameter: " https://example.com/agent.tar.gz ",
})
require.NoError(t, err)
assert.Equal(t, "https://example.com/agent.tar.gz", downloadURL)

_, err = agentUpgradeDownloadURL(nil)
require.Error(t, err)
assert.Contains(t, err.Error(), agentUpgradeDownloadURLParameter)
}

// TestUpgradeDaemonBinary covers the first upgrade from a legacy layout: the
// current symlink points at the legacy binary, so the new binary must land in
// the blue slot, current must switch to blue, and last-good must point at the
// legacy binary.
func TestUpgradeDaemonBinary(t *testing.T) {
	// Resolve the temp dir up front. The assertions below compare against
	// filepath.EvalSymlinks output, which would differ from the raw path if
	// any component of the temp dir were itself a symlink.
	dir, err := filepath.EvalSymlinks(t.TempDir())
	require.NoError(t, err)

	legacyPath := filepath.Join(dir, "unbounded-agent")
	currentPath := filepath.Join(dir, "unbounded-agent-current")
	lastGoodPath := filepath.Join(dir, "unbounded-agent-last-good")
	bluePath := filepath.Join(dir, "unbounded-agent-blue")
	greenPath := filepath.Join(dir, "unbounded-agent-green")

	require.NoError(t, os.WriteFile(legacyPath, []byte("legacy"), 0o755))
	require.NoError(t, os.Symlink(legacyPath, currentPath))

	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY", legacyPath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_CURRENT", currentPath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_LAST_GOOD", lastGoodPath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_BLUE", bluePath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_GREEN", greenPath)

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/gzip")
		w.WriteHeader(http.StatusOK)
		// assert, not require: the handler runs on the server's goroutine,
		// and FailNow must only be called from the test goroutine.
		assert.NoError(t, writeAgentArchive(w, []byte("new-agent-binary")))
	}))
	t.Cleanup(server.Close)

	require.NoError(t, upgradeDaemonBinary(context.Background(), slog.Default(), server.URL))

	target, err := filepath.EvalSymlinks(currentPath)
	require.NoError(t, err)
	assert.Equal(t, bluePath, target)

	lastGoodTarget, err := filepath.EvalSymlinks(lastGoodPath)
	require.NoError(t, err)
	assert.Equal(t, legacyPath, lastGoodTarget)

	newData, err := os.ReadFile(bluePath)
	require.NoError(t, err)
	assert.Equal(t, []byte("new-agent-binary"), newData)
}

// TestUpgradeDaemonBinary_AlternatesFromBlueToGreen covers an upgrade that
// starts with blue active: the new binary must land in the green slot,
// current must switch to green, and last-good must point at blue.
func TestUpgradeDaemonBinary_AlternatesFromBlueToGreen(t *testing.T) {
	// Resolve the temp dir up front. upgradeDaemonBinary compares the
	// resolved current target with the configured blue path, so the
	// configured paths must already be free of symlinked components.
	dir, err := filepath.EvalSymlinks(t.TempDir())
	require.NoError(t, err)

	currentPath := filepath.Join(dir, "unbounded-agent-current")
	lastGoodPath := filepath.Join(dir, "unbounded-agent-last-good")
	bluePath := filepath.Join(dir, "unbounded-agent-blue")
	greenPath := filepath.Join(dir, "unbounded-agent-green")

	require.NoError(t, os.WriteFile(bluePath, []byte("blue"), 0o755))
	require.NoError(t, os.Symlink(bluePath, currentPath))

	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_CURRENT", currentPath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_LAST_GOOD", lastGoodPath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_BLUE", bluePath)
	t.Setenv("UNBOUNDED_AGENT_DAEMON_BINARY_GREEN", greenPath)

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		// assert, not require: the handler runs on the server's goroutine,
		// and FailNow must only be called from the test goroutine.
		assert.NoError(t, writeAgentArchive(w, []byte("green")))
	}))
	t.Cleanup(server.Close)

	require.NoError(t, upgradeDaemonBinary(context.Background(), slog.Default(), server.URL))

	target, err := filepath.EvalSymlinks(currentPath)
	require.NoError(t, err)
	assert.Equal(t, greenPath, target)

	lastGoodTarget, err := filepath.EvalSymlinks(lastGoodPath)
	require.NoError(t, err)
	assert.Equal(t, bluePath, lastGoodTarget)

	// The downloaded payload must be what ended up in the green slot.
	greenData, err := os.ReadFile(greenPath)
	require.NoError(t, err)
	assert.Equal(t, []byte("green"), greenData)
}

// TestDownloadAgentBinaryFromTarGz_RejectsUnsupportedScheme verifies that a
// file:// URL is refused with an "unsupported scheme" error.
func TestDownloadAgentBinaryFromTarGz_RejectsUnsupportedScheme(t *testing.T) {
	t.Parallel()

	const archiveURL = "file:///tmp/unbounded-agent.tar.gz"
	destination := filepath.Join(t.TempDir(), "agent")

	downloadErr := downloadAgentBinaryFromTarGz(context.Background(), archiveURL, destination)
	require.Error(t, downloadErr)
	assert.Contains(t, downloadErr.Error(), "unsupported agent download URL scheme")
}

// writeAgentArchive writes a gzip-compressed tar archive to w containing a
// single entry named "unbounded-agent" with mode 0o755 and the given bytes as
// its content.
//
// Both writers are closed explicitly and their errors returned: the tar
// trailer and the gzip footer are only emitted on Close, so a failure there
// means the archive written to w is incomplete.
func writeAgentArchive(w io.Writer, binary []byte) error {
	gz := gzip.NewWriter(w)
	tw := tar.NewWriter(gz)

	header := &tar.Header{
		Name: "unbounded-agent",
		Mode: 0o755,
		Size: int64(len(binary)),
	}
	if err := tw.WriteHeader(header); err != nil {
		return err
	}

	if _, err := io.Copy(tw, bytes.NewReader(binary)); err != nil {
		return err
	}

	// Close the tar writer first so its trailer passes through gzip before
	// the gzip stream is finalized.
	if err := tw.Close(); err != nil {
		return err
	}

	return gz.Close()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

[Unit]
Description=Recover Unbounded Agent Daemon to last known good binary

[Service]
Type=oneshot
ExecStart={{ .DaemonRecoveryScriptPath }}
Loading
Loading