Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/envd/internal/api/api.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 12 additions & 3 deletions packages/envd/internal/api/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,21 +216,30 @@
a.defaults.Workdir = data.DefaultWorkdir
}

var wg sync.WaitGroup

if data.CaBundle != nil && *data.CaBundle != "" {
bundle := *data.CaBundle

wg.Go(func() {
a.caCertInstaller.Install(context.WithoutCancel(ctx), bundle)
})
}

if data.VolumeMounts != nil {
var wg sync.WaitGroup
for _, volume := range *data.VolumeMounts {
logger.Debug().Msgf("Mounting %s at %q", volume.NfsTarget, volume.Path)

wg.Go(func() {
a.setupNfs(context.WithoutCancel(ctx), volume.NfsTarget, volume.Path)
})
}

wg.Wait()
}

wg.Wait()

return nil
}

Check failure on line 242 in packages/envd/internal/api/init.go

View check run for this annotation

Claude / Claude Code Review

CA cert install failure silently returns HTTP 204

CA cert installation failures are silently swallowed in SetData(): Install() has a void return signature, so any error opening or writing to the CA bundle is only logged — after wg.Wait() returns, SetData() unconditionally returns nil and PostInit responds HTTP 204. The orchestrator has no signal that cert injection failed, so sandboxes proceed believing the custom CA is trusted when TLS connections will actually fail at runtime.

var nfsOptions = strings.Join([]string{
// wait for data to be sent to proxy server before returning.
Expand Down
17 changes: 10 additions & 7 deletions packages/envd/internal/api/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,20 @@ type API struct {

lastSetTime *utils.AtomicMax
initLock sync.Mutex

caCertInstaller *host.CACertInstaller
}

// New returns an API that uses the given logger, execution defaults, MMDS
// options channel and isNotFC flag. The remaining dependencies are created
// here: a DefaultMMDSClient, an AtomicMax for lastSetTime, an empty
// SecureToken, and a CA certificate installer that logs through l.
func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API {
return &API{
logger: l,
defaults: defaults,
mmdsChan: mmdsChan,
isNotFC: isNotFC,
mmdsClient: &DefaultMMDSClient{},
lastSetTime: utils.NewAtomicMax(),
accessToken: &SecureToken{},
logger: l,
defaults: defaults,
mmdsChan: mmdsChan,
isNotFC: isNotFC,
mmdsClient: &DefaultMMDSClient{},
lastSetTime: utils.NewAtomicMax(),
accessToken: &SecureToken{},
caCertInstaller: host.NewCACertInstaller(l),
}
}

Expand Down
246 changes: 246 additions & 0 deletions packages/envd/internal/host/cacerts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
package host

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
"time"

"github.com/rs/zerolog"
)

const (
// CaBundlePath is the system CA trust bundle. BindMountCABundle reads it
// and bind-mounts a tmpfs copy over it; Install appends certificates to it.
CaBundlePath = "/etc/ssl/certs/ca-certificates.crt"
// CaStatePath is the state file that records the most recently installed
// CA certificate PEM, so it can still be identified after a process
// restart.
CaStatePath = E2BRunDir + "/ca-cert.pem"

// caBundleTmpfsPath is the tmpfs-backed copy of the CA bundle.
// CaBundlePath is bind-mounted over this so all writes bypass NBD.
caBundleTmpfsPath = E2BRunDir + "/ca-certificates.crt"
)

// BindMountCABundle copies the system CA bundle to tmpfs and bind-mounts it
// back over the original path so all subsequent reads and writes bypass the
// NBD-backed filesystem entirely. This reduces CA cert injection from ~2 ms
// (warm NBD) / ~460 ms (cold GCS) to ~0.01 ms.
//
// Must be called once at startup, before any /init handler runs. It is a true
// no-op when the bind mount is already in place (safe to call after a process
// restart): in that case CaBundlePath and caBundleTmpfsPath resolve to the same
// inode, which is detected up front so the live bundle is never truncated and
// no second mount is stacked on top of the first.
//
// Returns a wrapped error if the bundle cannot be read, the tmpfs copy cannot
// be created, or the mount fails for a reason other than EBUSY.
func BindMountCABundle() error {
	bundleInfo, err := os.Stat(CaBundlePath)
	if err != nil {
		return fmt.Errorf("stat CA bundle %q: %w", CaBundlePath, err)
	}

	// With the bind mount in place both paths are the same file. Rewriting it
	// would briefly expose a truncated bundle to concurrent readers, so bail
	// out before touching anything. A missing tmpfs copy simply means this is
	// the first run.
	if tmpfsInfo, statErr := os.Stat(caBundleTmpfsPath); statErr == nil && os.SameFile(bundleInfo, tmpfsInfo) {
		return nil
	}

	// Read the full bundle into memory before any write so the source is never
	// open while the destination is being created.
	content, err := os.ReadFile(CaBundlePath)
	if err != nil {
		return fmt.Errorf("read CA bundle %q: %w", CaBundlePath, err)
	}

	if err := os.MkdirAll(filepath.Dir(caBundleTmpfsPath), 0o755); err != nil {
		return fmt.Errorf("create tmpfs bundle directory: %w", err)
	}

	if err := os.WriteFile(caBundleTmpfsPath, content, 0o644); err != nil {
		return fmt.Errorf("write tmpfs CA bundle %q: %w", caBundleTmpfsPath, err)
	}

	// Bind-mount the tmpfs file over the original bundle path.
	// MS_BIND makes the target appear as the source; the underlying NBD file
	// is shadowed for all processes in this mount namespace.
	if err := syscall.Mount(caBundleTmpfsPath, CaBundlePath, "", syscall.MS_BIND, ""); err != nil {
		// Kept from the original implementation: EBUSY is treated as
		// "already mounted" rather than as a failure.
		if err == syscall.EBUSY {
			return nil
		}

		return fmt.Errorf("bind-mount %q over %q: %w", caBundleTmpfsPath, CaBundlePath, err)
	}

	return nil
}

// CACertInstaller manages installation of a CA certificate into the VM's
// system trust bundle.
type CACertInstaller struct {
// mu guards lastCACert and serializes the foreground append with the
// background cleanup that rewrites the bundle and the state file.
mu sync.Mutex
// logger receives install and cleanup diagnostics; failures are reported
// here rather than returned to the caller.
logger *zerolog.Logger

// lastCACert caches the most recently installed PEM so that resume (same
// cert, same process) is a zero-I/O hot-path hit. Empty on process start;
// the state file at CaStatePath is the durable record across restarts.
lastCACert string
}

// NewCACertInstaller returns an installer that reports through logger. No
// certificate is considered installed until Install is called.
func NewCACertInstaller(logger *zerolog.Logger) *CACertInstaller {
	installer := new(CACertInstaller)
	installer.logger = logger

	return installer
}

// Install injects certPEM into the system CA bundle at CaBundlePath, using
// CaStatePath as the durable record of the installed certificate.
//
// Install has no return value: failures to open or write the bundle are only
// logged, so callers cannot tell from this call whether the certificate was
// actually installed.
func (c *CACertInstaller) Install(ctx context.Context, certPEM string) {
c.install(ctx, certPEM, CaBundlePath, CaStatePath)
}

// install is the testable core; tests supply their own paths.
//
// The cert changes on every sandbox create but stays the same across
// pause/resume cycles. The critical path only appends to the bundle (~0.04 ms
// after BindMountCABundle moves the file to tmpfs); removing the previous cert
// happens in a background goroutine.
//
// The state file survives process restarts (OOM, crashes). The background
// goroutine reads it to find the previously installed cert — lastCACert is ""
// after a restart and cannot be used for that purpose.
//
// All goroutine work runs under mu to keep the bundle and state file
// consistent with concurrent foreground appends.
func (c *CACertInstaller) install(_ context.Context, certPEM, bundlePath, statePath string) {
if certPEM == "" {
return
}

start := time.Now()

// Normalise to a single trailing newline so comparisons and removals are
// consistent regardless of how the caller formatted the PEM.
normalized := strings.TrimRight(certPEM, "\n") + "\n"

c.mu.Lock()
defer c.mu.Unlock()

if c.lastCACert == normalized {
c.logger.Debug().
Dur("duration", time.Since(start)).
Msg("CA cert unchanged, skipping install")

return
}

// Snapshot the previous cert before overwriting; used as fallback when no
// state file exists yet.
prevPEM := c.lastCACert

f, err := os.OpenFile(bundlePath, os.O_APPEND|os.O_WRONLY, 0o644)
if err != nil {
c.logger.Error().Err(err).Msg("Failed to open CA bundle")

return
}

_, err = f.WriteString(normalized)
f.Close()

if err != nil {
c.logger.Error().Err(err).Msg("Failed to write CA cert to bundle")

return
}

c.lastCACert = normalized

c.logger.Info().
Dur("append_duration", time.Since(start)).
Msg("CA cert appended to bundle")

go func() {
cleanStart := time.Now()

c.mu.Lock()
defer c.mu.Unlock()

// A newer install has taken over; let that goroutine handle cleanup.
if c.lastCACert != normalized {
return
}

// State file takes priority over the in-memory prevPEM: it holds the
// cert from the previous process lifetime after a restart.
stateRaw, _ := os.ReadFile(statePath)
effectivePrev := string(stateRaw)
if effectivePrev == "" {
effectivePrev = prevPEM
}

if err := os.WriteFile(statePath, []byte(normalized), 0o644); err != nil {
c.logger.Error().Err(err).Msg("Failed to write CA cert state file")

return

Check failure on line 170 in packages/envd/internal/host/cacerts.go

View check run for this annotation

Claude / Claude Code Review

Intermediate cert permanently stranded after rapid A→B→C rotation

In a rapid certA→certB→certC rotation, the intermediate certB goroutine exits early (because `lastCACert` has already advanced to certC) without updating the state file, causing certC's goroutine to see the stale state file (certA) and remove certA instead of certB — permanently stranding certB in the trust bundle. This violates per-sandbox CA isolation: a previous sandbox's egress certificate remains trusted indefinitely until the next process restart clears all certs.
}

// No prior cert, or same cert received again after a restart.
if effectivePrev == "" || effectivePrev == normalized {
return
}

if err := removeCertFromBundle(bundlePath, statePath, effectivePrev); err != nil {
c.logger.Error().Err(err).Msg("Failed to remove old CA cert from bundle")

return
}

c.logger.Info().
Dur("cleanup_duration", time.Since(cleanStart)).
Msg("Old CA cert removed from bundle")
}()

Check warning on line 187 in packages/envd/internal/host/cacerts.go

View check run for this annotation

Claude / Claude Code Review

Cert duplicated in bundle on every process restart with same cert

After a process restart, `lastCACert` resets to `""`, causing the same cert to be appended again on the next `install()` call; the background goroutine then reads the state file, finds `effectivePrev == normalized`, and takes the early-return path at line 174 — skipping `removeCertFromBundle` and leaving the duplicate permanently in the bundle. Each restart with the same cert adds one more copy; cleanup only happens on cert rotation. `TestInstallCACert_RestartSameCert` explicitly encodes this wi
}

// removeCertFromBundle rewrites bundlePath removing all occurrences of certPEM.
// It is a no-op when certPEM is empty or does not occur in the bundle.
//
// The preferred path is atomic: the cleaned bundle is written to a temp file in
// filepath.Dir(statePath) and renamed over bundlePath, so concurrent readers
// never observe a partial bundle.
//
// rename(2) cannot replace a path across mounts (EXDEV), nor a path that is
// itself a mount point (EBUSY). When bundlePath is a bind-mount target whose
// parent directory lives on a different mount than the temp directory, the
// rename therefore cannot succeed. In that case the bundle is rewritten in
// place through the existing path: the cleaned content is written from offset
// 0 and the file is then truncated, so readers may briefly see stale trailing
// bytes but never an empty bundle.
//
// Returns an error if the bundle cannot be read, the temp file cannot be
// prepared, or both the rename and the in-place rewrite fail.
//
// Must be called under mu.
func removeCertFromBundle(bundlePath, statePath, certPEM string) error {
	content, err := os.ReadFile(bundlePath)
	if err != nil {
		return fmt.Errorf("read bundle: %w", err)
	}

	bundle := string(content)
	if certPEM == "" || !strings.Contains(bundle, certPEM) {
		// Nothing to remove; avoid rewriting the bundle for no reason.
		return nil
	}

	cleaned := strings.ReplaceAll(bundle, certPEM, "")

	tmpPath, err := writeTempBundle(filepath.Dir(statePath), cleaned)
	if err != nil {
		return err
	}

	renameErr := os.Rename(tmpPath, bundlePath)
	if renameErr == nil {
		return nil
	}

	os.Remove(tmpPath)

	if err := overwriteBundleInPlace(bundlePath, cleaned); err != nil {
		return fmt.Errorf("rename temp file: %w; rewrite in place: %w", renameErr, err)
	}

	return nil
}

// writeTempBundle writes content to a new world-readable temp file in dir and
// returns its path. The temp file is removed again if any step fails.
func writeTempBundle(dir, content string) (string, error) {
	tmp, err := os.CreateTemp(dir, "ca-bundle-*")
	if err != nil {
		return "", fmt.Errorf("create temp file: %w", err)
	}

	tmpPath := tmp.Name()

	// os.CreateTemp creates with 0600; restore world-readable so non-root
	// processes can still verify TLS after the rename replaces the bundle.
	if err := tmp.Chmod(0o644); err != nil {
		tmp.Close()
		os.Remove(tmpPath)

		return "", fmt.Errorf("chmod temp file: %w", err)
	}

	if _, err := tmp.WriteString(content); err != nil {
		tmp.Close()
		os.Remove(tmpPath)

		return "", fmt.Errorf("write temp file: %w", err)
	}

	if err := tmp.Close(); err != nil {
		os.Remove(tmpPath)

		return "", fmt.Errorf("close temp file: %w", err)
	}

	return tmpPath, nil
}

// overwriteBundleInPlace replaces the content of the existing file at path
// without changing its inode, ownership or mode. Content is written before
// the file is truncated so the file is never empty in between.
func overwriteBundleInPlace(path, content string) error {
	f, err := os.OpenFile(path, os.O_WRONLY, 0)
	if err != nil {
		return err
	}

	if _, err := f.WriteAt([]byte(content), 0); err != nil {
		f.Close()

		return err
	}

	if err := f.Truncate(int64(len(content))); err != nil {
		f.Close()

		return err
	}

	return f.Close()
}
Loading
Loading