Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions iac/provider-gcp/nomad-cluster/worker-cluster/variables.tf
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
variable "cluster_size" {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the `cluster_size >= 1` validation is a regression. Without it, `cluster_size = 0` is silently accepted at plan time and would produce a zero-node worker cluster. If the autoscaler is expected to scale the count down to zero, that is fine, but the validation should then be relaxed to `>= 0` rather than removed entirely.

type = number

validation {
condition = var.cluster_size >= 1
error_message = "Cluster size must be at least 1."
}
}

variable "autoscaler" {
Expand Down
69 changes: 57 additions & 12 deletions packages/envd/internal/api/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"os/exec"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/awnumar/memguard"
Expand Down Expand Up @@ -217,16 +218,7 @@
}

if data.VolumeMounts != nil {
var wg sync.WaitGroup
for _, volume := range *data.VolumeMounts {
logger.Debug().Msgf("Mounting %s at %q", volume.NfsTarget, volume.Path)

wg.Go(func() {
a.setupNfs(context.WithoutCancel(ctx), volume.NfsTarget, volume.Path)
})
}

wg.Wait()
a.setupNFS(ctx, logger, *data.VolumeMounts)
}

return nil
Expand All @@ -247,7 +239,58 @@
"noacl", // no reason for acl in the sandbox
}, ",")

func (a *API) setupNfs(ctx context.Context, nfsTarget, path string) {
// nfsMountTimeout is the deadline setupNFS places on the shared context used
// for all NFS mount attempts started by a single call.
const nfsMountTimeout time.Duration = 5 * time.Second

func (a *API) setupNFS(ctx context.Context, logger zerolog.Logger, mounts []VolumeMount) {
// Already fully mounted, nothing to do
if a.isMountedNFS.Load() {
logger.Debug().Msg("NFS volumes already mounted")
return

Check failure on line 248 in packages/envd/internal/api/init.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/envd)

return with no blank line before (nlreturn)
}

// Prevent concurrent mounting attempts
if !a.isMountingNFS.CompareAndSwap(false, true) {
logger.Debug().Msg("NFS volumes already mounting")
return

Check failure on line 254 in packages/envd/internal/api/init.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/envd)

return with no blank line before (nlreturn)
}
defer a.isMountingNFS.Store(false)

logger.Debug().Msg("Mounting NFS volumes")

ctx = context.WithoutCancel(ctx)
ctx, cancel := context.WithTimeout(ctx, nfsMountTimeout)
defer cancel()

var wg sync.WaitGroup
var allSucceeded atomic.Bool
allSucceeded.Store(true)

for _, volume := range mounts {
// Skip already mounted paths
if _, ok := a.mountedPaths.Load(volume.Path); ok {
logger.Debug().Msgf("Skipping already mounted %q", volume.Path)
continue

Check failure on line 272 in packages/envd/internal/api/init.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/envd)

continue with no blank line before (nlreturn)
}

logger.Debug().Msgf("Mounting %s at %q", volume.NfsTarget, volume.Path)

wg.Go(func() {
if a.mountNFS(ctx, volume.NfsTarget, volume.Path) {
a.mountedPaths.Store(volume.Path, true)
} else {
allSucceeded.Store(false)
}
})
}

wg.Wait()

if allSucceeded.Load() {
a.isMountedNFS.Store(true)
}
}

func (a *API) mountNFS(ctx context.Context, nfsTarget, path string) bool {
commands := [][]string{
{"mkdir", "-p", path},
{"mount", "-v", "-t", "nfs", "-o", "fg,hard," + nfsOptions, nfsTarget, path},
Expand All @@ -264,9 +307,11 @@
Msg("Mount NFS")

if err != nil {
return
return false
}
}

return true
}

func (a *API) SetupHyperloop(address string) {
Expand Down
5 changes: 5 additions & 0 deletions packages/envd/internal/api/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"net/http"
"sync"
"sync/atomic"

"github.com/rs/zerolog"

Expand Down Expand Up @@ -37,6 +38,10 @@ type API struct {

lastSetTime *utils.AtomicMax
initLock sync.Mutex

isMountingNFS atomic.Bool
isMountedNFS atomic.Bool
mountedPaths sync.Map // tracks successfully mounted paths
}

func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API {
Expand Down
Loading