Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict false positive warning #693

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion internal/runner/nomad_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"time"

nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
"github.com/openHPI/poseidon/internal/config"
"github.com/openHPI/poseidon/internal/nomad"
Expand All @@ -20,6 +21,8 @@
"github.com/openHPI/poseidon/pkg/util"
)

const environmentMigrationDelay = time.Minute

var (
log = logging.GetLogger("runner")
ErrUnknownExecutionEnvironment = errors.New("execution environment not found")
Expand Down Expand Up @@ -330,6 +333,34 @@
monitoring.WriteInfluxPoint(p)
}

// checkForMigratingEnvironmentJob checks if the Nomad environment job is still running after the delay.
func (m *NomadRunnerManager) checkForMigratingEnvironmentJob(ctx context.Context, jobID string, delay time.Duration) {
log.WithField(dto.KeyEnvironmentID, jobID).Debug("Environment stopped unexpectedly. Checking again..")
MrSerth marked this conversation as resolved.
Show resolved Hide resolved

select {
case <-ctx.Done():
return
case <-time.After(delay):

Check warning on line 343 in internal/runner/nomad_manager.go

View check run for this annotation

Codecov / codecov/patch

internal/runner/nomad_manager.go#L343

Added line #L343 was not covered by tests
}

templateJobs, err := m.apiClient.LoadEnvironmentJobs()
if err != nil {
log.WithError(err).Warn("couldn't load template jobs")

Check warning on line 348 in internal/runner/nomad_manager.go

View check run for this annotation

Codecov / codecov/patch

internal/runner/nomad_manager.go#L346-L348

Added lines #L346 - L348 were not covered by tests
}

var environmentStillRunning bool
for _, job := range templateJobs {
if jobID == *job.ID && *job.Status == structs.JobStatusRunning {
environmentStillRunning = true
break

Check warning on line 355 in internal/runner/nomad_manager.go

View check run for this annotation

Codecov / codecov/patch

internal/runner/nomad_manager.go#L351-L355

Added lines #L351 - L355 were not covered by tests
}
}

if !environmentStillRunning {
log.WithField(dto.KeyEnvironmentID, jobID).Warn("Environment stopped unexpectedly")

Check warning on line 360 in internal/runner/nomad_manager.go

View check run for this annotation

Codecov / codecov/patch

internal/runner/nomad_manager.go#L359-L360

Added lines #L359 - L360 were not covered by tests
}
}

// onAllocationStopped is the callback for when Nomad stopped an allocation.
func (m *NomadRunnerManager) onAllocationStopped(ctx context.Context, runnerID string, reason error) (alreadyRemoved bool) {
log.WithField(dto.KeyRunnerID, runnerID).Debug("Runner stopped")
Expand All @@ -343,7 +374,7 @@
}
_, ok := m.environments.Get(environmentID.ToString())
if ok {
log.WithField(dto.KeyEnvironmentID, environmentID).Warn("Environment stopped unexpectedly")
go m.checkForMigratingEnvironmentJob(ctx, runnerID, environmentMigrationDelay)
}
return !ok
}
Expand Down
10 changes: 0 additions & 10 deletions internal/runner/nomad_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,16 +510,6 @@ func (s *ManagerTestSuite) TestOnAllocationStopped() {
s.True(alreadyRemoved)
})
})
s.Run("logs unexpectedly stopped environments", func() {
logger, hook := test.NewNullLogger()
log = logger.WithField("package", "runner")

alreadyRemoved := s.nomadRunnerManager.onAllocationStopped(s.TestCtx, tests.DefaultTemplateJobID, nil)
s.False(alreadyRemoved)

s.Len(hook.Entries, 1)
s.Equal(logrus.WarnLevel, hook.LastEntry().Level)
})
s.Run("does not log expectedly stopped environments", func() {
logger, hook := test.NewNullLogger()
log = logger.WithField("package", "runner")
Expand Down
Loading