Skip to content

Commit f3693c5

Browse files
authored
Merge pull request #1117 from dtrudg/pick1116
fix: Wait directly in fakeroot MonitorContainer (release-3.10)
2 parents 0cdc9c7 + f72a952 commit f3693c5

File tree

2 files changed

+34
-16
lines changed

2 files changed

+34
-16
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
- Ensure `make dist` doesn't include conmon binary or intermediate files.
88
- Do not hang on pull from http(s) source that doesn't provide a content-length.
9+
- Avoid hang on fakeroot cleanup under high load seen on some
10+
distributions / kernels.
911

1012
## 3.10.3 \[2022-10-06\]
1113

internal/pkg/runtime/engine/fakeroot/engine_linux.go

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -257,26 +257,42 @@ func (e *EngineOperations) StartProcess(masterConn net.Conn) error {
257257
// not need them for wait4 and kill syscalls.
258258
func (e *EngineOperations) MonitorContainer(pid int, signals chan os.Signal) (syscall.WaitStatus, error) {
259259
var status syscall.WaitStatus
260+
waitStatus := make(chan syscall.WaitStatus, 1)
261+
waitError := make(chan error, 1)
262+
263+
go func() {
264+
sylog.Debugf("Waiting for container process %d", pid)
265+
_, err := syscall.Wait4(pid, &status, 0, nil)
266+
sylog.Debugf("Wait for process %d complete with status %v, error %v", pid, status, err)
267+
waitStatus <- status
268+
waitError <- err
269+
}()
260270

261271
for {
262-
s := <-signals
263-
switch s {
264-
case syscall.SIGCHLD:
265-
if wpid, err := syscall.Wait4(pid, &status, syscall.WNOHANG, nil); err != nil {
266-
return status, fmt.Errorf("error while waiting child: %s", err)
267-
} else if wpid != pid {
268-
continue
272+
select {
273+
case s := <-signals:
274+
// Signal received
275+
switch s {
276+
case syscall.SIGCHLD:
277+
// Our go routine waiting for the container pid will handle container exit.
278+
break
279+
case syscall.SIGURG:
280+
// Ignore SIGURG, which is used for non-cooperative goroutine
281+
// preemption starting with Go 1.14. For more information, see
282+
// https://github.com/golang/go/issues/24543.
283+
break
284+
default:
285+
if err := syscall.Kill(pid, s.(syscall.Signal)); err != nil {
286+
return status, fmt.Errorf("interrupted by signal %s", s.String())
287+
}
269288
}
270-
return status, nil
271-
case syscall.SIGURG:
272-
// Ignore SIGURG, which is used for non-cooperative goroutine
273-
// preemption starting with Go 1.14. For more information, see
274-
// https://github.com/golang/go/issues/24543.
275-
break
276-
default:
277-
if err := syscall.Kill(pid, s.(syscall.Signal)); err != nil {
278-
return status, fmt.Errorf("interrupted by signal %s", s.String())
289+
case ws := <-waitStatus:
290+
// Container process exited
291+
we := <-waitError
292+
if we != nil {
293+
return ws, fmt.Errorf("error while waiting for child: %w", we)
279294
}
295+
return ws, we
280296
}
281297
}
282298
}

0 commit comments

Comments
 (0)