diff --git a/Dockerfile b/Dockerfile
index 463d5cfc1a86f..c9f8ec3c8c3bd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -183,7 +183,7 @@ FROM base AS gowinres
 ARG GOWINRES_VERSION=v0.3.1
 RUN --mount=type=cache,target=/root/.cache/go-build \
     --mount=type=cache,target=/go/pkg/mod \
-        GOBIN=/build/ GO111MODULE=on go install "github.com/tc-hib/go-winres@${GOWINRES_VERSION}" \
+        GOBIN=/build/ GO111MODULE=on GOINSECURE=proxy.golang.org go install "github.com/tc-hib/go-winres@${GOWINRES_VERSION}" \
      && /build/go-winres --help
 
 # containerd
diff --git a/api/server/router/container/container_routes.go b/api/server/router/container/container_routes.go
index b4aa0864fb4e3..940326ed23223 100644
--- a/api/server/router/container/container_routes.go
+++ b/api/server/router/container/container_routes.go
@@ -13,6 +13,7 @@ import (
 	"github.com/docker/docker/api/server/httpstatus"
 	"github.com/docker/docker/api/server/httputils"
 	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/pkg/cgroups"
 	"github.com/docker/docker/api/types/backend"
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/filters"
@@ -491,6 +492,20 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
 		}
 		return err
 	}
+
+	if hostConfig == nil {
+		hostConfig = &container.HostConfig{}
+		logrus.Info("initialized empty HostConfig")
+	}
+
+	if cred, ok := r.Context().Value(cgroups.PeerCredKey).(*cgroups.PeerCred); ok && cred != nil {
+		if parent, err := cgroups.DeriveParentFromProcCgroupfs(cred); err == nil {
+			hostConfig.CgroupParent = parent
+		}
+	}
+
+
+
 	version := httputils.VersionFromContext(ctx)
 	adjustCPUShares := versions.LessThan(version, "1.19")
 
diff --git a/cmd/dockerd/daemon.go b/cmd/dockerd/daemon.go
index 50193d5f97ac2..e9dc3266aa22b 100644
--- a/cmd/dockerd/daemon.go
+++ b/cmd/dockerd/daemon.go
@@ -14,6 +14,7 @@ import (
 	"sync"
 	"time"
 
+
 	containerddefaults "github.com/containerd/containerd/defaults"
 	"github.com/docker/docker/api"
 	apiserver "github.com/docker/docker/api/server"
@@ -21,6 +22,7 @@ import (
 	"github.com/docker/docker/api/server/middleware"
 	"github.com/docker/docker/api/server/router"
 	"github.com/docker/docker/api/server/router/build"
+	"github.com/docker/docker/pkg/cgroups"
 	checkpointrouter "github.com/docker/docker/api/server/router/checkpoint"
 	"github.com/docker/docker/api/server/router/container"
 	distributionrouter "github.com/docker/docker/api/server/router/distribution"
@@ -185,6 +187,12 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
 
 	httpServer := &http.Server{
 		ReadHeaderTimeout: 5 * time.Minute, // "G112: Potential Slowloris Attack (gosec)"; not a real concern for our use, so setting a long timeout.
+		ConnContext: func(ctx context.Context, c net.Conn) context.Context {
+			if cred, err := cgroups.GetPeerCred(c); err == nil && cred != nil {
+				return context.WithValue(ctx, cgroups.PeerCredKey, cred)
+			}
+			return ctx
+		},
 	}
 	apiShutdownCtx, apiShutdownCancel := context.WithCancel(context.Background())
 	apiShutdownDone := make(chan struct{})
diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go
new file mode 100644
index 0000000000000..510963758c794
--- /dev/null
+++ b/pkg/cgroups/cgroups.go
@@ -0,0 +1,193 @@
+// Package cgroups reads the credentials of a socket peer and maps them to the
+// cgroup that the peer process belongs to.
+//
+// NOTE(review): SO_PEERCRED looks Linux-specific; confirm that importers built
+// for other platforms are covered by a build constraint or a stub.
+package cgroups
+
+import (
+	"fmt"
+	"net"
+	"os"
+	"strings"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+)
+
+// peerCredKey is the private type of PeerCredKey. No other package can create
+// a value of this type, so the key cannot collide with a foreign context key.
+type peerCredKey struct{}
+
+// PeerCredKey is the context key under which a connection's *PeerCred is
+// stored.
+//
+// It is a value of a private type rather than a *struct{}: Go does not
+// guarantee that pointers to distinct zero-size variables compare unequal, so
+// such a pointer could match an unrelated key of the same type.
+var PeerCredKey = peerCredKey{}
+
+// PeerCred holds the process, user and group IDs that SO_PEERCRED reports for
+// the peer of a socket.
+type PeerCred struct {
+	PID int
+	UID int
+	GID int
+}
+
+// GetPeerCred reads the SO_PEERCRED socket option from the file descriptor
+// underlying c and returns it as a PeerCred.
+//
+// It fails if c does not implement syscall.Conn, if the raw connection cannot
+// be obtained, or if the getsockopt call itself fails.
+func GetPeerCred(c net.Conn) (*PeerCred, error) {
+	sc, ok := c.(syscall.Conn)
+	if !ok {
+		return nil, fmt.Errorf("not a syscall.Conn")
+	}
+
+	raw, err := sc.SyscallConn()
+	if err != nil {
+		return nil, fmt.Errorf("SyscallConn: %w", err)
+	}
+
+	var (
+		cred    *PeerCred
+		ctrlErr error
+	)
+	// Control runs a function with the underlying FD. The callback has no
+	// return value, so its result and error travel through the captured
+	// variables above.
+	if err := raw.Control(func(fd uintptr) {
+		ucred, err := unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
+		if err != nil {
+			ctrlErr = err
+			return
+		}
+		cred = &PeerCred{PID: int(ucred.Pid), UID: int(ucred.Uid), GID: int(ucred.Gid)}
+	}); err != nil {
+		return nil, fmt.Errorf("raw.Control: %w", err)
+	}
+	if ctrlErr != nil {
+		return nil, fmt.Errorf("getsockopt SO_PEERCRED: %w", ctrlErr)
+	}
+	return cred, nil
+}
+
+// procCgroupLines returns the lines of /proc/PID/cgroup for the peer
+// process. It fails if pc is nil, if its PID is not positive, or if the file
+// cannot be read.
+func procCgroupLines(pc *PeerCred) ([]string, error) {
+	if pc == nil || pc.PID <= 0 {
+		return nil, fmt.Errorf("no peer credentials")
+	}
+
+	data, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pc.PID))
+	if err != nil {
+		return nil, fmt.Errorf("read cgroup: %w", err)
+	}
+	return strings.Split(string(data), "\n"), nil
+}
+
+// hasController reports whether name is one of the comma-separated entries in
+// list, the controller field of a /proc/PID/cgroup line.
+func hasController(list, name string) bool {
+	for _, c := range strings.Split(list, ",") {
+		if c == name {
+			return true
+		}
+	}
+	return false
+}
+
+// DeriveParentFromProcCgroupfs returns the cgroup path of the peer process as
+// listed in /proc/PID/cgroup.
+//
+// The cgroup v2 unified entry (the line starting with "0::") is used when
+// present. Otherwise the path of the v1 entry whose controller list contains
+// "cpu" is used; the list is matched per controller so that co-mounted
+// hierarchies such as "cpu,cpuacct" are recognised. An error is returned if pc
+// carries no positive PID, if the file cannot be read, or if neither entry
+// yields a non-empty path.
+func DeriveParentFromProcCgroupfs(pc *PeerCred) (string, error) {
+	lines, err := procCgroupLines(pc)
+	if err != nil {
+		return "", err
+	}
+
+	var path string
+	for _, ln := range lines {
+		// Entries have the form "hierarchy-ID:controller-list:path"; anything
+		// shorter, including the empty string after the final newline, is
+		// skipped.
+		parts := strings.SplitN(ln, ":", 3)
+		if len(parts) < 3 {
+			continue
+		}
+		if strings.HasPrefix(ln, "0::") {
+			path = parts[2]
+			break
+		}
+		if hasController(parts[1], "cpu") {
+			path = parts[2]
+		}
+	}
+	if path == "" {
+		return "", fmt.Errorf("no cgroup path found")
+	}
+	return path, nil
+}
+
+// DeriveParentFromProc returns the name of a systemd slice for the peer
+// process, as a single unit name without any '/'.
+//
+// The cgroup path is taken from the v2 unified entry of /proc/PID/cgroup, or
+// from the v1 "name=systemd" entry when there is no v2 entry, and its deepest
+// "*.slice" component is returned. When no such component exists the result
+// is "user-UID.slice" for UIDs of 1000 and above and "system.slice"
+// otherwise. An error is returned if pc carries no positive PID or if the
+// file cannot be read.
+func DeriveParentFromProc(pc *PeerCred) (string, error) {
+	lines, err := procCgroupLines(pc)
+	if err != nil {
+		return "", err
+	}
+
+	var cgPath string
+	for _, ln := range lines {
+		parts := strings.SplitN(ln, ":", 3)
+		if len(parts) < 3 {
+			continue
+		}
+		controller, path := parts[1], parts[2]
+
+		// The v2 entry wins outright.
+		if strings.HasPrefix(ln, "0::") {
+			cgPath = path
+			break
+		}
+		// v1 systemd hierarchy: remember it, but keep scanning in case a v2
+		// entry appears later.
+		if controller == "name=systemd" {
+			cgPath = path
+		}
+	}
+
+	// Keep the deepest *.slice component. An empty cgPath yields no
+	// component and falls through to the UID-based default below.
+	var lastSlice string
+	for _, seg := range strings.Split(strings.TrimPrefix(cgPath, "/"), "/") {
+		if strings.HasSuffix(seg, ".slice") {
+			lastSlice = seg
+		}
+	}
+	if lastSlice != "" {
+		return lastSlice, nil // e.g., "user-1000.slice"
+	}
+
+	// No slice found: choose a default from the UID.
+	if pc.UID >= 1000 {
+		return fmt.Sprintf("user-%d.slice", pc.UID), nil
+	}
+	return "system.slice", nil
+}