Skip to content

Commit

Permalink
Fix kubelet killing VMs upon Virtlet pod restart
Browse files Browse the repository at this point in the history
The culprits were cgroups that aren't handled by libvirt.
Of those, we already handle hugetlb by moving the emulator
process out of it. Still, we need to do the same for the systemd
(name=systemd) and pids cgroup controllers.

The problem manifests itself when cgroup-per-qos is enabled for
kubelet. This is the default, but in current kdc it may
be disabled as a workaround for an old kubelet bug. This bug
is already fixed, so the workaround is to be removed soon.
  • Loading branch information
Ivan Shvedunov committed Feb 20, 2019
1 parent 34f37d9 commit 9ec2870
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
16 changes: 10 additions & 6 deletions cmd/vmwrapper/vmwrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,17 @@ func main() {
}
}

// FIXME: move the pid of qemu instance out of /kubepods/podxxxxxxx
// for some cases it will be killed by kubelet after the virtlet pod is deleted/recreated
// FIXME: move the pid of the qemu instance out of kubelet-managed
// cgroups for controllers that aren't managed by libvirt.
// If we don't do this, the VM pod will be killed by kubelet when the Virtlet pod
// is removed and cgroup-per-qos is enabled in kubelet settings.
cm := cgroups.NewManager(os.Getpid(), nil)
if _, err := cm.GetProcessController("hugetlb"); err == nil {
err = cm.MoveProcess("hugetlb", "/")
if err != nil {
glog.Warningf("failed to move pid into hugetlb path /: %v", err)
for _, ctl := range []string{"hugetlb", "systemd", "pids"} {
if _, err := cm.GetProcessController(ctl); err == nil {
err = cm.MoveProcess(ctl, "/")
if err != nil {
glog.Warningf("failed to move pid into cgroup %q path /: %v", ctl, err)
}
}
}

Expand Down
9 changes: 8 additions & 1 deletion pkg/utils/cgroups/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,15 @@ func (c *RealManager) GetProcessControllers() (map[string]string, error) {
// "6:memory:/user.slice/user-xxx.slice/session-xx.scope"
parts := strings.SplitN(line, ":", 3)

name := parts[1]
if strings.HasPrefix(name, "name=") {
// Handle named cgroup hierarchies like name=systemd
// The corresponding directory tree will be /sys/fs/cgroup/systemd
name = name[5:]
}

// use second part as controller name and third as its path
ctrls[parts[1]] = parts[2]
ctrls[name] = parts[2]

if err == io.EOF {
break
Expand Down

0 comments on commit 9ec2870

Please sign in to comment.