From 7f0eb8e8585d0925fa05e5236c1fbff54d2e8a9b Mon Sep 17 00:00:00 2001 From: l1b0k Date: Mon, 13 Dec 2021 14:57:53 +0800 Subject: [PATCH] Fix teardown failed may cause resource leak --- plugin/driver/ipvlan.go | 87 +++++----------------- plugin/driver/netlink.go | 8 +- plugin/driver/raw_nic.go | 31 -------- plugin/driver/utils.go | 153 +++++++++++++++++++++++++++++++++------ plugin/driver/veth.go | 65 +---------------- plugin/terway/cni.go | 99 +++++++++---------------- 6 files changed, 190 insertions(+), 253 deletions(-) diff --git a/plugin/driver/ipvlan.go b/plugin/driver/ipvlan.go index 8f4b8b7a..5ca16a49 100644 --- a/plugin/driver/ipvlan.go +++ b/plugin/driver/ipvlan.go @@ -153,45 +153,13 @@ func (d *IPvlanDriver) Setup(cfg *SetupConfig, netNS ns.NetNS) error { } func (d *IPvlanDriver) Teardown(cfg *TeardownCfg, netNS ns.NetNS) error { - parents := make(map[int]struct{}) - link, err := netlink.LinkByName(cfg.HostVETHName) + err := DelLinkByName(cfg.HostVETHName) if err != nil { - if _, ok := err.(netlink.LinkNotFoundError); !ok { - return fmt.Errorf("error get link %s, %w", cfg.HostVETHName, err) - } - } else { - parents[link.Attrs().ParentIndex] = struct{}{} - _ = LinkSetDown(link) - err = LinkDel(link) - if err != nil { - return fmt.Errorf("error del link, %w", err) - } - } - err = netNS.Do(func(netNS ns.NetNS) error { - for _, ifName := range []string{cfg.HostVETHName, cfg.ContainerIfName} { - link, err := netlink.LinkByName(ifName) - if err != nil { - if _, ok := err.(netlink.LinkNotFoundError); !ok { - return fmt.Errorf("error get link %s, %w", ifName, err) - } - continue - } - - parents[link.Attrs().ParentIndex] = struct{}{} - _ = LinkSetDown(link) - err = LinkDel(link) - if err != nil { - return fmt.Errorf("error del link, %w", err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("error teardown container, %w", err) + return err } - delete(parents, 0) - return d.teardownInitNamespace(parents, cfg.ContainerIPNet) + // del route to container + return d.teardownInitNamespace(cfg.ContainerIPNet) } func (d *IPvlanDriver) Check(cfg *CheckConfig) error { @@ -445,20 +413,17 @@ func (d *IPvlanDriver) setupInitNamespace(parentLink netlink.Link, cfg *SetupCon return nil } -func (d *IPvlanDriver) teardownInitNamespace(parents map[int]struct{}, containerIP *terwayTypes.IPNetSet) error { +func (d *IPvlanDriver) teardownInitNamespace(containerIP *terwayTypes.IPNetSet) error { if containerIP == nil { return nil } - exec := func(link netlink.Link, ipNet *net.IPNet) error { - rt := &netlink.Route{ - LinkIndex: link.Attrs().Index, - Dst: NewIPNetWithMaxMask(ipNet), - } - - routes, err := netlink.RouteListFiltered(NetlinkFamily(ipNet.IP), rt, netlink.RT_FILTER_DST|netlink.RT_FILTER_OIF) + exec := func(ipNet *net.IPNet) error { + routes, err := FoundRoutes(&netlink.Route{ + Dst: ipNet, + }) if err != nil { - return fmt.Errorf("error get route by filter %s, %w", rt.String(), err) + return err } for _, route := range routes { err = RouteDel(&route) @@ -468,28 +433,20 @@ func (d *IPvlanDriver) teardownInitNamespace(parents map[int]struct{}, container } return nil } - // get slave link - for index := range parents { - initLink, err := d.initSlaveLink(index) + + if containerIP.IPv4 != nil { + err := exec(NewIPNetWithMaxMask(containerIP.IPv4)) if err != nil { - if _, ok := err.(netlink.LinkNotFoundError); !ok { - return fmt.Errorf("error get link by index %d, %w", index, err) - } - continue - } - if containerIP.IPv4 != nil { - err = exec(initLink, NewIPNetWithMaxMask(containerIP.IPv4)) - if err != nil { - return err - } + return err } - if containerIP.IPv6 != nil { - err = exec(initLink, NewIPNetWithMaxMask(containerIP.IPv6)) - if err != nil { - return err - } + } + if containerIP.IPv6 != nil { + err := exec(NewIPNetWithMaxMask(containerIP.IPv6)) + if err != nil { + return err } } + return nil } @@ -497,10 +454,6 @@ func (d *IPvlanDriver) initSlaveName(parentIndex int) string { return fmt.Sprintf("ipvl_%d", parentIndex) } -func (d *IPvlanDriver) initSlaveLink(parentIndex int) (netlink.Link, error) { - return netlink.LinkByName(d.initSlaveName(parentIndex)) -} - type redirectRule struct { index int proto uint16 diff --git a/plugin/driver/netlink.go b/plugin/driver/netlink.go index 39b42391..91950469 100644 --- a/plugin/driver/netlink.go +++ b/plugin/driver/netlink.go @@ -179,7 +179,13 @@ func RuleDel(rule *netlink.Rule) error { Log.Infof(cmd) err := netlink.RuleDel(rule) if err != nil { - return fmt.Errorf("error %s, %w", cmd, err) + rule.IifName = "" + rule.OifName = "" + + err = netlink.RuleDel(rule) + if err != nil { + return fmt.Errorf("error %s, %w", cmd, err) + } } return nil } diff --git a/plugin/driver/raw_nic.go b/plugin/driver/raw_nic.go index 11991d17..7a1906de 100644 --- a/plugin/driver/raw_nic.go +++ b/plugin/driver/raw_nic.go @@ -119,37 +119,6 @@ func (r *RawNicDriver) Setup(cfg *SetupConfig, netNS ns.NetNS) error { } func (r *RawNicDriver) Teardown(cfg *TeardownCfg, netNS ns.NetNS) error { - // 1. move link out - hostCurrentNs, err := ns.GetCurrentNS() - defer func() { - err = hostCurrentNs.Close() - }() - if err != nil { - return fmt.Errorf("error get host net ns, %w", err) - } - err = netNS.Do(func(netNS ns.NetNS) error { - var nicLink netlink.Link - nicLink, err = netlink.LinkByName(cfg.ContainerIfName) - if err == nil { - nicName, err1 := r.randomNicName() - if err1 != nil { - return fmt.Errorf("error generate random nic name, %w", err) - } - err = netlink.LinkSetDown(nicLink) - if err != nil { - return fmt.Errorf("error set link %s down, %w", nicLink.Attrs().Name, err) - } - err = netlink.LinkSetName(nicLink, nicName) - if err != nil { - return fmt.Errorf("error set link %s name %s, %w", nicLink.Attrs().Name, nicName, err) - } - return netlink.LinkSetNsFd(nicLink, int(hostCurrentNs.Fd())) - } - return nil - }) - if err != nil { - return fmt.Errorf("error move eni to host net ns, %w", err) - } return nil } diff --git a/plugin/driver/utils.go b/plugin/driver/utils.go index e52b686f..fdf0a7c9 100644 --- a/plugin/driver/utils.go +++ b/plugin/driver/utils.go @@ -9,7 +9,10 @@ import ( "syscall" "time" + "github.com/containernetworking/plugins/pkg/ip" + "github.com/containernetworking/plugins/pkg/ns" "github.com/pkg/errors" + k8sErr "k8s.io/apimachinery/pkg/util/errors" terwayIP "github.com/AliyunContainerService/terway/pkg/ip" terwaySysctl "github.com/AliyunContainerService/terway/pkg/sysctl" @@ -171,6 +174,17 @@ func EnsureLinkName(link netlink.Link, name string) (bool, error) { return true, LinkSetName(link, name) } +// DelLinkByName del by name and ignore if link not present +func DelLinkByName(ifName string) error { + contLink, err := netlink.LinkByName(ifName) + if err != nil { + if _, ok := err.(netlink.LinkNotFoundError); ok { //nolint + return nil + } + } + return LinkDel(contLink) +} + // EnsureAddrWithPrefix take the ipNet set and ensure only one IP for each family is present on link // it will remove other unmatched IPs func EnsureAddrWithPrefix(link netlink.Link, ipNetSet *terwayTypes.IPNetSet, prefixRoute bool) (bool, error) { @@ -562,30 +576,6 @@ func EnsureIPRule(link netlink.Link, ipNetSet *terwayTypes.IPNetSet, tableID int return changed, nil } -func DelIPRulesByIP(ipNet *net.IPNet) error { - var ruleList []netlink.Rule - var err error - if terwayIP.IPv6(ipNet.IP) { - ruleList, err = netlink.RuleList(netlink.FAMILY_V6) - } else { - ruleList, err = netlink.RuleList(netlink.FAMILY_V4) - } - if err != nil { - return fmt.Errorf("error get ip rule, %w", err) - } - - for _, rule := range ruleList { - if terwayIP.NetEqual(ipNet, rule.Src) || terwayIP.NetEqual(ipNet, rule.Dst) { - innerErr := RuleDel(&rule) - if innerErr != nil { - rule.IifName = "" - err = errors.Wrap(RuleDel(&rule), "error de") - } - } - } - return err -} - func EnableIPv6() error { err := terwaySysctl.EnsureConf("/proc/sys/net/ipv6/conf/all/disable_ipv6", "0") if err != nil { @@ -782,3 +772,118 @@ func EnsureClsActQdsic(link netlink.Link) error { } return nil } + +// GenericTearDown target to clean all related resource as much as possible +func GenericTearDown(netNS ns.NetNS) error { + var errList []error + hostNetNS, err := ns.GetCurrentNS() + if err != nil { + return fmt.Errorf("err get host net ns, %w", err) + } + err = netNS.Do(func(netNS ns.NetNS) error { + linkList, err := netlink.LinkList() + if err != nil { + return fmt.Errorf("error get link list from netlink, %w", err) + } + for _, l := range linkList { + _ = LinkSetDown(l) + switch l.(type) { + case *netlink.IPVlan, *netlink.Vlan, *netlink.Veth, *netlink.Ifb, *netlink.Dummy: + errList = append(errList, LinkDel(l)) + case *netlink.Device: + name, err := ip.RandomVethName() + if err != nil { + errList = append(errList, err) + continue + } + errList = append(errList, LinkSetName(l, name)) + errList = append(errList, LinkSetNsFd(l, hostNetNS)) + default: + continue + } + } + return nil + }) + if err != nil { + if _, ok := err.(ns.NSPathNotExistErr); !ok { + errList = append(errList, err) + } + } + errList = append(errList, CleanIPRules()) + return k8sErr.NewAggregate(errList) +} + +// CleanIPRules del ip rule for detached devs +func CleanIPRules() (err error) { + var rules []netlink.Rule + rules, err = netlink.RuleList(netlink.FAMILY_ALL) + if err != nil { + return err + } + + var ipNets []*net.IPNet + defer func() { + for _, r := range rules { + if r.Priority != 512 && r.Priority != 2048 { + continue + } + if r.IifName != "" || r.OifName != "" { + continue + } + found := false + + for _, ipNet := range ipNets { + if r.Dst != nil { + if r.Dst.String() == ipNet.String() { + found = true + break + } + } + if r.Src != nil { + if r.Src.String() == ipNet.String() { + found = true + break + } + } + } + if !found { + continue + } + _ = RuleDel(&r) + } + }() + for _, r := range rules { + if r.Priority != 512 && r.Priority != 2048 { + continue + } + name := r.IifName + if name == "" { + name = r.OifName + } + if name == "" { + continue + } + _, err = netlink.LinkByName(name) + if err != nil { + if _, ok := err.(netlink.LinkNotFoundError); !ok { + return err + } + err = RuleDel(&r) + if err != nil { + return err + } + var ipNet *net.IPNet + if r.Dst != nil { + ipNet = r.Dst + } + if r.Src != nil { + ipNet = r.Src + } + if ipNet != nil { + ipNets = append(ipNets, ipNet) + } + } + } + + return nil +} diff --git a/plugin/driver/veth.go b/plugin/driver/veth.go index 6a67426b..28ec47ab 100644 --- a/plugin/driver/veth.go +++ b/plugin/driver/veth.go @@ -5,7 +5,6 @@ import ( "net" "os" - "github.com/AliyunContainerService/terway/pkg/ip" terwaySysctl "github.com/AliyunContainerService/terway/pkg/sysctl" "github.com/AliyunContainerService/terway/pkg/tc" terwayTypes "github.com/AliyunContainerService/terway/types" @@ -241,39 +240,7 @@ func (d *VETHDriver) Setup(cfg *SetupConfig, netNS ns.NetNS) error { } func (d *VETHDriver) Teardown(cfg *TeardownCfg, netNS ns.NetNS) error { - // 1. get container ip - hostVeth, err := netlink.LinkByName(cfg.HostVETHName) - if err != nil { - return fmt.Errorf("error get link %s, %w", cfg.HostVETHName, err) - } - - containerIP, err := getIPsByNS(cfg.ContainerIfName, netNS) - if err != nil { - return fmt.Errorf("error get container ip %s, %w", cfg.HostVETHName, err) - } - - // 2. fixme remove ingress/egress rule for pod ip - - // 3. clean ip rules - if containerIP.IPv4 != nil { - innerErr := DelIPRulesByIP(containerIP.IPv4) - if innerErr != nil { - err = fmt.Errorf("%w", innerErr) - } - } - if containerIP.IPv6 != nil { - innerErr := DelIPRulesByIP(containerIP.IPv6) - if innerErr != nil { - err = fmt.Errorf("%w", innerErr) - } - } - if err != nil { - return err - } - - // 4. remove container veth - Log.Infof("ip link del %s", hostVeth.Attrs().Name) - return netlink.LinkDel(hostVeth) + return nil } func (d *VETHDriver) Check(cfg *CheckConfig) error { @@ -421,33 +388,3 @@ func setupVETHPair(contVethName, pairName string, mtu int, hostNetNS ns.NetNS) ( } return hostVETH, contVETH, nil } - -func getIPsByNS(ifName string, nsHandler ns.NetNS) (*terwayTypes.IPNetSet, error) { - ipSet := &terwayTypes.IPNetSet{} - err := nsHandler.Do(func(netNS ns.NetNS) error { - link, err := netlink.LinkByName(ifName) - if err != nil { - return err - } - addrs, err := netlink.AddrList(link, netlink.FAMILY_ALL) - if err != nil { - return err - } - for _, addr := range addrs { - if !addr.IP.IsGlobalUnicast() { - continue - } - if ip.IPv6(addr.IP) { - ipSet.IPv6 = NewIPNetWithMaxMask(&net.IPNet{ - IP: addr.IP, - }) - } else { - ipSet.IPv4 = NewIPNetWithMaxMask(&net.IPNet{ - IP: addr.IP, - }) - } - } - return nil - }) - return ipSet, err -} diff --git a/plugin/terway/cni.go b/plugin/terway/cni.go index 240e6d9c..8f372752 100644 --- a/plugin/terway/cni.go +++ b/plugin/terway/cni.go @@ -224,6 +224,12 @@ func cmdAdd(args *skel.CmdArgs) error { hostVETHName, _ := link.VethNameForPod(string(k8sConfig.K8S_POD_NAME), string(k8sConfig.K8S_POD_NAMESPACE), defaultVethPrefix) + l, err := driver.GrabFileLock(terwayCNILock) + if err != nil { + return err + } + defer l.Close() + var containerIPNet *terwayTypes.IPNetSet var gatewayIPSet *terwayTypes.IPSet switch allocResult.IPType { @@ -304,12 +310,7 @@ func cmdAdd(args *skel.CmdArgs) error { eniMultiIPDriver = ipvlan } } - l, err := driver.GrabFileLock(terwayCNILock) - if err != nil { - logger.Debug(err) - return nil - } - defer l.Close() + err = eniMultiIPDriver.Setup(setupCfg, cniNetns) if err != nil { return fmt.Errorf("setup network failed: %v", err) @@ -355,13 +356,6 @@ func cmdAdd(args *skel.CmdArgs) error { IPv4: gateway, } - l, err := driver.GrabFileLock(terwayCNILock) - if err != nil { - logger.Debug(err) - return nil - } - defer l.Close() - setupCfg := &driver.SetupConfig{ HostVETHName: hostVETHName, ContainerIfName: args.IfName, @@ -438,12 +432,6 @@ func cmdAdd(args *skel.CmdArgs) error { ingress := allocResult.GetPod().GetIngress() egress := allocResult.GetPod().GetEgress() - l, err := driver.GrabFileLock(terwayCNILock) - if err != nil { - logger.Debug(err) - return nil - } - defer l.Close() setupCfg := &driver.SetupConfig{ HostVETHName: hostVETHName, @@ -465,11 +453,9 @@ func cmdAdd(args *skel.CmdArgs) error { defer func() { if err != nil { - if e := veth.Teardown(&driver.TeardownCfg{ - HostVETHName: hostVETHName, - ContainerIfName: args.IfName, - }, cniNetns); e != nil { - err = errors.Wrapf(err, "tear down veth network for eni failed: %v", e) + e := driver.GenericTearDown(cniNetns) + if e != nil { + err = fmt.Errorf("tear down veth network for eni failed: %w", e) } } }() @@ -540,6 +526,22 @@ func cmdDel(args *skel.CmdArgs) error { logger.Debugf("args: %s", driver.JSONStr(args)) logger.Debugf("ns %s , k8s %s, cni std %s", cniNetns.Path(), driver.JSONStr(k8sConfig), driver.JSONStr(conf)) + l, err := driver.GrabFileLock(terwayCNILock) + if err != nil { + return err + } + // try cleanup all resource + err = driver.GenericTearDown(cniNetns) + if err != nil { + _ = l.Close() + logger.Errorf("error teardown %s", err.Error()) + + return types.PrintResult(¤t.Result{ + CNIVersion: conf.CNIVersion, + }, conf.CNIVersion) + } + _ = l.Close() + terwayBackendClient, closeConn, err := getNetworkClient() if err != nil { return fmt.Errorf("error create grpc client, pod %s/%s, %w", string(k8sConfig.K8S_POD_NAMESPACE), string(k8sConfig.K8S_POD_NAME), err) @@ -561,6 +563,12 @@ func cmdDel(args *skel.CmdArgs) error { return fmt.Errorf("error get ip from terway, pod %s/%s, %w", string(k8sConfig.K8S_POD_NAMESPACE), string(k8sConfig.K8S_POD_NAME), err) } + l, err = driver.GrabFileLock(terwayCNILock) + if err != nil { + return nil + } + defer l.Close() + ipv4, ipv6 := infoResult.IPv4, infoResult.IPv6 initDrivers(ipv4, ipv6) @@ -586,13 +594,6 @@ func cmdDel(args *skel.CmdArgs) error { } } - var l *driver.Locker - l, err = driver.GrabFileLock(terwayCNILock) - if err != nil { - logger.Debug(err) - return nil - } - defer l.Close() err = eniMultiIPDriver.Teardown(&driver.TeardownCfg{ HostVETHName: hostVETHName, ContainerIfName: args.IfName, @@ -607,19 +608,6 @@ func cmdDel(args *skel.CmdArgs) error { if subnet == "" { return fmt.Errorf("error get pod cidr") } - l, err := driver.GrabFileLock(terwayCNILock) - if err != nil { - logger.Debug(err) - return nil - } - defer l.Close() - err = veth.Teardown(&driver.TeardownCfg{ - HostVETHName: hostVETHName, - ContainerIfName: args.IfName, - }, cniNetns) - if err != nil { - return fmt.Errorf("error teardown pod %s/%s, %w", string(k8sConfig.K8S_POD_NAMESPACE), string(k8sConfig.K8S_POD_NAME), err) - } err = ipam.ExecDel(delegateIpam, []byte(fmt.Sprintf(delegateConf, subnet))) if err != nil { @@ -628,28 +616,7 @@ func cmdDel(args *skel.CmdArgs) error { } case rpc.IPType_TypeVPCENI: - l, err := driver.GrabFileLock(terwayCNILock) - if err != nil { - logger.Debug(err) - return nil - } - defer l.Close() - _ = veth.Teardown(&driver.TeardownCfg{ - HostVETHName: hostVETHName, - ContainerIfName: defaultVethForENI, - }, cniNetns) - // ignore ENI veth release error - //if err != nil { - // // ignore ENI veth release error - //} - err = rawNIC.Teardown(&driver.TeardownCfg{ - HostVETHName: hostVETHName, - ContainerIfName: args.IfName, - }, cniNetns) - if err != nil { - return fmt.Errorf("error teardown pod %s/%s, %w", string(k8sConfig.K8S_POD_NAMESPACE), string(k8sConfig.K8S_POD_NAME), err) - } - + break default: return fmt.Errorf("not support this network type") }