Skip to content

Commit

Permalink
Merge pull request #392 from l1b0k/feat/prio
Browse files Browse the repository at this point in the history
feat: add network priority
  • Loading branch information
BSWANG authored Jul 21, 2022
2 parents 1686f5d + 7ac39cb commit 2d21c09
Show file tree
Hide file tree
Showing 25 changed files with 900 additions and 166 deletions.
40 changes: 24 additions & 16 deletions daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,9 @@ func (n *networkService) AllocIP(ctx context.Context, r *rpc.AllocIPRequest) (*r
Trunk: false,
},
Pod: &rpc.Pod{
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
NetworkPriority: podinfo.NetworkPriority,
},
IfName: "",
ExtraRoutes: nil,
Expand Down Expand Up @@ -395,8 +396,9 @@ func (n *networkService) AllocIP(ctx context.Context, r *rpc.AllocIPRequest) (*r
Trunk: false,
},
Pod: &rpc.Pod{
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
NetworkPriority: podinfo.NetworkPriority,
},
IfName: "",
ExtraRoutes: nil,
Expand Down Expand Up @@ -432,8 +434,9 @@ func (n *networkService) AllocIP(ctx context.Context, r *rpc.AllocIPRequest) (*r
},
ENIInfo: nil,
Pod: &rpc.Pod{
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
NetworkPriority: podinfo.NetworkPriority,
},
IfName: "",
ExtraRoutes: nil,
Expand Down Expand Up @@ -610,8 +613,9 @@ func (n *networkService) GetIPInfo(ctx context.Context, r *rpc.GetInfoRequest) (
Trunk: false,
},
Pod: &rpc.Pod{
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
NetworkPriority: podinfo.NetworkPriority,
},
IfName: "",
ExtraRoutes: nil,
Expand All @@ -636,8 +640,9 @@ func (n *networkService) GetIPInfo(ctx context.Context, r *rpc.GetInfoRequest) (
ServiceCIDR: n.k8s.GetServiceCIDR().ToRPC(),
},
Pod: &rpc.Pod{
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
NetworkPriority: podinfo.NetworkPriority,
},
DefaultRoute: true,
})
Expand Down Expand Up @@ -672,8 +677,9 @@ func (n *networkService) GetIPInfo(ctx context.Context, r *rpc.GetInfoRequest) (
Trunk: podinfo.PodENI && n.enableTrunk && eni.Trunk,
},
Pod: &rpc.Pod{
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
Ingress: podinfo.TcIngress,
Egress: podinfo.TcEgress,
NetworkPriority: podinfo.NetworkPriority,
},
IfName: "",
ExtraRoutes: nil,
Expand Down Expand Up @@ -1018,8 +1024,9 @@ func (n *networkService) multiIPFromCRD(podInfo *types.PodInfo, waitReady bool)
},
ENIInfo: eniInfo,
Pod: &rpc.Pod{
Ingress: podInfo.TcIngress,
Egress: podInfo.TcEgress,
Ingress: podInfo.TcIngress,
Egress: podInfo.TcEgress,
NetworkPriority: podInfo.NetworkPriority,
},
IfName: alloc.Interface,
ExtraRoutes: parseExtraRoute(alloc.ExtraRoutes),
Expand Down Expand Up @@ -1092,8 +1099,9 @@ func (n *networkService) exclusiveENIFromCRD(podInfo *types.PodInfo, waitReady b
},
ENIInfo: eniInfo,
Pod: &rpc.Pod{
Ingress: podInfo.TcIngress,
Egress: podInfo.TcEgress,
Ingress: podInfo.TcIngress,
Egress: podInfo.TcEgress,
NetworkPriority: podInfo.NetworkPriority,
},
IfName: alloc.Interface,
ExtraRoutes: parseExtraRoute(alloc.ExtraRoutes),
Expand Down
10 changes: 10 additions & 0 deletions daemon/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,16 @@ func convertPod(daemonMode string, statefulWorkloadKindSet sets.String, pod *cor
}
}

if prio, ok := podAnnotation[types.NetworkPriority]; ok {
switch types.NetworkPrio(prio) {
case types.NetworkPrioBestEffort, types.NetworkPrioBurstable, types.NetworkPrioGuaranteed:
pi.NetworkPriority = prio
default:
_ = tracing.RecordPodEvent(pod.Name, pod.Namespace, eventTypeWarning,
"ParseFailed", fmt.Sprintf("Parse pod annotation %s failed.", types.NetworkPriority))
}
}

// determine whether pod's IP will stick 5 minutes for a reuse, priorities as below,
// 1. pod has a positive pod-ip-reservation annotation
// 2. pod is owned by a known stateful workload
Expand Down
36 changes: 36 additions & 0 deletions docs/qos.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@ Traffic control mainly have two aspect
| `kubernetes.io/ingress-bandwidth: 10M` | ingress banwidth |
| `kubernetes.io/egress-bandwidth: 10M` | egress banwidth |

### config shaping

to enable shaping, follow config need to add in `eni-config`

```yaml
# kubectl edit cm -n kube-system eni-config
apiVersion: v1
data:
10-terway.conf: |
{
"cniVersion": "0.3.1",
"name": "terway",
"capabilities": {"bandwidth": true}, # add
"type": "terway"
}
```
## priority
We have three annotations available for pod, to control different priority.
Expand All @@ -34,3 +51,22 @@ When priority is set, a `priority qdisc` is set to the `eni` related to pod.
Pod egress traffic will classify into different bands based on the config.
For more info about how priority works , please refer to [tc-prio](https://man7.org/linux/man-pages/man8/tc-prio.8.html)
.

> note: default qdisc will be replaced

### config priority

to enable priority, follow config need to add in `eni-config`

```yaml
# kubectl edit cm -n kube-system eni-config
apiVersion: v1
data:
10-terway.conf: |
{
"cniVersion": "0.3.1",
"name": "terway",
"enable_network_priority": true, # add
"type": "terway"
}
```
116 changes: 116 additions & 0 deletions pkg/tc/u32.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
Copyright 2022 The Terway Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tc

import (
"encoding/binary"
"net"

"github.com/vishvananda/netlink"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)

// FilterBySrcIP found u32 filter by pod ip
// used for prio only
func FilterBySrcIP(link netlink.Link, parent uint32, ipNet *net.IPNet) (*netlink.U32, error) {
filters, err := netlink.FilterList(link, parent)
if err != nil {
return nil, err
}

matches := U32MatchSrc(ipNet)
if len(matches) == 0 {
return nil, nil
}
OUT:
for _, f := range filters {
u32, ok := f.(*netlink.U32)
if !ok {
continue
}
if u32.Attrs().LinkIndex != link.Attrs().Index ||
u32.Protocol != unix.ETH_P_IP ||
u32.Sel == nil {
continue
}

// check all matches is satisfied with current
for _, m := range matches {
found := false
for _, key := range u32.Sel.Keys {
if key.Off != m.Off || key.Val != m.Val || key.Mask != m.Mask {
continue
}
found = true
break
}
if !found {
continue OUT
}
}
return u32, nil
}
return nil, nil
}

// MatchSrc add match for source ip
func MatchSrc(u32 *netlink.U32, ipNet *net.IPNet) {
if u32.Sel == nil {
u32.Sel = &netlink.TcU32Sel{
Flags: nl.TC_U32_TERMINAL,
}
}

u32.Sel.Keys = append(u32.Sel.Keys, U32MatchSrc(ipNet)...)
u32.Sel.Nkeys = uint8(len(u32.Sel.Keys))
}

// U32MatchSrc return u32 match key by src ip
func U32MatchSrc(ipNet *net.IPNet) []netlink.TcU32Key {
if ipNet.IP.To4() == nil {
return U32IPv6Src(ipNet)
}
return []netlink.TcU32Key{U32IPv4Src(ipNet)}
}

func U32IPv4Src(ipNet *net.IPNet) netlink.TcU32Key {
mask := net.IP(ipNet.Mask).To4()
val := ipNet.IP.Mask(ipNet.Mask).To4()
return netlink.TcU32Key{
Mask: binary.BigEndian.Uint32(mask),
Val: binary.BigEndian.Uint32(val),
Off: 12,
}
}

func U32IPv6Src(ipNet *net.IPNet) []netlink.TcU32Key {
mask := ipNet.Mask
val := ipNet.IP.Mask(ipNet.Mask)

r := make([]netlink.TcU32Key, 0, 4)
for i := 0; i < 4; i++ {
m := binary.BigEndian.Uint32(mask)
if m != 0 {
r = append(r, netlink.TcU32Key{
Mask: m,
Val: binary.BigEndian.Uint32(val),
Off: int32(8 + 4*i),
})
}
mask = mask[4:]
val = val[4:]
}
return r
}
Loading

0 comments on commit 2d21c09

Please sign in to comment.