Skip to content

Commit

Permalink
Merge pull request #7403 from gvnc/oci-nodepool-auto-discovery
Browse files Browse the repository at this point in the history
node-group-auto-discovery support for oci
  • Loading branch information
k8s-ci-robot authored Oct 25, 2024
2 parents 00e19fd + 1bf5f72 commit 6a02299
Show file tree
Hide file tree
Showing 6 changed files with 295 additions and 4 deletions.
10 changes: 9 additions & 1 deletion cluster-autoscaler/cloudprovider/oci/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ use-instance-principals = true
n/a
### Node Group Auto Discovery
`--node-group-auto-discovery` could be given in below pattern. It would discover the nodepools under given compartment by matching the nodepool tags (either they are Freeform or Defined tags). All of the parameters are mandatory.
```
clusterId:<clusterId>,compartmentId:<compartmentId>,nodepoolTags:<tagKey1>=<tagValue1>&<tagKey2>=<tagValue2>,min:<min>,max:<max>
```
Auto discovery can not be used along with static discovery (`node` parameter) to prevent conflicts.
## Deployment
### Create OCI config secret (only if _not_ using Instance Principals)
Expand Down Expand Up @@ -271,7 +278,8 @@ kubectl apply -f ./cloudprovider/oci/examples/oci-nodepool-cluster-autoscaler-w-
correctly (`oci-cloud-controller-manager`).
- Avoid manually changing pools that are managed by the Cluster Autoscaler. For example, do not add or remove nodes
using kubectl, or using the Console (or the Oracle Cloud Infrastructure CLI or API).
- `--node-group-auto-discovery` and `--node-autoprovisioning-enabled=true` are not supported.
- `--node-autoprovisioning-enabled=true` are not supported.
- `--node-group-auto-discovery` and `node` parameters can not be used together as it can cause conflicts.
- We set a `nvidia.com/gpu:NoSchedule` taint on nodes in a GPU enabled pools.
## Helpful links
Expand Down
21 changes: 21 additions & 0 deletions cluster-autoscaler/cloudprovider/oci/common/oci_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,24 @@ func GetAllPoolTypes(groups []string) (string, error) {
}
return ocidType, nil
}

// HasNodeGroupTags checks if nodepoolTags is provided
func HasNodeGroupTags(nodeGroupAutoDiscoveryList []string) (bool, bool, error) {
instancePoolTagsFound := false
nodePoolTagsFound := false
for _, arg := range nodeGroupAutoDiscoveryList {
if strings.Contains(arg, "nodepoolTags") {
nodePoolTagsFound = true
}
if strings.Contains(arg, "instancepoolTags") {
instancePoolTagsFound = true
}
}
if instancePoolTagsFound == true && nodePoolTagsFound == true {
return instancePoolTagsFound, nodePoolTagsFound, fmt.Errorf("can not use both instancepoolTags and nodepoolTags in node-group-auto-discovery")
}
if len(nodeGroupAutoDiscoveryList) > 0 && instancePoolTagsFound == false && nodePoolTagsFound == false {
return instancePoolTagsFound, nodePoolTagsFound, fmt.Errorf("either instancepoolTags or nodepoolTags should be provided in node-group-auto-discovery")
}
return instancePoolTagsFound, nodePoolTagsFound, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,14 @@ func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
if err != nil {
klog.Fatalf("Failed to get pool type: %v", err)
}
if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) {
manager, err := nodepools.CreateNodePoolManager(opts.CloudConfig, do, createKubeClient(opts))
_, nodepoolTagsFound, err := ocicommon.HasNodeGroupTags(opts.NodeGroupAutoDiscovery)
if err != nil {
klog.Fatalf("Failed to get auto discovery tags: %v", err)
}
if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) && nodepoolTagsFound == true {
klog.Fatalf("-nodes and -node-group-auto-discovery parameters can not be used together.")
} else if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) || nodepoolTagsFound == true {
manager, err := nodepools.CreateNodePoolManager(opts.CloudConfig, opts.NodeGroupAutoDiscovery, do, createKubeClient(opts))
if err != nil {
klog.Fatalf("Could not create OCI OKE cloud provider: %v", err)
}
Expand Down
150 changes: 149 additions & 1 deletion cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"math"
"os"
"regexp"
"strconv"
"strings"
"time"
Expand All @@ -34,6 +35,11 @@ import (
const (
maxAddTaintRetries = 5
maxGetNodepoolRetries = 3
clusterId = "clusterId"
compartmentId = "compartmentId"
nodepoolTags = "nodepoolTags"
min = "min"
max = "max"
)

var (
Expand Down Expand Up @@ -75,10 +81,11 @@ type okeClient interface {
GetNodePool(context.Context, oke.GetNodePoolRequest) (oke.GetNodePoolResponse, error)
UpdateNodePool(context.Context, oke.UpdateNodePoolRequest) (oke.UpdateNodePoolResponse, error)
DeleteNode(context.Context, oke.DeleteNodeRequest) (oke.DeleteNodeResponse, error)
ListNodePools(ctx context.Context, request oke.ListNodePoolsRequest) (oke.ListNodePoolsResponse, error)
}

// CreateNodePoolManager creates an NodePoolManager that can manage autoscaling node pools
func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, kubeClient kubernetes.Interface) (NodePoolManager, error) {
func CreateNodePoolManager(cloudConfigPath string, nodeGroupAutoDiscoveryList []string, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, kubeClient kubernetes.Interface) (NodePoolManager, error) {

var err error
var configProvider common.ConfigurationProvider
Expand Down Expand Up @@ -151,6 +158,20 @@ func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.N
nodePoolCache: newNodePoolCache(&okeClient),
}

// auto discover nodepools from compartments with nodeGroupAutoDiscovery parameter
klog.Infof("checking node groups for autodiscovery ... ")
for _, arg := range nodeGroupAutoDiscoveryList {
nodeGroup, err := nodeGroupFromArg(arg)
if err != nil {
return nil, fmt.Errorf("unable to construct node group auto discovery from argument: %v", err)
}
nodeGroup.manager = manager
nodeGroup.kubeClient = kubeClient

manager.nodeGroups = append(manager.nodeGroups, *nodeGroup)
autoDiscoverNodeGroups(manager, manager.okeClient, *nodeGroup)
}

// Contains all the specs from the args that give us the pools.
for _, arg := range discoveryOpts.NodeGroupSpecs {
np, err := nodePoolFromArg(arg)
Expand Down Expand Up @@ -180,6 +201,48 @@ func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.N
return manager, nil
}

func autoDiscoverNodeGroups(m *ociManagerImpl, okeClient okeClient, nodeGroup nodeGroupAutoDiscovery) (bool, error) {
var resp, reqErr = okeClient.ListNodePools(context.Background(), oke.ListNodePoolsRequest{
ClusterId: common.String(nodeGroup.clusterId),
CompartmentId: common.String(nodeGroup.compartmentId),
})
if reqErr != nil {
klog.Errorf("failed to fetch the nodepool list with clusterId: %s, compartmentId: %s. Error: %v", nodeGroup.clusterId, nodeGroup.compartmentId, reqErr)
return false, reqErr
}
for _, nodePoolSummary := range resp.Items {
if validateNodepoolTags(nodeGroup.tags, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) {
nodepool := &nodePool{}
nodepool.id = *nodePoolSummary.Id
nodepool.minSize = nodeGroup.minSize
nodepool.maxSize = nodeGroup.maxSize

nodepool.manager = nodeGroup.manager
nodepool.kubeClient = nodeGroup.kubeClient

m.staticNodePools[nodepool.id] = nodepool
klog.V(5).Infof("auto discovered nodepool in compartment : %s , nodepoolid: %s", nodeGroup.compartmentId, nodepool.id)
} else {
klog.Warningf("nodepool ignored as the tags do not satisfy the requirement : %s , %v, %v", *nodePoolSummary.Id, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags)
}
}
return true, nil
}

func validateNodepoolTags(nodeGroupTags map[string]string, freeFormTags map[string]string, definedTags map[string]map[string]interface{}) bool {
if nodeGroupTags != nil {
for tagKey, tagValue := range nodeGroupTags {
namespacedTagKey := strings.Split(tagKey, ".")
if len(namespacedTagKey) == 2 && tagValue != definedTags[namespacedTagKey[0]][namespacedTagKey[1]] {
return false
} else if len(namespacedTagKey) != 2 && tagValue != freeFormTags[tagKey] {
return false
}
}
}
return true
}

// nodePoolFromArg parses a node group spec represented in the form of `<minSize>:<maxSize>:<ocid>` and produces a node group spec object
func nodePoolFromArg(value string) (*nodePool, error) {
tokens := strings.SplitN(value, ":", 3)
Expand Down Expand Up @@ -207,6 +270,81 @@ func nodePoolFromArg(value string) (*nodePool, error) {
return spec, nil
}

// nodeGroupFromArg parses a node group spec represented in the form of
// `clusterId:<clusterId>,compartmentId:<compartmentId>,nodepoolTags:<tagKey1>=<tagValue1>&<tagKey2>=<tagValue2>,min:<min>,max:<max>`
// and produces a node group auto discovery object
func nodeGroupFromArg(value string) (*nodeGroupAutoDiscovery, error) {
// this regex will find the key-value pairs in any given order if separated with a colon
regexPattern := `(?:` + compartmentId + `:(?P<` + compartmentId + `>[^,]+)`
regexPattern = regexPattern + `|` + nodepoolTags + `:(?P<` + nodepoolTags + `>[^,]+)`
regexPattern = regexPattern + `|` + max + `:(?P<` + max + `>[^,]+)`
regexPattern = regexPattern + `|` + min + `:(?P<` + min + `>[^,]+)`
regexPattern = regexPattern + `|` + clusterId + `:(?P<` + clusterId + `>[^,]+)`
regexPattern = regexPattern + `)(?:,|$)`

re := regexp.MustCompile(regexPattern)

parametersMap := make(map[string]string)

// push key-value pairs into a map
for _, match := range re.FindAllStringSubmatch(value, -1) {
for i, name := range re.SubexpNames() {
if i != 0 && match[i] != "" {
parametersMap[name] = match[i]
}
}
}

spec := &nodeGroupAutoDiscovery{}

if parametersMap[clusterId] != "" {
spec.clusterId = parametersMap[clusterId]
} else {
return nil, fmt.Errorf("failed to set %s, it is missing in node-group-auto-discovery parameter", clusterId)
}

if parametersMap[compartmentId] != "" {
spec.compartmentId = parametersMap[compartmentId]
} else {
return nil, fmt.Errorf("failed to set %s, it is missing in node-group-auto-discovery parameter", compartmentId)
}

if size, err := strconv.Atoi(parametersMap[min]); err == nil {
spec.minSize = size
} else {
return nil, fmt.Errorf("failed to set %s size: %s, expected integer", min, parametersMap[min])
}

if size, err := strconv.Atoi(parametersMap[max]); err == nil {
spec.maxSize = size
} else {
return nil, fmt.Errorf("failed to set %s size: %s, expected integer", max, parametersMap[max])
}

if parametersMap[nodepoolTags] != "" {
nodepoolTags := parametersMap[nodepoolTags]

spec.tags = make(map[string]string)

pairs := strings.Split(nodepoolTags, "&")

for _, pair := range pairs {
parts := strings.Split(pair, "=")
if len(parts) == 2 {
spec.tags[parts[0]] = parts[1]
} else {
return nil, fmt.Errorf("nodepoolTags should be given in tagKey=tagValue format, this is not valid: %s", pair)
}
}
} else {
return nil, fmt.Errorf("failed to set %s, it is missing in node-group-auto-discovery parameter", nodepoolTags)
}

klog.Infof("node group auto discovery spec constructed: %+v", spec)

return spec, nil
}

type ociManagerImpl struct {
cfg *ocicommon.CloudConfig
okeClient okeClient
Expand All @@ -215,6 +353,7 @@ type ociManagerImpl struct {
ociTagsGetter ocicommon.TagsGetter
registeredTaintsGetter RegisteredTaintsGetter
staticNodePools map[string]NodePool
nodeGroups []nodeGroupAutoDiscovery

lastRefresh time.Time

Expand Down Expand Up @@ -253,6 +392,15 @@ func (m *ociManagerImpl) TaintToPreventFurtherSchedulingOnRestart(nodes []*apiv1
}

func (m *ociManagerImpl) forceRefresh() error {
// auto discover node groups
if m.nodeGroups != nil {
// empty previous nodepool map to do an auto discovery
m.staticNodePools = make(map[string]NodePool)
for _, nodeGroup := range m.nodeGroups {
autoDiscoverNodeGroups(m, m.okeClient, nodeGroup)
}
}
// rebuild nodepool cache
err := m.nodePoolCache.rebuild(m.staticNodePools, maxGetNodepoolRetries)
if err != nil {
return err
Expand Down
97 changes: 97 additions & 0 deletions cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ func (c mockOKEClient) DeleteNode(context.Context, oke.DeleteNodeRequest) (oke.D
}, nil
}

func (c mockOKEClient) ListNodePools(context.Context, oke.ListNodePoolsRequest) (oke.ListNodePoolsResponse, error) {
return oke.ListNodePoolsResponse{}, nil
}

func TestRemoveInstance(t *testing.T) {
instanceId1 := "instance1"
instanceId2 := "instance2"
Expand Down Expand Up @@ -384,3 +388,96 @@ func TestRemoveInstance(t *testing.T) {
}
}
}

func TestNodeGroupFromArg(t *testing.T) {
var nodeGroupArg = "clusterId:ocid1.cluster.oc1.test-region.test,compartmentId:ocid1.compartment.oc1.test-region.test,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5"
nodeGroupAutoDiscovery, err := nodeGroupFromArg(nodeGroupArg)
if err != nil {
t.Errorf("Error: #{err}")
}
if nodeGroupAutoDiscovery.clusterId != "ocid1.cluster.oc1.test-region.test" {
t.Errorf("Error: clusterId should be ocid1.cluster.oc1.test-region.test")
}
if nodeGroupAutoDiscovery.compartmentId != "ocid1.compartment.oc1.test-region.test" {
t.Errorf("Error: compartmentId should be ocid1.compartment.oc1.test-region.test")
}
if nodeGroupAutoDiscovery.minSize != 1 {
t.Errorf("Error: minSize should be 1")
}
if nodeGroupAutoDiscovery.maxSize != 5 {
t.Errorf("Error: maxSize should be 5")
}
if nodeGroupAutoDiscovery.tags["ca-managed"] != "true" {
t.Errorf("Error: ca-managed:true is missing in tags.")
}
if nodeGroupAutoDiscovery.tags["namespace.foo"] != "bar" {
t.Errorf("Error: namespace.foo:bar is missing in tags.")
}
}

func TestValidateNodePoolTags(t *testing.T) {

testCases := map[string]struct {
nodeGroupTags map[string]string
freeFormTags map[string]string
definedTags map[string]map[string]interface{}
expectedResult bool
}{
"no-tags": {
nodeGroupTags: nil,
freeFormTags: nil,
definedTags: nil,
expectedResult: true,
},
"node-group tags provided but no tags on nodepool": {
nodeGroupTags: map[string]string{
"testTag": "testTagValue",
},
freeFormTags: nil,
definedTags: nil,
expectedResult: false,
},
"node-group tags and free-form tags do not match": {
nodeGroupTags: map[string]string{
"testTag": "testTagValue",
},
freeFormTags: map[string]string{
"foo": "bar",
},
definedTags: nil,
expectedResult: false,
},
"free-form tags have required node-group tags": {
nodeGroupTags: map[string]string{
"testTag": "testTagValue",
},
freeFormTags: map[string]string{
"foo": "bar",
"testTag": "testTagValue",
},
definedTags: nil,
expectedResult: true,
},
"defined tags have required node-group tags": {
nodeGroupTags: map[string]string{
"ns.testTag": "testTagValue",
},
freeFormTags: nil,
definedTags: map[string]map[string]interface{}{
"ns": {
"testTag": "testTagValue",
},
},
expectedResult: true,
},
}

for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
result := validateNodepoolTags(tc.nodeGroupTags, tc.freeFormTags, tc.definedTags)
if result != tc.expectedResult {
t.Errorf("Testcase '%s' failed: got %t ; expected %t", name, result, tc.expectedResult)
}
})
}
}
Loading

0 comments on commit 6a02299

Please sign in to comment.