Skip to content

Commit

Permalink
Merge pull request #14 from elezar/CNT-4457/switch-to-v1alpha3
Browse files Browse the repository at this point in the history
Update kubelet plugin to v1alpha3 API
  • Loading branch information
klueska authored Mar 8, 2024
2 parents 1646035 + 91a444d commit 2e7ad7b
Show file tree
Hide file tree
Showing 1,024 changed files with 39,778 additions and 24,296 deletions.
20 changes: 15 additions & 5 deletions cmd/nvidia-dra-controller/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,17 @@ func (d driver) GetClaimParameters(ctx context.Context, claim *resourcev1.Resour
return nil, fmt.Errorf("unknown ResourceClaim.ParametersRef.Kind: %v", claim.Spec.ParametersRef.Kind)
}

func (d driver) Allocate(ctx context.Context, claim *resourcev1.ResourceClaim, claimParameters interface{}, class *resourcev1.ResourceClass, classParameters interface{}, selectedNode string) (*resourcev1.AllocationResult, error) {
// Allocate runs the allocation logic for every entry in cas against
// selectedNode and records each outcome on the entry itself.
//
// Nothing is returned: the result for a claim is written to its Allocation
// and Error fields, and the loop always visits every entry regardless of
// whether an earlier one produced an error.
func (d driver) Allocate(ctx context.Context, cas []*controller.ClaimAllocation, selectedNode string) {
	// NOTE: work that is the same for every claim (e.g. the selectedNode == ""
	// check, client setup and CRD fetching) is currently repeated inside each
	// d.allocate call. A production version of the driver should perform it
	// once, prior to this loop, and reuse it for every iteration.
	for i := range cas {
		entry := cas[i]
		result, err := d.allocate(ctx, entry.Claim, entry.ClaimParameters, entry.Class, entry.ClassParameters, selectedNode)
		entry.Allocation, entry.Error = result, err
	}
}

func (d driver) allocate(ctx context.Context, claim *resourcev1.ResourceClaim, claimParameters interface{}, class *resourcev1.ResourceClass, classParameters interface{}, selectedNode string) (*resourcev1.AllocationResult, error) {
if selectedNode == "" {
return nil, fmt.Errorf("TODO: immediate allocations not yet supported")
}
Expand All @@ -126,6 +136,10 @@ func (d driver) Allocate(ctx context.Context, claim *resourcev1.ResourceClaim, c
return nil, fmt.Errorf("error retrieving node specific Gpu CRD: %w", err)
}

if crd.Status != nascrd.NodeAllocationStateStatusReady {
return nil, fmt.Errorf("NodeAllocationStateStatus: %v", crd.Status)
}

if crd.Spec.AllocatedClaims == nil {
crd.Spec.AllocatedClaims = make(map[string]nascrd.AllocatedDevices)
}
Expand All @@ -134,10 +148,6 @@ func (d driver) Allocate(ctx context.Context, claim *resourcev1.ResourceClaim, c
return buildAllocationResult(selectedNode, true), nil
}

if crd.Status != nascrd.NodeAllocationStateStatusReady {
return nil, fmt.Errorf("NodeAllocationStateStatus: %v", crd.Status)
}

var onSuccess OnSuccessCallback
classParams, ok := classParameters.(*gpucrd.DeviceClassParametersSpec)
if !ok {
Expand Down
53 changes: 35 additions & 18 deletions cmd/nvidia-dra-plugin/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1alpha3"

nascrd "github.com/NVIDIA/k8s-dra-driver/api/nvidia.com/resource/gpu/nas/v1alpha1"
nasclient "github.com/NVIDIA/k8s-dra-driver/api/nvidia.com/resource/gpu/nas/v1alpha1/client"
Expand Down Expand Up @@ -99,39 +99,56 @@ func (d *driver) Shutdown(ctx context.Context) error {
})
}

func (d *driver) NodePrepareResource(ctx context.Context, req *drapbv1.NodePrepareResourceRequest) (*drapbv1.NodePrepareResourceResponse, error) {
// NodePrepareResources prepares each claim listed in req and returns the
// per-claim responses keyed by claim UID.
//
// The error result of this method is always nil; the outcome for an
// individual claim is whatever d.nodePrepareResource returns for it.
func (d *driver) NodePrepareResources(ctx context.Context, req *drapbv1.NodePrepareResourcesRequest) (*drapbv1.NodePrepareResourcesResponse, error) {
	klog.Infof("NodePrepareResource is called: number of claims: %d", len(req.Claims))

	responses := map[string]*drapbv1.NodePrepareResourceResponse{}

	// NOTE: every claim is handled independently by d.nodePrepareResource. A
	// production version should move common operations out of this loop; for
	// instance, the CR could be updated once after all hardware has been
	// prepared.
	for _, c := range req.Claims {
		responses[c.Uid] = d.nodePrepareResource(ctx, c)
	}

	return &drapbv1.NodePrepareResourcesResponse{Claims: responses}, nil
}

func (d *driver) nodePrepareResource(ctx context.Context, claim *drapbv1.Claim) *drapbv1.NodePrepareResourceResponse {
d.Lock()
defer d.Unlock()

klog.Infof("NodePrepareResource is called: request: %+v", req)

isPrepared, prepared, err := d.IsPrepared(ctx, req.ClaimUid)
isPrepared, prepared, err := d.isPrepared(ctx, claim.Uid)
if err != nil {
return nil, fmt.Errorf("error checking if claim is already prepared: %w", err)
return &drapbv1.NodePrepareResourceResponse{
Error: fmt.Sprintf("error checking if claim is already prepared: %v", err),
}
}

if isPrepared {
klog.Infof("Returning cached devices for claim '%v': %s", req.ClaimUid, prepared)
return &drapbv1.NodePrepareResourceResponse{CdiDevices: prepared}, nil
klog.Infof("Returning cached devices for claim '%v': %s", claim.Uid, prepared)
return &drapbv1.NodePrepareResourceResponse{CDIDevices: prepared}
}

prepared, err = d.Prepare(ctx, req.ClaimUid)
prepared, err = d.prepare(ctx, claim.Uid)
if err != nil {
return nil, fmt.Errorf("error preparing devices for claim %v: %w", req.ClaimUid, err)
return &drapbv1.NodePrepareResourceResponse{
Error: fmt.Sprintf("error preparing devices for claim %v: %v", claim.Uid, err),
}
}

klog.Infof("Returning newly prepared devices for claim '%v': %s", req.ClaimUid, prepared)
return &drapbv1.NodePrepareResourceResponse{CdiDevices: prepared}, nil
klog.Infof("Returning newly prepared devices for claim '%v': %s", claim.Uid, prepared)
return &drapbv1.NodePrepareResourceResponse{CDIDevices: prepared}
}

func (d *driver) NodeUnprepareResource(ctx context.Context, req *drapbv1.NodeUnprepareResourceRequest) (*drapbv1.NodeUnprepareResourceResponse, error) {
func (d *driver) NodeUnprepareResources(ctx context.Context, req *drapbv1.NodeUnprepareResourcesRequest) (*drapbv1.NodeUnprepareResourcesResponse, error) {
// We don't unprepare as part of NodeUnprepareResource, we do it
// asynchronously when the claims themselves are deleted and the
// AllocatedClaim has been removed.
return &drapbv1.NodeUnprepareResourceResponse{}, nil
return &drapbv1.NodeUnprepareResourcesResponse{}, nil
}

func (d *driver) IsPrepared(ctx context.Context, claimUID string) (bool, []string, error) {
func (d *driver) isPrepared(ctx context.Context, claimUID string) (bool, []string, error) {
err := d.nasclient.Get(ctx)
if err != nil {
return false, nil, err
Expand All @@ -142,7 +159,7 @@ func (d *driver) IsPrepared(ctx context.Context, claimUID string) (bool, []strin
return false, nil, nil
}

func (d *driver) Prepare(ctx context.Context, claimUID string) ([]string, error) {
func (d *driver) prepare(ctx context.Context, claimUID string) ([]string, error) {
var err error
var prepared []string
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
Expand All @@ -169,7 +186,7 @@ func (d *driver) Prepare(ctx context.Context, claimUID string) ([]string, error)
return prepared, nil
}

func (d *driver) Unprepare(ctx context.Context, claimUID string) error {
func (d *driver) unprepare(ctx context.Context, claimUID string) error {
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err := d.nasclient.Get(ctx)
if err != nil {
Expand Down Expand Up @@ -329,7 +346,7 @@ func (d *driver) cleanupClaimAllocations(ctx context.Context, nas *nascrd.NodeAl
go func(claimUID string) {
defer wg.Done()
klog.Infof("Attempting to unprepare resources for claim %v", claimUID)
err := d.Unprepare(ctx, claimUID)
err := d.unprepare(ctx, claimUID)
if err != nil {
errors <- fmt.Errorf("error unpreparing resources for claim %v: %w", claimUID, err)
return
Expand Down
96 changes: 35 additions & 61 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,24 @@ module github.com/NVIDIA/k8s-dra-driver

go 1.20

replace (
k8s.io/api => k8s.io/api v0.27.0-beta.0
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.27.0-beta.0
k8s.io/apimachinery => k8s.io/apimachinery v0.27.0-beta.0
k8s.io/apiserver => k8s.io/apiserver v0.27.0-beta.0
k8s.io/cli-runtime => k8s.io/cli-runtime v0.27.0-beta.0
k8s.io/client-go => k8s.io/client-go v0.27.0-beta.0
k8s.io/cloud-provider => k8s.io/cloud-provider v0.27.0-beta.0
k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.27.0-beta.0
k8s.io/code-generator => k8s.io/code-generator v0.27.0-beta.0
k8s.io/component-base => k8s.io/component-base v0.27.0-beta.0
k8s.io/component-helpers => k8s.io/component-helpers v0.27.0-beta.0
k8s.io/cri-api => k8s.io/cri-api v0.27.0-beta.0
k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.27.0-beta.0
k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v0.27.0-beta.0
k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.27.0-beta.0
k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.27.0-beta.0
k8s.io/kube-proxy => k8s.io/kube-proxy v0.27.0-beta.0
k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.27.0-beta.0
k8s.io/kubectl => k8s.io/kubectl v0.27.0-beta.0
k8s.io/kubelet => k8s.io/kubelet v0.27.0-beta.0
k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.27.0-beta.0
k8s.io/metrics => k8s.io/metrics v0.27.0-beta.0
k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.27.0-beta.0
)

require (
github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d
github.com/NVIDIA/go-nvml v0.12.0-2
github.com/NVIDIA/nvidia-container-toolkit v1.14.4-0.20231120225202-039d7fd32429
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/client_golang v1.16.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.1
golang.org/x/mod v0.15.0
k8s.io/api v0.27.0-beta.0
k8s.io/apimachinery v0.27.0-beta.0
k8s.io/client-go v0.27.0-beta.0
k8s.io/component-base v0.27.0-beta.0
k8s.io/dynamic-resource-allocation v0.0.0-00010101000000-000000000000
k8s.io/klog/v2 v2.90.1
k8s.io/kubelet v0.27.0-beta.0
k8s.io/mount-utils v0.26.3
k8s.io/api v0.29.2
k8s.io/apimachinery v0.29.2
k8s.io/client-go v0.29.2
k8s.io/component-base v0.29.2
k8s.io/dynamic-resource-allocation v0.29.2
k8s.io/klog/v2 v2.110.1
k8s.io/kubelet v0.29.2
k8s.io/mount-utils v0.29.2
tags.cncf.io/container-device-interface v0.6.2
tags.cncf.io/container-device-interface/specs-go v0.6.0
)
Expand All @@ -56,27 +30,27 @@ require (
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-logr/logr v1.3.0 // indirect
github.com/go-logr/zapr v1.2.3 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.1 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/moby/sys/mountinfo v0.6.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
Expand All @@ -85,32 +59,32 @@ require (
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/spf13/cobra v1.6.1 // indirect
github.com/spf13/cobra v1.7.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.8.0 // indirect
go.uber.org/atomic v1.10.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.21.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.7.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.1.0 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/oauth2 v0.10.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
google.golang.org/grpc v1.56.3 // indirect
google.golang.org/protobuf v1.30.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
google.golang.org/grpc v1.58.3 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/kube-openapi v0.0.0-20230308215209-15aac26d736a // indirect
k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)
Loading

0 comments on commit 2e7ad7b

Please sign in to comment.