diff --git a/perfkitbenchmarker/container_service.py b/perfkitbenchmarker/container_service.py
index f8d5905b0..8657ebfd3 100644
--- a/perfkitbenchmarker/container_service.py
+++ b/perfkitbenchmarker/container_service.py
@@ -2143,7 +2143,7 @@ def _ModifyPodSpecPlacementYaml(
     del name
     node_selectors = self.GetNodeSelectors(machine_type)
     if node_selectors:
-      pod_spec_yaml['nodeSelector'].update(node_selectors)
+      pod_spec_yaml.setdefault('nodeSelector', {}).update(node_selectors)
 
   def DeployIngress(
       self, name: str, namespace: str, port: int, health_path: str = ''
diff --git a/perfkitbenchmarker/data/container/kubernetes_scale/aws-gpu-nodepool.yaml.j2 b/perfkitbenchmarker/data/container/kubernetes_scale/aws-gpu-nodepool.yaml.j2
new file mode 100644
index 000000000..9d6fce3d9
--- /dev/null
+++ b/perfkitbenchmarker/data/container/kubernetes_scale/aws-gpu-nodepool.yaml.j2
@@ -0,0 +1,38 @@
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: {{ gpu_nodepool_name | default('gpu') }}
+spec:
+  disruption:
+    consolidateAfter: {{ gpu_consolidate_after | default('1m') }}
+    consolidationPolicy: {{ gpu_consolidation_policy | default('WhenEmptyOrUnderutilized') }}
+  limits:
+    cpu: {{ gpu_nodepool_cpu_limit | default(1000) }}
+  template:
+    metadata:
+      labels:
+        pkb_nodepool: {{ gpu_nodepool_label | default('gpu') }}
+    spec:
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: {{ karpenter_ec2nodeclass_name | default('default') }}
+      requirements:
+      - key: kubernetes.io/arch
+        operator: In
+        values: {{ gpu_arch | default(['amd64']) }}
+      - key: kubernetes.io/os
+        operator: In
+        values: {{ gpu_os | default(['linux']) }}
+      - key: karpenter.sh/capacity-type
+        operator: In
+        values: {{ gpu_capacity_types | default(['on-demand']) }}
+      - key: karpenter.k8s.aws/instance-category
+        operator: In
+        values: {{ gpu_instance_categories | default(['g']) }}
+      - key: karpenter.k8s.aws/instance-family
+        operator: In
+        values: {{ gpu_instance_families | default(['g6','g6e']) }}
+      taints:
+      - key: {{ gpu_taint_key | default('nvidia.com/gpu') }}
+        effect: NoSchedule
diff --git a/perfkitbenchmarker/data/container/kubernetes_scale/kubernetes_scale.yaml.j2 b/perfkitbenchmarker/data/container/kubernetes_scale/kubernetes_scale.yaml.j2
index 9b79aa6cd..a8ef0a611 100644
--- a/perfkitbenchmarker/data/container/kubernetes_scale/kubernetes_scale.yaml.j2
+++ b/perfkitbenchmarker/data/container/kubernetes_scale/kubernetes_scale.yaml.j2
@@ -20,6 +20,13 @@ spec:
        command: {{ Command }}
      {%- endif %}
      resources:
+        requests:
+          cpu: {{ CpuRequest }}
+          memory: {{ MemoryRequest }}
+          ephemeral-storage: {{ EphemeralStorageRequest }}
+        {%- if NvidiaGpuRequest %}
+          nvidia.com/gpu: {{ NvidiaGpuRequest }}
+        {%- endif %}
        limits:
          cpu: {{ CpuRequest }}
          memory: {{ MemoryRequest }}
@@ -53,3 +60,8 @@
        operator: "Exists"
        effect: "NoExecute"
        tolerationSeconds: {{ PodTimeout }}
+      {%- if NvidiaGpuRequest and Cloud == 'aws' %}
+      - key: {{ GpuTaintKey | default('nvidia.com/gpu') }}
+        operator: Exists
+        effect: NoSchedule
+      {%- endif %}
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes_scale_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes_scale_benchmark.py
index 8e8cb21f0..b0c8188ba 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes_scale_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes_scale_benchmark.py
@@ -88,17 +88,44 @@ def GetConfig(user_config):
   config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
   return config
 
+def _IsEksKarpenterAwsGpu(cluster: container_service.KubernetesCluster) -> bool:
+  return bool(
+      virtual_machine.GPU_COUNT.value
+      and FLAGS.cloud.lower() == 'aws'
+      and getattr(cluster, 'CLUSTER_TYPE', None) == 'Karpenter'
+  )
+
+def _EnsureEksKarpenterGpuNodepool(cluster: container_service.KubernetesCluster) -> None:
+  """Ensures a GPU NodePool exists for EKS Karpenter before applying workloads."""
+  if not _IsEksKarpenterAwsGpu(cluster):
+    return
+  cluster.ApplyManifest(
+      'container/kubernetes_scale/aws-gpu-nodepool.yaml.j2',
+      gpu_nodepool_name='gpu',
+      gpu_nodepool_label='gpu',
+      karpenter_ec2nodeclass_name='default',
+      gpu_instance_categories=['g'],
+      gpu_instance_families=['g6', 'g6e'],
+      gpu_capacity_types=['on-demand'],
+      gpu_arch=['amd64'],
+      gpu_os=['linux'],
+      gpu_taint_key='nvidia.com/gpu',
+      gpu_consolidate_after='1m',
+      gpu_consolidation_policy='WhenEmptyOrUnderutilized',
+      gpu_nodepool_cpu_limit=1000,
+  )
 
 def Prepare(bm_spec: benchmark_spec.BenchmarkSpec):
   """Sets additional spec attributes."""
   bm_spec.always_call_cleanup = True
+  assert bm_spec.container_cluster
+  _EnsureEksKarpenterGpuNodepool(bm_spec.container_cluster)
 
 
 def _GetRolloutCreationTime(rollout_name: str) -> int:
   """Returns the time when the rollout was created."""
   out, _, _ = container_service.RunRetryableKubectlCommand([
-      'rollout',
-      'history',
+      'get',
       rollout_name,
       '-o',
       'jsonpath={.metadata.creationTimestamp}',
@@ -180,8 +207,10 @@
   max_wait_time = _GetScaleTimeout()
   resource_timeout = max_wait_time + 60 * 5  # 5 minutes after waiting to avoid
   # pod delete events from polluting data collection.
-  yaml_docs = cluster.ConvertManifestToYamlDicts(
-      MANIFEST_TEMPLATE,
+
+  is_eks_karpenter_aws_gpu = _IsEksKarpenterAwsGpu(cluster)
+
+  manifest_kwargs = dict(
       Name='kubernetes-scaleup',
       Replicas=num_new_pods,
       CpuRequest=CPUS_PER_POD.value,
@@ -192,12 +221,25 @@
       EphemeralStorageRequest='10Mi',
       RolloutTimeout=max_wait_time,
       PodTimeout=resource_timeout,
+      Cloud=FLAGS.cloud.lower(),
+  )
+
+  # GpuTaintKey is still needed for tolerations in the yaml template
+  if is_eks_karpenter_aws_gpu:
+    manifest_kwargs['GpuTaintKey'] = 'nvidia.com/gpu'
+
+  yaml_docs = cluster.ConvertManifestToYamlDicts(
+      MANIFEST_TEMPLATE,
+      **manifest_kwargs,
   )
+
+  # Always use ModifyPodSpecPlacementYaml to add nodeSelectors via GetNodeSelectors()
   cluster.ModifyPodSpecPlacementYaml(
       yaml_docs,
       'kubernetes-scaleup',
       cluster.default_nodepool.machine_type,
   )
+
   resource_names = cluster.ApplyYaml(yaml_docs)
   assert resource_names
 
diff --git a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py
index fd856b1e6..cfb9ed2d1 100644
--- a/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py
+++ b/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py
@@ -1283,10 +1283,16 @@ def ResizeNodePool(
   def GetNodeSelectors(self, machine_type: str | None = None) -> dict[str, str]:
     """Gets the node selectors section of a yaml for the provider."""
-    machine_family = util.GetMachineFamily(machine_type)
-    if machine_family:
-      return {'karpenter.k8s.aws/instance-family': machine_family}
-    return {}
+    selectors = {}
+    # If GPU is requested, use the GPU nodepool
+    if virtual_machine.GPU_COUNT.value:
+      selectors['karpenter.sh/nodepool'] = 'gpu'
+    else:
+      # Otherwise, use instance-family selector if machine_type is specified
+      machine_family = util.GetMachineFamily(machine_type)
+      if machine_family:
+        selectors['karpenter.k8s.aws/instance-family'] = machine_family
+    return selectors
 
   def GetNodePoolNames(self) -> list[str]:
     """Gets node pool names for the cluster.