Skip to content

Commit b09a3c3

Browse files
Fix ODCR usage with Flexible Instance Types
1. Move the code that generates the RunInstances policies for ODCRs to a better place to clean up the code. 2. Add the CreateFleet policy for Capacity Reservation Resource Group ARNs and the ListGroupResources policy to the head node. These policies are required to call create_fleet with a Capacity Reservation Resource Group. 3. Only run the validators against the first instance type in InstanceTypes. 4. Make the test_efa test use a Capacity Reservation Resource Group ARN. Signed-off-by: Hanwen <[email protected]>
1 parent bfc34e3 commit b09a3c3

File tree

3 files changed

+33
-29
lines changed

3 files changed

+33
-29
lines changed

cli/src/pcluster/config/cluster_config.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,13 +2464,17 @@ def _register_validators(self):
24642464
self._register_validator(
24652465
CapacityReservationValidator,
24662466
capacity_reservation_id=cr_target.capacity_reservation_id,
2467-
instance_type=compute_resource.instance_type,
2467+
# ToDo: This validator is only correct for a single instance type
2468+
# Add more validators to check ODCR with flexible instance types
2469+
instance_type=compute_resource.instance_types[0],
24682470
subnet=queue.networking.subnet_ids[0],
24692471
)
24702472
self._register_validator(
24712473
CapacityReservationResourceGroupValidator,
24722474
capacity_reservation_resource_group_arn=cr_target.capacity_reservation_resource_group_arn,
2473-
instance_type=compute_resource.instance_type,
2475+
# ToDo: This validator is only correct for a single instance type
2476+
# Add more validators to check ODCR with flexible instance types
2477+
instance_type=compute_resource.instance_types[0],
24742478
subnet=queue.networking.subnet_ids[0],
24752479
)
24762480

cli/src/pcluster/templates/cdk_builder_utils.py

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,32 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
625625
),
626626
]
627627
)
628+
capacity_reservation_ids = self._config.capacity_reservation_ids
629+
if capacity_reservation_ids:
630+
policy.append(
631+
iam.PolicyStatement(
632+
actions=["ec2:RunInstances"],
633+
effect=iam.Effect.ALLOW,
634+
resources=[
635+
self._format_arn(
636+
service="ec2",
637+
resource=f"capacity-reservation/{capacity_reservation_id}",
638+
)
639+
for capacity_reservation_id in capacity_reservation_ids
640+
],
641+
)
642+
)
643+
capacity_reservation_resource_group_arns = self._config.capacity_reservation_resource_group_arns
644+
if capacity_reservation_resource_group_arns:
645+
policy.extend(
646+
[
647+
iam.PolicyStatement(
648+
actions=["ec2:RunInstances", "ec2:CreateFleet", "resource-groups:ListGroupResources"],
649+
effect=iam.Effect.ALLOW,
650+
resources=capacity_reservation_resource_group_arns,
651+
)
652+
]
653+
)
628654

629655
if self._config.scheduling.scheduler == "plugin":
630656
cluster_shared_artifacts = get_attr(
@@ -654,32 +680,6 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
654680
]
655681
)
656682

657-
if self._config.scheduling.scheduler != "awsbatch":
658-
capacity_reservation_ids = self._config.capacity_reservation_ids
659-
if self._config.capacity_reservation_ids:
660-
policy.append(
661-
iam.PolicyStatement(
662-
actions=["ec2:RunInstances"],
663-
effect=iam.Effect.ALLOW,
664-
resources=[
665-
self._format_arn(
666-
service="ec2",
667-
resource=f"capacity-reservation/{capacity_reservation_id}",
668-
)
669-
for capacity_reservation_id in capacity_reservation_ids
670-
],
671-
)
672-
)
673-
capacity_reservation_resource_group_arns = self._config.capacity_reservation_resource_group_arns
674-
if capacity_reservation_resource_group_arns:
675-
policy.append(
676-
iam.PolicyStatement(
677-
actions=["ec2:RunInstances"],
678-
effect=iam.Effect.ALLOW,
679-
resources=capacity_reservation_resource_group_arns,
680-
)
681-
)
682-
683683
if self._config.directory_service:
684684
policy.append(
685685
iam.PolicyStatement(

tests/integration-tests/tests/efa/test_efa/test_efa/pcluster.config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Scheduling:
3030
{% if instance == "p4d.24xlarge" %}GdrSupport: true{% endif %}
3131
{% if instance == "p4d.24xlarge" %}
3232
CapacityReservationTarget:
33-
CapacityReservationId: cr-0fa65fcdbd597f551
33+
CapacityReservationResourceGroupArn: arn:aws:resource-groups:us-east-1:447714826191:group/EC2CRGroup
3434
{% endif %}
3535
SharedStorage:
3636
- MountDir: /shared

0 commit comments

Comments
 (0)