diff --git a/.github/workflows/e2e_aws.yaml b/.github/workflows/e2e_aws.yaml
index 68e56a664c..8c16ec1c72 100644
--- a/.github/workflows/e2e_aws.yaml
+++ b/.github/workflows/e2e_aws.yaml
@@ -68,6 +68,8 @@ jobs:
     needs: aws-credentials
     if: needs.aws-credentials.outputs.has_secrets == 'true'
     runs-on: ubuntu-22.04
+    # Remove this when the job becomes stable
+    continue-on-error: ${{ inputs.cluster_type == 'eks' }}
     defaults:
       run:
         working-directory: src/cloud-api-adaptor
@@ -107,6 +109,11 @@ jobs:
           echo "GO_VERSION=${go_version}" >> "$GITHUB_ENV"
           echo "ORAS_VERSION=$(yq -e '.tools.oras' versions.yaml)" >> "$GITHUB_ENV"
 
+      - name: Install qemu-img
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y qemu-utils
+
       - name: Setup Golang version ${{ env.GO_VERSION }}
         uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
         with:
@@ -152,12 +159,29 @@ jobs:
           cat kustomization.yaml
           echo "::endgroup::"
 
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
+        with:
+          aws-region: us-east-1
+          role-to-assume: ${{ secrets.AWS_IAM_ROLE_ARN }}
+          role-duration-seconds: 7200
+
       - name: Config aws
         run: |
+          if [[ "${CLUSTER_TYPE}" = "eks" ]]; then
+            DISABLE_CVM="false"
+            AWS_REGION="us-east-2"
+            echo "AWS_REGION=${AWS_REGION}" >> "$GITHUB_ENV"
+            # m6a.large (AMD SEV-SNP) is available in us-east-2
+            echo "aws_region=\"${AWS_REGION}\"" >> aws.properties
+            echo "podvm_aws_instance_type=\"m6a.large\"" >> aws.properties
+          else
+            DISABLE_CVM="true"
+          fi
           cat <<EOF >>aws.properties
           CAA_IMAGE="${CAA_IMAGE}"
           container_runtime="${CONTAINER_RUNTIME}"
-          disablecvm="true"
+          disablecvm="${DISABLE_CVM}"
          cluster_type="${CLUSTER_TYPE}"
          ssh_kp_name="caa-e2e-test"
          resources_basename="${RESOURCES_BASENAME}"
@@ -167,13 +191,6 @@ jobs:
           cat aws.properties
           echo "::endgroup::"
 
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
-        with:
-          aws-region: us-east-1
-          role-to-assume: ${{ secrets.AWS_IAM_ROLE_ARN }}
-          role-duration-seconds: 7200
-
       - name: Create on-prem cluster
         if: inputs.cluster_type == 'onprem'
         run: |
@@ -188,6 +205,13 @@ jobs:
           echo "KUBECONFIG=$HOME/.kcli/clusters/peer-pods/auth/kubeconfig" >> "$GITHUB_ENV"
           echo "::endgroup::"
 
+      - name: Install eksctl
+        if: inputs.cluster_type == 'eks'
+        run: |
+          curl -sLO "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
+          tar -xzf eksctl_Linux_amd64.tar.gz -C /usr/local/bin && rm eksctl_Linux_amd64.tar.gz
+          eksctl version
+
       - name: run tests
         id: runTests
        run: |
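Both the `Config aws` step above and the cleanup script later in this patch hand data to the Go provisioner through flat `key="value"` files (aws.properties, TEST_PROVISION_FILE). As a reading aid, here is a minimal sketch of parsing that format in Go; it assumes a simple one-pair-per-line syntax with no escaping, and the real test framework's parser may differ.

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// parseProperties reads key="value" pairs, one per line, skipping
// blank lines and lines without an '=' separator.
func parseProperties(path string) (map[string]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	props := map[string]string{}
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		key, value, found := strings.Cut(line, "=")
		if !found || key == "" {
			continue
		}
		// Strip the surrounding double quotes, if any.
		props[key] = strings.Trim(value, "\"")
	}
	return props, scanner.Err()
}

func main() {
	props, err := parseProperties("aws.properties")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(props["cluster_type"], props["podvm_aws_instance_type"])
}
```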
diff --git a/.github/workflows/e2e_run_all.yaml b/.github/workflows/e2e_run_all.yaml
index 2f3921111c..da009aa258 100644
--- a/.github/workflows/e2e_run_all.yaml
+++ b/.github/workflows/e2e_run_all.yaml
@@ -223,14 +223,17 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        container_runtime:
-          - crio
-        os:
-          - ubuntu
-        provider:
-          - generic
-        arch:
-          - amd64
+        include:
+          - container_runtime: crio
+            cluster_type: onprem
+            os: ubuntu
+            provider: generic
+            arch: amd64
+          - container_runtime: containerd
+            cluster_type: eks
+            os: ubuntu
+            provider: generic
+            arch: amd64
     permissions:
       id-token: write # Required by aws-actions/configure-aws-credentials
       contents: read # Required by aws-actions/configure-aws-credentials
@@ -238,6 +241,7 @@ jobs:
     with:
       caa_image: ${{ inputs.registry }}/cloud-api-adaptor:${{ inputs.caa_image_tag }}
       container_runtime: ${{ matrix.container_runtime }}
+      cluster_type: ${{ matrix.cluster_type }}
       podvm_image: ${{ inputs.registry }}/podvm-${{ matrix.provider }}-${{ matrix.os }}-${{ matrix.arch }}:${{ inputs.podvm_image_tag }}
       git_ref: ${{ inputs.git_ref }}
       oras: false
diff --git a/hack/ci-e2e-aws-cleanup.sh b/hack/ci-e2e-aws-cleanup.sh
index 7166984d31..60ad5e31b2 100755
--- a/hack/ci-e2e-aws-cleanup.sh
+++ b/hack/ci-e2e-aws-cleanup.sh
@@ -13,35 +13,46 @@ if [ -z "${RESOURCES_BASENAME:-}" ]; then
     exit 1
 fi
 
+AWS_REGION=${AWS_REGION:-"us-east-1"}
+CLUSTER_TYPE=${CLUSTER_TYPE:-onprem}
+
 delete_vpcs() {
+    if [ "${CLUSTER_TYPE}" = "eks" ]; then
+        local cluster_name="${RESOURCES_BASENAME}-k8s"
+        if aws eks describe-cluster --name "$cluster_name" --region "${AWS_REGION}" >/dev/null 2>&1; then
+            echo "cluster_type=\"eks\"" >> "$TEST_PROVISION_FILE"
+            echo "eks_name=\"${cluster_name}\"" >> "$TEST_PROVISION_FILE"
+        fi
+    fi
+
     local tag_vpc="${RESOURCES_BASENAME}-vpc"
 
-    read -r -a vpcs <<< "$(aws ec2 describe-vpcs --filters Name=tag:Name,Values=$tag_vpc --query 'Vpcs[*].VpcId' --output text)"
+    read -r -a vpcs <<< "$(aws ec2 describe-vpcs --filters Name=tag:Name,Values=$tag_vpc --query 'Vpcs[*].VpcId' --region "${AWS_REGION}" --output text)"
     if [ ${#vpcs[@]} -eq 0 ]; then
-        echo "There aren't VPCs to delete"
+        echo "There are no VPCs to delete in ${AWS_REGION}"
         return
     fi
 
     for vpc in "${vpcs[@]}"; do
-        echo "aws_vpc_id=\"$vpc\"" > "$TEST_PROVISION_FILE"
+        echo "aws_vpc_id=\"$vpc\"" >> "$TEST_PROVISION_FILE"
         # Find related subnets
-        read -r -a subnets <<< "$(aws ec2 describe-subnets --filter "Name=vpc-id,Values=$vpc" --query 'Subnets[*].SubnetId' --output text)"
-        for net in "${subnets[@]}"; do
-            echo "aws_vpc_subnet_id=\"$net\"" >> "$TEST_PROVISION_FILE"
-        done
+        read -r -a subnets <<< "$(aws ec2 describe-subnets --filter "Name=vpc-id,Values=$vpc" --query 'Subnets[*].SubnetId' --region "${AWS_REGION}" --output text)"
+        if [ ${#subnets[@]} -gt 0 ]; then
+            echo "aws_vpc_subnet_id=\"$(echo "${subnets[*]}" | tr ' ' ',')\"" >> "$TEST_PROVISION_FILE"
+        fi
         # Find related security groups
-        read -r -a sgs <<< "$(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpc" "Name=tag:Name,Values=${RESOURCES_BASENAME}-sg" --query 'SecurityGroups[*].GroupId' --output text)"
+        read -r -a sgs <<< "$(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpc" "Name=tag:Name,Values=${RESOURCES_BASENAME}-sg" --query 'SecurityGroups[*].GroupId' --region "${AWS_REGION}" --output text)"
         for sg in "${sgs[@]}"; do
             echo "aws_vpc_sg_id=\"$sg\"" >> "$TEST_PROVISION_FILE"
         done
         # Find related route tables and internet gateways
-        read -r -a rtbs <<< "$(aws ec2 describe-route-tables --filters "Name=vpc-id,Values=$vpc" "Name=tag:Name,Values=${RESOURCES_BASENAME}-rtb" --query 'RouteTables[*].RouteTableId' --output text)"
+        read -r -a rtbs <<< "$(aws ec2 describe-route-tables --filters "Name=vpc-id,Values=$vpc" "Name=tag:Name,Values=${RESOURCES_BASENAME}-rtb" --query 'RouteTables[*].RouteTableId' --region "${AWS_REGION}" --output text)"
         for rtb in "${rtbs[@]}"; do
             echo "aws_vpc_rt_id=\"$rtb\"" >> "$TEST_PROVISION_FILE"
-            read -r -a igws <<< "$(aws ec2 describe-route-tables --filter "Name=route-table-id,Values=$rtb" --query 'RouteTables[0].Routes[*].GatewayId' --output text)"
+            read -r -a igws <<< "$(aws ec2 describe-route-tables --filter "Name=route-table-id,Values=$rtb" --query 'RouteTables[0].Routes[*].GatewayId' --region "${AWS_REGION}" --output text)"
             for igw in "${igws[@]}"; do
                 [ "$igw" != "local" ] && echo "aws_vpc_igw_id=\"$igw\"" >> "$TEST_PROVISION_FILE"
             done
@@ -55,32 +66,32 @@ delete_vpcs() {
 
 delete_amis() {
     local tag_ami="${RESOURCES_BASENAME}-img"
 
-    read -r -a amis <<< "$(aws ec2 describe-images --owners self --filters "Name=tag:Name,Values=$tag_ami" --query 'Images[*].ImageId' --output text)"
+    read -r -a amis <<< "$(aws ec2 describe-images --owners self --filters "Name=tag:Name,Values=$tag_ami" --query 'Images[*].ImageId' --region "${AWS_REGION}" --output text)"
     if [ ${#amis[@]} -eq 0 ]; then
-        echo "There aren't AMIs to delete."
+        echo "There are no AMIs to delete in ${AWS_REGION}."
         return
     fi
 
     for ami in "${amis[@]}"; do
         echo "Deregistering AMI: $ami"
         # Find related snapshots
-        snap_ids=$(aws ec2 describe-images --image-ids "$ami" --query 'Images[*].BlockDeviceMappings[*].Ebs.SnapshotId' --output text)
-        aws ec2 deregister-image --image-id "$ami"
+        snap_ids=$(aws ec2 describe-images --image-ids "$ami" --query 'Images[*].BlockDeviceMappings[*].Ebs.SnapshotId' --region "${AWS_REGION}" --output text)
+        aws ec2 deregister-image --image-id "$ami" --region "${AWS_REGION}"
         for snap in $snap_ids; do
             echo "Deleting snapshot: $snap"
-            aws ec2 delete-snapshot --snapshot-id "$snap"
+            aws ec2 delete-snapshot --snapshot-id "$snap" --region "${AWS_REGION}"
         done
     done
 
     # Delete the vmimport role if it exists
     local vmimport_role="${RESOURCES_BASENAME}-vmimport"
-    if aws iam get-role --role-name "$vmimport_role" >/dev/null 2>&1; then
+    if aws iam get-role --role-name "$vmimport_role" --region "${AWS_REGION}" >/dev/null 2>&1; then
         echo "Deleting vmimport role: $vmimport_role"
         # First delete the role policy
-        aws iam delete-role-policy --role-name "$vmimport_role" --policy-name "vmimport" 2>/dev/null || true
+        aws iam delete-role-policy --role-name "$vmimport_role" --policy-name "vmimport" --region "${AWS_REGION}" 2>/dev/null || true
         # Then delete the role
-        aws iam delete-role --role-name "$vmimport_role" 2>/dev/null || true
+        aws iam delete-role --role-name "$vmimport_role" --region "${AWS_REGION}" 2>/dev/null || true
     fi
 }
 
@@ -88,19 +99,19 @@ delete_s3_buckets() {
     local tag_bucket="${RESOURCES_BASENAME}-bucket"
 
     # List all buckets and find ones that match our naming pattern
-    read -r -a buckets <<< "$(aws s3api list-buckets --query "Buckets[?contains(Name, '${tag_bucket}')].Name" --output text)"
+    read -r -a buckets <<< "$(aws s3api list-buckets --query "Buckets[?contains(Name, '${tag_bucket}')].Name" --region "${AWS_REGION}" --output text)"
     if [ ${#buckets[@]} -eq 0 ]; then
-        echo "There aren't S3 buckets to delete."
+        echo "There are no S3 buckets to delete in ${AWS_REGION}."
         return
     fi
 
     for bucket in "${buckets[@]}"; do
         echo "Deleting S3 bucket: $bucket"
         # First, delete all objects in the bucket
-        aws s3 rm "s3://$bucket" --recursive 2>/dev/null || true
+        aws s3 rm "s3://$bucket" --recursive --region "${AWS_REGION}" 2>/dev/null || true
         # Then delete the bucket
-        aws s3api delete-bucket --bucket "$bucket" 2>/dev/null || true
+        aws s3api delete-bucket --bucket "$bucket" --region "${AWS_REGION}" 2>/dev/null || true
     done
 }
 
@@ -113,7 +124,7 @@ main() {
 
     echo "Build the caa-provisioner-cli tool"
     cd "${script_dir}/../src/cloud-api-adaptor/test/tools" || exit 1
-    make
+    make BUILTIN_CLOUD_PROVIDERS=aws
 
     delete_vpcs
     delete_amis
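Note the subnet round trip above: the cleanup script now emits all subnets of a VPC as a single comma-joined `aws_vpc_subnet_id` value, and `NewVpc` (further down in this patch) splits it back into a primary and an optional secondary subnet. A self-contained sketch of both halves:

```go
package main

import (
	"fmt"
	"strings"
)

// joinSubnets mirrors the cleanup script: `echo "${subnets[*]}" | tr ' ' ','`.
func joinSubnets(subnets []string) string {
	return strings.Join(subnets, ",")
}

// splitSubnets mirrors NewVpc: the first element is the primary subnet,
// an optional second element is the secondary (EKS) subnet.
func splitSubnets(value string) (primary, secondary string) {
	ids := strings.Split(value, ",")
	primary = strings.TrimSpace(ids[0])
	if len(ids) > 1 {
		secondary = strings.TrimSpace(ids[1])
	}
	return primary, secondary
}

func main() {
	value := joinSubnets([]string{"subnet-0abc", "subnet-0def"})
	primary, secondary := splitSubnets(value)
	fmt.Println(primary, secondary) // subnet-0abc subnet-0def
}
```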
diff --git a/src/cloud-api-adaptor/go.mod b/src/cloud-api-adaptor/go.mod
index d70264eac7..e352a6eddc 100644
--- a/src/cloud-api-adaptor/go.mod
+++ b/src/cloud-api-adaptor/go.mod
@@ -67,7 +67,6 @@ require (
 )
 
 require (
-	github.com/aws/aws-sdk-go-v2/service/eks v1.76.3
 	github.com/aws/aws-sdk-go-v2/service/iam v1.53.1
 	github.com/aws/aws-sdk-go-v2/service/s3 v1.93.2
 	github.com/confidential-containers/cloud-api-adaptor/src/cloud-providers v0.0.0-00010101000000-000000000000
diff --git a/src/cloud-api-adaptor/go.sum b/src/cloud-api-adaptor/go.sum
index f997f1d5ce..a04d4df524 100644
--- a/src/cloud-api-adaptor/go.sum
+++ b/src/cloud-api-adaptor/go.sum
@@ -168,8 +168,6 @@ github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 h1:CjMzUs78RDDv4ROu3JnJn/Ig1r6
 github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16/go.mod h1:uVW4OLBqbJXSHJYA9svT9BluSvvwbzLQ2Crf6UPzR3c=
 github.com/aws/aws-sdk-go-v2/service/ec2 v1.276.1 h1:P7db/Z55pXvwnueLuHUuVlxnqjbAtiadm01+QIC42OA=
 github.com/aws/aws-sdk-go-v2/service/ec2 v1.276.1/go.mod h1:Wg68QRgy2gEGGdmTPU/UbVpdv8sM14bUZmF64KFwAsY=
-github.com/aws/aws-sdk-go-v2/service/eks v1.76.3 h1:840uwcJTIwrMPLuEUQVFKZbPgwnYzc5WDyXMiMYm5Ts=
-github.com/aws/aws-sdk-go-v2/service/eks v1.76.3/go.mod h1:7IU8o/Snul26xioEWN5tgoOas1ISPGsiq5gME5rPh3o=
 github.com/aws/aws-sdk-go-v2/service/iam v1.53.1 h1:xNCUk9XN6Pa9PyzbEfzgRpvEIVlqtth402yjaWvNMu4=
 github.com/aws/aws-sdk-go-v2/service/iam v1.53.1/go.mod h1:GNQZL4JRSGH6L0/SNGOtffaB1vmlToYp3KtcUIB0NhI=
 github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E=
diff --git a/src/cloud-api-adaptor/test/e2e/README.md b/src/cloud-api-adaptor/test/e2e/README.md
index bda699ef96..c1948db56f 100644
--- a/src/cloud-api-adaptor/test/e2e/README.md
+++ b/src/cloud-api-adaptor/test/e2e/README.md
@@ -129,12 +129,14 @@ Use the properties on the table below for AWS:
 |aws_vpc_igw_id|AWS VPC Internet Gateway ID||
 |aws_vpc_rt_id|AWS VPC Route Table ID||
 |aws_vpc_sg_id|AWS VPC Security Groups ID||
-|aws_vpc_subnet_id|AWS VPC Subnet ID||
+|aws_vpc_subnet_id|AWS VPC Subnet ID. Use a comma to pass a secondary subnet for EKS (e.g. 'subnet_id,secondary_subnet_id')||
 |cluster_type|Kubernetes cluster type. Either **onprem** or **eks** (see Notes below)|onprem|
 |container_runtime|Test cluster configured container runtime. Either **containerd** or **crio**|containerd|
 |disablecvm|Set to `true` to disable confidential VM||
+|eks_name|Name of an existing EKS cluster to use (skip provisioning a new one)||
 |pause_image|Kubernetes pause image||
 |podvm_aws_ami_id|AWS AMI ID of the podvm||
+|podvm_aws_instance_type|AWS instance type of the podvm|t2.medium|
 |ssh_kp_name|AWS SSH key-pair name||
 |use_public_ip|Set `true` to instantiate VMs with public IP. If `cluster_type=onprem` then this property is implicitly applied||
 |tunnel_type|Tunnel type||
@@ -144,7 +146,7 @@ Use the properties on the table below for AWS:
 
 * The AWS credentials are obtained from the CLI [configuration files](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). **Important**: the access key and secret are recorded in plain-text in [install/overlays/aws/kustomization.yaml](../../install/overlays/aws/kustomization.yaml)
 * The subnet is created with CIDR IPv4 block 10.0.0.0/25. In case of deploying an EKS cluster, a secondary (private) subnet is created with CIDR IPv4 block 10.0.0.128/25
-* The cluster type **onprem** assumes Kubernetes is already provisioned and its kubeconfig file path can be found at the `KUBECONFIG` environment variable or in the `~/.kube/config` file. Whereas **eks** type instructs to create an [AWS EKS](https://aws.amazon.com/eks/) cluster on the VPC
+* The cluster type **onprem** assumes Kubernetes is already provisioned and its kubeconfig file path can be found at the `KUBECONFIG` environment variable or in the `~/.kube/config` file. Whereas the **eks** type instructs the provisioner to create an [AWS EKS](https://aws.amazon.com/eks/) cluster on the VPC. You must have `eksctl` installed on your workstation or CI runner because it is used to create/destroy EKS clusters.
 * You must have `qemu-img` installed on your workstation or CI runner because it is used to convert a qcow2 disk to raw.
 
 ### Libvirt provision properties
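On the `qemu-img` note above: the provisioner shells out to it to turn the qcow2 podvm image into a raw disk before the S3 upload and VM import. A hypothetical sketch of that invocation from Go; the file names here are placeholders and the real call site may pass different options.

```go
package main

import (
	"fmt"
	"os"
	"os/exec"
)

// convertQcow2ToRaw converts a qcow2 disk image to the raw format
// that the AWS VM import expects.
func convertQcow2ToRaw(qcow2Path, rawPath string) error {
	cmd := exec.Command("qemu-img", "convert", "-O", "raw", qcow2Path, rawPath)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("qemu-img convert failed: %w", err)
	}
	return nil
}

func main() {
	if err := convertQcow2ToRaw("podvm.qcow2", "podvm.raw"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```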
diff --git a/src/cloud-api-adaptor/test/provisioner/aws/provision_common.go b/src/cloud-api-adaptor/test/provisioner/aws/provision_common.go
index 805e637483..9f66fc4869 100644
--- a/src/cloud-api-adaptor/test/provisioner/aws/provision_common.go
+++ b/src/cloud-api-adaptor/test/provisioner/aws/provision_common.go
@@ -12,15 +12,12 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
-	"text/template"
 	"time"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
 	awsConfig "github.com/aws/aws-sdk-go-v2/config"
 	"github.com/aws/aws-sdk-go-v2/service/ec2"
 	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
-	"github.com/aws/aws-sdk-go-v2/service/eks"
-	ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
 	"github.com/aws/aws-sdk-go-v2/service/iam"
 	"github.com/aws/aws-sdk-go-v2/service/s3"
 	s3types "github.com/aws/aws-sdk-go-v2/service/s3/types"
@@ -34,8 +31,7 @@ import (
 )
 
 const (
-	EksCniAddonVersion = "v1.12.5-eksbuild.2"
-	EksVersion         = "1.26"
+	EksVersion         = "1.34"
 	AwsCredentialsFile = "aws-cred.env"
 )
 
@@ -46,6 +42,7 @@ type S3Bucket struct {
 	Client *s3.Client
 	Name   string // Bucket name
 	Key    string // Object key
+	Region string // AWS region
 }
 
 // AMIImage represents an AMI image
@@ -59,6 +56,7 @@
 	ID             string // AMI image ID
 	RootDeviceName string // Root device name
 	VmImportRole   string // vmimport role name
+	BootUefi       bool   // If true, enable UEFI boot mode (required for AMD SEV-SNP)
 }
 
 // Vpc represents an AWS VPC
@@ -84,17 +82,12 @@ type Cluster interface {
 
 // EKSCluster represents an EKS cluster
 type EKSCluster struct {
-	AwsConfig       aws.Config
-	Client          *eks.Client
-	ClusterRoleName string
-	IamClient       *iam.Client
-	Name            string
-	NodeGroupName   string
-	NodesRoleName   string
-	NumWorkers      int32
-	SshKpName       string
-	Version         string
-	Vpc             *Vpc
+	AwsConfig  aws.Config
+	Name       string
+	NumWorkers int32
+	SshKpName  string
+	Version    string
+	Vpc        *Vpc
 }
 
 // OnPremCluster represents an existing and running cluster
@@ -103,21 +96,22 @@
 
 // AWSProvisioner implements the CloudProvision interface.
 type AWSProvisioner struct {
-	AwsConfig        aws.Config
-	iamClient        *iam.Client
-	containerRuntime string // Name of the container runtime
-	Cluster          Cluster
-	Disablecvm       string
-	ec2Client        *ec2.Client
-	s3Client         *s3.Client
-	Bucket           *S3Bucket
-	PauseImage       string
-	Image            *AMIImage
-	Vpc              *Vpc
-	PublicIP         string
-	TunnelType       string
-	VxlanPort        string
-	SshKpName        string
+	AwsConfig         aws.Config
+	iamClient         *iam.Client
+	containerRuntime  string // Name of the container runtime
+	Cluster           Cluster
+	Disablecvm        string
+	ec2Client         *ec2.Client
+	s3Client          *s3.Client
+	Bucket            *S3Bucket
+	PauseImage        string
+	Image             *AMIImage
+	Vpc               *Vpc
+	PodvmInstanceType string
+	PublicIP          string
+	TunnelType        string
+	VxlanPort         string
+	SshKpName         string
 }
 
 // AwsInstallOverlay implements the InstallOverlay interface
@@ -147,6 +141,10 @@ func NewAWSProvisioner(properties map[string]string) (pv.CloudProvisioner, error
 		properties["resources_basename"] = "caa-e2e-test-" + strconv.FormatInt(time.Now().Unix(), 10)
 	}
 
+	if properties["podvm_aws_instance_type"] == "" {
+		properties["podvm_aws_instance_type"] = "t2.medium"
+	}
+
 	vpc := NewVpc(ec2Client, properties)
 
 	if properties["cluster_type"] == "" ||
@@ -155,7 +153,7 @@ func NewAWSProvisioner(properties map[string]string) (pv.CloudProvisioner, error
 		// The podvm should be created with public IP so CAA can connect
 		properties["use_public_ip"] = "true"
 	} else if properties["cluster_type"] == "eks" {
-		cluster = NewEKSCluster(cfg, vpc, properties["ssh_kp_name"])
+		cluster = NewEKSCluster(cfg, vpc, properties["ssh_kp_name"], properties["eks_name"])
 	} else {
 		return nil, fmt.Errorf("Cluster type '%s' not implemented",
 			properties["cluster_type"])
@@ -170,17 +168,19 @@ func NewAWSProvisioner(properties map[string]string) (pv.CloudProvisioner, error
 			Client: s3.NewFromConfig(cfg),
 			Name:   properties["resources_basename"] + "-bucket",
 			Key:    "", // To be defined when the file is uploaded
+			Region: cfg.Region,
 		},
-		containerRuntime: properties["container_runtime"],
-		Cluster:          cluster,
-		Image:            NewAMIImage(ec2Client, properties),
-		Disablecvm:       properties["disablecvm"],
-		PauseImage:       properties["pause_image"],
-		Vpc:              vpc,
-		PublicIP:         properties["use_public_ip"],
-		TunnelType:       properties["tunnel_type"],
-		VxlanPort:        properties["vxlan_port"],
-		SshKpName:        properties["ssh_kp_name"],
+		containerRuntime:  properties["container_runtime"],
+		Cluster:           cluster,
+		Image:             NewAMIImage(ec2Client, properties),
+		Disablecvm:        properties["disablecvm"],
+		PauseImage:        properties["pause_image"],
+		Vpc:               vpc,
+		PodvmInstanceType: properties["podvm_aws_instance_type"],
+		PublicIP:          properties["use_public_ip"],
+		TunnelType:        properties["tunnel_type"],
+		VxlanPort:         properties["vxlan_port"],
+		SshKpName:         properties["ssh_kp_name"],
 	}
 
 	return AWSProps, nil
@@ -236,7 +236,7 @@ func (a *AWSProvisioner) CreateVPC(ctx context.Context, cfg *envconf.Config) err
 }
 
 func (aws *AWSProvisioner) DeleteCluster(ctx context.Context, cfg *envconf.Config) error {
-	return nil
+	return aws.Cluster.DeleteCluster()
 }
 
 func (a *AWSProvisioner) DeleteVPC(ctx context.Context, cfg *envconf.Config) error {
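With `DeleteCluster` now delegating to the `Cluster` interface, teardown behavior is chosen at construction time. A reduced sketch of that dispatch, assuming (the patch does not show it) that `OnPremCluster.DeleteCluster` stays a no-op because the tests do not own that cluster:

```go
package main

import "fmt"

// Cluster is a reduced version of the provisioner's interface.
type Cluster interface {
	DeleteCluster() error
}

// OnPremCluster leaves the pre-existing cluster alone.
type OnPremCluster struct{}

func (o *OnPremCluster) DeleteCluster() error {
	// Nothing to do: the cluster is owned by the caller, not the tests.
	return nil
}

// EKSCluster tears down the cluster it provisioned (via eksctl in the patch).
type EKSCluster struct{ Name string }

func (e *EKSCluster) DeleteCluster() error {
	fmt.Printf("eksctl delete cluster --name %s --wait\n", e.Name)
	return nil
}

func main() {
	for _, c := range []Cluster{&OnPremCluster{}, &EKSCluster{Name: "caa-e2e-test-k8s"}} {
		if err := c.DeleteCluster(); err != nil {
			fmt.Println(err)
		}
	}
}
```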
@@ -245,11 +245,17 @@ func (a *AWSProvisioner) DeleteVPC(ctx context.Context, cfg *envconf.Config) err
 
 	if vpc.SubnetId != "" {
 		log.Infof("Delete subnet: %s", vpc.SubnetId)
-		if err = vpc.deleteSubnet(); err != nil {
+		if err = vpc.deleteSubnet(vpc.SubnetId); err != nil {
 			return err
 		}
 	}
 
+	if vpc.SecondarySubnetId != "" {
+		log.Infof("Delete secondary subnet: %s", vpc.SecondarySubnetId)
+		if err = vpc.deleteSubnet(vpc.SecondarySubnetId); err != nil {
+			return err
+		}
+	}
+
 	if vpc.SecurityGroupId != "" {
 		log.Infof("Delete security group: %s", vpc.SecurityGroupId)
 		if err = vpc.deleteSecurityGroup(); err != nil {
@@ -314,23 +320,23 @@ func (a *AWSProvisioner) GetProperties(ctx context.Context, cfg *envconf.Config)
 	credentials, _ := a.AwsConfig.Credentials.Retrieve(context.TODO())
 
 	return map[string]string{
-		"CONTAINER_RUNTIME":    a.containerRuntime,
-		"disablecvm":           a.Disablecvm,
-		"pause_image":          a.PauseImage,
-		"podvm_launchtemplate": "",
-		"podvm_ami":            a.Image.ID,
-		"podvm_instance_type":  "t2.medium",
-		"sg_ids":               a.Vpc.SecurityGroupId, // TODO: what other SG needed?
-		"subnet_id":            a.Vpc.SubnetId,
-		"ssh_kp_name":          a.SshKpName,
-		"region":               a.AwsConfig.Region,
-		"resources_basename":   a.Vpc.BaseName,
-		"access_key_id":        credentials.AccessKeyID,
-		"secret_access_key":    credentials.SecretAccessKey,
-		"session_token":        credentials.SessionToken,
-		"use_public_ip":        a.PublicIP,
-		"tunnel_type":          a.TunnelType,
-		"vxlan_port":           a.VxlanPort,
+		"CONTAINER_RUNTIME":       a.containerRuntime,
+		"disablecvm":              a.Disablecvm,
+		"pause_image":             a.PauseImage,
+		"podvm_launchtemplate":    "",
+		"podvm_ami":               a.Image.ID,
+		"podvm_aws_instance_type": a.PodvmInstanceType,
+		"sg_ids":                  a.Vpc.SecurityGroupId, // TODO: what other SG needed?
+		"subnet_id":               a.Vpc.SubnetId,
+		"ssh_kp_name":             a.SshKpName,
+		"region":                  a.AwsConfig.Region,
+		"resources_basename":      a.Vpc.BaseName,
+		"access_key_id":           credentials.AccessKeyID,
+		"secret_access_key":       credentials.SecretAccessKey,
+		"session_token":           credentials.SessionToken,
+		"use_public_ip":           a.PublicIP,
+		"tunnel_type":             a.TunnelType,
+		"vxlan_port":              a.VxlanPort,
 	}
 }
@@ -395,6 +401,17 @@ func NewVpc(client *ec2.Client, properties map[string]string) *Vpc {
 		cidrBlock = "10.0.0.0/24"
 	}
 
+	subnetIdValue := properties["aws_vpc_subnet_id"]
+	subnetId := ""
+	secondarySubnetId := ""
+	if subnetIdValue != "" {
+		subnetIds := strings.Split(subnetIdValue, ",")
+		subnetId = strings.TrimSpace(subnetIds[0])
+		if len(subnetIds) > 1 {
+			secondarySubnetId = strings.TrimSpace(subnetIds[1])
+		}
+	}
+
 	return &Vpc{
 		BaseName:  properties["resources_basename"],
 		CidrBlock: cidrBlock,
@@ -402,7 +419,8 @@
 		ID:                properties["aws_vpc_id"],
 		Region:            properties["aws_region"],
 		SecurityGroupId:   properties["aws_vpc_sg_id"],
-		SubnetId:          properties["aws_vpc_subnet_id"],
+		SubnetId:          subnetId,
+		SecondarySubnetId: secondarySubnetId,
 		InternetGatewayId: properties["aws_vpc_igw_id"],
 		RouteTableId:      properties["aws_vpc_rt_id"],
 	}
@@ -656,8 +674,8 @@ func (v *Vpc) deleteSecurityGroup() error {
 
 // deleteSubnet deletes the subnet. Instances running on the subnet will
 // be terminated before.
-func (v *Vpc) deleteSubnet() error {
-	if v.SubnetId == "" {
+func (v *Vpc) deleteSubnet(id string) error {
+	if id == "" {
 		return nil
 	}
@@ -669,7 +687,7 @@
 		Filters: []ec2types.Filter{
 			{
 				Name:   aws.String("subnet-id"),
-				Values: []string{v.SubnetId},
+				Values: []string{id},
 			},
 		},
 	})
@@ -705,7 +723,7 @@
 
 	// Finally delete the subnet
 	if _, err = v.Client.DeleteSubnet(context.TODO(), &ec2.DeleteSubnetInput{
-		SubnetId: aws.String(v.SubnetId),
+		SubnetId: aws.String(id),
 	}); err != nil {
 		return err
 	}
@@ -788,9 +806,18 @@
 		return nil
 	}
 
-	_, err = b.Client.CreateBucket(context.TODO(), &s3.CreateBucketInput{
+	createBucketInput := &s3.CreateBucketInput{
 		Bucket: &b.Name,
-	})
+	}
+
+	// For regions other than us-east-1, we need to specify a location constraint
+	if b.Region != "" && b.Region != "us-east-1" {
+		createBucketInput.CreateBucketConfiguration = &s3types.CreateBucketConfiguration{
+			LocationConstraint: s3types.BucketLocationConstraint(b.Region),
+		}
+	}
+
+	_, err = b.Client.CreateBucket(context.TODO(), createBucketInput)
 	if err != nil {
 		return err
 	}
@@ -927,6 +954,14 @@ func createVmimportServiceRole(ctx context.Context, client *iam.Client, bucketNa
 }
 
 func NewAMIImage(client *ec2.Client, properties map[string]string) *AMIImage {
+	// If disablecvm is empty or false then a confidential VM is wanted, and
+	// AMD SEV-SNP requires the UEFI boot mode to be enabled.
+	bootUefi := false
+	disablecvm := properties["disablecvm"]
+	if disablecvm == "" || disablecvm == "false" {
+		bootUefi = true
+	}
+
 	return &AMIImage{
 		BaseName: properties["resources_basename"],
 		Client:   client,
@@ -937,6 +972,7 @@
 		ID:             properties["podvm_aws_ami_id"],
 		RootDeviceName: "/dev/xvda",
 		VmImportRole:   properties["resources_basename"] + "-vmimport",
+		BootUefi:       bootUefi,
 	}
 }
@@ -1002,7 +1038,7 @@ func (i *AMIImage) registerImage(imageName string) error {
 		return fmt.Errorf("EBS Snapshot ID not found\n")
 	}
 
-	result, err := i.Client.RegisterImage(context.TODO(), &ec2.RegisterImageInput{
+	registerInput := &ec2.RegisterImageInput{
 		Name:         aws.String(imageName),
 		Architecture: ec2types.ArchitectureValuesX8664,
 		BlockDeviceMappings: []ec2types.BlockDeviceMapping{{
@@ -1017,7 +1053,14 @@
 		RootDeviceName:     aws.String(i.RootDeviceName),
 		VirtualizationType: aws.String("hvm"),
 		TagSpecifications:  defaultTagSpecifications(i.BaseName+"-img", ec2types.ResourceTypeImage),
-	})
+	}
+
+	// If BootUefi is true, enable UEFI boot mode for AMD SEV-SNP
+	if i.BootUefi {
+		registerInput.BootMode = ec2types.BootModeValuesUefi
+	}
+
+	result, err := i.Client.RegisterImage(context.TODO(), registerInput)
 	if err != nil {
 		return err
 	}
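UEFI boot is only half of the SEV-SNP story; the pod VM instance type must support the feature too, which is why the workflow pins m6a.large. A hedged sketch of checking that through the EC2 API; the `ProcessorInfo.SupportedFeatures` field (expected to report `amd-sev-snp` on capable types) is my reading of the SDK and worth verifying before relying on it.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/ec2"
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
)

func main() {
	cfg, err := config.LoadDefaultConfig(context.TODO())
	if err != nil {
		log.Fatal(err)
	}
	client := ec2.NewFromConfig(cfg)

	out, err := client.DescribeInstanceTypes(context.TODO(), &ec2.DescribeInstanceTypesInput{
		InstanceTypes: []ec2types.InstanceType{ec2types.InstanceTypeM6aLarge},
	})
	if err != nil {
		log.Fatal(err)
	}

	for _, it := range out.InstanceTypes {
		if it.ProcessorInfo == nil {
			continue
		}
		// SupportedFeatures should list "amd-sev-snp" on SNP-capable types.
		for _, f := range it.ProcessorInfo.SupportedFeatures {
			fmt.Printf("%s supports %s\n", it.InstanceType, f)
		}
	}
}
```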
@@ -1160,18 +1203,18 @@ func (a *AwsInstallOverlay) Edit(ctx context.Context, cfg *envconf.Config, prope
 
 	// Mapping the internal properties to ConfigMapGenerator properties.
 	mapProps := map[string]string{
-		"disablecvm":           "DISABLECVM",
-		"pause_image":          "PAUSE_IMAGE",
-		"podvm_launchtemplate": "PODVM_LAUNCHTEMPLATE_NAME",
-		"podvm_ami":            "PODVM_AMI_ID",
-		"podvm_instance_type":  "PODVM_INSTANCE_TYPE",
-		"sg_ids":               "AWS_SG_IDS",
-		"subnet_id":            "AWS_SUBNET_ID",
-		"ssh_kp_name":          "SSH_KP_NAME",
-		"region":               "AWS_REGION",
-		"tunnel_type":          "TUNNEL_TYPE",
-		"vxlan_port":           "VXLAN_PORT",
-		"use_public_ip":        "USE_PUBLIC_IP",
+		"disablecvm":              "DISABLECVM",
+		"pause_image":             "PAUSE_IMAGE",
+		"podvm_launchtemplate":    "PODVM_LAUNCHTEMPLATE_NAME",
+		"podvm_ami":               "PODVM_AMI_ID",
+		"podvm_aws_instance_type": "PODVM_INSTANCE_TYPE",
+		"sg_ids":                  "AWS_SG_IDS",
+		"subnet_id":               "AWS_SUBNET_ID",
+		"ssh_kp_name":             "SSH_KP_NAME",
+		"region":                  "AWS_REGION",
+		"tunnel_type":             "TUNNEL_TYPE",
+		"vxlan_port":              "VXLAN_PORT",
+		"use_public_ip":           "USE_PUBLIC_IP",
 	}
 
 	for k, v := range mapProps {
@@ -1200,85 +1243,37 @@ func (a *AwsInstallOverlay) Edit(ctx context.Context, cfg *envconf.Config, prope
 	return nil
 }
 
-// createRoleAndAttachPolicy creates a new role (if not exist) with the trust
-// policy. Then It can attach policies and will return the role ARN.
-func createRoleAndAttachPolicy(client *iam.Client, roleName string, trustPolicy string, policyArns []string) (string, error) {
-	var (
-		err     error
-		roleArn string
-	)
-
-	getRoleOutput, err := client.GetRole(context.TODO(), &iam.GetRoleInput{
-		RoleName: aws.String(roleName),
-	})
-
-	if err == nil {
-		roleArn = *getRoleOutput.Role.Arn
-	} else {
-		createRoleOutput, err := client.CreateRole(context.TODO(),
-			&iam.CreateRoleInput{
-				AssumeRolePolicyDocument: aws.String(trustPolicy),
-				RoleName:                 aws.String(roleName),
-			})
-		if err != nil {
-			return "", err
-		}
-		roleArn = *createRoleOutput.Role.Arn
-	}
-
-	for _, policyArn := range policyArns {
-		if _, err = client.AttachRolePolicy(context.TODO(),
-			&iam.AttachRolePolicyInput{
-				PolicyArn: aws.String(policyArn),
-				RoleName:  aws.String(roleName),
-			}); err != nil {
-			return roleArn, err
-		}
-	}
-
-	return roleArn, nil
-}
-
 // NewEKSCluster instantiates a new EKS Cluster struct.
 // It requires an AWS configuration with access and authentication information, a
 // VPC already instantiated and with a public subnet, and an EC2 SSH key-pair used
 // to access the cluster's worker nodes.
-func NewEKSCluster(cfg aws.Config, vpc *Vpc, SshKpName string) *EKSCluster {
-	name := "peer-pods-test-k8s"
+// If eksName is provided, an existing cluster with that name will be used instead
+// of creating a new one; otherwise a cluster will be created under a default name.
+func NewEKSCluster(cfg aws.Config, vpc *Vpc, SshKpName string, eksName string) *EKSCluster {
 	return &EKSCluster{
-		AwsConfig:       cfg,
-		Client:          eks.NewFromConfig(cfg),
-		IamClient:       iam.NewFromConfig(cfg),
-		ClusterRoleName: "CaaEksClusterRole",
-		Name:            name,
-		NodeGroupName:   name + "-nodegrp",
-		NodesRoleName:   "CaaEksNodesRole",
-		NumWorkers:      1,
-		SshKpName:       SshKpName,
-		Version:         EksVersion,
-		Vpc:             vpc,
+		AwsConfig:  cfg,
+		Name:       eksName,
+		NumWorkers: 1,
+		SshKpName:  SshKpName,
+		Version:    EksVersion,
+		Vpc:        vpc,
 	}
 }
 
-// CreateCluster creates a new EKS cluster.
-// It will create needed roles, the cluster itself, nodes group and finally
-// install add-ons.
-// EKS should be created with at least two subnets so a secundary will be created If
-// it does not exist on the VPC already.
+// CreateCluster creates a new EKS cluster using eksctl.
+// EKS should be created with at least two subnets so a secondary will be
+// created if it does not exist on the VPC already.
 func (e *EKSCluster) CreateCluster() error {
-	var (
-		err          error
-		roleArn      string
-		NodesRoleArn string
-	)
-	activationTimeout := time.Minute * 15
-	addonTimeout := time.Minute * 5
-	nodesTimeout := time.Minute * 10
+	var err error
 
-	if roleArn, err = e.CreateEKSClusterRole(); err != nil {
-		return err
+	if e.Name != "" {
+		log.Infof("Using existing EKS cluster: %s", e.Name)
+		return nil
 	}
 
+	e.Name = e.Vpc.BaseName + "-k8s"
+
 	if e.Vpc.SecondarySubnetId == "" {
 		log.Info("Create a secondary subnet for EKS")
 		if err = e.Vpc.createSecondarySubnet(); err != nil {
@@ -1287,71 +1282,32 @@ func (e *EKSCluster) CreateCluster() error {
 		log.Infof("Secondary subnet Id: %s", e.Vpc.SecondarySubnetId)
 	}
 
-	log.Infof("Creating the EKS cluster: %s ...", e.Name)
-	_, err = e.Client.CreateCluster(context.TODO(),
-		&eks.CreateClusterInput{
-			Name:    aws.String(e.Name),
-			Version: aws.String(e.Version),
-			ResourcesVpcConfig: &ekstypes.VpcConfigRequest{
-				SubnetIds: []string{e.Vpc.SubnetId, e.Vpc.SecondarySubnetId},
-			},
-			RoleArn: aws.String(roleArn),
-		})
-	if err != nil {
-		return err
-	}
-
-	log.Infof("Cluster created. Waiting to be actived (timeout=%s)...",
-		activationTimeout)
-	clusterWaiter := eks.NewClusterActiveWaiter(e.Client)
-	if err = clusterWaiter.Wait(context.TODO(), &eks.DescribeClusterInput{
-		Name: aws.String(e.Name),
-	}, activationTimeout); err != nil {
-		return err
-	}
-
-	log.Info("Creating the managed nodes group...")
-	if NodesRoleArn, err = e.CreateEKSNodesRole(); err != nil {
-		return err
-	}
-	if _, err = e.Client.CreateNodegroup(context.TODO(),
-		&eks.CreateNodegroupInput{
-			ClusterName:   aws.String(e.Name),
-			NodeRole:      aws.String(NodesRoleArn),
-			NodegroupName: aws.String(e.NodeGroupName),
-			// Let's simplify and create the nodes only on the public subnet so that it
-			// doesn't need to configure Amazon ECR for pulling container images.
-			Subnets:       []string{e.Vpc.SubnetId},
-			AmiType:       ekstypes.AMITypesAl2X8664,
-			CapacityType:  ekstypes.CapacityTypesOnDemand,
-			InstanceTypes: []string{"t3.medium"},
-			RemoteAccess: &ekstypes.RemoteAccessConfig{
-				Ec2SshKey: aws.String(e.SshKpName),
-			},
-			ScalingConfig: &ekstypes.NodegroupScalingConfig{
-				DesiredSize: aws.Int32(e.NumWorkers),
-				MaxSize:     aws.Int32(e.NumWorkers),
-				MinSize:     aws.Int32(e.NumWorkers),
-			},
-			Version: aws.String(e.Version),
-			// Fail to create the node group due to https://github.com/aws/aws-sdk-go-v2/issues/2267
-			//Labels: map[string]string{"node.kubernetes.io/worker": ""},
-		}); err != nil {
-		return err
-	}
-
-	log.Infof("Nodes group created. Waiting to be ready (timeout=%s)...",
-		nodesTimeout)
-	nodesWaiter := eks.NewNodegroupActiveWaiter(e.Client)
-	if err = nodesWaiter.Wait(context.TODO(), &eks.DescribeNodegroupInput{
-		ClusterName:   aws.String(e.Name),
-		NodegroupName: aws.String(e.NodeGroupName),
-	}, nodesTimeout); err != nil {
-		return err
-	}
-
-	if err = e.CreateCniAddon(addonTimeout); err != nil {
-		return err
+	log.Infof("Creating the EKS cluster: %s", e.Name)
+
+	cmdArgs := []string{
+		"create", "cluster",
+		"--name", e.Name,
+		"--version", e.Version,
+		"--region", e.AwsConfig.Region,
+		"--kubeconfig", e.Name + "-kubeconfig",
+		"--vpc-private-subnets", e.Vpc.SubnetId + "," + e.Vpc.SecondarySubnetId,
+		"--nodegroup-name", e.Name,
+		"--nodes", strconv.FormatInt(int64(e.NumWorkers), 10),
+		"--nodes-min", strconv.FormatInt(int64(e.NumWorkers), 10),
+		"--nodes-max", strconv.FormatInt(int64(e.NumWorkers), 10),
+		"--node-type", "t3.medium",
+		"--node-ami-family", "Ubuntu2404",
+		"--ssh-access",
+		"--ssh-public-key", e.SshKpName,
+		"--node-private-networking",
+		"--with-oidc",
+	}
+
+	cmd := exec.Command("eksctl", cmdArgs...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err = cmd.Run(); err != nil {
+		return fmt.Errorf("failed to create EKS cluster: %w", err)
 	}
 
 	// TODO: This block copy most of the `AddNodeRoleWorkerLabel()` code. We
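Unlike the removed SDK waiters, this `eksctl` invocation carries no timeout on the Go side, and cluster plus node group creation can take fifteen minutes or more (only the delete path passes `--timeout`). If bounding the create call ever becomes necessary, a sketch using `exec.CommandContext` with a caller-supplied deadline:

```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"time"
)

// runWithTimeout runs eksctl with the given args, killing the process
// if it exceeds the timeout.
func runWithTimeout(timeout time.Duration, args ...string) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "eksctl", args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		if ctx.Err() == context.DeadlineExceeded {
			return fmt.Errorf("eksctl timed out after %s", timeout)
		}
		return fmt.Errorf("eksctl failed: %w", err)
	}
	return nil
}

func main() {
	if err := runWithTimeout(30*time.Minute, "get", "cluster"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```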
@@ -1387,129 +1343,33 @@
 	return nil
 }
 
+// DeleteCluster deletes the EKS cluster using eksctl.
 func (e *EKSCluster) DeleteCluster() error {
-	// TODO: implement me!
-	return nil
-}
-
-// CreateEKSClusterRole creates (if not exist) the needed role for EKS
-// creation.
-func (e *EKSCluster) CreateEKSClusterRole() (string, error) {
-	trustPolicy := `{
-		"Version": "2012-10-17",
-		"Statement": [
-			{
-				"Effect": "Allow",
-				"Principal": {
-					"Service": "eks.amazonaws.com"
-				},
-				"Action": "sts:AssumeRole"
-			}
-		]
-	}`
-
-	return createRoleAndAttachPolicy(e.IamClient, e.ClusterRoleName, trustPolicy,
-		[]string{"arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"})
-}
-
-// CreateEKSNodesRole creates (if not exist) the needed role for the managed
-// nodes creation.
-func (e *EKSCluster) CreateEKSNodesRole() (string, error) {
-	trustPolicy := `{
-		"Version": "2012-10-17",
-		"Statement": [
-			{
-				"Effect": "Allow",
-				"Principal": {
-					"Service": "ec2.amazonaws.com"
-				},
-				"Action": "sts:AssumeRole"
-			}
-		]
-	}`
-
-	return createRoleAndAttachPolicy(e.IamClient, e.NodesRoleName,
-		trustPolicy, []string{
-			"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
-			"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
-			"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", // Needed by the CNI add-on
		})
-}
-
-// CreateCniAddon applies the AWS CNI addon
-func (e *EKSCluster) CreateCniAddon(addonTimeout time.Duration) error {
-	cniAddonName := "vpc-cni"
+	if e.Name == "" {
+		return nil
+	}
+
+	log.Infof("Deleting the EKS cluster: %s using eksctl...", e.Name)
 
-	log.Info("Creating the CNI add-on...")
-	if _, err := e.Client.CreateAddon(context.TODO(), &eks.CreateAddonInput{
-		AddonName:        aws.String(cniAddonName),
-		ClusterName:      aws.String(e.Name),
-		AddonVersion:     aws.String(EksCniAddonVersion),
-		ResolveConflicts: ekstypes.ResolveConflictsNone,
-	}); err != nil {
-		return err
+	cmdArgs := []string{
+		"delete", "cluster",
+		"--name", e.Name,
+		"--region", e.AwsConfig.Region,
+		"--wait",
+		"--timeout", "15m",
 	}
-	log.Infof("CNI add-on installed. Waiting to be activated (timeout=%s)...",
-		addonTimeout)
-	addonWaiter := eks.NewAddonActiveWaiter(e.Client)
-	if err := addonWaiter.Wait(context.TODO(),
-		&eks.DescribeAddonInput{
-			AddonName:   aws.String(cniAddonName),
-			ClusterName: aws.String(e.Name),
-		}, addonTimeout); err != nil {
-		return err
+	cmd := exec.Command("eksctl", cmdArgs...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		return fmt.Errorf("failed to delete EKS cluster: %w", err)
 	}
 
 	return nil
 }
 
-// GetKubeconfig returns a kubeconfig for the EKS cluster
+// GetKubeconfigFile returns a kubeconfig for the EKS cluster
 func (e *EKSCluster) GetKubeconfigFile() (string, error) {
-	desc, err := e.Client.DescribeCluster(context.TODO(),
-		&eks.DescribeClusterInput{
-			Name: aws.String(e.Name),
-		})
-	if err != nil {
-		return "", err
-	}
-	cluster := desc.Cluster
-	credentials, _ := e.AwsConfig.Credentials.Retrieve(context.TODO())
-
-	kubecfgTemplate := `
-apiVersion: v1
-clusters:
-- cluster:
-    certificate-authority-data: {{.Cert}}
-    server: {{.ClusterEndpoint}}
-  name: arn:aws:eks:{{.Region}}:{{.Account}}:cluster/{{.ClusterName}}
-contexts:
-- context:
-    cluster: arn:aws:eks:{{.Region}}:{{.Account}}:cluster/{{.ClusterName}}
-    user: arn:aws:eks:{{.Region}}:{{.Account}}:cluster/{{.ClusterName}}
-  name: arn:aws:eks:{{.Region}}:{{.Account}}:cluster/{{.ClusterName}}
-current-context: arn:aws:eks:{{.Region}}:{{.Account}}:cluster/{{.ClusterName}}
-kind: Config
-preferences: {}
-users:
-- name: arn:aws:eks:{{.Region}}:{{.Account}}:cluster/{{.ClusterName}}
-  user:
-    exec:
-      apiVersion: client.authentication.k8s.io/v1beta1
-      command: aws
-      args:
-        - --region
-        - {{.Region}}
-        - eks
-        - get-token
-        - --cluster-name
-        - {{.ClusterName}}`
-
-	t, err := template.New("kubecfg").Parse(kubecfgTemplate)
-	if err != nil {
-		return "", err
-	}
-
 	homeDir, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
@@ -1520,19 +1380,20 @@ users:
 		return "", err
 	}
 	targetFile := filepath.Join(targetDir, "config")
-	kubecfgFile, err := os.Create(targetFile)
-	if err != nil {
-		return "", err
+
+	// Use eksctl to write kubeconfig
+	cmdArgs := []string{
+		"utils", "write-kubeconfig",
+		"--cluster", e.Name,
+		"--region", e.AwsConfig.Region,
+		"--kubeconfig", targetFile,
 	}
 
-	if err = t.Execute(kubecfgFile, map[string]string{
-		"Account":         credentials.AccessKeyID,
-		"Cert":            *cluster.CertificateAuthority.Data,
-		"ClusterEndpoint": *cluster.Endpoint,
-		"ClusterName":     *cluster.Name,
-		"Region":          e.AwsConfig.Region,
-	}); err != nil {
-		return "", err
+	cmd := exec.Command("eksctl", cmdArgs...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err = cmd.Run(); err != nil {
+		return "", fmt.Errorf("failed to write kubeconfig: %w", err)
 	}
 
 	return targetFile, nil
diff --git a/src/cloud-api-adaptor/test/provisioner/provision.go b/src/cloud-api-adaptor/test/provisioner/provision.go
index 5003360240..06b9e6a9ad 100644
--- a/src/cloud-api-adaptor/test/provisioner/provision.go
+++ b/src/cloud-api-adaptor/test/provisioner/provision.go
@@ -313,7 +313,7 @@ func (p *CloudAPIAdaptor) Deploy(ctx context.Context, cfg *envconf.Config, props
 
 	log.Infof("Wait for the %s runtimeclass be created\n", p.runtimeClass.GetName())
 	if err = wait.For(conditions.New(resources).ResourcesFound(&nodev1.RuntimeClassList{Items: []nodev1.RuntimeClass{*p.runtimeClass}}),
-		wait.WithTimeout(time.Second*60)); err != nil {
+		wait.WithTimeout(time.Second*560)); err != nil {
 		return err
 	}
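For completeness, a hypothetical caller of the reworked `GetKubeconfigFile`; the method name and behavior come from this patch, while the interface and wiring below are illustrative only.

```go
package main

import (
	"fmt"
	"log"
	"os"
)

// KubeconfigProvider matches the shape of EKSCluster.GetKubeconfigFile.
type KubeconfigProvider interface {
	GetKubeconfigFile() (string, error)
}

// fakeCluster stands in for an EKSCluster in this sketch.
type fakeCluster struct{}

func (fakeCluster) GetKubeconfigFile() (string, error) {
	return "/tmp/kubeconfig", nil
}

// exportKubeconfig writes the cluster's kubeconfig and points KUBECONFIG
// at it, mirroring what the workflow does for the onprem (kcli) cluster.
func exportKubeconfig(c KubeconfigProvider) error {
	path, err := c.GetKubeconfigFile()
	if err != nil {
		return err
	}
	return os.Setenv("KUBECONFIG", path)
}

func main() {
	if err := exportKubeconfig(fakeCluster{}); err != nil {
		log.Fatal(err)
	}
	fmt.Println("KUBECONFIG =", os.Getenv("KUBECONFIG"))
}
```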