@@ -69,8 +69,6 @@ const (
6969 // This corresponds with the quay tag: v0.1.90-f2e86a9
7070 networkValidatorImage = "quay.io/app-sre/osd-network-verifier@sha256:137bf177c2e87732b2692c1af39d3b79b2f84c7f0ee9254df4ea4412dddfab1e"
7171 networkValidatorRepo = "quay.io/app-sre/osd-network-verifier"
72- userdataEndVerifier = "USERDATA END"
73- prepulledImageMessage = "Warning: could not pull the specified docker image, will try to use the prepulled one"
7472 invalidKMSCode = "Client.InvalidKMSKey.InvalidState"
7573)
7674
@@ -79,7 +77,7 @@ type AwsVerifier struct {
7977 AwsClient * aws.Client
8078 Logger ocmlog.Logger
8179 Output output.Output
82- // This cache is only to be used inside of describeInstanceType() to minimize nil ptr error risk
80+ // This cache is only to be used inside describeInstanceType() to minimize nil ptr error risk
8381 cachedInstanceTypeInfo * ec2Types.InstanceTypeInfo
8482}
8583
@@ -158,7 +156,7 @@ func (a *AwsVerifier) describeInstanceType(ctx context.Context, instanceType str
158156 return a .cachedInstanceTypeInfo , nil
159157}
160158
161- // instanceTypeUsesNitro asks the AWS API whether or not the provided instanceType uses the "Nitro"
159+ // instanceTypeUsesNitro asks the AWS API whether the provided instanceType uses the "Nitro"
162160// hypervisor. Nitro is the only hypervisor supporting serial console output, which we need to
163161// collect in order to gather probe results
164162func (a * AwsVerifier ) instanceTypeUsesNitro (ctx context.Context , instanceType string ) (bool , error ) {
@@ -169,7 +167,7 @@ func (a *AwsVerifier) instanceTypeUsesNitro(ctx context.Context, instanceType st
169167 }
170168
171169 // Return true if instance type uses nitro
172- return ( instanceTypeInfo .Hypervisor == ec2Types .InstanceTypeHypervisorNitro ) , nil
170+ return instanceTypeInfo .Hypervisor == ec2Types .InstanceTypeHypervisorNitro , nil
173171}
174172
175173// instanceTypeArchitecture asks the AWS API about the CPU architecture(s) supported by the provided
@@ -256,6 +254,7 @@ type createEC2InstanceInput struct {
256254 tags map [string ]string
257255 ctx context.Context
258256 keyPair string
257+ vpcID string
259258}
260259
261260func (a * AwsVerifier ) createEC2Instance (input createEC2InstanceInput ) (string , error ) {
@@ -339,16 +338,16 @@ func (a *AwsVerifier) createEC2Instance(input createEC2InstanceInput) (string, e
339338
340339 // Wait up to 5 minutes for the instance to be running
341340 waiter := ec2 .NewInstanceRunningWaiter (a .AwsClient )
342- if err := waiter .Wait (input .ctx , & ec2.DescribeInstancesInput {InstanceIds : []string {instanceID }}, 2 * time .Minute ); err != nil {
343- resp , err := a .AwsClient .DescribeInstances (context . TODO () , & ec2.DescribeInstancesInput {
341+ if err := waiter .Wait (input .ctx , & ec2.DescribeInstancesInput {InstanceIds : []string {instanceID }}, 5 * time .Minute ); err != nil {
342+ resp , err := a .AwsClient .DescribeInstances (input . ctx , & ec2.DescribeInstancesInput {
344343 InstanceIds : []string {instanceID },
345344 })
346345 if err != nil {
347346 fmt .Println ("Warning: Waiter Describe instances failure." )
348347 }
349348
350349 var stateCode string
351- if resp .Reservations [0 ].Instances [0 ].StateReason .Code != nil {
350+ if resp != nil && resp .Reservations [0 ].Instances [0 ].StateReason .Code != nil {
352351 stateCode = * resp .Reservations [0 ].Instances [0 ].StateReason .Code
353352 }
354353
@@ -357,6 +356,17 @@ func (a *AwsVerifier) createEC2Instance(input createEC2InstanceInput) (string, e
357356 waiterErr = handledErrors .NewKmsError ("encountered issue accessing KMS key when launching instance." )
358357 }
359358
359+ // Switch the instance SecurityGroup to the default before terminating to avoid a cleanup race condition. This is
360+ // handled by the normal cleanup process, except in this specific case where we fail early because of KMS issues.
361+ defaultSecurityGroupID := a .fetchVpcDefaultSecurityGroup (input .ctx , input .vpcID )
362+ if defaultSecurityGroupID != "" {
363+ // Replace the SecurityGroup attached to the instance with the default one for the VPC to allow for graceful
364+ // termination of the network-verifier created temporary SecurityGroup. If we hit an error, we ignore it
365+ // and continue with normal termination of the instance.
366+ _ = a .modifyInstanceSecurityGroup (input .ctx , instanceID , defaultSecurityGroupID )
367+ a .Logger .Info (input .ctx , "Modified the instance to use the default security group" )
368+ }
369+
360370 if err := a .AwsClient .TerminateEC2Instance (input .ctx , instanceID ); err != nil {
361371 return instanceID , handledErrors .NewGenericError (err )
362372 }
@@ -491,16 +501,16 @@ func (a *AwsVerifier) CreateSecurityGroup(ctx context.Context, tags map[string]s
491501
492502 a .Logger .Info (ctx , "Created security group with ID: %s" , * output .GroupId )
493503
494- input_rules := & ec2.AuthorizeSecurityGroupEgressInput {
504+ inputRules := & ec2.AuthorizeSecurityGroupEgressInput {
495505 GroupId : output .GroupId ,
496506 IpPermissions : defaultIpPermissions ,
497507 }
498508
499- if _ , err := a .AwsClient .AuthorizeSecurityGroupEgress (ctx , input_rules ); err != nil {
509+ if _ , err := a .AwsClient .AuthorizeSecurityGroupEgress (ctx , inputRules ); err != nil {
500510 return & ec2.CreateSecurityGroupOutput {}, err
501511 }
502512
503- revoke_default_egress := & ec2.RevokeSecurityGroupEgressInput {
513+ revokeDefaultEgress := & ec2.RevokeSecurityGroupEgressInput {
504514 GroupId : output .GroupId ,
505515 IpPermissions : []ec2Types.IpPermission {
506516 {
@@ -516,7 +526,7 @@ func (a *AwsVerifier) CreateSecurityGroup(ctx context.Context, tags map[string]s
516526 },
517527 }
518528
519- if _ , err := a .AwsClient .RevokeSecurityGroupEgress (ctx , revoke_default_egress ); err != nil {
529+ if _ , err := a .AwsClient .RevokeSecurityGroupEgress (ctx , revokeDefaultEgress ); err != nil {
520530 return & ec2.CreateSecurityGroupOutput {}, err
521531 }
522532
@@ -694,3 +704,40 @@ func (a *AwsVerifier) GetVpcIdFromSubnetId(ctx context.Context, vpcSubnetID stri
694704 }
695705 return vpcId , nil
696706}
707+
708+ // fetchVpcDefaultSecurityGroup will return either the 'default' SG ID, or an empty string if not found/an error is hit
709+ func (a * AwsVerifier ) fetchVpcDefaultSecurityGroup (ctx context.Context , vpcId string ) string {
710+ describeSGOutput , err := a .AwsClient .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
711+ Filters : []ec2Types.Filter {
712+ {
713+ Name : awsTools .String ("vpc-id" ),
714+ Values : []string {vpcId },
715+ },
716+ {
717+ Name : awsTools .String ("group-name" ),
718+ Values : []string {"default" },
719+ },
720+ },
721+ })
722+
723+ if err != nil {
724+ return ""
725+ }
726+
727+ for _ , SG := range describeSGOutput .SecurityGroups {
728+ if * SG .GroupName == "default" {
729+ return * SG .GroupId
730+ }
731+ }
732+
733+ return ""
734+ }
735+
736+ func (a * AwsVerifier ) modifyInstanceSecurityGroup (ctx context.Context , instanceID string , securityGroupID string ) error {
737+ _ , err := a .AwsClient .ModifyInstanceAttribute (ctx , & ec2.ModifyInstanceAttributeInput {
738+ InstanceId : & instanceID ,
739+ Groups : []string {securityGroupID },
740+ })
741+
742+ return err
743+ }
0 commit comments