diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go index e4bb5a50b..0be649c13 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go @@ -19,6 +19,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -49,6 +50,8 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT //create and upload the ssm document on the given aws service monitoring docs if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "could not create and upload the ssm document") + span.RecordError(err) return stacktrace.Propagate(err, "could not create and upload the ssm document") } experimentsDetails.IsDocsUploaded = true @@ -60,25 +63,37 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT //get the instance id or list of instance ids instanceIDList := strings.Split(experimentsDetails.EC2InstanceID, ",") if experimentsDetails.EC2InstanceID == "" || len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.SetStatus(codes.Error, "no instance id found for chaos injection") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.RecordError(err) + return err } switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Delete the ssm document on the given aws service monitoring docs err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to delete ssm doc") + span.RecordError(err) return stacktrace.Propagate(err, "failed to delete ssm doc") } diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go index c7e872c7b..d0baf474e 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go @@ -19,6 +19,7 @@ import ( 
"github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // PrepareAWSSSMChaosByTag contains the prepration and injection steps for the experiment @@ -44,6 +45,8 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment //create and upload the ssm document on the given aws service monitoring docs if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "could not create and upload the ssm document") + span.RecordError(err) return stacktrace.Propagate(err, "could not create and upload the ssm document") } experimentsDetails.IsDocsUploaded = true @@ -55,25 +58,37 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIDList)) if len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.SetStatus(codes.Error, "no instance id found for chaos injection") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.RecordError(err) + return err } switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Delete the ssm document on the given aws service monitoring docs err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to delete ssm doc") + span.RecordError(err) return stacktrace.Propagate(err, "failed to delete ssm doc") } diff --git a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go index a22aee935..3464df48b 100644 --- a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go +++ b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go @@ -64,7 +64,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup) if err != nil { - span.SetStatus(codes.Error, "failed to get instance names for disks") + span.SetStatus(codes.Error, "error fetching attached instances 
for disks") span.RecordError(err) return stacktrace.Propagate(err, "error fetching attached instances for disks") } @@ -75,7 +75,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper for instanceName := range instanceNamesWithDiskNames { attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName) if err != nil { - span.SetStatus(codes.Error, "failed to get attached disks") + span.SetStatus(codes.Error, "error fetching virtual disks") span.RecordError(err) return stacktrace.Propagate(err, "error fetching virtual disks") } @@ -93,13 +93,13 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in serial mode") + span.SetStatus(codes.Error, "could not run chaos in serial mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in parallel mode") + span.SetStatus(codes.Error, "could not run chaos in parallel mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } @@ -150,7 +150,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, diskName := range diskNameList { log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName) if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to detach disks") + span.SetStatus(codes.Error, "disk detachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk detachment check failed") } @@ -190,7 +190,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, diskName := range diskNameList { log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName) if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to attach disks") + span.SetStatus(codes.Error, "disk attachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk attachment check failed") } @@ -242,7 +242,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Waiting for disk to be detached log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName) if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to detach disks") + span.SetStatus(codes.Error, "disk detachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk detachment check failed") } @@ -253,6 +253,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); 
err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -272,7 +274,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Waiting for disk to be attached log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName) if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil { - span.SetStatus(codes.Error, "failed to attach disks") + span.SetStatus(codes.Error, "disk attachment check failed") span.RecordError(err) return stacktrace.Propagate(err, "disk attachment check failed") } diff --git a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go index 3bcc75121..f019b0440 100644 --- a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go +++ b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go @@ -62,13 +62,13 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in serial mode") + span.SetStatus(codes.Error, "could not run chaos in serial mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { - span.SetStatus(codes.Error, "failed to run chaos in parallel mode") + span.SetStatus(codes.Error, "could not run chaos in parallel mode") span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } @@ -118,13 +118,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop the Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop the Azure instance") } @@ -133,7 +133,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to completely stop log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance poweroff status") + span.SetStatus(codes.Error, "instance poweroff status check failed") 
span.RecordError(err) return stacktrace.Propagate(err, "instance poweroff status check failed") } @@ -156,13 +156,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Chaos]: Starting back the Azure instance") if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } @@ -171,7 +171,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to get in running state log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance power on status") + span.SetStatus(codes.Error, "instance power on status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } @@ -212,13 +212,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to stop the Azure instance") + span.SetStatus(codes.Error, "unable to stop Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } @@ -229,7 +229,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance poweroff status") + span.SetStatus(codes.Error, "instance poweroff status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance 
poweroff status check failed") } @@ -253,13 +253,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Starting back the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to start the Azure instance") + span.SetStatus(codes.Error, "unable to start the Azure instance") span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } @@ -270,7 +270,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { - span.SetStatus(codes.Error, "failed to check instance power on status") + span.SetStatus(codes.Error, "instance power on status check failed") span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } diff --git a/chaoslib/litmus/container-kill/lib/container-kill.go b/chaoslib/litmus/container-kill/lib/container-kill.go index a1ca06e81..13b2a52be 100644 --- a/chaoslib/litmus/container-kill/lib/container-kill.go +++ b/chaoslib/litmus/container-kill/lib/container-kill.go @@ -175,6 +175,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } } diff --git a/chaoslib/litmus/disk-fill/lib/disk-fill.go b/chaoslib/litmus/disk-fill/lib/disk-fill.go index 0c63f84b2..571d2dd96 100644 --- a/chaoslib/litmus/disk-fill/lib/disk-fill.go +++ b/chaoslib/litmus/disk-fill/lib/disk-fill.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/disk-fill/types" @@ -37,7 +38,10 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: 
cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } //set up the tunables if provided in range setChaosTunables(experimentsDetails) @@ -51,6 +55,8 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -64,12 +70,16 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -78,14 +88,21 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, execCommandDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, execCommandDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -103,6 +120,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -117,6 +136,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper 
pod") } @@ -126,6 +147,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -135,12 +158,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not wait for completion of helper pod") return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for disk-fill chaos log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -157,6 +183,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -171,6 +199,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -181,6 +211,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -190,12 +222,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not wait for completion of helper pod") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for disk-fill chaos log.Info("[Cleanup]: Deleting all the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -270,7 +306,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "unable to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go index f1fef9c9e..ef1932e61 100644 --- a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go +++ b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go @@ -9,6 +9,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -34,6 +35,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen //Select node for docker-service-kill experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node name") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node name") } } @@ -58,12 +61,16 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } // Creating the helper pod to perform docker-service-kill if err = createHelperPod(ctx, experimentsDetails, clients, chaosDetails, experimentsDetails.TargetNode); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -73,6 +80,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen log.Info("[Status]: Checking the status of the helper pod") if err = status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + 
span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -80,6 +89,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -88,6 +99,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen log.Info("[Status]: Check for the node to be in NotReady state") if err = status.CheckNodeNotReadyState(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check for NOT READY state") + span.RecordError(err) return stacktrace.Propagate(err, "could not check for NOT READY state") } @@ -96,12 +109,16 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod") } @@ -204,7 +221,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "unable to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go index dbc504628..152d1463f 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -63,14 +64,21 @@ func PrepareEBSLossByID(ctx context.Context, experimentsDetails *experimentTypes switch 
strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = ebsloss.InjectChaosInSerialMode(ctx, experimentsDetails, volumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = ebsloss.InjectChaosInParallelMode(ctx, experimentsDetails, volumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go index 6e8589129..a8107e0ca 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -61,14 +62,21 @@ func PrepareEBSLossByTag(ctx context.Context, experimentsDetails *experimentType switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = ebsloss.InjectChaosInSerialMode(ctx, experimentsDetails, targetEBSVolumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = ebsloss.InjectChaosInParallelMode(ctx, experimentsDetails, targetEBSVolumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go index 8fa9bb0e4..5dc1d87d3 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // InjectChaosInSerialMode will inject the ebs loss chaos in serial mode which means one after other @@ -41,12 +42,16 @@ func 
InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Get volume attachment details ec2InstanceID, device, err := ebs.GetVolumeAttachmentDetails(volumeID, experimentsDetails.VolumeTag, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the attachment info") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the attachment info") } //Detaching the ebs volume from the instance log.Info("[Chaos]: Detaching the EBS volume from the instance") if err = ebs.EBSVolumeDetach(volumeID, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } @@ -55,6 +60,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ebs volume detachment log.Infof("[Wait]: Wait for EBS volume detachment for volume %v", volumeID) if err = ebs.WaitForVolumeDetachment(volumeID, ec2InstanceID, experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } @@ -62,6 +69,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -73,6 +82,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Getting the EBS volume attachment status ebsState, err := ebs.GetEBSStatus(volumeID, ec2InstanceID, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the ebs status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the ebs status") } @@ -83,12 +94,16 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Attaching the ebs volume from the instance log.Info("[Chaos]: Attaching the EBS volume back to the instance") if err = ebs.EBSVolumeAttach(volumeID, ec2InstanceID, device, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } //Wait for ebs volume attachment log.Infof("[Wait]: Wait for EBS volume attachment for %v volume", volumeID) if err = ebs.WaitForVolumeAttachment(volumeID, ec2InstanceID, experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } } @@ -139,6 +154,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Detaching the ebs volume from the instance log.Info("[Chaos]: Detaching the EBS volume from the instance") if err := ebs.EBSVolumeDetach(volumeID, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } common.SetTargets(volumeID, "injected", "EBS", chaosDetails) @@ -146,6 +163,8 @@ func 
InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Info]: Checking if the detachment process initiated") if err := ebs.CheckEBSDetachmentInitialisation(targetEBSVolumeIDList, ec2InstanceIDList, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "failed to initialise the detachment") + span.RecordError(err) return stacktrace.Propagate(err, "failed to initialise the detachment") } @@ -153,6 +172,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ebs volume detachment log.Infof("[Wait]: Wait for EBS volume detachment for volume %v", volumeID) if err := ebs.WaitForVolumeDetachment(volumeID, ec2InstanceIDList[i], experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } } @@ -160,6 +181,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -173,6 +196,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Getting the EBS volume attachment status ebsState, err := ebs.GetEBSStatus(volumeID, ec2InstanceIDList[i], experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the ebs status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the ebs status") } @@ -183,12 +208,16 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Attaching the ebs volume from the instance log.Info("[Chaos]: Attaching the EBS volume from the instance") if err = ebs.EBSVolumeAttach(volumeID, ec2InstanceIDList[i], deviceList[i], experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } //Wait for ebs volume attachment log.Infof("[Wait]: Wait for EBS volume attachment for volume %v", volumeID) if err = ebs.WaitForVolumeAttachment(volumeID, ec2InstanceIDList[i], experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } } diff --git a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go index 5a844099a..1483f6fd7 100644 --- a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go +++ b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go @@ -21,6 +21,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -52,7 +53,10 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment //get the instance id or list of instance ids instanceIDList := strings.Split(experimentsDetails.Ec2InstanceID, ",") if experimentsDetails.Ec2InstanceID == "" || len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no EC2 instance ID found 
to terminate"} + span.SetStatus(codes.Error, "no EC2 instance ID found to terminate") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no EC2 instance ID found to terminate"} + span.RecordError(err) + return err } // watching for the abort signal and revert the chaos @@ -61,14 +65,21 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -109,6 +120,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -117,6 +130,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -124,6 +139,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -136,12 +153,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment if experimentsDetails.ManagedNodegroup != "enable" { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to 
get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -182,6 +203,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } common.SetTargets(id, "injected", "EC2", chaosDetails) @@ -191,6 +214,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } common.SetTargets(id, "reverted", "EC2 Instance ID", chaosDetails) @@ -199,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -213,6 +240,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, id := range instanceIDList { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -221,6 +250,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } diff --git a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go index 2c34b83b4..eb2ac319a 100644 --- a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go +++ b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go @@ -22,6 +22,7 @@ import ( "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var inject, abort chan os.Signal @@ -56,14 +57,21 @@ func PrepareEC2TerminateByTag(ctx context.Context, experimentsDetails *experimen switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, 
clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -104,6 +112,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -112,6 +122,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } @@ -119,6 +131,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -131,12 +145,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment if experimentsDetails.ManagedNodegroup != "enable" { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -176,6 +194,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Stopping the EC2 instance log.Info("[Chaos]: Stopping the desired EC2 instance") if err := 
awslib.EC2Stop(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } common.SetTargets(id, "injected", "EC2", chaosDetails) @@ -185,6 +205,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to completely stop log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id) if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to stop") } } @@ -192,6 +214,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -206,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, id := range instanceIDList { log.Info("[Chaos]: Starting back the EC2 instance") if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } @@ -214,6 +240,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for ec2 instance to get in running state log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id) if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil { + span.SetStatus(codes.Error, "ec2 instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "ec2 instance failed to start") } } diff --git a/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go b/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go index 42efdf8bd..15032769e 100644 --- a/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go +++ b/chaoslib/litmus/gcp-vm-disk-loss-by-label/lib/gcp-vm-disk-loss-by-label.go @@ -21,6 +21,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "google.golang.org/api/compute/v1" ) @@ -69,14 +70,21 @@ func PrepareDiskVolumeLossByLabel(ctx context.Context, computeService *compute.S switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, diskVolumeNamesList, experimentsDetails.TargetDiskInstanceNamesList, experimentsDetails.Zones, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "failed to run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, diskVolumeNamesList, experimentsDetails.TargetDiskInstanceNamesList, experimentsDetails.Zones, clients, resultDetails, 
eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "failed to run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } } @@ -111,6 +119,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Detaching the disk volume from the instance log.Info("[Chaos]: Detaching the disk volume from the instance") if err = gcp.DiskVolumeDetach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "failed to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -119,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Wait for disk volume detachment log.Infof("[Wait]: Wait for disk volume detachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "failed to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach the disk volume from the vm instance") } @@ -126,6 +138,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -137,6 +151,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone) if err != nil { + span.SetStatus(codes.Error, "failed to get the disk volume status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the disk volume status") } @@ -147,12 +163,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Attaching the disk volume to the instance log.Info("[Chaos]: Attaching the disk volume back to the instance") if err = gcp.DiskVolumeAttach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait for disk volume attachment log.Infof("[Wait]: Wait for disk volume attachment for %v volume", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, 
instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "failed to attach the disk volume to the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to attach the disk volume to the vm instance") } } @@ -188,6 +208,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Detaching the disk volume from the instance log.Info("[Chaos]: Detaching the disk volume from the instance") if err = gcp.DiskVolumeDetach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -199,6 +221,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Wait for disk volume detachment log.Infof("[Wait]: Wait for disk volume detachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach the disk volume from the vm instance") } } @@ -206,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -219,6 +245,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone) if err != nil { + span.SetStatus(codes.Error, "failed to get the disk status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the disk status") } @@ -229,12 +257,16 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Attaching the disk volume to the instance log.Info("[Chaos]: Attaching the disk volume to the instance") if err = gcp.DiskVolumeAttach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait for disk volume attachment log.Infof("[Wait]: Wait for disk volume attachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to attach the disk volume to the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to attach the disk volume to the vm instance") } } diff --git a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go index 6a99010d9..e19e8a59d 100644 --- 
a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go +++ b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go @@ -22,6 +22,7 @@ import ( "github.com/palantir/stacktrace" "github.com/pkg/errors" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "google.golang.org/api/compute/v1" ) @@ -59,6 +60,8 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service, //get the device names for the given disks if err := getDeviceNamesList(computeService, experimentsDetails, diskNamesList, diskZonesList); err != nil { + span.SetStatus(codes.Error, "failed to fetch the disk device names") + span.RecordError(err) return stacktrace.Propagate(err, "failed to fetch the disk device names") } @@ -74,14 +77,21 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service, switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, diskNamesList, diskZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, diskNamesList, diskZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } } @@ -114,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Detaching the disk volume from the instance log.Infof("[Chaos]: Detaching %s disk volume from the instance", targetDiskVolumeNamesList[i]) if err = gcp.DiskVolumeDetach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -122,6 +134,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Wait for disk volume detachment log.Infof("[Wait]: Wait for %s disk volume detachment", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to detach disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach disk volume from the vm instance") } @@ -129,6 +143,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, 
clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -140,6 +156,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i]) if err != nil { + span.SetStatus(codes.Error, "failed to get disk status") + span.RecordError(err) return stacktrace.Propagate(err, fmt.Sprintf("failed to get %s disk volume status", targetDiskVolumeNamesList[i])) } @@ -150,12 +168,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Attaching the disk volume to the instance log.Infof("[Chaos]: Attaching %s disk volume back to the instance", targetDiskVolumeNamesList[i]) if err = gcp.DiskVolumeAttach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait for disk volume attachment log.Infof("[Wait]: Wait for %s disk volume attachment", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to attach disk volume to the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to attach disk volume to the vm instance") } } @@ -188,6 +210,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Detaching the disk volume from the instance log.Infof("[Chaos]: Detaching %s disk volume from the instance", targetDiskVolumeNamesList[i]) if err = gcp.DiskVolumeDetach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -199,6 +223,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Wait for disk volume detachment log.Infof("[Wait]: Wait for %s disk volume detachment", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to detach disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach disk volume from the vm instance") } } @@ -206,6 +232,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -219,6 +247,8 @@ func 
injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i]) if err != nil { + span.SetStatus(codes.Error, "failed to get disk status") + span.RecordError(err) return errors.Errorf("failed to get the disk status, err: %v", err) } @@ -229,12 +259,16 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv //Attaching the disk volume to the instance log.Infof("[Chaos]: Attaching %s disk volume to the instance", targetDiskVolumeNamesList[i]) if err = gcp.DiskVolumeAttach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait for disk volume attachment log.Infof("[Wait]: Wait for %s disk volume attachment", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "unable to attach disk volume to the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to attach disk volume to the vm instance") } } diff --git a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go index 644a02137..3672b9d96 100644 --- a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go +++ b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go @@ -21,6 +21,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "google.golang.org/api/compute/v1" ) @@ -56,14 +57,21 @@ func PrepareVMStopByLabel(ctx context.Context, computeService *compute.Service, switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, computeService, experimentsDetails, instanceNamesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, computeService, experimentsDetails, instanceNamesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ 
-105,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Stopping the VM instance log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "VM instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "VM instance failed to stop") } @@ -113,6 +123,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Wait for VM instance to completely stop log.Infof("[Wait]: Wait for VM instance %s to stop", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "VM instance failed to fully shutdown") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to fully shutdown") } @@ -120,6 +132,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -134,6 +148,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // wait for VM instance to get in running state log.Infof("[Wait]: Wait for VM instance %s to get in RUNNING state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "unable to start the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start %s vm instance") } @@ -142,12 +158,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // starting the VM instance log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "vm instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to start") } // wait for VM instance to get in running state log.Infof("[Wait]: Wait for VM instance %s to get in RUNNING state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "unable to start the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start %s vm instance") } } @@ -191,6 +211,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // stopping the VM instance log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "vm instance failed to stop") + 
span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to stop") } @@ -202,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // wait for VM instance to completely stop log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "vm instance failed to fully shutdown") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to fully shutdown") } } @@ -209,6 +233,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -225,6 +251,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv log.Infof("[Wait]: Wait for VM instance '%v' to get in running state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "unable to start the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the vm instance") } @@ -238,6 +266,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv log.Info("[Chaos]: Starting back the VM instance") if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "vm instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to start") } } @@ -247,6 +277,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv log.Infof("[Wait]: Wait for VM instance '%v' to get in running state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { + span.SetStatus(codes.Error, "unable to start the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the vm instance") } diff --git a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go index 281e1c211..9ba8a0900 100644 --- a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go +++ b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go @@ -21,6 +21,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "google.golang.org/api/compute/v1" ) @@ -61,14 +62,21 @@ func PrepareVMStop(ctx context.Context, computeService *compute.Service, experim switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, instanceNamesList, instanceZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could 
not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, instanceNamesList, instanceZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } // wait for the ramp time after chaos injection @@ -110,6 +118,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Stopping the VM instance log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "vm instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to stop") } @@ -118,6 +128,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Wait for VM instance to completely stop log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "vm instance failed to fully shutdown") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to fully shutdown") } @@ -125,6 +137,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -139,12 +153,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // starting the VM instance log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "vm instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to start") } // wait for VM instance to get in running state log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "unable to start vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start vm instance") } @@ -153,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, 
computeService *compute.Servic // wait for VM instance to get in running state log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "unable to start vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start vm instance") } } @@ -197,6 +217,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // stopping the VM instance log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "vm instance failed to stop") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to stop") } @@ -208,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // wait for VM instance to completely stop log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "vm instance failed to fully shutdown") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to fully shutdown") } } @@ -215,6 +239,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -230,6 +256,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv for i := range instanceNamesList { log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i]) if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "vm instance failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "vm instance failed to start") } } @@ -239,6 +267,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "unable to start vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start vm instance") } @@ -252,6 +282,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i]) if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil { + span.SetStatus(codes.Error, "unable to start vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to 
start vm instance") } diff --git a/chaoslib/litmus/http-chaos/lib/http-chaos.go b/chaoslib/litmus/http-chaos/lib/http-chaos.go index 59323f0b8..a36c7dbab 100644 --- a/chaoslib/litmus/http-chaos/lib/http-chaos.go +++ b/chaoslib/litmus/http-chaos/lib/http-chaos.go @@ -11,6 +11,8 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/http-chaos/types" @@ -27,6 +29,7 @@ import ( // PrepareAndInjectChaos contains the preparation & injection steps func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, args string) error { + span := trace.SpanFromContext(ctx) var err error // Get the target pod details for the chaos execution @@ -39,6 +42,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -52,12 +57,16 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not get experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -67,14 +76,21 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, args, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, args, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } return nil @@ -108,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, 
experimentsDetails *experiment runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID, args); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -117,6 +135,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -126,12 +146,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for http chaos log.Info("[Cleanup]: Deleting the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -147,6 +171,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -161,6 +187,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID, args); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -171,6 +199,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -180,12 +210,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, 
common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } // Deleting all the helper pod for http chaos log.Info("[Cleanup]: Deleting all the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -266,7 +300,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil }