Skip to content

Commit

Permalink
feat: distributed tracing span error
Browse files Browse the repository at this point in the history
Signed-off-by: Jaeyeon Park <[email protected]>
  • Loading branch information
moggaa committed Jan 2, 2025
1 parent 36985be commit f97417f
Show file tree
Hide file tree
Showing 17 changed files with 405 additions and 41 deletions.
19 changes: 17 additions & 2 deletions chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/palantir/stacktrace"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
)

var (
Expand Down Expand Up @@ -49,6 +50,8 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT

//create and upload the ssm document on the given aws service monitoring docs
if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil {
span.SetStatus(codes.Error, "could not create and upload the ssm document")
span.RecordError(err)
return stacktrace.Propagate(err, "could not create and upload the ssm document")
}
experimentsDetails.IsDocsUploaded = true
Expand All @@ -60,25 +63,37 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT
//get the instance id or list of instance ids
instanceIDList := strings.Split(experimentsDetails.EC2InstanceID, ",")
if experimentsDetails.EC2InstanceID == "" || len(instanceIDList) == 0 {
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
span.SetStatus(codes.Error, "no instance id found for chaos injection")
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
span.RecordError(err)
return err
}

switch strings.ToLower(experimentsDetails.Sequence) {
case "serial":
if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
span.SetStatus(codes.Error, "could not run chaos in serial mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in serial mode")
}
case "parallel":
if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
}
default:
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
span.SetStatus(codes.Error, "sequence is not supported")
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
span.RecordError(err)
return err
}

//Delete the ssm document on the given aws service monitoring docs
err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region)
if err != nil {
span.SetStatus(codes.Error, "failed to delete ssm doc")
span.RecordError(err)
return stacktrace.Propagate(err, "failed to delete ssm doc")
}

Expand Down
19 changes: 17 additions & 2 deletions chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/litmuschaos/litmus-go/pkg/utils/common"
"github.com/palantir/stacktrace"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
)

// PrepareAWSSSMChaosByTag contains the preparation and injection steps for the experiment
Expand All @@ -44,6 +45,8 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment

//create and upload the ssm document on the given aws service monitoring docs
if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil {
span.SetStatus(codes.Error, "could not create and upload the ssm document")
span.RecordError(err)
return stacktrace.Propagate(err, "could not create and upload the ssm document")
}
experimentsDetails.IsDocsUploaded = true
Expand All @@ -55,25 +58,37 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment
log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIDList))

if len(instanceIDList) == 0 {
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
span.SetStatus(codes.Error, "no instance id found for chaos injection")
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"}
span.RecordError(err)
return err
}

switch strings.ToLower(experimentsDetails.Sequence) {
case "serial":
if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
span.SetStatus(codes.Error, "could not run chaos in serial mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in serial mode")
}
case "parallel":
if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil {
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
}
default:
return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
span.SetStatus(codes.Error, "sequence is not supported")
err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
span.RecordError(err)
return err
}

//Delete the ssm document on the given aws service monitoring docs
err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region)
if err != nil {
span.SetStatus(codes.Error, "failed to delete ssm doc")
span.RecordError(err)
return stacktrace.Propagate(err, "failed to delete ssm doc")
}

Expand Down
18 changes: 10 additions & 8 deletions chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup)

if err != nil {
span.SetStatus(codes.Error, "failed to get instance names for disks")
span.SetStatus(codes.Error, "error fetching attached instances for disks")
span.RecordError(err)
return stacktrace.Propagate(err, "error fetching attached instances for disks")
}
Expand All @@ -75,7 +75,7 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
for instanceName := range instanceNamesWithDiskNames {
attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName)
if err != nil {
span.SetStatus(codes.Error, "failed to get attached disks")
span.SetStatus(codes.Error, "error fetching virtual disks")
span.RecordError(err)
return stacktrace.Propagate(err, "error fetching virtual disks")
}
Expand All @@ -93,13 +93,13 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper
switch strings.ToLower(experimentsDetails.Sequence) {
case "serial":
if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
span.SetStatus(codes.Error, "failed to run chaos in serial mode")
span.SetStatus(codes.Error, "could not run chaos in serial mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in serial mode")
}
case "parallel":
if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
}
Expand Down Expand Up @@ -150,7 +150,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
for _, diskName := range diskNameList {
log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
span.SetStatus(codes.Error, "failed to detach disks")
span.SetStatus(codes.Error, "disk detachment check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "disk detachment check failed")
}
Expand Down Expand Up @@ -190,7 +190,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
for _, diskName := range diskNameList {
log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
span.SetStatus(codes.Error, "failed to attach disks")
span.SetStatus(codes.Error, "disk attachment check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "disk attachment check failed")
}
Expand Down Expand Up @@ -242,7 +242,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
// Waiting for disk to be detached
log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName)
if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil {
span.SetStatus(codes.Error, "failed to detach disks")
span.SetStatus(codes.Error, "disk detachment check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "disk detachment check failed")
}
Expand All @@ -253,6 +253,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
if len(resultDetails.ProbeDetails) != 0 && i == 0 {
if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
span.SetStatus(codes.Error, "failed to run probes")
span.RecordError(err)
return stacktrace.Propagate(err, "failed to run probes")
}
}
Expand All @@ -272,7 +274,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
// Waiting for disk to be attached
log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName)
if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil {
span.SetStatus(codes.Error, "failed to attach disks")
span.SetStatus(codes.Error, "disk attachment check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "disk attachment check failed")
}
Expand Down
28 changes: 14 additions & 14 deletions chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E
switch strings.ToLower(experimentsDetails.Sequence) {
case "serial":
if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
span.SetStatus(codes.Error, "failed to run chaos in serial mode")
span.SetStatus(codes.Error, "could not run chaos in serial mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in serial mode")
}
case "parallel":
if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
span.SetStatus(codes.Error, "failed to run chaos in parallel mode")
span.SetStatus(codes.Error, "could not run chaos in parallel mode")
span.RecordError(err)
return stacktrace.Propagate(err, "could not run chaos in parallel mode")
}
Expand Down Expand Up @@ -118,13 +118,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName)
if experimentsDetails.ScaleSet == "enable" {
if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to stop the Azure instance")
span.SetStatus(codes.Error, "unable to stop the Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to stop the Azure instance")
}
} else {
if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to stop the Azure instance")
span.SetStatus(codes.Error, "unable to stop the Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to stop the Azure instance")
}
Expand All @@ -133,7 +133,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
// Wait for Azure instance to completely stop
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName)
if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to check instance poweroff status")
span.SetStatus(codes.Error, "instance poweroff status check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "instance poweroff status check failed")
}
Expand All @@ -156,13 +156,13 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
log.Info("[Chaos]: Starting back the Azure instance")
if experimentsDetails.ScaleSet == "enable" {
if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to start the Azure instance")
span.SetStatus(codes.Error, "unable to start the Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to start the Azure instance")
}
} else {
if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to start the Azure instance")
span.SetStatus(codes.Error, "unable to start the Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to start the Azure instance")
}
Expand All @@ -171,7 +171,7 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
// Wait for Azure instance to get in running state
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName)
if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to check instance power on status")
span.SetStatus(codes.Error, "instance power on status check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "instance power on status check failed")
}
Expand Down Expand Up @@ -212,13 +212,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName)
if experimentsDetails.ScaleSet == "enable" {
if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to stop the Azure instance")
span.SetStatus(codes.Error, "unable to stop Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to stop Azure instance")
}
} else {
if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to stop the Azure instance")
span.SetStatus(codes.Error, "unable to stop Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to stop Azure instance")
}
Expand All @@ -229,7 +229,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
for _, vmName := range instanceNameList {
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName)
if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to check instance poweroff status")
span.SetStatus(codes.Error, "instance poweroff status check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "instance poweroff status check failed")
}
Expand All @@ -253,13 +253,13 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
log.Infof("[Chaos]: Starting back the Azure instance: %v", vmName)
if experimentsDetails.ScaleSet == "enable" {
if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to start the Azure instance")
span.SetStatus(codes.Error, "unable to start the Azure instance")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to start the Azure instance")
}
} else {
if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to start the Azure instance")
span.SetStatus(codes.Error, "unable to start the Azure instancee")
span.RecordError(err)
return stacktrace.Propagate(err, "unable to start the Azure instance")
}
Expand All @@ -270,7 +270,7 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
for _, vmName := range instanceNameList {
log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName)
if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil {
span.SetStatus(codes.Error, "failed to check instance power on status")
span.SetStatus(codes.Error, "instance power on status check failed")
span.RecordError(err)
return stacktrace.Propagate(err, "instance power on status check failed")
}
Expand Down
2 changes: 2 additions & 0 deletions chaoslib/litmus/container-kill/lib/container-kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
// run the probes during chaos
if len(resultDetails.ProbeDetails) != 0 {
if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
span.SetStatus(codes.Error, "failed to run probes")
span.RecordError(err)
return err
}
}
Expand Down
Loading

0 comments on commit f97417f

Please sign in to comment.