Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TC Automation] Add continuous node reboot tests #2741

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@ RUN curl -fsSL https://clis.cloud.ibm.com/install/linux | sh && \
ibmcloud plugin install -f container-service

# Install vCluster binary
RUN curl -L -o vcluster "https://github.com/loft-sh/vcluster/releases/latest/download/vcluster-linux-amd64" \
&& install -c -m 0755 vcluster /usr/local/bin \
RUN curl -L -o vcluster "https://github.com/loft-sh/vcluster/releases/download/v0.15.7/vcluster-linux-amd64" \
&& install -c -m 0755 vcluster /usr/local/bin \
&& rm -f vcluster



# No need to copy *everything*. This keeps the cache useful
COPY vendor vendor
COPY Makefile Makefile
Expand Down
4 changes: 3 additions & 1 deletion drivers/volume/portworx/schedops/k8s-schedops.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,9 @@ PodLoop:

grepPattern := pvName // For normal PX vols, and for FBDA, we can grep for the filesystem name
if pureType, ok := vol.Labels[k8sdriver.PureDAVolumeLabel]; ok && pureType == k8sdriver.PureDAVolumeLabelValueFA {
grepPattern = strings.ToLower(vol.Labels[k8sdriver.FADAVolumeSerialLabel]) // FADA we need to grep by volume serial
if strings.ToLower(vol.Labels[k8sdriver.FADAVolumeSerialLabel])!= "unavailable"{
grepPattern = strings.ToLower(vol.Labels[k8sdriver.FADAVolumeSerialLabel])
} // FADA we need to grep by volume serial
if isNvme {
grepPattern = fmt.Sprintf("%s | grep %s", grepPattern[:14], grepPattern[14:])
}
Expand Down
93 changes: 93 additions & 0 deletions tests/basic/nodeCrash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"github.com/portworx/torpedo/pkg/log"
"github.com/portworx/torpedo/pkg/testrailuttils"
. "github.com/portworx/torpedo/tests"
"time"

)

var _ = Describe("{CrashOneNode}", func() {
Expand Down Expand Up @@ -102,3 +104,94 @@ var _ = Describe("{CrashOneNode}", func() {
AfterEachTest(contexts, testrailID, runID)
})
})

// NodeRebootForOneDay schedules applications and then reboots two storage
// driver nodes in parallel, over and over, until 24 hours have elapsed.
// After every reboot cycle the node connection, the volume driver and the
// scheduled applications are validated.
var _ = Describe("{NodeRebootForOneDay}", func() {
	/* https://purestorage.atlassian.net/browse/PTX-25705
	1. Schedule applications
	2. Reboot 2 node(s) continuously for 24 hours
	3. Validate applications
	4. Destroy applications
	*/
	// NOTE(review): step 4 (destroy applications) is not implemented by this
	// spec; add the project's usual teardown call once confirmed.

	JustBeforeEach(func() {
		StartTorpedoTest("NodeRebootForOneDay", "Reboot node(s) continuously for 24 hours", nil, 0)
	})

	itLog := "Reboot node(s) continuously for 24 hours"
	It(itLog, func() {
		log.InfoD(itLog)
		contexts := make([]*scheduler.Context, 0)
		stepLog := "schedule applications"
		Step(stepLog, func() {
			log.InfoD(stepLog)
			for i := 0; i < Inst().GlobalScaleFactor; i++ {
				contexts = append(contexts, ScheduleApplications(fmt.Sprintf("noderebootoneday-%d", i))...)
			}
			ValidateApplications(contexts)
		})

		stepLog = "reboot 2 nodes in parallel continuously for 24 hours and validate applications"
		Step(stepLog, func() {
			log.InfoD(stepLog)

			const (
				rebootNodeCount = 2              // number of nodes rebooted in parallel
				rebootDuration  = 24 * time.Hour // total wall-clock time to keep rebooting
				nodeTimeout     = 60 * time.Minute
				retryInterval   = 30 * time.Second
			)

			// Guard the slice expression: slicing [:2] on a smaller cluster would panic.
			storageNodes := node.GetStorageDriverNodes()
			dash.VerifyFatal(len(storageNodes) >= rebootNodeCount, true,
				fmt.Sprintf("Validate at least %d storage driver nodes are available", rebootNodeCount))
			nodesToReboot := storageNodes[:rebootNodeCount]

			// A shared deadline is used instead of a shared time.Timer: a timer
			// channel delivers a single value, so only one of several goroutines
			// would ever observe the expiry and the rest would loop forever.
			deadline := time.Now().Add(rebootDuration)

			doneChan := make(chan struct{}, len(nodesToReboot))

			rebootNode := func(nodeToReboot node.Node) {
				// Deferred calls run in LIFO order: GinkgoRecover first turns a
				// failed assertion into a spec failure, then the done signal is
				// always sent so the waiting loop below can never block forever.
				defer func() { doneChan <- struct{}{} }()
				defer GinkgoRecover()

				// The deadline is checked between cycles; a cycle already in
				// progress is allowed to finish.
				for time.Now().Before(deadline) {
					// Reboot the node
					err := Inst().N.RebootNode(nodeToReboot, node.RebootNodeOpts{
						Force: false,
						ConnectionOpts: node.ConnectionOpts{
							Timeout:         nodeTimeout,
							TimeBeforeRetry: retryInterval,
						},
					})
					dash.VerifySafely(err, nil, "Validate node is rebooted")

					// Verify that the node is back online
					err = Inst().N.TestConnection(nodeToReboot, node.ConnectionOpts{
						Timeout:         nodeTimeout,
						TimeBeforeRetry: retryInterval,
					})
					dash.VerifyFatal(err, nil, "Validate node is back up")

					// Wait until PX is up on the node
					err = Inst().V.WaitDriverUpOnNode(nodeToReboot, nodeTimeout)
					dash.VerifyFatal(err, nil, "Validate volume driver is up")

					log.InfoD("Rebooted and validated node %s", nodeToReboot.Name)

					// Validate applications after each reboot cycle
					ValidateApplications(contexts)
				}
			}

			// Start rebooting nodes in parallel using goroutines
			for _, nodeToReboot := range nodesToReboot {
				go rebootNode(nodeToReboot)
			}

			// Wait for every reboot goroutine to finish (deadline reached or failure)
			for range nodesToReboot {
				<-doneChan
			}
		})
	})

	JustAfterEach(func() {
		defer EndTorpedoTest()
	})
})
77 changes: 24 additions & 53 deletions tests/basic/pure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ var _ = Describe("{FADAVolTokenTimout}", func() {
i := 0
Step(stepLog, func() {
contexts = make([]*scheduler.Context, 0)
appScale := 200
appScale := 12

for i = 1; i < appScale; i++ {
contexts = append(contexts, ScheduleApplications(fmt.Sprintf("fadavoltkn-%d", i))...)
Expand All @@ -434,7 +434,7 @@ var _ = Describe("{FADAVolTokenTimout}", func() {
var wg sync.WaitGroup

stepLog = "Attaching 40 volumes at same time"
scheduleCount := 40
scheduleCount := 15
Step(stepLog, func() {
scheduleAppParallel := func(c int) {
defer wg.Done()
Expand Down Expand Up @@ -1229,7 +1229,7 @@ var _ = Describe("{AppCleanUpWhenPxKill}", func() {

Provisioner := fmt.Sprintf("%v", portworx.PortworxCsi)
//Number of apps to be deployed
NumberOfAppsToBeDeployed := 300
NumberOfAppsToBeDeployed := 8

stepLog = fmt.Sprintf("schedule application")
Step(stepLog, func() {
Expand Down Expand Up @@ -2354,7 +2354,7 @@ var _ = Describe("{FADAVolMigrateValidation}", func() {
stepLog = "run the multipath -ll command on the node where the pods were scheduled before deleting"
Step(stepLog, func() {
// sleep for 60 seconds for all the entries to update
time.Sleep(30 * time.Second)
time.Sleep(120 * time.Second)
log.InfoD("Sleeping for 30 seconds for all the entries to update")
cmd := fmt.Sprintf("multipath -ll")
output, err := runCmd(cmd, selectedNode)
Expand Down Expand Up @@ -2421,6 +2421,7 @@ var _ = Describe("{FADAVolMigrateValidation}", func() {
})
})


var _ = Describe("{VolAttachFAPxRestart}", func() {
/*
https://purestorage.atlassian.net/browse/PTX-21440
Expand All @@ -2439,7 +2440,6 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
var (
hostName = fmt.Sprintf("torpedo-host-%v", time.Now().UnixNano())
volumeName = fmt.Sprintf("torpedo-vol-%v", time.Now().UnixNano())
faSecret = Inst().FaSecret
FAclient *flasharray.Client
MultipathBeforeRestart string
faMgmtEndPoint string
Expand All @@ -2454,6 +2454,9 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
// select a random node to run the test
n := node.GetStorageDriverNodes()[0]

faSecret := Inst().FaSecret
log.InfoD("Fa secret : %s",faSecret)

stepLog := "get the secrete of FA which is not present in pure secret"
Step(stepLog, func() {
log.InfoD(stepLog)
Expand Down Expand Up @@ -2524,7 +2527,7 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
}

//create a volume on the FA
volSize := 1048576 * rand.Intn(10)
volSize := 1048576 * (rand.Intn(10) + 1)
volume, err := pureutils.CreateVolumeOnFABackend(FAclient, volumeName, volSize)
log.FailOnError(err, "Failed to create volume on FA")
log.InfoD("Volume created on FA: %v", volume.Name)
Expand All @@ -2537,7 +2540,6 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
})
stepLog = "Run iscsiadm commands to login to the controllers"
Step(stepLog, func() {

//Run iscsiadm commands to login to the controllers
networkInterfaces, err := pureutils.GetSpecificInterfaceBasedOnServiceType(FAclient, "iscsi")

Expand All @@ -2546,7 +2548,7 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
log.FailOnError(err, "Failed to login into controller")
log.InfoD("Successfully logged into controller: %v", networkInterface.Address)
}

// run multipath after login
cmd := "multipath -ll"
MultipathBeforeRestart, err = runCmd(cmd, n)
Expand Down Expand Up @@ -2584,15 +2586,6 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
stepLog = "Delete the volume and host from the FA"
Step(stepLog, func() {
log.InfoD(stepLog)
//log out of all the controllers
networkInterfaces, err := pureutils.GetSpecificInterfaceBasedOnServiceType(FAclient, "iscsi")

for _, networkInterface := range networkInterfaces {
err = LogoutFromController(n, networkInterface, *FAclient)
log.FailOnError(err, "Failed to login into controller")
log.InfoD("Successfully logged out of controller: %v", networkInterface.Address)
}

//disconnect volume from host
_, err = pureutils.DisConnectVolumeFromHost(FAclient, hostName, volumeName)
log.FailOnError(err, "Failed to disconnect volume from host")
Expand All @@ -2609,6 +2602,14 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
log.FailOnError(err, "Failed to delete host on FA")
log.InfoD("Host deleted on FA: %v", hostName)
}
//log out of all the controllers
networkInterfaces, err := pureutils.GetSpecificInterfaceBasedOnServiceType(FAclient, "iscsi")

for _, networkInterface := range networkInterfaces {
err = LogoutFromController(n, networkInterface, *FAclient)
log.FailOnError(err, "Failed to login into controller")
log.InfoD("Successfully logged out of controller: %v", networkInterface.Address)
}
})

})
Expand All @@ -2618,6 +2619,7 @@ var _ = Describe("{VolAttachFAPxRestart}", func() {
})
})


func LoginIntoController(n node.Node, networkInterface flasharray.NetworkInterface, FAclient flasharray.Client) error {
ipAddress := networkInterface.Address
iqn, err := GetIQNOfFA(n, FAclient)
Expand Down Expand Up @@ -2742,30 +2744,13 @@ var _ = Describe("{VolAttachSameFAPxRestart}", func() {
log.InfoD("Volume connected to host: %v", connectedVolume.Name)

})
stepLog = "Run iscsiadm commands to login to the controllers"
stepLog = "Run multipath command before restart"
Step(stepLog, func() {

//Run iscsiadm commands to login to the controllers
networkInterfaces, err := pureutils.GetSpecificInterfaceBasedOnServiceType(FAclient, "iscsi")

for _, networkInterface := range networkInterfaces {
err = LoginIntoController(n, networkInterface, *FAclient)
log.FailOnError(err, "Failed to login into controller")
log.InfoD("Successfully logged into controller: %v", networkInterface.Address)
}

//run multipath before refresh
//run multipath before refresh
cmd := "multipath -ll"
output, err := runCmd(cmd, n)
log.FailOnError(err, "Failed to run multipath -ll command on node %v", n.Name)
log.InfoD("Output of multipath -ll command before PX restart : %v", output)

// Refresh the iscsi session
err = RefreshIscsiSession(n)
log.FailOnError(err, "Failed to refresh iscsi session")
log.InfoD("Successfully refreshed iscsi session")

//sleep for 10s for the entries to update
time.Sleep(10 * time.Second)

// run multipath after login
Expand All @@ -2774,8 +2759,7 @@ var _ = Describe("{VolAttachSameFAPxRestart}", func() {
log.FailOnError(err, "Failed to run multipath -ll command on node %v", n.Name)
log.InfoD("Output of multipath -ll command before PX restart : %v", MultipathBeforeRestart)

// multipath before and after shoouldn't be same
dash.VerifyFatal(MultipathBeforeRestart != output, true, "Multipath entries are different before and after refresh")


})
stepLog = "create ext4 file system on top of the volume,mount it to /home/test Start running fio on the volume"
Expand Down Expand Up @@ -2864,20 +2848,6 @@ var _ = Describe("{VolAttachSameFAPxRestart}", func() {
log.FailOnError(err, "Failed to delete volume on FA")
log.InfoD("Volume deleted on FA: %v", volumeName)

//Refresh the iscsi session
err = RefreshIscsiSession(n)
log.FailOnError(err, "Failed to refresh iscsi session")
log.InfoD("Successfully refreshed iscsi session")

//log out of all the controllers
networkInterfaces, err := pureutils.GetSpecificInterfaceBasedOnServiceType(FAclient, "iscsi")

for _, networkInterface := range networkInterfaces {
err = LogoutFromController(n, networkInterface, *FAclient)
log.FailOnError(err, "Failed to login into controller")
log.InfoD("Successfully logged out of controller: %v", networkInterface.Address)
}

})

})
Expand All @@ -2887,6 +2857,7 @@ var _ = Describe("{VolAttachSameFAPxRestart}", func() {
})
})


/*
This test deploys app with FBDA volume having storageClass with pure_nfs_endpoint parameter.
It validates that FBDA volume gets consumed over IP mentioned in `pure_nfs_endpoint` parameter of storageClass.
Expand Down Expand Up @@ -7391,4 +7362,4 @@ func modifyPVCName(filePath, newPVCName string) error {
}

return nil
}
}