Skip to content

Commit 6d0e770

Browse files
committed
roachtest: adding defensive code in ceph/reef test
We have seen sporadic failures in the Ceph tests caused by failures to create users in the Ceph Object Gateway. To address this, we are adding code that checks the gateway is up by submitting a read-only request before attempting to add the user. Epic: none Fixes: #148731 Release note: None
1 parent fafb5e5 commit 6d0e770

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

pkg/cmd/roachtest/tests/s3_microceph.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"fmt"
1111
"net/url"
1212
"path/filepath"
13+
"time"
1314

1415
"github.com/cockroachdb/cockroach/pkg/cloud/amazon"
1516
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
@@ -124,7 +125,9 @@ func (m cephManager) install(ctx context.Context) {
124125
rgwCmd = rgwCmd + ` --ssl-certificate="$(base64 -w0 certs/node.crt)" --ssl-private-key="$(base64 -w0 certs/node.key)"`
125126
}
126127
m.run(ctx, `starting object gateway`, rgwCmd)
127-
128+
// We have seen occasional failures in creating users, so we
129+
// wait until a read only request succeeds before proceeding.
130+
m.checkRGW(ctx)
128131
m.run(ctx, `creating backup user`,
129132
`sudo radosgw-admin user create --uid=backup --display-name=backup`)
130133
m.run(ctx, `add keys to the user`,
@@ -166,3 +169,18 @@ func (m cephManager) run(ctx context.Context, msg string, cmd ...string) {
166169
m.c.Run(ctx, option.WithNodes(m.cephNodes), cmd...)
167170
m.t.Status(msg, " done")
168171
}
172+
173+
// checkRGW verifies that the Ceph Object Gateway is up.
174+
func (m cephManager) checkRGW(ctx context.Context) {
175+
m.t.Status("waiting for Ceph Object Gateway...")
176+
cmd := `sudo radosgw-admin user list`
177+
var err error
178+
for i := 0; i < 10; i++ {
179+
// Sleep for few seconds, then try the command.
180+
time.Sleep(2 * time.Second)
181+
if err = m.c.RunE(ctx, option.WithNodes(m.cephNodes), cmd); err == nil {
182+
return
183+
}
184+
}
185+
m.t.Error("Ceph Object Gateway not running", err)
186+
}

0 commit comments

Comments
 (0)