Commit 393adc7

fix determinism
1 parent 841f54b commit 393adc7

12 files changed: +146 -54 lines changed

pkg/exploration/explore.go

Lines changed: 69 additions & 8 deletions
@@ -12,16 +12,18 @@ import (
     "attacknet/cmd/pkg/types"
     "context"
     "fmt"
+    "github.com/kurtosis-tech/stacktrace"
     log "github.com/sirupsen/logrus"
     "golang.org/x/exp/rand"
+    "net/http"
     "os"
     "os/signal"
     "syscall"
     "time"
 )
 
 const WaitBetweenTestsSecs = 60
-const Seed = 555
+const Seed = 557
 
 func getRandomAttackSize() suite.AttackSize {
     //return suite.AttackOne
@@ -51,7 +53,7 @@ func buildRandomLatencyTest(targetDescription string, targetSelectors []*suite.C
     minDelayMilliSeconds := 10
     maxDelayMilliSeconds := 1000
     minDurationSeconds := 10
-    maxDurationSeconds := 1000
+    maxDurationSeconds := 300
     minJitterMilliseconds := 10
     maxJitterMilliseconds := 1000
     minCorrelation := 0
@@ -75,6 +77,38 @@ func buildRandomLatencyTest(targetDescription string, targetSelectors []*suite.C
     )
 }
 
+func buildRandomClockSkewTest(targetDescription string, targetSelectors []*suite.ChaosTargetSelector) (*types.SuiteTest, error) {
+    minDelaySeconds := -600
+    maxDelaySeconds := 600
+    minDurationSeconds := 10
+    maxDurationSeconds := 300
+
+    grace := time.Second * 300
+    delay := fmt.Sprintf("%ds", rand.Intn(maxDelaySeconds-minDelaySeconds)+minDelaySeconds)
+    duration := fmt.Sprintf("%ds", rand.Intn(maxDurationSeconds-minDurationSeconds)+minDurationSeconds)
+
+    description := fmt.Sprintf("Apply %s clock skew for %s against %d targets. %s", delay, duration, len(targetSelectors), targetDescription)
+    log.Info(description)
+    return suite.ComposeNodeClockSkewTest(
+        description,
+        targetSelectors,
+        delay,
+        duration,
+        &grace,
+    )
+}
+
+func buildRandomTest(targetDescription string, targetSelectors []*suite.ChaosTargetSelector) (*types.SuiteTest, error) {
+    testId := rand.Intn(2)
+    if testId == 0 {
+        return buildRandomLatencyTest(targetDescription, targetSelectors)
+    }
+    if testId == 1 {
+        return buildRandomClockSkewTest(targetDescription, targetSelectors)
+    }
+    return nil, stacktrace.NewError("unknown test id")
+}
+
 func pickRandomClient(config *plan.PlannerConfig) (string, bool) {
     //return "reth", true
     isExec := rand.Intn(2)
@@ -104,6 +138,11 @@ func StartExploration(config *plan.PlannerConfig) error {
     }
     testableNodes := nodes[1:]
 
+    for _, n := range nodes {
+        log.Infof("%s", suite.ConvertToNodeIdTag(len(nodes), n, "execution"))
+        log.Infof("%s", suite.ConvertToNodeIdTag(len(nodes), n, "consensus"))
+    }
+
     // dedupe from runtime?
     kubeClient, err := kubernetes.CreateKubeClient(config.KubernetesNamespace)
     if err != nil {
@@ -118,22 +157,29 @@ func StartExploration(config *plan.PlannerConfig) error {
     }
 
     var testArtifacts []*artifacts.TestArtifact
-    var done = make(chan bool, 1)
-    sigs := make(chan os.Signal, 1)
+    var done = make(chan bool, 2)
+    sigs := make(chan os.Signal, 2)
     signal.Notify(sigs, syscall.SIGINT)
     go func() {
         sig := <-sigs
         fmt.Println()
         fmt.Println(sig, "Signal received. Ending after next test is completed.")
         done <- true // Signal that we're done
-    }()
 
+    }()
+    killall := false
     for {
+        loc := time.FixedZone("GMT", 0)
+        log.Infof("Start loop. GMT time: %s", time.Now().In(loc).Format(http.TimeFormat))
         select {
         case <-done:
             fmt.Println("Writing test artifacts")
             return cleanup(testArtifacts)
         default:
+            if killall {
+                fmt.Println("Writing test artifacts")
+                return cleanup(testArtifacts)
+            }
             clientUnderTest, isExec := pickRandomClient(config)
             targetSpec := getTargetSpec()
             attackSize := getRandomAttackSize()
@@ -149,14 +195,25 @@ func StartExploration(config *plan.PlannerConfig) error {
                 continue
             }
 
+            for _, selector := range targetSelectors {
+                for _, s := range selector.Selector {
+                    msg := "Hitting "
+                    for _, target := range s.Values {
+                        msg = fmt.Sprintf("%s %s,", msg, target)
+                    }
+                    log.Info(msg)
+                }
+            }
+            log.Infof("time: %d", time.Now().Unix())
+
             var targetingDescription string
             if targetSpec == suite.TargetMatchingNode {
                 targetingDescription = fmt.Sprintf("Impacting the full node of targeted %s clients. Injecting into %s of the matching targets.", clientUnderTest, attackSize)
             } else {
                 targetingDescription = fmt.Sprintf("Impacting the client of targeted %s clients. Injecting into %s of the matching targets.", clientUnderTest, attackSize)
             }
 
-            test, err := buildRandomLatencyTest(
+            test, err := buildRandomTest(
                 targetingDescription,
                 targetSelectors,
             )
@@ -169,12 +226,13 @@ func StartExploration(config *plan.PlannerConfig) error {
             err = executor.RunTestPlan(ctx)
             if err != nil {
                 log.Errorf("Error while running test")
-                return err
+                fmt.Println("Writing test artifacts")
+                return cleanup(testArtifacts)
             } else {
                 log.Infof("Test steps completed.")
             }
 
-            log.Info("Starting health checks")
+            log.Infof("Starting health checks at %s", time.Now().In(loc).Format(http.TimeFormat))
             podsUnderTest, err := executor.GetPodsUnderTest()
             if err != nil {
                 return err
@@ -186,6 +244,9 @@ func StartExploration(config *plan.PlannerConfig) error {
             }
             results, err := hc.RunChecks(ctx)
             if err != nil {
+
+                fmt.Println("Writing test artifacts")
+                err := cleanup(testArtifacts)
                 return err
             }
             testArtifact := artifacts.BuildTestArtifact(results, podsUnderTest, *test)
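
Editor's note: the exploration loop above draws every random choice (test type, client, attack size, delay, duration) from golang.org/x/exp/rand, keyed by the Seed constant. A minimal sketch of why a fixed seed makes a run reproducible, assuming the package seeds the shared source from Seed at startup (the seeding call itself is not part of this diff):

package main

import (
    "fmt"

    "golang.org/x/exp/rand"
)

// Seed mirrors the constant in pkg/exploration/explore.go (557 in this commit).
const Seed = 557

func main() {
    // Seeding the package-level source makes every later rand.Intn call, and
    // therefore the whole sequence of generated chaos tests, reproducible.
    rand.Seed(Seed)
    for i := 0; i < 5; i++ {
        if rand.Intn(2) == 0 {
            fmt.Println("test", i, "-> latency")
        } else {
            fmt.Println("test", i, "-> clock skew")
        }
    }
}

Running the sketch twice prints the same sequence; changing Seed (as this commit does, 555 to 557) selects a different but equally reproducible sequence.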

pkg/health/checker.go

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ func (hc *CheckOrchestrator) RunChecks(ctx context.Context) (*types.HealthCheckR
         }
 
         if time.Now().After(latestAllowable) {
-            log.Warn("Grace period elapsed and a health check is still failing")
+            log.Warnf("Grace period elapsed and a health check is still failing. Time: %d", time.Now().Unix())
             return results, nil
         } else {
             log.Warn("Health checks failed but still in grace period")

pkg/health/ethereum/beacon_rpc.go

Lines changed: 33 additions & 15 deletions
@@ -119,21 +119,32 @@ func (e *EthNetworkChecker) dialToBeaconClients(ctx context.Context) ([]*BeaconC
 }
 
 func dialBeaconRpcClient(ctx context.Context, session *kubernetes.PortForwardsSession) (*BeaconClientRpc, error) {
-    httpClient, err := http.New(ctx,
-        http.WithAddress(fmt.Sprintf("http://localhost:%d", session.LocalPort)),
-        http.WithLogLevel(zerolog.WarnLevel),
-    )
-    if err != nil {
-        return nil, stacktrace.Propagate(err, "err while dialing RPC for %s", session.Pod.GetName())
-    }
-    provider, isProvider := httpClient.(eth2client.BeaconBlockHeadersProvider)
-    if !isProvider {
-        return nil, stacktrace.NewError("unable to cast http client to beacon rpc provider for %s", session.Pod.GetName())
+    // 3 attempts
+    retryCount := 8
+    for i := 0; i <= retryCount; i++ {
+        httpClient, err := http.New(ctx,
+            http.WithAddress(fmt.Sprintf("http://localhost:%d", session.LocalPort)),
+            http.WithLogLevel(zerolog.WarnLevel),
+        )
+        if err != nil {
+            if i == retryCount {
+                return nil, stacktrace.Propagate(err, "err while dialing RPC for %s", session.Pod.GetName())
+            } else {
+                time.Sleep(1 * time.Second)
+                continue
+            }
+
+        }
+        provider, isProvider := httpClient.(eth2client.BeaconBlockHeadersProvider)
+        if !isProvider {
+            return nil, stacktrace.NewError("unable to cast http client to beacon rpc provider for %s", session.Pod.GetName())
+        }
+        return &BeaconClientRpc{
+            session: session,
+            client:  provider,
+        }, nil
     }
-    return &BeaconClientRpc{
-        session: session,
-        client:  provider,
-    }, nil
+    return nil, stacktrace.NewError("unreachable beacon rpc")
 }
 
 func (c *BeaconClientRpc) Close() {
@@ -158,7 +169,14 @@ func (c *BeaconClientRpc) GetLatestBlockBy(ctx context.Context, blockType string
             }
         }
     }
-        return nil, stacktrace.Propagate(err, "Unable to query for blockType %s with client for %s", blockType, c.session.Pod.GetName())
+        // chock it up to a failure we need to retry
+        choice := &ClientForkChoice{
+            Pod:         c.session.Pod,
+            BlockNumber: 0,
+            BlockHash:   "N/A",
+        }
+        return choice, nil
+        //return nil, stacktrace.Propagate(err, "Unable to query for blockType %s with client for %s", blockType, c.session.Pod.GetName())
     }
 
     slot := uint64(result.Data.Header.Message.Slot)
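
Editor's note: the dial change above wraps client construction in a bounded retry loop with a one-second pause between attempts. A generic sketch of the same pattern; the retry package, the Do helper, and its signature below are illustrative, not part of the attacknet codebase:

package retry

import "time"

// Do calls fn up to attempts times, sleeping wait between failures, and
// returns the first successful value or the last error seen.
func Do[T any](attempts int, wait time.Duration, fn func() (T, error)) (T, error) {
    var last error
    for i := 0; i < attempts; i++ {
        v, err := fn()
        if err == nil {
            return v, nil
        }
        last = err
        time.Sleep(wait)
    }
    var zero T
    return zero, last
}

With such a helper, a call site could read retry.Do(8, time.Second, dialOnce), keeping the retry budget separate from the dial logic; the committed code inlines the same idea.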

pkg/health/ethereum/consensus.go

Lines changed: 2 additions & 1 deletion
@@ -4,6 +4,7 @@ import (
     "attacknet/cmd/pkg/kubernetes"
     "context"
     log "github.com/sirupsen/logrus"
+    "time"
 )
 
 // var UnableToReachLatestConsensusError = fmt.Errorf("there are nodes that disagree on the latest block")
@@ -101,7 +102,7 @@ func reportConsensusDataToLogger(consensusType string,
 
     log.Infof("Consensus %s block height: %d", consensusType, consensusBlockNum[0].BlockNumber)
     if len(wrongBlockNum) > 0 {
-        log.Warnf("Some nodes are out of consensus for block type '%s'", consensusType)
+        log.Warnf("Some nodes are out of consensus for block type '%s'. Time: %d", consensusType, time.Now().Unix())
         for _, n := range wrongBlockNum {
             log.Warnf("---> Node: %s %s BlockHeight: %d BlockHash: %s", n.Pod.GetName(), consensusType, n.BlockNumber, n.BlockHash)
         }

pkg/kubernetes/port_forward.go

Lines changed: 18 additions & 6 deletions
@@ -114,25 +114,37 @@ func (c *KubeClient) StartPortForwarding(pod string, localPort, remotePort int,
     portForwardIssueCh := make(chan error, 1)
     defer close(portForwardIssueCh)
 
-    retries := 3
-    go func(retriesRem int) {
+    retriesRem := 5
+    /*go func(retriesRem int) {
         for retriesRem > 0 {
-            if err = portForward.ForwardPorts(); err == nil {
+            if err = portForward.ForwardPorts(); err != nil {
                 retriesRem--
+                time.Sleep(200 * time.Millisecond)
                 if retriesRem == 0 {
                     portForwardIssueCh <- stacktrace.Propagate(err, "unable to start port forward session")
                 }
             }
         }
-    }(retries)
+    }(retries)*/
+
+    for retriesRem > 0 {
+        if err = portForward.ForwardPorts(); err != nil {
+            retriesRem--
+            time.Sleep(200 * time.Millisecond)
+        } else {
+            return stopCh, nil
+        }
+    }
+    //portForwardIssueCh <- stacktrace.Propagate(err, "unable to start port forward session")
+    return nil, errors.New("Failed to establish port forward")
 
-    select {
+    /*select {
     case <-readyCh:
         log.Debugf("Port-forward established to pod/%s:%d", pod, remotePort)
     case <-time.After(time.Minute):
        return nil, errors.New("timed out after waiting to establish port forward")
     case err = <-portForwardIssueCh:
         return nil, err
     }
-    return stopCh, nil
+    return stopCh, nil*/
 }

pkg/plan/network/consensus.go

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@ const defaultValCpu = 500
 const defaultClMem = 1536
 const defaultValMem = 512
 
-func composeConsensusTesterNetwork(nodeMultiplier int, consensusClient string, execClientMap, consClientMap map[string]ClientVersion) ([]*Node, error) {
+func composeConsensusTesterNetwork(nodeMultiplier int, consensusClient string, execClientList []ClientVersion, consClientMap map[string]ClientVersion) ([]*Node, error) {
 
     // make sure consensusClient actually exists
     clientUnderTest, ok := consClientMap[consensusClient]
@@ -21,11 +21,11 @@ func composeConsensusTesterNetwork(nodeMultiplier int, consensusClient string, e
 
     // start from 2 because bootnode is index 1
     index := 2
-    nodes, err := composeNodesForClTesting(nodeMultiplier, index, clientUnderTest, execClientMap)
+    nodes, err := composeNodesForClTesting(nodeMultiplier, index, clientUnderTest, execClientList)
     return nodes, err
 }
 
-func composeNodesForClTesting(nodeMultiplier, index int, consensusClient ClientVersion, execClients map[string]ClientVersion) ([]*Node, error) {
+func composeNodesForClTesting(nodeMultiplier, index int, consensusClient ClientVersion, execClients []ClientVersion) ([]*Node, error) {
     var nodes []*Node
 
     for _, execClient := range execClients {

pkg/plan/network/execution.go

Lines changed: 6 additions & 6 deletions
@@ -4,10 +4,10 @@ import (
     "github.com/kurtosis-tech/stacktrace"
 )
 
-const defaultElCpu = 512
-const defaultElMem = 512
+const defaultElCpu = 768
+const defaultElMem = 1024
 
-func composeExecTesterNetwork(nodeMultiplier int, execClient string, execClientMap, consClientMap map[string]ClientVersion) ([]*Node, error) {
+func composeExecTesterNetwork(nodeMultiplier int, execClient string, consClientList []ClientVersion, execClientMap map[string]ClientVersion) ([]*Node, error) {
 
     // make sure execClient actually exists
     clientUnderTest, ok := execClientMap[execClient]
@@ -17,14 +17,14 @@ func composeExecTesterNetwork(nodeMultiplier int, execClient string, execClientM
 
     // start from 2 because bootnode is index 1
     index := 2
-    nodes, err := composeNodesForElTesting(nodeMultiplier, index, clientUnderTest, consClientMap)
+    nodes, err := composeNodesForElTesting(nodeMultiplier, index, clientUnderTest, consClientList)
     return nodes, err
 }
 
-func composeNodesForElTesting(nodeMultiplier, index int, execClient ClientVersion, consensusClients map[string]ClientVersion) ([]*Node, error) {
+func composeNodesForElTesting(nodeMultiplier, index int, execClient ClientVersion, consClientList []ClientVersion) ([]*Node, error) {
     var nodes []*Node
 
-    for _, consensusClient := range consensusClients {
+    for _, consensusClient := range consClientList {
         for i := 0; i < nodeMultiplier; i++ {
             node := buildNode(index, execClient, consensusClient)
             nodes = append(nodes, node)
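
Editor's note: the signature changes in pkg/plan/network/consensus.go and pkg/plan/network/execution.go swap map[string]ClientVersion parameters for []ClientVersion slices, which is likely the core of the "fix determinism" change: Go map iteration order is unspecified and randomized between runs, so composing nodes by ranging over a map can yield a different node ordering every time, while ranging over a slice preserves a stable order. A small illustrative sketch (client names are made up):

package main

import "fmt"

func main() {
    // Ranging over a map: order may differ between runs of the same binary.
    clientMap := map[string]string{"geth": "v1.13", "reth": "v0.1", "besu": "v24.1"}
    for name := range clientMap {
        fmt.Println("map order:", name)
    }

    // Ranging over a slice: order is exactly the declaration order, every run.
    clientList := []string{"geth", "reth", "besu"}
    for _, name := range clientList {
        fmt.Println("slice order:", name)
    }
}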

pkg/plan/network/network_builder.go

Lines changed: 5 additions & 5 deletions
@@ -65,9 +65,9 @@ func ComposeNetworkTopology(topology Topology, clientUnderTest string, execClien
     // assume already checked clientUnderTest is a member of consClients or execClients
     var nodesToTest []*Node
     if isExecutionClient {
-        nodesToTest, err = composeExecTesterNetwork(nodeMultiplier, clientUnderTest, execClientMap, consClientMap)
+        nodesToTest, err = composeExecTesterNetwork(nodeMultiplier, clientUnderTest, consClients, execClientMap)
     } else {
-        nodesToTest, err = composeConsensusTesterNetwork(nodeMultiplier, clientUnderTest, execClientMap, consClientMap)
+        nodesToTest, err = composeConsensusTesterNetwork(nodeMultiplier, clientUnderTest, execClients, consClientMap)
     }
     if err != nil {
         return nil, err
@@ -78,6 +78,7 @@ func ComposeNetworkTopology(topology Topology, clientUnderTest string, execClien
     extraNodes, err := composeNodesToSatisfyTargetPercent(
         topology.TargetsAsPercentOfNetwork,
         len(nodes)-1,
+        nodes[len(nodes)-1].Index+1,
         clientUnderTest,
         execClients,
         consClients,
@@ -89,7 +90,7 @@ func ComposeNetworkTopology(topology Topology, clientUnderTest string, execClien
     return nodes, nil
 }
 
-func composeNodesToSatisfyTargetPercent(percentTarget float32, targetedNodeCount int, clientUnderTest string, execClients, consClients []ClientVersion) ([]*Node, error) {
+func composeNodesToSatisfyTargetPercent(percentTarget float32, targetedNodeCount int, startIndex int, clientUnderTest string, execClients, consClients []ClientVersion) ([]*Node, error) {
     // percent target is unconfigured
     if percentTarget == 0 {
         return []*Node{}, nil
@@ -100,8 +101,7 @@ func composeNodesToSatisfyTargetPercent(percentTarget float32, targetedNodeCount
         return nil, err
     }
 
-    startNodeIndex := targetedNodeCount + 2
-    nodes, err := pickExtraNodeClients(startNodeIndex, nodesToAdd, clientUnderTest, execClients, consClients)
+    nodes, err := pickExtraNodeClients(startIndex, nodesToAdd, clientUnderTest, execClients, consClients)
     return nodes, err
 }
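
Editor's note: in network_builder.go the extra-node composer now receives an explicit start index taken from the last composed node (nodes[len(nodes)-1].Index+1) instead of recomputing targetedNodeCount + 2 internally. A small sketch of the difference, using an assumed Node type with only an Index field; presumably the old formula could fall out of step with the indices already assigned once the node multiplier or topology changes the count:

package main

import "fmt"

type Node struct{ Index int }

// Old behaviour: derive the next index from a count, which can drift from
// the indices actually assigned to the composed nodes.
func startIndexFromCount(targetedNodeCount int) int { return targetedNodeCount + 2 }

// New behaviour: continue numbering from the last node that was composed.
func startIndexFromNodes(nodes []*Node) int { return nodes[len(nodes)-1].Index + 1 }

func main() {
    nodes := []*Node{{Index: 2}, {Index: 3}, {Index: 4}, {Index: 5}}
    fmt.Println("from count:", startIndexFromCount(len(nodes)-1)) // 5, already taken in this example
    fmt.Println("from nodes:", startIndexFromNodes(nodes))        // 6, next free index
}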
