Skip to content

Commit 42afad9

Browse files
committed
Migrate antithesis to K8s
Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
1 parent 5aebee8 commit 42afad9

File tree

9 files changed

+115
-384
lines changed

9 files changed

+115
-384
lines changed

.github/workflows/antithesis-test.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ on:
1313
required: false
1414
type: string
1515
duration:
16-
description: 'Duration (exploration hours)'
16+
description: 'Duration (exploration minutes)'
1717
required: true
1818
type: int
1919
description:
@@ -78,7 +78,7 @@ jobs:
7878
- name: Run Antithesis Tests
7979
uses: antithesishq/antithesis-trigger-action@f6221e2ba819fe0ac3e36bd67a281fa439a03fba # v0.10
8080
with:
81-
notebook_name: etcd
81+
notebook_name: basic_k8s_test
8282
tenant: linuxfoundation
8383
username: ${{ secrets.ANTITHESIS_WEBHOOK_USERNAME }}
8484
password: ${{ secrets.ANTITHESIS_WEBHOOK_PASSWORD }}
@@ -89,5 +89,5 @@ jobs:
8989
email_recipients: ${{ inputs.email || 'siarkowicz@google.com' }}
9090
test_name: ${{ inputs.test || 'etcd nightly antithesis run' }}
9191
additional_parameters: |-
92-
custom.duration = ${{ inputs.duration || 12 }}
92+
antithesis.duration = ${{ inputs.duration || 720 }}
9393
antithesis.source = ${{ inputs.etcd_ref || 'main' }}
Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
name: Verify Antithesis Docker Compose Pipeline
2+
name: Antithesis Verify
33

44
permissions:
55
contents: read
@@ -12,61 +12,35 @@ on:
1212
- 'tests/antithesis/**'
1313
- '.github/workflows/antithesis-verify.yml'
1414
pull_request:
15+
branches:
16+
- main
1517
paths:
1618
- 'tests/antithesis/**'
1719
- '.github/workflows/antithesis-verify.yml'
1820

1921
jobs:
20-
test-docker-compose:
21-
strategy:
22-
matrix:
23-
node-count: [1, 3]
24-
name: Test ${{ matrix.node-count }}-node cluster
22+
test:
2523
runs-on: ubuntu-latest
2624
steps:
27-
- name: Checkout repository
28-
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
29-
30-
- name: Build etcd-server and etcd-client images
25+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
26+
- name: Build images
3127
run: |
3228
make -C tests/antithesis antithesis-build-etcd-image
3329
make -C tests/antithesis antithesis-build-client-docker-image
3430
35-
- name: Run docker-compose up
31+
- name: Set up Kubernetes cluster
3632
working-directory: ./tests/antithesis
37-
run: |
38-
make antithesis-docker-compose-up CFG_NODE_COUNT=${{ matrix.node-count }} &
39-
40-
- name: Check for healthy cluster
41-
working-directory: ./tests/antithesis
42-
run: |
43-
timeout=120
44-
interval=10
45-
end_time=$(( $(date +%s) + timeout ))
46-
47-
while [ $(date +%s) -lt $end_time ]; do
48-
# The client container might not be running yet, so ignore errors from docker compose logs
49-
if docker compose -f config/docker-compose-${{ matrix.node-count }}-node.yml logs client 2>/dev/null | grep -q "Client \[entrypoint\]: cluster is healthy!"; then
50-
echo "Cluster is healthy!"
51-
exit 0
52-
fi
53-
echo "Waiting for cluster to become healthy..."
54-
sleep $interval
55-
done
56-
57-
echo "Cluster did not become healthy in ${timeout} seconds."
58-
docker compose -f config/docker-compose-${{ matrix.node-count }}-node.yml logs
59-
exit 1
33+
run: make antithesis-k8s-up
6034

6135
- name: Run traffic
6236
working-directory: ./tests/antithesis
63-
run: make antithesis-run-container-traffic CFG_NODE_COUNT=${{ matrix.node-count }}
37+
run: make antithesis-run-k8s-traffic
6438

6539
- name: Run validation
6640
working-directory: ./tests/antithesis
67-
run: make antithesis-run-container-validation CFG_NODE_COUNT=${{ matrix.node-count }}
41+
run: make antithesis-run-k8s-validation
6842

6943
- name: Clean up
7044
if: always()
7145
working-directory: ./tests/antithesis
72-
run: make antithesis-clean CFG_NODE_COUNT=${{ matrix.node-count }}
46+
run: make antithesis-clean

tests/antithesis/Makefile

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,41 +45,41 @@ antithesis-build-etcd-image-main: REF=main
4545
antithesis-build-etcd-image-main: antithesis-build-etcd-image
4646

4747
.PHONY: antithesis-build-config-image
48-
antithesis-build-config-image: validate-node-count
48+
antithesis-build-config-image:
4949
docker build -f config/Dockerfile config -t etcd-config:latest \
50-
--build-arg IMAGE_TAG=$(IMAGE_TAG) \
51-
--build-arg NODE_COUNT=$(CFG_NODE_COUNT)
50+
--build-arg IMAGE_TAG=$(IMAGE_TAG)
5251

53-
.PHONY: antithesis-docker-compose-up
54-
antithesis-docker-compose-up: validate-node-count
55-
export USER_ID=$(USER_ID) && export GROUP_ID=$(GROUP_ID) && \
56-
docker compose -f config/docker-compose-$(CFG_NODE_COUNT)-node.yml up
52+
.PHONY: antithesis-k8s-up
53+
antithesis-k8s-up: check-k8s-tools
54+
kind create cluster --name etcd-antithesis
55+
kind load docker-image etcd-client:latest --name etcd-antithesis
56+
kind load docker-image etcd-server:latest --name etcd-antithesis
57+
kubectl apply -f config/manifests
58+
kubectl wait --for=condition=ready pod -l app=etcd --timeout=120s
59+
kubectl wait --for=condition=ready pod -l app=etcd-client --timeout=120s
5760

58-
.PHONY: antithesis-run-container-traffic
59-
antithesis-run-container-traffic: validate-node-count
60-
export USER_ID=$(USER_ID) && export GROUP_ID=$(GROUP_ID) && \
61-
docker compose -f config/docker-compose-$(CFG_NODE_COUNT)-node.yml exec client /opt/antithesis/test/v1/robustness/singleton_driver_traffic
61+
.PHONY: antithesis-run-k8s-traffic
62+
antithesis-run-k8s-traffic:
63+
kubectl exec deployment/etcd-client -- /opt/antithesis/test/v1/robustness/singleton_driver_traffic
6264

63-
.PHONY: antithesis-run-container-validation
64-
antithesis-run-container-validation: validate-node-count
65-
export USER_ID=$(USER_ID) && export GROUP_ID=$(GROUP_ID) && \
66-
docker compose -f config/docker-compose-$(CFG_NODE_COUNT)-node.yml exec client /opt/antithesis/test/v1/robustness/finally_validation
65+
.PHONY: antithesis-run-k8s-validation
66+
antithesis-run-k8s-validation:
67+
kubectl exec deployment/etcd-client -- /opt/antithesis/test/v1/robustness/finally_validation
6768

6869
.PHONY: antithesis-run-local-traffic
6970
antithesis-run-local-traffic:
70-
export ETCD_ROBUSTNESS_DATA_PATHS=/tmp/etcddata0,/tmp/etcddata1,/tmp/etcddata2 && export ETCD_ROBUSTNESS_REPORT_PATH=report && export ETCD_ROBUSTNESS_ENDPOINTS=127.0.0.1:12379,127.0.0.1:22379,127.0.0.1:32379 && \
71+
export ETCD_ROBUSTNESS_DATA_PATHS=/tmp/etcddata/etcd-0,/tmp/etcddata/etcd-1,/tmp/etcddata/etcd-2 && export ETCD_ROBUSTNESS_REPORT_PATH=reports/report && export ETCD_ROBUSTNESS_ENDPOINTS=127.0.0.1:12379,127.0.0.1:22379,127.0.0.1:32379 && \
7172
go run --race ./test-template/robustness/traffic/main.go
7273

7374
.PHONY: antithesis-run-local-validation
7475
antithesis-run-local-validation:
75-
export ETCD_ROBUSTNESS_DATA_PATHS=/tmp/etcddata0,/tmp/etcddata1,/tmp/etcddata2 && export ETCD_ROBUSTNESS_REPORT_PATH=report && export ETCD_ROBUSTNESS_ENDPOINTS=127.0.0.1:12379,127.0.0.1:22379,127.0.0.1:32379 && \
76+
export ETCD_ROBUSTNESS_DATA_PATHS=/tmp/etcddata/etcd-0,/tmp/etcddata/etcd-1,/tmp/etcddata/etcd-2 && export ETCD_ROBUSTNESS_REPORT_PATH=reports/report && export ETCD_ROBUSTNESS_ENDPOINTS=127.0.0.1:12379,127.0.0.1:22379,127.0.0.1:32379 && \
7677
go run --race ./test-template/robustness/finally/main.go
7778

7879
.PHONY: antithesis-clean
79-
antithesis-clean: validate-node-count
80-
export USER_ID=$(USER_ID) && export GROUP_ID=$(GROUP_ID) && \
81-
docker compose -f config/docker-compose-$(CFG_NODE_COUNT)-node.yml down --remove-orphans
82-
rm -rf /tmp/etcddata0 /tmp/etcddata1 /tmp/etcddata2 /tmp/etcdreport
80+
antithesis-clean: check-k8s-tools
81+
kind delete cluster --name etcd-antithesis
82+
rm -rf /tmp/etcddata /tmp/etcdreport
8383

8484
.PHONY: validate-node-count
8585
validate-node-count:

tests/antithesis/README.md

Lines changed: 26 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ For more details on robustness tests, see the [robustness directory](../robustne
1717
## Antithesis Setup
1818

1919
The setup consists of a 3-node etcd cluster and a client container, orchestrated
20-
via [Docker Compose](https://antithesis.com/docs/getting_started/setup/).
20+
via [Kubernetes](https://antithesis.com/docs/getting_started/setup_k8s/).
2121

2222
During the etcd Antithesis test suite the etcd server is built with the following patches:
2323

@@ -52,11 +52,18 @@ in the following way:
5252
[Singleton Driver Command]: https://antithesis.com/docs/test_templates/test_composer_reference/#singleton-driver
5353
[Finally Command]: https://antithesis.com/docs/test_templates/test_composer_reference/#finally-command
5454

55-
# Running tests with docker compose
55+
## Running tests locally with Kubernetes
56+
57+
### Prerequisites
58+
59+
Please make sure that you have the following tools installed on your local machine:
60+
61+
* [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl)
62+
* [kind](https://kind.sigs.k8s.io/docs/user/quick-start#installation)
5663

5764
## Quickstart
5865

59-
### 1. Build and Tag the Docker Image
66+
### 1. Build and Tag the Docker Images
6067

6168
Run this command from the `antithesis/test-template` directory:
6269

@@ -71,140 +78,42 @@ Both commands build etcd-server and etcd-client from the current branch. To buil
7178
make antithesis-build-etcd-image REF=${GIT_REF}
7279
```
7380

74-
### 2. (Optional) Check the Image Locally
75-
76-
You can verify your new image is built:
77-
78-
```bash
79-
docker images | grep etcd-client
80-
```
81-
82-
It should show something like:
83-
84-
```
85-
etcd-client latest <IMAGE_ID> <DATE>
86-
```
87-
88-
### 3. Use in Docker Compose
81+
### 2. Set up the Kubernetes cluster
8982

9083
Run the following command from the root directory for Antithesis tests (`tests/antithesis`):
9184

9285
```bash
93-
make antithesis-docker-compose-up
86+
make antithesis-k8s-up
9487
```
9588

96-
The command uses the etcd client and server images built from step 1.
89+
This command will:
9790

98-
The client will continuously check the health of the etcd nodes and print logs similar to:
91+
* Create a local `kind` cluster named `etcd-antithesis`.
92+
* Load the built `etcd-client` and `etcd-server` images into the cluster.
93+
* Deploy the Kubernetes manifests from `config/manifests`.
9994

100-
```
101-
[+] Running 4/4
102-
✔ Container etcd0 Created 0.0s
103-
✔ Container etcd2 Created 0.0s
104-
✔ Container etcd1 Created 0.0s
105-
✔ Container client Recreated 0.1s
106-
Attaching to client, etcd0, etcd1, etcd2
107-
etcd2 | {"level":"info","ts":"2025-04-14T07:23:25.134294Z","caller":"flags/flag.go:113","msg":"recognized and used environment variable","variable-name":"ETCD_ADVERTISE_CLIENT_URLS","variable-value":"http://etcd2.etcd:2379"}
108-
etcd2 | {"level":"info","ts":"2025-04-14T07:23:25.138501Z","caller":"flags/flag.go:113","msg":"recognized and used environment variable","variable-name":"ETCD_INITIAL_ADVERTISE_PEER_URLS","variable-value":"http://etcd2:2380"}
109-
etcd2 | {"level":"info","ts":"2025-04-14T07:23:25.138646Z","caller":"flags/flag.go:113","msg":"recognized and used environment variable","variable-name":"ETCD_INITIAL_CLUSTER","variable-value":"etcd0=http://etcd0:2380,etcd1=http://etcd1:2380,etcd2=http://etcd2:2380"}
110-
etcd0 | {"level":"info","ts":"2025-04-14T07:23:25.138434Z","caller":"flags/flag.go:113","msg":"recognized and used environment variable","variable-name":"ETCD_ADVERTISE_CLIENT_URLS","variable-value":"http://etcd0.etcd:2379"}
111-
etcd0 | {"level":"info","ts":"2025-04-14T07:23:25.138582Z","caller":"flags/flag.go:113","msg":"recognized and used environment variable","variable-name":"ETCD_INITIAL_ADVERTISE_PEER_URLS","variable-value":"http://etcd0:2380"}
112-
etcd0 | {"level":"info","ts":"2025-04-14T07:23:25.138592Z","caller":"flags/flag.go:113","msg":"recognized and used environment variable","variable-name":"ETCD_INITIAL_CLUSTER","variable-value":"etcd0=http://etcd0:2380,etcd1=http://etcd1:2380,etcd2=http://etcd2:2380"}
113-
114-
...
115-
...
116-
(skipping some repeated logs for brevity)
117-
...
118-
...
119-
120-
etcd2 | {"level":"info","ts":"2025-04-14T07:23:25.484698Z","caller":"etcdmain/main.go:50","msg":"successfully notified init daemon"}
121-
etcd1 | {"level":"info","ts":"2025-04-14T07:23:25.484092Z","caller":"embed/serve.go:210","msg":"serving client traffic insecurely; this is strongly discouraged!","traffic":"grpc+http","address":"[::]:2379"}
122-
etcd0 | {"level":"info","ts":"2025-04-14T07:23:25.484563Z","caller":"etcdmain/main.go:50","msg":"successfully notified init daemon"}
123-
etcd2 | {"level":"info","ts":"2025-04-14T07:23:25.485101Z","caller":"v3rpc/health.go:61","msg":"grpc service status changed","service":"","status":"SERVING"}
124-
etcd1 | {"level":"info","ts":"2025-04-14T07:23:25.484130Z","caller":"etcdmain/main.go:44","msg":"notifying init daemon"}
125-
etcd2 | {"level":"info","ts":"2025-04-14T07:23:25.485782Z","caller":"embed/serve.go:210","msg":"serving client traffic insecurely; this is strongly discouraged!","traffic":"grpc+http","address":"[::]:2379"}
126-
etcd1 | {"level":"info","ts":"2025-04-14T07:23:25.484198Z","caller":"etcdmain/main.go:50","msg":"successfully notified init daemon"}
127-
client | Client [entrypoint]: starting...
128-
client | Client [entrypoint]: checking cluster health...
129-
client | Client [entrypoint]: connection successful with etcd0
130-
client | Client [entrypoint]: connection successful with etcd1
131-
client | Client [entrypoint]: connection successful with etcd2
132-
client | Client [entrypoint]: cluster is healthy!
133-
```
95+
The client pod will continuously check the health of the etcd nodes and wait for the cluster to be healthy.
13496

135-
And it will stay running indefinitely.
97+
### 3. Running the tests
13698

137-
### 4. Running the tests
99+
To run the tests locally against the deployed cluster:
138100

139101
```bash
140-
make antithesis-run-container-traffic
141-
make antithesis-run-container-validation
102+
make antithesis-run-k8s-traffic
103+
make antithesis-run-k8s-validation
142104
```
143105

144-
Alternatively, with the etcd cluster from step 3, to run the tests locally without rebuilding the client image:
145-
146-
```bash
147-
make antithesis-run-local-traffic
148-
make antithesis-run-local-validation
149-
```
106+
### 4. Prepare for next run
150107

151-
### 5. Prepare for next run
152-
153-
Unfortunatelly robustness tests don't support running on non empty database.
108+
Unfortunately, robustness tests don't support running on a non-empty database.
154109
So for now you need to clean up the storage before repeating the run, or you will get a "non empty database at start, required by model used for linearizability validation" error.
155110

156111
```bash
157112
make antithesis-clean
158113
```
159114

160-
## Troubleshooting
161-
162-
- **Image Pull Errors**: If Docker can’t pull `etcd-client:latest`, make sure you built it locally (see the “Build and Tag” step) or push it to a registry that Compose can access.
163-
164-
# Running Tests with Kubernetes (WIP)
165-
166-
## Prerequisites
167-
168-
Please make sure that you have the following tools installed on your local:
169-
170-
- [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl)
171-
- [kind](https://kind.sigs.k8s.io/docs/user/quick-start#installation)
172-
173-
## Testing locally
174-
175-
### Setting up the cluster and deploying the images
176-
177-
#### 1. Ensure your access to a test kubernetes cluster
178-
179-
You can use `kind` to create a local cluster to deploy the etcd-server and test client. Once you have `kind` installed, you can use the following command to create a local cluster:
180-
181-
```bash
182-
kind create cluster
183-
```
184-
185-
Alternatively, you can use any existing kubernetes cluster you have access to.
186-
187-
#### 2. Build and load the images
188-
189-
Please [build the client and server images](#1-build-and-tag-the-docker-image) first. Then load the images into the `kind` cluster:
115+
This command will delete the `kind` cluster and remove the local data directories.
190116

191-
If you use `kind`, the cluster will need to have access to the images using the following commands:
117+
### Troubleshooting
192118

193-
```bash
194-
kind load docker-image etcd-client:latest
195-
kind load docker-image etcd-server:latest
196-
```
197-
198-
If you use something other than `kind`, please make sure the images are accessible to your cluster. This might involve pushing the images to a container registry that your cluster can pull from.
199-
200-
#### 3. Deploy the kubernetes manifests
201-
202-
```bash
203-
kubectl apply -f ./config/manifests
204-
```
205-
206-
### Tearing down the cluster
207-
208-
```bash
209-
kind delete cluster --name kind
210-
```
119+
* **Image Pull Errors**: If Kubernetes can’t pull `etcd-client:latest`, make sure you built it locally and that it was successfully loaded into the `kind` cluster.

tests/antithesis/config/Dockerfile

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
1-
ARG GO_VERSION=1.25.5
2-
3-
FROM golang:$GO_VERSION AS build
4-
RUN go install github.com/a8m/envsubst/cmd/envsubst@v1.4.3
5-
6-
ARG IMAGE_TAG
7-
ARG NODE_COUNT
8-
COPY docker-compose-${NODE_COUNT}-node.yml /docker-compose.yml.template
9-
RUN IMAGE_TAG=${IMAGE_TAG} cat /docker-compose.yml.template | envsubst > /docker-compose.yml
1+
FROM alpine AS build
2+
ARG IMAGE_TAG=latest
3+
COPY manifests/ /manifests/
4+
RUN sed -i "s/etcd-client:latest/etcd-client:${IMAGE_TAG}/g" /manifests/client.yaml && \
5+
sed -i "s/etcd-server:latest/etcd-server:${IMAGE_TAG}/g" /manifests/default-etcd-3-replicas.yaml
106

117
FROM scratch
12-
COPY --from=build /docker-compose.yml /docker-compose.yml
8+
COPY --from=build /manifests/ /manifests/

0 commit comments

Comments
 (0)