Skip to content

Commit

Permalink
Add ta tests (#43)
Browse files Browse the repository at this point in the history
* Add TA tests

* Fix client behaviour

* Add grafana dashboard & alerts

* Get datanode thread pool worker count from env

* Increase datanode sync timeout

* Increase resources

* Add thread pool size test env
  • Loading branch information
kysre authored Feb 13, 2024
1 parent e6aff50 commit 17559ab
Show file tree
Hide file tree
Showing 15 changed files with 556 additions and 144 deletions.
14 changes: 5 additions & 9 deletions client_go/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,12 @@ func (c *queueClient) acknowledgePull(ctx context.Context, key string) {
}

func (c *queueClient) runSubscribe(function SubscribeFunction) {
tickerPeriod := time.Duration(1) * time.Second
ticker := time.NewTicker(tickerPeriod)
for {
select {
case <-ticker.C:
key, value := c.Pull()
if key == "" {
continue
}
function(key, value)
key, value := c.Pull()
if key == "" {
time.Sleep(time.Duration(10) * time.Millisecond)
continue
}
function(key, value)
}
}
7 changes: 4 additions & 3 deletions client_py/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class QueueClient:
replica_stub = None
HOST = "64.226.122.208"
PORT, REPLICA_PORT = "8000", "8001"
SUBSCRIBE_WORKERS = 3
SUBSCRIBE_SLEEP_TIMEOUT = 2
SUBSCRIBE_WORKERS = 1
SUBSCRIBE_SLEEP_TIMEOUT = 0.01

@classmethod
def get_stub(cls, host: str, port: str):
Expand Down Expand Up @@ -85,7 +85,8 @@ def run_subscribe(self, f):
pull_response = self.pull()
if pull_response is not None and pull_response is not False and pull_response[0] != '':
futures.append(executer.submit(f, pull_response[0], pull_response[1]))
time.sleep(QueueClient.SUBSCRIBE_SLEEP_TIMEOUT)
else:
time.sleep(QueueClient.SUBSCRIBE_SLEEP_TIMEOUT)

_ = [future.result() for future in futures]

Expand Down
3 changes: 2 additions & 1 deletion datanode/src/configs/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ class ConfigManager:
leader_host=env_config('LEADER_HOST'),
leader_port=env_config('LEADER_PORT'),
datanode_name=env_config('DATANODE_NAME'),
server_thread_pool_size=env_config('SERVER_THREAD_POOL_SIZE'),
)

@staticmethod
def get_prop(key):
return ConfigManager.configs[key]
return ConfigManager.configs[key]
3 changes: 2 additions & 1 deletion datanode/src/configs/test.env
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ PENDING_TIMEOUT=15
CLEANER_PERIOD=5
LEADER_HOST=leader_0
LEADER_PORT=8888
DATANODE_NAME=datanode_0
DATANODE_NAME=datanode_0
SERVER_THREAD_POOL_SIZE=100
6 changes: 5 additions & 1 deletion datanode/src/datanode_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,11 @@ def serve():

datanode_name = ConfigManager.get_prop('datanode_name')

server = grpc.server(futures.ThreadPoolExecutor(max_workers=50))
server = grpc.server(
futures.ThreadPoolExecutor(
max_workers=ConfigManager.get_prop('server_thread_pool_size')
)
)
datanode = DataNode(partitions_count, home_path, datanode_name)
datanode_pb2_grpc.add_DataNodeServicer_to_server(datanode, server)

Expand Down
40 changes: 23 additions & 17 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ services:
deploy:
resources:
limits:
cpus: '0.2'
cpus: '0.3'
memory: 300M

node_exporter:
Expand Down Expand Up @@ -44,8 +44,8 @@ services:
deploy:
resources:
limits:
cpus: '0.4'
memory: 400M
cpus: '0.5'
memory: 600M

leader_0:
image: kysre/turtlemq:leader-${LEADER_IMAGE_TAG}
Expand All @@ -54,16 +54,17 @@ services:
- prometheus
environment:
- LEADER_LOGGING_LEVEL=info
- LEADER_LEADER_DATANODEREMAININGCHECKPERIOD=2
- LEADER_LEADER_DATANODEREMAININGCHECKPERIOD=1
- LEADER_LEADER_DATANODEPARTITIONCOUNT=100
- LEADER_LEADER_DATANODESYNCTIMEOUT=1000
- LEADER_LEADER_REPLICAHOST=leader_1
ports:
- '8000:8888'
deploy:
resources:
limits:
cpus: '0.2'
memory: 200M
cpus: '0.3'
memory: 300M

leader_1:
image: kysre/turtlemq:leader-${LEADER_IMAGE_TAG}
Expand All @@ -73,16 +74,17 @@ services:
- leader_0
environment:
- LEADER_LOGGING_LEVEL=info
- LEADER_LEADER_DATANODEREMAININGCHECKPERIOD=2
- LEADER_LEADER_DATANODEREMAININGCHECKPERIOD=1
- LEADER_LEADER_DATANODEPARTITIONCOUNT=100
- LEADER_LEADER_DATANODESYNCTIMEOUT=1000
- LEADER_LEADER_REPLICAHOST=leader_0
ports:
- '8001:8888'
deploy:
resources:
limits:
cpus: '0.2'
memory: 200M
cpus: '0.3'
memory: 300M

datanode_0:
image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG}
Expand All @@ -100,13 +102,14 @@ services:
- PENDING_TIMEOUT=15
- CLEANER_PERIOD=3
- PARTITIONS_COUNT=100
- SERVER_THREAD_POOL_SIZE=100
volumes:
- datanode_0_vol:/var/lib/turtlemq/data/
deploy:
resources:
limits:
cpus: '0.15'
memory: 300M
cpus: '0.3'
memory: 400M

datanode_1:
image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG}
Expand All @@ -124,13 +127,14 @@ services:
- PENDING_TIMEOUT=15
- CLEANER_PERIOD=3
- PARTITIONS_COUNT=100
- SERVER_THREAD_POOL_SIZE=100
volumes:
- datanode_1_vol:/var/lib/turtlemq/data/
deploy:
resources:
limits:
cpus: '0.15'
memory: 300M
cpus: '0.3'
memory: 400M

datanode_2:
image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG}
Expand All @@ -148,13 +152,14 @@ services:
- PENDING_TIMEOUT=15
- CLEANER_PERIOD=3
- PARTITIONS_COUNT=100
- SERVER_THREAD_POOL_SIZE=100
volumes:
- datanode_2_vol:/var/lib/turtlemq/data/
deploy:
resources:
limits:
cpus: '0.15'
memory: 300M
cpus: '0.3'
memory: 400M

datanode_3:
image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG}
Expand All @@ -172,13 +177,14 @@ services:
- PENDING_TIMEOUT=15
- CLEANER_PERIOD=3
- PARTITIONS_COUNT=100
- SERVER_THREAD_POOL_SIZE=100
volumes:
- datanode_3_vol:/var/lib/turtlemq/data/
deploy:
resources:
limits:
cpus: '0.15'
memory: 300M
cpus: '0.3'
memory: 400M

volumes:
prom_data:
Expand Down
48 changes: 48 additions & 0 deletions grafana/alert_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"template_files": {},
"template_file_provenances": {},
"alertmanager_config": {
"route": {
"receiver": "turtlemq-contact",
"group_by": [
"grafana_folder",
"alertname"
]
},
"templates": null,
"receivers": [
{
"name": "grafana-default-email",
"grafana_managed_receiver_configs": [
{
"uid": "e729eec1-03e8-4d08-8d55-633ed1b6cb04",
"name": "email receiver",
"type": "email",
"disableResolveMessage": false,
"settings": {
"addresses": "<[email protected]>"
},
"secureFields": {}
}
]
},
{
"name": "turtlemq-contact",
"grafana_managed_receiver_configs": [
{
"uid": "e54db3b1-ec77-44a7-9caf-abef76cbdcb1",
"name": "turtlemq-contact",
"type": "email",
"disableResolveMessage": false,
"settings": {
"addresses": "[email protected]\n[email protected]\n",
"singleEmail": true,
"message": "TurtleMQ alerts are FIRING!"
},
"secureFields": {}
}
]
}
]
}
}
Loading

0 comments on commit 17559ab

Please sign in to comment.