From 08f4dfed37ee4b2c938dfc36bc1d717eb0e436a1 Mon Sep 17 00:00:00 2001 From: kasra Date: Sun, 11 Feb 2024 22:53:22 +0330 Subject: [PATCH 1/6] Save grafana dashboard --- grafana/dashboard.json | 986 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 986 insertions(+) create mode 100644 grafana/dashboard.json diff --git a/grafana/dashboard.json b/grafana/dashboard.json new file mode 100644 index 0000000..5547057 --- /dev/null +++ b/grafana/dashboard.json @@ -0,0 +1,986 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": 
"list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(rate(leader_total_bucket{}[1m])) by (method)", + "instant": false, + "legendFormat": "{{method}}", + "range": true, + "refId": "A" + } + ], + "title": "Leader RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(${percentile}, sum(rate(leader_latency_bucket{}[1m])) by (le, method))", + "instant": false, + "legendFormat": "{{method}}", + "range": true, + "refId": "A" + } + 
], + "title": "Leader Latency Percentile-${percentile}", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(rate(push_throughput_bucket{}[1m])) by (provider)", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Push RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(${percentile}, sum(rate(push_latency_bucket{}[1m])) by (le, provider))", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Push Latency Percentile-${percentile}", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(rate(pull_throughput_bucket{}[1m])) by (provider)", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Pull RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "legend": { 
+ "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(${percentile}, sum(rate(pull_latency_bucket{}[1m])) by (le, provider))", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Pull Latency Percentile-${percentile}", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(rate(ack_throughput_bucket{}[1m])) by (provider)", + "instant": false, + 
"legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Ack RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(${percentile}, sum(rate(ack_latency_bucket{}[1m])) by (le, provider))", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Ack Latency Percentile-${percentile}", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 9, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(disk_used_size{}) by (provider)", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Disk Usage", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 10, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(disk_total_size{}) by (provider)", + "instant": false, + "legendFormat": "{{provider}}", + "range": true, + "refId": "A" + } + ], + "title": "Datanode Disk Total", + "type": "gauge" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + 
"current": { + "selected": false, + "text": "0.9", + "value": "0.9" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "0.9", + "value": "0.9" + }, + { + "selected": false, + "text": "0.95", + "value": "0.95" + }, + { + "selected": false, + "text": "0.99", + "value": "0.99" + } + ], + "query": "0.9, 0.95,0.99", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "TurtleMQ", + "uid": "ce242e9f-8458-4ae5-b053-e9c81ef0de2f", + "version": 7, + "weekStart": "" +} \ No newline at end of file From ffff319f3f174d256d96d6926ffe1501b2d70911 Mon Sep 17 00:00:00 2001 From: kasra Date: Sun, 11 Feb 2024 22:54:06 +0330 Subject: [PATCH 2/6] Hardcode server host --- client_go/client.go | 2 +- client_py/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/client_go/client.go b/client_go/client.go index 4ec9b72..c553dff 100644 --- a/client_go/client.go +++ b/client_go/client.go @@ -12,7 +12,7 @@ import ( "github.com/kysre/TurtleMQ/client_go/queue" ) -const HOST = "localhost" +const HOST = "64.226.122.208" type SubscribeFunction func(key string, value []byte) diff --git a/client_py/client.py b/client_py/client.py index 246d479..024b6d4 100644 --- a/client_py/client.py +++ b/client_py/client.py @@ -13,7 +13,7 @@ class QueueClient: stub = None replica_stub = None - HOST = "localhost" + HOST = "64.226.122.208" PORT, REPLICA_PORT = "8000", "8001" SUBSCRIBE_WORKERS = 3 SUBSCRIBE_SLEEP_TIMEOUT = 2 From aac20c8375bf803ee01fe40f4b91d6dafb5a4924 Mon Sep 17 00:00:00 2001 From: kasra Date: Sun, 11 Feb 2024 23:42:32 +0330 Subject: [PATCH 3/6] Add some system tests --- functional_tests.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 functional_tests.py diff --git a/functional_tests.py b/functional_tests.py 
new file mode 100644 index 0000000..be9909e --- /dev/null +++ b/functional_tests.py @@ -0,0 +1,26 @@ +import time + +from client_py.client import QueueClient + + +def test_subscribe_function_1(key: str, value: bytes): + print(f'{key}, {value}') + + +def test_subscribe_function_2(key: str, value: bytes): + print(f'{key}, {value}') + + +if __name__ == '__main__': + test_client_1 = QueueClient() + test_client_2 = QueueClient() + test_client_3 = QueueClient() + + test_client_2.subscribe(test_subscribe_function_1) + test_client_3.subscribe(test_subscribe_function_2) + + for i in range(200): + test_client_1.push(f'{i}', b'value') + time.sleep(0.1) + + time.sleep(500) From 30b918c7aaf91f47a070afea311c9d796ee24bc9 Mon Sep 17 00:00:00 2001 From: kasra Date: Sun, 11 Feb 2024 23:42:50 +0330 Subject: [PATCH 4/6] Set resource limits for deployments --- docker-compose.yaml | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 299ec22..13b6a9d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -9,6 +9,11 @@ services: volumes: - ./prometheus:/etc/prometheus - prom_data:/prometheus + deploy: + resources: + limits: + cpus: '0.2' + memory: 200M grafana: image: grafana/grafana @@ -23,6 +28,11 @@ services: volumes: - ./grafana:/etc/grafana/provisioning/datasources - grafana_data:/var/lib/grafana + deploy: + resources: + limits: + cpus: '0.4' + memory: 400M leader_0: image: kysre/turtlemq:leader-${LEADER_IMAGE_TAG} @@ -36,6 +46,11 @@ services: - LEADER_LEADER_REPLICAHOST=leader_1 ports: - '8000:8888' + deploy: + resources: + limits: + cpus: '0.2' + memory: 200M leader_1: image: kysre/turtlemq:leader-${LEADER_IMAGE_TAG} @@ -50,6 +65,11 @@ services: - LEADER_LEADER_REPLICAHOST=leader_0 ports: - '8001:8888' + deploy: + resources: + limits: + cpus: '0.2' + memory: 200M datanode_0: image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG} @@ -69,6 +89,11 @@ services: - PARTITIONS_COUNT=100 
volumes: - datanode_0_vol:/var/lib/turtlemq/data/ + deploy: + resources: + limits: + cpus: '0.15' + memory: 300M datanode_1: image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG} @@ -88,6 +113,11 @@ services: - PARTITIONS_COUNT=100 volumes: - datanode_1_vol:/var/lib/turtlemq/data/ + deploy: + resources: + limits: + cpus: '0.15' + memory: 300M datanode_2: image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG} @@ -107,6 +137,11 @@ services: - PARTITIONS_COUNT=100 volumes: - datanode_2_vol:/var/lib/turtlemq/data/ + deploy: + resources: + limits: + cpus: '0.15' + memory: 300M datanode_3: image: kysre/turtlemq:datanode-${DATANODE_IMAGE_TAG} @@ -126,6 +161,11 @@ services: - PARTITIONS_COUNT=100 volumes: - datanode_3_vol:/var/lib/turtlemq/data/ + deploy: + resources: + limits: + cpus: '0.15' + memory: 300M volumes: prom_data: @@ -134,9 +174,17 @@ volumes: driver: local datanode_0_vol: driver: local + driver_opts: + o: "size=5GB" datanode_1_vol: driver: local + driver_opts: + o: "size=5GB" datanode_2_vol: driver: local + driver_opts: + o: "size=5GB" datanode_3_vol: driver: local + driver_opts: + o: "size=5GB" From 44a6b9d031cecfe59e6c7e5fe901221f4de08830 Mon Sep 17 00:00:00 2001 From: kasra Date: Sun, 11 Feb 2024 23:50:54 +0330 Subject: [PATCH 5/6] Run notify leader in a thread --- datanode/src/datanode_server.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/datanode/src/datanode_server.py b/datanode/src/datanode_server.py index cf8c2af..7a61a4b 100644 --- a/datanode/src/datanode_server.py +++ b/datanode/src/datanode_server.py @@ -13,6 +13,7 @@ from prometheus_client import Counter, Gauge, Summary, Histogram, generate_latest, REGISTRY, start_http_server import os import time +from threading import Thread DISK_TOTAL_SIZE = Gauge('disk_total_size', 'Total size of disk', labelnames=["provider"]) DISK_USED_SIZE = Gauge('disk_used_size', 'Used size of disk', labelnames=["provider"]) @@ -176,6 +177,20 @@ def 
push_to_partition(partition_index: int, shared_partition.push(partition_message, partition_index) +def notify_leader_task(): + datanode_name, port = ConfigManager.get_prop('datanode_name'), ConfigManager.get_prop('server_port') + leader_host, leader_port = ConfigManager.get_prop('leader_host'), ConfigManager.get_prop('leader_port') + channel = grpc.insecure_channel(f"{leader_host}:{leader_port}") + stub = leader_pb2_grpc.LeaderStub(channel) + add_request = leader_pb2.AddDataNodeRequest(address=f'{datanode_name}:{port}') + while True: + try: + stub.AddDataNode(add_request) + except grpc.RpcError as e: + logger.exception(f"Error in notifying leader: {e}.") + time.sleep(5) + + def serve(): # Start metrics server start_http_server(9000) @@ -198,14 +213,8 @@ def serve(): logger.info('Server started, listening on ' + port) # notify leader - try: - leader_host, leader_port = ConfigManager.get_prop('leader_host'), ConfigManager.get_prop('leader_port') - channel = grpc.insecure_channel(f"{leader_host}:{leader_port}") - stub = leader_pb2_grpc.LeaderStub(channel) - add_request = leader_pb2.AddDataNodeRequest(address=f'{datanode_name}:{port}') - stub.AddDataNode(add_request) - except grpc.RpcError as e: - logger.exception(f"Error in notifying leader: {e}.") + notify_leader_task_thread = Thread(target=notify_leader_task, daemon=True) + notify_leader_task_thread.start() server.wait_for_termination() From 50a15afea8260d3b1bfcb88c1cddb07d634d1004 Mon Sep 17 00:00:00 2001 From: kasra Date: Mon, 12 Feb 2024 00:18:36 +0330 Subject: [PATCH 6/6] Add node exporter for metrics --- docker-compose.yaml | 10 +++++++++- prometheus/prometheus.yml | 3 ++- test.docker-compose.yaml | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 13b6a9d..43a3a36 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,7 +13,15 @@ services: resources: limits: cpus: '0.2' - memory: 200M + memory: 300M + + node_exporter: + image: 
prom/node-exporter:latest + deploy: + resources: + limits: + cpus: '0.1' + memory: 100M grafana: image: grafana/grafana diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index 7b31675..ca5eb6f 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -12,11 +12,12 @@ alerting: scrape_configs: - job_name: prometheus honor_timestamps: true - scrape_interval: 15s - scrape_timeout: 10s + scrape_interval: 5s + scrape_timeout: 5s metrics_path: /metrics scheme: http static_configs: + - targets: ['node_exporter:9100'] - targets: ['leader_0:9000'] - targets: ['leader_1:9000'] - targets: ['datanode_0:9000'] diff --git a/test.docker-compose.yaml b/test.docker-compose.yaml index 299ec22..b9d6013 100644 --- a/test.docker-compose.yaml +++ b/test.docker-compose.yaml @@ -10,6 +10,9 @@ services: - ./prometheus:/etc/prometheus - prom_data:/prometheus + node_exporter: + image: prom/node-exporter:latest + grafana: image: grafana/grafana container_name: grafana