From e52ebfea4aecf49df55f419d89c0558c02dd5a80 Mon Sep 17 00:00:00 2001
From: dmitryk-dk <kozlovdmitriyy@gmail.com>
Date: Mon, 4 May 2026 20:41:06 +0200
Subject: [PATCH 1/4] add VictoriaMetrics stack as telemetry storage

---
 research/telemetry_storage_backend/Makefile   |  58 +-
 .../docker-compose.otel.yml                   |  12 +-
 .../docker-compose.victoriametrics.yml        |  73 ++
 .../otel-collector-config.yaml                |  18 +-
 .../correlation_by_timestamp.logql            |   1 +
 .../correlation_by_trace_id.logql             |   1 +
 .../queries/victoriametrics/data_volume.logql |   1 +
 .../victoriametrics/data_volume.metricsql     |   1 +
 .../victoriametrics/data_volume.traceql       |   1 +
 .../logs_errors_by_service.logql              |   1 +
 .../queries/victoriametrics/logs_recent.logql |   1 +
 .../victoriametrics/logs_search_error.logql   |   1 +
 .../metrics_by_service_hourly.metricsql       |   1 +
 .../metrics_p95_latency.metricsql             |   1 +
 .../sla_latency_compliance.traceql            |   1 +
 .../spans_error_by_service.traceql            |   1 +
 .../victoriametrics/trace_by_id.traceql       |   1 +
 .../traces_slow_by_service.traceql            |   1 +
 .../requirements.txt                          |   5 +
 .../runner/bench_compare.py                   | 701 +++++++++++++-----
 .../runner/run_otlp_ingest.py                 |  77 +-
 21 files changed, 739 insertions(+), 219 deletions(-)
 create mode 100644 research/telemetry_storage_backend/docker-compose.victoriametrics.yml
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/correlation_by_timestamp.logql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/correlation_by_trace_id.logql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/data_volume.logql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/data_volume.metricsql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/data_volume.traceql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/logs_errors_by_service.logql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/logs_recent.logql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/logs_search_error.logql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/metrics_by_service_hourly.metricsql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/metrics_p95_latency.metricsql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/sla_latency_compliance.traceql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/spans_error_by_service.traceql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/trace_by_id.traceql
 create mode 100644 research/telemetry_storage_backend/queries/victoriametrics/traces_slow_by_service.traceql

diff --git a/research/telemetry_storage_backend/Makefile b/research/telemetry_storage_backend/Makefile
index 81ee72b..ec73512 100644
--- a/research/telemetry_storage_backend/Makefile
+++ b/research/telemetry_storage_backend/Makefile
@@ -4,7 +4,7 @@ OUT   := $(ROOT)/out
 DATA_DIR ?= $(ROOT)/telemetry_data
 BATCH ?= 5000
 
-.PHONY: up down up-compare up-otel schema load query bench bench-compare bench-otlp
+.PHONY: up down up-compare up-vm up-otel schema load query bench bench-compare bench-vm bench-otlp
 
 up:
 	docker compose up -d doris
@@ -15,7 +15,7 @@ up:
 
 up-compare:
 	mkdir -p druid_data/var druid_data/shared
-	docker compose -f docker-compose.yml -f docker-compose.druid.yml -f docker-compose.oceanbase.yml -f docker-compose.loki.yml up -d
+	docker compose -f docker-compose.yml -f docker-compose.druid.yml -f docker-compose.oceanbase.yml -f docker-compose.loki.yml -f docker-compose.victoriametrics.yml up -d
 	@echo "Waiting for Doris health..."
 	@for i in {1..60}; do \
 	  curl -sf http://localhost:8030/api/health && echo "Doris is healthy" && break || sleep 2; \
@@ -36,9 +36,21 @@ up-compare:
 	@for i in {1..30}; do \
 	  nc -z 127.0.0.1 3100 && echo "Loki is ready" && break || sleep 2; \
 	done
+	@echo "Waiting for VictoriaMetrics (port 8428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:8428/-/healthy && echo "VictoriaMetrics is ready" && break || sleep 2; \
+	done
+	@echo "Waiting for VictoriaLogs (port 9428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:9428/health && echo "VictoriaLogs is ready" && break || sleep 2; \
+	done
+	@echo "Waiting for VictoriaTraces (port 10428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:10428/health && echo "VictoriaTraces is ready" && break || sleep 2; \
+	done
 
 up-otel:
-	docker compose -f docker-compose.yml -f docker-compose.druid.yml -f docker-compose.oceanbase.yml -f docker-compose.otel.yml up -d
+	docker compose -f docker-compose.yml -f docker-compose.druid.yml -f docker-compose.oceanbase.yml -f docker-compose.loki.yml -f docker-compose.victoriametrics.yml -f docker-compose.otel.yml up -d
 	@echo "Waiting for Doris health..."
 	@for i in {1..60}; do \
 	  curl -sf http://localhost:8030/api/health && echo "Doris is healthy" && break || sleep 2; \
@@ -47,13 +59,40 @@ up-otel:
 	@for i in {1..60}; do \
 	  curl -sf http://localhost:28223/ping && echo "ClickHouse is healthy" && break || sleep 2; \
 	done
+	@echo "Waiting for VictoriaMetrics (port 8428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:8428/-/healthy && echo "VictoriaMetrics is ready" && break || sleep 2; \
+	done
+	@echo "Waiting for VictoriaLogs (port 9428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:9428/health && echo "VictoriaLogs is ready" && break || sleep 2; \
+	done
+	@echo "Waiting for VictoriaTraces (port 10428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:10428/health && echo "VictoriaTraces is ready" && break || sleep 2; \
+	done
 	@echo "Waiting for OTLP collector..."
 	@for i in {1..30}; do \
 	  nc -z 127.0.0.1 4317 && echo "OTLP collector ready" && break || sleep 2; \
 	done
 
+up-vm:  # Start only the VictoriaMetrics compose file, then poll each health URL; a poll that never succeeds does not fail the target
+	docker compose -f docker-compose.victoriametrics.yml up -d
+	@echo "Waiting for VictoriaMetrics (port 8428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:8428/-/healthy && echo "VictoriaMetrics is ready" && break || sleep 2; \
+	done
+	@echo "Waiting for VictoriaLogs (port 9428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:9428/health && echo "VictoriaLogs is ready" && break || sleep 2; \
+	done
+	@echo "Waiting for VictoriaTraces (port 10428)..."
+	@for i in {1..30}; do \
+	  curl -sf http://localhost:10428/health && echo "VictoriaTraces is ready" && break || sleep 2; \
+	done
+
 down:
-	docker compose -f docker-compose.yml -f docker-compose.druid.yml -f docker-compose.otel.yml -f docker-compose.oceanbase.yml -f docker-compose.loki.yml down -v
+	docker compose -f docker-compose.yml -f docker-compose.druid.yml -f docker-compose.otel.yml -f docker-compose.oceanbase.yml -f docker-compose.loki.yml -f docker-compose.victoriametrics.yml down -v
 
 schema:
 	@echo "Applying Doris schema..."
@@ -76,6 +115,7 @@ bench-compare:
 	@echo "Running Doris vs ClickHouse vs Druid comparison..."
 	DORIS_USER="root" DORIS_PASS="" CLICKHOUSE_HTTP="http://localhost:28223" CLICKHOUSE_PASSWORD="changeme" \
 	LOKI_HTTP="http://localhost:3100" \
+	VM_HTTP="http://localhost:8428" VL_HTTP="http://localhost:9428" VT_HTTP="http://localhost:10428" \
 	python3 runner/bench_compare.py --all --data-dir "$(DATA_DIR)" --out "$(OUT)" --batch $(BATCH) $(SCALE_ARGS) $(STREAMING_ARGS)
 
 # Scaling: BATCH=10000 make bench-compare
@@ -86,10 +126,18 @@ SCALE_ARGS := $(if $(SCALE_TO),--scale-to $(SCALE_TO),)
 STREAMING_BATCH ?=
 STREAMING_ARGS := $(if $(STREAMING_BATCH),--streaming-batch $(STREAMING_BATCH),)
 
+bench-vm:  # Run bench_compare.py with --vm-only; the three endpoints are passed via VM_HTTP / VL_HTTP / VT_HTTP
+	@mkdir -p "$(OUT)"
+	@echo "Running VictoriaMetrics-only benchmark..."
+	VM_HTTP="http://localhost:8428" VL_HTTP="http://localhost:9428" VT_HTTP="http://localhost:10428" \
+	python3 runner/bench_compare.py --all --vm-only --data-dir "$(DATA_DIR)" --out "$(OUT)" --batch $(BATCH) $(SCALE_ARGS) $(STREAMING_ARGS)
+
 bench-otlp:
 	@mkdir -p "$(OUT)"
 	@echo "Running benchmark with OTLP ingestion ($(OTLP_COUNT) spans, $(OTLP_COUNT) logs, $(OTLP_COUNT) metrics)..."
-	DORIS_USER="root" DORIS_PASS="" CLICKHOUSE_HTTP="http://localhost:28223" CLICKHOUSE_PASSWORD="changeme" python3 runner/bench_compare.py \
+	DORIS_USER="root" DORIS_PASS="" CLICKHOUSE_HTTP="http://localhost:28223" CLICKHOUSE_PASSWORD="changeme" \
+	VM_HTTP="http://localhost:8428" VL_HTTP="http://localhost:9428" VT_HTTP="http://localhost:10428" \
+	python3 runner/bench_compare.py \
 	  --data-dir "$(DATA_DIR)" --out "$(OUT)" --all --otlp --otlp-count $(OTLP_COUNT)
 OTLP_COUNT ?= 1000
 
diff --git a/research/telemetry_storage_backend/docker-compose.otel.yml b/research/telemetry_storage_backend/docker-compose.otel.yml
index 2410c70..7a499ad 100644
--- a/research/telemetry_storage_backend/docker-compose.otel.yml
+++ b/research/telemetry_storage_backend/docker-compose.otel.yml
@@ -13,8 +13,16 @@ services:
       - "4317:4317"   # OTLP gRPC
       - "4318:4318"   # OTLP HTTP
     depends_on:
-      - doris
-      - clickhouse
+      doris:
+        condition: service_healthy
+      clickhouse:
+        condition: service_healthy
+      victoriametrics:
+        condition: service_healthy
+      victorialogs:
+        condition: service_healthy
+      victoriatraces:
+        condition: service_healthy
     networks:
       - default
     restart: unless-stopped
diff --git a/research/telemetry_storage_backend/docker-compose.victoriametrics.yml b/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
new file mode 100644
index 0000000..faf9402
--- /dev/null
+++ b/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
@@ -0,0 +1,73 @@
+# VictoriaMetrics stack: metrics + logs + traces
+# Extends main compose: docker compose -f docker-compose.yml -f docker-compose.victoriametrics.yml up -d
+# VictoriaMetrics API: http://localhost:8428
+# VictoriaLogs API: http://localhost:9428
+# VictoriaTraces API: http://localhost:10428, OTLP: 14317/14318
+
+services:
+  victoriametrics:  # metrics backend; HTTP API published on host port 8428
+    image: victoriametrics/victoria-metrics:v1.142.0
+    container_name: tsb-victoriametrics  # the OTel collector config addresses this service by container name
+    ports:
+      - "8428:8428"
+    volumes:
+      - vmdata:/storage  # named volume; must match --storageDataPath below
+    command:
+      - "--storageDataPath=/storage"
+      - "--httpListenAddr=:8428"
+      - "--retentionPeriod=100y"  # presumably so benchmark data with old timestamps is never expired — TODO confirm
+    networks:
+      - default
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:8428/-/healthy"]  # assumes wget exists in the image — TODO confirm
+      interval: 10s
+      timeout: 5s
+      retries: 30
+    restart: unless-stopped
+
+  victorialogs:  # logs backend; HTTP API published on host port 9428
+    image: victoriametrics/victoria-logs:v1.50.0
+    container_name: tsb-victorialogs  # the OTel collector config addresses this service by container name
+    ports:
+      - "9428:9428"
+    volumes:
+      - vldata:/vlogs  # named volume; must match --storageDataPath below
+    command:
+      - "--storageDataPath=/vlogs"
+      - "--httpListenAddr=:9428"
+      - "--retentionPeriod=100y"  # presumably so benchmark data with old timestamps is never expired — TODO confirm
+    networks:
+      - default
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:9428/health"]  # assumes wget exists in the image — TODO confirm
+      interval: 10s
+      timeout: 5s
+      retries: 30
+    restart: unless-stopped
+
+  victoriatraces:  # traces backend; HTTP API published on host port 10428
+    image: docker.io/victoriametrics/victoria-traces:latest  # NOTE(review): unpinned tag, unlike the two pinned images above; pin a release for reproducible benchmarks
+    container_name: tsb-victoriatraces  # the OTel collector config addresses this service by container name
+    ports:
+      - "10428:10428"
+      - "14317:4317"  # NOTE(review): no OTLP listen flag is passed in command below — confirm the container actually listens on 4317
+      - "14318:4318"  # NOTE(review): same question for 4318
+    volumes:
+      - vtdata:/vtraces  # named volume; must match --storageDataPath below
+    command:
+      - "--storageDataPath=/vtraces"
+      - "--httpListenAddr=:10428"
+      - "--retentionPeriod=100y"  # presumably so benchmark data with old timestamps is never expired — TODO confirm
+    networks:
+      - default
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:10428/health"]  # assumes wget exists in the image — TODO confirm
+      interval: 10s
+      timeout: 5s
+      retries: 30
+    restart: unless-stopped
+
+volumes:
+  vmdata:
+  vldata:
+  vtdata:
diff --git a/research/telemetry_storage_backend/otel-collector-config.yaml b/research/telemetry_storage_backend/otel-collector-config.yaml
index 1d92555..21bde61 100644
--- a/research/telemetry_storage_backend/otel-collector-config.yaml
+++ b/research/telemetry_storage_backend/otel-collector-config.yaml
@@ -33,18 +33,30 @@ exporters:
     username: default
     password: changeme
     create_schema: true
+  otlphttp/victoriametrics:
+    endpoint: http://tsb-victoriametrics:8428/opentelemetry
+    tls:
+      insecure: true
+  otlphttp/victorialogs:
+    endpoint: http://tsb-victorialogs:9428/insert/opentelemetry
+    tls:
+      insecure: true
+  otlphttp/victoriatraces:
+    endpoint: http://tsb-victoriatraces:10428/insert/opentelemetry
+    tls:
+      insecure: true
 
 service:
   pipelines:
     traces:
       receivers: [otlp]
       processors: [batch]
-      exporters: [doris, clickhouse]
+      exporters: [doris, clickhouse, otlphttp/victoriatraces]
     metrics:
       receivers: [otlp]
       processors: [batch]
-      exporters: [doris, clickhouse]
+      exporters: [doris, clickhouse, otlphttp/victoriametrics]
     logs:
       receivers: [otlp]
       processors: [batch]
-      exporters: [doris, clickhouse]
+      exporters: [doris, clickhouse, otlphttp/victorialogs]
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/correlation_by_timestamp.logql b/research/telemetry_storage_backend/queries/victoriametrics/correlation_by_timestamp.logql
new file mode 100644
index 0000000..4449d3c
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/correlation_by_timestamp.logql
@@ -0,0 +1 @@
+_time:30d * | stats by (_time:1m, service) count() as event_count | sort by (event_count) desc | limit 20
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/correlation_by_trace_id.logql b/research/telemetry_storage_backend/queries/victoriametrics/correlation_by_trace_id.logql
new file mode 100644
index 0000000..8ab136f
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/correlation_by_trace_id.logql
@@ -0,0 +1 @@
+_time:30d trace_id:* | stats by (trace_id, service) count() as log_count | sort by (log_count) desc | limit 20
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/data_volume.logql b/research/telemetry_storage_backend/queries/victoriametrics/data_volume.logql
new file mode 100644
index 0000000..38f7ab6
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/data_volume.logql
@@ -0,0 +1 @@
+* | stats count() as total
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/data_volume.metricsql b/research/telemetry_storage_backend/queries/victoriametrics/data_volume.metricsql
new file mode 100644
index 0000000..106a556
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/data_volume.metricsql
@@ -0,0 +1 @@
+count({__name__!=""})
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/data_volume.traceql b/research/telemetry_storage_backend/queries/victoriametrics/data_volume.traceql
new file mode 100644
index 0000000..38f7ab6
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/data_volume.traceql
@@ -0,0 +1 @@
+* | stats count() as total
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/logs_errors_by_service.logql b/research/telemetry_storage_backend/queries/victoriametrics/logs_errors_by_service.logql
new file mode 100644
index 0000000..a47cb4b
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/logs_errors_by_service.logql
@@ -0,0 +1 @@
+_time:1d (level:error OR error) | stats by (service) count() as err_count | sort by (err_count) desc | limit 20
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/logs_recent.logql b/research/telemetry_storage_backend/queries/victoriametrics/logs_recent.logql
new file mode 100644
index 0000000..0a46d5b
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/logs_recent.logql
@@ -0,0 +1 @@
+* | sort by (_time) desc | limit 100
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/logs_search_error.logql b/research/telemetry_storage_backend/queries/victoriametrics/logs_search_error.logql
new file mode 100644
index 0000000..6fc3bdd
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/logs_search_error.logql
@@ -0,0 +1 @@
+_time:30d error | sort by (_time) desc | limit 100
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/metrics_by_service_hourly.metricsql b/research/telemetry_storage_backend/queries/victoriametrics/metrics_by_service_hourly.metricsql
new file mode 100644
index 0000000..baacec5
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/metrics_by_service_hourly.metricsql
@@ -0,0 +1 @@
+avg_over_time({__name__!=""}[1h])
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/metrics_p95_latency.metricsql b/research/telemetry_storage_backend/queries/victoriametrics/metrics_p95_latency.metricsql
new file mode 100644
index 0000000..15b3356
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/metrics_p95_latency.metricsql
@@ -0,0 +1 @@
+topk(20, quantile_over_time(0.95, {__name__!=""}[365d]))
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/sla_latency_compliance.traceql b/research/telemetry_storage_backend/queries/victoriametrics/sla_latency_compliance.traceql
new file mode 100644
index 0000000..1177d81
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/sla_latency_compliance.traceql
@@ -0,0 +1 @@
+_time:30d * | stats count() as total, count() if (duration:<500ms) as fast | math fast * 100 / total as pct_under_500ms
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/spans_error_by_service.traceql b/research/telemetry_storage_backend/queries/victoriametrics/spans_error_by_service.traceql
new file mode 100644
index 0000000..7d398dd
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/spans_error_by_service.traceql
@@ -0,0 +1 @@
+_time:30d (duration:>5s OR "span_attr:http.status_code":>=500) | stats by ("resource_attr:service.name") count() as error_span_count | sort by (error_span_count) desc | limit 20
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/trace_by_id.traceql b/research/telemetry_storage_backend/queries/victoriametrics/trace_by_id.traceql
new file mode 100644
index 0000000..037042e
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/trace_by_id.traceql
@@ -0,0 +1 @@
+trace_id:* | sort by (_time) | limit 1
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/queries/victoriametrics/traces_slow_by_service.traceql b/research/telemetry_storage_backend/queries/victoriametrics/traces_slow_by_service.traceql
new file mode 100644
index 0000000..221972c
--- /dev/null
+++ b/research/telemetry_storage_backend/queries/victoriametrics/traces_slow_by_service.traceql
@@ -0,0 +1 @@
+_time:1d duration:>500ms | stats by ("resource_attr:service.name") count() as slow_cnt | sort by (slow_cnt) desc | limit 20
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/requirements.txt b/research/telemetry_storage_backend/requirements.txt
index e5ff456..15a042c 100644
--- a/research/telemetry_storage_backend/requirements.txt
+++ b/research/telemetry_storage_backend/requirements.txt
@@ -1,2 +1,7 @@
 # For OceanBase loader (replay_oceanbase.py)
 pymysql>=1.0.0
+# For all loaders and bench runner
+requests>=2.28.0
+# For VictoriaMetrics remote write (protobuf + snappy)
+protobuf>=4.21.0
+python-snappy>=0.6.0
diff --git a/research/telemetry_storage_backend/runner/bench_compare.py b/research/telemetry_storage_backend/runner/bench_compare.py
index ce73915..85cdddc 100644
--- a/research/telemetry_storage_backend/runner/bench_compare.py
+++ b/research/telemetry_storage_backend/runner/bench_compare.py
@@ -22,9 +22,11 @@
 CH_QDIR = ROOT / "queries" / "clickhouse"
 DRUID_QDIR = ROOT / "queries" / "druid"
 OB_QDIR = ROOT / "queries" / "oceanbase"
+VM_QDIR = ROOT / "queries" / "victoriametrics"
 OB_SCHEMA = ROOT / "schemas" / "oceanbase.sql"
 
 DORIS_FE_HTTP = os.getenv("DORIS_FE_HTTP", "http://localhost:8030")
+DORIS_MYSQL_PORT = int(os.getenv("DORIS_MYSQL_PORT", "9030"))
 DORIS_PASS = os.getenv("DORIS_PASS", "")
 CH_HTTP = os.getenv("CLICKHOUSE_HTTP", "http://localhost:8123")
 CH_PASSWORD = os.getenv("CLICKHOUSE_PASSWORD", "")
@@ -33,8 +35,16 @@
 OB_PORT = int(os.getenv("OCEANBASE_PORT", "2881"))
 OB_CONTAINER = os.getenv("OCEANBASE_CONTAINER", "tsb-oceanbase")
 LOKI_HTTP = os.getenv("LOKI_HTTP", "http://localhost:3100")
+VM_HTTP = os.getenv("VM_HTTP", "http://localhost:8428")
+VL_HTTP = os.getenv("VL_HTTP", "http://localhost:9428")
+VT_HTTP = os.getenv("VT_HTTP", "http://localhost:10428")
 DB = "telemetry"
 
+def _port_of(url: str) -> int:
+    """Return the URL's explicit port, or the scheme default (443 for https, otherwise 80)."""
+    from urllib.parse import urlparse
+    return (parsed := urlparse(url)).port or (443 if parsed.scheme == "https" else 80)
+
 def _ch_params(extra: dict | None = None) -> dict:
     params = dict(extra) if extra else {}
     if CH_PASSWORD:
@@ -170,6 +180,45 @@ def truncate_oceanbase_tables() -> None:
             print(f"[truncate] OceanBase {t}: {e}")
     print("[truncate] OceanBase tables cleared")
 
+def reset_vm_storage() -> None:
+    """Stop and remove the three VM containers, delete their data volumes, then start them again."""
+    services = ["victoriametrics", "victorialogs", "victoriatraces"]
+    compose = ["docker", "compose", "-f", str(ROOT / "docker-compose.yml"),
+               "-f", str(ROOT / "docker-compose.victoriametrics.yml")]
+    subprocess.run(compose + ["stop", *services], capture_output=True, check=False)
+    subprocess.run(compose + ["rm", "-f", *services], capture_output=True, check=False)
+    for vol_key in ["vmdata", "vldata", "vtdata"]:
+        # Exact compose-label match: a `name=` filter is a substring match and could delete unrelated volumes.
+        out = subprocess.run(["docker", "volume", "ls", "-q", "--filter",
+                              f"label=com.docker.compose.volume={vol_key}"], capture_output=True, text=True)
+        for vol in out.stdout.strip().splitlines():
+            subprocess.run(["docker", "volume", "rm", "-f", vol], capture_output=True, check=False)
+    subprocess.run(compose + ["up", "-d", *services], check=True)  # only the restart is allowed to raise
+    print("[truncate] VM storage reset, containers restarted")
+
+def wait_vm_healthy(timeout_s: int = 120) -> bool:
+    """Poll each VM service's health URL every 2s, in order; return False once one exceeds timeout_s."""
+    probes = {
+        "VictoriaMetrics": f"{VM_HTTP}/-/healthy",
+        "VictoriaLogs": f"{VL_HTTP}/health",
+        "VictoriaTraces": f"{VT_HTTP}/health",
+    }
+    for name, url in probes.items():
+        started = time.time()
+        healthy = False
+        while not healthy and time.time() - started < timeout_s:
+            try:
+                healthy = requests.get(url, timeout=5).status_code == 200
+            except Exception:
+                pass  # request failed (service may not be accepting connections yet); retry
+            if not healthy:
+                time.sleep(2)
+        if not healthy:
+            print(f"[wait] {name} not healthy after {timeout_s}s")
+            return False
+        print(f"[wait] {name} healthy")
+    return True
+
 def run_doris_query(sql: str) -> dict:
     pass_arg = f"-p{DORIS_PASS}" if DORIS_PASS else ""
     sh_script = (
@@ -279,6 +328,82 @@ def bench_loki_logs(run_id: str) -> dict:
     return res
 
 
+def run_vm_logql(query: str, run_id: str = "") -> dict:
+    """Query VictoriaLogs with LogsQL; a non-empty run_id is prepended to the query's filter part.
+    Returns {"latency_s", "rows"} and adds "error" on a non-200 response or an exception."""
+    endpoint = f"{VL_HTTP}/select/logsql/query"
+    if run_id:
+        # partition on the first " | " so the run_id filter lands before the filters, not inside the pipes
+        head, sep, tail = query.partition(" | ")
+        query = f"run_id:{run_id} {head}{sep}{tail}"
+    payload = {"query": query, "limit": 1000}
+    started = time.time()
+    try:
+        resp = requests.get(endpoint, params=payload, timeout=120)
+        elapsed = time.time() - started
+        if resp.status_code == 200:
+            # rows = number of non-blank lines in the response body
+            hits = sum(1 for line in resp.text.strip().splitlines() if line.strip())
+            return {"latency_s": elapsed, "rows": hits}
+        return {"latency_s": elapsed, "rows": 0, "error": resp.text[:300]}
+    except Exception as exc:
+        return {"latency_s": 0, "rows": 0, "error": str(exc)[:200]}
+
+
+def run_vm_metricsql(query: str) -> dict:
+    """Send one MetricsQL query to VictoriaMetrics /api/v1/query with step=24h.
+    Returns {"latency_s", "rows"} where rows is len(data.result); adds "error" on failure."""
+    endpoint = f"{VM_HTTP}/api/v1/query"
+    payload = {"query": query, "step": "24h"}
+    started = time.time()
+    try:
+        resp = requests.get(endpoint, params=payload, timeout=120)
+        elapsed = time.time() - started
+        if resp.status_code == 200:
+            series = resp.json().get("data", {}).get("result", [])
+            return {"latency_s": elapsed, "rows": len(series)}
+        return {"latency_s": elapsed, "rows": 0, "error": resp.text[:300]}
+    except Exception as exc:
+        return {"latency_s": 0, "rows": 0, "error": str(exc)[:200]}
+
+
+def run_vm_traceql(query: str) -> dict:
+    """Query VictoriaTraces via its LogsQL endpoint (limit 1000); rows counts non-blank response lines."""
+    endpoint = f"{VT_HTTP}/select/logsql/query"
+    payload = {"query": query, "limit": 1000}
+    started = time.time()
+    try:
+        resp = requests.get(endpoint, params=payload, timeout=120)
+        elapsed = time.time() - started
+        if resp.status_code == 200:
+            hits = sum(1 for line in resp.text.strip().splitlines() if line.strip())
+            return {"latency_s": elapsed, "rows": hits}
+        return {"latency_s": elapsed, "rows": 0, "error": resp.text[:300]}
+    except Exception as exc:
+        return {"latency_s": 0, "rows": 0, "error": str(exc)[:200]}
+
+
+def bench_vm(run_id: str = "") -> tuple[dict, dict, dict]:
+    """Run every query file in VM_QDIR, routed by suffix; only .logql queries receive run_id.
+    Returns (victorialogs_results, victoriametrics_results, victoriatraces_results), keyed by file stem."""
+    logs_res, metrics_res, traces_res = {}, {}, {}
+    routes = {
+        ".logql": (logs_res, lambda q: run_vm_logql(q, run_id)),
+        ".metricsql": (metrics_res, run_vm_metricsql),
+        ".traceql": (traces_res, run_vm_traceql),
+    }
+    for path in sorted(VM_QDIR.glob("*")):
+        if path.suffix not in routes:
+            continue
+        bucket, runner = routes[path.suffix]
+        text = path.read_text().strip()
+        try:
+            bucket[path.stem] = runner(text)
+        except Exception as exc:
+            bucket[path.stem] = {"error": str(exc)[:200]}
+    return logs_res, metrics_res, traces_res
+
+
 def bench_backend(qdir: Path, run_fn) -> dict:
     results = {}
     for f in sorted(qdir.glob("*.sql")):
@@ -292,17 +417,15 @@ def bench_backend(qdir: Path, run_fn) -> dict:
     return results
 
 
-def get_data_volume(use_doris: bool, use_oceanbase: bool = True) -> tuple:
-    """Run full-scan COUNT on each backend; return (doris_vol, ch_vol, druid_vol, ob_vol)."""
+def get_data_volume(use_doris: bool, use_oceanbase: bool = True, use_vm: bool = False, use_sql: bool = True) -> tuple:
+    """Run full-scan COUNT on each backend; return (doris_vol, ch_vol, druid_vol, ob_vol, vl_vol, vm_vol, vt_vol)."""
     doris_vol = {}
     ch_vol = {}
     druid_vol = {}
     ob_vol = {}
-    sql_doris = (DORIS_QDIR / "data_volume.sql").read_text()
-    sql_ch = (CH_QDIR / "data_volume.sql").read_text()
-    sql_druid = (DRUID_QDIR / "data_volume.sql").read_text()
-    sql_ob = (OB_QDIR / "data_volume.sql").read_text()
-
+    vl_vol = {}
+    vm_vol = {}
+    vt_vol = {}
     def _parse_tsv(lines: list[str]) -> dict:
         counts = {}
         for ln in lines:
@@ -315,6 +438,7 @@ def _parse_tsv(lines: list[str]) -> dict:
 
     if use_doris:
         try:
+            sql_doris = (DORIS_QDIR / "data_volume.sql").read_text()
             pass_arg = f"-p{DORIS_PASS}" if DORIS_PASS else ""
             sh_script = (
                 "cat > /tmp/dv.sql <<'EOSQL'\n" + sql_doris + "\nEOSQL\n"
@@ -333,45 +457,49 @@ def _parse_tsv(lines: list[str]) -> dict:
         except Exception as e:
             doris_vol = {"error": str(e)[:200]}
 
-    try:
-        t0 = time.time()
-        r = requests.post(CH_HTTP, params=_ch_params({"query": sql_ch}), timeout=120)
-        dt = time.time() - t0
-        if r.status_code == 200:
-            lines = [ln for ln in r.text.strip().splitlines() if ln.strip()]
-            ch_vol = _parse_tsv(lines)
-            ch_vol["latency_s"] = dt
-        else:
-            ch_vol = {"error": r.text[:200]}
-    except Exception as e:
-        ch_vol = {"error": str(e)[:200]}
+    if use_sql:
+        sql_ch = (CH_QDIR / "data_volume.sql").read_text()
+        sql_druid = (DRUID_QDIR / "data_volume.sql").read_text()
+        try:
+            t0 = time.time()
+            r = requests.post(CH_HTTP, params=_ch_params({"query": sql_ch}), timeout=120)
+            dt = time.time() - t0
+            if r.status_code == 200:
+                lines = [ln for ln in r.text.strip().splitlines() if ln.strip()]
+                ch_vol = _parse_tsv(lines)
+                ch_vol["latency_s"] = dt
+            else:
+                ch_vol = {"error": r.text[:200]}
+        except Exception as e:
+            ch_vol = {"error": str(e)[:200]}
 
-    try:
-        t0 = time.time()
-        r = requests.post(
-            f"{DRUID_HTTP}/druid/v2/sql",
-            json={"query": sql_druid, "resultFormat": "array"},
-            headers={"Content-Type": "application/json"},
-            timeout=120,
-        )
-        dt = time.time() - t0
-        if r.status_code == 200:
-            arr = r.json()
-            counts = {}
-            for row in (arr or []):
-                if len(row) >= 2:
-                    tbl = str(row[0]).lower()
-                    cnt = int(row[1]) if isinstance(row[1], (int, float)) else 0
-                    counts[tbl] = cnt
-            counts["total"] = counts.get("logs", 0) + counts.get("spans", 0) + counts.get("metrics", 0)
-            counts["latency_s"] = dt
-            druid_vol = counts
-        else:
-            druid_vol = {"error": r.text[:200]}
-    except Exception as e:
-        druid_vol = {"error": str(e)[:200]}
+        try:
+            t0 = time.time()
+            r = requests.post(
+                f"{DRUID_HTTP}/druid/v2/sql",
+                json={"query": sql_druid, "resultFormat": "array"},
+                headers={"Content-Type": "application/json"},
+                timeout=120,
+            )
+            dt = time.time() - t0
+            if r.status_code == 200:
+                arr = r.json()
+                counts = {}
+                for row in (arr or []):
+                    if len(row) >= 2:
+                        tbl = str(row[0]).lower()
+                        cnt = int(row[1]) if isinstance(row[1], (int, float)) else 0
+                        counts[tbl] = cnt
+                counts["total"] = counts.get("logs", 0) + counts.get("spans", 0) + counts.get("metrics", 0)
+                counts["latency_s"] = dt
+                druid_vol = counts
+            else:
+                druid_vol = {"error": r.text[:200]}
+        except Exception as e:
+            druid_vol = {"error": str(e)[:200]}
 
     if use_oceanbase:
+        sql_ob = (OB_QDIR / "data_volume.sql").read_text()
         try:
             t0 = time.time()
             out = subprocess.run(
@@ -389,138 +517,211 @@ def _parse_tsv(lines: list[str]) -> dict:
         except Exception as e:
             ob_vol = {"error": str(e)[:200]}
 
-    return doris_vol, ch_vol, druid_vol, ob_vol
+    if use_vm:
+        # VictoriaLogs: count log entries via stats
+        try:
+            t0 = time.time()
+            r = requests.get(f"{VL_HTTP}/select/logsql/query",
+                             params={"query": "* | stats count() as total", "limit": 1},
+                             timeout=30)
+            dt = time.time() - t0
+            if r.status_code == 200:
+                lines = [ln for ln in r.text.strip().splitlines() if ln.strip()]
+                # An empty body means the stats query produced no result line: report zero rows.
+                total = int(json.loads(lines[0]).get("total", 0)) if lines else 0
+                vl_vol = {"rows": total, "latency_s": dt}
+            else:
+                vl_vol = {"error": r.text[:200]}  # surface HTTP failures instead of dropping them
+        except Exception as e:
+            vl_vol = {"error": str(e)[:200]}
+        # VictoriaMetrics: count total inserted rows via /metrics internal counter
+        try:
+            t0 = time.time()
+            r = requests.get(f"{VM_HTTP}/metrics", timeout=30)
+            dt = time.time() - t0
+            if r.status_code == 200:
+                # Sum all labelled vm_rows_inserted_total samples; each sample's value is
+                # the last space-separated field of its line.
+                total_rows = sum(int(float(ln.rsplit(" ", 1)[1])) for ln in r.text.splitlines()
+                                 if ln.startswith("vm_rows_inserted_total{") and " " in ln)
+                vm_vol = {"rows": total_rows, "latency_s": dt}
+            else:
+                vm_vol = {"error": r.text[:200]}
+        except Exception as e:
+            vm_vol = {"error": str(e)[:200]}
+        # VictoriaTraces: count spans via LogsQL stats
+        try:
+            t0 = time.time()
+            r = requests.get(f"{VT_HTTP}/select/logsql/query",
+                             params={"query": "* | stats count() as total", "limit": 1},
+                             timeout=30)
+            dt = time.time() - t0
+            if r.status_code == 200:
+                lines = [ln for ln in r.text.strip().splitlines() if ln.strip()]
+                # An empty body means the stats query produced no result line: report zero rows.
+                total = int(json.loads(lines[0]).get("total", 0)) if lines else 0
+                vt_vol = {"rows": total, "latency_s": dt}
+            else:
+                vt_vol = {"error": r.text[:200]}  # surface HTTP failures instead of dropping them
+        except Exception as e:
+            vt_vol = {"error": str(e)[:200]}
+
+    return doris_vol, ch_vol, druid_vol, ob_vol, vl_vol, vm_vol, vt_vol
 
 def write_combined_report(out_dir: Path, doris_ingest: dict, ch_ingest: dict, druid_ingest: dict,
                          doris_qres: dict, ch_qres: dict, druid_qres: dict,
                          ob_ingest: dict | None = None, ob_qres: dict | None = None,
                          loki_ingest: dict | None = None, loki_qres: dict | None = None,
+                         vl_ingest: dict | None = None, vl_qres: dict | None = None,
+                         vm_ingest: dict | None = None, vm_qres: dict | None = None,
+                         vt_ingest: dict | None = None, vt_qres: dict | None = None,
                          otlp_ingest: dict | None = None,
                          data_vol: tuple | None = None) -> None:
     out_dir.mkdir(parents=True, exist_ok=True)
-    all_queries = sorted(set(doris_qres.keys()) | set(ch_qres.keys()) | set(druid_qres.keys())
-                        | set((ob_qres or {}).keys()) | set((loki_qres or {}).keys()))
-    rows = []
-    chart_data = {"queries": [], "doris": [], "clickhouse": [], "druid": [], "oceanbase": [], "loki": []}
     backends = [("doris", doris_qres), ("clickhouse", ch_qres), ("druid", druid_qres)]
     if ob_qres:
         backends.append(("oceanbase", ob_qres))
     if loki_qres:
         backends.append(("loki", loki_qres))
+    if vl_qres:
+        backends.append(("victorialogs", vl_qres))
+    if vm_qres:
+        backends.append(("victoriametrics", vm_qres))
+    if vt_qres:
+        backends.append(("victoriatraces", vt_qres))
+    all_queries = sorted(set().union(*(qr.keys() for _, qr in backends)))
+    _display = {"doris": "Doris", "clickhouse": "ClickHouse", "druid": "Druid",
+                 "oceanbase": "OceanBase", "loki": "Loki",
+                 "victorialogs": "VictoriaLogs", "victoriametrics": "VictoriaMetrics",
+                 "victoriatraces": "VictoriaTraces"}
+    rows = []
+    chart_data = {"queries": []}
+    for bname, _ in backends:
+        chart_data[bname] = []
     for q in all_queries:
-        d = doris_qres.get(q, {})
-        c = ch_qres.get(q, {})
-        dr = druid_qres.get(q, {})
-        ob = (ob_qres or {}).get(q, {})
-        loki = (loki_qres or {}).get(q, {})
-        d_lat, c_lat, dr_lat = d.get("latency_s", ""), c.get("latency_s", ""), dr.get("latency_s", "")
-        ob_lat = ob.get("latency_s", "")
-        loki_lat = loki.get("latency_s", "")
-        lats = [(d_lat, "Doris"), (c_lat, "ClickHouse"), (dr_lat, "Druid")]
-        if ob_qres:
-            lats.append((ob_lat, "OceanBase"))
-        if loki_qres:
-            lats.append((loki_lat, "Loki"))
+        lats = []
+        cells = ""
+        for bname, bqres in backends:
+            bq = bqres.get(q, {})
+            lat = bq.get("latency_s", "")
+            cells += f"<td>{lat}</td><td>{bq.get('rows', '')}</td><td>{bq.get('error', '')}</td>"
+            lats.append((lat, _display.get(bname, bname)))
+            chart_data[bname].append(round(lat, 4) if isinstance(lat, (int, float)) else None)
         valid = [(x, n) for x, n in lats if isinstance(x, (int, float))]
         winner, pct_diff = "", ""
         if len(valid) >= 2:
             fastest = min(valid, key=lambda t: t[0])
             slowest = max(valid, key=lambda t: t[0])
             winner = fastest[1]
+            if winner in ("VictoriaLogs", "VictoriaTraces"):
+                winner = "VictoriaMetrics"
             if slowest[0] >= 1e-9:
                 pct = (slowest[0] - fastest[0]) / slowest[0] * 100
                 pct_diff = f"{pct:.1f}%"
-        ob_cells = f"<td>{ob_lat}</td><td>{ob.get('rows', '')}</td><td>{ob.get('error', '')}</td>" if ob_qres else ""
-        loki_cells = f"<td>{loki_lat}</td><td>{loki.get('rows', '')}</td><td>{loki.get('error', '')}</td>" if loki_qres else ""
-        rows.append(f"<tr><td>{q}</td><td>{d_lat}</td><td>{d.get('rows', '')}</td><td>{d.get('error', '')}</td>"
-                   f"<td>{c_lat}</td><td>{c.get('rows', '')}</td><td>{c.get('error', '')}</td>"
-                   f"<td>{dr_lat}</td><td>{dr.get('rows', '')}</td><td>{dr.get('error', '')}</td>"
-                   f"{ob_cells}{loki_cells}<td>{pct_diff}</td><td>{winner}</td></tr>")
+        rows.append(f"<tr><td>{q}</td>{cells}<td>{pct_diff}</td><td>{winner}</td></tr>")
         chart_data["queries"].append(q)
-        chart_data["doris"].append(round(d_lat, 4) if isinstance(d_lat, (int, float)) else None)
-        chart_data["clickhouse"].append(round(c_lat, 4) if isinstance(c_lat, (int, float)) else None)
-        chart_data["druid"].append(round(dr_lat, 4) if isinstance(dr_lat, (int, float)) else None)
-        chart_data["oceanbase"].append(round(ob_lat, 4) if isinstance(ob_lat, (int, float)) else None)
-        chart_data["loki"].append(round(loki_lat, 4) if isinstance(loki_lat, (int, float)) else None)
-    ingest_labels = ["Doris", "ClickHouse", "Druid"]
-    ingest_dur = [doris_ingest.get("duration_s"), ch_ingest.get("duration_s"), druid_ingest.get("duration_s")]
-    ingest_rps = [doris_ingest.get("rows_per_sec"), ch_ingest.get("rows_per_sec"), druid_ingest.get("rows_per_sec")]
+    all_ingests = [("Doris", doris_ingest), ("ClickHouse", ch_ingest), ("Druid", druid_ingest)]
     if ob_ingest and ob_ingest.get("status") == "ok":
-        ingest_labels.append("OceanBase")
-        ingest_dur.append(ob_ingest.get("duration_s"))
-        ingest_rps.append(ob_ingest.get("rows_per_sec"))
+        all_ingests.append(("OceanBase", ob_ingest))
     if loki_ingest and loki_ingest.get("status") == "ok":
-        ingest_labels.append("Loki")
-        ingest_dur.append(loki_ingest.get("duration_s"))
-        ingest_rps.append(loki_ingest.get("rows_per_sec"))
+        all_ingests.append(("Loki", loki_ingest))
+    if vl_ingest and vl_ingest.get("status") == "ok":
+        all_ingests.append(("VictoriaLogs", vl_ingest))
+    if vm_ingest and vm_ingest.get("status") == "ok":
+        all_ingests.append(("VictoriaMetrics", vm_ingest))
+    if vt_ingest and vt_ingest.get("status") == "ok":
+        all_ingests.append(("VictoriaTraces", vt_ingest))
+    ingest_labels = [n for n, _ in all_ingests]
+    ingest_dur = [i.get("duration_s") for _, i in all_ingests]
+    ingest_rps = [i.get("rows_per_sec") for _, i in all_ingests]
     ingest_chart = {"labels": ingest_labels, "duration_s": ingest_dur, "rows_per_sec": ingest_rps}
-    otlp_rows = ""
-    if otlp_ingest:
-        mech = otlp_ingest.get("mechanism", "OTLP")
-        dur = otlp_ingest.get("duration_s", "-")
-        for backend, key in [("Doris", "doris"), ("ClickHouse", "clickhouse")]:
-            d = otlp_ingest.get(key, {})
-            r = d.get("rows", "-")
-            rps = d.get("rows_per_sec", "-")
-            otlp_rows += f'    <tr><td>{backend}</td><td>{mech}</td><td>{dur}</td><td>{r}</td><td>{rps}</td></tr>\n'
-
     data_vol_row = ""
     if data_vol:
         d_vol, c_vol, dr_vol = data_vol[0], data_vol[1], data_vol[2]
-        ob_vol = data_vol[3] if len(data_vol) > 3 else {}
+        ob_vol_d = data_vol[3] if len(data_vol) > 3 else {}
+        vl_vol_d = data_vol[4] if len(data_vol) > 4 else {}
+        vm_vol_d = data_vol[5] if len(data_vol) > 5 else {}
+        vt_vol_d = data_vol[6] if len(data_vol) > 6 else {}
         def _fmt(v: dict) -> str:
             if not v or "error" in v:
                 return v.get("error", "-") if v else "-"
-            total = v.get("total", 0)
-            return f"{total:,} (logs={v.get('logs',0):,}, spans={v.get('spans',0):,}, metrics={v.get('metrics',0):,})"
+            if "total" in v:
+                total = v["total"]
+                return f"{total:,} (logs={v.get('logs',0):,}, spans={v.get('spans',0):,}, metrics={v.get('metrics',0):,})"
+            return f"{v.get('rows', 0):,}"
         def _lat(v: dict) -> str:
             lat = v.get("latency_s") if v else None
             return f"{lat:.3f}s" if isinstance(lat, (int, float)) else "-"
-        ob_row = f"<tr><td>OceanBase</td><td>{_fmt(ob_vol)}</td><td>{_lat(ob_vol)}</td></tr>" if ob_vol else ""
+        vol_rows = [
+            ("Doris", d_vol), ("ClickHouse", c_vol), ("Druid", dr_vol),
+        ]
+        if ob_vol_d:
+            vol_rows.append(("OceanBase", ob_vol_d))
+        if vl_vol_d:
+            vol_rows.append(("VictoriaLogs", vl_vol_d))
+        if vm_vol_d:
+            vol_rows.append(("VictoriaMetrics", vm_vol_d))
+        if vt_vol_d:
+            vol_rows.append(("VictoriaTraces", vt_vol_d))
+        vol_html = "\n".join(f"    <tr><td>{name}</td><td>{_fmt(v)}</td><td>{_lat(v)}</td></tr>" for name, v in vol_rows)
         data_vol_row = f"""
   <h3>Data volume (full scan)</h3>
   <p>Total rows in telemetry tables at query time. Latency = full COUNT(*) time.</p>
   <table>
-    <tr><th>Backend</th><th>Total rows (logs, spans, metrics)</th><th>Full-scan latency</th></tr>
-    <tr><td>Doris</td><td>{_fmt(d_vol)}</td><td>{_lat(d_vol)}</td></tr>
-    <tr><td>ClickHouse</td><td>{_fmt(c_vol)}</td><td>{_lat(c_vol)}</td></tr>
-    <tr><td>Druid</td><td>{_fmt(dr_vol)}</td><td>{_lat(dr_vol)}</td></tr>
-    {ob_row}
+    <tr><th>Backend</th><th>Total rows</th><th>Full-scan latency</th></tr>
+{vol_html}
   </table>
 """
 
     chart_json = json.dumps(chart_data)
     ingest_json = json.dumps(ingest_chart)
+    report_title = " vs ".join(_display.get(n, n) for n, _ in backends) + " benchmark"
+    ingest_rows_html = "\n".join(
+        f'    <tr><td>{name}</td><td>{ing.get("mechanism", "-")}</td><td>{ing.get("duration_s", "-")}</td>'
+        f'<td>{ing.get("rows", "-")}</td><td>{ing.get("rows_per_sec", "-")}</td></tr>'
+        for name, ing in all_ingests)
+    if otlp_ingest:
+        mech = otlp_ingest.get("mechanism", "OTLP")
+        dur = otlp_ingest.get("duration_s", "-")
+        otlp_count = otlp_ingest.get("count", 1000)
+        vm_mech = {
+            "victorialogs": f"OTLP ({otlp_count} logs)",
+            "victoriametrics": f"OTLP ({otlp_count} metrics)",
+            "victoriatraces": f"OTLP ({otlp_count} spans)",
+        }
+        for backend, key in [("Doris", "doris"), ("ClickHouse", "clickhouse"),
+                               ("VictoriaLogs", "victorialogs"), ("VictoriaMetrics", "victoriametrics"),
+                               ("VictoriaTraces", "victoriatraces")]:
+            d = otlp_ingest.get(key, {})
+            if d and d.get("rows", 0) > 0:
+                row_mech = vm_mech.get(key, mech)
+                ingest_rows_html += f'\n    <tr><td>{backend}</td><td>{row_mech}</td><td>{dur}</td><td>{d.get("rows", "-")}</td><td>{d.get("rows_per_sec", "-")}</td></tr>'
+    query_th = "".join(f'<th>{_display.get(n,n)} lat</th><th>{_display.get(n,n)} rows</th><th>{_display.get(n,n)} err</th>' for n, _ in backends)
+    colors = ['rgba(54,162,235,0.7)', 'rgba(75,192,192,0.7)', 'rgba(255,159,64,0.7)',
+              'rgba(153,102,255,0.7)', 'rgba(255,99,132,0.7)',
+              'rgba(0,200,83,0.7)', 'rgba(255,206,86,0.7)', 'rgba(231,76,60,0.7)']
+    chart_datasets = ", ".join(
+        f'{{ label: "{_display.get(n,n)}", data: data["{n}"], backgroundColor: "{colors[i % len(colors)]}" }}'
+        for i, (n, _) in enumerate(backends))
     html = f"""<!doctype html>
-<html><head><meta charset="utf-8"><title>Doris vs ClickHouse vs Druid benchmark</title>
+<html><head><meta charset="utf-8"><title>{report_title}</title>
 <style>table{{border-collapse:collapse}}td,th{{border:1px solid #ccc;padding:6px 10px}}#chartWrap{{max-width:900px;height:400px;margin:1em 0}}</style>
 <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
 </head><body>
-  <h1>Doris vs ClickHouse vs Druid benchmark</h1>
+  <h1>{report_title}</h1>
   <div>Generated at {datetime.now().isoformat()}</div>
   <h2>Ingestion comparison</h2>
   <div id="ingestChartWrap" style="max-width:600px;height:250px;margin:1em 0"><canvas id="ingestChart"></canvas></div>
   <table>
     <tr><th>Backend</th><th>Mechanism</th><th>Duration (s)</th><th>Rows</th><th>Rows/sec</th></tr>
-    <tr><td>Doris</td><td>{doris_ingest.get('mechanism', '-')}</td><td>{doris_ingest.get('duration_s', '-')}</td><td>{doris_ingest.get('rows', '-')}</td><td>{doris_ingest.get('rows_per_sec', '-')}</td></tr>
-    <tr><td>ClickHouse</td><td>{ch_ingest.get('mechanism', '-')}</td><td>{ch_ingest.get('duration_s', '-')}</td><td>{ch_ingest.get('rows', '-')}</td><td>{ch_ingest.get('rows_per_sec', '-')}</td></tr>
-    <tr><td>Druid</td><td>{druid_ingest.get('mechanism', '-')}</td><td>{druid_ingest.get('duration_s', '-')}</td><td>{druid_ingest.get('rows', '-')}</td><td>{druid_ingest.get('rows_per_sec', '-')}</td></tr>
-    {f'<tr><td>OceanBase</td><td>{ob_ingest.get("mechanism", "-")}</td><td>{ob_ingest.get("duration_s", "-")}</td><td>{ob_ingest.get("rows", "-")}</td><td>{ob_ingest.get("rows_per_sec", "-")}</td></tr>' if ob_ingest and ob_ingest.get('status') == 'ok' else ''}
-    {f'<tr><td>Loki</td><td>{loki_ingest.get("mechanism", "-") if loki_ingest and loki_ingest.get("status") == "ok" else ("skipped" if loki_ingest and loki_ingest.get("status") == "skipped" else loki_ingest.get("error", "error"))}</td><td>{loki_ingest.get("duration_s", "-") if loki_ingest and loki_ingest.get("status") == "ok" else "-"}</td><td>{loki_ingest.get("rows", "-") if loki_ingest and loki_ingest.get("status") == "ok" else "-"}</td><td>{loki_ingest.get("rows_per_sec", "-") if loki_ingest and loki_ingest.get("status") == "ok" else "-"}</td></tr>' if loki_ingest else ''}
-    {otlp_rows}
+{ingest_rows_html}
   </table>
-  <p><small>Raw ingest: Doris {json.dumps(doris_ingest)} | CH {json.dumps(ch_ingest)} | Druid {json.dumps(druid_ingest)}</small></p>
   {data_vol_row}
   <h2>Query latency (seconds)</h2>
   <div id="chartWrap"><canvas id="latencyChart"></canvas></div>
   <h2>Query comparison</h2>
   <table>
-    <tr><th>query</th><th>Doris lat</th><th>Doris rows</th><th>Doris err</th>
-        <th>CH lat</th><th>CH rows</th><th>CH err</th>
-        <th>Druid lat</th><th>Druid rows</th><th>Druid err</th>
-        {"<th>OceanBase lat</th><th>OceanBase rows</th><th>OceanBase err</th>" if ob_qres else ""}
-        {"<th>Loki lat</th><th>Loki rows</th><th>Loki err</th>" if loki_qres else ""}
-        <th>% diff</th><th>faster</th></tr>
+    <tr><th>query</th>{query_th}<th>% diff</th><th>faster</th></tr>
     {chr(10).join(rows)}
   </table>
   <script>
@@ -532,19 +733,13 @@ def _lat(v: dict) -> str:
         type: 'bar',
         data: {{
           labels: ingestData.labels,
-          datasets: [{{ label: 'Ingestion time (s)', data: ingestData.duration_s, backgroundColor: ['rgba(54,162,235,0.7)','rgba(75,192,192,0.7)','rgba(255,159,64,0.7)'] }}]
+          datasets: [{{ label: 'Ingestion time (s)', data: ingestData.duration_s }}]
         }},
         options: {{ responsive: true, maintainAspectRatio: false, plugins: {{ legend: {{ display: false }} }}, scales: {{ y: {{ beginAtZero: true }} }} }}
       }});
     }}
     const ctx = document.getElementById('latencyChart').getContext('2d');
-    const datasets = [
-      {{ label: 'Doris', data: data.doris, backgroundColor: 'rgba(54,162,235,0.7)' }},
-      {{ label: 'ClickHouse', data: data.clickhouse, backgroundColor: 'rgba(75,192,192,0.7)' }},
-      {{ label: 'Druid', data: data.druid, backgroundColor: 'rgba(255,159,64,0.7)' }}
-    ];
-    if (data.oceanbase && data.oceanbase.length) datasets.push({{ label: 'OceanBase', data: data.oceanbase, backgroundColor: 'rgba(153,102,255,0.7)' }});
-    if (data.loki && data.loki.length) datasets.push({{ label: 'Loki', data: data.loki, backgroundColor: 'rgba(255,99,132,0.7)' }});
+    const datasets = [{chart_datasets}];
     new Chart(ctx, {{
       type: 'bar',
       data: {{ labels: data.queries, datasets: datasets }},
@@ -568,10 +763,28 @@ def _lat(v: dict) -> str:
         (out_dir / "oceanbase_queries.json").write_text(json.dumps(ob_qres, indent=2))
     if loki_qres:
         (out_dir / "loki_queries.json").write_text(json.dumps(loki_qres, indent=2))
+    if vl_qres:
+        (out_dir / "victorialogs_queries.json").write_text(json.dumps(vl_qres, indent=2))
+    if vm_qres:
+        (out_dir / "victoriametrics_queries.json").write_text(json.dumps(vm_qres, indent=2))
+    if vt_qres:
+        (out_dir / "victoriatraces_queries.json").write_text(json.dumps(vt_qres, indent=2))
     print(f"[bench] wrote {out_dir}/compare.html")
 
+def _detect_backends(run_dir: Path) -> str:
+    """Return a " vs "-joined label of the backends that took part in a run.
+
+    A backend counts as present when ``<backend>_queries.json`` exists in run_dir.
+    """
+    labels = {"doris": "Doris", "clickhouse": "ClickHouse", "druid": "Druid",
+              "oceanbase": "OceanBase", "loki": "Loki",
+              "victorialogs": "VictoriaLogs", "victoriametrics": "VictoriaMetrics",
+              "victoriatraces": "VictoriaTraces"}
+    present = [shown for stem, shown in labels.items() if (run_dir / f"{stem}_queries.json").exists()]
+    return " vs ".join(present) or "unknown"
+
 def write_rolling_report(out_base: Path) -> None:
-    """Write rolling_index.html with both Doris and Doris vs ClickHouse runs."""
+    """Write rolling_index.html listing all benchmark runs."""
     import re
     entries = []
     for d in out_base.glob("storage_bench_doris_*"):
@@ -581,7 +794,8 @@ def write_rolling_report(out_base: Path) -> None:
     for d in out_base.glob("storage_bench_compare_*"):
         if (d / "compare.html").exists():
             ts = d.name.replace("storage_bench_compare_", "").replace("_", " ", 1)
-            entries.append((d.name, "compare.html", "Doris vs ClickHouse vs Druid", ts))
+            run_type = _detect_backends(d)
+            entries.append((d.name, "compare.html", run_type, ts))
     def sort_key(e):
         m = re.search(r"storage_bench_(?:doris|compare)_(\d{8})_?(\d{6})?", e[0])
         return (m.group(1), m.group(2) or "") if m else ("", "")
@@ -622,6 +836,7 @@ def main() -> int:
     ap.add_argument("--init", action="store_true")
     ap.add_argument("--all", action="store_true")
     ap.add_argument("--clickhouse-only", action="store_true", help="Skip Doris (e.g. when BE not ready)")
+    ap.add_argument("--vm-only", action="store_true", help="Run only VictoriaMetrics/Logs/Traces (skip SQL backends)")
     ap.add_argument("--out", type=Path, default=ROOT / "out")
     ap.add_argument("--batch", type=int, default=5000, help="Rows per load batch")
     ap.add_argument("--scale-to", type=int, default=None, help="Target row count per type (50k = 50000)")
@@ -631,18 +846,28 @@ def main() -> int:
     ap.add_argument("--otlp-count", type=int, default=1000, help="Spans, logs, metrics each for OTLP (default 1000)")
     args = ap.parse_args()
 
-    use_doris = not args.clickhouse_only
-    if use_doris:
-        assert wait_port("127.0.0.1", 8030, 180), "Doris FE 8030 not ready"
-        assert wait_port("127.0.0.1", 9030, 180), "Doris MySQL 9030 not ready"
-        assert wait_doris_be_ready(300), "Doris BE not ready (no online backends). Use --clickhouse-only to run without Doris."
-    assert wait_port("127.0.0.1", 8123, 180), "ClickHouse 8123 not ready"
-    assert wait_port("127.0.0.1", 8888, 300), "Druid 8888 not ready"
-    assert wait_druid_ready(300), "Druid not ready"
-    assert wait_port("127.0.0.1", 2881, 360), "OceanBase 2881 not ready (bootstrap ~3-5 min)"
-    loki_available = wait_port("127.0.0.1", 3100, 60)
-    if not loki_available:
-        print("[bench] Loki not available (port 3100), skipping logs-only backend")
+    vm_only = getattr(args, "vm_only", False)
+    use_doris = not args.clickhouse_only and not vm_only
+    use_sql_backends = not vm_only
+    if use_sql_backends:
+        if use_doris:
+            assert wait_port("127.0.0.1", _port_of(DORIS_FE_HTTP), 180), f"Doris FE {_port_of(DORIS_FE_HTTP)} not ready"
+            assert wait_port("127.0.0.1", DORIS_MYSQL_PORT, 180), f"Doris MySQL {DORIS_MYSQL_PORT} not ready"
+            assert wait_doris_be_ready(300), "Doris BE not ready (no online backends). Use --clickhouse-only to run without Doris."
+        assert wait_port("127.0.0.1", _port_of(CH_HTTP), 180), f"ClickHouse {_port_of(CH_HTTP)} not ready"
+        assert wait_port("127.0.0.1", _port_of(DRUID_HTTP), 300), f"Druid {_port_of(DRUID_HTTP)} not ready"
+        assert wait_druid_ready(300), "Druid not ready"
+        assert wait_port("127.0.0.1", OB_PORT, 360), f"OceanBase {OB_PORT} not ready (bootstrap ~3-5 min)"
+    loki_available = not vm_only and wait_port("127.0.0.1", _port_of(LOKI_HTTP), 60)
+    if not loki_available and not vm_only:
+        print(f"[bench] Loki not available (port {_port_of(LOKI_HTTP)}), skipping logs-only backend")
+    # The VM stack is usable only if all three services (metrics, logs, traces) accept connections.
+    vm_ports = [_port_of(u) for u in (VM_HTTP, VL_HTTP, VT_HTTP)]
+    vm_available = all(wait_port("127.0.0.1", p, 60) for p in vm_ports)
+    if not vm_available:
+        print(f"[bench] VictoriaMetrics stack not fully available ({'/'.join(map(str, vm_ports))}), skipping VM backend")
+    if vm_only:
+        assert vm_available, "VM services not available. Run: make up-vm"
 
     tsdir = args.out / f"storage_bench_compare_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
     run_id = tsdir.name
@@ -656,18 +881,26 @@ def main() -> int:
     druid_qres = {}
     ob_qres = {}
     loki_ingest = {"status": "skipped"}
+    vl_ingest = {"status": "skipped"}
+    vm_ingest = {"status": "skipped"}
+    vt_ingest = {"status": "skipped"}
 
     if args.init or args.all:
-        if use_doris:
-            apply_doris_schema()
-        apply_clickhouse_schema()
-        apply_oceanbase_schema()
+        if use_sql_backends:
+            if use_doris:
+                apply_doris_schema()
+            apply_clickhouse_schema()
+            apply_oceanbase_schema()
 
     if args.all:
-        if use_doris:
-            truncate_doris_tables()
-        truncate_clickhouse_tables()
-        truncate_oceanbase_tables()
+        if use_sql_backends:
+            if use_doris:
+                truncate_doris_tables()
+            truncate_clickhouse_tables()
+            truncate_oceanbase_tables()
+        if vm_available:
+            reset_vm_storage()
+            assert wait_vm_healthy(120), "VM services not healthy after storage reset"
         stats_dir = tsdir / "ingest_stats"
         stats_dir.mkdir(parents=True, exist_ok=True)
         batch_arg = args.streaming_batch if getattr(args, "streaming_batch", None) else args.batch
@@ -685,45 +918,50 @@ def main() -> int:
                 s = json.loads((stats_dir / "doris.json").read_text())
                 doris_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
                 doris_ingest["rows_per_sec"] = round(doris_ingest["rows"] / doris_ingest["duration_s"], 0) if doris_ingest["duration_s"] > 0 else 0
-        ch_cmd = ["python3", str(ROOT / "loaders" / "replay_clickhouse.py"), "--data-dir", str(args.data_dir), "--batch", str(batch_arg),
-                  "--stats", str(stats_dir / "clickhouse.json")]
-        if getattr(args, "scale_to", None):
-            ch_cmd.extend(["--scale-to", str(args.scale_to)])
-        t0 = time.time()
-        subprocess.run(ch_cmd, check=True, env={**os.environ, "CLICKHOUSE_PASSWORD": CH_PASSWORD})
-        ch_ingest["status"] = "ok"
-        ch_ingest["duration_s"] = round(time.time() - t0, 2)
-        ch_ingest["mechanism"] = f"Batch file load ({batch_arg} rows)"
-        if (stats_dir / "clickhouse.json").exists():
-            s = json.loads((stats_dir / "clickhouse.json").read_text())
-            ch_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
-            ch_ingest["rows_per_sec"] = round(ch_ingest["rows"] / ch_ingest["duration_s"], 0) if ch_ingest["duration_s"] > 0 else 0
-        druid_cmd = ["python3", str(ROOT / "loaders" / "replay_druid.py"), "--data-dir", str(args.data_dir), "--batch", str(batch_arg),
-                     "--stats", str(stats_dir / "druid.json")]
-        if getattr(args, "scale_to", None):
-            druid_cmd.extend(["--scale-to", str(args.scale_to)])
-        t0 = time.time()
-        subprocess.run(druid_cmd, check=True, env={**os.environ})
-        druid_ingest["status"] = "ok"
-        druid_ingest["duration_s"] = round(time.time() - t0, 2)
-        druid_ingest["mechanism"] = f"Batch file load ({batch_arg} rows)"
-        if (stats_dir / "druid.json").exists():
-            s = json.loads((stats_dir / "druid.json").read_text())
-            druid_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
-            druid_ingest["rows_per_sec"] = round(druid_ingest["rows"] / druid_ingest["duration_s"], 0) if druid_ingest["duration_s"] > 0 else 0
-        ob_cmd = ["python3", str(ROOT / "loaders" / "replay_oceanbase.py"), "--data-dir", str(args.data_dir), "--batch", str(batch_arg),
-                  "--stats", str(stats_dir / "oceanbase.json")]
-        if getattr(args, "scale_to", None):
-            ob_cmd.extend(["--scale-to", str(args.scale_to)])
-        t0 = time.time()
-        subprocess.run(ob_cmd, check=True, env={**os.environ, "OCEANBASE_HOST": "127.0.0.1", "OCEANBASE_PORT": "2881"})
-        ob_ingest["status"] = "ok"
-        ob_ingest["duration_s"] = round(time.time() - t0, 2)
-        ob_ingest["mechanism"] = f"Batch file load ({batch_arg} rows)"
-        if (stats_dir / "oceanbase.json").exists():
-            s = json.loads((stats_dir / "oceanbase.json").read_text())
-            ob_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
-            ob_ingest["rows_per_sec"] = round(ob_ingest["rows"] / ob_ingest["duration_s"], 0) if ob_ingest["duration_s"] > 0 else 0
+        if use_sql_backends:
+            ch_cmd = ["python3", str(ROOT / "loaders" / "replay_clickhouse.py"), "--data-dir", str(args.data_dir), "--batch", str(batch_arg),
+                      "--stats", str(stats_dir / "clickhouse.json")]
+            if getattr(args, "scale_to", None):
+                ch_cmd.extend(["--scale-to", str(args.scale_to)])
+            t0 = time.time()
+            subprocess.run(ch_cmd, check=True, env={**os.environ, "CLICKHOUSE_PASSWORD": CH_PASSWORD})
+            ch_ingest["status"] = "ok"
+            ch_ingest["duration_s"] = round(time.time() - t0, 2)
+            ch_ingest["mechanism"] = f"Batch file load ({batch_arg} rows)"
+            if (stats_dir / "clickhouse.json").exists():
+                s = json.loads((stats_dir / "clickhouse.json").read_text())
+                ch_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
+                ch_ingest["rows_per_sec"] = round(ch_ingest["rows"] / ch_ingest["duration_s"], 0) if ch_ingest["duration_s"] > 0 else 0
+            druid_cmd = ["python3", str(ROOT / "loaders" / "replay_druid.py"), "--data-dir", str(args.data_dir), "--batch", str(batch_arg),
+                         "--stats", str(stats_dir / "druid.json")]
+            if getattr(args, "scale_to", None):
+                druid_cmd.extend(["--scale-to", str(args.scale_to)])
+            t0 = time.time()
+            subprocess.run(druid_cmd, check=True, env={**os.environ})
+            druid_ingest["status"] = "ok"
+            druid_ingest["duration_s"] = round(time.time() - t0, 2)
+            druid_ingest["mechanism"] = f"Batch file load ({batch_arg} rows)"
+            if (stats_dir / "druid.json").exists():
+                s = json.loads((stats_dir / "druid.json").read_text())
+                druid_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
+                druid_ingest["rows_per_sec"] = round(druid_ingest["rows"] / druid_ingest["duration_s"], 0) if druid_ingest["duration_s"] > 0 else 0
+            ob_cmd = ["python3", str(ROOT / "loaders" / "replay_oceanbase.py"), "--data-dir", str(args.data_dir), "--batch", str(batch_arg),
+                      "--stats", str(stats_dir / "oceanbase.json")]
+            if getattr(args, "scale_to", None):
+                ob_cmd.extend(["--scale-to", str(args.scale_to)])
+            t0 = time.time()
+            try:
+                # Hand the loader the same port the readiness check waited on (OB_PORT)
+                # rather than a hard-coded literal, so a non-default port keeps working.
+                subprocess.run(ob_cmd, check=True, env={**os.environ, "OCEANBASE_HOST": "127.0.0.1", "OCEANBASE_PORT": str(OB_PORT)})
+                ob_ingest.update(status="ok", duration_s=round(time.time() - t0, 2), mechanism=f"Batch file load ({batch_arg} rows)")
+                if (stats_dir / "oceanbase.json").exists():
+                    s = json.loads((stats_dir / "oceanbase.json").read_text())
+                    ob_ingest["rows"] = s.get("logs", 0) + s.get("spans", 0) + s.get("metrics", 0)
+                    ob_ingest["rows_per_sec"] = round(ob_ingest["rows"] / ob_ingest["duration_s"], 0) if ob_ingest["duration_s"] > 0 else 0
+            except Exception as e:
+                ob_ingest["status"] = "error"
+                ob_ingest["error"] = str(e)[:200]
         if loki_available:
             # Use smaller batch for Loki to avoid ingestion rate limit (default 4MB/s)
             loki_batch = min(25, batch_arg)  # Keep small to avoid Loki 4MB/s rate limit with large log lines
@@ -744,23 +982,54 @@ def main() -> int:
             except Exception as e:
                 loki_ingest["status"] = "error"
                 loki_ingest["error"] = str(e)[:200]
-        # Wait for Druid segments to be available for querying
-        print("[wait] Druid segments loading...")
-        for _ in range(24):
+        if vm_available:
+            vm_cmd = ["python3", str(ROOT / "loaders" / "replay_victoriametrics.py"), "--data-dir", str(args.data_dir),
+                      "--batch", str(batch_arg), "--run-id", run_id, "--stats", str(stats_dir / "vm.json")]
+            if getattr(args, "scale_to", None):
+                vm_cmd.extend(["--scale-to", str(args.scale_to)])
+            t0 = time.time()
             try:
-                r = requests.get(f"{DRUID_HTTP}/proxy/coordinator/druid/coordinator/v1/metadata/datasources", timeout=5)
-                if r.status_code == 200:
-                    ds = r.json()
-                    if "telemetry_logs" in ds and "telemetry_spans" in ds and "telemetry_metrics" in ds:
-                        print("[wait] Druid datasources ready")
-                        break
-            except Exception:
-                pass
-            time.sleep(5)
-        time.sleep(15)
+                subprocess.run(vm_cmd, check=True, env={**os.environ,
+                               "VM_HTTP": VM_HTTP, "VL_HTTP": VL_HTTP, "VT_HTTP": VT_HTTP})
+                total_dur = round(time.time() - t0, 2)
+                if (stats_dir / "vm.json").exists():
+                    s = json.loads((stats_dir / "vm.json").read_text())
+                else:
+                    s = {}
+                for mech, key, dur_key, target in [
+                    ("VictoriaLogs push (logs only)", "logs", "logs_duration_s", vl_ingest),
+                    ("VictoriaMetrics push (metrics only)", "metrics", "metrics_duration_s", vm_ingest),
+                    ("VictoriaTraces push (traces only)", "spans", "spans_duration_s", vt_ingest),
+                ]:
+                    rows = s.get(key, 0)
+                    dur = s.get(dur_key, total_dur)
+                    target["status"] = "ok"
+                    target["duration_s"] = dur
+                    target["mechanism"] = mech
+                    target["rows"] = rows
+                    target["rows_per_sec"] = round(rows / dur, 0) if dur > 0 else 0
+            except Exception as e:
+                for target in (vl_ingest, vm_ingest, vt_ingest):
+                    target["status"] = "error"
+                    target["error"] = str(e)[:200]
+        if use_sql_backends:
+            # Wait for Druid segments to be available for querying
+            print("[wait] Druid segments loading...")
+            for _ in range(24):
+                try:
+                    r = requests.get(f"{DRUID_HTTP}/proxy/coordinator/druid/coordinator/v1/metadata/datasources", timeout=5)
+                    if r.status_code == 200:
+                        ds = r.json()
+                        if "telemetry_logs" in ds and "telemetry_spans" in ds and "telemetry_metrics" in ds:
+                            print("[wait] Druid datasources ready")
+                            break
+                except Exception:
+                    pass
+                time.sleep(5)
+            time.sleep(15)
 
     otlp_ingest = None
-    if getattr(args, "otlp", False):
+    if getattr(args, "otlp", False) and use_sql_backends:
         assert wait_port("127.0.0.1", 4317, 60), "OTLP collector 4317 not ready. Run: make up-otel"
         stats_path = tsdir / "ingest_stats" / "otlp.json"
         stats_path.parent.mkdir(parents=True, exist_ok=True)
@@ -768,7 +1037,9 @@ def main() -> int:
             "python3", str(ROOT / "runner" / "run_otlp_ingest.py"),
             "--stats", str(stats_path),
             "--count", str(getattr(args, "otlp_count", 1000)),
-        ], check=True, env={**os.environ, "CLICKHOUSE_PASSWORD": CH_PASSWORD})
+        ], check=True, env={**os.environ, "CLICKHOUSE_PASSWORD": CH_PASSWORD,
+                            "CLICKHOUSE_HTTP": CH_HTTP,
+                            "VM_HTTP": VM_HTTP, "VL_HTTP": VL_HTTP, "VT_HTTP": VT_HTTP})
         # Map otel.* into telemetry.* so canonical queries run against batch + OTLP data
         subprocess.run([
             "python3", str(ROOT / "runner" / "map_otlp_to_telemetry.py"), "--both",
@@ -776,17 +1047,37 @@ def main() -> int:
         if stats_path.exists():
             otlp_ingest = json.loads(stats_path.read_text())
 
-    if use_doris:
-        doris_qres = bench_backend(DORIS_QDIR, run_doris_query)
-    ch_qres = bench_backend(CH_QDIR, run_clickhouse_query)
-    druid_qres = bench_backend(DRUID_QDIR, run_druid_query)
-    ob_qres = bench_backend(OB_QDIR, run_oceanbase_query)
+    if use_sql_backends:
+        if use_doris:
+            doris_qres = bench_backend(DORIS_QDIR, run_doris_query)
+        ch_qres = bench_backend(CH_QDIR, run_clickhouse_query)
+        druid_qres = bench_backend(DRUID_QDIR, run_druid_query)
+        ob_qres = bench_backend(OB_QDIR, run_oceanbase_query)
     loki_qres = bench_loki_logs(run_id) if loki_ingest.get("status") == "ok" else {}
+    vm_any_ok = any(d.get("status") == "ok" for d in (vl_ingest, vm_ingest, vt_ingest))
+    if vm_any_ok:
+        print("[wait] waiting for VictoriaMetrics to flush ingested data...")
+        for i in range(30):
+            try:
+                r = requests.get(f"{VM_HTTP}/api/v1/query",
+                                 params={"query": 'count({__name__!=""})', "step": "24h"}, timeout=5)
+                if r.status_code == 200 and r.json().get("data", {}).get("result"):
+                    print(f"[wait] VictoriaMetrics data ready after {i * 2}s")
+                    break
+            except Exception:
+                pass
+            time.sleep(2)
+        else:
+            print("[wait] VictoriaMetrics data not ready after 60s, proceeding anyway")
+    vl_qres, vm_qres, vt_qres = bench_vm(run_id) if vm_any_ok else ({}, {}, {})
 
-    data_vol = get_data_volume(use_doris, use_oceanbase=True)
+    data_vol = get_data_volume(use_doris and use_sql_backends, use_oceanbase=use_sql_backends, use_vm=vm_any_ok, use_sql=use_sql_backends)
 
     write_combined_report(tsdir, doris_ingest, ch_ingest, druid_ingest, doris_qres, ch_qres, druid_qres,
                           ob_ingest=ob_ingest, ob_qres=ob_qres, loki_ingest=loki_ingest, loki_qres=loki_qres,
+                          vl_ingest=vl_ingest, vl_qres=vl_qres,
+                          vm_ingest=vm_ingest, vm_qres=vm_qres,
+                          vt_ingest=vt_ingest, vt_qres=vt_qres,
                           otlp_ingest=otlp_ingest, data_vol=data_vol)
     write_rolling_report(args.out)
     return 0
diff --git a/research/telemetry_storage_backend/runner/run_otlp_ingest.py b/research/telemetry_storage_backend/runner/run_otlp_ingest.py
index ae6ff09..4da7bf4 100644
--- a/research/telemetry_storage_backend/runner/run_otlp_ingest.py
+++ b/research/telemetry_storage_backend/runner/run_otlp_ingest.py
@@ -13,8 +13,11 @@
 
 ROOT = Path(__file__).resolve().parents[1]
 OTEL_ENDPOINT = "tsb-otel-collector:4317"
-CH_HTTP = "http://localhost:8123"
+CH_HTTP = __import__("os").environ.get("CLICKHOUSE_HTTP", "http://localhost:8123")
 CH_PASSWORD = __import__("os").environ.get("CLICKHOUSE_PASSWORD", "changeme")
+VM_HTTP = __import__("os").environ.get("VM_HTTP", "http://localhost:8428")
+VL_HTTP = __import__("os").environ.get("VL_HTTP", "http://localhost:9428")
+VT_HTTP = __import__("os").environ.get("VT_HTTP", "http://localhost:10428")
 
 def _ch_params():
     p = {"query": ""}
@@ -51,6 +54,49 @@ def count_doris_otel():
             pass
     return total
 
+def count_victorialogs_otel():
+    """Return the `total` from a `stats count()` query against VictoriaLogs; 0 on any failure."""
+    query_args = {"query": "* | stats count() as total", "limit": 1}
+    try:
+        resp = requests.get(f"{VL_HTTP}/select/logsql/query", params=query_args, timeout=10)
+        if resp.status_code == 200:
+            first = next((ln for ln in resp.text.strip().splitlines() if ln.strip()), None)
+            return int(json.loads(first).get("total", 0)) if first is not None else 0
+    except Exception:  # best effort: an unreachable backend or odd body counts as zero
+        return 0
+    return 0
+
+
+def count_victoriametrics_rows_inserted():
+    """Sum every labelled `vm_rows_inserted_total{...}` sample exposed on VictoriaMetrics /metrics.
+
+    Returns 0 when the endpoint is unreachable, answers non-200, or a sample fails to parse.
+    """
+    try:
+        resp = requests.get(f"{VM_HTTP}/metrics", timeout=10)
+        if resp.status_code != 200:
+            return 0
+        samples = (ln.rsplit(" ", 1) for ln in resp.text.splitlines()
+                   if ln.startswith("vm_rows_inserted_total{"))
+        # Each sample value is truncated to an int on its own before being added.
+        return sum(int(float(pair[1])) for pair in samples if len(pair) == 2)
+    except Exception:
+        return 0
+
+
+def count_victoriatraces_otel():
+    """Return the `total` from a `stats count()` query against VictoriaTraces; 0 on any failure."""
+    query_args = {"query": "* | stats count() as total", "limit": 1}
+    try:
+        resp = requests.get(f"{VT_HTTP}/select/logsql/query", params=query_args, timeout=10)
+        if resp.status_code == 200:
+            first = next((ln for ln in resp.text.strip().splitlines() if ln.strip()), None)
+            return int(json.loads(first).get("total", 0)) if first is not None else 0
+    except Exception:  # best effort: an unreachable backend or odd body counts as zero
+        return 0
+    return 0
+
+
 def run_telemetrygen(count: int, rate: int = 1000):
     """Run telemetrygen to send count traces, count logs, count metrics."""
     # --rate N = N/sec; 50k at 1000/sec = ~50s
@@ -71,19 +117,42 @@ def main():
     ap.add_argument("--rate", type=int, default=1000, help="Items per second (default 1000)")
     args = ap.parse_args()
 
+    # Snapshot VM counts before OTLP ingestion (VM has no otel.* table isolation)
+    before_vl = count_victorialogs_otel()
+    before_vm = count_victoriametrics_rows_inserted()
+    before_vt = count_victoriatraces_otel()
+    print(f"[otlp] before: VL={before_vl}, VM={before_vm}, VT={before_vt}")
+
     t0 = time.time()
     run_telemetrygen(args.count, args.rate)
     elapsed = time.time() - t0
-    # Allow collector a moment to flush
-    time.sleep(3)
+
+    # Poll until VictoriaMetrics rows_inserted increases (up to 30s)
+    for i in range(15):
+        time.sleep(2)
+        if count_victoriametrics_rows_inserted() > before_vm:
+            print(f"[otlp] VictoriaMetrics data ready after {(i + 1) * 2}s")
+            break
+    else:
+        print("[otlp] VictoriaMetrics data not ready after 30s, proceeding anyway")
+
+    # VM backends: report delta (after - before) to isolate OTLP-only rows
+    after_vl = count_victorialogs_otel()
+    after_vm = count_victoriametrics_rows_inserted()
+    after_vt = count_victoriatraces_otel()
+    print(f"[otlp] after:  VL={after_vl}, VM={after_vm}, VT={after_vt}")
 
     result = {
         "mechanism": f"OTLP ({args.count} spans, {args.count} logs, {args.count} metrics)",
+        "count": args.count,
         "duration_s": round(elapsed, 2),
         "doris": {"rows": count_doris_otel()},
         "clickhouse": {"rows": count_clickhouse_otel()},
+        "victorialogs": {"rows": after_vl - before_vl},
+        "victoriametrics": {"rows": after_vm - before_vm},
+        "victoriatraces": {"rows": after_vt - before_vt},
     }
-    for k in ["doris", "clickhouse"]:
+    for k in ["doris", "clickhouse", "victorialogs", "victoriametrics", "victoriatraces"]:
         r = result[k]["rows"]
         result[k]["rows_per_sec"] = round(r / result["duration_s"], 0) if result["duration_s"] > 0 else 0
 

From 7481f7e91efdd661b8835156933ca2900ea2135e Mon Sep 17 00:00:00 2001
From: dmitryk-dk <kozlovdmitriyy@gmail.com>
Date: Mon, 4 May 2026 20:53:42 +0200
Subject: [PATCH 2/4] make consistent health check, add missing files

---
 research/telemetry_storage_backend/Makefile   |  12 +-
 .../docker-compose.victoriametrics.yml        |   4 +-
 .../loaders/remote_write.py                   |  69 +++++
 .../loaders/replay_victoriametrics.py         | 246 ++++++++++++++++++
 4 files changed, 323 insertions(+), 8 deletions(-)
 create mode 100644 research/telemetry_storage_backend/loaders/remote_write.py
 create mode 100644 research/telemetry_storage_backend/loaders/replay_victoriametrics.py

diff --git a/research/telemetry_storage_backend/Makefile b/research/telemetry_storage_backend/Makefile
index ec73512..690e80c 100644
--- a/research/telemetry_storage_backend/Makefile
+++ b/research/telemetry_storage_backend/Makefile
@@ -42,11 +42,11 @@ up-compare:
 	done
 	@echo "Waiting for VictoriaLogs (port 9428)..."
 	@for i in {1..30}; do \
-	  curl -sf http://localhost:9428/health && echo "VictoriaLogs is ready" && break || sleep 2; \
+	  curl -sf http://localhost:9428/-/healthy && echo "VictoriaLogs is ready" && break || sleep 2; \
 	done
 	@echo "Waiting for VictoriaTraces (port 10428)..."
 	@for i in {1..30}; do \
-	  curl -sf http://localhost:10428/health && echo "VictoriaTraces is ready" && break || sleep 2; \
+	  curl -sf http://localhost:10428/-/healthy && echo "VictoriaTraces is ready" && break || sleep 2; \
 	done
 
 up-otel:
@@ -65,11 +65,11 @@ up-otel:
 	done
 	@echo "Waiting for VictoriaLogs (port 9428)..."
 	@for i in {1..30}; do \
-	  curl -sf http://localhost:9428/health && echo "VictoriaLogs is ready" && break || sleep 2; \
+	  curl -sf http://localhost:9428/-/healthy && echo "VictoriaLogs is ready" && break || sleep 2; \
 	done
 	@echo "Waiting for VictoriaTraces (port 10428)..."
 	@for i in {1..30}; do \
-	  curl -sf http://localhost:10428/health && echo "VictoriaTraces is ready" && break || sleep 2; \
+	  curl -sf http://localhost:10428/-/healthy && echo "VictoriaTraces is ready" && break || sleep 2; \
 	done
 	@echo "Waiting for OTLP collector..."
 	@for i in {1..30}; do \
@@ -84,11 +84,11 @@ up-vm:
 	done
 	@echo "Waiting for VictoriaLogs (port 9428)..."
 	@for i in {1..30}; do \
-	  curl -sf http://localhost:9428/health && echo "VictoriaLogs is ready" && break || sleep 2; \
+	  curl -sf http://localhost:9428/-/healthy && echo "VictoriaLogs is ready" && break || sleep 2; \
 	done
 	@echo "Waiting for VictoriaTraces (port 10428)..."
 	@for i in {1..30}; do \
-	  curl -sf http://localhost:10428/health && echo "VictoriaTraces is ready" && break || sleep 2; \
+	  curl -sf http://localhost:10428/-/healthy && echo "VictoriaTraces is ready" && break || sleep 2; \
 	done
 
 down:
diff --git a/research/telemetry_storage_backend/docker-compose.victoriametrics.yml b/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
index faf9402..0e13c9c 100644
--- a/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
+++ b/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
@@ -39,7 +39,7 @@ services:
     networks:
       - default
     healthcheck:
-      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:9428/health"]
+      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:9428/-/healthy"]
       interval: 10s
       timeout: 5s
       retries: 30
@@ -61,7 +61,7 @@ services:
     networks:
       - default
     healthcheck:
-      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:10428/health"]
+      test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:10428/-/healthy"]
       interval: 10s
       timeout: 5s
       retries: 30
diff --git a/research/telemetry_storage_backend/loaders/remote_write.py b/research/telemetry_storage_backend/loaders/remote_write.py
new file mode 100644
index 0000000..827f079
--- /dev/null
+++ b/research/telemetry_storage_backend/loaders/remote_write.py
@@ -0,0 +1,69 @@
+"""
+Minimal Prometheus remote-write encoder (protobuf + snappy).
+Implements just enough of the WriteRequest proto to push samples
+to VictoriaMetrics /api/v1/write.
+
+Proto schema (from prometheus/prometheus):
+  message WriteRequest { repeated TimeSeries timeseries = 1; }
+  message TimeSeries   { repeated Label labels = 1; repeated Sample samples = 2; }
+  message Label        { string name = 1; string value = 2; }
+  message Sample       { double value = 1; int64 timestamp = 2; }
+"""
+from __future__ import annotations
+import struct
+
+import snappy
+
+
+def _encode_varint(value: int) -> bytes:
+    """Encode an integer as a protobuf base-128 varint (low 7-bit group first)."""
+    # Negative int64 goes on the wire as 64-bit two's complement (10 bytes). The mask is also
+    # what guarantees termination: Python's `>>` on a negative int settles at -1, never 0.
+    value &= 0xFFFFFFFFFFFFFFFF
+    out = bytearray()
+    while value > 0x7F:
+        out.append(0x80 | (value & 0x7F))
+        value >>= 7
+    return bytes(out) + bytes([value])
+
+
+def _encode_bytes(field_number: int, data: bytes) -> bytes:
+    """Length-delimited field (wire type 2): key varint, then payload length varint, then payload."""
+    return b"".join((_encode_varint((field_number << 3) | 2), _encode_varint(len(data)), data))
+
+
+def _encode_double(field_number: int, value: float) -> bytes:
+    """64-bit field (wire type 1): key varint followed by the little-endian IEEE-754 double."""
+    return _encode_varint((field_number << 3) | 1) + struct.pack("<d", value)
+
+
+def _encode_sint64(field_number: int, value: int) -> bytes:
+    """Varint field (wire type 0). Despite the name, no ZigZag step is applied: plain int64 varint."""
+    return _encode_varint(field_number << 3) + _encode_varint(value)
+
+
+def _encode_label(name: str, value: str) -> bytes:
+    """Wrap a Label message (name = field 1, value = field 2) as TimeSeries field 1."""
+    return _encode_bytes(1, _encode_bytes(1, name.encode()) + _encode_bytes(2, value.encode()))
+
+
+def _encode_sample(value: float, timestamp_ms: int) -> bytes:
+    """Wrap a Sample message (value = field 1, timestamp = field 2) as TimeSeries field 2."""
+    return _encode_bytes(2, _encode_double(1, value) + _encode_sint64(2, timestamp_ms))
+
+
+def encode_write_request(timeseries: list[dict]) -> bytes:
+    """
+    Build a WriteRequest from the given series and return it snappy-compressed.
+
+    Every entry has the form {"labels": {"__name__": "x", ...}, "value": float, "timestamp_ms": int}
+    and becomes one TimeSeries carrying its labels (sorted by name) plus a single sample.
+    """
+    encoded_series = []
+    for entry in timeseries:
+        label_part = b"".join(_encode_label(name, val)
+                              for name, val in sorted(entry["labels"].items()))
+        sample_part = _encode_sample(entry["value"], entry["timestamp_ms"])
+        # Each TimeSeries is field 1 of the enclosing WriteRequest.
+        encoded_series.append(_encode_bytes(1, label_part + sample_part))
+    return snappy.compress(b"".join(encoded_series))
\ No newline at end of file
diff --git a/research/telemetry_storage_backend/loaders/replay_victoriametrics.py b/research/telemetry_storage_backend/loaders/replay_victoriametrics.py
new file mode 100644
index 0000000..dc517f7
--- /dev/null
+++ b/research/telemetry_storage_backend/loaders/replay_victoriametrics.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python3
+"""
+Replay telemetry files into VictoriaMetrics (metrics), VictoriaLogs (logs),
+and VictoriaTraces (traces via OTLP HTTP).
+Uses shared extraction from loaders.common.
+Environment:
+  VM_HTTP  (default: http://localhost:8428) — VictoriaMetrics
+  VL_HTTP  (default: http://localhost:9428) — VictoriaLogs
+  VT_HTTP  (default: http://localhost:10428) — VictoriaTraces (OTLP via main HTTP)
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+import time
+from datetime import datetime
+from pathlib import Path
+
+import requests
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import common
+import remote_write
+
+VM_HTTP = os.environ.get("VM_HTTP", "http://localhost:8428")  # VictoriaMetrics base URL
+VL_HTTP = os.environ.get("VL_HTTP", "http://localhost:9428")  # VictoriaLogs base URL
+VL_PUSH_URL = f"{VL_HTTP}/insert/loki/api/v1/push"  # Loki-compatible push endpoint used by push_logs
+VT_HTTP = os.environ.get("VT_HTTP", "http://localhost:10428")  # VictoriaTraces base URL
+PUSH_CHUNK_SIZE = 5  # number of streams sent per POST in push_logs
+
+
+def push_metrics(rows: list[dict]) -> bool:
+    """Push metric rows to VictoriaMetrics via /api/v1/write (Prometheus remote write).
+
+    One series with one sample is sent per row. Returns True on HTTP 200/204, otherwise
+    logs the failure and returns False.
+    """
+    timeseries = []
+    for row in rows:
+        row_labels = row.get("labels")
+        if isinstance(row_labels, str):
+            try:
+                row_labels = json.loads(row_labels)
+            except Exception:
+                row_labels = None
+        if not isinstance(row_labels, dict):
+            # Missing/None labels, or JSON that decodes to a list/scalar: send no extra labels
+            # instead of crashing on `.items()` and aborting the whole replay.
+            row_labels = {}
+        labels = {"__name__": row.get("metric_name", "unknown")}
+        # Entries with an empty key or a falsy value are skipped.
+        labels.update({k: str(v) for k, v in row_labels.items() if k and v})
+        timeseries.append({"labels": labels, "value": float(row.get("value", 0)),
+                           "timestamp_ms": _ts_to_epoch_ms(row.get("ts", ""))})
+    body = remote_write.encode_write_request(timeseries)
+    headers = {"Content-Type": "application/x-protobuf", "Content-Encoding": "snappy",
+               "X-Prometheus-Remote-Write-Version": "0.1.0"}
+    try:
+        r = requests.post(f"{VM_HTTP}/api/v1/write", data=body, headers=headers, timeout=60)
+        if r.status_code in (200, 204):
+            return True
+        print(f"[vm] metrics push HTTP {r.status_code}: {r.text[:300]}")
+    except Exception as e:
+        print(f"[vm] metrics push error: {e}")
+    return False
+
+
+def _ts_to_ns(ts: str) -> str:
+    """Render a `YYYY-MM-DD HH:MM:SS[.ffffff]` timestamp as a decimal string of epoch nanoseconds.
+
+    Anything that cannot be parsed falls back to the current time.
+    """
+    try:
+        text, fmt = (ts[:26], "%Y-%m-%d %H:%M:%S.%f") if "." in ts else (ts, "%Y-%m-%d %H:%M:%S")
+        return str(int(datetime.strptime(text, fmt).timestamp() * 1_000_000_000))
+    except Exception:
+        return str(int(time.time() * 1_000_000_000))
+
+
+def _row_to_stream(row: dict, run_id: str = "") -> dict:
+    """Build a single-entry Loki push stream (labels plus one [timestamp_ns, line] pair) from a log row."""
+    labels = {
+        "job": "telemetry",
+        "service": (row.get("service") or "unknown").replace('"', '\\"')[:64] or "unknown",
+        "level": (row.get("level") or "info").replace('"', '\\"')[:32] or "info",
+    }
+    if run_id:
+        labels["run_id"] = run_id
+    # Trace/span ids are attached as labels only when present, capped at 64 characters.
+    for key in ("trace_id", "span_id"):
+        if row.get(key):
+            labels[key] = str(row[key])[:64]
+    # Line breaks are flattened and the message is capped at 200_000 characters.
+    message = (row.get("message") or "").replace("\n", " ").replace("\r", "")[:200_000]
+    return {"stream": labels, "values": [[_ts_to_ns(row.get("ts", "")), message]]}
+
+
+def push_logs(rows: list[dict], run_id: str = "") -> bool:
+    """Send log rows to the VictoriaLogs Loki-compatible push endpoint; False on the first failure."""
+    # Build every stream up front, then post them in slices of PUSH_CHUNK_SIZE.
+    all_streams = [_row_to_stream(row, run_id) for row in rows]
+    json_headers = {"Content-Type": "application/json"}
+    for start in range(0, len(all_streams), PUSH_CHUNK_SIZE):
+        body = {"streams": all_streams[start : start + PUSH_CHUNK_SIZE]}
+        try:
+            resp = requests.post(VL_PUSH_URL, json=body, headers=json_headers, timeout=60)
+            if resp.status_code in (200, 204):
+                continue  # chunk accepted, go on with the next slice
+            print(f"[vl] logs push HTTP {resp.status_code}: {resp.text[:300]}")
+        except Exception as e:
+            print(f"[vl] logs push error: {e}")
+        return False
+    return True
+
+
+def push_traces(rows: list[dict]) -> bool:
+    """Push span rows to VictoriaTraces via OTLP HTTP JSON (/v1/traces), one resourceSpans per service.
+
+    A single POST is tried up to 5 times, 3 s apart; returns True on HTTP 200/202, else False.
+    """
+    url = f"{VT_HTTP}/insert/opentelemetry/v1/traces"
+    spans_by_service: dict[str, list] = {}
+    for row in rows:
+        spans_by_service.setdefault(row.get("service") or "unknown", []).append(row)  # None -> "unknown"
+
+    resource_spans = []
+    for svc, svc_rows in spans_by_service.items():
+        otlp_spans = []
+        for row in svc_rows:
+            attrs = row.get("attributes")
+            if isinstance(attrs, str):
+                try:
+                    attrs = json.loads(attrs)
+                except Exception:
+                    attrs = None
+            if not isinstance(attrs, dict):
+                attrs = {}  # None, or JSON that is not an object: send the span without attributes
+            otlp_spans.append({
+                "traceId": _hex_pad(row.get("trace_id", ""), 32),
+                "spanId": _hex_pad(row.get("span_id", ""), 16),
+                "parentSpanId": _hex_pad(row.get("parent_span_id", ""), 16) if row.get("parent_span_id") else "",
+                "name": row.get("name", ""),
+                "kind": 1,
+                "startTimeUnixNano": str(_ts_to_epoch_ns(row.get("ts_start", ""))),
+                "endTimeUnixNano": str(_ts_to_epoch_ns(row.get("ts_end", ""))),
+                "attributes": [{"key": k, "value": {"stringValue": str(v)}} for k, v in attrs.items()],
+                "status": {},
+            })
+        resource_spans.append({
+            "resource": {"attributes": [{"key": "service.name", "value": {"stringValue": svc}}]},
+            "scopeSpans": [{"scope": {"name": "telemetry-bench"}, "spans": otlp_spans}],
+        })
+
+    payload = {"resourceSpans": resource_spans}
+    for attempt in range(5):
+        try:
+            r = requests.post(url, json=payload, headers={"Content-Type": "application/json"}, timeout=60)
+            if r.status_code in (200, 202):
+                return True
+            print(f"[vt] traces push HTTP {r.status_code}: {r.text[:300]}")
+            if r.status_code < 500 and r.status_code not in (408, 429):
+                break  # the request itself was rejected; resending the same payload cannot help
+        except (requests.exceptions.ConnectionError, ConnectionResetError) as e:
+            print(f"[vt] traces push retry {attempt + 1}/5 ({e})" if attempt < 4
+                  else f"[vt] traces push error after retries: {e}")
+        except Exception as e:
+            print(f"[vt] traces push error: {e}")
+            break
+        if attempt < 4:
+            time.sleep(3)  # pause before every retry, HTTP failures included, rather than hammering
+    return False
+
+
+def _ts_to_epoch_ms(ts: str) -> int:
+    """Parse `YYYY-MM-DD HH:MM:SS[.ffffff]` into integer epoch milliseconds.
+    An empty or unparsable value yields the current time instead."""
+    # The parsed datetime is naive, so .timestamp() interprets it in the local timezone.
+    try:
+        if "." in ts:
+            parsed = datetime.strptime(ts[:26], "%Y-%m-%d %H:%M:%S.%f")
+        else:
+            # strptime rejects an empty string, which routes it to the fallback below.
+            parsed = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
+        return int(parsed.timestamp() * 1000)
+    except Exception:
+        return int(time.time() * 1000)
+
+
+def _ts_to_epoch_ns(ts: str) -> int:
+    return 1_000_000 * _ts_to_epoch_ms(ts)  # millisecond resolution only; finer digits are always 0
+
+
+def _hex_pad(val: str, length: int) -> str:
+    """Return `val` without dashes as exactly `length` characters.
+
+    Short values are left-padded with zeros, long ones keep their first `length` characters,
+    and an empty/None value becomes all zeros.
+    """
+    stripped = val.replace("-", "") if val else ""
+    return stripped.zfill(length)[:length]
+
+
+def main() -> int:
+    """Replay logs, then spans, then metrics from --data-dir into the three backends.
+
+    Returns 0 when every batch was pushed and 1 as soon as one push fails. Row counts and
+    per-signal wall-clock durations go to --stats only after all three signals finish.
+    """
+    parser = argparse.ArgumentParser(description="Replay telemetry into VictoriaMetrics/Logs/Traces")
+    parser.add_argument("--data-dir", type=Path, required=True)
+    parser.add_argument("--batch", type=int, default=500)
+    parser.add_argument("--scale-to", type=int, default=None, help="Target row count per type")
+    parser.add_argument("--stats", type=Path, default=None, help="Write ingestion stats JSON")
+    parser.add_argument("--run-id", type=str, default="", help="Label to isolate this run in VictoriaLogs")
+    args = parser.parse_args()
+    data_dir = args.data_dir
+    assert data_dir.exists(), f"DATA_DIR not found: {data_dir}"
+    row_target = args.scale_to
+
+    # (stats key, batch generator factory, push function), in the order the signals are replayed.
+    stages = [
+        ("logs", common.extract_log_rows, lambda batch: push_logs(batch, run_id=args.run_id)),
+        ("spans", common.extract_span_rows, push_traces),
+        ("metrics", common.extract_metric_rows, push_metrics),
+    ]
+    stats = {"logs": 0, "spans": 0, "metrics": 0,
+             "logs_duration_s": 0, "spans_duration_s": 0, "metrics_duration_s": 0}
+    for key, extract, push in stages:
+        # Duration covers extraction plus pushing for this signal.
+        started = time.time()
+        for batch in extract(data_dir, args.batch, target_rows=row_target):
+            # A failed push aborts the run right away; --stats is not written in that case.
+            if not push(batch):
+                return 1
+            stats[key] += len(batch)
+        stats[f"{key}_duration_s"] = round(time.time() - started, 3)
+
+    if args.stats:
+        args.stats.write_text(json.dumps(stats))
+    print("[replay] victoriametrics done")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())

From 0afd85808c15a87e2ed7a625a5e1e40cbd814a34 Mon Sep 17 00:00:00 2001
From: dmitryk-dk <kozlovdmitriyy@gmail.com>
Date: Mon, 4 May 2026 21:09:37 +0200
Subject: [PATCH 3/4] add default network

---
 .../docker-compose.victoriametrics.yml                       | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/research/telemetry_storage_backend/docker-compose.victoriametrics.yml b/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
index 0e13c9c..70b8d19 100644
--- a/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
+++ b/research/telemetry_storage_backend/docker-compose.victoriametrics.yml
@@ -67,6 +67,11 @@ services:
       retries: 30
     restart: unless-stopped
 
+networks:
+  default:
+    name: tsb-net
+    driver: bridge
+
 volumes:
   vmdata:
   vldata:

From 65a3351199b6e5cecf07bc8608f67cf278109242 Mon Sep 17 00:00:00 2001
From: dmitryk-dk <kozlovdmitriyy@gmail.com>
Date: Tue, 5 May 2026 11:04:44 +0200
Subject: [PATCH 4/4] update README.md

---
 research/telemetry_storage_backend/README.md | 65 +++++++++++++++-----
 1 file changed, 48 insertions(+), 17 deletions(-)

diff --git a/research/telemetry_storage_backend/README.md b/research/telemetry_storage_backend/README.md
index 2a5e0ba..0d03a1b 100644
--- a/research/telemetry_storage_backend/README.md
+++ b/research/telemetry_storage_backend/README.md
@@ -33,12 +33,17 @@ Tech Stack: Python (Pandas/NumPy), k8s
 
 ## 📂 Repository Structure
 ```text
-├── loeaders/           # logic to load the data
+├── loaders/            # logic to load data into each backend
 ├── out/                # benchmark run test results
 ├── queries/            # Queries to produce the performance benchmark
+│   ├── doris/          # SQL queries for Doris
+│   ├── clickhouse/     # SQL queries for ClickHouse
+│   ├── druid/          # SQL queries for Druid
+│   ├── oceanbase/      # SQL queries for OceanBase
+│   └── victoriametrics/  # MetricsQL, LogQL, TraceQL queries
 ├── runner/             # benchmark run logic
 ├── docs/               # In-depth documentation and literature review
-├── schemas/            # backend storage chemas
+├── schemas/            # backend storage schemas
 ├── telemetry_data/     # static logs, metrics, traces and metadata
 └── README.md           # This file
 ```
@@ -53,16 +58,20 @@ This harness replays pre-collected OpenTelemetry-like ground-truth (`telemetry_d
 - `docker-compose.yml` — Doris + ClickHouse for comparison trials
 - `docker-compose.druid.yml` — Druid (extends main)
 - `docker-compose.oceanbase.yml` — OceanBase CE (extends main)
+- `docker-compose.victoriametrics.yml` — VictoriaMetrics stack (metrics + logs + traces)
 - `schemas/doris.sql`, `schemas/clickhouse.sql`, `schemas/oceanbase.sql` — database and tables (logs, spans, metrics)
 - `loaders/replay_doris.py` — Doris replayer using Stream Load HTTP API
 - `loaders/replay_clickhouse.py` — ClickHouse replayer via HTTP
 - `loaders/replay_druid.py` — Druid native batch ingestion
 - `loaders/replay_oceanbase.py` — OceanBase via MySQL protocol (pymysql)
+- `loaders/replay_victoriametrics.py` — VictoriaMetrics replayer (Prometheus remote write for metrics, Loki API for logs, OTLP HTTP for traces)
+- `loaders/remote_write.py` — Prometheus remote write encoding helper
 - `queries/{doris,clickhouse,druid,oceanbase}/*.sql` — canonical queries per backend
+- `queries/victoriametrics/*.{metricsql,logql,traceql}` — VictoriaMetrics queries (MetricsQL, LogQL, TraceQL)
 - `runner/bench.py` — Doris-only: schema → load → queries → report
-- `runner/bench_compare.py` — Doris vs ClickHouse vs Druid vs OceanBase: same flow on all, combined report
+- `runner/bench_compare.py` — Doris vs ClickHouse vs Druid vs OceanBase vs VictoriaMetrics: same flow on all, combined report
 - `out/` — run outputs (`storage_bench_doris_<ts>/`, `storage_bench_compare_<ts>/`, `rolling_index.html`)
-- `otel-collector-config.yaml` — OTLP receiver → Doris + ClickHouse exporters
+- `otel-collector-config.yaml` — OTLP receiver → Doris + ClickHouse + VictoriaMetrics exporters
 - `docker-compose.otel.yml` — OTLP collector service (extends main compose)
 - `runner/run_otlp_ingest.py` — sends 1000 spans/logs/metrics via telemetrygen → collector
 - `runner/map_otlp_to_telemetry.py` — maps `otel.*` → `telemetry.*` so queries use batch + OTLP data
@@ -77,18 +86,25 @@ make bench               # run benchmark (uses telemetry_data/)
 make down                # stop
 ```
 
-**Doris vs ClickHouse vs Druid vs OceanBase comparison:**
+**Doris vs ClickHouse vs Druid vs OceanBase vs VictoriaMetrics comparison:**
 ```bash
-make up-compare          # start Doris + ClickHouse + Druid + OceanBase
+make up-compare          # start Doris + ClickHouse + Druid + OceanBase + VictoriaMetrics
 make bench-compare       # run comparison benchmark (file load only, no telemetrygen)
 make bench-compare SCALE_TO=5000   # scale to 5k rows per type
 make down
 ```
 
+**VictoriaMetrics-only:**
+```bash
+make up-vm               # start VictoriaMetrics + VictoriaLogs + VictoriaTraces
+make bench-vm            # run VM-only benchmark
+make down
+```
+
 **OTLP ingestion (telemetrygen → collector):**
 ```bash
-make up-otel             # start stack + OTLP collector
-make bench-otlp          # file load + telemetrygen (1000 spans, 1000 logs, 1000 metrics) via OTLP → Doris + ClickHouse
+make up-otel             # start stack + OTLP collector (includes VictoriaMetrics)
+make bench-otlp          # file load + telemetrygen (1000 spans, 1000 logs, 1000 metrics) via OTLP → Doris + ClickHouse + VictoriaMetrics
 make down
 ```
 
@@ -96,10 +112,11 @@ make down
 
 See `docs/DATA_SOURCES.md` for 50k correlated benchmark options.
 
-| Run | Data source | Notes |
-|-----|-------------|-------|
-| `bench-compare` | `telemetry_data/` | Pre-collected files (logs_*.txt, traces_*.json, metrics_*.json). No telemetrygen. |
-| `bench-otlp` | `telemetry_data/` + telemetrygen | Same file load; additionally sends 1000 spans/logs/metrics via telemetrygen → OTLP collector → Doris + ClickHouse. |
+| Run             | Data source                    | Notes                                                                                                                                             |
+|-----------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
+| `bench-compare` | `telemetry_data/`              | Pre-collected files (logs_*.txt, traces_*.json, metrics_*.json). No telemetrygen.                                                                 |
+| `bench-vm`      | `telemetry_data/`              | Same file load, VictoriaMetrics stack only.                                                                                                       |
+| `bench-otlp`    | `telemetry_data/` + telemetrygen | Same file load; additionally sends 1000 spans/logs/metrics via telemetrygen → OTLP collector → Doris + ClickHouse + VictoriaMetrics.             |
 
 Notes:
 - Requires Docker + docker compose.
@@ -117,33 +134,35 @@ Notes:
 
 ## Outputs
 - `out/storage_bench_doris_<ts>/` — Doris-only: `summary.html`, `ingest.json`, `queries.json`
-- `out/storage_bench_compare_<ts>/` — Doris vs ClickHouse vs Druid vs OceanBase: `compare.html`, `*_queries.json` per backend
+- `out/storage_bench_compare_<ts>/` — Doris vs ClickHouse vs Druid vs OceanBase vs VictoriaMetrics: `compare.html`, `*_queries.json` per backend
 - `out/rolling_index.html` — unified index of all runs (newest first)
 
 The ingestion comparison table shows: **Backend | Mechanism | Duration (s) | Rows | Rows/sec**. Mechanism describes the ingest method (e.g. `Batch file load (5000 rows)` or `OTLP (1000 spans, 1000 logs, 1000 metrics)`). With `--otlp`, OTLP rows are appended to the table.
 
 ## Ingestion benchmark (how it works)
 
-**Batch file load** — Same JSON/JSONL files from `telemetry_data/` are replayed into Doris, ClickHouse, Druid, and OceanBase via each loader:
+**Batch file load** — Same JSON/JSONL files from `telemetry_data/` are replayed into Doris, ClickHouse, Druid, OceanBase, and VictoriaMetrics via each loader:
 - Doris: Stream Load HTTP API (`loaders/replay_doris.py`)
 - ClickHouse: HTTP INSERT (`loaders/replay_clickhouse.py`)
 - Druid: Native batch ingestion via Overlord (`loaders/replay_druid.py`)
 - OceanBase: MySQL protocol via pymysql (`loaders/replay_oceanbase.py`)
+- VictoriaMetrics: Prometheus remote write for metrics (`loaders/remote_write.py`), Loki-compatible API for logs, OTLP HTTP for traces (`loaders/replay_victoriametrics.py`)
 
-All four backends get identical data. Ingest duration and rows/sec are measured per backend. **Message size** is capped at 200KB in `loaders/common.py` to avoid huge Druid/OceanBase files when scaling.
+All five backends get identical data. Ingest duration and rows/sec are measured per backend. **Message size** is capped at 200KB in `loaders/common.py` to avoid huge Druid/OceanBase files when scaling.
 
-**OTLP ingestion** — When `--otlp` is used, telemetrygen sends N spans, N logs, and N metrics (gRPC) to the OpenTelemetry Collector (`otel-collector-config.yaml`). The collector batches and exports to Doris and ClickHouse only. Rows are counted in `otel.*` tables after a short flush delay; duration is end-to-end (telemetrygen start → last batch exported). The OTLP data is then **mapped into `telemetry.*`** via `runner/map_otlp_to_telemetry.py` (INSERT … SELECT from `otel.*` with column mapping), so canonical queries run against batch + OTLP data combined.
+**OTLP ingestion** — When `--otlp` is used, telemetrygen sends N spans, N logs, and N metrics (gRPC) to the OpenTelemetry Collector (`otel-collector-config.yaml`). The collector batches and exports to Doris, ClickHouse, and VictoriaMetrics (VictoriaTraces accepts OTLP natively on ports 14317/14318). Rows are counted in `otel.*` tables after a short flush delay; duration is end-to-end (telemetrygen start → last batch exported). The OTLP data is then **mapped into `telemetry.*`** via `runner/map_otlp_to_telemetry.py` (INSERT … SELECT from `otel.*` with column mapping), so canonical queries run against batch + OTLP data combined.
 
 **Why Druid is not in OTLP** — The OpenTelemetry Collector has no Druid exporter. Doris and ClickHouse both have official OTLP/contrib exporters; Druid typically ingests OTLP data via Kafka (collector → Kafka → Druid). Adding Druid to the OTLP path would require a Kafka-based pipeline, which this harness does not implement.
 
 ## Query benchmark (how it works)
 
-After ingestion (and OTLP mapping if `--otlp`), the runner executes the same set of SQL queries on each backend. Each query file in `queries/{doris,clickhouse,druid,oceanbase}/*.sql` is run once per backend via its native API:
+After ingestion (and OTLP mapping if `--otlp`), the runner executes the same set of queries on each backend. Each query file in `queries/{doris,clickhouse,druid,oceanbase}/*.sql` and `queries/victoriametrics/*.{metricsql,logql,traceql}` is run once per backend via its native API:
 
 - **Doris** — `mysql` client over Docker (`telemetry.logs`, `telemetry.spans`, `telemetry.metrics`)
 - **ClickHouse** — HTTP POST to `:8123` with `?query=...`
 - **Druid** — HTTP POST to `:8888/druid/v2/sql` with JSON body
 - **OceanBase** — `mysql` client over Docker (port 2881, MySQL-compatible)
+- **VictoriaMetrics** — MetricsQL via `:8428/api/v1/query_range`, LogsQL (the `.logql` query files) via `:9428/select/logsql/query`, TraceQL via `:10428/api/traces`
 
 **What is measured** — For each query, the runner records:
 - **Latency (s)** — Wall-clock time from query start to completion (includes network, parsing, execution)
@@ -154,6 +173,18 @@ After ingestion (and OTLP mapping if `--otlp`), the runner executes the same set
 - **Query comparison** — Bar chart and table with latency, result row count, and error per backend. Includes a `data_volume` query that runs full COUNT on all three tables. The fastest backend and % difference vs. slowest are shown.
 - All backends query the same data: batch load + (when `--otlp`) mapped OTLP data in `telemetry.*`.
 
+### VictoriaMetrics stack
+
+The VictoriaMetrics stack uses a split architecture — three separate components, each handling a different telemetry signal:
+
+| Component        | Port  | Signal  | Ingest API                         | Query language |
+|------------------|-------|---------|------------------------------------|----------------|
+| VictoriaMetrics  | 8428  | Metrics | Prometheus remote write (`/api/v1/write`) | MetricsQL      |
+| VictoriaLogs     | 9428  | Logs    | Loki-compatible (`/insert/loki/api/v1/push`) | LogsQL         |
+| VictoriaTraces   | 10428 | Traces  | OTLP HTTP/gRPC (ports 14317/14318) | TraceQL        |
+
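+The environment variables `VM_HTTP`, `VL_HTTP`, and `VT_HTTP` configure the VictoriaMetrics, VictoriaLogs, and VictoriaTraces endpoints (defaults: `http://localhost:8428`, `http://localhost:9428`, and `http://localhost:10428`, respectively).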
+
 ### Query result differences (row count)
 
 Some queries return different row counts across backends: