diff --git a/QUERIES.md b/QUERIES.md index 3a03ab2..dd97dfe 100644 --- a/QUERIES.md +++ b/QUERIES.md @@ -242,3 +242,368 @@ WITH largest_tables AS ( GROUP BY s.schema_name, s.table_name, s.node['name'] ORDER BY s.schema_name, s.table_name, s.node['name']; ``` + +# Shard Distribution + +```sql + +SELECT + CASE + WHEN size < 1*1024*1024*1024::bigint THEN '<1GB' + WHEN size < 5*1024*1024*1024::bigint THEN '1GB-5GB' + WHEN size < 10*1024*1024*1024::bigint THEN '5GB-10GB' + WHEN size < 50*1024*1024*1024::bigint THEN '10GB-50GB' + ELSE '>=50GB' + END AS size_bucket, + COUNT(*) AS shards_in_bucket, + ROUND(AVG(size)::numeric / 1024 / 1024 / 1024, 2) AS avg_bucket_size_gb + FROM sys.shards + WHERE state = 'STARTED' + GROUP BY size_bucket + ORDER BY + CASE size_bucket + WHEN '<1GB' THEN 1 + WHEN '1GB-5GB' THEN 2 + WHEN '5GB-10GB' THEN 3 + WHEN '10GB-50GB' THEN 4 + ELSE 5 + END; +``` + +## Shard Distribution by Node + +```sql + +SELECT + s.node['name'] as node_name, + CASE + WHEN size < 1*1024*1024*1024::bigint THEN '<1GB' + WHEN size < 5*1024*1024*1024::bigint THEN '1GB-5GB' + WHEN size < 10*1024*1024*1024::bigint THEN '5GB-10GB' + WHEN size < 50*1024*1024*1024::bigint THEN '10GB-50GB' + ELSE '>=50GB' + END AS size_bucket, + COUNT(*) AS shards_in_bucket, + ROUND(AVG(size)::numeric / 1024 / 1024 / 1024, 2) AS avg_bucket_size_gb + FROM sys.shards s + WHERE state = 'STARTED' + GROUP BY node_name, size_bucket + ORDER BY node_name, size_bucket; +``` + +## Active Shard detection +```sql + +SELECT + sh.schema_name, + sh.table_name, + sh.id AS shard_id, primary, node['name'], + sh.partition_ident, + sh.translog_stats['uncommitted_size'] / 1024^2 AS translog_uncommitted_mb, + sh.seq_no_stats['local_checkpoint'] - sh.seq_no_stats['global_checkpoint'] AS checkpoint_delta + FROM + sys.shards AS sh + WHERE + sh.state = 'STARTED' + AND (sh.translog_stats['uncommitted_size'] > 10 * 1024 ^2 -- threshold: e.g., 10MB + OR sh.seq_no_stats['local_checkpoint'] - 
sh.seq_no_stats['global_checkpoint'] > 1000) -- significant lag + ORDER BY + sh.translog_stats['uncommitted_size'] DESC, + checkpoint_delta DESC + limit 10; +``` + +```sql +-- partition-id / values from information_schema table by using a join +ALTER TABLE "TURVO"."shipmentFormFieldData" REROUTE CANCEL SHARD 11 on 'data-hot-8' WITH (allow_primary=False); +``` + +```sql + +SELECT + sh.schema_name, + sh.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + sh.id AS shard_id, + node['name'], + sh.translog_stats['uncommitted_size'] / 1024^2 AS translog_uncommitted_mb + FROM + sys.shards AS sh + LEFT JOIN information_schema.table_partitions p + ON sh.table_name = p.table_name + AND sh.schema_name = p.table_schema + AND sh.partition_ident = p.partition_ident + WHERE + sh.state = 'STARTED' + AND sh.translog_stats['uncommitted_size'] > 300 * 1024 ^2 -- threshold: e.g., 300MB + AND primary=FALSE + ORDER BY + 6 DESC LIMIT 10; ++-------------+------------------------------+----------------------------+----------+--------------+-------------------------+ +| schema_name | table_name | partition_values | shard_id | node['name'] | translog_uncommitted_mb | ++-------------+------------------------------+----------------------------+----------+--------------+-------------------------+ +| TURVO | shipmentFormFieldData | NULL | 14 | data-hot-6 | 7011.800104141235 | +| TURVO | shipmentFormFieldData | NULL | 27 | data-hot-7 | 5131.491161346436 | +| TURVO | shipmentFormFieldData | NULL | 0 | data-hot-9 | 2460.8706073760986 | +| TURVO | shipmentFormFieldData | NULL | 7 | data-hot-2 | 1501.8993682861328 | +| TURVO | shipmentFormFieldData | NULL | 10 | data-hot-5 | 504.0952272415161 | +| TURVO | shipmentFormFieldData | NULL | 29 | data-hot-3 | 501.0663766860962 | +| TURVO | shipmentFormFieldData | NULL | 16 | data-hot-8 | 497.5628480911255 | +| TURVO | shipmentFormFieldData_events | ("sync_day"=1757376000000) | 3 | data-hot-2 | 481.20221996307373 | +| TURVO | 
shipmentFormFieldData_events | ("sync_day"=1757376000000) | 4 | data-hot-4 | 473.12464427948 | +| TURVO | orderFormFieldData | NULL | 5 | data-hot-1 | 469.4924907684326 | ++-------------+------------------------------+----------------------------+----------+--------------+-------------------------+ + +``` + + +# Segments per Shard + +```sql +SELECT + shard_id, + table_schema, + table_name, + COUNT(*) AS segment_count + FROM sys.segments + GROUP BY shard_id, table_schema, table_name + ORDER BY segment_count DESC + LIMIT 10; +``` + +```sql + +SELECT + s.node['name'] AS node_name, + CASE + WHEN size < 512*1024*1024::bigint THEN '<512MB' + WHEN size < 2.5*1024*1024*1024::bigint THEN '512MB-2.5GB' + WHEN size < 5*1024*1024*1024::bigint THEN '2.5GB-5GB' + WHEN size < 25*1024*1024*1024::bigint THEN '5GB-25GB' + ELSE '>=25GB' + END AS size_bucket, + COUNT(*) AS segments_in_bucket, + ROUND(AVG(size)::numeric / 1024 / 1024 / 1024, 2) AS avg_segment_size_gb + FROM sys.segments s + GROUP BY node_name, size_bucket + ORDER BY node_name, size_bucket; +``` + +### Count retention_lease + +### for a partition + +```sql +cr> SELECT array_length(retention_leases['leases'], 1) as cnt_leases, id from sys.shards WHERE table_name = 'shipmentFormFieldData' AND partition_ident = '04732dpl6or3gd1 + o60o30c1g' order by array_length(retention_leases['leases'], 1); ++------------+----+ +| cnt_leases | id | ++------------+----+ +| 1 | 5 | +| 1 | 4 | +| 1 | 7 | +| 1 | 0 | +| 1 | 3 | +| 1 | 6 | +| 1 | 1 | +| 1 | 2 | ++------------+----+ +SELECT 8 rows in set (0.038 sec) +cr> + +``` + +### for a table + +```sql + +SELECT array_length(retention_leases['leases'], 1) as cnt_leases, id from sys.shards WHERE table_name = 'shipmentFormFieldData' AND array_length(retention_leases['leases'], 1) > 1 order by 1; +``` + + +#### list partition ids + +```sql +cr> SELECT partition_ident, values + FROM information_schema.table_partitions + WHERE table_schema = 'TURVO' + AND table_name = 'shipmentFormFieldData' 
limit 100; ++--------------------------+--------------------------------+ +| partition_ident | values | ++--------------------------+--------------------------------+ +| 04732dhi6srjedhg60o30c1g | {"id_ts_month": 1627776000000} | +| 04732d9o60qj2d9i60o30c1g | {"id_ts_month": 1580515200000} | +| 04732dhj6krj4d1o60o30c1g | {"id_ts_month": 1635724800000} | +| 04732dhg64qj2c1k60o30c1g | {"id_ts_month": 1601510400000} | +| 04732dhk60sjid9i60o30c1g | {"id_ts_month": 1640995200000} | +``` + +cr> SELECT partition_ident, values + FROM information_schema.table_partitions + WHERE table_schema = 'TURVO' + AND table_name = 'shipmentFormFieldData' limit 100; ++--------------------------+--------------------------------+ +| partition_ident | values | ++--------------------------+--------------------------------+ +| 04732dhi6srjedhg60o30c1g | {"id_ts_month": 1627776000000} | +| 04732d9o60qj2d9i60o30c1g | {"id_ts_month": 1580515200000} | +| 04732dhj6krj4d1o60o30c1g | {"id_ts_month": 1635724800000} | +| 04732dhg64qj2c1k60o30c1g | {"id_ts_month": 1601510400000} | +| 04732dhk60sjid9i60o30c1g | {"id_ts_month": 1640995200000} | +| 04732dpk60rjgdpi60o30c1g | {"id_ts_month": 1740787200000} | +| 04732dhp6ooj2e1k60o30c1g | {"id_ts_month": 1696118400000} | +| 04732dhl6or36cpm60o30c1g | {"id_ts_month": 1656633600000} | +| 04732d9p6op38c1g60o30c1g | {"id_ts_month": 1596240000000} | +| 04732dhl6go38c9m60o30c1g | {"id_ts_month": 1654041600000} | +| 04732dpg6orj8d9m60o30c1g | {"id_ts_month": 1706745600000} | +| 04732d9p60sjce9m60o30c1g | {"id_ts_month": 1590969600000} | +| 04732dhi6ko3idpm60o30c1g | {"id_ts_month": 1625097600000} | +| 04732dpj6kr3ge9m60o30c1g | {"id_ts_month": 1735689600000} | +| 04732dhm74s3acho60o30c1g | {"id_ts_month": 1669852800000} | +| 04732dpi6koj8e1o60o30c1g | {"id_ts_month": 1725148800000} | +| 04732dhg6orjgc1o60o30c1g | {"id_ts_month": 1606780800000} | +| 04732dhm6gqjgchk60o30c1g | {"id_ts_month": 1664582400000} | +| 04732d9p70sj2e1k60o30c1g | {"id_ts_month": 
1598918400000} | +| 04732dhk6cr3ecpm60o30c1g | {"id_ts_month": 1643673600000} | +| 04732d9o6kr3ie9i60o30c1g | {"id_ts_month": 1585699200000} | +| 04732dhp60s38e1g60o30c1g | {"id_ts_month": 1690848000000} | +| 04732dhn6kp30e9m60o30c1g | {"id_ts_month": 1675209600000} | +| 04732dpk6oo3adpm60o30c1g | {"id_ts_month": 1746057600000} | +| 04732dpg74p3ac9i60o30c1g | {"id_ts_month": 1709251200000} | +| 04732dph6gqj4c9m60o30c1g | {"id_ts_month": 1714521600000} | +| 04732dhn68qj6c9i60o30c1g | {"id_ts_month": 1672531200000} | +| 04732dhm6sp3cc1o60o30c1g | {"id_ts_month": 1667260800000} | +| 04732dhl64pjccpi60o30c1g | {"id_ts_month": 1651363200000} | +| 04732dph6sp30c1g60o30c1g | {"id_ts_month": 1717200000000} | +| 04732dph74rjichg60o30c1g | {"id_ts_month": 1719792000000} | +| 04732dpj6co32c9i60o30c1g | {"id_ts_month": 1733011200000} | +| 04732dpg64pjge1o60o30c1g | {"id_ts_month": 1701388800000} | +| 04732dpj70pjce1g60o30c1g | {"id_ts_month": 1738368000000} | +| 04732dpk6cq3cd9m60o30c1g | {"id_ts_month": 1743465600000} | +| 04732dhh6sp36d9i60o30c1g | {"id_ts_month": 1617235200000} | +| 04732dpi68q3ec1k60o30c1g | {"id_ts_month": 1722470400000} | +| 04732dho70ojce9m60o30c1g | {"id_ts_month": 1688169600000} | +| 04732dhg6gojge1o60o30c1g | {"id_ts_month": 1604188800000} | +| 04732dhk70rjec9i60o30c1g | {"id_ts_month": 1648771200000} | +| 04732dhj70pj2dho60o30c1g | {"id_ts_month": 1638316800000} | +| 04732dho60pj0dpi60o30c1g | {"id_ts_month": 1680307200000} | +| 04732d9o6co34c1o60o30c1g | {"id_ts_month": 1583020800000} | +| 04732dhj60q3ad1k60o30c1g | {"id_ts_month": 1630454400000} | +| 04732dhg74q3ae9i60o30c1g | {"id_ts_month": 1609459200000} | +| 04732dhl74pj2chg60o30c1g | {"id_ts_month": 1659312000000} | +| 04732dpi6srj8c1o60o30c1g | {"id_ts_month": 1727740800000} | +*| 04732dpl6go30dhk60o30c1g | {"id_ts_month": 1754006400000} | +| 04732dhp70rjidho60o30c1g | {"id_ts_month": 1698796800000} | +| 04732dhi68qj0d9m60o30c1g | {"id_ts_month": 1622505600000} | +| 04732d9p6cqjcc9m60o30c1g 
| {"id_ts_month": 1593561600000} | +| 04732dpg6go3cdpi60o30c1g | {"id_ts_month": 1704067200000} | +| 04732dho68s3ie9i60o30c1g | {"id_ts_month": 1682899200000} | +| 04732d9n6ss36dho60o30c1g | {"id_ts_month": 1577836800000} | +| 04732dpj60q32e9i60o30c1g | {"id_ts_month": 1730419200000} | +| 04732dhm64sjic1k60o30c1g | {"id_ts_month": 1661990400000} | +| 04732dhh6gqjadho60o30c1g | {"id_ts_month": 1614556800000} | +| 04732dho6kqjedpm60o30c1g | {"id_ts_month": 1685577600000} | +| 04732dhn6sr34e1o60o30c1g | {"id_ts_month": 1677628800000} | +| 04732dph64sj4e9m60o30c1g | {"id_ts_month": 1711929600000} | +| 04732dhp6cqj4dhk60o30c1g | {"id_ts_month": 1693526400000} | +| 04732dpk70rj6dhg60o30c1g | {"id_ts_month": 1748736000000} | +| 04732dpl64pj4e1g60o30c1g | {"id_ts_month": 1751328000000} | +*| 04732dpl6or3gd1o60o30c1g | {"id_ts_month": 1756684800000} | +| 04732dhh74s34dpi60o30c1g | {"id_ts_month": 1619827200000} | +| 04732dhj6co38dhk60o30c1g | {"id_ts_month": 1633046400000} | +| 04732dhk6oo3icho60o30c1g | {"id_ts_month": 1646092800000} | +| 04732dhh68oj6dpm60o30c1g | {"id_ts_month": 1612137600000} | ++--------------------------+--------------------------------+ +SELECT 68 rows in set (0.006 sec) + +## Disable Rebalancing + +SET GLOBAL PERSISTENT "cluster.routing.rebalance.enable"='xxx'; -- all / none +[data-hot-7] updating [cluster.routing.rebalance.enable] from [all] to [none]` + + +### Report on schema, tables, sizes, ... 
+ +```sql +WITH columns AS ( + SELECT table_schema, + table_name, + COUNT(*) AS num_columns + FROM information_schema.columns + GROUP BY ALL +), tables AS ( + SELECT table_schema, + table_name, + partitioned_by, + clustered_by + FROM information_schema.tables +), shards AS ( + SELECT schema_name AS table_schema, + table_name, + partition_ident, + SUM(size) FILTER (WHERE primary = TRUE) / POWER(1024, 3) AS total_primary_size_gb, + AVG(size) / POWER(1024, 3) AS avg_shard_size_gb, + MIN(size) / POWER(1024, 3) AS min_shard_size_gb, + MAX(size) / POWER(1024, 3) AS max_shard_size_gb, + COUNT(*) FILTER (WHERE primary = TRUE) AS num_shards_primary, + COUNT(*) FILTER (WHERE primary = FALSE) AS num_shards_replica, + COUNT(*) AS num_shards_total + FROM sys.shards + GROUP BY ALL +) +SELECT s.*, + num_columns, + partitioned_by[1] AS partitioned_by, + clustered_by +FROM shards s +JOIN columns c ON s.table_name = c.table_name AND s.table_schema = c.table_schema +JOIN tables t ON s.table_name = t.table_name AND s.table_schema = t.table_schema +ORDER BY table_schema, table_name, partition_ident +``` + +---- +partition_ident | values | ++--------------------------+--------------------------------+ +| 04732dhp6ooj2e1k60o30c1g | {"id_ts_month": 1696118400000} | +| 04732dpk60rjgdpi60o30c1g | {"id_ts_month": 1740787200000} | +| 04732dhl6or36cpm60o30c1g | {"id_ts_month": 1656633600000} | +| 04732dpi6srj8c1o60o30c1g | {"id_ts_month": 1727740800000} | +| 04732dhl74pj2chg60o30c1g | {"id_ts_month": 1659312000000} | +| 04732dhl6go38c9m60o30c1g | {"id_ts_month": 1654041600000} | +| 04732dpg6orj8d9m60o30c1g | {"id_ts_month": 1706745600000} | +| 04732dpl6go30dhk60o30c1g | {"id_ts_month": 1754006400000} | +| 04732dhp70rjidho60o30c1g | {"id_ts_month": 1698796800000} | +| 04732dpj6kr3ge9m60o30c1g | {"id_ts_month": 1735689600000} | +| 04732dhm74s3acho60o30c1g | {"id_ts_month": 1669852800000} | +| 04732dpi6koj8e1o60o30c1g | {"id_ts_month": 1725148800000} | +| 04732dhm6gqjgchk60o30c1g | 
{"id_ts_month": 1664582400000} | +| 04732dpg6go3cdpi60o30c1g | {"id_ts_month": 1704067200000} | +| 04732dho68s3ie9i60o30c1g | {"id_ts_month": 1682899200000} | +| 04732dhp60s38e1g60o30c1g | {"id_ts_month": 1690848000000} | +| 04732dhn6kp30e9m60o30c1g | {"id_ts_month": 1675209600000} | +| 04732dpk6oo3adpm60o30c1g | {"id_ts_month": 1746057600000} | +| 04732dpj60q32e9i60o30c1g | {"id_ts_month": 1730419200000} | +| 04732dpl74p3edho60o30c1g | {"id_ts_month": 1759276800000} | +| 04732dhm64sjic1k60o30c1g | {"id_ts_month": 1661990400000} | +| 04732dpg74p3ac9i60o30c1g | {"id_ts_month": 1709251200000} | +| 04732dph6gqj4c9m60o30c1g | {"id_ts_month": 1714521600000} | +| 04732dhn68qj6c9i60o30c1g | {"id_ts_month": 1672531200000} | +| 04732dhm6sp3cc1o60o30c1g | {"id_ts_month": 1667260800000} | +| 04732dhl64pjccpi60o30c1g | {"id_ts_month": 1651363200000} | +| 04732dho6kqjedpm60o30c1g | {"id_ts_month": 1685577600000} | +| 04732dhn6sr34e1o60o30c1g | {"id_ts_month": 1677628800000} | +| 04732dph74rjichg60o30c1g | {"id_ts_month": 1719792000000} | +| 04732dph6sp30c1g60o30c1g | {"id_ts_month": 1717200000000} | +| 04732dph64sj4e9m60o30c1g | {"id_ts_month": 1711929600000} | +| 04732dpj6co32c9i60o30c1g | {"id_ts_month": 1733011200000} | +| 04732dhp6cqj4dhk60o30c1g | {"id_ts_month": 1693526400000} | +| 04732dpg64pjge1o60o30c1g | {"id_ts_month": 1701388800000} | +| 04732dpk70rj6dhg60o30c1g | {"id_ts_month": 1748736000000} | +| 04732dpl64pj4e1g60o30c1g | {"id_ts_month": 1751328000000} | +| 04732dpj70pjce1g60o30c1g | {"id_ts_month": 1738368000000} | +| 04732dpl6or3gd1o60o30c1g | {"id_ts_month": 1756684800000} | +| 04732dpk6cq3cd9m60o30c1g | {"id_ts_month": 1743465600000} | +| 04732dpi68q3ec1k60o30c1g | {"id_ts_month": 1722470400000} | +| 04732dho70ojce9m60o30c1g | {"id_ts_month": 1688169600000} | +| 04732dho60pj0dpi60o30c1g | {"id_ts_month": 1680307200000} | ++--------------------------+--------------------------------+ diff --git a/README.md b/README.md index 3e1cc70..6d97ef3 100644 --- 
a/README.md +++ b/README.md @@ -31,6 +31,16 @@ pip install -e . ``` 3. Create a `.env` file with your CrateDB connection details: + +**For localhost CrateDB:** +```bash +CRATE_CONNECTION_STRING=https://localhost:4200 +CRATE_USERNAME=crate +# CRATE_PASSWORD= # Leave empty or unset for default crate user +CRATE_SSL_VERIFY=false +``` + +**For remote CrateDB:** ```bash CRATE_CONNECTION_STRING=https://your-cluster.cratedb.net:4200 CRATE_USERNAME=your-username @@ -41,6 +51,12 @@ CRATE_SSL_VERIFY=true ## Quick Start ### Test Connection +You can test your connection configuration with the included test script: +```bash +python test_connection.py +``` + +Or use the built-in test: ```bash xmover test-connection ``` @@ -112,10 +128,61 @@ Analyzes current shard distribution across nodes and zones. **Options:** - `--table, -t`: Analyze specific table only +- `--largest INTEGER`: Show N largest tables/partitions by size +- `--smallest INTEGER`: Show N smallest tables/partitions by size +- `--no-zero-size`: Exclude zero-sized tables from smallest results (default: include zeros) -**Example:** +**Examples:** ```bash +# Basic cluster analysis +xmover analyze + +# Analyze specific table only xmover analyze --table events + +# Show top 10 largest tables/partitions +xmover analyze --largest 10 + +# Show top 5 smallest tables/partitions (includes zero-sized) +xmover analyze --smallest 5 + +# Show top 5 smallest non-zero tables/partitions (exclude zero-sized) +xmover analyze --smallest 5 --no-zero-size + +# Combine options +xmover analyze --table events --largest 3 +``` + +**Sample Output (--largest 3):** +``` + Largest Tables/Partitions by Size (Top 3) +╭─────────────────────────────────┬─────────────────────────────┬────────┬───────┬──────────┬──────────┬──────────┬────────────╮ +│ Table │ Partition │ Shards │ P/R │ Min Size │ Avg Size │ Max Size │ Total Size │ 
+├─────────────────────────────────┼─────────────────────────────┼────────┼───────┼──────────┼──────────┼──────────┼────────────┤ +│ TURVO.shipmentFormFieldData │ ("id_ts_month"=162777600000 │ 4 │ 2P/2R │ 89.1GB │ 95.3GB │ 104.2GB │ 381.2GB │ +│ TURVO.orderFormFieldData │ N/A │ 6 │ 3P/3R │ 23.4GB │ 28.7GB │ 35.1GB │ 172.2GB │ +│ TURVO.documentUploadProgress │ ("sync_day"=1635724800000) │ 8 │ 4P/4R │ 15.2GB │ 18.4GB │ 22.1GB │ 147.2GB │ +╰─────────────────────────────────┴─────────────────────────────┴────────┴───────┴──────────┴──────────┴──────────┴────────────╯ + +📊 Summary: 18 total shards using 700.6GB across 3 largest table/partition(s) +``` + +**Sample Output (--smallest 5 --no-zero-size):** +``` +ℹ️ Found 12 table/partition(s) with 0.0GB size (excluded from results) + + Smallest Tables/Partitions by Size (Top 5) +╭─────────────────────────────────┬─────────────────────────────┬────────┬───────┬──────────┬──────────┬──────────┬────────────╮ +│ Table │ Partition │ Shards │ P/R │ Min Size │ Avg Size │ Max Size │ Total Size │ +├─────────────────────────────────┼─────────────────────────────┼────────┼───────┼──────────┼──────────┼──────────┼────────────┤ +│ TURVO.emailActivity_transformf… │ N/A │ 2 │ 1P/1R │ 0.001GB │ 0.001GB │ 0.002GB │ 0.002GB │ +│ TURVO.calendarFormFieldData_tr… │ ("sync_day"=1627776000000) │ 2 │ 1P/1R │ 0.005GB │ 0.005GB │ 0.005GB │ 0.010GB │ +│ TURVO.shipmentSummary_failures │ N/A │ 2 │ 1P/1R │ 0.100GB │ 0.100GB │ 0.100GB │ 0.200GB │ +│ TURVO.documentActivity_failures │ N/A │ 4 │ 2P/2R │ 0.250GB │ 0.325GB │ 0.400GB │ 1.300GB │ +│ TURVO.userActivity_logs │ ("date"=2024-01-01) │ 6 │ 3P/3R │ 0.800GB │ 0.950GB │ 1.100GB │ 5.700GB │ +╰─────────────────────────────────┴─────────────────────────────┴────────┴───────┴──────────┴──────────┴──────────┴────────────╯ + +📊 Summary: 16 total shards using 7.212GB across 5 smallest non-zero table/partition(s) ``` ### `find-candidates` @@ -262,6 +329,275 @@ xmover monitor-recovery --watch 
--include-transitioning - **PEER**: Copying shard data from another node (replication/relocation) - **DISK**: Rebuilding shard from local data (after restart/disk issues) +**Enhanced Translog Monitoring:** +The recovery monitor now displays detailed translog information in the format: +``` +📋 TURVO.shipmentFormFieldData_events S4 PEER TRANSLOG 0.0% 6.2GB (TL:109.8GB / 22.1GB / 20%) data-hot-0 → data-hot-7 +``` + +**Translog Display Format**: `TL:X.XGB / Y.YGB / ZZ%` +- `X.XGB`: Total translog file size (`translog_stats['size']`) +- `Y.YGB`: Uncommitted translog size (`translog_stats['uncommitted_size']`) +- `ZZ%`: Uncommitted as percentage of total translog size + +**Color Coding:** +- 🔴 **Red**: Uncommitted ≥ 5GB OR uncommitted ≥ 80% (critical) +- 🟡 **Yellow**: Uncommitted ≥ 1GB OR uncommitted ≥ 50% (warning) +- 🟢 **Green**: Below warning thresholds (normal) + +Translog information is only shown when significant (uncommitted ≥ 10MB or total ≥ 50MB). + +**Enhanced Replica Progress Tracking:** +For replica shard recoveries, the monitor now shows sequence number-based progress when available: +``` +📋 TURVO.LINEAGE_DIRECTLY_OPEN_TO_APPOINTMENT S2R PEER TRANSLOG 99.9% (seq) 15.2GB data-hot-0 → data-hot-1 +``` + +**Progress Display Formats:** +- `99.9% (seq)`: Replica progress based on sequence number comparison with primary +- `37.5% (seq) / 95.0% (rec)`: Shows both when sequence and traditional progress differ significantly (>5%) +- `98.5%`: Primary shards or when sequence data unavailable (traditional progress) + +**Sequence Progress Benefits:** +- More accurate progress indication for replica synchronization +- Based on comparing `max_seq_no` between replica and primary shards +- Reveals actual replication lag in terms of operations behind primary +- Particularly useful for detecting stuck replica recoveries where traditional recovery shows 100% but replica is still far behind + +**Enhanced Transitioning Recovery Display:** +The monitor now shows detailed 
information for transitioning recoveries instead of just "(transitioning)": +``` +16:08:20 | 5 done (transitioning) + | 🔄 TURVO.accountFormFieldData S7R PEER DONE 99.8% (seq) 3.8GB data-hot-5 → data-hot-7 + | 🔄 TURVO_MySQL.composite_mapping S11P PEER DONE 100.0% 3.0GB data-hot-5 → data-hot-6 + | 🔄 TURVO.shipmentFormFieldData ("id_ts_month"=1633046400000) S6R PEER DONE 99.8% (seq) 8.2GB (TL:233MB / 49MB / 21%) data-hot-4 → data-hot-7 +``` + +**Transitioning Display Features:** +- Shows up to 5 transitioning recoveries with full details +- Includes sequence progress, translog info, and node routing +- Throttled to every 30 seconds to reduce noise +- Uses 🔄 icon to indicate transitioning state +- Distinguishes primary (P) vs replica (R) shards + +### `problematic-translogs` +Find tables with problematic translog sizes and generate replica management commands. + +**Options:** +- `--sizeMB INTEGER`: Minimum translog uncommitted size in MB (default: 300) +- `--execute`: Execute the replica management commands after confirmation + +**Description:** +This command identifies tables with replica shards that have large uncommitted translog sizes indicating replication issues. It shows both individual problematic shards and a summary by table/partition. It generates two types of ALTER commands: individual REROUTE CANCEL SHARD commands for each problematic shard, and replica management commands that temporarily set replicas to 0 and restore them to force recreation of problematic replicas. 
+ +**Examples:** +```bash +# Show problematic tables with translog > 300MB (default) +xmover problematic-translogs + +# Show tables with translog > 500MB +xmover problematic-translogs --sizeMB 500 + +# Execute replica management commands for tables > 1GB after confirmation +xmover problematic-translogs --sizeMB 1000 --execute +``` + +**Sample Output:** +``` + Problematic Replica Shards (translog > 300MB) +╭────────┬───────────────────────────────┬────────────────────────────┬──────────┬────────────┬─────────────╮ +│ Schema │ Table │ Partition │ Shard ID │ Node │ Translog MB │ +├────────┼───────────────────────────────┼────────────────────────────┼──────────┼────────────┼─────────────┤ +│ TURVO │ shipmentFormFieldData │ none │ 14 │ data-hot-6 │ 7040.9 │ +│ TURVO │ shipmentFormFieldData_events │ ("sync_day"=1757376000000) │ 3 │ data-hot-2 │ 481.2 │ +│ TURVO │ orderFormFieldData │ none │ 5 │ data-hot-1 │ 469.5 │ +╰────────┴───────────────────────────────┴────────────────────────────┴──────────┴────────────┴─────────────╯ + +Found 2 table/partition(s) with problematic translogs: + + Tables with Problematic Replicas (translog > 300MB) +╭────────┬───────────┬───────────┬───────────┬──────────┬─────────────┬──────────────┬──────────╮ +│ Schema │ Table │ Partition │ Problema… │ Max │ Shards │ Size GB │ Current │ +│ │ │ │ Replicas │ Trans.MB │ (P/R) │ (P/R) │ Replicas │ +├────────┼───────────┼───────────┼───────────┼──────────┼─────────────┼──────────────┼──────────┤ +│ TURVO │ shipment… │ ("sync.. 
│ 2 │ 7011.8 │ 5P/5R │ 12.4/12.1 │ 1 │ +│ TURVO │ orderFor… │ none │ 1 │ 469.5 │ 3P/6R │ 8.2/16.3 │ 2 │ +╰────────┴───────────┴───────────┴───────────┴──────────┴─────────────┴──────────────┴──────────╯ + +Generated ALTER Commands: + +ALTER TABLE "TURVO"."shipmentFormFieldData" REROUTE CANCEL SHARD 14 on 'data-hot-6' WITH (allow_primary=False); +ALTER TABLE "TURVO"."shipmentFormFieldData_events" partition ("sync_day"=1757376000000) REROUTE CANCEL SHARD 3 on 'data-hot-2' WITH (allow_primary=False); +ALTER TABLE "TURVO"."orderFormFieldData" REROUTE CANCEL SHARD 5 on 'data-hot-1' WITH (allow_primary=False); + +-- Set replicas to 0: +ALTER TABLE "TURVO"."shipmentFormFieldData" PARTITION ("id_ts_month"=1756684800000) SET ("number_of_replicas" = 0); +-- Restore replicas to 1: +ALTER TABLE "TURVO"."shipmentFormFieldData" PARTITION ("id_ts_month"=1756684800000) SET ("number_of_replicas" = 1); + +-- Set replicas to 0: +ALTER TABLE "TURVO"."orderFormFieldData" SET ("number_of_replicas" = 0); +-- Restore replicas to 2: +ALTER TABLE "TURVO"."orderFormFieldData" SET ("number_of_replicas" = 2); + +Total: 3 REROUTE CANCEL commands + 4 replica management commands +``` + +When using `--execute`, each command is presented individually for confirmation, allowing you to selectively execute specific commands as needed. + +### `active-shards` +Monitors the most active shards by tracking checkpoint progression over time. 
+ +**Options:** +- `--count`: Number of most active shards to show (default: 10) +- `--interval`: Observation interval in seconds (default: 30) +- `--min-checkpoint-delta`: Minimum checkpoint progression between snapshots to show shard (default: 1000) +- `--table, -t`: Monitor specific table only +- `--node, -n`: Monitor specific node only +- `--watch, -w`: Continuously monitor (refresh every interval) +- `--exclude-system`: Exclude system tables (gc.*, information_schema.*, *_events, *_log) +- `--min-rate`: Minimum activity rate (changes/sec) to show +- `--show-replicas/--hide-replicas`: Show replica shards (default: True) + +**Examples:** +```bash +# Show top 10 most active shards over 30 seconds +xmover active-shards + +# Top 20 shards with 60-second observation period +xmover active-shards --count 20 --interval 60 + +# Continuous monitoring with 30-second intervals +xmover active-shards --watch --interval 30 + +# Monitor specific table activity +xmover active-shards --table my_table --watch + +# Monitor specific node with custom threshold +xmover active-shards --node data-hot-1 --min-checkpoint-delta 500 + +# Exclude system tables and event logs for business data focus +xmover active-shards --exclude-system --count 20 + +# Only show high-activity shards (≥50 changes/sec) +xmover active-shards --min-rate 50 --count 15 + +# Focus on primary shards only +xmover active-shards --hide-replicas --count 20 +``` + +This command helps identify which shards are receiving the most write activity by measuring local checkpoint progression between two snapshots. + +**How it works:** +1. **Takes snapshot of ALL started shards** (not just currently active ones) +2. **Waits for observation interval** (configurable, default: 30 seconds) +3. **Takes second snapshot** of all started shards +4. **Compares snapshots** to find shards with checkpoint progression ≥ threshold +5. 
**Shows ranked results** with activity trends and insights + +**Enhanced output features:** +- **Checkpoint visibility**: Shows actual `local_checkpoint` values (CP Start → CP End → Delta) +- **Partition awareness**: Separate tracking for partitioned tables (different partition_ident values) +- **Activity trends**: 🔥 HOT (≥100/s), 📈 HIGH (≥50/s), 📊 MED (≥10/s), 📉 LOW (<10/s) +- **Smart insights**: Identifies concentration patterns and load distribution (non-watch mode) +- **Flexible filtering**: Exclude system tables, set minimum rates, hide replicas +- **Context information**: Total activity, average rates, observation period +- **Clean watch mode**: Streamlined output without legend/insights for continuous monitoring + +This approach captures shards that become active during the observation period, providing a complete view of cluster write patterns and identifying hot spots. The enhanced filtering helps focus on business-critical activity patterns. + +**Sample output (single run):** +``` +🔥 Most Active Shards (3 shown, 30s observation period) + +Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec + + Rank | Schema.Table | Shard | Partition | Node | Type | Checkpoint Δ | Rate/sec | Trend + ----------------------------------------------------------------------------------------------------------- + 1 | gc.scheduled_jobs_log | 0 | - | data-hot-8 | P | 113,744 | 3,791.5 | 🔥 HOT + 2 | TURVO.events | 0 | 04732dpl6osj8d | data-hot-0 | P | 45,837 | 1,527.9 | 🔥 HOT + 3 | doc.user_actions | 1 | 04732dpk70rj6d | data-hot-2 | P | 30,733 | 1,024.4 | 🔥 HOT + +Legend: + • Checkpoint Δ: Write operations during observation period + • Partition: partition_ident (truncated if >14 chars, '-' if none) + +Insights: + • 3 HOT shards (≥100 changes/sec) - consider load balancing + • All active shards are PRIMARY - normal write pattern +``` + +**Sample output (watch mode - cleaner):** +``` +30s interval | threshold: 1,000 | top 5 + +🔥 Most Active Shards (3 shown, 30s 
observation period) + +Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec + + Rank | Schema.Table | Shard | Partition | Node | Type | Checkpoint Δ | Rate/sec | Trend + ----------------------------------------------------------------------------------------------------------- + 1 | gc.scheduled_jobs_log | 0 | - | data-hot-8 | P | 113,744 | 3,791.5 | 🔥 HOT + 2 | TURVO.events | 0 | 04732dpl6osj8d | data-hot-0 | P | 45,837 | 1,527.9 | 🔥 HOT + 3 | doc.user_actions | 1 | 04732dpk70rj6d | data-hot-2 | P | 30,733 | 1,024.4 | 🔥 HOT + +━━━ Next update in 30s ━━━ +``` + +### `large-translogs` +Monitors shards with large translog uncommitted sizes that do not flush properly, displaying both primary and replica shards. + +**Options:** +- `--translogsize`: Minimum translog uncommitted size threshold in MB (default: 500) +- `--interval`: Monitoring interval in seconds for watch mode (default: 60) +- `--watch, -w`: Continuously monitor (refresh every interval) +- `--table, -t`: Monitor specific table only +- `--node, -n`: Monitor specific node only +- `--count`: Maximum number of shards with large translogs to show (default: 50) + +**Examples:** +```bash +# Show shards with translog over default 500MB threshold +xmover large-translogs + +# Show shards with translog over 1GB threshold +xmover large-translogs --translogsize 1000 + +# Continuous monitoring every 30 seconds +xmover large-translogs --watch --interval 30 + +# Monitor specific table +xmover large-translogs --table my_table --watch + +# Monitor specific node, show top 20 +xmover large-translogs --node data-hot-1 --count 20 +``` + +This command helps identify shards that are not flushing properly by monitoring their translog uncommitted sizes, which can indicate replication or flush issues. 
+ +**Output includes:** +- **Schema.Table**: Combined schema and table name +- **Partition**: Partition values or "-" for non-partitioned tables +- **Shard**: Numeric shard identifier +- **Node**: Node where shard is located +- **TL MB**: Translog uncommitted size (color-coded: bright_red >1GB, red >500MB, yellow >100MB, green ≤100MB) +- **Type**: "P" for primary shards, "R" for replica shards +- **Timestamp**: Current time for each update +- **Summary**: Total shards, primary/replica breakdown, average translog size + +**Sample output:** +``` +Large Translogs (>400MB) - 09:45:51 +╭────────────────────────────┬──────────────────────┬───────┬────────────┬────────┬──────╮ +│ Schema.Table │ Partition │ Shard │ Node │ TL MB │ Type │ +├────────────────────────────┼──────────────────────┼───────┼────────────┼────────┼──────┤ +│ TURVO.orderFormFieldData_… │ ("sync_day"=175936.… │ 7 │ data-hot-7 │ 510 │ P │ +│ TURVO.orderFormFieldData │ - │ 8 │ data-hot-6 │ 509 │ R │ +│ TURVO.orderFormFieldData │ - │ 20 │ data-hot-3 │ 507 │ R │ +╰────────────────────────────┴──────────────────────┴───────┴────────────┴────────┴──────╯ +3 shards (1P/2R) - Avg translog: 509MB +``` + ### `test-connection` Tests the connection to CrateDB and displays basic cluster information. @@ -362,6 +698,40 @@ xmover monitor-recovery --watch --include-transitioning xmover monitor-recovery --node data-hot-3 --recovery-type DISK ``` +### Monitoring Active Shards and Write Patterns + +Identify which shards are receiving the most write activity: + +1. Quick snapshot of most active shards: +```bash +# Show top 10 most active shards over 30 seconds +xmover active-shards + +# Longer observation period for more accurate results +xmover active-shards --count 15 --interval 60 +``` + +2. 
Continuous monitoring for real-time insights: +```bash +# Continuous monitoring with 30-second intervals +xmover active-shards --watch --interval 30 + +# Monitor specific table for focused analysis +xmover active-shards --table critical_table --watch +``` + +3. Integration with rebalancing workflow: +```bash +# Identify hot shards first +xmover active-shards --count 20 --interval 60 + +# Move hot shards away from overloaded nodes +xmover recommend --table hot_table --prioritize-space --execute + +# Monitor the impact +xmover active-shards --table hot_table --watch +``` + ### Manual Shard Movement 1. Validate the move first: @@ -394,8 +764,20 @@ xmover recommend --prioritize-zones --execute - `CRATE_CONNECTION_STRING`: CrateDB HTTP endpoint (required) - `CRATE_USERNAME`: Username for authentication (optional) -- `CRATE_PASSWORD`: Password for authentication (optional) -- `CRATE_SSL_VERIFY`: Enable SSL certificate verification (default: true) +- `CRATE_PASSWORD`: Password for authentication (optional, only used if username is also provided) +- `CRATE_SSL_VERIFY`: SSL certificate verification (default: auto-detects based on connection string) + - `true`: Always verify SSL certificates + - `false`: Disable SSL certificate verification + - `auto`: Automatically disable for localhost/127.0.0.1, enable for remote connections + +#### Retry and Timeout Configuration + +For clusters under pressure, you can configure retry behavior: + +- `CRATE_MAX_RETRIES`: Maximum number of retries for failed queries (default: 3) +- `CRATE_TIMEOUT`: Base timeout in seconds for queries (default: 30) +- `CRATE_MAX_TIMEOUT`: Maximum timeout in seconds for retries (default: 120) +- `CRATE_RETRY_BACKOFF`: Exponential backoff factor between retries (default: 2.0) ### Connection String Format diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 83a7dd6..8126bb3 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -20,6 +20,81 @@ xmover validate-move SCHEMA.TABLE SHARD_ID FROM_NODE 
TO_NODE
 xmover explain-error "your error message here"
 ```
 
+## Cluster Under Pressure / Performance Issues
+
+### Symptoms
+- `500 Server Error: Internal Server Error`
+- `503 Service Unavailable`
+- `429 Too Many Requests`
+- Query timeouts
+- Slow response times
+
+### Solutions
+
+#### 1. Configure Retry and Timeout Settings
+Add these to your `.env` file for better resilience:
+
+```bash
+# Increase retries for unstable clusters
+CRATE_MAX_RETRIES=5
+
+# Increase base timeout for slow queries
+CRATE_TIMEOUT=60
+
+# Allow longer timeouts for retries
+CRATE_MAX_TIMEOUT=300
+
+# Adjust backoff between retries
+CRATE_RETRY_BACKOFF=1.5
+```
+
+#### 2. Monitor Cluster Health
+```sql
+-- Check cluster load
+SELECT node['name'], load, heap FROM sys.nodes;
+
+-- Check query queue
+SELECT * FROM sys.jobs WHERE stmt LIKE '%ALTER TABLE%';
+
+-- Check disk usage
+SELECT node['name'], fs['total'], fs['used'] FROM sys.nodes;
+```
+
+#### 3. Reduce Load During Operations
+- Run XMover during low-traffic periods
+- Move fewer shards at once with `--limit`
+- Use `--wait-time` between operations
+- Monitor with `xmover monitor` before proceeding
+
+#### 4. Temporary Cluster Adjustments
+```sql
+-- Increase query timeout temporarily
+SET SESSION "statement_timeout" = '300s';
+
+-- Reduce concurrent recoveries
+SET GLOBAL TRANSIENT cluster.routing.allocation.node_concurrent_recoveries = 1;
+
+-- Increase recovery throttling
+SET GLOBAL TRANSIENT indices.recovery.max_bytes_per_sec = '20mb';
+```
+
+#### 5. 
Error-Specific Solutions + +**500 Internal Server Error:** +- Usually indicates cluster overload +- Wait and retry with exponential backoff (built into XMover) +- Check cluster logs for specific errors + +**503 Service Unavailable:** +- Cluster rejecting new queries +- Reduce concurrent operations +- Wait for current operations to complete + +**429 Too Many Requests:** +- Rate limiting active +- Increase retry delays with higher `CRATE_RETRY_BACKOFF` +- Reduce operation frequency + ## Common Issues and Solutions ### 1. Zone Conflicts diff --git a/config/shard_size_rules.yaml b/config/shard_size_rules.yaml new file mode 100644 index 0000000..ffd8f77 --- /dev/null +++ b/config/shard_size_rules.yaml @@ -0,0 +1,194 @@ +# XMover Shard Size Monitoring Rules +# Configuration file for analyzing CrateDB shard sizes and generating optimization recommendations +# +# Rules are evaluated against each table/partition combination returned by the analysis query. +# Variables available in rule conditions: +# - table_schema, table_name, partition_ident +# - total_primary_size_gb, avg_shard_size_gb, min_shard_size_gb, max_shard_size_gb +# - num_shards_primary, num_shards_replica, num_shards_total +# - num_columns, partitioned_by, clustered_by +# - cluster_config dictionary with cluster-level metrics + +metadata: + version: "1.0" + description: "CrateDB shard size optimization rules" + author: "XMover" + last_updated: "2025-10-03" + +# Global thresholds referenced in rules +thresholds: + # Core shard size recommendations + optimal_shard_size_min_gb: 3 + optimal_shard_size_max_gb: 70 + performance_sweet_spot_min_gb: 10 + performance_sweet_spot_max_gb: 50 + + # Workload-specific ranges + search_optimized_max_gb: 30 + write_heavy_min_gb: 30 + write_heavy_max_gb: 50 + time_series_min_gb: 20 + time_series_max_gb: 40 + + # Critical thresholds + large_shard_threshold_gb: 50 + small_shard_threshold_gb: 1 + consolidation_threshold_gb: 3 + + # Column-related thresholds + 
wide_table_column_threshold: 500 + wide_table_shard_max_gb: 25 + max_columns_default: 1000 + + # Cluster density thresholds + shards_per_heap_gb_ratio: 20 + max_shards_per_node_safe: 1000 + cpu_per_shard_ratio: 1.5 + +# Table/Partition level rules +rules: + - name: "critical_oversized_shards" + category: "size_optimization" + severity: "critical" + condition: "max_shard_size_gb > thresholds['large_shard_threshold_gb']" + recommendation: "Shard size {max_shard_size_gb:.1f}GB exceeds {large_shard_threshold_gb}GB limit. Split shards to improve recovery times and query performance." + action_hint: "Consider reducing number_of_shards or using table partitioning" + + - name: "undersized_shards_with_excess_count" + category: "size_optimization" + severity: "warning" + condition: "max_shard_size_gb < thresholds['small_shard_threshold_gb'] and num_shards_primary > cluster_config['total_nodes']" + recommendation: "Shards too small ({max_shard_size_gb:.2f}GB < {small_shard_threshold_gb}GB) with {num_shards_primary} primary shards across {cluster_config[total_nodes]} nodes. Consolidate to reduce overhead." + action_hint: "Reduce number_of_shards for future partitions or use shard shrinking" + + - name: "wide_table_oversized_shards" + category: "performance" + severity: "critical" + condition: "num_columns > thresholds['wide_table_column_threshold'] and max_shard_size_gb > thresholds['wide_table_shard_max_gb']" + recommendation: "Wide table with {num_columns} columns (>{wide_table_column_threshold}) has {max_shard_size_gb:.1f}GB shards. Reduce to <{wide_table_shard_max_gb}GB to mitigate column overhead." 
+    action_hint: "Increase number_of_shards or disable indexing for unused columns"
+
+  - name: "suboptimal_small_shards"
+    category: "efficiency"
+    severity: "info"
+    condition: "max_shard_size_gb < thresholds['consolidation_threshold_gb'] and max_shard_size_gb >= thresholds['small_shard_threshold_gb']"
+    recommendation: "Small shards ({max_shard_size_gb:.1f}GB) could be consolidated. Target minimum {consolidation_threshold_gb}GB per shard."
+    action_hint: "Consider reducing number_of_shards for better efficiency"
+
+  - name: "outside_performance_sweet_spot"
+    category: "performance"
+    severity: "info"
+    condition: "(max_shard_size_gb < thresholds['performance_sweet_spot_min_gb'] or max_shard_size_gb > thresholds['performance_sweet_spot_max_gb']) and max_shard_size_gb >= thresholds['consolidation_threshold_gb'] and max_shard_size_gb <= thresholds['large_shard_threshold_gb']"
+    recommendation: "Shard size {max_shard_size_gb:.1f}GB outside performance sweet spot ({performance_sweet_spot_min_gb}-{performance_sweet_spot_max_gb}GB). Consider rebalancing."
+    action_hint: "Adjust number_of_shards to reach optimal range"
+
+  - name: "excessive_column_count"
+    category: "schema_design"
+    severity: "warning"
+    condition: "num_columns > thresholds['max_columns_default']"
+    recommendation: "Table has {num_columns} columns exceeding default limit of {max_columns_default}. May require mapping.total_fields.limit adjustment and impacts memory usage."
+    action_hint: "Review schema design and disable indexing for unused columns"
+
+  - name: "uneven_shard_distribution"
+    category: "balance"
+    severity: "warning"
+    condition: "num_shards_primary > 1 and min_shard_size_gb > 0 and (max_shard_size_gb / min_shard_size_gb) > 3"
+    recommendation: "Uneven shard size distribution: largest {max_shard_size_gb:.1f}GB vs smallest {min_shard_size_gb:.1f}GB (>3:1 ratio). Check data skew."
+ action_hint: "Review partitioning strategy or clustering keys" + + - name: "single_large_shard_table" + category: "scalability" + severity: "warning" + condition: "num_shards_primary == 1 and total_primary_size_gb > thresholds['performance_sweet_spot_max_gb']" + recommendation: "Large single shard ({total_primary_size_gb:.1f}GB) limits parallelization. Consider increasing number_of_shards." + action_hint: "Increase number_of_shards to enable parallel processing" + +# Cluster-level rules (evaluated once per analysis) +cluster_rules: + - name: "heap_to_shard_ratio_exceeded" + category: "stability" + severity: "warning" + condition: "cluster_config['total_shards'] > (cluster_config['total_heap_gb'] * thresholds['shards_per_heap_gb_ratio'])" + recommendation: "Total cluster shards ({cluster_config[total_shards]}) exceed recommended ratio of {shards_per_heap_gb_ratio} per GB heap ({cluster_config[total_heap_gb]:.1f}GB). Risk of memory pressure." + action_hint: "Consolidate small shards or increase heap size" + + - name: "node_shard_density_critical" + category: "stability" + severity: "critical" + condition: "cluster_config['max_shards_per_node'] > thresholds['max_shards_per_node_safe']" + recommendation: "At least one node has {cluster_config[max_shards_per_node]} shards, exceeding safe limit of {max_shards_per_node_safe}. Redistribute immediately." + action_hint: "Move shards to other nodes or add capacity" + + - name: "insufficient_cpu_per_shard" + category: "performance" + severity: "info" + condition: "cluster_config['total_shards'] > (cluster_config['total_cpu_cores'] * thresholds['cpu_per_shard_ratio'])" + recommendation: "Total shards ({cluster_config[total_shards]}) may exceed CPU capacity ({cluster_config[total_cpu_cores]} cores, recommended {cpu_per_shard_ratio} vCPU per shard)." 
+    action_hint: "Consider shard consolidation or adding CPU resources"
+
+# Validation rules for the configuration file itself
+validation:
+  required_fields:
+    - metadata
+    - thresholds
+    - rules
+    - cluster_rules
+
+  rule_required_fields:
+    - name
+    - category
+    - severity
+    - condition
+    - recommendation
+
+  valid_severities:
+    - critical
+    - warning
+    - info
+
+  valid_categories:
+    - size_optimization
+    - performance
+    - efficiency
+    - schema_design
+    - balance
+    - scalability
+    - stability
+
+# Documentation for rule writing
+rule_writing_guide:
+  available_variables:
+    table_level:
+      - "table_schema: Schema name"
+      - "table_name: Table name"
+      - "partition_ident: Partition identifier (may be null)"
+      - "total_primary_size_gb: Total size of primary shards in GB"
+      - "avg_shard_size_gb: Average shard size in GB"
+      - "min_shard_size_gb: Smallest shard size in GB"
+      - "max_shard_size_gb: Largest shard size in GB"
+      - "num_shards_primary: Number of primary shards"
+      - "num_shards_replica: Number of replica shards"
+      - "num_shards_total: Total number of shards"
+      - "num_columns: Number of columns in table"
+      - "partitioned_by: Partitioning column (may be null)"
+      - "clustered_by: Clustering configuration (may be null)"
+
+    cluster_level:
+      - "cluster_config['total_nodes']: Total number of nodes"
+      - "cluster_config['total_cpu_cores']: Total CPU cores"
+      - "cluster_config['total_memory_gb']: Total system memory in GB"
+      - "cluster_config['total_heap_gb']: Total JVM heap in GB"
+      - "cluster_config['max_shards_per_node']: Setting value -- NOTE(review): key is duplicated two entries below with a different meaning ('actual max shards on any node'); a dict key can only hold one value, confirm which one the engine provides and rename the other"
+      - "cluster_config['total_shards']: Total shards in cluster"
+      - "cluster_config['max_shards_per_node']: Actual max shards on any node"
+
+  condition_examples:
+    - "max_shard_size_gb > 50"
+    - "num_columns > 500 and avg_shard_size_gb > 25"
+    - "num_shards_primary == 1 and total_primary_size_gb > 30"
+    - "table_name.startswith('logs_') and max_shard_size_gb < 20"
+
+  recommendation_formatting:
+    - "Use {variable_name} to insert values"
+ - "Use {variable_name:.1f} for decimal formatting" + - "Reference thresholds with {threshold_name}" diff --git a/pyproject.toml b/pyproject.toml index 862d230..fb5afc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,12 @@ dependencies = [ "requests>=2.28.0", "python-dotenv>=1.0.0", "rich>=13.0.0", + "pyyaml>=6.0", ] +[project.optional-dependencies] +dev = ["pytest>=7.0.0"] + [project.scripts] xmover = "xmover.cli:main" diff --git a/src/xmover/analyzer.py b/src/xmover/analyzer.py index 75af909..c5c3085 100644 --- a/src/xmover/analyzer.py +++ b/src/xmover/analyzer.py @@ -631,6 +631,289 @@ def _check_zone_conflict(self, recommendation: MoveRecommendation) -> Optional[s # If we can't check, err on the side of caution return f"Cannot verify zone safety: {str(e)}" + def get_shard_size_overview(self) -> Dict[str, Any]: + """Get shard size distribution analysis""" + # Only analyze STARTED shards + started_shards = [s for s in self.shards if s.state == 'STARTED'] + + if not started_shards: + return { + 'total_shards': 0, + 'size_buckets': {}, + 'large_shards_count': 0, + 'small_shards_percentage': 0.0, + 'avg_shard_size_gb': 0.0 + } + + total_shards = len(started_shards) + total_size_gb = sum(s.size_gb for s in started_shards) + avg_size_gb = total_size_gb / total_shards if total_shards > 0 else 0.0 + + # Define size buckets (in GB) + size_buckets = { + '<1GB': {'count': 0, 'total_size': 0.0, 'max_size': 0.0}, + '1GB-5GB': {'count': 0, 'total_size': 0.0, 'max_size': 0.0}, + '5GB-10GB': {'count': 0, 'total_size': 0.0, 'max_size': 0.0}, + '10GB-50GB': {'count': 0, 'total_size': 0.0, 'max_size': 0.0}, + '>=50GB': {'count': 0, 'total_size': 0.0, 'max_size': 0.0} + } + + # Categorize shards by size + large_shards_count = 0 # >50GB shards + very_small_shards = 0 # <1GB shards (for percentage calculation) + + for shard in started_shards: + size_gb = shard.size_gb + + if size_gb >= 50: + size_buckets['>=50GB']['count'] += 1 + size_buckets['>=50GB']['total_size'] += 
size_gb + size_buckets['>=50GB']['max_size'] = max(size_buckets['>=50GB']['max_size'], size_gb) + large_shards_count += 1 + elif size_gb >= 10: + size_buckets['10GB-50GB']['count'] += 1 + size_buckets['10GB-50GB']['total_size'] += size_gb + size_buckets['10GB-50GB']['max_size'] = max(size_buckets['10GB-50GB']['max_size'], size_gb) + elif size_gb >= 5: + size_buckets['5GB-10GB']['count'] += 1 + size_buckets['5GB-10GB']['total_size'] += size_gb + size_buckets['5GB-10GB']['max_size'] = max(size_buckets['5GB-10GB']['max_size'], size_gb) + elif size_gb >= 1: + size_buckets['1GB-5GB']['count'] += 1 + size_buckets['1GB-5GB']['total_size'] += size_gb + size_buckets['1GB-5GB']['max_size'] = max(size_buckets['1GB-5GB']['max_size'], size_gb) + else: + size_buckets['<1GB']['count'] += 1 + size_buckets['<1GB']['total_size'] += size_gb + size_buckets['<1GB']['max_size'] = max(size_buckets['<1GB']['max_size'], size_gb) + very_small_shards += 1 + + # Calculate average size for each bucket + for bucket_name, bucket_data in size_buckets.items(): + if bucket_data['count'] > 0: + bucket_data['avg_size_gb'] = bucket_data['total_size'] / bucket_data['count'] + else: + bucket_data['avg_size_gb'] = 0.0 + + # Calculate percentage of very small shards (<1GB) + very_small_percentage = (very_small_shards / total_shards * 100) if total_shards > 0 else 0.0 + + return { + 'total_shards': total_shards, + 'total_size_gb': total_size_gb, + 'avg_shard_size_gb': avg_size_gb, + 'size_buckets': size_buckets, + 'large_shards_count': large_shards_count, + 'very_small_shards_percentage': very_small_percentage + } + + def get_large_shards_details(self) -> List[Dict[str, Any]]: + """Get detailed information about large shards (>=50GB) including partition values""" + # Optimized query to fetch only large shards directly from database + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.id as shard_id, + s.size / 1024^3 as size_gb, + 
s."primary" as is_primary, + s.node['name'] as node_name, + s.node['id'] as node_id + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.state = 'STARTED' + AND s.size >= 50 * 1024^3 -- 50GB in bytes + ORDER BY s.size DESC + """ + + result = self.client.execute_query(query) + + large_shards = [] + for row in result.get('rows', []): + # Get zone information from our nodes data + node_id = row[7] + zone = next((node.zone for node in self.nodes if node.id == node_id), 'unknown') + + large_shards.append({ + 'schema_name': row[0] or 'doc', + 'table_name': row[1], + 'partition_values': row[2], + 'shard_id': row[3], + 'size_gb': float(row[4]) if row[4] else 0.0, + 'is_primary': row[5] or False, + 'node_name': row[6], + 'zone': zone + }) + + return large_shards + + def get_small_shards_details(self, limit: int = 10) -> List[Dict[str, Any]]: + """Get detailed information about the smallest shards, grouped by table/partition""" + # Query to get all shards, ordered by size ascending to get the smallest + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.id as shard_id, + s.size / 1024^3 as size_gb, + s."primary" as is_primary, + s.node['name'] as node_name, + s.node['id'] as node_id + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.state = 'STARTED' + ORDER BY s.size ASC + """ + + result = self.client.execute_query(query) + + # Group by table/partition to get aggregated stats + table_partition_stats = {} + for row in result.get('rows', []): + # Get zone information from our nodes data + node_id = row[7] + zone = next((node.zone for node in self.nodes if node.id == node_id), 'unknown') + + # Create table key with schema + 
schema_name = row[0] or 'doc' + table_name = row[1] + table_display = table_name + if schema_name and schema_name != 'doc': + table_display = f"{schema_name}.{table_name}" + + # Create partition key + partition_key = row[2] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = { + 'sizes': [], + 'primary_count': 0, + 'replica_count': 0, + 'total_size': 0.0 + } + + # Aggregate stats + stats = table_partition_stats[key] + size_gb = float(row[4]) if row[4] else 0.0 + stats['sizes'].append(size_gb) + stats['total_size'] += size_gb + if row[5]: # is_primary + stats['primary_count'] += 1 + else: + stats['replica_count'] += 1 + + # Sort by average size ascending (smallest first) and return top tables/partitions + sorted_stats = [] + for (table_name, partition_key), stats in table_partition_stats.items(): + avg_size = sum(stats['sizes']) / len(stats['sizes']) if stats['sizes'] else 0 + sorted_stats.append({ + 'table_name': table_name, + 'partition_key': partition_key, + 'stats': stats, + 'avg_size': avg_size + }) + + # Sort by average size and take the top 'limit' entries + sorted_stats.sort(key=lambda x: x['avg_size']) + return sorted_stats[:limit] + + def get_table_size_breakdown(self, limit: Optional[int] = 10, order: str = 'largest') -> List[Dict[str, Any]]: + """Get table/partition size breakdown, sorted by total size + + Args: + limit: Number of tables/partitions to return (None for all) + order: 'largest' for biggest first, 'smallest' for smallest first + + Returns: + List of table/partition stats with size information + """ + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.size / 1024^3 as size_gb, + s."primary" as is_primary + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = 
p.partition_ident + WHERE s.state = 'STARTED' + """ + + result = self.client.execute_query(query) + + # Group by table/partition to get aggregated stats + table_partition_stats = {} + for row in result.get('rows', []): + schema_name = row[0] or 'doc' + table_name = row[1] + table_display = table_name + if schema_name and schema_name != 'doc': + table_display = f"{schema_name}.{table_name}" + + # Create partition key + partition_key = row[2] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = { + 'sizes': [], + 'primary_count': 0, + 'replica_count': 0, + 'total_size': 0.0 + } + + # Aggregate stats + stats = table_partition_stats[key] + size_gb = float(row[3]) if row[3] else 0.0 + stats['sizes'].append(size_gb) + stats['total_size'] += size_gb + if row[4]: # is_primary + stats['primary_count'] += 1 + else: + stats['replica_count'] += 1 + + # Convert to list and calculate derived stats + table_stats = [] + for (table_name, partition_key), stats in table_partition_stats.items(): + total_shards = stats['primary_count'] + stats['replica_count'] + min_size = min(stats['sizes']) if stats['sizes'] else 0.0 + max_size = max(stats['sizes']) if stats['sizes'] else 0.0 + avg_size = stats['total_size'] / total_shards if total_shards > 0 else 0.0 + + table_stats.append({ + 'table_name': table_name, + 'partition': partition_key, + 'total_shards': total_shards, + 'primary_count': stats['primary_count'], + 'replica_count': stats['replica_count'], + 'min_size': min_size, + 'avg_size': avg_size, + 'max_size': max_size, + 'total_size': stats['total_size'] + }) + + # Sort by total size + reverse = order == 'largest' + table_stats.sort(key=lambda x: x['total_size'], reverse=reverse) + + return table_stats if limit is None else table_stats[:limit] + def get_cluster_overview(self) -> Dict[str, Any]: """Get a comprehensive overview of the cluster""" # Get cluster watermark settings @@ -872,6 
+1155,12 @@ def get_cluster_recovery_status(self, return recoveries + def get_problematic_shards(self, + table_name: Optional[str] = None, + node_name: Optional[str] = None) -> List[Dict[str, Any]]: + """Get shards that need attention but aren't actively recovering""" + return self.client.get_problematic_shards(table_name, node_name) + def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]: """Generate a summary of recovery operations""" @@ -966,18 +1255,23 @@ def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: return " No recoveries of this type" # Table headers - headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"] + headers = ["Table", "Shard", "Node", "Recovery", "Stage", "Progress", "Size(GB)", "Time(s)"] # Calculate column widths col_widths = [len(h) for h in headers] rows = [] for recovery in recoveries: + # Format table name with partition values if available + table_display = f"{recovery.schema_name}.{recovery.table_name}" + if recovery.partition_values: + table_display = f"{table_display} {recovery.partition_values}" + row = [ - f"{recovery.schema_name}.{recovery.table_name}", + table_display, str(recovery.shard_id), recovery.node_name, - recovery.shard_type, + recovery.recovery_type, recovery.stage, f"{recovery.overall_progress:.1f}%", f"{recovery.size_gb:.1f}", @@ -1003,3 +1297,170 @@ def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: output.append(data_row) return "\n".join(output) + + +class ActiveShardMonitor: + """Monitor active shard checkpoint progression over time""" + + def __init__(self, client: CrateDBClient): + self.client = client + + def compare_snapshots(self, snapshot1: List['ActiveShardSnapshot'], + snapshot2: List['ActiveShardSnapshot'], + min_activity_threshold: int = 0) -> List['ActiveShardActivity']: + """Compare two snapshots and return activity data for shards present in both + + Args: + snapshot1: First snapshot (baseline) + 
snapshot2: Second snapshot (comparison) + min_activity_threshold: Minimum checkpoint delta to consider active (default: 0) + """ + from .database import ActiveShardActivity + + # Create lookup dict for snapshot1 + snapshot1_dict = {snap.shard_identifier: snap for snap in snapshot1} + + activities = [] + + for snap2 in snapshot2: + snap1 = snapshot1_dict.get(snap2.shard_identifier) + if snap1: + # Calculate local checkpoint delta + local_checkpoint_delta = snap2.local_checkpoint - snap1.local_checkpoint + time_diff = snap2.timestamp - snap1.timestamp + + # Filter based on actual activity between snapshots + if local_checkpoint_delta >= min_activity_threshold: + activity = ActiveShardActivity( + schema_name=snap2.schema_name, + table_name=snap2.table_name, + shard_id=snap2.shard_id, + node_name=snap2.node_name, + is_primary=snap2.is_primary, + partition_ident=snap2.partition_ident, + local_checkpoint_delta=local_checkpoint_delta, + snapshot1=snap1, + snapshot2=snap2, + time_diff_seconds=time_diff + ) + activities.append(activity) + + # Sort by activity (highest checkpoint delta first) + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + return activities + + def format_activity_display(self, activities: List['ActiveShardActivity'], + show_count: int = 10, watch_mode: bool = False) -> str: + """Format activity data for console display""" + if not activities: + return "✅ No active shards with significant checkpoint progression found" + + # Limit to requested count + activities = activities[:show_count] + + # Calculate observation period for context + if activities: + observation_period = activities[0].time_diff_seconds + output = [f"\n🔥 Most Active Shards ({len(activities)} shown, {observation_period:.0f}s observation period)"] + else: + output = [f"\n🔥 Most Active Shards ({len(activities)} shown, sorted by checkpoint activity)"] + + output.append("") + + # Add activity rate context + if activities: + total_activity = sum(a.local_checkpoint_delta 
for a in activities) + avg_rate = sum(a.activity_rate for a in activities) / len(activities) + output.append(f"[dim]Total checkpoint activity: {total_activity:,} changes, Average rate: {avg_rate:.1f}/sec[/dim]") + output.append("") + + # Create table headers + headers = ["Rank", "Schema.Table", "Shard", "Partition", "Node", "Type", "Checkpoint Δ", "Rate/sec", "Trend"] + + # Calculate column widths + col_widths = [len(h) for h in headers] + + # Prepare rows + rows = [] + for i, activity in enumerate(activities, 1): + # Format values + rank = str(i) + table_id = activity.table_identifier + shard_id = str(activity.shard_id) + partition = activity.partition_ident[:14] + "..." if len(activity.partition_ident) > 14 else activity.partition_ident or "-" + node = activity.node_name + shard_type = "P" if activity.is_primary else "R" + checkpoint_delta = f"{activity.local_checkpoint_delta:,}" + rate = f"{activity.activity_rate:.1f}" if activity.activity_rate >= 0.1 else "<0.1" + + # Calculate activity trend indicator + if activity.activity_rate >= 100: + trend = "🔥 HOT" + elif activity.activity_rate >= 50: + trend = "📈 HIGH" + elif activity.activity_rate >= 10: + trend = "📊 MED" + else: + trend = "📉 LOW" + + row = [rank, table_id, shard_id, partition, node, shard_type, checkpoint_delta, rate, trend] + rows.append(row) + + # Update column widths + for j, cell in enumerate(row): + col_widths[j] = max(col_widths[j], len(cell)) + + # Format table + header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) + output.append(header_row) + output.append(" " + "-" * (len(header_row) - 3)) + + # Data rows + for row in rows: + data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)) + output.append(data_row) + + # Only show legend and insights in non-watch mode + if not watch_mode: + output.append("") + output.append("Legend:") + output.append(" • Checkpoint Δ: Write operations during observation period") + output.append(" • Rate/sec: Checkpoint 
changes per second") + output.append(" • Partition: partition_ident (truncated if >14 chars, '-' if none)") + output.append(" • Type: P=Primary, R=Replica") + output.append(" • Trend: 🔥 HOT (≥100/s), 📈 HIGH (≥50/s), 📊 MED (≥10/s), 📉 LOW (<10/s)") + + # Add insights about activity patterns + if activities: + output.append("") + output.append("Insights:") + + # Count by trend + hot_count = len([a for a in activities if a.activity_rate >= 100]) + high_count = len([a for a in activities if 50 <= a.activity_rate < 100]) + med_count = len([a for a in activities if 10 <= a.activity_rate < 50]) + low_count = len([a for a in activities if a.activity_rate < 10]) + + if hot_count > 0: + output.append(f" • {hot_count} HOT shards (≥100 changes/sec) - consider load balancing") + if high_count > 0: + output.append(f" • {high_count} HIGH activity shards - monitor capacity") + if med_count > 0: + output.append(f" • {med_count} MEDIUM activity shards - normal operation") + if low_count > 0: + output.append(f" • {low_count} LOW activity shards - occasional writes") + + # Identify patterns + primary_activities = [a for a in activities if a.is_primary] + if len(primary_activities) == len(activities): + output.append(" • All active shards are PRIMARY - normal write pattern") + elif len(primary_activities) < len(activities) * 0.5: + output.append(" • Many REPLICA shards active - possible recovery/replication activity") + + # Node concentration + nodes = set(a.node_name for a in activities) + if len(nodes) <= 2: + output.append(f" • Activity concentrated on {len(nodes)} node(s) - consider redistribution") + + return "\n".join(output) diff --git a/src/xmover/cli.py b/src/xmover/cli.py index 15b6073..569d4cb 100644 --- a/src/xmover/cli.py +++ b/src/xmover/cli.py @@ -5,6 +5,7 @@ import sys import time import os +import json from typing import Optional try: import click @@ -19,8 +20,9 @@ sys.exit(1) from .database import CrateDBClient -from .analyzer import ShardAnalyzer, RecoveryMonitor 
+from .analyzer import ShardAnalyzer, RecoveryMonitor, ActiveShardMonitor from .distribution_analyzer import DistributionAnalyzer +from .shard_size_monitor import ShardSizeMonitor, validate_rules_file console = Console() @@ -46,31 +48,73 @@ def format_percentage(value: float) -> str: return f"[{color}]{value:.1f}%[/{color}]" +def format_table_display_with_partition(schema_name: str, table_name: str, partition_values: str = None) -> str: + """Format table display with partition values if available""" + # Create base table name + if schema_name and schema_name != 'doc': + base_display = f"{schema_name}.{table_name}" + else: + base_display = table_name + + # Add partition values if available + if partition_values: + return f"{base_display} {partition_values}" + else: + return base_display + + def format_translog_info(recovery_info) -> str: - """Format translog size information with color coding""" - tl_bytes = recovery_info.translog_size_bytes + """Format translog size information with color coding showing both total and uncommitted sizes""" + tl_total_bytes = recovery_info.translog_size_bytes + tl_uncommitted_bytes = recovery_info.translog_uncommitted_bytes - # Only show if significant (>10MB for production) - if tl_bytes < 10 * 1024 * 1024: # 10MB for production + # Only show if significant (>10MB for production) - check uncommitted size primarily + if tl_uncommitted_bytes < 10 * 1024 * 1024 and tl_total_bytes < 50 * 1024 * 1024: # 10MB uncommitted or 50MB total return "" - tl_gb = recovery_info.translog_size_gb + tl_total_gb = recovery_info.translog_size_gb + tl_uncommitted_gb = recovery_info.translog_uncommitted_gb + uncommitted_percentage = recovery_info.translog_uncommitted_percentage - # Color coding based on size - if tl_gb >= 5.0: + # Color coding based on uncommitted size and percentage + # Round percentage to handle floating-point precision issues + rounded_percentage = round(uncommitted_percentage, 1) + if tl_uncommitted_gb >= 5.0 or rounded_percentage >= 
80.0: color = "red" - elif tl_gb >= 1.0: + elif tl_uncommitted_gb >= 1.0 or rounded_percentage >= 50.0: color = "yellow" else: color = "green" - # Format size - if tl_gb >= 1.0: - size_str = f"{tl_gb:.1f}GB" + # Format sizes + if tl_total_gb >= 1.0: + total_str = f"{tl_total_gb:.1f}GB" + else: + total_str = f"{tl_total_gb*1000:.0f}MB" + + if tl_uncommitted_gb >= 1.0: + uncommitted_str = f"{tl_uncommitted_gb:.1f}GB" else: - size_str = f"{tl_gb*1000:.0f}MB" + uncommitted_str = f"{tl_uncommitted_gb*1000:.0f}MB" - return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" + return f" [dim]([{color}]TL:{total_str} / {uncommitted_str} / {uncommitted_percentage:.0f}%[/{color}])[/dim]" + + +def format_recovery_progress(recovery_info) -> str: + """Format recovery progress, using sequence number progress for replicas when available""" + if not recovery_info.is_primary and recovery_info.seq_no_progress is not None: + # For replica shards, show sequence number progress if available + seq_progress = recovery_info.seq_no_progress + traditional_progress = recovery_info.overall_progress + + # If sequence progress is significantly different from traditional progress, show both + if abs(seq_progress - traditional_progress) > 5.0: + return f"{seq_progress:.1f}% (seq) / {traditional_progress:.1f}% (rec)" + else: + return f"{seq_progress:.1f}% (seq)" + else: + # For primary shards or when sequence progress unavailable, use traditional progress + return f"{recovery_info.overall_progress:.1f}%" @click.group() @@ -99,9 +143,18 @@ def main(ctx): @main.command() @click.option('--table', '-t', help='Analyze specific table only') +@click.option('--largest', type=int, help='Show N largest tables/partitions by size') +@click.option('--smallest', type=int, help='Show N smallest tables/partitions by size') +@click.option('--no-zero-size', is_flag=True, default=False, help='Exclude zero-sized tables from smallest results') @click.pass_context -def analyze(ctx, table: Optional[str]): - """Analyze 
current shard distribution across nodes and zones""" +def analyze(ctx, table: Optional[str], largest: Optional[int], smallest: Optional[int], no_zero_size: bool): + """Analyze current shard distribution across nodes and zones + + Use --largest N to show the N largest tables/partitions by total size. + Use --smallest N to show the N smallest tables/partitions by total size. + Use --no-zero-size with --smallest to exclude zero-sized tables from results. + Both options properly handle partitioned tables and show detailed size breakdowns. + """ client = ctx.obj['client'] analyzer = ShardAnalyzer(client) @@ -182,6 +235,280 @@ def analyze(ctx, table: Optional[str]): ) console.print(node_table) + console.print() + + # Shard Size Overview + size_overview = analyzer.get_shard_size_overview() + + size_table = Table(title="Shard Size Distribution", box=box.ROUNDED) + size_table.add_column("Size Range", style="cyan") + size_table.add_column("Count", justify="right", style="magenta") + size_table.add_column("Percentage", justify="right", style="green") + size_table.add_column("Avg Size", justify="right", style="blue") + size_table.add_column("Max Size", justify="right", style="red") + size_table.add_column("Total Size", justify="right", style="yellow") + + total_shards = size_overview['total_shards'] + + # Define color coding thresholds + large_shards_threshold = 0 # warn if ANY shards >=50GB (red flag) + small_shards_percentage_threshold = 40 # warn if >40% of shards are small (<1GB) + + for bucket_name, bucket_data in size_overview['size_buckets'].items(): + count = bucket_data['count'] + avg_size = bucket_data['avg_size_gb'] + total_size = bucket_data['total_size'] + percentage = (count / total_shards * 100) if total_shards > 0 else 0 + + # Apply color coding + count_str = str(count) + percentage_str = f"{percentage:.1f}%" + + # Color code large shards (>=50GB) - ANY large shard is a red flag + if bucket_name == '>=50GB' and count > large_shards_threshold: + count_str = 
f"[red]{count}[/red]" + percentage_str = f"[red]{percentage:.1f}%[/red]" + + # Color code if too many very small shards (<1GB) + if bucket_name == '<1GB' and percentage > small_shards_percentage_threshold: + count_str = f"[yellow]{count}[/yellow]" + percentage_str = f"[yellow]{percentage:.1f}%[/yellow]" + + size_table.add_row( + bucket_name, + count_str, + percentage_str, + f"{avg_size:.2f}GB" if avg_size > 0 else "0GB", + f"{bucket_data['max_size']:.2f}GB" if bucket_data['max_size'] > 0 else "0GB", + format_size(total_size) + ) + + console.print(size_table) + + # Add footer showing total number of tables/partitions + all_tables = analyzer.get_table_size_breakdown(limit=None) + total_tables_partitions = len(all_tables) + console.print(f"[dim]📊 Total: {total_tables_partitions} table/partition(s) in cluster[/dim]") + + # Add schema breakdown table + schema_stats = {} + for table_info in all_tables: + # Extract schema from table name (format: "schema.table" or just "table") + table_name = table_info['table_name'] + if '.' 
in table_name: + schema = table_name.split('.')[0] + else: + schema = 'doc' # Default schema + + partition = table_info['partition'] + has_partition = partition != 'N/A' + + if schema not in schema_stats: + schema_stats[schema] = { + 'tables': 0, + 'partitioned_tables': set(), + 'total_partitions': 0 + } + + if has_partition: + # This is a partitioned table + base_table_name = table_name + schema_stats[schema]['partitioned_tables'].add(base_table_name) + schema_stats[schema]['total_partitions'] += 1 + else: + # This is a regular table + schema_stats[schema]['tables'] += 1 + + # Create schema breakdown table + console.print() + schema_table = Table(title="Schema Breakdown", box=box.ROUNDED) + schema_table.add_column("Schema", style="cyan") + schema_table.add_column("Tables", justify="right", style="green") + schema_table.add_column("Partitioned Tables", justify="right", style="magenta") + schema_table.add_column("Total Partitions", justify="right", style="yellow") + + # Sort schemas alphabetically (case-insensitive) + for schema in sorted(schema_stats.keys(), key=str.lower): + stats = schema_stats[schema] + tables_count = stats['tables'] + partitioned_tables_count = len(stats['partitioned_tables']) + total_partitions = stats['total_partitions'] + + schema_table.add_row( + schema, + str(tables_count), + str(partitioned_tables_count), + str(total_partitions) + ) + + console.print(schema_table) + + # Add warnings if thresholds are exceeded + warnings = [] + if size_overview['large_shards_count'] > large_shards_threshold: + warnings.append(f"[red]🔥 CRITICAL: {size_overview['large_shards_count']} large shards (>=50GB) detected - IMMEDIATE ACTION REQUIRED![/red]") + warnings.append(f"[red] Large shards cause slow recovery, memory pressure, and performance issues[/red]") + + # Calculate percentage of very small shards (<1GB) + very_small_count = size_overview['size_buckets']['<1GB']['count'] + very_small_percentage = (very_small_count / total_shards * 100) if total_shards 
> 0 else 0 + + if very_small_percentage > small_shards_percentage_threshold: + warnings.append(f"[yellow]⚠️ {very_small_percentage:.1f}% of shards are very small (<1GB) - consider optimizing shard allocation[/yellow]") + warnings.append(f"[yellow] Too many small shards create metadata overhead and reduce efficiency[/yellow]") + + if warnings: + console.print() + for warning in warnings: + console.print(warning) + + # Show compact table/partition breakdown of large shards if any exist + if size_overview['large_shards_count'] > 0: + console.print() + large_shards_details = analyzer.get_large_shards_details() + + # Aggregate by table/partition + table_partition_stats = {} + for shard in large_shards_details: + # Create table key with schema + table_display = shard['table_name'] + if shard['schema_name'] and shard['schema_name'] != 'doc': + table_display = f"{shard['schema_name']}.{shard['table_name']}" + + # Create partition key + partition_key = shard['partition_values'] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = { + 'sizes': [], + 'primary_count': 0, + 'replica_count': 0, + 'total_size': 0.0 + } + + # Aggregate stats + stats = table_partition_stats[key] + stats['sizes'].append(shard['size_gb']) + stats['total_size'] += shard['size_gb'] + if shard['is_primary']: + stats['primary_count'] += 1 + else: + stats['replica_count'] += 1 + + # Create compact table + large_shards_table = Table(title=f"Large Shards Breakdown by Table/Partition (>=50GB)", box=box.ROUNDED) + large_shards_table.add_column("Table", style="cyan") + large_shards_table.add_column("Partition", style="blue") + large_shards_table.add_column("Shards", justify="right", style="magenta") + large_shards_table.add_column("P/R", justify="center", style="yellow") + large_shards_table.add_column("Min Size", justify="right", style="green") + large_shards_table.add_column("Avg Size", justify="right", style="red") 
+ large_shards_table.add_column("Max Size", justify="right", style="red") + large_shards_table.add_column("Total Size", justify="right", style="red") + + # Sort by total size descending (most problematic first) + sorted_stats = sorted(table_partition_stats.items(), key=lambda x: x[1]['total_size'], reverse=True) + + for (table_name, partition_key), stats in sorted_stats: + # Format partition display + partition_display = partition_key + if partition_display != "N/A" and len(partition_display) > 25: + partition_display = partition_display[:22] + "..." + + # Calculate size stats + sizes = stats['sizes'] + min_size = min(sizes) + avg_size = sum(sizes) / len(sizes) + max_size = max(sizes) + total_size = stats['total_size'] + total_shards = len(sizes) + + # Format primary/replica ratio + p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R" + + large_shards_table.add_row( + table_name, + partition_display, + str(total_shards), + p_r_display, + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB" + ) + + console.print(large_shards_table) + + # Add summary stats + total_primary = sum(stats['primary_count'] for stats in table_partition_stats.values()) + total_replica = sum(stats['replica_count'] for stats in table_partition_stats.values()) + affected_table_partitions = len(table_partition_stats) + + console.print() + console.print(f"[dim]📊 Summary: {total_primary} primary, {total_replica} replica shards across {affected_table_partitions} table/partition(s)[/dim]") + + # Show compact table/partition breakdown of smallest shards (top 10) + console.print() + small_shards_details = analyzer.get_small_shards_details(limit=10) + + if small_shards_details: + # Create compact table + small_shards_table = Table(title=f"Smallest Shards Breakdown by Table/Partition (Top 10)", box=box.ROUNDED) + small_shards_table.add_column("Table", style="cyan") + small_shards_table.add_column("Partition", style="blue") + 
small_shards_table.add_column("Shards", justify="right", style="magenta") + small_shards_table.add_column("P/R", justify="center", style="yellow") + small_shards_table.add_column("Min Size", justify="right", style="green") + small_shards_table.add_column("Avg Size", justify="right", style="red") + small_shards_table.add_column("Max Size", justify="right", style="red") + small_shards_table.add_column("Total Size", justify="right", style="red") + + for entry in small_shards_details: + table_name = entry['table_name'] + partition_key = entry['partition_key'] + stats = entry['stats'] + + # Format partition display + partition_display = partition_key + if partition_display != "N/A" and len(partition_display) > 25: + partition_display = partition_display[:22] + "..." + + # Calculate size stats + sizes = stats['sizes'] + min_size = min(sizes) + avg_size = sum(sizes) / len(sizes) + max_size = max(sizes) + total_size = stats['total_size'] + total_shards = len(sizes) + + # Format primary/replica ratio + p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R" + + small_shards_table.add_row( + table_name, + partition_display, + str(total_shards), + p_r_display, + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB" + ) + + console.print(small_shards_table) + + # Add summary stats for smallest shards + total_small_primary = sum(entry['stats']['primary_count'] for entry in small_shards_details) + total_small_replica = sum(entry['stats']['replica_count'] for entry in small_shards_details) + small_table_partitions = len(small_shards_details) + + console.print() + console.print(f"[dim]📊 Summary: {total_small_primary} primary, {total_small_replica} replica shards across {small_table_partitions} table/partition(s) with smallest average sizes[/dim]") + + console.print() # Table-specific analysis if requested if table: @@ -201,6 +528,119 @@ def analyze(ctx, table: Optional[str]): console.print(table_summary) + # Show largest tables if 
requested + if largest: + console.print() + largest_tables = analyzer.get_table_size_breakdown(limit=largest, order='largest') + + largest_table = Table(title=f"Largest Tables/Partitions by Size (Top {largest})", box=box.ROUNDED) + largest_table.add_column("Table", style="cyan") + largest_table.add_column("Partition", style="magenta") + largest_table.add_column("Shards", justify="right", style="yellow") + largest_table.add_column("P/R", justify="right", style="blue") + largest_table.add_column("Min Size", justify="right", style="green") + largest_table.add_column("Avg Size", justify="right", style="bright_green") + largest_table.add_column("Max Size", justify="right", style="red") + largest_table.add_column("Total Size", justify="right", style="bright_red") + + for entry in largest_tables: + table_name = entry['table_name'] + partition = entry['partition'] + total_shards = entry['total_shards'] + primary_count = entry['primary_count'] + replica_count = entry['replica_count'] + min_size = entry['min_size'] + avg_size = entry['avg_size'] + max_size = entry['max_size'] + total_size = entry['total_size'] + + largest_table.add_row( + table_name, + partition, + str(total_shards), + f"{primary_count}P/{replica_count}R", + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB" + ) + + console.print(largest_table) + + # Add summary stats + total_largest_size = sum(entry['total_size'] for entry in largest_tables) + total_largest_shards = sum(entry['total_shards'] for entry in largest_tables) + + console.print() + console.print(f"[dim]📊 Summary: {total_largest_shards} total shards using {total_largest_size:.1f}GB across {len(largest_tables)} largest table/partition(s)[/dim]") + + # Show smallest tables if requested + if smallest: + console.print() + all_smallest = analyzer.get_table_size_breakdown(limit=None, order='smallest') + + # Filter based on no_zero_size flag + if no_zero_size: + # Use tolerance for effectively zero-sized tables 
(handles display formatting) + # Since display uses {size:.1f}GB format, anything < 0.05GB displays as 0.0GB + zero_tolerance = 0.05 # Consider anything that displays as 0.0GB as effectively zero + + # Count effectively zero-sized tables + zero_sized_count = len([t for t in all_smallest if t['total_size'] < zero_tolerance]) + # Filter out effectively zero-sized tables and take the requested number + non_zero_tables = [t for t in all_smallest if t['total_size'] >= zero_tolerance] + smallest_tables = non_zero_tables[:smallest] + + if zero_sized_count > 0: + console.print(f"[dim]ℹ️ Found {zero_sized_count} table/partition(s) with 0.0GB size (excluded from results)[/dim]") + console.print() + else: + smallest_tables = all_smallest[:smallest] + + smallest_table = Table(title=f"Smallest Tables/Partitions by Size (Top {len(smallest_tables)})", box=box.ROUNDED) + smallest_table.add_column("Table", style="cyan") + smallest_table.add_column("Partition", style="magenta") + smallest_table.add_column("Shards", justify="right", style="yellow") + smallest_table.add_column("P/R", justify="right", style="blue") + smallest_table.add_column("Min Size", justify="right", style="green") + smallest_table.add_column("Avg Size", justify="right", style="bright_green") + smallest_table.add_column("Max Size", justify="right", style="red") + smallest_table.add_column("Total Size", justify="right", style="bright_red") + + for entry in smallest_tables: + table_name = entry['table_name'] + partition = entry['partition'] + total_shards = entry['total_shards'] + primary_count = entry['primary_count'] + replica_count = entry['replica_count'] + min_size = entry['min_size'] + avg_size = entry['avg_size'] + max_size = entry['max_size'] + total_size = entry['total_size'] + + smallest_table.add_row( + table_name, + partition, + str(total_shards), + f"{primary_count}P/{replica_count}R", + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB" + ) + + 
console.print(smallest_table) + + # Add summary stats + total_smallest_size = sum(entry['total_size'] for entry in smallest_tables) + total_smallest_shards = sum(entry['total_shards'] for entry in smallest_tables) + + console.print() + if no_zero_size and len([t for t in all_smallest if t['total_size'] < 0.05]) > 0: + console.print(f"[dim]📊 Summary: {total_smallest_shards} total shards using {total_smallest_size:.3f}GB across {len(smallest_tables)} smallest non-zero table/partition(s)[/dim]") + else: + console.print(f"[dim]📊 Summary: {total_smallest_shards} total shards using {total_smallest_size:.3f}GB across {len(smallest_tables)} smallest table/partition(s)[/dim]") + @main.command() @click.option('--table', '-t', help='Find candidates for specific table only') @@ -1094,6 +1534,7 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: # Track previous state for change detection previous_recoveries = {} previous_timestamp = None + last_transitioning_display = None first_run = True while True: @@ -1118,10 +1559,9 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: recovery_key = f"{recovery.schema_name}.{recovery.table_name}.{recovery.shard_id}.{recovery.node_name}" # Create complete table name - if recovery.schema_name == "doc": - table_display = recovery.table_name - else: - table_display = f"{recovery.schema_name}.{recovery.table_name}" + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) # Count active vs completed if recovery.stage == "DONE" and recovery.overall_progress >= 100.0: @@ -1145,9 +1585,17 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: translog_info = format_translog_info(recovery) if diff > 0: - changes.append(f"[green]📈[/green] {table_display} S{recovery.shard_id} {recovery.overall_progress:.1f}% (+{diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}") + table_display 
= format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) + progress_info = format_recovery_progress(recovery) + changes.append(f"[green]📈[/green] {table_display} S{recovery.shard_id} {recovery.recovery_type} {progress_info} (+{diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}") else: - changes.append(f"[yellow]📉[/yellow] {table_display} S{recovery.shard_id} {recovery.overall_progress:.1f}% ({diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}") + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) + progress_info = format_recovery_progress(recovery) + changes.append(f"[yellow]📉[/yellow] {table_display} S{recovery.shard_id} {recovery.recovery_type} {progress_info} ({diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}") elif prev['stage'] != recovery.stage: # Create node route display node_route = "" @@ -1159,7 +1607,11 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: # Add translog info translog_info = format_translog_info(recovery) - changes.append(f"[blue]🔄[/blue] {table_display} S{recovery.shard_id} {prev['stage']}→{recovery.stage} {recovery.size_gb:.1f}GB{translog_info}{node_route}") + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) + progress_info = format_recovery_progress(recovery) + changes.append(f"[blue]🔄[/blue] {table_display} S{recovery.shard_id} {recovery.recovery_type} {prev['stage']}→{recovery.stage} {progress_info} {recovery.size_gb:.1f}GB{translog_info}{node_route}") else: # New recovery - show based on include_transitioning flag or first run if first_run or include_transitioning or (recovery.overall_progress < 100.0 or recovery.stage != "DONE"): @@ -1174,7 +1626,11 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: # Add translog info 
translog_info = format_translog_info(recovery) - changes.append(f"{status_icon} {table_display} S{recovery.shard_id} {recovery.stage} {recovery.overall_progress:.1f}% {recovery.size_gb:.1f}GB{translog_info}{node_route}") + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) + progress_info = format_recovery_progress(recovery) + changes.append(f"{status_icon} {table_display} S{recovery.shard_id} {recovery.recovery_type} {recovery.stage} {progress_info} {recovery.size_gb:.1f}GB{translog_info}{node_route}") # Store current state for next comparison previous_recoveries[recovery_key] = { @@ -1182,29 +1638,108 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: 'stage': recovery.stage } - # Always show a status line - if not recoveries: - console.print(f"{current_time} | [green]No recoveries - cluster stable[/green]") + # Get problematic shards for comprehensive status + problematic_shards = recovery_monitor.get_problematic_shards(table, node) + + # Filter out shards that are already being recovered + non_recovering_shards = [] + if problematic_shards: + for shard in problematic_shards: + # Check if this shard is already in our recoveries list + is_recovering = any( + r.shard_id == shard['shard_id'] and + r.table_name == shard['table_name'] and + r.schema_name == shard['schema_name'] + for r in recoveries + ) + if not is_recovering: + non_recovering_shards.append(shard) + + # Always show a comprehensive status line + if not recoveries and not non_recovering_shards: + console.print(f"{current_time} | [green]No issues - cluster stable[/green]") + previous_recoveries.clear() + elif not recoveries and non_recovering_shards: + console.print(f"{current_time} | [yellow]{len(non_recovering_shards)} shards need attention (not recovering)[/yellow]") + # Show first few problematic shards + for shard in non_recovering_shards[:5]: + table_display = 
format_table_display_with_partition( + shard['schema_name'], shard['table_name'], shard.get('partition_values') + ) + primary_indicator = "P" if shard.get('primary') else "R" + console.print(f" | [red]⚠[/red] {table_display} S{shard['shard_id']}{primary_indicator} {shard['state']}") + if len(non_recovering_shards) > 5: + console.print(f" | [dim]... and {len(non_recovering_shards) - 5} more[/dim]") previous_recoveries.clear() else: - # Build status message - status = "" + # Build status message for active recoveries + status_parts = [] if active_count > 0: - status = f"{active_count} active" + status_parts.append(f"{active_count} recovering") if completed_count > 0: - status += f", {completed_count} done" if status else f"{completed_count} done" + status_parts.append(f"{completed_count} done") + if non_recovering_shards: + status_parts.append(f"[yellow]{len(non_recovering_shards)} awaiting recovery[/yellow]") + + status = " | ".join(status_parts) # Show status line with changes or periodic update if changes: console.print(f"{current_time} | {status}") for change in changes: console.print(f" | {change}") + # Show some problematic shards if there are any + if non_recovering_shards and len(changes) < 3: # Don't overwhelm the output + for shard in non_recovering_shards[:2]: + table_display = format_table_display_with_partition( + shard['schema_name'], shard['table_name'], shard.get('partition_values') + ) + primary_indicator = "P" if shard.get('primary') else "R" + console.print(f" | [red]⚠[/red] {table_display} S{shard['shard_id']}{primary_indicator} {shard['state']}") else: # Show periodic status even without changes if include_transitioning and completed_count > 0: - console.print(f"{current_time} | {status} (transitioning)") + from datetime import datetime, timedelta + current_dt = datetime.now() + + # Show transitioning details every 30 seconds or first time + should_show_details = ( + last_transitioning_display is None or + (current_dt - 
last_transitioning_display).total_seconds() >= 30 + ) + + if should_show_details: + console.print(f"{current_time} | {status} (transitioning)") + # Show details of transitioning recoveries + transitioning_recoveries = [r for r in recoveries if r.stage == "DONE" and r.overall_progress >= 100.0] + for recovery in transitioning_recoveries[:5]: # Limit to first 5 to avoid spam + # Create node route display + node_route = "" + if recovery.recovery_type == "PEER" and recovery.source_node_name: + node_route = f" {recovery.source_node_name} → {recovery.node_name}" + elif recovery.recovery_type == "DISK": + node_route = f" disk → {recovery.node_name}" + + # Add translog info + translog_info = format_translog_info(recovery) + + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) + progress_info = format_recovery_progress(recovery) + primary_indicator = "P" if recovery.is_primary else "R" + console.print(f" | [cyan]🔄[/cyan] {table_display} S{recovery.shard_id}{primary_indicator} {recovery.recovery_type} {recovery.stage} {progress_info} {recovery.size_gb:.1f}GB{translog_info}{node_route}") + + if len(transitioning_recoveries) > 5: + console.print(f" | [dim]... 
and {len(transitioning_recoveries) - 5} more transitioning[/dim]") + + last_transitioning_display = current_dt + else: + console.print(f"{current_time} | {status} (transitioning)") elif active_count > 0: console.print(f"{current_time} | {status} (no changes)") + elif non_recovering_shards: + console.print(f"{current_time} | {status} (issues persist)") previous_timestamp = current_time first_run = False @@ -1220,26 +1755,55 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: recovery_type_filter=recovery_type, include_transitioning=include_transitioning ) - - if final_recoveries: - console.print("\n📊 [bold]Final Recovery Summary:[/bold]") - summary = recovery_monitor.get_recovery_summary(final_recoveries) - - # Count active vs completed - active_count = len([r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) - completed_count = len(final_recoveries) - active_count - - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Active: {active_count}, Completed: {completed_count}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") - console.print(f" Average progress: {summary['avg_progress']:.1f}%") - - if summary['by_type']: - console.print(f" By recovery type:") - for rec_type, stats in summary['by_type'].items(): - console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress") + + final_problematic_shards = recovery_monitor.get_problematic_shards(table, node) + + # Filter out shards that are already being recovered + final_non_recovering_shards = [] + if final_problematic_shards: + for shard in final_problematic_shards: + is_recovering = any( + r.shard_id == shard['shard_id'] and + r.table_name == shard['table_name'] and + r.schema_name == shard['schema_name'] + for r in final_recoveries + ) + if not is_recovering: + final_non_recovering_shards.append(shard) + + if final_recoveries or final_non_recovering_shards: + 
console.print("\n📊 [bold]Final Cluster Status Summary:[/bold]") + + if final_recoveries: + summary = recovery_monitor.get_recovery_summary(final_recoveries) + # Count active vs completed + active_count = len([r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) + completed_count = len(final_recoveries) - active_count + + console.print(f" Total recoveries: {summary['total_recoveries']}") + console.print(f" Active: {active_count}, Completed: {completed_count}") + console.print(f" Total size: {summary['total_size_gb']:.1f} GB") + console.print(f" Average progress: {summary['avg_progress']:.1f}%") + + if summary['by_type']: + console.print(f" By recovery type:") + for rec_type, stats in summary['by_type'].items(): + console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress") + + if final_non_recovering_shards: + console.print(f" [yellow]Problematic shards needing attention: {len(final_non_recovering_shards)}[/yellow]") + # Group by state for summary + by_state = {} + for shard in final_non_recovering_shards: + state = shard['state'] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") else: - console.print("\n[green]✅ No active recoveries at exit[/green]") + console.print("\n[green]✅ Cluster stable - no issues detected[/green]") return @@ -1255,18 +1819,58 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: display_output = recovery_monitor.format_recovery_display(recoveries) console.print(display_output) - if not recoveries: + # Get problematic shards for comprehensive status + problematic_shards = recovery_monitor.get_problematic_shards(table, node) + + # Filter out shards that are already being recovered + non_recovering_shards = [] + if problematic_shards: + for shard in problematic_shards: + is_recovering = any( + r.shard_id == shard['shard_id'] and + 
r.table_name == shard['table_name'] and + r.schema_name == shard['schema_name'] + for r in recoveries + ) + if not is_recovering: + non_recovering_shards.append(shard) + + if not recoveries and not non_recovering_shards: if include_transitioning: - console.print("\n[green]✅ No recoveries found (active or transitioning)[/green]") + console.print("\n[green]✅ No issues found - cluster stable[/green]") else: console.print("\n[green]✅ No active recoveries found[/green]") console.print("[dim]💡 Use --include-transitioning to see completed recoveries still transitioning[/dim]") + elif not recoveries and non_recovering_shards: + console.print(f"\n[yellow]⚠️ {len(non_recovering_shards)} shards need attention (not recovering)[/yellow]") + # Group by state for summary + by_state = {} + for shard in non_recovering_shards: + state = shard['state'] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") + + # Show first few examples + console.print(f"\nExamples:") + for shard in non_recovering_shards[:5]: + table_display = format_table_display_with_partition( + shard['schema_name'], shard['table_name'], shard.get('partition_values') + ) + primary_indicator = "P" if shard.get('primary') else "R" + console.print(f" [red]⚠[/red] {table_display} S{shard['shard_id']}{primary_indicator} {shard['state']}") + + if len(non_recovering_shards) > 5: + console.print(f" [dim]... 
and {len(non_recovering_shards) - 5} more[/dim]") else: - # Show summary + # Show recovery summary summary = recovery_monitor.get_recovery_summary(recoveries) - console.print(f"\n📊 [bold]Recovery Summary:[/bold]") - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") + console.print(f"\n📊 [bold]Cluster Status Summary:[/bold]") + console.print(f" Active recoveries: {summary['total_recoveries']}") + console.print(f" Total recovery size: {summary['total_size_gb']:.1f} GB") console.print(f" Average progress: {summary['avg_progress']:.1f}%") # Show breakdown by type @@ -1275,6 +1879,19 @@ def monitor_recovery(ctx, table: str, node: str, watch: bool, refresh_interval: for rec_type, stats in summary['by_type'].items(): console.print(f" {rec_type}: {stats['count']} recoveries, {stats['avg_progress']:.1f}% avg progress") + # Show problematic shards if any + if non_recovering_shards: + console.print(f"\n [yellow]Problematic shards needing attention: {len(non_recovering_shards)}[/yellow]") + by_state = {} + for shard in non_recovering_shards: + state = shard['state'] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") + console.print(f"\n[dim]💡 Use --watch flag for continuous monitoring[/dim]") except Exception as e: @@ -1490,5 +2107,914 @@ def shard_distribution(ctx, top_tables: int, table: Optional[str]): console.print(f"[dim]{traceback.format_exc()}[/dim]") +@main.command() +@click.option('--count', default=10, help='Number of most active shards to show (default: 10)') +@click.option('--interval', default=30, help='Observation interval in seconds (default: 30)') +@click.option('--min-checkpoint-delta', default=1000, help='Minimum checkpoint progression between snapshots to show shard (default: 1000)') +@click.option('--table', '-t', help='Monitor specific table only') 
+@click.option('--node', '-n', help='Monitor specific node only') +@click.option('--watch', '-w', is_flag=True, help='Continuously monitor (refresh every interval)') +@click.option('--exclude-system', is_flag=True, help='Exclude system tables (gc.*, information_schema.*)') +@click.option('--min-rate', type=float, help='Minimum activity rate (changes/sec) to show') +@click.option('--show-replicas/--hide-replicas', default=True, help='Show replica shards (default: True)') +@click.pass_context +def active_shards(ctx, count: int, interval: int, min_checkpoint_delta: int, + table: Optional[str], node: Optional[str], watch: bool, + exclude_system: bool, min_rate: Optional[float], show_replicas: bool): + """Monitor most active shards by checkpoint progression + + This command takes two snapshots of ALL started shards separated by the + observation interval, then shows the shards with the highest checkpoint + progression (activity) between the snapshots. + + Unlike other commands, this tracks ALL shards and filters based on actual + activity between snapshots, not current state. This captures shards that + become active during the observation period. + + Useful for identifying which shards are receiving the most write activity + in your cluster and understanding write patterns. 
+ + Examples: + xmover active-shards --count 20 --interval 60 # Top 20 over 60 seconds + xmover active-shards --watch --interval 30 # Continuous monitoring + xmover active-shards --table my_table --watch # Monitor specific table + xmover active-shards --node data-hot-1 --count 5 # Top 5 on specific node + xmover active-shards --min-checkpoint-delta 500 # Lower activity threshold + xmover active-shards --exclude-system --min-rate 50 # Skip system tables, min 50/sec + xmover active-shards --hide-replicas --count 20 # Only primary shards + """ + client = ctx.obj['client'] + monitor = ActiveShardMonitor(client) + + def get_filtered_snapshot(): + """Get snapshot with optional filtering""" + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=min_checkpoint_delta) + + # Apply table filter if specified + if table: + snapshots = [s for s in snapshots if s.table_name == table or + f"{s.schema_name}.{s.table_name}" == table] + + # Apply node filter if specified + if node: + snapshots = [s for s in snapshots if s.node_name == node] + + # Exclude system tables if requested + if exclude_system: + snapshots = [s for s in snapshots if not ( + s.schema_name.startswith('gc.') or + s.schema_name == 'information_schema' or + s.schema_name == 'sys' or + s.table_name.endswith('_events') or + s.table_name.endswith('_log') + )] + + return snapshots + + def run_single_analysis(): + """Run a single analysis cycle""" + if not watch: + console.print(Panel.fit("[bold blue]Active Shards Monitor[/bold blue]")) + + # Show configuration - simplified for watch mode + if watch: + config_parts = [f"{interval}s interval", f"threshold: {min_checkpoint_delta:,}", f"top {count}"] + if table: + config_parts.append(f"table: {table}") + if node: + config_parts.append(f"node: {node}") + console.print(f"[dim]{' | '.join(config_parts)}[/dim]") + else: + config_info = [ + f"Observation interval: {interval}s", + f"Min checkpoint delta: {min_checkpoint_delta:,}", + f"Show count: {count}" + ] + if 
table: + config_info.append(f"Table filter: {table}") + if node: + config_info.append(f"Node filter: {node}") + if exclude_system: + config_info.append("Excluding system tables") + if min_rate: + config_info.append(f"Min rate: {min_rate}/sec") + if not show_replicas: + config_info.append("Primary shards only") + + console.print("[dim]" + " | ".join(config_info) + "[/dim]") + console.print() + + # Take first snapshot + if not watch: + console.print("📷 Taking first snapshot...") + snapshot1 = get_filtered_snapshot() + + if not snapshot1: + console.print("[yellow]No started shards found matching criteria[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot1)} started shards for activity") + console.print(f"⏱️ Waiting {interval} seconds for activity...") + + # Wait for observation interval + if watch: + # Simplified countdown for watch mode + for remaining in range(interval, 0, -1): + if remaining % 5 == 0 or remaining <= 3: # Show fewer updates + console.print(f"[dim]⏱️ {remaining}s...[/dim]", end="\r") + time.sleep(1) + console.print(" " * 15, end="\r") # Clear countdown + else: + time.sleep(interval) + + # Take second snapshot + if not watch: + console.print("📷 Taking second snapshot...") + snapshot2 = get_filtered_snapshot() + + if not snapshot2: + console.print("[yellow]No started shards found in second snapshot[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot2)} started shards for activity") + + # Compare snapshots and show results + activities = monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=min_checkpoint_delta) + + # Apply additional filters + if not show_replicas: + activities = [a for a in activities if a.is_primary] + + if min_rate: + activities = [a for a in activities if a.activity_rate >= min_rate] + + if not activities: + console.print(f"[green]✅ No shards exceeded activity threshold ({min_checkpoint_delta:,} checkpoint changes)[/green]") + if min_rate: + 
console.print(f"[dim]Also filtered by minimum rate: {min_rate}/sec[/dim]") + else: + if not watch: + overlap_count = len(set(s.shard_identifier for s in snapshot1) & + set(s.shard_identifier for s in snapshot2)) + console.print(f"[dim]Analyzed {overlap_count} shards present in both snapshots[/dim]") + console.print(monitor.format_activity_display(activities, show_count=count, watch_mode=watch)) + + try: + if watch: + console.print("[dim]Press Ctrl+C to stop monitoring[/dim]") + console.print() + + while True: + run_single_analysis() + if watch: + console.print(f"\n[dim]━━━ Next update in {interval}s ━━━[/dim]\n") + time.sleep(interval) + else: + run_single_analysis() + + except KeyboardInterrupt: + console.print("\n[yellow]Monitoring stopped by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during active shards monitoring: {e}[/red]") + import traceback + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + +@main.command() +@click.option('--sizeMB', default=300, help='Minimum translog uncommitted size in MB (default: 300)') +@click.option('--execute', is_flag=True, help='Execute the replica commands after confirmation') +@click.pass_context +def problematic_translogs(ctx, sizemb: int, execute: bool): + """Find tables with problematic translog sizes and generate comprehensive shard management commands + + This command identifies tables with replica shards that have large uncommitted translog sizes + indicating replication issues. It generates a complete sequence including: + 1. Stop automatic shard rebalancing + 2. REROUTE CANCEL commands for problematic shards + 3. Set replicas to 0 commands + 4. Retention lease queries for monitoring + 5. Set replicas to 1 commands (restored from original values) + 6. Re-enable automatic shard rebalancing + With --execute, it runs them after confirmation. 
+ """ + client = ctx.obj['client'] + + console.print(Panel.fit("[bold blue]Problematic Translog Analysis[/bold blue]")) + console.print(f"[dim]Looking for tables with replica shards having translog uncommitted size > {sizemb}MB[/dim]") + console.print() + + # First query to get individual problematic shards for REROUTE CANCEL commands + individual_shards_query = """ + SELECT + sh.schema_name, + sh.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + sh.id AS shard_id, + node['name'] AS node_name, + sh.translog_stats['uncommitted_size'] / 1024^2 AS translog_uncommitted_mb + FROM + sys.shards AS sh + LEFT JOIN information_schema.table_partitions p + ON sh.table_name = p.table_name + AND sh.schema_name = p.table_schema + AND sh.partition_ident = p.partition_ident + WHERE + sh.state = 'STARTED' + AND sh.translog_stats['uncommitted_size'] > ? * 1024^2 + AND primary=FALSE + ORDER BY + translog_uncommitted_mb DESC + """ + + # Query to find tables with problematic replica shards, grouped by table/partition + summary_query = """ + SELECT + all_shards.schema_name, + all_shards.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + p.partition_ident, + COUNT(CASE WHEN all_shards.primary=FALSE AND all_shards.translog_stats['uncommitted_size'] > ? * 1024^2 THEN 1 END) as problematic_replica_shards, + MAX(CASE WHEN all_shards.primary=FALSE AND all_shards.translog_stats['uncommitted_size'] > ? 
* 1024^2 THEN all_shards.translog_stats['uncommitted_size'] / 1024^2 END) AS max_translog_uncommitted_mb, + COUNT(CASE WHEN all_shards.primary=TRUE THEN 1 END) as total_primary_shards, + COUNT(CASE WHEN all_shards.primary=FALSE THEN 1 END) as total_replica_shards, + SUM(CASE WHEN all_shards.primary=TRUE THEN all_shards.size / 1024^3 ELSE 0 END) as total_primary_size_gb, + SUM(CASE WHEN all_shards.primary=FALSE THEN all_shards.size / 1024^3 ELSE 0 END) as total_replica_size_gb + FROM + sys.shards AS all_shards + LEFT JOIN information_schema.table_partitions p + ON all_shards.table_name = p.table_name + AND all_shards.schema_name = p.table_schema + AND all_shards.partition_ident = p.partition_ident + WHERE + all_shards.state = 'STARTED' + AND all_shards.schema_name || '.' || all_shards.table_name || COALESCE(all_shards.partition_ident, '') IN ( + SELECT DISTINCT sh.schema_name || '.' || sh.table_name || COALESCE(sh.partition_ident, '') + FROM sys.shards AS sh + WHERE sh.state = 'STARTED' + AND sh.translog_stats['uncommitted_size'] > ? 
* 1024^2 + AND sh.primary=FALSE + ) + GROUP BY + all_shards.schema_name, all_shards.table_name, partition_values, p.partition_ident + ORDER BY + max_translog_uncommitted_mb DESC + """ + + try: + # Get individual shards first + individual_result = client.execute_query(individual_shards_query, [sizemb]) + individual_shards = individual_result.get('rows', []) + + # Get summary data + summary_result = client.execute_query(summary_query, [sizemb, sizemb, sizemb]) + summary_rows = summary_result.get('rows', []) + + if not individual_shards: + console.print(f"[green]✓ No tables found with replica shards having translog uncommitted size > {sizemb}MB[/green]") + return + + # Display individual problematic shards first + console.print(f"[bold]Problematic Replica Shards (translog > {sizemb}MB)[/bold]") + from rich.table import Table + individual_table = Table(box=box.ROUNDED) + individual_table.add_column("Schema", style="cyan") + individual_table.add_column("Table", style="blue") + individual_table.add_column("Partition", style="magenta") + individual_table.add_column("Shard ID", justify="right", style="yellow") + individual_table.add_column("Node", style="green") + individual_table.add_column("Translog MB", justify="right", style="red") + + for row in individual_shards: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb = row + partition_display = partition_values if partition_values and partition_values != 'NULL' else "none" + + individual_table.add_row( + schema_name, + table_name, + partition_display, + str(shard_id), + node_name, + f"{translog_mb:.1f}" + ) + + console.print(individual_table) + console.print() + + console.print(f"Found {len(summary_rows)} table/partition(s) with problematic translogs:") + console.print() + + # Display summary table + results_table = Table(title=f"Tables with Problematic Replicas (translog > {sizemb}MB)", box=box.ROUNDED) + results_table.add_column("Schema", style="cyan") + results_table.add_column("Table", 
style="blue") + results_table.add_column("Partition", style="magenta") + results_table.add_column("Problematic Replicas", justify="right", style="yellow") + results_table.add_column("Max Translog MB", justify="right", style="red") + results_table.add_column("Shards (P/R)", justify="right", style="blue") + results_table.add_column("Size GB (P/R)", justify="right", style="bright_blue") + results_table.add_column("Current Replicas", justify="right", style="green") + + # Collect table/partition info and look up current replica counts + table_replica_info = [] + for row in summary_rows: + schema_name, table_name, partition_values, partition_ident, problematic_replica_shards, max_translog_mb, total_primary_shards, total_replica_shards, total_primary_size_gb, total_replica_size_gb = row + partition_display = partition_values if partition_values and partition_values != 'NULL' else "[dim]none[/dim]" + + # Look up current replica count + current_replicas = 0 + try: + if partition_values and partition_values != 'NULL': + # Partitioned table query + replica_query = """ + SELECT number_of_replicas + FROM information_schema.table_partitions + WHERE table_name = ? AND table_schema = ? AND partition_ident = ? + """ + replica_result = client.execute_query(replica_query, [table_name, schema_name, partition_ident]) + else: + # Non-partitioned table query + replica_query = """ + SELECT number_of_replicas + FROM information_schema.tables + WHERE table_name = ? AND table_schema = ? 
+ """ + replica_result = client.execute_query(replica_query, [table_name, schema_name]) + + replica_rows = replica_result.get('rows', []) + if replica_rows: + current_replicas = replica_rows[0][0] + except Exception as e: + console.print(f"[yellow]Warning: Could not retrieve replica count for {schema_name}.{table_name}: {e}[/yellow]") + current_replicas = "unknown" + + table_replica_info.append(( + schema_name, table_name, partition_values, partition_ident, + problematic_replica_shards, max_translog_mb, total_primary_shards, total_replica_shards, + total_primary_size_gb, total_replica_size_gb, current_replicas + )) + + results_table.add_row( + schema_name, + table_name, + partition_display, + str(problematic_replica_shards), + f"{max_translog_mb:.1f}", + f"{total_primary_shards}P/{total_replica_shards}R", + f"{total_primary_size_gb:.1f}/{total_replica_size_gb:.1f}", + str(current_replicas) + ) + + console.print(results_table) + console.print() + console.print("[bold]Generated Comprehensive Shard Management Commands:[/bold]") + console.print() + + # 1. Stop automatic shard rebalancing + console.print("[bold cyan]1. Stop Automatic Shard Rebalancing:[/bold cyan]") + rebalance_disable_cmd = 'SET GLOBAL PERSISTENT "cluster.routing.rebalance.enable"=\'none\';' + console.print(rebalance_disable_cmd) + console.print() + + # 2. Generate REROUTE CANCEL SHARD commands for individual shards (unchanged) + console.print("[bold cyan]2. REROUTE CANCEL Commands (unchanged from original):[/bold cyan]") + reroute_commands = [] + for row in individual_shards: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb = row + cmd = f'ALTER TABLE "{schema_name}"."{table_name}" REROUTE CANCEL SHARD {shard_id} on \'{node_name}\' WITH (allow_primary=False);' + reroute_commands.append(cmd) + console.print(cmd) + + if reroute_commands: + console.print() + + # 3. Generate ALTER commands to set replicas to 0 + console.print("[bold cyan]3. 
Set Replicas to 0:[/bold cyan]") + set_zero_commands = [] + valid_table_info = [] + + for info in table_replica_info: + schema_name, table_name, partition_values, partition_ident, problematic_replica_shards, max_translog_mb, total_primary_shards, total_replica_shards, total_primary_size_gb, total_replica_size_gb, current_replicas = info + + if current_replicas == "unknown": + console.print(f"[yellow]-- Skipping {schema_name}.{table_name} (unknown replica count)[/yellow]") + continue + + if current_replicas == 0: + console.print(f"[yellow]-- Skipping {schema_name}.{table_name} (already has 0 replicas)[/yellow]") + continue + + valid_table_info.append(info) + + # Build the ALTER command to set replicas to 0 + if partition_values and partition_values != 'NULL': + # Partitioned table commands + cmd_set_zero = f'ALTER TABLE "{schema_name}"."{table_name}" PARTITION {partition_values} SET ("number_of_replicas" = 0);' + else: + # Non-partitioned table commands + cmd_set_zero = f'ALTER TABLE "{schema_name}"."{table_name}" SET ("number_of_replicas" = 0);' + + set_zero_commands.append(cmd_set_zero) + console.print(cmd_set_zero) + + console.print() + + # 4. Generate retention lease queries for monitoring + console.print("[bold cyan]4. 
Retention Lease Monitoring Queries:[/bold cyan]") + retention_queries = [] + + for info in valid_table_info: + schema_name, table_name, partition_values, partition_ident, problematic_replica_shards, max_translog_mb, total_primary_shards, total_replica_shards, total_primary_size_gb, total_replica_size_gb, current_replicas = info + + if partition_values and partition_values != 'NULL': + # For partitioned tables, we need to resolve the partition_ident + # First, get all partition_idents for this table + partition_query = f"""SELECT array_length(retention_leases['leases'], 1) as cnt_leases, id +FROM sys.shards +WHERE table_name = '{table_name}' + AND schema_name = '{schema_name}' + AND partition_ident = '{partition_ident}' +ORDER BY array_length(retention_leases['leases'], 1);""" + else: + # For non-partitioned tables + partition_query = f"""SELECT array_length(retention_leases['leases'], 1) as cnt_leases, id +FROM sys.shards +WHERE table_name = '{table_name}' + AND schema_name = '{schema_name}' +ORDER BY array_length(retention_leases['leases'], 1);""" + + retention_queries.append(partition_query) + console.print(f"-- For {schema_name}.{table_name}:") + console.print(partition_query) + console.print() + + # 5. Generate ALTER commands to set replicas to 1 (or original value) + console.print("[bold cyan]5. 
Restore Replicas to Original Values:[/bold cyan]") + restore_commands = [] + + for info in valid_table_info: + schema_name, table_name, partition_values, partition_ident, problematic_replica_shards, max_translog_mb, total_primary_shards, total_replica_shards, total_primary_size_gb, total_replica_size_gb, current_replicas = info + + # Build the ALTER command to restore replicas + if partition_values and partition_values != 'NULL': + # Partitioned table commands + cmd_restore = f'ALTER TABLE "{schema_name}"."{table_name}" PARTITION {partition_values} SET ("number_of_replicas" = {current_replicas});' + else: + # Non-partitioned table commands + cmd_restore = f'ALTER TABLE "{schema_name}"."{table_name}" SET ("number_of_replicas" = {current_replicas});' + + restore_commands.append(cmd_restore) + console.print(cmd_restore) + + console.print() + + # 6. Re-enable automatic shard rebalancing + console.print("[bold cyan]6. Re-enable Automatic Shard Rebalancing:[/bold cyan]") + rebalance_enable_cmd = 'SET GLOBAL PERSISTENT "cluster.routing.rebalance.enable"=\'all\';' + console.print(rebalance_enable_cmd) + console.print() + + # Collect all commands for execution + all_commands = [rebalance_disable_cmd] + reroute_commands + set_zero_commands + restore_commands + [rebalance_enable_cmd] + + if not all_commands: + console.print("[yellow]No ALTER commands generated[/yellow]") + return + + console.print(f"[bold]Total Commands:[/bold]") + console.print(f" • 1 rebalancing disable command") + console.print(f" • {len(reroute_commands)} REROUTE CANCEL commands") + console.print(f" • {len(set_zero_commands)} set replicas to 0 commands") + console.print(f" • {len(retention_queries)} retention lease queries (for monitoring)") + console.print(f" • {len(restore_commands)} restore replicas commands") + console.print(f" • 1 rebalancing enable command") + + if execute and all_commands: + console.print() + console.print("[yellow]⚠️ WARNING: This will execute the complete shard management 
sequence![/yellow]") + console.print("[yellow]This includes disabling rebalancing, canceling problematic shards,") + console.print("setting replicas to 0, restoring replicas, and re-enabling rebalancing.[/yellow]") + console.print("[yellow]Retention lease queries will be displayed but not executed.[/yellow]") + console.print() + + if click.confirm("Execute all commands with individual confirmation for each?"): + console.print() + console.print("[bold blue]Executing comprehensive shard management sequence...[/bold blue]") + + executed = 0 + failed = 0 + cmd_num = 0 + + # 1. Execute rebalancing disable command + cmd_num += 1 + console.print(f"[bold]Step 1: Disable Rebalancing[/bold]") + console.print(f"[dim]Command {cmd_num}: {rebalance_disable_cmd}[/dim]") + if click.confirm(f"Execute rebalancing disable command?"): + try: + client.execute_query(rebalance_disable_cmd) + console.print(f"[green]✓ Command {cmd_num} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]✗ Command {cmd_num} failed: {e}[/red]") + failed += 1 + else: + console.print(f"[yellow]Command {cmd_num} skipped[/yellow]") + console.print() + + # 2. Execute REROUTE CANCEL commands + if reroute_commands: + console.print(f"[bold]Step 2: Execute REROUTE CANCEL Commands[/bold]") + for cmd in reroute_commands: + cmd_num += 1 + console.print(f"[dim]Command {cmd_num}: {cmd}[/dim]") + if click.confirm(f"Execute this REROUTE CANCEL command?"): + try: + client.execute_query(cmd) + console.print(f"[green]✓ Command {cmd_num} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]✗ Command {cmd_num} failed: {e}[/red]") + failed += 1 + else: + console.print(f"[yellow]Command {cmd_num} skipped[/yellow]") + console.print() + + # 3. 
Execute set replicas to 0 commands + if set_zero_commands: + console.print(f"[bold]Step 3: Set Replicas to 0[/bold]") + for cmd in set_zero_commands: + cmd_num += 1 + console.print(f"[dim]Command {cmd_num}: {cmd}[/dim]") + if click.confirm(f"Execute this SET replicas to 0 command?"): + try: + client.execute_query(cmd) + console.print(f"[green]✓ Command {cmd_num} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]✗ Command {cmd_num} failed: {e}[/red]") + failed += 1 + else: + console.print(f"[yellow]Command {cmd_num} skipped[/yellow]") + console.print() + + # 4. Display retention lease queries (not executed) + if retention_queries: + console.print(f"[bold]Step 4: Retention Lease Monitoring Queries (for reference)[/bold]") + console.print("[dim]These queries are for monitoring purposes and will not be executed:[/dim]") + for i, query in enumerate(retention_queries, 1): + console.print(f"[dim]Query {i}:[/dim]") + console.print(f"[dim]{query}[/dim]") + console.print() + + # 5. Execute restore replicas commands + if restore_commands: + console.print(f"[bold]Step 5: Restore Replicas to Original Values[/bold]") + for cmd in restore_commands: + cmd_num += 1 + console.print(f"[dim]Command {cmd_num}: {cmd}[/dim]") + if click.confirm(f"Execute this RESTORE replicas command?"): + try: + client.execute_query(cmd) + console.print(f"[green]✓ Command {cmd_num} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]✗ Command {cmd_num} failed: {e}[/red]") + failed += 1 + else: + console.print(f"[yellow]Command {cmd_num} skipped[/yellow]") + console.print() + + # 6. 
Execute rebalancing enable command + cmd_num += 1 + console.print(f"[bold]Step 6: Re-enable Rebalancing[/bold]") + console.print(f"[dim]Command {cmd_num}: {rebalance_enable_cmd}[/dim]") + if click.confirm(f"Execute rebalancing enable command?"): + try: + client.execute_query(rebalance_enable_cmd) + console.print(f"[green]✓ Command {cmd_num} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]✗ Command {cmd_num} failed: {e}[/red]") + failed += 1 + else: + console.print(f"[yellow]Command {cmd_num} skipped[/yellow]") + console.print() + + console.print(f"[bold]Execution Summary:[/bold]") + console.print(f"[green]✓ Successful: {executed}[/green]") + if failed > 0: + console.print(f"[red]✗ Failed: {failed}[/red]") + else: + console.print("[yellow]Operation cancelled by user[/yellow]") + + except Exception as e: + console.print(f"[red]Error analyzing problematic translogs: {e}[/red]") + import traceback + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + +@main.command() +@click.option('--translogsize', default=500, help='Minimum translog uncommitted size threshold in MB (default: 500)') +@click.option('--interval', default=60, help='Monitoring interval in seconds for watch mode (default: 60)') +@click.option('--watch', '-w', is_flag=True, help='Continuously monitor (refresh every interval)') +@click.option('--table', '-t', help='Monitor specific table only') +@click.option('--node', '-n', help='Monitor specific node only') +@click.option('--count', default=50, help='Maximum number of shards with large translogs to show (default: 50)') +@click.pass_context +def large_translogs(ctx, translogsize: int, interval: int, watch: bool, table: Optional[str], node: Optional[str], count: int): + """Monitor shards with large translog uncommitted sizes that do not flush + + This command identifies shards (both primary and replica) that have large + translog uncommitted sizes, indicating they are not flushing properly. 
+ Useful for monitoring translog growth and identifying problematic shards. + + Examples: + xmover large-translogs --translogsize 1000 # Shards with >1GB translog + xmover large-translogs --watch --interval 30 # Continuous monitoring every 30s + xmover large-translogs --table my_table --watch # Monitor specific table + xmover large-translogs --node data-hot-1 --count 20 # Top 20 on specific node + """ + client = ctx.obj['client'] + + def get_large_translog_shards(): + """Get shards with large translog uncommitted sizes""" + query = """ + SELECT + sh.schema_name, + sh.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + sh.id AS shard_id, + node['name'] AS node_name, + COALESCE(sh.translog_stats['uncommitted_size'] / 1024^2, 0) AS translog_uncommitted_mb, + sh.primary, + sh.size / 1024^2 AS shard_size_mb + FROM + sys.shards AS sh + LEFT JOIN information_schema.table_partitions p + ON sh.table_name = p.table_name + AND sh.schema_name = p.table_schema + AND sh.partition_ident = p.partition_ident + WHERE + sh.state = 'STARTED' + AND COALESCE(sh.translog_stats['uncommitted_size'], 0) > ? * 1024^2 + """ + + params = [translogsize] + + # Add table filter if specified + if table: + if '.' in table: + schema_name, table_name = table.split('.', 1) + query += " AND sh.schema_name = ? AND sh.table_name = ?" + params.extend([schema_name, table_name]) + else: + query += " AND sh.table_name = ?" + params.append(table) + + # Add node filter if specified + if node: + query += " AND node['name'] = ?" + params.append(node) + + query += """ + ORDER BY + COALESCE(sh.translog_stats['uncommitted_size'], 0) DESC + LIMIT ? 
+ """ + params.append(count) + + try: + result = client.execute_query(query, params) + return result.get('rows', []) + except Exception as e: + console.print(f"[red]Error querying shards with large translogs: {e}[/red]") + return [] + + def display_large_translog_shards(shards_data, show_header=True): + """Display the shards with large translogs in a table""" + if not shards_data: + threshold_display = f"{translogsize}MB" if translogsize < 1000 else f"{translogsize/1000:.1f}GB" + console.print(f"[green]✅ No shards found with translog uncommitted size over {threshold_display}[/green]") + return + + # Get current timestamp + import datetime + timestamp = datetime.datetime.now().strftime("%H:%M:%S") + + # Create condensed table + from rich.table import Table + results_table = Table(show_header=show_header, box=box.SIMPLE if watch else box.ROUNDED) + if show_header: + results_table.add_column("Schema.Table", style="cyan", max_width=50) + results_table.add_column("Partition", style="magenta", max_width=30) + results_table.add_column("Shard", justify="right", style="yellow", width=5) + results_table.add_column("Node", style="green", max_width=12) + results_table.add_column("TL MB", justify="right", style="red", width=6) + results_table.add_column("Type", justify="center", style="bright_white", width=4) + else: + results_table.add_column("", style="cyan", max_width=50) + results_table.add_column("", style="magenta", max_width=30) + results_table.add_column("", justify="right", style="yellow", width=5) + results_table.add_column("", style="green", max_width=12) + results_table.add_column("", justify="right", style="red", width=6) + results_table.add_column("", justify="center", style="bright_white", width=4) + + for row in shards_data: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb, is_primary, shard_size_mb = row + + # Format table name + if schema_name and schema_name != 'doc': + table_display = f"{schema_name}.{table_name}" + else: + 
table_display = table_name + + # Format partition + if partition_values and partition_values != 'NULL': + partition_display = partition_values[:27] + "..." if len(partition_values) > 30 else partition_values + else: + partition_display = "-" + + primary_display = "P" if is_primary else "R" + + # Color code translog based on size + if translog_mb > 1000: + translog_color = "bright_red" + elif translog_mb > 500: + translog_color = "red" + elif translog_mb > 100: + translog_color = "yellow" + else: + translog_color = "green" + + results_table.add_row( + table_display, + partition_display, + str(shard_id), + node_name, + f"[{translog_color}]{translog_mb:.0f}[/{translog_color}]", + primary_display + ) + + # Show timestamp and summary + total_shards = len(shards_data) + primary_count = sum(1 for row in shards_data if row[6]) # is_primary is at index 6 + replica_count = total_shards - primary_count + avg_translog = sum(row[5] for row in shards_data) / total_shards if total_shards > 0 else 0 # translog_mb is at index 5 + + if show_header: + threshold_display = f"{translogsize}MB" if translogsize < 1000 else f"{translogsize/1000:.1f}GB" + console.print(f"[bold blue]Large Translogs (>{threshold_display}) - {timestamp}[/bold blue]") + else: + console.print(f"[dim]{timestamp}[/dim]") + + console.print(results_table) + console.print(f"[dim]{total_shards} shards ({primary_count}P/{replica_count}R) - Avg translog: {avg_translog:.0f}MB[/dim]") + + def run_single_analysis(): + """Run a single analysis cycle""" + if not watch: + console.print(Panel.fit("[bold blue]Large Translog Monitor[/bold blue]")) + + # Show configuration + threshold_display = f"{translogsize}MB" if translogsize < 1000 else f"{translogsize/1000:.1f}GB" + if watch: + config_parts = [f"{interval}s", f">{threshold_display}", f"top {count}"] + if table: + config_parts.append(f"table: {table}") + if node: + config_parts.append(f"node: {node}") + console.print(f"[dim]{' | '.join(config_parts)}[/dim]") + else: + 
config_info = [f"Threshold: >{threshold_display}"] + if count != 50: + config_info.append(f"Limit: {count}") + if table: + config_info.append(f"Table: {table}") + if node: + config_info.append(f"Node: {node}") + + console.print("[dim]" + " | ".join(config_info) + "[/dim]") + if not watch: + console.print() + + # Get shards with large translogs + shards_data = get_large_translog_shards() + + # Display results + display_large_translog_shards(shards_data, show_header=not watch) + + try: + if watch: + console.print("[dim]Press Ctrl+C to stop monitoring[/dim]") + console.print() + + while True: + run_single_analysis() + if watch: + console.print(f"\n[dim]━━━ Next update in {interval}s ━━━[/dim]\n") + time.sleep(interval) + else: + run_single_analysis() + + except KeyboardInterrupt: + console.print("\n[yellow]Monitoring stopped by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during large translog monitoring: {e}[/red]") + import traceback + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + +@main.command("deep-analyze") +@click.option('--rules-file', '-r', type=click.Path(exists=True), + help='Path to custom rules YAML file') +@click.option('--schema', '-s', help='Analyze specific schema only') +@click.option('--severity', type=click.Choice(['critical', 'warning', 'info']), + help='Show only violations of specified severity') +@click.option('--export-csv', type=click.Path(), + help='Export results to CSV file') +@click.option('--validate-rules', type=click.Path(exists=True), + help='Validate rules file and exit') +@click.pass_context +def deep_analyze(ctx, rules_file: Optional[str], schema: Optional[str], + severity: Optional[str], export_csv: Optional[str], + validate_rules: Optional[str]): + """Deep analysis of shard sizes with configurable optimization rules + + This command analyzes your CrateDB cluster's shard sizes, column counts, + and distribution patterns, then applies a comprehensive set of rules to + identify optimization 
opportunities and performance issues. + + Features: + - Cluster configuration analysis (nodes, CPU, memory, heap) + - Table and partition shard size analysis + - Configurable rule-based recommendations + - CSV export for spreadsheet analysis + - Custom rules file support + + Examples: + + # Run full analysis with default rules + xmover deep-analyze + + # Analyze specific schema only + xmover deep-analyze --schema myschema + + # Show only critical issues + xmover deep-analyze --severity critical + + # Export to spreadsheet + xmover deep-analyze --export-csv shard_analysis.csv + + # Use custom rules + xmover deep-analyze --rules-file custom_rules.yaml + + # Validate rules file + xmover deep-analyze --validate-rules custom_rules.yaml + """ + if validate_rules: + if validate_rules_file(validate_rules): + console.print(f"[green]✅ Rules file {validate_rules} is valid[/green]") + sys.exit(0) + else: + sys.exit(1) + + try: + client = ctx.obj['client'] + + # Initialize monitor with optional custom rules + monitor = ShardSizeMonitor(client, rules_file) + + console.print("[bold blue]🔍 XMover Deep Shard Size Analysis[/bold blue]") + console.print("Analyzing cluster configuration and shard distributions...\n") + + # Run analysis + report = monitor.analyze_cluster_shard_sizes(schema_filter=schema) + + # Display results + monitor.display_report(report, severity_filter=severity) + + # Export CSV if requested + if export_csv: + monitor.export_csv(report, export_csv) + console.print(f"\n[green]📊 Results exported to {export_csv}[/green]") + + # Summary footer + violation_counts = report.total_violations_by_severity + total_violations = sum(violation_counts.values()) + + if total_violations > 0: + console.print(f"\n[bold]Analysis completed:[/bold] {total_violations} optimization opportunities identified") + if violation_counts['critical'] > 0: + console.print("[red]⚠️ Critical issues require immediate attention[/red]") + else: + console.print("\n[bold green]🎉 Excellent! 
No optimization issues detected[/bold green]") + + except Exception as e: + console.print(f"[red]Error during deep shard size analysis: {e}[/red]") + import traceback + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + if __name__ == '__main__': main() diff --git a/src/xmover/database.py b/src/xmover/database.py index 2a561be..2c78e46 100644 --- a/src/xmover/database.py +++ b/src/xmover/database.py @@ -62,6 +62,7 @@ class RecoveryInfo: """Information about an active shard recovery""" schema_name: str table_name: str + partition_values: Optional[str] # Partition values for partitioned tables shard_id: int node_name: str node_id: str @@ -76,6 +77,9 @@ class RecoveryInfo: size_bytes: int source_node_name: Optional[str] = None # Source node for PEER recoveries translog_size_bytes: int = 0 # Translog size in bytes + translog_uncommitted_bytes: int = 0 # Translog uncommitted size in bytes + max_seq_no: Optional[int] = None # Sequence number for this shard + primary_max_seq_no: Optional[int] = None # Primary shard's sequence number for replica progress @property def overall_progress(self) -> float: @@ -101,10 +105,91 @@ def translog_size_gb(self) -> float: """Translog size in GB""" return self.translog_size_bytes / (1024**3) + @property + def translog_uncommitted_gb(self) -> float: + """Translog uncommitted size in GB""" + return self.translog_uncommitted_bytes / (1024**3) + @property def translog_percentage(self) -> float: """Translog size as percentage of shard size""" return (self.translog_size_bytes / self.size_bytes * 100) if self.size_bytes > 0 else 0 + + @property + def translog_uncommitted_percentage(self) -> float: + """Translog uncommitted size as percentage of total translog size""" + return (self.translog_uncommitted_bytes / self.translog_size_bytes * 100) if self.translog_size_bytes > 0 else 0 + + @property + def seq_no_progress(self) -> Optional[float]: + """Calculate replica progress based on sequence numbers (for replica shards only)""" + if not 
self.is_primary and self.max_seq_no is not None and self.primary_max_seq_no is not None: + if self.primary_max_seq_no == 0: + return 100.0 # No operations on primary yet + return min((self.max_seq_no / self.primary_max_seq_no * 100.0), 100.0) + return None + + +@dataclass +class ActiveShardSnapshot: + """Snapshot of active shard checkpoint data for tracking activity""" + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint: int + global_checkpoint: int + translog_uncommitted_bytes: int + timestamp: float # Unix timestamp when snapshot was taken + + @property + def checkpoint_delta(self) -> int: + """Current checkpoint delta (local - global)""" + return self.local_checkpoint - self.global_checkpoint + + @property + def translog_uncommitted_mb(self) -> float: + """Translog uncommitted size in MB""" + return self.translog_uncommitted_bytes / (1024 * 1024) + + @property + def shard_identifier(self) -> str: + """Unique identifier for this shard including partition""" + shard_type = "P" if self.is_primary else "R" + partition = f":{self.partition_ident}" if self.partition_ident else "" + return f"{self.schema_name}.{self.table_name}:{self.shard_id}:{self.node_name}:{shard_type}{partition}" + + +@dataclass +class ActiveShardActivity: + """Activity comparison between two snapshots of the same shard""" + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint_delta: int # Change in local checkpoint between snapshots + snapshot1: ActiveShardSnapshot + snapshot2: ActiveShardSnapshot + time_diff_seconds: float + + @property + def activity_rate(self) -> float: + """Activity rate as checkpoint changes per second""" + if self.time_diff_seconds > 0: + return self.local_checkpoint_delta / self.time_diff_seconds + return 0.0 + + @property + def shard_type(self) -> str: + return "PRIMARY" if self.is_primary else "REPLICA" + + @property + 
def table_identifier(self) -> str: + return f"{self.schema_name}.{self.table_name}" class CrateDBClient: @@ -119,7 +204,20 @@ def __init__(self, connection_string: Optional[str] = None): self.username = os.getenv('CRATE_USERNAME') self.password = os.getenv('CRATE_PASSWORD') - self.ssl_verify = os.getenv('CRATE_SSL_VERIFY', 'true').lower() == 'true' + + # Auto-disable SSL verification for localhost connections + is_localhost = 'localhost' in self.connection_string or '127.0.0.1' in self.connection_string + ssl_verify_env = os.getenv('CRATE_SSL_VERIFY', 'true').lower() + + # Default to false for localhost, true for remote connections + if ssl_verify_env == 'auto': + self.ssl_verify = not is_localhost + else: + self.ssl_verify = ssl_verify_env == 'true' + + # For localhost, disable SSL verification by default unless explicitly enabled + if is_localhost and ssl_verify_env == 'true' and os.getenv('CRATE_SSL_VERIFY') is None: + self.ssl_verify = False # Suppress SSL warnings when SSL verification is disabled if not self.ssl_verify: @@ -139,9 +237,14 @@ def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[s if parameters: payload['args'] = parameters + # Handle authentication - only use auth if both username and password are provided + # For CrateDB, username without password should not use auth auth = None if self.username and self.password: auth = (self.username, self.password) + elif self.username and not self.password: + # For CrateDB 'crate' user without password, don't use auth + auth = None try: response = requests.post( @@ -153,6 +256,15 @@ def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[s ) response.raise_for_status() return response.json() + except requests.exceptions.SSLError as e: + # Provide helpful SSL error message for localhost connections + if 'localhost' in self.connection_string or '127.0.0.1' in self.connection_string: + raise Exception(f"SSL certificate error for localhost connection. 
" + f"Try setting CRATE_SSL_VERIFY=false in your .env file. Error: {e}") + else: + raise Exception(f"SSL error: {e}") + except requests.exceptions.ConnectionError as e: + raise Exception(f"Connection error - check if CrateDB is running and accessible: {e}") except requests.exceptions.RequestException as e: raise Exception(f"Failed to execute query: {e}") @@ -344,7 +456,9 @@ def test_connection(self) -> bool: try: result = self.execute_query("SELECT 1") return result.get('rowcount', 0) >= 0 - except Exception: + except Exception as e: + # Log the actual error for debugging + print(f"Connection test failed: {e}") return False def get_cluster_watermarks(self) -> Dict[str, Any]: @@ -420,6 +534,7 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) SELECT s.table_name, s.schema_name, + translate(p.values::text, ':{}', '=()') as partition_values, s.id as shard_id, s.node['name'] as node_name, s.node['id'] as node_id, @@ -428,8 +543,14 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) s.recovery, s.size, s."primary", - s.translog_stats['size'] as translog_size + s.translog_stats['size'] as translog_size, + s.translog_stats['uncommitted_size'] as translog_uncommitted_size, + s.seq_no_stats['max_seq_no'] as max_seq_no FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident WHERE s.table_name = ? AND s.id = ? 
AND (s.state = 'RECOVERING' OR s.routing_state IN ('INITIALIZING', 'RELOCATING')) ORDER BY s.schema_name @@ -445,17 +566,42 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) return { 'table_name': row[0], 'schema_name': row[1], - 'shard_id': row[2], - 'node_name': row[3], - 'node_id': row[4], - 'routing_state': row[5], - 'state': row[6], - 'recovery': row[7], - 'size': row[8], - 'primary': row[9], - 'translog_size': row[10] or 0 + 'partition_values': row[2], + 'shard_id': row[3], + 'node_name': row[4], + 'node_id': row[5], + 'routing_state': row[6], + 'state': row[7], + 'recovery': row[8], + 'size': row[9], + 'primary': row[10], + 'translog_size': row[11] or 0, + 'translog_uncommitted_size': row[12] or 0, + 'max_seq_no': row[13] } + def _get_primary_max_seq_no(self, schema_name: str, table_name: str, shard_id: int) -> Optional[int]: + """Get the max_seq_no of the primary shard for replica progress comparison""" + try: + query = """ + SELECT s.seq_no_stats['max_seq_no'] as primary_max_seq_no + FROM sys.shards s + WHERE s.schema_name = ? AND s.table_name = ? AND s.id = ? 
+ AND s."primary" = true + AND s.state = 'STARTED' + LIMIT 1 + """ + + result = self.execute_query(query, [schema_name, table_name, shard_id]) + + if result.get('rows'): + return result['rows'][0][0] + return None + + except Exception: + # If query fails, return None + return None + def get_all_recovering_shards(self, table_name: Optional[str] = None, node_name: Optional[str] = None, include_transitioning: bool = False) -> List[RecoveryInfo]: @@ -482,6 +628,37 @@ def get_all_recovering_shards(self, table_name: Optional[str] = None, allocation['schema_name'] = recovery_detail['schema_name'] recovery_info = self._parse_recovery_info(allocation, recovery_detail) + # For replica recoveries, get primary sequence number for progress tracking + if not recovery_info.is_primary and recovery_info.recovery_type == 'PEER': + primary_seq_no = self._get_primary_max_seq_no( + recovery_detail['schema_name'], + recovery_detail['table_name'], + recovery_detail['shard_id'] + ) + # Create updated recovery info with primary sequence number + recovery_info = RecoveryInfo( + schema_name=recovery_info.schema_name, + table_name=recovery_info.table_name, + partition_values=recovery_info.partition_values, + shard_id=recovery_info.shard_id, + node_name=recovery_info.node_name, + node_id=recovery_info.node_id, + recovery_type=recovery_info.recovery_type, + stage=recovery_info.stage, + files_percent=recovery_info.files_percent, + bytes_percent=recovery_info.bytes_percent, + total_time_ms=recovery_info.total_time_ms, + routing_state=recovery_info.routing_state, + current_state=recovery_info.current_state, + is_primary=recovery_info.is_primary, + size_bytes=recovery_info.size_bytes, + source_node_name=recovery_info.source_node_name, + translog_size_bytes=recovery_info.translog_size_bytes, + translog_uncommitted_bytes=recovery_info.translog_uncommitted_bytes, + max_seq_no=recovery_info.max_seq_no, + primary_max_seq_no=primary_seq_no + ) + # Filter out completed recoveries unless 
include_transitioning is True if include_transitioning or not self._is_recovery_completed(recovery_info): recoveries.append(recovery_info) @@ -529,6 +706,7 @@ def _parse_recovery_info(self, allocation: Dict[str, Any], return RecoveryInfo( schema_name=shard_detail['schema_name'], table_name=shard_detail['table_name'], + partition_values=shard_detail.get('partition_values'), shard_id=shard_detail['shard_id'], node_name=shard_detail['node_name'], node_id=shard_detail['node_id'], @@ -542,7 +720,10 @@ def _parse_recovery_info(self, allocation: Dict[str, Any], is_primary=shard_detail['primary'], size_bytes=shard_detail.get('size', 0), source_node_name=source_node, - translog_size_bytes=shard_detail.get('translog_size', 0) + translog_size_bytes=shard_detail.get('translog_size', 0), + translog_uncommitted_bytes=shard_detail.get('translog_uncommitted_size', 0), + max_seq_no=shard_detail.get('max_seq_no'), + primary_max_seq_no=None # Will be populated later for replicas ) def _find_source_node_for_recovery(self, schema_name: str, table_name: str, shard_id: int, target_node_id: str) -> Optional[str]: @@ -587,4 +768,120 @@ def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: """Check if a recovery is completed but still transitioning""" return (recovery_info.stage == 'DONE' and recovery_info.files_percent >= 100.0 and - recovery_info.bytes_percent >= 100.0) \ No newline at end of file + recovery_info.bytes_percent >= 100.0) + + def get_problematic_shards(self, table_name: Optional[str] = None, + node_name: Optional[str] = None) -> List[Dict[str, Any]]: + """Get shards that need attention but aren't actively recovering""" + + where_conditions = ["s.state != 'STARTED'"] + parameters = [] + + if table_name: + where_conditions.append("s.table_name = ?") + parameters.append(table_name) + + if node_name: + where_conditions.append("s.node['name'] = ?") + parameters.append(node_name) + + where_clause = f"WHERE {' AND '.join(where_conditions)}" + + query = f""" + 
SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{{}}', '=()') as partition_values, + s.id as shard_id, + s.state, + s.routing_state, + s.node['name'] as node_name, + s.node['id'] as node_id, + s."primary" + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + {where_clause} + ORDER BY s.state, s.table_name, s.id + """ + + result = self.execute_query(query, parameters) + + problematic_shards = [] + for row in result.get('rows', []): + problematic_shards.append({ + 'schema_name': row[0] or 'doc', + 'table_name': row[1], + 'partition_values': row[2], + 'shard_id': row[3], + 'state': row[4], + 'routing_state': row[5], + 'node_name': row[6], + 'node_id': row[7], + 'primary': row[8] + }) + + return problematic_shards + + def get_active_shards_snapshot(self, min_checkpoint_delta: int = 1000) -> List[ActiveShardSnapshot]: + """Get a snapshot of all started shards for activity monitoring + + Note: This captures ALL started shards regardless of current activity level. + The min_checkpoint_delta parameter is kept for backwards compatibility but + filtering is now done during snapshot comparison to catch shards that + become active between observations. 
+ + Args: + min_checkpoint_delta: Kept for compatibility - filtering now done in comparison + + Returns: + List of ActiveShardSnapshot objects for all started shards + """ + import time + + query = """ + SELECT + sh.schema_name, + sh.table_name, + sh.id AS shard_id, + sh."primary", + node['name'] as node_name, + sh.partition_ident, + sh.translog_stats['uncommitted_size'] AS translog_uncommitted_bytes, + sh.seq_no_stats['local_checkpoint'] AS local_checkpoint, + sh.seq_no_stats['global_checkpoint'] AS global_checkpoint + FROM + sys.shards AS sh + WHERE + sh.state = 'STARTED' + ORDER BY + sh.schema_name, sh.table_name, sh.id, sh.node['name'] + """ + + try: + result = self.execute_query(query) + snapshots = [] + current_time = time.time() + + for row in result.get('rows', []): + snapshot = ActiveShardSnapshot( + schema_name=row[0], + table_name=row[1], + shard_id=row[2], + is_primary=row[3], + node_name=row[4], + partition_ident=row[5] or '', + translog_uncommitted_bytes=row[6] or 0, + local_checkpoint=row[7] or 0, + global_checkpoint=row[8] or 0, + timestamp=current_time + ) + snapshots.append(snapshot) + + return snapshots + + except Exception as e: + print(f"Error getting active shards snapshot: {e}") + return [] \ No newline at end of file diff --git a/src/xmover/shard_size_monitor.py b/src/xmover/shard_size_monitor.py new file mode 100644 index 0000000..fa00ebc --- /dev/null +++ b/src/xmover/shard_size_monitor.py @@ -0,0 +1,689 @@ +""" +XMover Shard Size Monitor + +A comprehensive tool for analyzing CrateDB shard sizes and generating optimization recommendations +based on configurable rules. This module can be used standalone or integrated with other tools. 
+""" + +import csv +import os +import sys +import yaml +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Any, Union +from rich.console import Console +from rich.table import Table +from rich.panel import Panel +from rich.text import Text + + +@dataclass +class ShardSizeRule: + """Represents a single shard size analysis rule.""" + name: str + category: str + severity: str # 'critical', 'warning', 'info' + condition: str # Python expression to evaluate + recommendation: str # Template string with variables + action_hint: Optional[str] = None + + +@dataclass +class RuleViolation: + """Represents a violated rule with context.""" + rule_name: str + category: str + severity: str + recommendation: str + action_hint: Optional[str] + table_identifier: str # schema.table[partition] + + +@dataclass +class ShardAnalysisResult: + """Analysis results for a single table/partition.""" + # Raw data from query + table_schema: str + table_name: str + partition_ident: Optional[str] + total_primary_size_gb: float + avg_shard_size_gb: float + min_shard_size_gb: float + max_shard_size_gb: float + num_shards_primary: int + num_shards_replica: int + num_shards_total: int + num_columns: int + partitioned_by: Optional[str] + clustered_by: Optional[str] + total_documents: int + + # Analysis results + violations: List[RuleViolation] = field(default_factory=list) + + @property + def table_identifier(self) -> str: + """Get human-readable table identifier.""" + base = f"{self.table_schema}.{self.table_name}" + if self.partition_ident and self.partition_ident != '': + return f"{base}[{self.partition_ident}]" + return base + + @property + def has_critical_violations(self) -> bool: + """Check if there are any critical violations.""" + return any(v.severity == 'critical' for v in self.violations) + + @property + def has_warnings(self) -> bool: + """Check if there are any warning violations.""" + return 
any(v.severity == 'warning' for v in self.violations) + + +@dataclass +class ClusterConfiguration: + """Cluster-level configuration and metrics.""" + total_nodes: int + total_cpu_cores: int + total_memory_gb: float + total_heap_gb: float + max_shards_per_node_setting: int + actual_max_shards_per_node: int + total_shards: int + disk_watermark_low: Optional[float] = None + disk_watermark_high: Optional[float] = None + disk_watermark_flood_stage: Optional[float] = None + + +@dataclass +class MonitoringReport: + """Complete analysis report.""" + timestamp: datetime + cluster_config: ClusterConfiguration + table_results: List[ShardAnalysisResult] + cluster_violations: List[RuleViolation] + + @property + def total_violations_by_severity(self) -> Dict[str, int]: + """Count violations by severity level.""" + counts = {'critical': 0, 'warning': 0, 'info': 0} + + # Count table-level violations + for result in self.table_results: + for violation in result.violations: + counts[violation.severity] += 1 + + # Count cluster-level violations + for violation in self.cluster_violations: + counts[violation.severity] += 1 + + return counts + + +class RulesConfigValidator: + """Validates rules configuration files.""" + + @staticmethod + def validate_config(config: Dict[str, Any]) -> List[str]: + """Validate rules configuration and return list of errors.""" + errors = [] + + # Check required top-level fields + required_fields = ['metadata', 'thresholds', 'rules'] + for field in required_fields: + if field not in config: + errors.append(f"Missing required field: {field}") + + if 'validation' in config and 'rule_required_fields' in config['validation']: + rule_required_fields = config['validation']['rule_required_fields'] + else: + rule_required_fields = ['name', 'category', 'severity', 'condition', 'recommendation'] + + # Validate individual rules + if 'rules' in config: + for i, rule in enumerate(config['rules']): + for field in rule_required_fields: + if field not in rule: + 
errors.append(f"Rule {i}: Missing required field '{field}'") + + # Validate severity + if 'severity' in rule: + valid_severities = config.get('validation', {}).get('valid_severities', + ['critical', 'warning', 'info']) + if rule['severity'] not in valid_severities: + errors.append(f"Rule {i} ({rule.get('name', 'unnamed')}): " + f"Invalid severity '{rule['severity']}'") + + # Try to compile condition as Python expression + if 'condition' in rule: + try: + compile(rule['condition'], '', 'eval') + except SyntaxError as e: + errors.append(f"Rule {i} ({rule.get('name', 'unnamed')}): " + f"Invalid condition syntax: {e}") + + # Validate cluster rules if present + if 'cluster_rules' in config: + for i, rule in enumerate(config['cluster_rules']): + for field in rule_required_fields: + if field not in rule: + errors.append(f"Cluster rule {i}: Missing required field '{field}'") + + return errors + + +class ShardSizeMonitor: + """Main shard size monitoring and analysis class.""" + + SHARD_ANALYSIS_QUERY = """ + WITH columns AS ( + SELECT table_schema, + table_name, + COUNT(*) AS num_columns + FROM information_schema.columns + GROUP BY ALL + ), tables AS ( + SELECT table_schema, + table_name, + partitioned_by, + clustered_by + FROM information_schema.tables + ), shards AS ( + SELECT schema_name AS table_schema, + table_name, + partition_ident, + SUM(size) FILTER (WHERE primary = TRUE) / POWER(1024, 3) AS total_primary_size_gb, + AVG(size) / POWER(1024, 3) AS avg_shard_size_gb, + MIN(size) / POWER(1024, 3) AS min_shard_size_gb, + MAX(size) / POWER(1024, 3) AS max_shard_size_gb, + COUNT(*) FILTER (WHERE primary = TRUE) AS num_shards_primary, + COUNT(*) FILTER (WHERE primary = FALSE) AS num_shards_replica, + COUNT(*) AS num_shards_total, + SUM(num_docs) AS total_documents + FROM sys.shards + GROUP BY ALL + ) + SELECT s.*, + num_columns, + partitioned_by[1] AS partitioned_by, + clustered_by + FROM shards s + JOIN columns c ON s.table_name = c.table_name AND s.table_schema = 
c.table_schema + JOIN tables t ON s.table_name = t.table_name AND s.table_schema = t.table_schema + ORDER BY table_schema, table_name, partition_ident + """ + + def __init__(self, db_client, rules_config_path: Optional[str] = None): + """Initialize monitor with database client and rules configuration.""" + self.db_client = db_client + self.console = Console() + + # Load rules configuration + if rules_config_path is None: + # Use default rules file + current_dir = Path(__file__).parent.parent.parent + rules_config_path = current_dir / "config" / "shard_size_rules.yaml" + + self.rules_config = self._load_rules_config(rules_config_path) + self.thresholds = self.rules_config.get('thresholds', {}) + self.table_rules = [ShardSizeRule(**rule) for rule in self.rules_config.get('rules', [])] + self.cluster_rules = [ShardSizeRule(**rule) for rule in self.rules_config.get('cluster_rules', [])] + + def _load_rules_config(self, config_path: Union[str, Path]) -> Dict[str, Any]: + """Load and validate rules configuration from YAML file.""" + try: + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + # Validate configuration + validator = RulesConfigValidator() + errors = validator.validate_config(config) + + if errors: + self.console.print("[red]Configuration validation errors:[/red]") + for error in errors: + self.console.print(f" • {error}") + sys.exit(1) + + return config + + except FileNotFoundError: + self.console.print(f"[red]Rules configuration file not found: {config_path}[/red]") + sys.exit(1) + except yaml.YAMLError as e: + self.console.print(f"[red]Error parsing YAML configuration: {e}[/red]") + sys.exit(1) + + def analyze_cluster_shard_sizes(self, schema_filter: Optional[str] = None) -> MonitoringReport: + """Run complete shard size analysis.""" + self.console.print("🔍 Gathering cluster configuration...") + cluster_config = self._gather_cluster_config() + + self.console.print("📊 Analyzing shard sizes and table schemas...") + table_results = 
self._analyze_table_shards(cluster_config, schema_filter) + + self.console.print("✅ Applying analysis rules...") + cluster_violations = self._evaluate_cluster_rules(cluster_config, table_results) + + return MonitoringReport( + timestamp=datetime.now(), + cluster_config=cluster_config, + table_results=table_results, + cluster_violations=cluster_violations + ) + + def _gather_cluster_config(self) -> ClusterConfiguration: + """Gather cluster-level configuration and metrics.""" + # Get cluster nodes info + nodes_query = """ + SELECT + COUNT(*) as total_nodes, + SUM(os_info['available_processors']) as total_cpu_cores, + SUM(mem['used'] + mem['free']) / POWER(1024, 3) as total_memory_gb, + SUM(heap['max']) / POWER(1024, 3) as total_heap_gb + FROM sys.nodes + WHERE name IS NOT NULL + """ + nodes_result = self.db_client.execute_query(nodes_query) + nodes_data = nodes_result.get('rows', [])[0] + + # Get cluster settings - use default if sys.cluster is not accessible + max_shards_setting = 1000 # CrateDB default + + try: + settings_query = """ + SELECT settings['cluster']['max_shards_per_node'] as max_shards_per_node + FROM sys.cluster + """ + settings_result = self.db_client.execute_query(settings_query) + + rows = settings_result.get('rows', []) + if rows and rows[0][0] is not None: + max_shards_setting = int(rows[0][0]) + except Exception as e: + # sys.cluster might not be accessible in CrateDB Cloud + self.console.print(f"[yellow]Warning: Could not access cluster settings, using default max_shards_per_node=1000[/yellow]") + + # Get total shard count and max shards per node + shards_query = """ + SELECT + COUNT(*) as total_shards + FROM sys.shards + """ + shards_result = self.db_client.execute_query(shards_query) + shards_data = shards_result.get('rows', [])[0] + + # Get actual max shards per node (current distribution) + try: + max_shards_query = """ + SELECT node['name'], COUNT(*) as shard_count + FROM sys.shards + GROUP BY node['name'] + ORDER BY shard_count DESC + 
LIMIT 1 + """ + max_shards_result = self.db_client.execute_query(max_shards_query) + max_shards_rows = max_shards_result.get('rows', []) + actual_max_shards_per_node = max_shards_rows[0][1] if max_shards_rows else 0 + except Exception as e: + # Calculate approximate value: total_shards / total_nodes + actual_max_shards_per_node = int(shards_data[0] / max(nodes_data[0], 1)) + self.console.print(f"[dim]Using approximate max shards per node: {actual_max_shards_per_node}[/dim]") + + return ClusterConfiguration( + total_nodes=nodes_data[0], + total_cpu_cores=nodes_data[1] or 0, + total_memory_gb=nodes_data[2] or 0.0, + total_heap_gb=nodes_data[3] or 0.0, + max_shards_per_node_setting=max_shards_setting, + actual_max_shards_per_node=actual_max_shards_per_node, + total_shards=shards_data[0] + ) + + def _analyze_table_shards(self, cluster_config: ClusterConfiguration, + schema_filter: Optional[str] = None) -> List[ShardAnalysisResult]: + """Analyze individual table shard configurations.""" + query = self.SHARD_ANALYSIS_QUERY + + if schema_filter: + # Add WHERE clause for schema filtering + query = query.replace( + "ORDER BY table_schema", + f"WHERE s.table_schema = '{schema_filter}' ORDER BY table_schema" + ) + + results = self.db_client.execute_query(query) + + table_results = [] + for row in results.get('rows', []): + # Parse query results + analysis_result = ShardAnalysisResult( + table_schema=row[0], + table_name=row[1], + partition_ident=row[2], + total_primary_size_gb=float(row[3] or 0), + avg_shard_size_gb=float(row[4] or 0), + min_shard_size_gb=float(row[5] or 0), + max_shard_size_gb=float(row[6] or 0), + num_shards_primary=int(row[7] or 0), + num_shards_replica=int(row[8] or 0), + num_shards_total=int(row[9] or 0), + total_documents=int(row[10] or 0), + num_columns=int(row[11] or 0), + partitioned_by=row[12], + clustered_by=row[13] + ) + + # Evaluate rules for this table + analysis_result.violations = self._evaluate_table_rules(analysis_result, cluster_config) + 
table_results.append(analysis_result) + + return table_results + + def _evaluate_table_rules(self, result: ShardAnalysisResult, + cluster_config: ClusterConfiguration) -> List[RuleViolation]: + """Evaluate table-level rules against a single table/partition.""" + violations = [] + + # Prepare evaluation context + context = { + # Table data + 'table_schema': result.table_schema, + 'table_name': result.table_name, + 'partition_ident': result.partition_ident, + 'total_primary_size_gb': result.total_primary_size_gb, + 'avg_shard_size_gb': result.avg_shard_size_gb, + 'min_shard_size_gb': result.min_shard_size_gb, + 'max_shard_size_gb': result.max_shard_size_gb, + 'num_shards_primary': result.num_shards_primary, + 'num_shards_replica': result.num_shards_replica, + 'num_shards_total': result.num_shards_total, + 'num_columns': result.num_columns, + 'partitioned_by': result.partitioned_by, + 'clustered_by': result.clustered_by, + + # Cluster context + 'cluster_config': { + 'total_nodes': cluster_config.total_nodes, + 'total_cpu_cores': cluster_config.total_cpu_cores, + 'total_memory_gb': cluster_config.total_memory_gb, + 'total_heap_gb': cluster_config.total_heap_gb, + 'max_shards_per_node': cluster_config.max_shards_per_node_setting, + 'total_shards': cluster_config.total_shards + }, + + # Thresholds + 'thresholds': self.thresholds + } + + # Evaluate each rule + for rule in self.table_rules: + try: + if eval(rule.condition, {"__builtins__": {}}, context): + # Create formatting context with flattened values + format_context = { + **context, + **self.thresholds, + 'ratio': context['max_shard_size_gb'] / context['min_shard_size_gb'] if context['min_shard_size_gb'] > 0 else 0 + } + # Add flattened cluster_config values for easier formatting + for key, value in context['cluster_config'].items(): + format_context[f'cluster_config[{key}]'] = value + + recommendation = rule.recommendation.format(**format_context) + + violations.append(RuleViolation( + rule_name=rule.name, + 
category=rule.category, + severity=rule.severity, + recommendation=recommendation, + action_hint=rule.action_hint, + table_identifier=result.table_identifier + )) + except Exception as e: + self.console.print(f"[yellow]Warning: Error evaluating rule '{rule.name}': {e}[/yellow]") + + return violations + + def _evaluate_cluster_rules(self, cluster_config: ClusterConfiguration, + table_results: List[ShardAnalysisResult]) -> List[RuleViolation]: + """Evaluate cluster-level rules.""" + violations = [] + + # Prepare cluster-level context + context = { + 'cluster_config': { + 'total_nodes': cluster_config.total_nodes, + 'total_cpu_cores': cluster_config.total_cpu_cores, + 'total_memory_gb': cluster_config.total_memory_gb, + 'total_heap_gb': cluster_config.total_heap_gb, + 'max_shards_per_node': cluster_config.actual_max_shards_per_node, + 'total_shards': cluster_config.total_shards + }, + 'thresholds': self.thresholds, + 'total_shards': cluster_config.total_shards, + 'total_heap_gb': cluster_config.total_heap_gb, + 'max_shards_per_node': cluster_config.actual_max_shards_per_node, + 'total_cpu_cores': cluster_config.total_cpu_cores + } + + # Evaluate each cluster rule + for rule in self.cluster_rules: + try: + if eval(rule.condition, {"__builtins__": {}}, context): + # Create formatting context with flattened values + format_context = { + **context, + **self.thresholds + } + # Add flattened cluster_config values for easier formatting + for key, value in context['cluster_config'].items(): + format_context[f'cluster_config[{key}]'] = value + + recommendation = rule.recommendation.format(**format_context) + + violations.append(RuleViolation( + rule_name=rule.name, + category=rule.category, + severity=rule.severity, + recommendation=recommendation, + action_hint=rule.action_hint, + table_identifier="[CLUSTER]" + )) + except Exception as e: + self.console.print(f"[yellow]Warning: Error evaluating cluster rule '{rule.name}': {e}[/yellow]") + + return violations + + def 
display_report(self, report: MonitoringReport, severity_filter: Optional[str] = None): + """Display analysis report to console.""" + # Header with cluster info + self.console.print(Panel( + f"[bold blue]CrateDB Shard Size Analysis Report[/bold blue]\n" + f"[dim]Generated: {report.timestamp.strftime('%Y-%m-%d %H:%M:%S')}[/dim]\n\n" + f"[bold]Cluster Overview:[/bold]\n" + f"• Nodes: {report.cluster_config.total_nodes}\n" + f"• Total Shards: {report.cluster_config.total_shards}\n" + f"• CPU Cores: {report.cluster_config.total_cpu_cores}\n" + f"• Heap Memory: {report.cluster_config.total_heap_gb:.1f}GB\n" + f"• Max Shards/Node: {report.cluster_config.actual_max_shards_per_node} " + f"(limit: {report.cluster_config.max_shards_per_node_setting})", + expand=False + )) + + # Summary of violations + violation_counts = report.total_violations_by_severity + if any(violation_counts.values()): + summary_text = Text() + if violation_counts['critical'] > 0: + summary_text.append(f"🔴 {violation_counts['critical']} Critical ", style="bold red") + if violation_counts['warning'] > 0: + summary_text.append(f"🟡 {violation_counts['warning']} Warning ", style="bold yellow") + if violation_counts['info'] > 0: + summary_text.append(f"🔵 {violation_counts['info']} Info", style="bold blue") + + self.console.print(Panel(summary_text, title="Issue Summary")) + else: + self.console.print(Panel("✅ No issues found", style="green")) + return + + # Cluster-level violations + cluster_violations = [v for v in report.cluster_violations + if not severity_filter or v.severity == severity_filter] + if cluster_violations: + self.console.print("\n[bold]🏢 Cluster-Level Issues:[/bold]") + for violation in cluster_violations: + severity_color = {'critical': 'red', 'warning': 'yellow', 'info': 'blue'}[violation.severity] + self.console.print(f"[{severity_color}]• [{violation.severity.upper()}] {violation.recommendation}[/{severity_color}]") + if violation.action_hint: + self.console.print(f" 💡 
{violation.action_hint}") + + # Table-level violations + tables_with_violations = [r for r in report.table_results if r.violations] + if severity_filter: + tables_with_violations = [r for r in tables_with_violations + if any(v.severity == severity_filter for v in r.violations)] + + if tables_with_violations: + self.console.print(f"\n[bold]📊 Table/Partition Issues ({len(tables_with_violations)} affected):[/bold]") + + for result in tables_with_violations: + violations_to_show = [v for v in result.violations + if not severity_filter or v.severity == severity_filter] + + if not violations_to_show: + continue + + # Table header with key metrics + table_info = (f"{result.table_identifier} " + f"({result.num_shards_primary}s/{result.num_shards_replica}r, " + f"{result.max_shard_size_gb:.1f}GB max, " + f"avg {result.avg_shard_size_gb:.1f}GB, " + f"{result.total_documents:,} docs, " + f"{result.num_columns} cols)") + + self.console.print(f"\n[bold cyan]{table_info}[/bold cyan]") + + for violation in violations_to_show: + severity_color = {'critical': 'red', 'warning': 'yellow', 'info': 'blue'}[violation.severity] + self.console.print(f" [{severity_color}]• [{violation.severity.upper()}] {violation.recommendation}[/{severity_color}]") + if violation.action_hint: + self.console.print(f" 💡 {violation.action_hint}") + + def export_csv(self, report: MonitoringReport, filename: str): + """Export analysis results to CSV file.""" + with open(filename, 'w', newline='') as csvfile: + fieldnames = [ + 'timestamp', 'violation_level', 'table_schema', 'table_name', 'partition_ident', + 'severity', 'category', 'rule_name', 'recommendation', 'action_hint', + 'total_primary_size_gb', 'avg_shard_size_gb', 'min_shard_size_gb', 'max_shard_size_gb', + 'num_shards_primary', 'num_shards_replica', 'num_shards_total', 'num_columns', 'total_documents' + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + # Write cluster-level violations + for violation in 
report.cluster_violations: + writer.writerow({ + 'timestamp': report.timestamp.isoformat(), + 'violation_level': 'cluster', + 'table_schema': None, + 'table_name': None, + 'partition_ident': None, + 'severity': violation.severity, + 'category': violation.category, + 'rule_name': violation.rule_name, + 'recommendation': violation.recommendation, + 'action_hint': violation.action_hint, + 'total_primary_size_gb': None, + 'avg_shard_size_gb': None, + 'min_shard_size_gb': None, + 'max_shard_size_gb': None, + 'num_shards_primary': None, + 'num_shards_replica': None, + 'num_shards_total': None, + 'num_columns': None, + 'total_documents': None + }) + + # Write table-level violations + for result in report.table_results: + if result.violations: + for violation in result.violations: + writer.writerow({ + 'timestamp': report.timestamp.isoformat(), + 'violation_level': 'table', + 'table_schema': result.table_schema, + 'table_name': result.table_name, + 'partition_ident': result.partition_ident, + 'severity': violation.severity, + 'category': violation.category, + 'rule_name': violation.rule_name, + 'recommendation': violation.recommendation, + 'action_hint': violation.action_hint, + 'total_primary_size_gb': result.total_primary_size_gb, + 'avg_shard_size_gb': result.avg_shard_size_gb, + 'min_shard_size_gb': result.min_shard_size_gb, + 'max_shard_size_gb': result.max_shard_size_gb, + 'num_shards_primary': result.num_shards_primary, + 'num_shards_replica': result.num_shards_replica, + 'num_shards_total': result.num_shards_total, + 'num_columns': result.num_columns, + 'total_documents': result.total_documents + }) + else: + # Include tables without violations for complete dataset + writer.writerow({ + 'timestamp': report.timestamp.isoformat(), + 'violation_level': 'table', + 'table_schema': result.table_schema, + 'table_name': result.table_name, + 'partition_ident': result.partition_ident, + 'severity': None, + 'category': None, + 'rule_name': None, + 'recommendation': None, + 
'action_hint': None, + 'total_primary_size_gb': result.total_primary_size_gb, + 'avg_shard_size_gb': result.avg_shard_size_gb, + 'min_shard_size_gb': result.min_shard_size_gb, + 'max_shard_size_gb': result.max_shard_size_gb, + 'num_shards_primary': result.num_shards_primary, + 'num_shards_replica': result.num_shards_replica, + 'num_shards_total': result.num_shards_total, + 'num_columns': result.num_columns, + 'total_documents': result.total_documents + }) + + +def validate_rules_file(config_path: str) -> bool: + """Standalone function to validate a rules configuration file.""" + console = Console() + + try: + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + validator = RulesConfigValidator() + errors = validator.validate_config(config) + + if errors: + console.print(f"[red]❌ Validation failed for {config_path}:[/red]") + for error in errors: + console.print(f" • {error}") + return False + else: + console.print(f"[green]✅ Configuration file {config_path} is valid[/green]") + return True + + except FileNotFoundError: + console.print(f"[red]❌ File not found: {config_path}[/red]") + return False + except yaml.YAMLError as e: + console.print(f"[red]❌ YAML parsing error: {e}[/red]") + return False + except Exception as e: + console.print(f"[red]❌ Unexpected error: {e}[/red]") + return False diff --git a/tests/test_active_shard_monitor.py b/tests/test_active_shard_monitor.py new file mode 100644 index 0000000..3fb6613 --- /dev/null +++ b/tests/test_active_shard_monitor.py @@ -0,0 +1,461 @@ +""" +Tests for ActiveShardMonitor functionality +""" + +import pytest +import time +from unittest.mock import Mock, patch +from xmover.database import CrateDBClient, ActiveShardSnapshot, ActiveShardActivity +from xmover.analyzer import ActiveShardMonitor + + +class TestActiveShardSnapshot: + """Test ActiveShardSnapshot dataclass""" + + def test_checkpoint_delta(self): + """Test checkpoint delta calculation""" + snapshot = ActiveShardSnapshot( + 
schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=time.time() + ) + + assert snapshot.checkpoint_delta == 1000 + assert snapshot.translog_uncommitted_mb == 10.0 + assert snapshot.shard_identifier == "test_schema.test_table:1:node1:P" + + +class TestActiveShardActivity: + """Test ActiveShardActivity dataclass""" + + def test_activity_calculations(self): + """Test activity rate and property calculations""" + snapshot1 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1000, + global_checkpoint=500, + translog_uncommitted_bytes=5242880, # 5MB + timestamp=100.0 + ) + + snapshot2 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=130.0 # 30 seconds later + ) + + activity = ActiveShardActivity( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0 + ) + + assert activity.activity_rate == 500 / 30.0 # ~16.67 changes/sec + assert activity.shard_type == "PRIMARY" + assert activity.table_identifier == "test_schema.test_table" + + +class TestCrateDBClientActiveShards: + """Test CrateDB client active shards functionality""" + + @patch.object(CrateDBClient, 'execute_query') + def test_get_active_shards_snapshot_success(self, mock_execute): + """Test successful snapshot retrieval""" + mock_execute.return_value = { + 'rows': [ + ['schema1', 'table1', 1, True, 'node1', '', 10485760, 1500, 
500], + ['schema1', 'table2', 2, False, 'node2', 'part1', 20971520, 2000, 800] + ] + } + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert len(snapshots) == 2 + + # Check first snapshot + snap1 = snapshots[0] + assert snap1.schema_name == 'schema1' + assert snap1.table_name == 'table1' + assert snap1.shard_id == 1 + assert snap1.is_primary is True + assert snap1.node_name == 'node1' + assert snap1.local_checkpoint == 1500 + assert snap1.global_checkpoint == 500 + assert snap1.checkpoint_delta == 1000 + assert snap1.translog_uncommitted_mb == 10.0 + + # Check second snapshot + snap2 = snapshots[1] + assert snap2.schema_name == 'schema1' + assert snap2.table_name == 'table2' + assert snap2.shard_id == 2 + assert snap2.is_primary is False + assert snap2.node_name == 'node2' + assert snap2.partition_ident == 'part1' + assert snap2.checkpoint_delta == 1200 + assert snap2.translog_uncommitted_mb == 20.0 + + # Verify query was called without checkpoint delta filter (new behavior) + mock_execute.assert_called_once() + args = mock_execute.call_args[0] + # No longer passes min_checkpoint_delta parameter + assert len(args) == 1 # Only the query, no parameters + + @patch.object(CrateDBClient, 'execute_query') + def test_get_active_shards_snapshot_empty(self, mock_execute): + """Test snapshot retrieval with no results""" + mock_execute.return_value = {'rows': []} + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + @patch.object(CrateDBClient, 'execute_query') + def test_get_active_shards_snapshot_error(self, mock_execute): + """Test snapshot retrieval with database error""" + mock_execute.side_effect = Exception("Database connection failed") + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + +class 
TestActiveShardMonitor: + """Test ActiveShardMonitor class""" + + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.monitor = ActiveShardMonitor(self.mock_client) + + def create_test_snapshot(self, schema: str, table: str, shard_id: int, node: str, + is_primary: bool, local_checkpoint: int, timestamp: float): + """Helper to create test snapshots""" + return ActiveShardSnapshot( + schema_name=schema, + table_name=table, + shard_id=shard_id, + node_name=node, + is_primary=is_primary, + partition_ident="", + local_checkpoint=local_checkpoint, + global_checkpoint=500, # Fixed for simplicity + translog_uncommitted_bytes=10485760, # 10MB + timestamp=timestamp + ) + + def test_compare_snapshots_with_activity(self): + """Test comparing snapshots with active shards""" + # Create first snapshot + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), + ] + + # Create second snapshot (30 seconds later with activity) + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0), # +500 + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2200, 130.0), # +200 + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 130.0), # No change + self.create_test_snapshot("schema1", "table4", 1, "node3", True, 1000, 130.0), # New shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 activities (table3 had no change, table4 is new) + assert len(activities) == 2 + + # Check activities are sorted by checkpoint delta (highest first) + assert activities[0].local_checkpoint_delta == 500 # table1 + assert activities[0].schema_name == "schema1" + assert activities[0].table_name == "table1" + + assert 
activities[1].local_checkpoint_delta == 200 # table2 + assert activities[1].schema_name == "schema1" + assert activities[1].table_name == "table2" + + # Check activity rate calculation + assert activities[0].activity_rate == 500 / 30.0 # ~16.67/sec + assert activities[1].activity_rate == 200 / 30.0 # ~6.67/sec + + def test_compare_snapshots_no_activity(self): + """Test comparing snapshots with no activity""" + # Create identical snapshots + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 130.0), # No change + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_compare_snapshots_no_overlap(self): + """Test comparing snapshots with no overlapping shards""" + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table2", 1, "node2", True, 1500, 130.0), # Different shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_format_activity_display_with_activities(self): + """Test formatting activity display with data""" + # Create test activities + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0 + ) + + display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + + # Check that output contains expected elements + assert "Most Active 
Shards" in display + assert "schema1.table1" in display + assert "500" in display # checkpoint delta + assert "16.7" in display # activity rate + assert "P" in display # primary indicator + assert "Legend:" in display + assert "Trend:" in display # new trend column explanation + assert "Partition:" in display # new partition column explanation + + def test_format_activity_display_empty(self): + """Test formatting activity display with no data""" + display = self.monitor.format_activity_display([], show_count=10, watch_mode=False) + + assert "No active shards with significant checkpoint progression found" in display + + def test_format_activity_display_count_limit(self): + """Test that display respects show_count limit""" + # Create multiple activities + activities = [] + for i in range(15): + snapshot1 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000 + (i+1)*100, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name=f"table{i}", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=(i+1)*100, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0 + ) + activities.append(activity) + + # Sort activities by checkpoint delta (highest first) - same as compare_snapshots does + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + # Should only show top 5 + display = self.monitor.format_activity_display(activities, show_count=5, watch_mode=False) + + # Count number of table entries in display + table_count = display.count("schema1.table") + assert table_count == 5 # Should only show 5 entries + + # Should show highest activity first (table14 has highest checkpoint delta) + assert "schema1.table14" in display + + def test_compare_snapshots_with_activity_threshold(self): + """Test filtering activities by minimum threshold""" + # Create snapshots 
with various activity levels + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), # Will have +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), # Will have +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), # Will have +100 delta + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 3000, 130.0), # +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2500, 130.0), # +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3100, 130.0), # +100 delta + ] + + # Test with threshold of 1000 - should only show table1 (2000 delta) + activities_high_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1000) + assert len(activities_high_threshold) == 1 + assert activities_high_threshold[0].table_name == "table1" + assert activities_high_threshold[0].local_checkpoint_delta == 2000 + + # Test with threshold of 200 - should show table1 and table2 + activities_medium_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=200) + assert len(activities_medium_threshold) == 2 + assert activities_medium_threshold[0].local_checkpoint_delta == 2000 # table1 first (highest) + assert activities_medium_threshold[1].local_checkpoint_delta == 500 # table2 second + + # Test with threshold of 0 - should show all three + activities_low_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=0) + assert len(activities_low_threshold) == 3 + assert activities_low_threshold[0].local_checkpoint_delta == 2000 # Sorted by activity + assert activities_low_threshold[1].local_checkpoint_delta == 500 + assert activities_low_threshold[2].local_checkpoint_delta == 100 + + def test_primary_replica_separation(self): + """Test that primary and replica shards are tracked separately""" + # Create 
snapshots with same table/shard but different primary/replica + snapshot1 = [ + # Primary shard + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 15876, 100.0), + # Replica shard (same table/shard/node but different type) + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129434, 100.0), + ] + + snapshot2 = [ + # Primary shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 16000, 130.0), # +124 delta + # Replica shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129500, 130.0), # +66 delta + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (primary and replica tracked separately) + assert len(activities) == 2 + + # Find primary and replica activities + primary_activity = next(a for a in activities if a.is_primary) + replica_activity = next(a for a in activities if not a.is_primary) + + # Verify deltas are calculated correctly for each type + assert primary_activity.local_checkpoint_delta == 124 # 16000 - 15876 + assert replica_activity.local_checkpoint_delta == 66 # 129500 - 129434 + + # Verify they have different shard identifiers + assert primary_activity.snapshot1.shard_identifier != replica_activity.snapshot1.shard_identifier + assert "data-hot-8:P" in primary_activity.snapshot1.shard_identifier + assert "data-hot-8:R" in replica_activity.snapshot1.shard_identifier + + # This test prevents the bug where we mixed primary CP End with replica CP Start + # which created fake deltas like 129434 - 15876 = 113558 + + def test_partition_separation(self): + """Test that partitions within the same table/shard are tracked separately""" + # Create snapshots with same table/shard but different partitions + snapshot1 = [ + # Partition 1 + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", 
True, 32684, 100.0), + # Partition 2 (same table/shard/node/type but different partition) + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54289, 100.0), + ] + + # Modify partition_ident for the snapshots to simulate different partitions + snapshot1[0].partition_ident = "04732dpl6osj8d1g60o30c1g" + snapshot1[1].partition_ident = "04732dpl6os3adpm60o30c1g" + + snapshot2 = [ + # Partition 1 progresses + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32800, 130.0), # +116 delta + # Partition 2 progresses + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54400, 130.0), # +111 delta + ] + + # Set partition_ident for second snapshot + snapshot2[0].partition_ident = "04732dpl6osj8d1g60o30c1g" + snapshot2[1].partition_ident = "04732dpl6os3adpm60o30c1g" + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (partitions tracked separately) + assert len(activities) == 2 + + # Verify deltas are calculated correctly for each partition + partition1_activity = next(a for a in activities if "04732dpl6osj8d1g60o30c1g" in a.snapshot1.shard_identifier) + partition2_activity = next(a for a in activities if "04732dpl6os3adpm60o30c1g" in a.snapshot1.shard_identifier) + + assert partition1_activity.local_checkpoint_delta == 116 # 32800 - 32684 + assert partition2_activity.local_checkpoint_delta == 111 # 54400 - 54289 + + # Verify they have different shard identifiers due to partition + assert partition1_activity.snapshot1.shard_identifier != partition2_activity.snapshot1.shard_identifier + assert ":04732dpl6osj8d1g60o30c1g" in partition1_activity.snapshot1.shard_identifier + assert ":04732dpl6os3adpm60o30c1g" in partition2_activity.snapshot1.shard_identifier + + # This test prevents mixing partitions which would create fake activity measurements + + def 
test_format_activity_display_watch_mode(self): + """Test that watch mode excludes legend and insights""" + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0 + ) + + # Test non-watch mode (should include legend and insights) + normal_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + assert "Legend:" in normal_display + assert "Insights:" in normal_display + assert "Checkpoint Δ:" in normal_display + + # Test watch mode (should exclude legend and insights) + watch_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=True) + assert "Legend:" not in watch_display + assert "Insights:" not in watch_display + assert "Checkpoint Δ" in watch_display # Core data should still be present + + # But should still contain the core data + assert "Most Active Shards" in watch_display + assert "schema1.table1" in watch_display + assert "500" in watch_display # checkpoint delta \ No newline at end of file diff --git a/tests/test_distribution_analyzer.py b/tests/test_distribution_analyzer.py new file mode 100644 index 0000000..42e92a0 --- /dev/null +++ b/tests/test_distribution_analyzer.py @@ -0,0 +1,294 @@ +""" +Tests for distribution analyzer functionality +""" + +import pytest +from unittest.mock import Mock, patch +from xmover.distribution_analyzer import DistributionAnalyzer, TableDistribution, DistributionAnomaly +from xmover.database import CrateDBClient, NodeInfo + + +class TestDistributionAnalyzer: + + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.analyzer = 
DistributionAnalyzer(self.mock_client) + + def test_coefficient_of_variation_calculation(self): + """Test CV calculation with different scenarios""" + + # Normal case + values = [10, 12, 8, 14, 6] + cv = self.analyzer.calculate_coefficient_of_variation(values) + assert cv > 0 + + # All equal values (should return 0) + equal_values = [10, 10, 10, 10] + cv_equal = self.analyzer.calculate_coefficient_of_variation(equal_values) + assert cv_equal == 0.0 + + # Empty list + empty_values = [] + cv_empty = self.analyzer.calculate_coefficient_of_variation(empty_values) + assert cv_empty == 0.0 + + # Single value + single_value = [10] + cv_single = self.analyzer.calculate_coefficient_of_variation(single_value) + assert cv_single == 0.0 + + def test_get_largest_tables_distribution(self): + """Test fetching table distribution data""" + + # Mock query results + mock_results = [ + # schema, table, node, primary_shards, replica_shards, total_shards, total_size, primary_size, replica_size, docs + ['doc', 'large_table', 'node1', 5, 2, 7, 100.5, 80.2, 20.3, 1000000], + ['doc', 'large_table', 'node2', 4, 3, 7, 95.1, 75.8, 19.3, 950000], + ['doc', 'large_table', 'node3', 6, 1, 7, 110.2, 85.9, 24.3, 1100000], + ['custom', 'another_table', 'node1', 3, 2, 5, 50.1, 40.2, 9.9, 500000], + ['custom', 'another_table', 'node2', 2, 3, 5, 45.8, 35.1, 10.7, 480000], + ] + + self.mock_client.execute_query.return_value = {'rows': mock_results} + + distributions = self.analyzer.get_largest_tables_distribution(top_n=10) + + # Verify query was called with correct parameters + self.mock_client.execute_query.assert_called_once() + call_args = self.mock_client.execute_query.call_args + assert call_args[0][1] == [10] # top_n parameter + + # Verify we got the expected number of tables + assert len(distributions) == 2 + + # Verify table data structure + large_table = next(d for d in distributions if d.table_name == 'large_table') + assert large_table.schema_name == 'doc' + assert large_table.full_table_name 
== 'large_table' # Should omit 'doc' schema + assert len(large_table.node_distributions) == 3 + + another_table = next(d for d in distributions if d.table_name == 'another_table') + assert another_table.schema_name == 'custom' + assert another_table.full_table_name == 'custom.another_table' + assert len(another_table.node_distributions) == 2 + + # Verify sorting by primary size (descending) + assert distributions[0].total_primary_size_gb >= distributions[1].total_primary_size_gb + + def test_detect_shard_count_imbalance(self): + """Test shard count imbalance detection""" + + # Create test table with imbalanced shard distribution + imbalanced_table = TableDistribution( + schema_name='doc', + table_name='imbalanced_table', + total_primary_size_gb=500.0, + node_distributions={ + 'node1': {'total_shards': 10, 'primary_shards': 5, 'replica_shards': 5}, + 'node2': {'total_shards': 15, 'primary_shards': 8, 'replica_shards': 7}, + 'node3': {'total_shards': 5, 'primary_shards': 2, 'replica_shards': 3}, + } + ) + + anomaly = self.analyzer.detect_shard_count_imbalance(imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Shard Count Imbalance" + assert anomaly.combined_score > 0 + assert len(anomaly.recommendations) > 0 + + # Create balanced table (should not detect anomaly) + balanced_table = TableDistribution( + schema_name='doc', + table_name='balanced_table', + total_primary_size_gb=100.0, + node_distributions={ + 'node1': {'total_shards': 8, 'primary_shards': 4, 'replica_shards': 4}, + 'node2': {'total_shards': 8, 'primary_shards': 4, 'replica_shards': 4}, + 'node3': {'total_shards': 8, 'primary_shards': 4, 'replica_shards': 4}, + } + ) + + no_anomaly = self.analyzer.detect_shard_count_imbalance(balanced_table) + assert no_anomaly is None + + def test_detect_storage_imbalance(self): + """Test storage imbalance detection""" + + # Create test table with storage imbalance + storage_imbalanced_table = TableDistribution( + schema_name='doc', + 
table_name='storage_imbalanced', + total_primary_size_gb=300.0, + node_distributions={ + 'node1': {'total_size_gb': 150.0, 'primary_size_gb': 100.0, 'replica_size_gb': 50.0}, + 'node2': {'total_size_gb': 50.0, 'primary_size_gb': 30.0, 'replica_size_gb': 20.0}, + 'node3': {'total_size_gb': 100.0, 'primary_size_gb': 70.0, 'replica_size_gb': 30.0}, + } + ) + + anomaly = self.analyzer.detect_storage_imbalance(storage_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Storage Imbalance" + assert anomaly.combined_score > 0 + + # Small table (should be ignored) + small_table = TableDistribution( + schema_name='doc', + table_name='small_table', + total_primary_size_gb=0.1, + node_distributions={ + 'node1': {'total_size_gb': 0.5, 'primary_size_gb': 0.05, 'replica_size_gb': 0.05}, + 'node2': {'total_size_gb': 0.1, 'primary_size_gb': 0.03, 'replica_size_gb': 0.02}, + } + ) + + no_anomaly = self.analyzer.detect_storage_imbalance(small_table) + assert no_anomaly is None + + def test_detect_node_coverage_issues(self): + """Test node coverage issue detection""" + + # Mock nodes_info to simulate cluster with 4 nodes + class MockNode: + def __init__(self, name): + self.name = name + + mock_nodes = [ + MockNode('node1'), MockNode('node2'), + MockNode('node3'), MockNode('node4') + ] + self.mock_client.get_nodes_info.return_value = mock_nodes + + # Table with limited coverage (only on 2 out of 4 nodes) + limited_coverage_table = TableDistribution( + schema_name='doc', + table_name='limited_coverage', + total_primary_size_gb=100.0, # Significant size + node_distributions={ + 'node1': {'total_shards': 10, 'primary_shards': 5, 'replica_shards': 5}, + 'node2': {'total_shards': 10, 'primary_shards': 5, 'replica_shards': 5}, + # node3 and node4 missing + } + ) + + anomaly = self.analyzer.detect_node_coverage_issues(limited_coverage_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Node Coverage Issue" + assert 'node3' in 
anomaly.details['nodes_without_shards'] + assert 'node4' in anomaly.details['nodes_without_shards'] + assert len(anomaly.recommendations) > 0 + + def test_detect_document_imbalance(self): + """Test document imbalance detection""" + + # Table with document imbalance + doc_imbalanced_table = TableDistribution( + schema_name='doc', + table_name='doc_imbalanced', + total_primary_size_gb=200.0, + node_distributions={ + 'node1': {'total_documents': 1000000}, # 1M docs + 'node2': {'total_documents': 500000}, # 500K docs + 'node3': {'total_documents': 100000}, # 100K docs (5x imbalance) + } + ) + + anomaly = self.analyzer.detect_document_imbalance(doc_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Document Imbalance" + assert "data skew" in anomaly.recommendations[0].lower() + + # Table with very few documents (should be ignored) + low_doc_table = TableDistribution( + schema_name='doc', + table_name='low_docs', + total_primary_size_gb=100.0, + node_distributions={ + 'node1': {'total_documents': 1000}, + 'node2': {'total_documents': 500}, + } + ) + + no_anomaly = self.analyzer.detect_document_imbalance(low_doc_table) + assert no_anomaly is None + + def test_analyze_distribution_integration(self): + """Test the full analysis workflow""" + + # Mock the get_largest_tables_distribution method + mock_table = TableDistribution( + schema_name='doc', + table_name='test_table', + total_primary_size_gb=500.0, + node_distributions={ + 'node1': { + 'total_shards': 15, 'primary_shards': 8, 'replica_shards': 7, + 'total_size_gb': 200.0, 'primary_size_gb': 120.0, 'replica_size_gb': 80.0, + 'total_documents': 2000000 + }, + 'node2': { + 'total_shards': 8, 'primary_shards': 4, 'replica_shards': 4, + 'total_size_gb': 100.0, 'primary_size_gb': 60.0, 'replica_size_gb': 40.0, + 'total_documents': 1000000 + }, + 'node3': { + 'total_shards': 5, 'primary_shards': 3, 'replica_shards': 2, + 'total_size_gb': 50.0, 'primary_size_gb': 30.0, 'replica_size_gb': 20.0, + 
'total_documents': 500000 + }, + } + ) + + with patch.object(self.analyzer, 'get_largest_tables_distribution', return_value=[mock_table]): + anomalies, tables_analyzed = self.analyzer.analyze_distribution(top_tables=10) + + # Should detect multiple types of anomalies + assert len(anomalies) > 0 + assert tables_analyzed == 1 # We provided 1 mock table + + # Anomalies should be sorted by combined score (descending) + if len(anomalies) > 1: + for i in range(len(anomalies) - 1): + assert anomalies[i].combined_score >= anomalies[i + 1].combined_score + + # Each anomaly should have required fields + for anomaly in anomalies: + assert anomaly.table is not None + assert anomaly.anomaly_type is not None + assert anomaly.combined_score >= 0 + assert isinstance(anomaly.recommendations, list) + + def test_format_distribution_report_no_anomalies(self): + """Test report formatting when no anomalies found""" + + # This should not raise an exception + with patch('builtins.print'): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([], 5) + + def test_format_distribution_report_with_anomalies(self): + """Test report formatting with anomalies""" + + mock_anomaly = DistributionAnomaly( + table=TableDistribution('doc', 'test_table', 100.0, {}), + anomaly_type='Test Anomaly', + severity_score=7.5, + impact_score=8.0, + combined_score=60.0, + description='Test description', + details={}, + recommendations=['Test recommendation'] + ) + + # This should not raise an exception + with patch('builtins.print'): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([mock_anomaly], 3) + + +if __name__ == '__main__': + pytest.main([__file__]) \ No newline at end of file diff --git a/tests/test_problematic_translogs.py b/tests/test_problematic_translogs.py new file mode 100644 index 0000000..9446e59 --- /dev/null +++ b/tests/test_problematic_translogs.py @@ -0,0 +1,402 @@ +""" +Tests for problematic translogs 
functionality with replica management +""" + +import pytest +from unittest.mock import Mock, patch +from click.testing import CliRunner +from xmover.cli import main, problematic_translogs +from xmover.database import CrateDBClient + + +class TestProblematicTranslogs: + + def setup_method(self): + """Set up test fixtures""" + self.runner = CliRunner() + self.mock_client = Mock(spec=CrateDBClient) + + def test_no_problematic_tables(self): + """Test when no tables meet the criteria""" + self.mock_client.execute_query.return_value = {'rows': []} + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs', '--sizeMB', '300']) + + assert result.exit_code == 0 + assert 'No tables found with replica shards having translog uncommitted size > 300MB' in result.output + + def test_non_partitioned_table_command_generation(self): + """Test ALTER command generation for non-partitioned tables""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8], + ['TURVO', 'orderFormFieldData', 'NULL', 5, 'data-hot-1', 469.5] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 3, 7011.8, 5, 5, 12.4, 12.1], + ['TURVO', 'orderFormFieldData', 'NULL', None, 1, 469.5, 3, 6, 8.2, 16.3] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Replica count for shipmentFormFieldData + {'rows': [[2]]}, # Replica count for orderFormFieldData + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs', '--sizeMB', '300']) + + assert 
result.exit_code == 0 + assert 'Found 2 table/partition(s) with problematic translogs' in result.output + assert 'Tables with Problematic Replicas' in result.output + assert 'Generated ALTER Commands:' in result.output + + # Check that replica management commands are present + assert 'SET ("number_of_replicas" = 0)' in result.output + assert 'SET ("number_of_replicas" = 1)' in result.output + assert 'SET ("number_of_replicas" = 2)' in result.output + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData"' in result.output + assert 'ALTER TABLE "TURVO"."orderFormFieldData"' in result.output + + def test_partitioned_table_command_generation(self): + """Test ALTER command generation for partitioned tables""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData_events', '("sync_day"=1757376000000)', 3, 'data-hot-2', 481.2], + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData_events', '("sync_day"=1757376000000)', 'partition123', 2, 481.2, 2, 2, 1.1, 1.0], + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Replica count for partitioned table + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs', '--sizeMB', '400']) + + assert result.exit_code == 0 + assert 'Found 1 table/partition(s) with problematic translogs' in result.output + assert 'Generated ALTER Commands:' in result.output + + # Check that partitioned table commands are present (handle Rich line wrapping) + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData_events"' in result.output + assert 'PARTITION' in result.output + assert '("sync_day"=1757376000000)' in result.output + assert 'SET ("number_of_replicas" = 0)' 
in result.output + assert 'SET ("number_of_replicas" = 1)' in result.output + + def test_mixed_partitioned_non_partitioned(self): + """Test handling of both partitioned and non-partitioned tables""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8], + ['TURVO', 'shipmentFormFieldData_events', '("sync_day"=1757376000000)', 3, 'data-hot-2', 481.2], + ['TURVO', 'orderFormFieldData', 'NULL', 5, 'data-hot-1', 469.5] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 2, 7011.8, 5, 5, 12.4, 12.1], + ['TURVO', 'shipmentFormFieldData_events', '("sync_day"=1757376000000)', 'partition123', 1, 481.2, 2, 2, 1.1, 1.0], + ['TURVO', 'orderFormFieldData', 'NULL', None, 1, 469.5, 3, 6, 8.2, 16.3] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[2]]}, # Replica count for shipmentFormFieldData + {'rows': [[1]]}, # Replica count for partitioned table + {'rows': [[3]]}, # Replica count for orderFormFieldData + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs', '--sizeMB', '200']) + + assert result.exit_code == 0 + assert 'Found 3 table/partition(s) with problematic translogs' in result.output + + # Check non-partitioned commands + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData" SET ("number_of_replicas" = 0)' in result.output + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData" SET ("number_of_replicas" = 2)' in result.output + + # Check partitioned commands (handle Rich line wrapping) + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData_events"' in result.output + assert 'PARTITION' in result.output + assert 
'("sync_day"=1757376000000)' in result.output + # Check that both 0 and 1 replica settings are present for partitioned table + assert 'SET ("number_of_replicas" = 0)' in result.output + assert 'SET ("number_of_replicas" = 1)' in result.output + + # Check NULL partition handled as non-partitioned + assert 'ALTER TABLE "TURVO"."orderFormFieldData" SET ("number_of_replicas" = 0)' in result.output + assert 'ALTER TABLE "TURVO"."orderFormFieldData" SET ("number_of_replicas" = 3)' in result.output + + def test_query_parameters(self): + """Test that the query is called with correct parameters""" + self.mock_client.execute_query.return_value = {'rows': []} + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs', '--sizeMB', '500']) + + # Verify the query was called twice (individual shards + summary) + assert self.mock_client.execute_query.call_count == 2 + call_args = self.mock_client.execute_query.call_args + query = call_args[0][0] + parameters = call_args[0][1] + + assert 'sh.translog_stats[\'uncommitted_size\'] > ? 
* 1024^2' in query + assert 'primary=FALSE' in query + assert 'GROUP BY' in query + assert 'max_translog_uncommitted_mb DESC' in query + assert parameters == [500, 500, 500] + + def test_execute_flag_user_confirmation_no(self): + """Test --execute flag with user declining confirmation""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 1, 7011.8, 5, 5, 12.4, 12.1] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Replica count query + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client), \ + patch('click.confirm', return_value=False): + result = self.runner.invoke(main, ['problematic-translogs', '--execute']) + + assert result.exit_code == 0 + assert 'Operation cancelled by user' in result.output + # Should be called 3 times: individual shards query, summary query, replica count query + assert self.mock_client.execute_query.call_count == 3 + + def test_execute_flag_user_confirmation_set_zero_only(self): + """Test --execute flag with user confirming reroute but skipping replica commands""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 1, 7011.8, 5, 5, 12.4, 12.1] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Replica count query + None, # REROUTE CANCEL 
execution + ] + self.mock_client.test_connection.return_value = True + + # Confirm overall execution, confirm REROUTE CANCEL, skip SET to 0 + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client), \ + patch('click.confirm', side_effect=[True, True, False]): + result = self.runner.invoke(main, ['problematic-translogs', '--execute']) + + assert result.exit_code == 0 + assert 'Executing commands individually' in result.output + assert 'executed successfully' in result.output + assert 'skipped' in result.output + + # Should be called 4 times: individual query, summary query, replica count, reroute execution + assert self.mock_client.execute_query.call_count == 4 + + def test_execute_flag_user_confirmation_both_steps(self): + """Test --execute flag with user confirming all commands""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 1, 7011.8, 5, 5, 12.4, 12.1] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Replica count query + None, # REROUTE CANCEL execution + None, # SET to 0 execution + None, # RESTORE execution + ] + self.mock_client.test_connection.return_value = True + + # Confirm overall execution, confirm REROUTE CANCEL, confirm SET to 0, confirm RESTORE + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client), \ + patch('click.confirm', side_effect=[True, True, True, True]): + result = self.runner.invoke(main, ['problematic-translogs', '--execute']) + + assert result.exit_code == 0 + assert 'Executing commands individually' in result.output + assert 'executed successfully' in result.output + assert 'Execution Summary:' in result.output + assert 'Successful: 3' in 
result.output + + # Should be called 6 times: individual query, summary query, replica count, reroute, set to 0, restore + assert self.mock_client.execute_query.call_count == 6 + + def test_execution_failure_handling(self): + """Test handling of command execution failures""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 1, 7011.8, 5, 5, 12.4, 12.1] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Replica count query + Exception("REROUTE failed"), # REROUTE CANCEL execution fails + ] + self.mock_client.test_connection.return_value = True + + # Confirm overall execution, confirm REROUTE CANCEL (which fails), then decline next command + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client), \ + patch('click.confirm', side_effect=[True, True, False]): + result = self.runner.invoke(main, ['problematic-translogs', '--execute']) + + assert result.exit_code == 0 + assert 'failed' in result.output + assert 'REROUTE failed' in result.output + assert 'Failed: 1' in result.output + + def test_skip_tables_with_unknown_replicas(self): + """Test skipping tables with unknown replica counts""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 1, 7011.8, 5, 5, 12.4, 12.1] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + Exception("Cannot get replica 
count"), # Replica count query fails + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs']) + + assert result.exit_code == 0 + assert 'Warning: Could not retrieve replica count' in result.output + assert 'Skipping' in result.output + assert 'unknown replica count' in result.output + assert 'REROUTE CANCEL commands' in result.output + assert '1 REROUTE CANCEL commands + 0 replica management commands' in result.output + + def test_skip_tables_with_zero_replicas(self): + """Test skipping tables that already have 0 replicas""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'shipmentFormFieldData', None, 14, 'data-hot-6', 7011.8] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'shipmentFormFieldData', None, None, 1, 7011.8, 5, 5, 12.4, 12.1] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[0]]}, # Replica count query returns 0 + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs']) + + assert result.exit_code == 0 + assert 'Skipping' in result.output + assert 'already has 0 replicas' in result.output + assert 'REROUTE CANCEL commands' in result.output + assert '1 REROUTE CANCEL commands + 0 replica management commands' in result.output + + def test_database_error_handling(self): + """Test handling of database connection errors""" + self.mock_client.execute_query.side_effect = Exception("Connection failed") + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, 
['problematic-translogs']) + + assert result.exit_code == 0 + assert 'Error analyzing problematic translogs' in result.output + assert 'Connection failed' in result.output + + def test_default_size_mb(self): + """Test that default sizeMB is 300""" + self.mock_client.execute_query.return_value = {'rows': []} + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs']) + + assert result.exit_code == 0 + assert '300MB' in result.output + + # Verify query was called with default value + call_args = self.mock_client.execute_query.call_args + parameters = call_args[0][1] + assert parameters == [300, 300, 300] + + def test_partitioned_and_non_partitioned_replica_queries(self): + """Test that correct replica queries are used for partitioned vs non-partitioned tables""" + # Individual shards data (6 columns) + individual_shards_data = [ + ['TURVO', 'partitioned_table', '("id"=123)', 14, 'data-hot-6', 500.0], + ['TURVO', 'regular_table', None, 5, 'data-hot-1', 400.0] + ] + # Summary data (10 columns from query, displayed as 8 by combining P/R columns) + summary_data = [ + ['TURVO', 'partitioned_table', '("id"=123)', 'part123', 1, 500.0, 3, 3, 5.5, 5.2], + ['TURVO', 'regular_table', None, None, 1, 400.0, 2, 4, 3.1, 6.2] + ] + self.mock_client.execute_query.side_effect = [ + {'rows': individual_shards_data}, # Individual shards query + {'rows': summary_data}, # Summary query + {'rows': [[1]]}, # Partitioned table replica count + {'rows': [[2]]}, # Regular table replica count + ] + self.mock_client.test_connection.return_value = True + + with patch('xmover.cli.CrateDBClient', return_value=self.mock_client): + result = self.runner.invoke(main, ['problematic-translogs']) + + assert result.exit_code == 0 + + # Verify the replica queries were called correctly + calls = self.mock_client.execute_query.call_args_list + + # First two calls are the 
individual shards and summary queries + assert len(calls) == 4 + + # Third call should be partitioned table replica query + partitioned_query = calls[2][0][0] + assert 'information_schema.table_partitions' in partitioned_query + assert 'partition_ident' in partitioned_query + assert calls[2][0][1] == ['partitioned_table', 'TURVO', 'part123'] + + # Fourth call should be regular table replica query + regular_query = calls[3][0][0] + assert 'information_schema.tables' in regular_query + assert 'partition_ident' not in regular_query + assert calls[3][0][1] == ['regular_table', 'TURVO'] diff --git a/tests/test_recovery_monitor.py b/tests/test_recovery_monitor.py new file mode 100644 index 0000000..1821e5f --- /dev/null +++ b/tests/test_recovery_monitor.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python3 +""" +Test script for XMover recovery monitoring functionality + +This script tests the recovery monitoring features by creating mock recovery scenarios +and verifying the output formatting and data parsing. 
+""" + +import sys +import os +from unittest.mock import Mock, patch +from typing import Dict, List, Any + +# Add the src directory to the path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) + +from xmover.database import CrateDBClient, RecoveryInfo +from xmover.analyzer import RecoveryMonitor + + +def create_mock_allocation(schema_name: str, table_name: str, shard_id: int, + current_state: str, node_id: str) -> Dict[str, Any]: + """Create a mock allocation response""" + return { + 'schema_name': schema_name, + 'table_name': table_name, + 'shard_id': shard_id, + 'current_state': current_state, + 'node_id': node_id, + 'explanation': None + } + + +def create_mock_shard_detail(schema_name: str, table_name: str, shard_id: int, + node_name: str, node_id: str, recovery_type: str, + stage: str, files_percent: float, bytes_percent: float, + total_time: int, size: int, is_primary: bool, + translog_size: int = 0, translog_uncommitted_size: int = 0) -> Dict[str, Any]: + """Create a mock shard detail response""" + return { + 'schema_name': schema_name, + 'table_name': table_name, + 'shard_id': shard_id, + 'node_name': node_name, + 'node_id': node_id, + 'routing_state': 'RELOCATING', + 'state': 'RECOVERING', + 'recovery': { + 'type': recovery_type, + 'stage': stage, + 'files': { + 'percent': files_percent, + 'recovered': int(files_percent * 100), + 'used': 100 + }, + 'size': { + 'percent': bytes_percent, + 'recovered': int(bytes_percent * size), + 'used': size + }, + 'total_time': total_time + }, + 'size': size, + 'primary': is_primary, + 'translog_size': translog_size, + 'translog_uncommitted_size': translog_uncommitted_size, + 'max_seq_no': None + } + + +def test_recovery_info_parsing(): + """Test RecoveryInfo dataclass and its properties""" + print("Testing RecoveryInfo parsing...") + + recovery = RecoveryInfo( + schema_name='CURVO', + table_name='PartioffD', + partition_values=None, + shard_id=19, + node_name='data-hot-1', + 
node_id='ZH6fBanGSjanGqeSh-sw0A', + recovery_type='PEER', + stage='DONE', + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state='RELOCATING', + current_state='RELOCATING', + is_primary=False, + size_bytes=56565284209, + translog_size_bytes=0, + translog_uncommitted_bytes=0, + max_seq_no=None, + primary_max_seq_no=None + ) + + # Test properties + assert recovery.overall_progress == 100.0, f"Expected 100.0, got {recovery.overall_progress}" + assert abs(recovery.size_gb - 52.681) < 0.01, f"Expected ~52.681, got {recovery.size_gb:.3f}" + assert recovery.shard_type == "REPLICA", f"Expected REPLICA, got {recovery.shard_type}" + assert recovery.total_time_seconds == 1555.907, f"Expected 1555.907, got {recovery.total_time_seconds}" + + print("✅ RecoveryInfo parsing tests passed") + + +def test_database_client_parsing(): + """Test database client recovery parsing logic""" + print("Testing database client recovery parsing...") + + # Create a real client instance to test the parsing method + client = CrateDBClient.__new__(CrateDBClient) # Create without calling __init__ + + # Create test data + allocation = create_mock_allocation('CURVO', 'PartioffD', 19, 'RELOCATING', 'node1') + shard_detail = create_mock_shard_detail( + 'CURVO', 'PartioffD', 19, 'data-hot-1', 'node1', + 'PEER', 'DONE', 100.0, 100.0, 1555907, 56565284209, False + ) + + # Test the parsing method directly + recovery_info = client._parse_recovery_info(allocation, shard_detail) + + assert recovery_info.recovery_type == 'PEER' + assert recovery_info.stage == 'DONE' + assert recovery_info.overall_progress == 100.0 + + print("✅ Database client parsing tests passed") + + +def test_recovery_monitor_formatting(): + """Test recovery monitor display formatting""" + print("Testing recovery monitor formatting...") + + # Create mock client + mock_client = Mock(spec=CrateDBClient) + monitor = RecoveryMonitor(mock_client) + + # Create test recovery data + recoveries = [ + RecoveryInfo( + 
schema_name='CURVO', + table_name='PartioffD', + partition_values=None, + shard_id=19, + node_name='data-hot-1', + node_id='node1', + recovery_type='PEER', + stage='DONE', + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state='RELOCATING', + current_state='RELOCATING', + is_primary=False, + size_bytes=56565284209, + translog_size_bytes=0, + translog_uncommitted_bytes=0, + max_seq_no=None, + primary_max_seq_no=None + ), + RecoveryInfo( + schema_name='CURVO', + table_name='orderTracking', + partition_values=None, + shard_id=7, + node_name='data-hot-2', + node_id='node2', + recovery_type='DISK', + stage='INDEX', + files_percent=75.5, + bytes_percent=67.8, + total_time_ms=890234, + routing_state='INITIALIZING', + current_state='INITIALIZING', + is_primary=True, + size_bytes=25120456789, + translog_size_bytes=0, + translog_uncommitted_bytes=0, + max_seq_no=None, + primary_max_seq_no=None + ) + ] + + # Test summary generation + summary = monitor.get_recovery_summary(recoveries) + + assert summary['total_recoveries'] == 2 + assert 'PEER' in summary['by_type'] + assert 'DISK' in summary['by_type'] + assert summary['by_type']['PEER']['count'] == 1 + assert summary['by_type']['DISK']['count'] == 1 + + # Test display formatting + display_output = monitor.format_recovery_display(recoveries) + + assert "Active Shard Recoveries (2 total)" in display_output + assert "PEER Recoveries (1)" in display_output + assert "DISK Recoveries (1)" in display_output + assert "PartioffD" in display_output + assert "orderTracking" in display_output + + print("✅ Recovery monitor formatting tests passed") + + +def test_empty_recovery_handling(): + """Test handling of no active recoveries""" + print("Testing empty recovery handling...") + + mock_client = Mock(spec=CrateDBClient) + monitor = RecoveryMonitor(mock_client) + + # Test empty list + empty_recoveries = [] + + summary = monitor.get_recovery_summary(empty_recoveries) + assert summary['total_recoveries'] == 0 
+ assert summary['by_type'] == {} + + display_output = monitor.format_recovery_display(empty_recoveries) + assert "No active shard recoveries found" in display_output + + print("✅ Empty recovery handling tests passed") + + +def test_recovery_type_filtering(): + """Test filtering by recovery type""" + print("Testing recovery type filtering...") + + mock_client = Mock(spec=CrateDBClient) + + # Mock the get_all_recovering_shards method + mock_recoveries = [ + RecoveryInfo( + schema_name='test', table_name='table1', partition_values=None, shard_id=1, + node_name='node1', node_id='n1', recovery_type='PEER', + stage='DONE', files_percent=100.0, bytes_percent=100.0, + total_time_ms=1000, routing_state='RELOCATING', + current_state='RELOCATING', is_primary=True, size_bytes=1000000, + translog_size_bytes=0, translog_uncommitted_bytes=0, + max_seq_no=None, primary_max_seq_no=None + ), + RecoveryInfo( + schema_name='test', table_name='table2', partition_values=None, shard_id=2, + node_name='node2', node_id='n2', recovery_type='DISK', + stage='INDEX', files_percent=50.0, bytes_percent=75.0, + total_time_ms=2000, routing_state='INITIALIZING', + current_state='INITIALIZING', is_primary=False, size_bytes=2000000, + translog_size_bytes=0, translog_uncommitted_bytes=0, + max_seq_no=None, primary_max_seq_no=None + ) + ] + + mock_client.get_all_recovering_shards.return_value = mock_recoveries + + monitor = RecoveryMonitor(mock_client) + + # Test filtering + peer_only = monitor.get_cluster_recovery_status(recovery_type_filter='PEER') + assert len(peer_only) == 1 + assert peer_only[0].recovery_type == 'PEER' + + disk_only = monitor.get_cluster_recovery_status(recovery_type_filter='DISK') + assert len(disk_only) == 1 + assert disk_only[0].recovery_type == 'DISK' + + all_recoveries = monitor.get_cluster_recovery_status(recovery_type_filter='all') + assert len(all_recoveries) == 2 + + print("✅ Recovery type filtering tests passed") + + +def main(): + """Run all tests""" + print("🧪 Running 
XMover Recovery Monitor Tests") + print("=" * 50) + + try: + test_recovery_info_parsing() + test_database_client_parsing() + test_recovery_monitor_formatting() + test_empty_recovery_handling() + test_recovery_type_filtering() + + print("\n🎉 All tests passed successfully!") + print("\n📋 Test Summary:") + print(" ✅ RecoveryInfo data class and properties") + print(" ✅ Database client parsing logic") + print(" ✅ Recovery monitor display formatting") + print(" ✅ Empty recovery state handling") + print(" ✅ Recovery type filtering") + + print("\n🚀 Recovery monitoring feature is ready for use!") + + except Exception as e: + print(f"\n❌ Test failed: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/validate_rules.py b/validate_rules.py new file mode 100644 index 0000000..f1cedfb --- /dev/null +++ b/validate_rules.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +""" +Standalone rules validation script for XMover shard size monitoring rules. + +This script validates the YAML configuration file used by the shard size monitor +to ensure proper syntax, required fields, and rule structure. 
+ +Usage: + python validate_rules.py [config_file] + python validate_rules.py config/shard_size_rules.yaml +""" + +import sys +import argparse +from pathlib import Path + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent / "src")) + +try: + from xmover.shard_size_monitor import validate_rules_file +except ImportError as e: + print(f"Error importing validation module: {e}") + print("Make sure you're running from the xmover project root directory") + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser( + description="Validate XMover shard size monitoring rules configuration", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python validate_rules.py # Validate default rules + python validate_rules.py config/shard_size_rules.yaml # Validate specific file + python validate_rules.py my_custom_rules.yaml # Validate custom rules + """ + ) + + parser.add_argument( + 'config_file', + nargs='?', + default='config/shard_size_rules.yaml', + help='Path to rules configuration file (default: config/shard_size_rules.yaml)' + ) + + args = parser.parse_args() + + # Resolve path relative to script location + config_path = Path(args.config_file) + if not config_path.is_absolute(): + config_path = Path(__file__).parent / config_path + + print(f"Validating rules configuration: {config_path}") + print("-" * 60) + + if validate_rules_file(str(config_path)): + print("\n✅ Validation completed successfully!") + sys.exit(0) + else: + print("\n❌ Validation failed!") + sys.exit(1) + + +if __name__ == '__main__': + main() \ No newline at end of file