Skip to content

Commit 39e388c

Browse files
Skip backup to worker when low on disk space
1 parent e000127 commit 39e388c

File tree

2 files changed

+85
-0
lines changed

2 files changed

+85
-0
lines changed

pioreactor/actions/leader/backup_database.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
import click
55

6+
from pathlib import Path
7+
import subprocess
8+
69
from pioreactor.cluster_management import get_active_workers_in_inventory
710
from pioreactor.config import config
811
from pioreactor.exc import RsyncError
@@ -18,6 +21,29 @@
1821
from pioreactor.whoami import UNIVERSAL_EXPERIMENT
1922

2023

24+
def _remote_available_space(address: str, path: str) -> int | None:
25+
"""Return available bytes on remote machine or ``None`` on failure."""
26+
try:
27+
result = subprocess.run(
28+
["ssh", "-o", "ConnectTimeout=5", address, "df", "-PB1", path],
29+
check=True,
30+
stdout=subprocess.PIPE,
31+
stderr=subprocess.PIPE,
32+
text=True,
33+
)
34+
except subprocess.CalledProcessError:
35+
return None
36+
37+
try:
38+
lines = result.stdout.strip().splitlines()
39+
if len(lines) >= 2:
40+
return int(lines[1].split()[3])
41+
except Exception:
42+
return None
43+
44+
return None
45+
46+
2147
def count_writes_occurring() -> int:
    """Return the ``inserts_in_last_60s`` value from the ``mqtt_to_db_streaming`` store.

    Falls back to ``0`` when the key is not present.
    """
    with local_intermittent_storage("mqtt_to_db_streaming") as cache:
        recent_inserts = cache.get("inserts_in_last_60s", 0)
    return recent_inserts
@@ -89,6 +115,17 @@ def backup_database(output_file: str, force: bool = False, backup_to_workers: in
89115
continue
90116

91117
logger.debug(f"Attempting backing up database to {backup_unit}.")
118+
available = _remote_available_space(
119+
resolve_to_address(backup_unit), str(Path(output_file).parent)
120+
)
121+
if available is not None and available < Path(output_file).stat().st_size:
122+
logger.debug(
123+
f"Skipping backup to {backup_unit}. Not enough disk space."
124+
)
125+
logger.warning(
126+
f"Unable to backup database to {backup_unit}. Not enough disk space."
127+
)
128+
continue
92129
try:
93130
rsync(
94131
"-hz",
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import sqlite3
2+
from contextlib import contextmanager
3+
from types import SimpleNamespace
4+
from unittest.mock import MagicMock, patch
5+
6+
from pioreactor.actions.leader.backup_database import backup_database
7+
from pioreactor.config import config
8+
9+
10+
@contextmanager
def dummy_lifecycle(*args, **kwargs):
    """Context manager that ignores its arguments and yields an object exposing ``job_key``.

    Used below as a replacement for ``long_running_managed_lifecycle``.
    """
    placeholder = SimpleNamespace()
    placeholder.job_key = "backup_database"
    yield placeholder
13+
14+
15+
def test_skip_backup_when_worker_has_no_space(tmp_path):
    """``backup_database`` must not call ``rsync`` when the worker reports 0 free bytes.

    The lifecycle, logger, worker inventory, remote free-space probe and rsync
    are all patched, so the test touches neither the network nor other machines.
    """
    db_path = tmp_path / "db.sqlite"
    output = tmp_path / "backup.sqlite"

    # Build a minimal SQLite database to serve as the configured source;
    # close the connection even if setup fails.
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("CREATE TABLE t(id INTEGER)")
        conn.commit()
    finally:
        conn.close()

    # ``config`` is shared process-wide: remember the previous value and restore
    # it afterwards so this test cannot leak its temp path into other tests.
    storage = config["storage"]
    previous_database = storage.get("database")
    storage["database"] = str(db_path)
    try:
        with (
            patch(
                "pioreactor.actions.leader.backup_database.long_running_managed_lifecycle",
                dummy_lifecycle,
            ),
            patch(
                "pioreactor.actions.leader.backup_database.create_logger",
                return_value=MagicMock(),
            ),
            patch(
                "pioreactor.actions.leader.backup_database.get_active_workers_in_inventory",
                return_value=["worker1"],
            ),
            patch(
                "pioreactor.actions.leader.backup_database._remote_available_space",
                return_value=0,
            ),
            patch(
                "pioreactor.actions.leader.backup_database.rsync",
            ) as mock_rsync,
        ):
            backup_database(str(output), force=True, backup_to_workers=1)
            mock_rsync.assert_not_called()
    finally:
        if previous_database is None:
            storage.pop("database", None)
        else:
            storage["database"] = previous_database

0 commit comments

Comments
 (0)