Skip to content

Commit 51da04d

Browse files
committed
Trap error code and wait for process to complete
* refactored dumper/batch runner to use "open" and "close" methods * called open and close methods using a try..except statement during usage * added general exception logging to cli, so that cli doesnt randomly output stacktraces (debatable!). I made this change because it was strange that the integration test was getting a failure on a test that was supposed to error, but it seemed like a logical choice on review.
1 parent 31865ad commit 51da04d

9 files changed

Lines changed: 179 additions & 109 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1818
## [Unreleased]
1919
## Fixed
2020
- Fixed a bug where MSSQL provider would override the server connection with (local).
21+
- Fixed a bug where failing dump & restore operations would not be caught by pynonymizer. Pynonymizer will now error when mysql/mysqldump or psql/pgdump returns an error code.
2122

2223
## [2.3.1] 2024-05-27
2324
## Fixed

pynonymizer/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def default(
313313
+ f"See https://github.com/rwnx/pynonymizer/blob/main/doc/strategyfiles.md#column-strategy-fake_update for usage information."
314314
)
315315
raise typer.Exit(1)
316-
except CalledProcessError as error:
316+
except Exception as error:
317317
root_logger.error(error)
318318
raise typer.Exit(1)
319319

pynonymizer/database/mysql/__init__.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,14 @@ def anonymize_table(progressbar, table_strategy: TableStrategy):
186186

187187
def restore_database(self, input_path):
188188
try:
189-
batch_processor = self.__runner.open_batch_processor()
190-
restore(self.progress, input_path, batch_processor)
189+
restore_pipe = self.__runner.open()
190+
restore(self.progress, input_path, restore_pipe)
191191
finally:
192-
self.__runner.close_batch_processor()
192+
self.__runner.close()
193193

194194
def dump_database(self, output_path):
195-
dump_process = self.__dumper.open_dumper()
196-
dump(self.progress, output_path, dump_process, self.__estimate_dumpsize())
195+
try:
196+
dump_stream = self.__dumper.open()
197+
dump(self.progress, output_path, dump_stream, self.__estimate_dumpsize())
198+
finally:
199+
self.__dumper.close()

pynonymizer/database/mysql/execution.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
logger = logging.getLogger(__name__)
88

9+
RESTORE_CMD = "mysql"
10+
DUMP_CMD = "mysqldump"
11+
912

1013
def _optional_arg(condition, value):
1114
if condition:
@@ -34,21 +37,16 @@ def __init__(
3437
self.db_name = db_name
3538
self.db_port = db_port
3639
self.additional_opts = shlex.split(additional_opts)
40+
self.process = None
3741

3842
if db_name is None:
3943
raise ValueError("db_name cannot be null")
4044

41-
if not (shutil.which("mysqldump")):
45+
if not (shutil.which(DUMP_CMD)):
4246
raise DependencyError(
43-
"mysqldump", "The 'mysqldump' client must be present in the $PATH"
47+
DUMP_CMD, f"The '{DUMP_CMD}' client must be present in the $PATH"
4448
)
4549

46-
def __ifdef(self):
47-
if self.db_host:
48-
return ["--host", self.db_host]
49-
else:
50-
return []
51-
5250
def __get_base_params(self):
5351
return [
5452
*_optional_arg_pair(["--host", self.db_host]),
@@ -57,14 +55,25 @@ def __get_base_params(self):
5755
*_optional_arg(self.db_pass, [f"-p{self.db_pass}"]),
5856
]
5957

60-
def open_dumper(self):
61-
return subprocess.Popen(
62-
["mysqldump"]
58+
def open(self):
59+
self.close()
60+
61+
self.process = subprocess.Popen(
62+
[DUMP_CMD]
6363
+ self.__get_base_params()
6464
+ self.additional_opts
6565
+ [self.db_name],
6666
stdout=subprocess.PIPE,
67-
).stdout
67+
)
68+
return self.process.stdout
69+
70+
def close(self):
71+
if self.process is not None:
72+
self.process.stdout.close()
73+
return_code = self.process.wait()
74+
if return_code > 0:
75+
raise DependencyError(DUMP_CMD, "returned error during run")
76+
self.process = None
6877

6978

7079
class MySqlCmdRunner:
@@ -88,9 +97,9 @@ def __init__(
8897
if db_name is None:
8998
raise ValueError("db_name cannot be null")
9099

91-
if not (shutil.which("mysql")):
100+
if not (shutil.which(RESTORE_CMD)):
92101
raise DependencyError(
93-
"mysql", "The 'mysql' client must be present in the $PATH"
102+
RESTORE_CMD, f"The '{RESTORE_CMD}' client must be present in the $PATH"
94103
)
95104

96105
def __mask_subprocess_error(self, error):
@@ -101,7 +110,7 @@ def __mask_subprocess_error(self, error):
101110
This might be better as a wrapping exception, rather than messing around inside other people's classes.
102111
"""
103112
error.cmd = [
104-
"mysql",
113+
RESTORE_CMD,
105114
"-h",
106115
self.db_host,
107116
"-P",
@@ -114,7 +123,7 @@ def __mask_subprocess_error(self, error):
114123

115124
def __get_base_params(self):
116125
return [
117-
"mysql",
126+
RESTORE_CMD,
118127
*_optional_arg_pair(["-h", self.db_host]),
119128
*_optional_arg_pair(["-P", self.db_port]),
120129
*_optional_arg_pair(["-u", self.db_user]),
@@ -173,16 +182,18 @@ def get_single_result(self, statement):
173182
except subprocess.CalledProcessError as error:
174183
self.__mask_subprocess_error(error)
175184

176-
def open_batch_processor(self):
177-
self.close_batch_processor()
185+
def open(self):
186+
self.close()
178187
self.process = subprocess.Popen(
179188
self.__get_base_params() + self.additional_opts + [self.db_name],
180189
stdin=subprocess.PIPE,
181190
)
182191
return self.process.stdin
183192

184-
def close_batch_processor(self):
193+
def close(self):
185194
if self.process is not None:
186195
self.process.stdin.close()
187-
self.process.wait()
196+
return_code = self.process.wait()
197+
if return_code > 0:
198+
raise DependencyError(RESTORE_CMD, "returned error during run")
188199
self.process = None

pynonymizer/database/postgres/__init__.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,14 @@ def anonymize_table(progressbar, table_strategy: TableStrategy):
186186

187187
def restore_database(self, input_path):
188188
try:
189-
batch_processor = self.__runner.open_batch_processor()
190-
restore(self.progress, input_path, batch_processor)
189+
restore_pipe = self.__runner.open()
190+
restore(self.progress, input_path, restore_pipe)
191191
finally:
192-
self.__runner.close_batch_processor()
192+
self.__runner.close()
193193

194194
def dump_database(self, output_path):
195-
dump_process = self.__dumper.open_dumper()
196-
dump(self.progress, output_path, dump_process, self.__estimate_dumpsize())
195+
try:
196+
dump_stream = self.__dumper.open()
197+
dump(self.progress, output_path, dump_stream, self.__estimate_dumpsize())
198+
finally:
199+
self.__dumper.close()

pynonymizer/database/postgres/execution.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
Seperate everything that touches actual query exec into its own module
1010
"""
1111

12+
RESTORE_CMD = "psql"
13+
DUMP_CMD = "pg_dump"
14+
1215
logger = logging.getLogger(__name__)
1316

1417

@@ -22,15 +25,16 @@ def __init__(
2225
self.db_name = db_name
2326
self.db_port = db_port
2427
self.additional_opts = shlex.split(additional_opts)
28+
self.process = None
2529

26-
if not (shutil.which("pg_dump")):
30+
if not (shutil.which(DUMP_CMD)):
2731
raise DependencyError(
28-
"pg_dump", "The 'pg_dump' client must be present in the $PATH"
32+
DUMP_CMD, f"The '{DUMP_CMD}' client must be present in the $PATH"
2933
)
3034

3135
def __get_base_params(self):
3236
return [
33-
"pg_dump",
37+
DUMP_CMD,
3438
"--host",
3539
self.db_host,
3640
"--port",
@@ -46,12 +50,22 @@ def __get_env(self):
4650

4751
return new_env
4852

49-
def open_dumper(self):
50-
return subprocess.Popen(
53+
def open(self):
54+
self.close()
55+
self.process = subprocess.Popen(
5156
self.__get_base_params() + self.additional_opts + [self.db_name],
5257
stdout=subprocess.PIPE,
5358
env=self.__get_env(),
54-
).stdout
59+
)
60+
return self.process.stdout
61+
62+
def close(self):
63+
if self.process is not None:
64+
self.process.stdout.close()
65+
return_code = self.process.wait()
66+
if return_code > 0:
67+
raise DependencyError(DUMP_CMD, "returned error during run")
68+
self.process = None
5569

5670

5771
class PSqlCmdRunner:
@@ -66,14 +80,14 @@ def __init__(
6680
self.additional_opts = shlex.split(additional_opts)
6781
self.process = None
6882

69-
if not (shutil.which("psql")):
83+
if not (shutil.which(RESTORE_CMD)):
7084
raise DependencyError(
71-
"psql", "The 'psql' client must be present in the $PATH"
85+
RESTORE_CMD, "The f'{RESTORE_CMD}' client must be present in the $PATH"
7286
)
7387

7488
def __get_base_params(self):
7589
return [
76-
"psql",
90+
RESTORE_CMD,
7791
"--host",
7892
self.db_host,
7993
"--port",
@@ -147,8 +161,8 @@ def get_single_result(self, statement):
147161
env=self.__get_env(),
148162
).decode()
149163

150-
def open_batch_processor(self):
151-
self.close_batch_processor()
164+
def open(self):
165+
self.close()
152166
self.process = subprocess.Popen(
153167
self.__get_base_params()
154168
+ ["--dbname", self.db_name, "--quiet"]
@@ -158,8 +172,10 @@ def open_batch_processor(self):
158172
)
159173
return self.process.stdin
160174

161-
def close_batch_processor(self):
175+
def close(self):
162176
if self.process is not None:
163177
self.process.stdin.close()
164-
self.process.wait()
178+
return_code = self.process.wait()
179+
if return_code > 0:
180+
raise DependencyError(RESTORE_CMD, "returned error during run")
165181
self.process = None

tests/conftest.py

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,70 +2,6 @@
22
from unittest.mock import Mock
33

44
from pynonymizer.strategy.database import DatabaseStrategy
5-
from pynonymizer.strategy.table import (
6-
TruncateTableStrategy,
7-
UpdateColumnsTableStrategy,
8-
DeleteTableStrategy,
9-
)
10-
from pynonymizer.strategy.update_column import (
11-
UniqueEmailUpdateColumnStrategy,
12-
UniqueLoginUpdateColumnStrategy,
13-
FakeUpdateColumnStrategy,
14-
EmptyUpdateColumnStrategy,
15-
)
16-
from pynonymizer.fake import FakeDataType
17-
18-
19-
@pytest.fixture
20-
def simple_strategy_fake_generator():
21-
return Mock(
22-
get_data_type=Mock(return_value=FakeDataType.STRING),
23-
get_value=Mock(return_value="TEST_VALUE"),
24-
)
25-
26-
27-
@pytest.fixture
28-
def simple_strategy_update_fake_column(simple_strategy_fake_generator):
29-
return FakeUpdateColumnStrategy(
30-
"column3", simple_strategy_fake_generator, "user_name", where="BANANAS < 3"
31-
)
32-
33-
34-
@pytest.fixture
35-
def simple_strategy_update(simple_strategy_update_fake_column):
36-
return UpdateColumnsTableStrategy(
37-
"update_table_where_3",
38-
[
39-
UniqueEmailUpdateColumnStrategy("column1", where="BANANAS < 5"),
40-
UniqueLoginUpdateColumnStrategy("column2", where="BANANAS < 5"),
41-
simple_strategy_update_fake_column,
42-
EmptyUpdateColumnStrategy("column4"),
43-
],
44-
)
45-
46-
47-
@pytest.fixture
48-
def simple_strategy_delete():
49-
return DeleteTableStrategy("delete_table")
50-
51-
52-
@pytest.fixture
53-
def simple_strategy_trunc():
54-
return TruncateTableStrategy("truncate_table")
55-
56-
57-
@pytest.fixture
58-
def simple_strategy_schema_trunc():
59-
return TruncateTableStrategy("truncate_schema_table", schema="schema")
60-
61-
62-
@pytest.fixture
63-
def simple_strategy(
64-
simple_strategy_trunc, simple_strategy_update, simple_strategy_delete
65-
):
66-
return DatabaseStrategy(
67-
[simple_strategy_trunc, simple_strategy_update, simple_strategy_delete]
68-
)
695

706

717
# MONKEYPATCH: assert_not_called_with

0 commit comments

Comments
 (0)