Skip to content

Commit 9310726

Browse files
committed
Split database calls into DML, DDL, and server calls.
This allows us to handle the multiple result sets returned by DML statements separately from DDL statements and from statements that apply to the server rather than to a database.
1 parent 8f4cdec commit 9310726

File tree

1 file changed

+26
-23
lines changed

1 file changed

+26
-23
lines changed

pynonymizer/database/mssql/__init__.py

+26-23
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,9 @@ def __db_connection(self):
136136

137137
return self.__db_conn
138138

139-
def __execute(self, statement, *args):
139+
def __execute_dml(self, statement, *args):
140140
logger.debug(statement, args)
141-
c = self.__connection()
141+
c = self.__db_connection()
142142
# If timeout is set, then apply it to the connection. PyODBC will then assign that value to the Cursor created during execute()
143143
if self.timeout:
144144
c.timeout = self.timeout
@@ -149,18 +149,21 @@ def __execute(self, statement, *args):
149149
pass
150150
return cur
151151

152-
def __db_execute(self, statement, *args):
152+
def __execute_ddl(self, statement, *args):
153153
logger.debug(statement, args)
154154
c = self.__db_connection()
155155
# If timeout is set, then apply it to the connection. PyODBC will then assign that value to the Cursor created during execute()
156156
if self.timeout:
157157
c.timeout = self.timeout
158-
# If the SQL query causes multiple messages to come back (either extra row counts from triggers, or PRINT statements),
159-
# then we need to keep running nextset() for PyODBC to get the query to run to completion
160-
cur = c.execute(statement, *args)
161-
while cur.nextset():
162-
pass
163-
return cur
158+
return c.execute(statement, *args)
159+
160+
def __execute_server(self, statement, *args):
161+
logger.debug(statement, args)
162+
c = self.__connection()
163+
# If timeout is set, then apply it to the connection. PyODBC will then assign that value to the Cursor created during execute()
164+
if self.timeout:
165+
c.timeout = self.timeout
166+
return c.execute(statement, *args)
164167

165168
def __get_path(self, filepath):
166169
if "\\" in filepath:
@@ -179,7 +182,7 @@ def __get_default_datafolder(self):
179182
checking the model db seems like a good 'boring' solution
180183
:return: Default data directory e.g. "C:\\DATA"
181184
"""
182-
datafile = self.__execute(
185+
datafile = self.__execute_server(
183186
"""
184187
SELECT physical_name
185188
FROM sys.master_files mf
@@ -197,7 +200,7 @@ def __get_default_logfolder(self):
197200
__get_default_datafolder: see for more info
198201
:return:
199202
"""
200-
logfile = self.__execute(
203+
logfile = self.__execute_server(
201204
"""
202205
SELECT physical_name
203206
FROM sys.master_files mf
@@ -217,7 +220,7 @@ def __get_file_moves(self, input_path):
217220
datadir = self.__get_default_datafolder()
218221
logdir = self.__get_default_logfolder()
219222

220-
filelist = self.__execute(
223+
filelist = self.__execute_server(
221224
f"RESTORE FILELISTONLY FROM DISK = ?;", input_path
222225
).fetchall()
223226

@@ -255,7 +258,7 @@ def __run_scripts(self, script_list, title=""):
255258

256259
for i, script in enumerate(script_list):
257260
logger.info(f'Running {title} script #{i} "{script[:50]}"')
258-
cursor = self.__db_execute(script)
261+
cursor = self.__execute_dml(script)
259262
results = None
260263
try:
261264
results = cursor.fetchall()
@@ -272,10 +275,10 @@ def __create_seed_table(self, qualifier_map):
272275
SEED_TABLE_NAME, ",".join(seed_column_lines)
273276
)
274277

275-
self.__db_execute(create_statement)
278+
self.__execute_ddl(create_statement)
276279

277280
def __drop_seed_table(self):
278-
self.__db_execute("DROP TABLE IF EXISTS [{}];".format(SEED_TABLE_NAME))
281+
self.__execute_ddl("DROP TABLE IF EXISTS [{}];".format(SEED_TABLE_NAME))
279282

280283
def __insert_seed_row(self, qualifier_map):
281284
column_list = ",".join(
@@ -289,7 +292,7 @@ def __insert_seed_row(self, qualifier_map):
289292
statement = "INSERT INTO [{}]({}) VALUES ({});".format(
290293
SEED_TABLE_NAME, column_list, substitution_list
291294
)
292-
self.__db_execute(statement, value_list)
295+
self.__execute_dml(statement, value_list)
293296

294297
def __seed(self, qualifier_map):
295298
for i in self.progress(
@@ -322,10 +325,10 @@ def create_database(self):
322325

323326
def drop_database(self):
324327
# force connection close so we can always drop the db: sometimes timing makes a normal drop impossible.
325-
self.__execute(
328+
self.__execute_server(
326329
f"ALTER DATABASE [{self.db_name}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;"
327330
)
328-
self.__execute(f"DROP DATABASE IF EXISTS [{self.db_name}];")
331+
self.__execute_server(f"DROP DATABASE IF EXISTS [{self.db_name}];")
329332

330333
def anonymize_database(self, database_strategy, db_workers):
331334
qualifier_map = database_strategy.fake_update_qualifier_map
@@ -353,13 +356,13 @@ def anonymize_table(progressbar, table_strategy: TableStrategy):
353356

354357
if table_strategy.strategy_type == TableStrategyTypes.TRUNCATE:
355358
progressbar.set_description("Truncating {}".format(table_name))
356-
self.__db_execute(
359+
self.__execute_dml(
357360
"TRUNCATE TABLE {}[{}];".format(schema_prefix, table_name)
358361
)
359362

360363
elif table_strategy.strategy_type == TableStrategyTypes.DELETE:
361364
progressbar.set_description("Deleting {}".format(table_name))
362-
self.__db_execute(
365+
self.__execute_dml(
363366
"DELETE FROM {}[{}];".format(schema_prefix, table_name)
364367
)
365368

@@ -396,7 +399,7 @@ def anonymize_table(progressbar, table_strategy: TableStrategy):
396399

397400
# set ansi warnings off because otherwise we run into lots of little incompatibilities between the seed data and the columns
398401
# e.g. string or binary data would be truncated (when the data is too long)
399-
self.__db_execute(
402+
self.__execute_dml(
400403
f"{ansi_warnings_prefix} UPDATE {schema_prefix}[{table_name}] SET {column_assignments}{where_clause}; {ansi_warnings_suffix}"
401404
)
402405

@@ -436,7 +439,7 @@ def restore_database(self, input_path):
436439
move_clauses = ", ".join(["MOVE ? TO ?"] * len(move_files))
437440
move_clause_params = [item for pair in move_files.items() for item in pair]
438441

439-
restore_cursor = self.__execute(
442+
restore_cursor = self.__execute_server(
440443
f"RESTORE DATABASE ? FROM DISK = ? WITH {move_clauses}, STATS = ?;",
441444
[self.db_name, input_path, *move_clause_params, self.__STATS],
442445
)
@@ -452,7 +455,7 @@ def dump_database(self, output_path):
452455
",".join(with_options) + ", " if len(with_options) > 0 else ""
453456
)
454457

455-
dump_cursor = self.__execute(
458+
dump_cursor = self.__execute_server(
456459
f"BACKUP DATABASE ? TO DISK = ? WITH {with_options_str}STATS = ?;",
457460
[self.db_name, output_path, self.__STATS],
458461
)

0 commit comments

Comments
 (0)