Skip to content

Commit db07553

Browse files
authored
Merge pull request #175 from jonstace-hsf/mssql-multiple-resultsets
MSSQL: Handle updates that return multiple resultsets
2 parents 72efe78 + 261d5ed commit db07553

File tree

2 files changed

+34
-19
lines changed

2 files changed

+34
-19
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1616

1717
-------------------------------------------------------------------
1818
## [Unreleased]
19+
## Fixed
20+
- Fixed an issue when updates to MSSQL data would result in multiple messages coming back from the server, e.g. when triggers update multiple tables and NOCOUNT is OFF. Another scenario is before or after scripts that call stored procs with PRINT statements in them or that return multiple resultsets before completing. Without the fix, this issue can result in tables only partially anonymized.
1921

2022
## [2.4.0] 2024-07-30
2123
## Changed

pynonymizer/database/mssql/__init__.py

+32-19
Original file line numberDiff line numberDiff line change
@@ -136,21 +136,34 @@ def __db_connection(self):
136136

137137
return self.__db_conn
138138

139-
def __execute(self, statement, *args):
139+
def __execute_dml(self, statement, *args):
140140
logger.debug(statement, args)
141-
c = self.__connection()
141+
c = self.__db_connection()
142142
# If timeout is set, then apply it to the connection. PyODBC will then assign that value to the Cursor created during execute()
143143
if self.timeout:
144144
c.timeout = self.timeout
145-
return c.execute(statement, *args)
146-
147-
def __db_execute(self, statement, *args):
145+
cur = c.execute(statement, *args)
146+
# If the SQL query causes multiple messages to come back (either extra row counts from triggers, or PRINT statements),
147+
# then we need to keep running nextset() for PyODBC to get the query to run to completion
148+
while cur.nextset():
149+
pass
150+
return cur
151+
152+
def __execute_ddl(self, statement, *args):
148153
logger.debug(statement, args)
149154
c = self.__db_connection()
150155
# If timeout is set, then apply it to the connection. PyODBC will then assign that value to the Cursor created during execute()
151156
if self.timeout:
152157
c.timeout = self.timeout
153158
return c.execute(statement, *args)
159+
160+
def __execute_server(self, statement, *args):
161+
logger.debug(statement, args)
162+
c = self.__connection()
163+
# If timeout is set, then apply it to the connection. PyODBC will then assign that value to the Cursor created during execute()
164+
if self.timeout:
165+
c.timeout = self.timeout
166+
return c.execute(statement, *args)
154167

155168
def __get_path(self, filepath):
156169
if "\\" in filepath:
@@ -169,7 +182,7 @@ def __get_default_datafolder(self):
169182
checking the model db seems like a good 'boring' solution
170183
:return: Default data directory e.g. "C:\\DATA"
171184
"""
172-
datafile = self.__execute(
185+
datafile = self.__execute_server(
173186
"""
174187
SELECT physical_name
175188
FROM sys.master_files mf
@@ -187,7 +200,7 @@ def __get_default_logfolder(self):
187200
__get_default_datafolder: see for more info
188201
:return:
189202
"""
190-
logfile = self.__execute(
203+
logfile = self.__execute_server(
191204
"""
192205
SELECT physical_name
193206
FROM sys.master_files mf
@@ -207,7 +220,7 @@ def __get_file_moves(self, input_path):
207220
datadir = self.__get_default_datafolder()
208221
logdir = self.__get_default_logfolder()
209222

210-
filelist = self.__execute(
223+
filelist = self.__execute_server(
211224
f"RESTORE FILELISTONLY FROM DISK = ?;", input_path
212225
).fetchall()
213226

@@ -245,7 +258,7 @@ def __run_scripts(self, script_list, title=""):
245258

246259
for i, script in enumerate(script_list):
247260
logger.info(f'Running {title} script #{i} "{script[:50]}"')
248-
cursor = self.__db_execute(script)
261+
cursor = self.__execute_dml(script)
249262
results = None
250263
try:
251264
results = cursor.fetchall()
@@ -262,10 +275,10 @@ def __create_seed_table(self, qualifier_map):
262275
SEED_TABLE_NAME, ",".join(seed_column_lines)
263276
)
264277

265-
self.__db_execute(create_statement)
278+
self.__execute_ddl(create_statement)
266279

267280
def __drop_seed_table(self):
268-
self.__db_execute("DROP TABLE IF EXISTS [{}];".format(SEED_TABLE_NAME))
281+
self.__execute_ddl("DROP TABLE IF EXISTS [{}];".format(SEED_TABLE_NAME))
269282

270283
def __insert_seed_row(self, qualifier_map):
271284
column_list = ",".join(
@@ -279,7 +292,7 @@ def __insert_seed_row(self, qualifier_map):
279292
statement = "INSERT INTO [{}]({}) VALUES ({});".format(
280293
SEED_TABLE_NAME, column_list, substitution_list
281294
)
282-
self.__db_execute(statement, value_list)
295+
self.__execute_dml(statement, value_list)
283296

284297
def __seed(self, qualifier_map):
285298
for i in self.progress(
@@ -312,10 +325,10 @@ def create_database(self):
312325

313326
def drop_database(self):
314327
# force connection close so we can always drop the db: sometimes timing makes a normal drop impossible.
315-
self.__execute(
328+
self.__execute_server(
316329
f"ALTER DATABASE [{self.db_name}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;"
317330
)
318-
self.__execute(f"DROP DATABASE IF EXISTS [{self.db_name}];")
331+
self.__execute_server(f"DROP DATABASE IF EXISTS [{self.db_name}];")
319332

320333
def anonymize_database(self, database_strategy, db_workers):
321334
qualifier_map = database_strategy.fake_update_qualifier_map
@@ -343,13 +356,13 @@ def anonymize_table(progressbar, table_strategy: TableStrategy):
343356

344357
if table_strategy.strategy_type == TableStrategyTypes.TRUNCATE:
345358
progressbar.set_description("Truncating {}".format(table_name))
346-
self.__db_execute(
359+
self.__execute_dml(
347360
"TRUNCATE TABLE {}[{}];".format(schema_prefix, table_name)
348361
)
349362

350363
elif table_strategy.strategy_type == TableStrategyTypes.DELETE:
351364
progressbar.set_description("Deleting {}".format(table_name))
352-
self.__db_execute(
365+
self.__execute_dml(
353366
"DELETE FROM {}[{}];".format(schema_prefix, table_name)
354367
)
355368

@@ -386,7 +399,7 @@ def anonymize_table(progressbar, table_strategy: TableStrategy):
386399

387400
# set ansi warnings off because otherwise we run into lots of little incompatibilities between the seed data and the columns
388401
# e.g. string or binary data would be truncated (when the data is too long)
389-
self.__db_execute(
402+
self.__execute_dml(
390403
f"{ansi_warnings_prefix} UPDATE {schema_prefix}[{table_name}] SET {column_assignments}{where_clause}; {ansi_warnings_suffix}"
391404
)
392405

@@ -426,7 +439,7 @@ def restore_database(self, input_path):
426439
move_clauses = ", ".join(["MOVE ? TO ?"] * len(move_files))
427440
move_clause_params = [item for pair in move_files.items() for item in pair]
428441

429-
restore_cursor = self.__execute(
442+
restore_cursor = self.__execute_server(
430443
f"RESTORE DATABASE ? FROM DISK = ? WITH {move_clauses}, STATS = ?;",
431444
[self.db_name, input_path, *move_clause_params, self.__STATS],
432445
)
@@ -442,7 +455,7 @@ def dump_database(self, output_path):
442455
",".join(with_options) + ", " if len(with_options) > 0 else ""
443456
)
444457

445-
dump_cursor = self.__execute(
458+
dump_cursor = self.__execute_server(
446459
f"BACKUP DATABASE ? TO DISK = ? WITH {with_options_str}STATS = ?;",
447460
[self.db_name, output_path, self.__STATS],
448461
)

0 commit comments

Comments
 (0)