
Commit c4411ec

Split demo and base images + cleanup migtool (#22)
1 parent 63766da commit c4411ec

3 files changed (+82, -43 lines)


.github/workflows/docker-dev-cd.yml

Lines changed: 6 additions & 2 deletions
@@ -15,7 +15,11 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Build the pgsql Docker image
+      - name: Build the pgsql EMPTY Docker image
+        run: |
+          docker build . --tag ghcr.io/openimis/openimis-pgsql:develop-base --target base
+          docker push ghcr.io/openimis/openimis-pgsql:develop-base
+      - name: Build the pgsql DEMO Docker image
         run: |
           docker build . --tag ghcr.io/openimis/openimis-pgsql:develop
-          docker push ghcr.io/openimis/openimis-pgsql:develop
+          docker push ghcr.io/openimis/openimis-pgsql:develop

.github/workflows/docker.yml

Lines changed: 5 additions & 1 deletion
@@ -17,7 +17,11 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Build the pgsql Docker image
+      - name: Build the pgsql EMPTY Docker image
+        run: |
+          docker build . --target base --tag ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}-base
+          docker push ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}-base
+      - name: Build the pgsql DEMO Docker image
         run: |
           docker build . --tag ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}
           docker push ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}
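Editor's note: the split means each workflow now publishes two tags per run, a "-base" tag built only up to the Dockerfile's "base" stage (the EMPTY, demo-free database image) and the plain tag built to the final stage (the DEMO image). A minimal sketch for reproducing the develop build locally with Python's subprocess module, assuming Docker is installed and you are already logged in to ghcr.io; the helper function is illustrative and not part of the repository:

import subprocess

IMAGE = "ghcr.io/openimis/openimis-pgsql"


def build_and_push(tag, target=None):
    # Build the image, optionally stopping at a named Dockerfile stage, then push it.
    build_cmd = ["docker", "build", ".", "--tag", f"{IMAGE}:{tag}"]
    if target:
        build_cmd += ["--target", target]
    subprocess.run(build_cmd, check=True)
    subprocess.run(["docker", "push", f"{IMAGE}:{tag}"], check=True)


# The same two steps docker-dev-cd.yml runs for the develop branch:
build_and_push("develop-base", target="base")  # empty database image
build_and_push("develop")                      # demo database image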

migtool/migtool.py

Lines changed: 71 additions & 40 deletions
@@ -1,9 +1,24 @@
+import re
+
 import pyodbc # adapter for SQL Server
 import psycopg2 # adapter for PostgreSQL
 import configparser # used to read settings from file
 import datetime # used to properly format dates and datetimes
 import time # used to calculate time taken

+# This script was created with global variables without initializing them here. Ideally, we should store a settings
+# object rather than all of them separately but this works.
+settings = None
+EXCLUDED_COLUMNS = ["RowID"]
+delete_data = False
+historical = False
+demo_fix = False
+migration_modules = []
+old_connection = None
+new_connection = None
+today = datetime.date.today()
+now = datetime.datetime.now()
+

 # loads connection configuration and migration settings from a file.
 # In future the settings file could be specified with a parameter.
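Editor's note: the new module-level globals are populated by get_settings_from_file() from a settings.ini read with configparser (imported above). A minimal sketch of that loading step; the [NewDB] key names mirror the settings["NewDB"] lookups in connect(), while the concrete values and any other sections are assumptions for illustration:

import configparser

# Hypothetical settings.ini content; only the [NewDB] key names are taken from the script.
EXAMPLE_INI = """
[NewDB]
host = localhost
port = 5432
name = imis
user = postgres
pwd = secret
"""

config = configparser.ConfigParser()
config.read_string(EXAMPLE_INI)  # the real script reads settings.ini from the working directory
new_db = config["NewDB"]
print(new_db["host"], new_db["port"], new_db["name"])  # localhost 5432 imis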
@@ -29,10 +44,6 @@ def get_settings_from_file():
             print(" Histrical data will be migrated to the new database.")
         else:
             print(" No historical data will be migrated.")
-        global today
-        global now
-        today = datetime.date.today()
-        now = datetime.datetime.now()
     except KeyError as e:
         print("\x1b[0;31;48m" + "Error while trying to load settings. " +\
               "Please make sure the settings.ini file exists in your working directory." + "\x1b[0m")
@@ -50,8 +61,9 @@ def connect():
         global old_connection
         old_connection = pyodbc.connect(old_connection_string)
     except pyodbc.InterfaceError as exc:
-        print("\x1b[0;31;48m" +
-              "ERROR: Could not connect to the SQL Server database. Make sure the server is running and check your settings." +
+        print("\x1b[0;31;48m"
+              "ERROR: Could not connect to the SQL Server database. "
+              "Make sure the server is running and check your settings."
              "\x1b[0m")
         print(exc)
         exit(1)
@@ -61,13 +73,15 @@ def connect():
     new_db = settings["NewDB"]
     new_connection_string = f'host={new_db["host"]} port={new_db["port"]} dbname={new_db["name"]} ' \
                             f'user={new_db["user"]} password={new_db["pwd"]}'
-    new_connection_string = f'postgres://{new_db["user"]}@{new_db["host"]}:{new_db["port"]}/{new_db["name"]}'
+    # new_connection_string = \
+    #     f'postgres://{new_db["user"]}:{new_db["pwd"]}@{new_db["host"]}:{new_db["port"]}/{new_db["name"]}'
     try:
         global new_connection
         new_connection = psycopg2.connect(new_connection_string)
     except psycopg2.OperationalError as exc:
-        print("\x1b[0;31;48m" +
-              "ERROR: Could not connect to the PostgreSQL database. Make sure the server is running and check your settings." +
+        print("\x1b[0;31;48m"
+              "ERROR: Could not connect to the PostgreSQL database. "
+              "Make sure the server is running and check your settings."
              "\x1b[0m")
         print(exc)
         exit(1)
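Editor's note: both connection-string styles touched in this hunk are valid input for psycopg2.connect(): the libpq keyword/value form the script keeps using, and the postgres:// URI now only kept as a comment (corrected to include the password, which the previously active URI was missing). A small illustration with assumed credentials; the connect() call is left commented so the snippet runs without a server:

import psycopg2

# Assumed credentials, for illustration only.
host, port, name, user, pwd = "localhost", 5432, "imis", "postgres", "secret"

# Keyword/value DSN, the form migtool.py actually passes to psycopg2.connect():
kv_dsn = f"host={host} port={port} dbname={name} user={user} password={pwd}"

# Equivalent URI form, matching the commented-out alternative above:
uri_dsn = f"postgres://{user}:{pwd}@{host}:{port}/{name}"

# Either string describes the same server and can be handed to psycopg2.connect():
# connection = psycopg2.connect(kv_dsn)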
@@ -108,43 +122,35 @@ def get_db_tables():
     print("Finding tables in both databases.\n")
     old_cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';")
     new_cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';")
-    old_tables = list()
-    for x in old_cursor:
-        # Remove special characters at the start and end of each item when adding it to the list.
-        # This way the entries in the old and new list match
-        old_tables.append(str(x)[2:-4])
-    new_tables = list()
-    for x in new_cursor:
-        # Remove special characters at the start and end of each item when adding it to the list.
-        # This way the entries in the old and new list match
-        new_tables.append(str(x)[2:-3])
+    old_tables = [x[0] for x in old_cursor]
+    new_tables = [x[0] for x in new_cursor]
     return old_tables, new_tables


 # This function puts the data from a SELECT statement into string and formats it correctly so that postgres can work
 # with it.
 def generate_insertion_string(row):
-    row_str = "("
+    row_list = []
     for x in row:
         # Strings must be enclosed in apostrophes, also escape singe quotes in a string by doubling them
         if isinstance(x, str):
-            row_str = row_str + "'" + str(x).replace("'", "''") + "', "
+            row_list.append("'" + str(x).replace("'", "''") + "'")
         # Dates and datetimes must be enclosed in apostrophes
         elif isinstance(x, datetime.datetime) or isinstance(x, datetime.date):
-            row_str = row_str + "'" + str(x) + "', "
-        # If x is NoneType then str(x) get transtlated to "None", but sql wants "null"
+            row_list.append("'" + str(x) + "'")
+        # If x is NoneType then str(x) get translated to "None", but sql wants "null"
         elif x is None:
-            row_str = row_str + "null, "
+            row_list.append("null")
         # If x is bytes we need to make them nice (start with \x and append the data converted to hex):
         elif isinstance(x, bytes):
-            row_str = row_str + "'\\x" + str(x.hex()) + "', "
+            row_list.append("'\\x" + str(x.hex()) + "'")
         else:
-            row_str = row_str + str(x) + ", "
-    row_str = row_str[:-2] + ")"
+            row_list.append(str(x))
+    row_str = f"({', '.join(row_list)})"
     return row_str


-# When not migrating historical data, this function figures out what colums "ValidityTo" is so we can later check for
+# When not migrating historical data, this function figures out what columns "ValidityTo" is so we can later check for
 # each row if it is still valid or already historical
 def get_validity_index(rows):
     vi = -1
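Editor's note: to see what the reworked generate_insertion_string() emits, here is a quick check with one invented row covering each handled type; run it with the function above in scope (the sample values are not from any real table):

import datetime

# Invented sample row: a string containing a quote, a date, NULL, raw bytes and an integer.
sample_row = ("O'Brien", datetime.date(2000, 1, 1), None, b"\x01\x02", 42)

print(generate_insertion_string(sample_row))
# Single quotes are doubled, bytes become \x hex, None becomes null:
# ('O''Brien', '2000-01-01', null, '\x0102', 42)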
@@ -164,6 +170,8 @@ def get_validity_index(rows):


 def get_validity(vi, row):
+    global today
+    global now
     if historical or ((not historical) and vi == -1):
         return True
     elif (not historical) and vi != -1:
@@ -185,6 +193,17 @@ def get_validity(vi, row):
             return True


+def extract_sequence_name(column_default):
+    if not column_default:
+        return None
+    pattern = r"nextval\('([^']*)"
+    match = re.search(pattern, column_default)
+    if match:
+        return match.group(1)
+    else:
+        return None
+
+
 def migrate():
     # This list collects all db tables that exist only in one of the databases but not the other.
     lonely_tables = list()
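Editor's note: extract_sequence_name() pulls the sequence name out of a PostgreSQL column default of the form nextval('...'::regclass) and returns None for anything else. A quick illustration with assumed column_default values (the identifiers are invented, not read from the openIMIS schema); run it with the function above in scope:

# Serial-style column: the default looks like nextval('<sequence>'::regclass).
print(extract_sequence_name("nextval('\"tblFamilies_FamilyID_seq\"'::regclass)"))
# -> "tblFamilies_FamilyID_seq"   (surrounding double quotes are kept; setval() accepts them)

print(extract_sequence_name("nextval('tblinsuree_insureeid_seq'::regclass)"))
# -> tblinsuree_insureeid_seq

print(extract_sequence_name(None))           # column has no default -> None
print(extract_sequence_name("'A'::bpchar"))  # plain literal default -> None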
@@ -206,26 +225,33 @@ def migrate():
                                "\"FeedbackUUID\", \"AuditUserID\") VALUES ('2000 01 01 00:00:00.000000', 0, 0, 0);")

             # Set up all the columns we're going to migrate.
-            new_cursor.execute("SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_NAME = '" + table + "';")
+            new_cursor.execute("SELECT COLUMN_NAME, COLUMN_DEFAULT "
+                               "FROM information_schema.COLUMNS WHERE TABLE_NAME = '" + table + "';")
             rows = new_cursor.fetchall()
             # While we have the data ready: find out where dates are stored for historical data stuff. validity_index
             # stores in which column the date (ValidityTo) is stored
             validity_index = -1
             if not historical:
                 validity_index = get_validity_index(rows)
             # Finally, set up the columns to migrate
-            old_cols = ""
-            new_cols = "("
+            sequence_columns = {}
+            old_cols_list = []
+            new_cols_list = []
             for row in rows:
-                old_cols = old_cols + str(row)[2:-3] + ", "
-                new_cols = new_cols + "\"" + str(row)[2:-3] + "\", "
-            old_cols = old_cols[:-2]
-            new_cols = new_cols[:-2] + ")"
+                if row[0] not in EXCLUDED_COLUMNS:
+                    col_default = extract_sequence_name(row[1])
+                    if col_default:
+                        sequence_columns[row[0]] = col_default
+                    old_cols_list.append(row[0])
+                    new_cols_list.append(f'"{row[0]}"')
+            old_cols = ", ".join(old_cols_list)
+            new_cols = "(" + ", ".join(new_cols_list) + ")"

             # Get the data from the old db with these column specifications
             print(" Fetching data from old database.")
             old_cursor.execute("SELECT COUNT(*) FROM " + table + ";")
-            print(" Found " + str(old_cursor.fetchone())[1:-3] + " entries.")
+            print(f" Found {old_cursor.fetchone()[0]} entries.")
+            print(f" == old_cols: {old_cols} from {table} ==")
             old_cursor.execute("SELECT " + old_cols + " FROM " + table + ";")

             # Set up the values for the insert statement and execute
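Editor's note: reading COLUMN_DEFAULT alongside COLUMN_NAME lets the loop remember, per table, which columns are backed by a sequence (sequence_columns) while dropping RowID via EXCLUDED_COLUMNS. A small dry run over invented information_schema rows, reusing extract_sequence_name() from above; only the shape of the tuples matches what new_cursor.fetchall() returns:

EXCLUDED_COLUMNS = ["RowID"]

# Invented (COLUMN_NAME, COLUMN_DEFAULT) pairs, shaped like new_cursor.fetchall().
rows = [
    ("FamilyID", "nextval('tblfamilies_familyid_seq'::regclass)"),
    ("InsureeID", None),
    ("ValidityTo", None),
    ("RowID", None),  # excluded from the migration
]

sequence_columns, old_cols_list, new_cols_list = {}, [], []
for row in rows:
    if row[0] not in EXCLUDED_COLUMNS:
        col_default = extract_sequence_name(row[1])
        if col_default:
            sequence_columns[row[0]] = col_default
        old_cols_list.append(row[0])
        new_cols_list.append(f'"{row[0]}"')

print(", ".join(old_cols_list))              # FamilyID, InsureeID, ValidityTo
print("(" + ", ".join(new_cols_list) + ")")  # ("FamilyID", "InsureeID", "ValidityTo")
print(sequence_columns)                      # {'FamilyID': 'tblfamilies_familyid_seq'}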
@@ -247,14 +273,19 @@ def migrate():
                     # Not rolling back leads to an InFailedSqlTransaction exception.
                     new_connection.rollback()
                     pass
-
+                except Exception as e:
+                    print("Failed: INSERT INTO \"" + table + "\" " + new_cols + " VALUES " + row_str + ";")
+                    raise
+            if sequence_columns:
+                print(" Data transferred, updating sequences.")
+                for column, sequence in sequence_columns.items():
+                    new_cursor.execute(f"select setval('{sequence}', max(\"{column}\")) from \"{table}\";")
             print(" Table " + table + " has been migrated.\n")

         # Table doesn't exist
         else:
-            print("\x1b[0;31;48m" + "WARNING: Table " + table + \
-                  " only exists in one of the databases (but not the other)! Is this correct?" + "\x1b[0m\n")
-            print("")
+            print(f"\x1b[0;31;48mWARNING: Table {table} only exists in one of the databases "
+                  f"new: {table in new_tables}, old:{table in old_tables})! Is this correct?\x1b[0m\n")
             lonely_tables.append(table)

     # Print all tables that have not been migrated due to missing schemas:
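Editor's note: because the migration inserts rows with explicit primary-key values, the sequences behind those columns never advance on their own; the new setval() pass after each table resynchronises them so the first insert made by the live system does not reuse a migrated ID. For assumed table, column and sequence names, the statement issued per entry in sequence_columns looks like this:

# Assumed names, for illustration only.
table, column, sequence = "tblFamilies", "FamilyID", "tblfamilies_familyid_seq"

statement = f"select setval('{sequence}', max(\"{column}\")) from \"{table}\";"
print(statement)
# select setval('tblfamilies_familyid_seq', max("FamilyID")) from "tblFamilies";
# setval() moves the sequence to the table's current maximum, so nextval() continues after it.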
