
Commit c4411ec

Split demo and base images + cleanup migtool (#22)
1 parent 63766da commit c4411ec

3 files changed (+82, -43 lines)


.github/workflows/docker-dev-cd.yml

Lines changed: 6 additions & 2 deletions
@@ -15,7 +15,11 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Build the pgsql Docker image
+      - name: Build the pgsql EMPTY Docker image
+        run: |
+          docker build . --tag ghcr.io/openimis/openimis-pgsql:develop-base --target base
+          docker push ghcr.io/openimis/openimis-pgsql:develop-base
+      - name: Build the pgsql DEMO Docker image
         run: |
           docker build . --tag ghcr.io/openimis/openimis-pgsql:develop
-          docker push ghcr.io/openimis/openimis-pgsql:develop
+          docker push ghcr.io/openimis/openimis-pgsql:develop

.github/workflows/docker.yml

Lines changed: 5 additions & 1 deletion
@@ -17,7 +17,11 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Build the pgsql Docker image
+      - name: Build the pgsql EMPTY Docker image
+        run: |
+          docker build . --target base --tag ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}-base
+          docker push ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}-base
+      - name: Build the pgsql DEMO Docker image
         run: |
           docker build . --tag ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}
           docker push ghcr.io/openimis/openimis-pgsql:${{ github.event.inputs.tag }}
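Editor's note: the split means each workflow now publishes two tags per run, a "-base" tag built only up to the Dockerfile's "base" stage (the EMPTY, demo-free database image) and the plain tag built to the final stage (the DEMO image). A minimal sketch for reproducing the develop build locally with Python's subprocess module, assuming Docker is installed and you are already logged in to ghcr.io; the helper function is illustrative and not part of the repository:

import subprocess

IMAGE = "ghcr.io/openimis/openimis-pgsql"


def build_and_push(tag, target=None):
    # Build the image, optionally stopping at a named Dockerfile stage, then push it.
    build_cmd = ["docker", "build", ".", "--tag", f"{IMAGE}:{tag}"]
    if target:
        build_cmd += ["--target", target]
    subprocess.run(build_cmd, check=True)
    subprocess.run(["docker", "push", f"{IMAGE}:{tag}"], check=True)


# The same two steps docker-dev-cd.yml runs for the develop branch:
build_and_push("develop-base", target="base")  # empty database image
build_and_push("develop")                      # demo database image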

migtool/migtool.py

Lines changed: 71 additions & 40 deletions
@@ -1,9 +1,24 @@
+import re
+
 import pyodbc # adapter for SQL Server
 import psycopg2 # adapter for PostgreSQL
 import configparser # used to read settings from file
 import datetime # used to properly format dates and datetimes
 import time # used to calculate time taken

+# This script was created with global variables without initializing them here. Ideally, we should store a settings
+# object rather than all of them separately but this works.
+settings = None
+EXCLUDED_COLUMNS = ["RowID"]
+delete_data = False
+historical = False
+demo_fix = False
+migration_modules = []
+old_connection = None
+new_connection = None
+today = datetime.date.today()
+now = datetime.datetime.now()
+

 # loads connection configuration and migration settings from a file.
 # In future the settings file could be specified with a parameter.
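Editor's note: the new module-level globals are populated by get_settings_from_file() from a settings.ini read with configparser (imported above). A minimal sketch of that loading step; the [NewDB] key names mirror the settings["NewDB"] lookups in connect(), while the concrete values and any other sections are assumptions for illustration:

import configparser

# Hypothetical settings.ini content; only the [NewDB] key names are taken from the script.
EXAMPLE_INI = """
[NewDB]
host = localhost
port = 5432
name = imis
user = postgres
pwd = secret
"""

config = configparser.ConfigParser()
config.read_string(EXAMPLE_INI)  # the real script reads settings.ini from the working directory
new_db = config["NewDB"]
print(new_db["host"], new_db["port"], new_db["name"])  # localhost 5432 imis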
@@ -29,10 +44,6 @@ def get_settings_from_file():
             print(" Histrical data will be migrated to the new database.")
         else:
             print(" No historical data will be migrated.")
-        global today
-        global now
-        today = datetime.date.today()
-        now = datetime.datetime.now()
     except KeyError as e:
         print("\x1b[0;31;48m" + "Error while trying to load settings. " +\
               "Please make sure the settings.ini file exists in your working directory." + "\x1b[0m")
@@ -50,8 +61,9 @@ def connect():
         global old_connection
         old_connection = pyodbc.connect(old_connection_string)
     except pyodbc.InterfaceError as exc:
-        print("\x1b[0;31;48m" +
-              "ERROR: Could not connect to the SQL Server database. Make sure the server is running and check your settings." +
+        print("\x1b[0;31;48m"
+              "ERROR: Could not connect to the SQL Server database. "
+              "Make sure the server is running and check your settings."
              "\x1b[0m")
         print(exc)
         exit(1)
@@ -61,13 +73,15 @@ def connect():
     new_db = settings["NewDB"]
     new_connection_string = f'host={new_db["host"]} port={new_db["port"]} dbname={new_db["name"]} ' \
                             f'user={new_db["user"]} password={new_db["pwd"]}'
-    new_connection_string = f'postgres://{new_db["user"]}@{new_db["host"]}:{new_db["port"]}/{new_db["name"]}'
+    # new_connection_string = \
+    #     f'postgres://{new_db["user"]}:{new_db["pwd"]}@{new_db["host"]}:{new_db["port"]}/{new_db["name"]}'
     try:
         global new_connection
         new_connection = psycopg2.connect(new_connection_string)
     except psycopg2.OperationalError as exc:
-        print("\x1b[0;31;48m" +
-              "ERROR: Could not connect to the PostgreSQL database. Make sure the server is running and check your settings." +
+        print("\x1b[0;31;48m"
+              "ERROR: Could not connect to the PostgreSQL database. "
+              "Make sure the server is running and check your settings."
              "\x1b[0m")
         print(exc)
         exit(1)
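Editor's note: both connection-string styles touched in this hunk are valid input for psycopg2.connect(): the libpq keyword/value form the script keeps using, and the postgres:// URI now only kept as a comment (corrected to include the password, which the previously active URI was missing). A small illustration with assumed credentials; the connect() call is left commented so the snippet runs without a server:

import psycopg2

# Assumed credentials, for illustration only.
host, port, name, user, pwd = "localhost", 5432, "imis", "postgres", "secret"

# Keyword/value DSN, the form migtool.py actually passes to psycopg2.connect():
kv_dsn = f"host={host} port={port} dbname={name} user={user} password={pwd}"

# Equivalent URI form, matching the commented-out alternative above:
uri_dsn = f"postgres://{user}:{pwd}@{host}:{port}/{name}"

# Either string describes the same server and can be handed to psycopg2.connect():
# connection = psycopg2.connect(kv_dsn)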
@@ -108,43 +122,35 @@ def get_db_tables():
     print("Finding tables in both databases.\n")
     old_cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';")
     new_cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';")
-    old_tables = list()
-    for x in old_cursor:
-        # Remove special characters at the start and end of each item when adding it to the list.
-        # This way the entries in the old and new list match
-        old_tables.append(str(x)[2:-4])
-    new_tables = list()
-    for x in new_cursor:
-        # Remove special characters at the start and end of each item when adding it to the list.
-        # This way the entries in the old and new list match
-        new_tables.append(str(x)[2:-3])
+    old_tables = [x[0] for x in old_cursor]
+    new_tables = [x[0] for x in new_cursor]
     return old_tables, new_tables


 # This function puts the data from a SELECT statement into string and formats it correctly so that postgres can work
 # with it.
 def generate_insertion_string(row):
-    row_str = "("
+    row_list = []
     for x in row:
         # Strings must be enclosed in apostrophes, also escape singe quotes in a string by doubling them
         if isinstance(x, str):
-            row_str = row_str + "'" + str(x).replace("'", "''") + "', "
+            row_list.append("'" + str(x).replace("'", "''") + "'")
         # Dates and datetimes must be enclosed in apostrophes
         elif isinstance(x, datetime.datetime) or isinstance(x, datetime.date):
-            row_str = row_str + "'" + str(x) + "', "
-        # If x is NoneType then str(x) get transtlated to "None", but sql wants "null"
+            row_list.append("'" + str(x) + "'")
+        # If x is NoneType then str(x) get translated to "None", but sql wants "null"
         elif x is None:
-            row_str = row_str + "null, "
+            row_list.append("null")
         # If x is bytes we need to make them nice (start with \x and append the data converted to hex):
         elif isinstance(x, bytes):
-            row_str = row_str + "'\\x" + str(x.hex()) + "', "
+            row_list.append("'\\x" + str(x.hex()) + "'")
         else:
-            row_str = row_str + str(x) + ", "
-    row_str = row_str[:-2] + ")"
+            row_list.append(str(x))
+    row_str = f"({', '.join(row_list)})"
     return row_str


-# When not migrating historical data, this function figures out what colums "ValidityTo" is so we can later check for
+# When not migrating historical data, this function figures out what columns "ValidityTo" is so we can later check for
 # each row if it is still valid or already historical
 def get_validity_index(rows):
     vi = -1
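Editor's note: to see what the reworked generate_insertion_string() emits, here is a quick check with one invented row covering each handled type; run it with the function above in scope (the sample values are not from any real table):

import datetime

# Invented sample row: a string containing a quote, a date, NULL, raw bytes and an integer.
sample_row = ("O'Brien", datetime.date(2000, 1, 1), None, b"\x01\x02", 42)

print(generate_insertion_string(sample_row))
# Single quotes are doubled, bytes become \x hex, None becomes null:
# ('O''Brien', '2000-01-01', null, '\x0102', 42)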
@@ -164,6 +170,8 @@ def get_validity_index(rows):


 def get_validity(vi, row):
+    global today
+    global now
     if historical or ((not historical) and vi == -1):
         return True
     elif (not historical) and vi != -1:
@@ -185,6 +193,17 @@ def get_validity(vi, row):
             return True


+def extract_sequence_name(column_default):
+    if not column_default:
+        return None
+    pattern = r"nextval\('([^']*)"
+    match = re.search(pattern, column_default)
+    if match:
+        return match.group(1)
+    else:
+        return None
+
+
 def migrate():
     # This list collects all db tables that exist only in one of the databases but not the other.
     lonely_tables = list()
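Editor's note: extract_sequence_name() pulls the sequence name out of a PostgreSQL column default of the form nextval('...'::regclass) and returns None for anything else. A quick illustration with assumed column_default values (the identifiers are invented, not read from the openIMIS schema); run it with the function above in scope:

# Serial-style column: the default looks like nextval('<sequence>'::regclass).
print(extract_sequence_name("nextval('\"tblFamilies_FamilyID_seq\"'::regclass)"))
# -> "tblFamilies_FamilyID_seq"   (surrounding double quotes are kept; setval() accepts them)

print(extract_sequence_name("nextval('tblinsuree_insureeid_seq'::regclass)"))
# -> tblinsuree_insureeid_seq

print(extract_sequence_name(None))           # column has no default -> None
print(extract_sequence_name("'A'::bpchar"))  # plain literal default -> None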
@@ -206,26 +225,33 @@ def migrate():
                                "\"FeedbackUUID\", \"AuditUserID\") VALUES ('2000 01 01 00:00:00.000000', 0, 0, 0);")

             # Set up all the columns we're going to migrate.
-            new_cursor.execute("SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_NAME = '" + table + "';")
+            new_cursor.execute("SELECT COLUMN_NAME, COLUMN_DEFAULT "
+                               "FROM information_schema.COLUMNS WHERE TABLE_NAME = '" + table + "';")
             rows = new_cursor.fetchall()
             # While we have the data ready: find out where dates are stored for historical data stuff. validity_index
             # stores in which column the date (ValidityTo) is stored
             validity_index = -1
             if not historical:
                 validity_index = get_validity_index(rows)
             # Finally, set up the columns to migrate
-            old_cols = ""
-            new_cols = "("
+            sequence_columns = {}
+            old_cols_list = []
+            new_cols_list = []
             for row in rows:
-                old_cols = old_cols + str(row)[2:-3] + ", "
-                new_cols = new_cols + "\"" + str(row)[2:-3] + "\", "
-            old_cols = old_cols[:-2]
-            new_cols = new_cols[:-2] + ")"
+                if row[0] not in EXCLUDED_COLUMNS:
+                    col_default = extract_sequence_name(row[1])
+                    if col_default:
+                        sequence_columns[row[0]] = col_default
+                    old_cols_list.append(row[0])
+                    new_cols_list.append(f'"{row[0]}"')
+            old_cols = ", ".join(old_cols_list)
+            new_cols = "(" + ", ".join(new_cols_list) + ")"

             # Get the data from the old db with these column specifications
             print(" Fetching data from old database.")
             old_cursor.execute("SELECT COUNT(*) FROM " + table + ";")
-            print(" Found " + str(old_cursor.fetchone())[1:-3] + " entries.")
+            print(f" Found {old_cursor.fetchone()[0]} entries.")
+            print(f" == old_cols: {old_cols} from {table} ==")
             old_cursor.execute("SELECT " + old_cols + " FROM " + table + ";")

             # Set up the values for the insert statement and execute
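Editor's note: reading COLUMN_DEFAULT alongside COLUMN_NAME lets the loop remember, per table, which columns are backed by a sequence (sequence_columns) while dropping RowID via EXCLUDED_COLUMNS. A small dry run over invented information_schema rows, reusing extract_sequence_name() from above; only the shape of the tuples matches what new_cursor.fetchall() returns:

EXCLUDED_COLUMNS = ["RowID"]

# Invented (COLUMN_NAME, COLUMN_DEFAULT) pairs, shaped like new_cursor.fetchall().
rows = [
    ("FamilyID", "nextval('tblfamilies_familyid_seq'::regclass)"),
    ("InsureeID", None),
    ("ValidityTo", None),
    ("RowID", None),  # excluded from the migration
]

sequence_columns, old_cols_list, new_cols_list = {}, [], []
for row in rows:
    if row[0] not in EXCLUDED_COLUMNS:
        col_default = extract_sequence_name(row[1])
        if col_default:
            sequence_columns[row[0]] = col_default
        old_cols_list.append(row[0])
        new_cols_list.append(f'"{row[0]}"')

print(", ".join(old_cols_list))              # FamilyID, InsureeID, ValidityTo
print("(" + ", ".join(new_cols_list) + ")")  # ("FamilyID", "InsureeID", "ValidityTo")
print(sequence_columns)                      # {'FamilyID': 'tblfamilies_familyid_seq'}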
@@ -247,14 +273,19 @@ def migrate():
                     # Not rolling back leads to an InFailedSqlTransaction exception.
                     new_connection.rollback()
                     pass
-
+                except Exception as e:
+                    print("Failed: INSERT INTO \"" + table + "\" " + new_cols + " VALUES " + row_str + ";")
+                    raise
+            if sequence_columns:
+                print(" Data transferred, updating sequences.")
+                for column, sequence in sequence_columns.items():
+                    new_cursor.execute(f"select setval('{sequence}', max(\"{column}\")) from \"{table}\";")
             print(" Table " + table + " has been migrated.\n")

         # Table doesn't exist
         else:
-            print("\x1b[0;31;48m" + "WARNING: Table " + table + \
-                  " only exists in one of the databases (but not the other)! Is this correct?" + "\x1b[0m\n")
-            print("")
+            print(f"\x1b[0;31;48mWARNING: Table {table} only exists in one of the databases "
+                  f"new: {table in new_tables}, old:{table in old_tables})! Is this correct?\x1b[0m\n")
             lonely_tables.append(table)

     # Print all tables that have not been migrated due to missing schemas:
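Editor's note: because the migration inserts rows with explicit primary-key values, the sequences behind those columns never advance on their own; the new setval() pass after each table resynchronises them so the first insert made by the live system does not reuse a migrated ID. For assumed table, column and sequence names, the statement issued per entry in sequence_columns looks like this:

# Assumed names, for illustration only.
table, column, sequence = "tblFamilies", "FamilyID", "tblfamilies_familyid_seq"

statement = f"select setval('{sequence}', max(\"{column}\")) from \"{table}\";"
print(statement)
# select setval('tblfamilies_familyid_seq', max("FamilyID")) from "tblFamilies";
# setval() moves the sequence to the table's current maximum, so nextval() continues after it.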
