asgeirrr · asgeirrr · Nov 24, 2023 · Nov 24, 2023 · Nov 24, 2023
diff --git a/.github/workflows/test-and-deploy.yaml b/.github/workflows/test-and-deploy.yaml
@@ -0,0 +1,65 @@
+# CI pipeline: lint and test on every push / pull request, then publish a
+# release from `main` once the whole test matrix has passed.
+name: Run Tests & Deploy
+on: [push, pull_request]
+
+jobs:
+  # Lints the code base and runs the test suite on every listed Python.
+  build-and-test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so that "3.10" is not parsed as the float 3.1.
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+    services:
+      # NOTE(review): no `ports:` mapping is declared, so this container is
+      # not published on the runner's localhost -- confirm that the tests
+      # actually use it (or add a mapping such as "5432:5432").
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        # Health check used to decide when the service container is ready.
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          pip install -e '.'
+      - name: Install packages needed for CI
+        # pytest and pytest-postgresql mirror `tests_require` in setup.py.
+        run: pip install pre-commit pytest pytest-postgresql
+      - name: Lint all files
+        run: pre-commit run --all-files --show-diff-on-failure
+      - name: Run tests
+        # `python setup.py test` is deprecated and has been removed from
+        # current setuptools releases (upgraded above), so call pytest directly.
+        run: python -m pytest
+  # Publishes a new release; only runs for `main` after all tests passed.
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build-and-test
+    if: github.ref == 'refs/heads/main'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          # 0 fetches the full history instead of a shallow clone.
+          fetch-depth: 0
+          token: ${{ secrets.PUBLISH_TOKEN }}
+      - name: Semantic release
+        # NOTE(review): `@master` is a moving ref that receives an admin
+        # token; consider pinning to a release tag or commit SHA.
+        uses: relekang/python-semantic-release@master
+        with:
+          # Personal Access Token that belongs to an admin of the repo must
+          # be set in PUBLISH_TOKEN secret to bypass `main` branch protection
+          github_token: ${{ secrets.PUBLISH_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,10 +1,35 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: v0.1.4
+  rev: v0.1.6
   hooks:
     # Run the linter.
     - id: ruff
       args: [ --fix ]
     # Run the formatter.
     - id: ruff-format
+# Project-local hook that checks commit messages with Commitizen.
+- repo: local
+  hooks:
+  - id: commitizen-branch
+    name: commitizen conditionally check branch
+    description: >
+        Commitizen fails when there are no new commits. To overcome this, we created
+        tiny wrapper that verifies there are commits to check.
+
+        Original description from Commitizen:
+        Check all commit messages that are already on the current branch but not the
+        default branch on the origin repository. Useful for checking messages after
+        the fact (e.g., pre-push or in CI) without an expensive check of the entire
+        repository history.
+    entry: ./conditional-commitizen.sh
+    always_run: true
+    # The wrapper inspects git history, not files; without this pre-commit
+    # appends file names and may split them over several invocations.
+    pass_filenames: false
+    language: python
+    minimum_pre_commit_version: "1.4.3"
+    # Installed into the hook's environment so that `cz` is available.
+    additional_dependencies: [
+        'commitizen',
+    ]
diff --git a/conditional-commitizen.sh b/conditional-commitizen.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Run `cz check` on the commits that are on the current branch but not on
+# origin/HEAD, and do nothing when there are none (Commitizen fails on an
+# empty range).
+range="origin/HEAD..HEAD"
+
+# Count commits directly instead of counting lines of `git log` output.
+if ! count=$(git rev-list --count "$range" 2>/dev/null); then
+    # e.g. origin/HEAD is not set in this clone: keep skipping the check,
+    # but say so explicitly instead of relying on git's error output.
+    echo "conditional-commitizen: cannot resolve $range, skipping check" >&2
+    exit 0
+fi
+if [ "$count" -gt 0 ]; then
+    cz check --rev-range "$range"
+fi
diff --git a/pgantomizer/anonymize.py b/pgantomizer/anonymize.py
@@ -177,7 +177,9 @@ def check_schema(cursor, schema, db_args):
 
 
 def get_column_update(schema, table, column, data_type):
-    custom_rule = get_in(schema, [table, "custom_rules", column]) if schema[table] else None
+    custom_rule = (
+        get_in(schema, [table, "custom_rules", column]) if schema[table] else None
+    )
 
     if column == get_table_pk_name(schema, table) or (
         schema[table] and column in schema[table].get("raw", [])
@@ -190,7 +192,9 @@ def get_column_update(schema, table, column, data_type):
                     'Custom rule "{}" must provide a non-None value'.format(custom_rule)
                 )
             else:
-                return "{column} = '{value}'".format(column=column, value=custom_rule["value"])
+                return "{column} = '{value}'".format(
+                    column=column, value=custom_rule["value"]
+                )
         elif custom_rule and custom_rule not in CUSTOM_ANONYMIZATION_RULES:
             raise MissingAnonymizationRuleError(
                 'Custom rule "{}" is not defined'.format(custom_rule)
@@ -222,9 +226,13 @@ def anonymize_table(conn, cursor, schema, table, disable_schema_changes):
             cascade = " CASCADE"
 
         logging.debug(
-            "Running TRUNCATE{cascade} on {table} ...".format(table=table, cascade=cascade)
+            "Running TRUNCATE{cascade} on {table} ...".format(
+                table=table, cascade=cascade
+            )
+        )
+        cursor.execute(
+            "TRUNCATE {table} {cascade}".format(table=table, cascade=cascade)
         )
-        cursor.execute("TRUNCATE {table} {cascade}".format(table=table, cascade=cascade))
         return
 
     # Generate list of column_update SQL snippets for UPDATE
@@ -236,20 +244,24 @@ def anonymize_table(conn, cursor, schema, table, disable_schema_changes):
     updated_column_names = []
     for column_name, data_type in cursor.fetchall():
         if not disable_schema_changes:  # Bypass schema changes if explicitly requested
-            prepare_column_for_anonymization(conn, cursor, table, column_name, data_type)
+            prepare_column_for_anonymization(
+                conn, cursor, table, column_name, data_type
+            )
         column_update = get_column_update(schema, table, column_name, data_type)
         if column_update is not None:
             column_updates.append(column_update)
             updated_column_names.append(column_name)
 
     # Process UPDATE if any column_updates requested
     if len(column_updates) > 0:
-        update_statement = "UPDATE {table} SET {column_updates_sql} {where_clause}".format(
-            table=table,
-            column_updates_sql=", ".join(column_updates),
-            where_clause="WHERE {}".format(
-                schema[table].get("where", "TRUE") if schema[table] else "TRUE"
-            ),
+        update_statement = (
+            "UPDATE {table} SET {column_updates_sql} {where_clause}".format(
+                table=table,
+                column_updates_sql=", ".join(column_updates),
+                where_clause="WHERE {}".format(
+                    schema[table].get("where", "TRUE") if schema[table] else "TRUE"
+                ),
+            )
         )
         logging.debug(
             "Running UPDATE on {} for columns {} ...".format(
@@ -269,7 +281,9 @@ def anonymize_db(schema, db_args, disable_schema_changes):
                 "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type <> 'VIEW' ORDER BY table_name;"
             )
             for table_name in cursor.fetchall():
-                anonymize_table(conn, cursor, schema, table_name[0], disable_schema_changes)
+                anonymize_table(
+                    conn, cursor, schema, table_name[0], disable_schema_changes
+                )
             logging.debug("Anonymization complete!")
 
 
@@ -291,9 +305,7 @@ def load_anonymize_remove(
         try:
             load_db_to_new_instance(dump_file, db_args)
             anonymize_db(schema, db_args, disable_schema_changes)
-        except (
-            Exception
-        ):  # Any exception must result into dropping the schema to prevent sensitive data leakage
+        except Exception:  # Any exception must result into dropping the schema to prevent sensitive data leakage
             drop_schema(db_args)
             raise
         finally:
@@ -308,7 +320,9 @@ def main():
         epilog="Beware that all tables in the target DB are dropped "
         "prior to loading the dump and anonymization. See README.md for details.",
     )
-    parser.add_argument("-v", "--verbose", action="count", help="increase output verbosity")
+    parser.add_argument(
+        "-v", "--verbose", action="count", help="increase output verbosity"
+    )
     parser.add_argument(
         "-s",
         "--skip-restore",
@@ -349,7 +363,9 @@ def main():
         help="password of the Postgres user with access to the anonymized database",
         default="",
     )
-    parser.add_argument("--host", help="host where the DB is running", default="localhost")
+    parser.add_argument(
+        "--host", help="host where the DB is running", default="localhost"
+    )
     parser.add_argument("--port", help="port where the DB is running", default="5432")
 
     args = parser.parse_args()

diff --git a/setup.py b/setup.py
@@ -40,7 +40,7 @@
     ],
     tests_require=[
         "pytest",
-        "pytest-postgresql==1.3.0",
+        "pytest-postgresql",
         "pre-commit",
     ],
     entry_points={

diff --git a/tests/asserts.py b/tests/asserts.py
@@ -1,6 +1,3 @@
-from psycopg2.extras import NamedTupleCursor
-
-
 def assert_customer_anonymized(customer, name, language, currency, ip):
     assert customer[1] == name
     assert customer[2] == language

diff --git a/tests/test_pgantomizer.py b/tests/test_pgantomizer.py
@@ -80,7 +80,9 @@ def test_dump_and_load(original_db, anonymized):
 
 def test_load_anonymize_remove(dumped_db, anonymized):
     assert_db_empty(anonymized)
-    load_anonymize_remove(DUMP_PATH, SCHEMA_PATH, leave_dump=False, db_args=ANONYMIZED_DB_ARGS)
+    load_anonymize_remove(
+        DUMP_PATH, SCHEMA_PATH, leave_dump=False, db_args=ANONYMIZED_DB_ARGS
+    )
     assert_db_anonymized(anonymized)