diff --git a/.config/dictionaries/project.dic b/.config/dictionaries/project.dic index 74723fad7..87a059c42 100644 --- a/.config/dictionaries/project.dic +++ b/.config/dictionaries/project.dic @@ -15,6 +15,7 @@ depgraph devenv dind dockerhub +doseq doublecircle Earthfile Earthfiles @@ -50,6 +51,7 @@ idents JDBC jorm jormungandr +jsonlib junitreport Kroki kubeconfig diff --git a/.github/workflows/validate-project-fields.yml b/.github/workflows/validate-project-fields.yml new file mode 100644 index 000000000..9b8ff2c9b --- /dev/null +++ b/.github/workflows/validate-project-fields.yml @@ -0,0 +1,49 @@ +name: Validate Project Fields + +on: + pull_request: + types: + - opened + - edited + - synchronize + - reopened + - unassigned + +permissions: + contents: write + pull-requests: write + id-token: write + repository-projects: write + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +jobs: + validate-project-fields: + runs-on: ubuntu-latest + env: + # Needs a PAT Classic with (read:project) + GITHUB_PROJECTS_PAT: ${{ secrets.PROJECTS_PAT }} + GITHUB_REPOSITORY: "${{ github.repository }}" + GITHUB_EVENT_NUMBER: "${{ github.event.number || '0' }}" + PROJECT_NUMBER: 102 + steps: + - name: Fetch Validation Script + uses: actions/checkout@v4 + with: + repository: input-output-hk/catalyst-ci + ref: master + sparse-checkout: | + utilities/project-fields-validator/main.py + sparse-checkout-cone-mode: false + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Run Project Fields Validation + if: always() + continue-on-error: false + run: utilities/project-fields-validator/main.py diff --git a/Justfile b/Justfile index 17c1bbb6e..eb175b123 100644 --- a/Justfile +++ b/Justfile @@ -16,5 +16,15 @@ check-spelling: earthly +clean-spelling-list earthly +check-spelling + +# Fix and Format Python files +format-python-code: + ruff check --select I --fix . + ruff format . + +# Lint Python files +lint-python: + ruff check . + # Pre Push Checks - intended to be run by a git pre-push hook. -pre-push: check-markdown check-spelling +pre-push: check-markdown check-spelling format-python-code lint-python diff --git a/earthly/docs/common/macros/include.py b/earthly/docs/common/macros/include.py index 4a9ca0b1b..94e615930 100644 --- a/earthly/docs/common/macros/include.py +++ b/earthly/docs/common/macros/include.py @@ -1,6 +1,7 @@ import os -import textwrap import re +import textwrap + def inc_file(env, filename, start_line=0, end_line=None, indent=None): """ @@ -10,7 +11,7 @@ def inc_file(env, filename, start_line=0, end_line=None, indent=None): project. indent = number of spaces to indent every line but the first. """ - + try: full_filename = os.path.join(env.project_dir, filename) @@ -24,8 +25,8 @@ def inc_file(env, filename, start_line=0, end_line=None, indent=None): else: indent = " " * indent text = textwrap.indent(text, indent) - text = text[len(indent):] # First line should not be indented at all. - text = re.sub(r'\n$', '', text, count=1) + text = text[len(indent) :] # First line should not be indented at all.
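For reference, the `include.py` change above and just below only reflows an existing idiom: indent every line, un-indent the first, and drop a single trailing newline. A stand-alone sketch of that idiom (the helper name is hypothetical):

```python
import re
import textwrap


def indent_all_but_first(text: str, spaces: int = 4) -> str:
    # Hypothetical helper mirroring the inc_file() indentation logic.
    prefix = " " * spaces
    text = textwrap.indent(text, prefix)
    text = text[len(prefix):]  # First line should not be indented at all.
    return re.sub(r"\n$", "", text, count=1)  # Remove one trailing newline.


print(indent_all_but_first("line1\nline2\nline3\n", 2))
# line1
#   line2
#   line3
```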
+ text = re.sub(r"\n$", "", text, count=1) # print(text) return text except Exception as exc: diff --git a/earthly/docs/dev/local.py b/earthly/docs/dev/local.py index 4a528a53c..0414b43fa 100755 --- a/earthly/docs/dev/local.py +++ b/earthly/docs/dev/local.py @@ -2,13 +2,13 @@ # cspell: words gmtime +import argparse import subprocess +import sys import time +import urllib.request import webbrowser from dataclasses import dataclass, field -import argparse -import sys -import urllib.request class ProcessRunError(Exception): @@ -213,7 +213,7 @@ def main(): # Open the webpage in a browser (once) if not browsed: - browsed=True + browsed = True if not args.no_browser: webbrowser.open(f"http://localhost:{docs_container.exposed_port}") diff --git a/earthly/postgresql/scripts/std_checks.py b/earthly/postgresql/scripts/std_checks.py index 1164445f2..d55277f07 100755 --- a/earthly/postgresql/scripts/std_checks.py +++ b/earthly/postgresql/scripts/std_checks.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 +import argparse + import python.exec_manager as exec_manager import python.vendor_files_check as vendor_files_check -import argparse import rich -from rich import print -import os # This script is run inside the `check` stage for postgres database setup # to perform all high level non-compilation checks. @@ -32,7 +31,7 @@ def main(): # Force color output in CI rich.reconfigure(color_system="256") - parser = argparse.ArgumentParser(description="Postgres checks processing.") + argparse.ArgumentParser(description="Postgres checks processing.") results = exec_manager.Results("Postgres checks") diff --git a/earthly/postgresql/scripts/std_docs.py b/earthly/postgresql/scripts/std_docs.py index e64c7e3ad..32d234bf1 100755 --- a/earthly/postgresql/scripts/std_docs.py +++ b/earthly/postgresql/scripts/std_docs.py @@ -2,16 +2,17 @@ # cspell: words dbmigrations dbhost dbuser dbuserpw Tsvg pgsql11 -from typing import Optional -import python.exec_manager as exec_manager -import python.db_ops as db_ops import argparse -import rich -from rich import print import os import re from textwrap import indent +import python.db_ops as db_ops +import python.exec_manager as exec_manager +import rich +from rich import print + + def process_sql_files(directory): file_pattern = r"V(\d+)__(\w+)\.sql" migrations = {} @@ -32,11 +33,12 @@ def process_sql_files(directory): migrations[version] = { "version": version, "migration_name": migration_name, - "sql_data": sql_data + "sql_data": sql_data, } return migrations, largest_version + class Migrations: def __init__(self, args: argparse.Namespace): """ @@ -73,6 +75,7 @@ def create_markdown_file(self, file_path): print("Markdown file created successfully at: {}".format(file_path)) + def main(): # Force color output in CI rich.reconfigure(color_system="256") @@ -124,9 +127,7 @@ def main(): f"-o docs/database_schema/ " ) res = exec_manager.cli_run( - schemaspy_cmd, - name="Generate SchemaSpy Documentation", - verbose=True + schemaspy_cmd, name="Generate SchemaSpy Documentation", verbose=True ) results.add(res) @@ -135,7 +136,7 @@ def main(): exec_manager.cli_run( 'echo "hide: true" > docs/database_schema/.pages', name="Create .pages file", - verbose=True + verbose=True, ) migrations.create_markdown_file("docs/migrations.md") @@ -145,5 +146,6 @@ def main(): if not results.ok(): exit(1) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/earthly/python/Earthfile b/earthly/python/Earthfile index 9a7f66d77..21425d4eb 100644 --- a/earthly/python/Earthfile +++ 
b/earthly/python/Earthfile @@ -35,9 +35,15 @@ python-base: # Adjust Poetry's configuration to prevent connection pool warnings. RUN poetry config installer.max-workers 10 + # Extensions we use need Rust. + RUN curl https://sh.rustup.rs -sSf | bash -s -- -y + RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc + ENV PATH="/root/.cargo/bin:${PATH}" + # Install ruff for linting. RUN pip3 install ruff RUN pip3 install rich + RUN pip3 install third-party-imports # Universal build scripts we will always need and are not target dependent. COPY --dir scripts /scripts @@ -58,9 +64,10 @@ BUILDER: CHECK: FUNCTION + ARG options # Execute the check script - RUN /scripts/std_checks.py + RUN /scripts/std_checks.py $options LINT_PYTHON: # Linting all Python code is done with ruff diff --git a/earthly/python/scripts/std_checks.py b/earthly/python/scripts/std_checks.py index e435976cb..44e74c621 100755 --- a/earthly/python/scripts/std_checks.py +++ b/earthly/python/scripts/std_checks.py @@ -4,44 +4,114 @@ import subprocess import sys -def check_pyproject_toml(): + +def check_pyproject_toml(stand_alone): # Check if 'pyproject.toml' exists in the project root. - if not os.path.isfile('pyproject.toml'): + if not os.path.isfile("pyproject.toml"): + if stand_alone: + print("pyproject.toml check passed.") + return True + print("Error: pyproject.toml not found.") return False else: + if stand_alone: + print("Error: pyproject.toml found in stand-alone Python module.") + return False + print("pyproject.toml check passed.") return True - -def check_poetry_lock(): + + +def check_poetry_lock(stand_alone): # Check if 'poetry.lock' exists in the project root. - if not os.path.isfile('poetry.lock'): + if not os.path.isfile("poetry.lock"): + if stand_alone: + print("poetry.lock check passed.") + return True + print("Error: poetry.lock not found.") return False else: + if stand_alone: + print("Error: poetry.lock found in stand-alone module.") + return False + print("poetry.lock check passed.") return True + +def check_lint_with_ruff(): + # Check Python code linting issues using 'ruff'. + result = subprocess.run(["ruff", "check", "."], capture_output=True) + if result.returncode != 0: + print("Code linting issues found.") + print(result.stdout.decode()) + return False + else: + print("Code linting check passed.") + return True + + def check_code_format_with_ruff(): # Check Python code formatting and linting issues using 'ruff'.
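# Note (a sketch, not part of the diff): `ruff check .` applies lint rules, while
# `ruff format --check .` only verifies formatting; the split into two functions
# here mirrors the two commands:
#   subprocess.run(["ruff", "check", "."], capture_output=True)
#   subprocess.run(["ruff", "format", "--check", "."], capture_output=True)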
- result = subprocess.run(['ruff', 'check', '.'], capture_output=True) + result = subprocess.run(["ruff", "format", "--check", "."], capture_output=True) if result.returncode != 0: - print("Code formatting and linting issues found.") + print("Code formatting issues found.") print(result.stdout.decode()) return False else: - print("Code formatting and linting check passed.") + print("Code formatting check passed.") return True -def main(): + +def zero_third_party_packages_found(output): + lines = output.split("\n") # Split the multiline string into individual lines + + if len(lines) < 2: + return False # The second line doesn't exist + else: + return lines[1].startswith("Found '0' third-party package imports") + + +def check_no_third_party_imports(): + # Check no third party imports have been used + result = subprocess.run(["third-party-imports", "."], capture_output=True) + output = result.stdout.decode() + + if result.returncode != 0 or not zero_third_party_packages_found(output): + print("Checking third party imports failed.") + print(output) + return False + else: + print("Checking third party imports passed.") + return True + + +def main(stand_alone): + if stand_alone: + print( + "Checking Standalone Python files (No third party imports or poetry project)" + ) checks_passed = True # Perform checks - checks_passed &= check_pyproject_toml() - checks_passed &= check_poetry_lock() + + # These checks pass for Python programs that require third party libraries, and are inverted for stand-alone programs + checks_passed &= check_pyproject_toml(stand_alone) + checks_passed &= check_poetry_lock(stand_alone) + + # Always done + checks_passed &= check_lint_with_ruff() checks_passed &= check_code_format_with_ruff() + # Only done if the code should be able to run without third party libraries + if stand_alone: + checks_passed &= check_no_third_party_imports() + if not checks_passed: sys.exit(1) + if __name__ == "__main__": - main() + print(f"Current Working Directory: {os.getcwd()}") + main("--stand-alone" in sys.argv[1:]) diff --git a/earthly/rust/scripts/std_build.py b/earthly/rust/scripts/std_build.py index 06e2b15cb..863255bd8 100755 --- a/earthly/rust/scripts/std_build.py +++ b/earthly/rust/scripts/std_build.py @@ -2,14 +2,11 @@ # cspell: words lcov depgraph readelf sysroot -import concurrent.futures -import time -import os - import argparse -import rich +import os import python.exec_manager as exec_manager +import rich from python.utils import fix_quoted_earthly_args # This script is run inside the `build` stage.
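The new stand-alone check above keys off the second line printed by the `third-party-imports` tool. A small self-test of that parsing; the sample strings are assumptions shaped after the prefix the check expects, not captured tool output:

```python
def zero_third_party_packages_found(output: str) -> bool:
    lines = output.split("\n")  # Split the multiline string into individual lines.
    if len(lines) < 2:
        return False  # The second line doesn't exist.
    return lines[1].startswith("Found '0' third-party package imports")


# Hypothetical outputs of `third-party-imports .`, used only to exercise the parser.
assert zero_third_party_packages_found("Found 2 files.\nFound '0' third-party package imports.")
assert not zero_third_party_packages_found("Found 2 files.\nFound '3' third-party package imports.")
assert not zero_third_party_packages_found("only one line")
```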
@@ -98,14 +95,11 @@ def cargo_doc(verbose: bool = False) -> exec_manager.Result: env = os.environ env["RUSTDOCFLAGS"] = "-Z unstable-options --enable-index-page" return exec_manager.cli_run( - "cargo +nightly docs", - name="Documentation build", - verbose=verbose + "cargo +nightly docs", name="Documentation build", verbose=verbose ) def cargo_depgraph(runner: exec_manager.ParallelRunner, verbose: bool = False) -> None: - runner.run( exec_manager.cli_run, "cargo depgraph " @@ -250,7 +244,6 @@ def strip(results: exec_manager.Results, bin: str): ) ) -import sys def main(): # Force color output in CI @@ -322,8 +315,12 @@ def main(): ) args = parser.parse_args() - libs = filter(lambda lib: lib.strip() and len(lib.strip()) > 0, args.libs.split(",")) - bins = list(filter(lambda bin: bin.strip() and len(bin.strip()) > 0, args.bins.split(","))) + libs = filter( + lambda lib: lib.strip() and len(lib.strip()) > 0, args.libs.split(",") + ) + bins = list( + filter(lambda bin: bin.strip() and len(bin.strip()) > 0, args.bins.split(",")) + ) with exec_manager.ParallelRunner("Rust build") as runner: # Build the code. @@ -365,7 +362,9 @@ def main(): cargo_modules_lib(runner, lib, not args.disable_docs, args.verbose) for bin in bins: package, bin_name = bin.split("/") - cargo_modules_bin(runner, package, bin_name, not args.disable_docs, args.verbose) + cargo_modules_bin( + runner, package, bin_name, not args.disable_docs, args.verbose + ) results = runner.get_results() @@ -377,7 +376,6 @@ def main(): # Check if all documentation tests pass. results.add(cargo_doctest(args.doctest_flags, args.verbose)) - results.print() if not results.ok(): exit(1) diff --git a/earthly/rust/scripts/std_checks.py b/earthly/rust/scripts/std_checks.py index 615142400..7d5ee4594 100755 --- a/earthly/rust/scripts/std_checks.py +++ b/earthly/rust/scripts/std_checks.py @@ -2,12 +2,13 @@ # cspell: words stdcfgs +import argparse +import os + import python.exec_manager as exec_manager import python.vendor_files_check as vendor_files_check -import argparse import rich from rich import print -import os # This script is run inside the `check` stage for rust projects to perform all # high level non-compilation checks. @@ -22,12 +23,12 @@ def main(): - rust_toolchain_enabled=False - + rust_toolchain_enabled = False + # Force color output in CI rich.reconfigure(color_system="256") - parser = argparse.ArgumentParser( + argparse.ArgumentParser( description="Rust high level non-compilation checks processing." ) @@ -68,7 +69,7 @@ def main(): results.add( vendor_files_check.toml_diff_check( - f"/stdcfgs/cargo_config.toml", ".cargo/config.toml" + "/stdcfgs/cargo_config.toml", ".cargo/config.toml" ) ) if rust_toolchain_enabled: @@ -104,7 +105,10 @@ def main(): results.add(exec_manager.cli_run("cargo machete", name="Unused Dependencies Check")) # Check if we have any supply chain issues with dependencies. 
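The `--libs`/`--bins` hunk above only reflows two filter lambdas that drop blank entries from comma-separated arguments. For comparison, an equivalent comprehension (a sketch; unlike the lambdas it also strips whitespace from the kept items):

```python
def split_csv(arg: str) -> list[str]:
    # Keep only non-blank entries of a comma-separated argument.
    return [item.strip() for item in arg.split(",") if item.strip()]


assert split_csv("crate_a, crate_b,, ,crate_c") == ["crate_a", "crate_b", "crate_c"]
assert split_csv("") == []
```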
results.add( - exec_manager.cli_run("cargo deny check --exclude-dev -W vulnerability -W unmaintained", name="Supply Chain Issues Check") + exec_manager.cli_run( + "cargo deny check --exclude-dev -W vulnerability -W unmaintained", + name="Supply Chain Issues Check", + ) ) results.print() diff --git a/earthly/wasm/c/scripts/std_build.py b/earthly/wasm/c/scripts/std_build.py index fdde66d9c..bbdb5ac91 100755 --- a/earthly/wasm/c/scripts/std_build.py +++ b/earthly/wasm/c/scripts/std_build.py @@ -2,11 +2,12 @@ # cspell: words lcov depgraph readelf sysroot bindgen autodrop mexec -import python.exec_manager as exec_manager import argparse -import rich import os +import python.exec_manager as exec_manager +import rich + # This script is run inside the `build` stage. BINDINGS_SRC = "bindings_src" @@ -15,12 +16,14 @@ def wit_bindgen_c(results: exec_manager.Results, wit_path: str): results.add( exec_manager.cli_run( - " ".join([ - "wit-bindgen c", - "--autodrop-borrows yes", - f"--out-dir {BINDINGS_SRC}", - wit_path - ]), + " ".join( + [ + "wit-bindgen c", + "--autodrop-borrows yes", + f"--out-dir {BINDINGS_SRC}", + wit_path, + ] + ), name="Generate bindings C code.", verbose=True, ) @@ -38,16 +41,18 @@ def clang_wasm_compile(results: exec_manager.Results, c_files: str): ) results.add( exec_manager.cli_run( - " ".join([ - "/opt/wasi-sdk/bin/clang", - "--sysroot=/opt/wasi-sdk/share/wasi-sysroot", - bindings_src, - c_files, - "-Oz", - "-o out.wasm", - "-mexec-model=reactor", - "--target=wasm32-wasi" - ]), + " ".join( + [ + "/opt/wasi-sdk/bin/clang", + "--sysroot=/opt/wasi-sdk/share/wasi-sysroot", + bindings_src, + c_files, + "-Oz", + "-o out.wasm", + "-mexec-model=reactor", + "--target=wasm32-wasi", + ] + ), name="Compile C code to wasm module", verbose=True, ) diff --git a/examples/python/src/sum.py b/examples/python/src/sum.py index a4a5d8094..914032759 100644 --- a/examples/python/src/sum.py +++ b/examples/python/src/sum.py @@ -1,2 +1,2 @@ def sum(a, b): - return a + b \ No newline at end of file + return a + b diff --git a/examples/python/src/test_sum.py b/examples/python/src/test_sum.py index 799fbd21d..66be7c2d9 100644 --- a/examples/python/src/test_sum.py +++ b/examples/python/src/test_sum.py @@ -1,5 +1,6 @@ from src.sum import sum + def test_sum(): assert sum(2, 4) == 6 assert sum(2, 4) != 5 diff --git a/utilities/earthly-cache-watcher/helper.py b/utilities/earthly-cache-watcher/helper.py index c62188a56..14ac26a29 100644 --- a/utilities/earthly-cache-watcher/helper.py +++ b/utilities/earthly-cache-watcher/helper.py @@ -27,19 +27,19 @@ def get_subdirectory_name(working_dir_path: str, path: str): working_dir_path = os.path.abspath(working_dir_path) path = os.path.abspath(path) - if ( - os.path.commonpath([working_dir_path]) - != os.path.commonpath([working_dir_path, path]) + if os.path.commonpath([working_dir_path]) != os.path.commonpath( + [working_dir_path, path] ): return None - + relative_path = os.path.relpath(path, working_dir_path) parts = relative_path.split(os.sep) - + if parts: return parts[0] return None + def add_or_init(obj: dict[str, int], key: str, value: int): obj.setdefault(key, 0) - obj[key] += value \ No newline at end of file + obj[key] += value diff --git a/utilities/earthly-cache-watcher/main.py b/utilities/earthly-cache-watcher/main.py index 5174b26e2..564e30c26 100644 --- a/utilities/earthly-cache-watcher/main.py +++ b/utilities/earthly-cache-watcher/main.py @@ -17,6 +17,7 @@ logger.remove() # Remove the default handler logger.add(sys.stdout, level="INFO", serialize=True, 
format="{message}") + class Interval: """ A class that repeatedly executes a function @@ -170,11 +171,15 @@ def handle_modified(self, file_path: str): # checks self.check_sizes(layer_name) - logger.debug(" ".join([ - f"file modified: {file_path}", - f"(size changed from {prev_size:,} bytes", - f"to {size:,} bytes)" - ])) + logger.debug( + " ".join( + [ + f"file modified: {file_path}", + f"(size changed from {prev_size:,} bytes", + f"to {size:,} bytes)", + ] + ) + ) else: logger.debug(f"file modified: {file_path} (size unchanged)") except OSError as e: @@ -216,8 +221,7 @@ def handle_deleted(self, file_path: str): def check_sizes(self, layer_name: str, skip_sum_check=False): if ( layer_name in self.layer_index - and self.layer_index[layer_name] - >= large_layer_size + and self.layer_index[layer_name] >= large_layer_size ): self.trigger_layer_size_exceeded(layer_name) @@ -241,17 +245,19 @@ def trigger_layer_size_exceeded(self, layer_name: str): self.triggered_layers.add(layer_name) logger.error( - " ".join([ - f"layer '{layer_name}' exceeds large layer size criteria", - f"(size: {self.layer_index[layer_name]:,} bytes", - f"- limit: {large_layer_size:,} bytes)" - ]), + " ".join( + [ + f"layer '{layer_name}' exceeds large layer size criteria", + f"(size: {self.layer_index[layer_name]:,} bytes", + f"- limit: {large_layer_size:,} bytes)", + ] + ), extra={ "err_type": "layer_size_exceeded", "layer": layer_name, "size": self.layer_index[layer_name], - "limit": large_layer_size - } + "limit": large_layer_size, + }, ) def trigger_interval_growth_exceeded(self): @@ -265,33 +271,37 @@ def trigger_interval_growth_exceeded(self): self.triggered_growth_layers.add(layer_name) logger.error( - " ".join([ - f"layer '{layer_name}'", - f"- {size:,} bytes within the interval" - ]), + " ".join( + [ + f"layer '{layer_name}'", + f"- {size:,} bytes within the interval", + ] + ), extra={ "err_type": "layer_list_growth_exceeded", "layer": layer_name, - "size": size - } + "size": size, + }, ) if has_triggered_layer: size = sum(self.layer_growth_index.values()) logger.error( - " ".join([ - "the total amount of cache growth", - f"within {time_window:,} secs exceeds the limit", - f"(size: {size:,} bytes", - f"- limit: {max_time_window_growth_size:,} bytes)" - ]), + " ".join( + [ + "the total amount of cache growth", + f"within {time_window:,} secs exceeds the limit", + f"(size: {size:,} bytes", + f"- limit: {max_time_window_growth_size:,} bytes)", + ] + ), extra={ "err_type": "interval_growth_exceeded", "size": size, "limit": max_time_window_growth_size, - "within": time_window - } + "within": time_window, + }, ) except RuntimeError as e: logger.error(f"an error occurred: {e}") @@ -300,21 +310,24 @@ def trigger_max_cache_size(self): size = sum(self.layer_index.values()) logger.error( - " ".join([ - "the total amount of cache exceeds the limit", - f"(size: {size:,} bytes", - f"- limit: {max_cache_size:,} bytes)" - ]), + " ".join( + [ + "the total amount of cache exceeds the limit", + f"(size: {size:,} bytes", + f"- limit: {max_cache_size:,} bytes)", + ] + ), extra={ "err_type": "max_cache_size_exceeded", "size": size, - "limit": max_cache_size - } + "limit": max_cache_size, + }, ) def drop(self): self.interval.drop() + def main(): global \ watch_dir, \ @@ -354,14 +367,19 @@ def main(): logger.info(f"with `large_layer_size` set to {large_layer_size:,} bytes") logger.info(f"with `max_cache_size` set to {max_cache_size:,} bytes") logger.info(f"with `time_window` set to {time_window:,} secs") - logger.info(" ".join([ - "with 
`max_time_window_growth_size` set to", - f"{max_time_window_growth_size:,} bytes" - ])) - logger.info(" ".join([ - "with `log_file_accessing_err` set to", - log_file_accessing_err - ])) + logger.info( + " ".join( + [ + "with `max_time_window_growth_size` set to", + f"{max_time_window_growth_size:,} bytes", + ] + ) + ) + logger.info( + " ".join( + ["with `log_file_accessing_err` set to", log_file_accessing_err] + ) + ) # init watcher handler = ChangeEventHandler(time_window) diff --git a/utilities/project-fields-validator/Earthfile b/utilities/project-fields-validator/Earthfile new file mode 100644 index 000000000..07d1daf36 --- /dev/null +++ b/utilities/project-fields-validator/Earthfile @@ -0,0 +1,27 @@ +VERSION 0.8 + +IMPORT ../../earthly/python AS python-ci + +test: + FROM python-ci+python-base + WORKDIR /stand-alone + COPY . . + # Stand-alone Python command that doesn't use any third party libraries + DO python-ci+CHECK --options="--stand-alone" + +validate-project-fields: + FROM python-ci+python-base + + RUN --no-cache echo No Cache + + COPY . . + + ARG PROJECT_NUMBER=102 + ARG --required GITHUB_REPOSITORY + ARG --required GITHUB_EVENT_NUMBER + + ENV PROJECT_NUMBER=${PROJECT_NUMBER} + ENV GITHUB_REPOSITORY=${GITHUB_REPOSITORY} + ENV GITHUB_EVENT_NUMBER=${GITHUB_EVENT_NUMBER} + + RUN --no-cache --secret GITHUB_PROJECTS_PAT python3 main.py \ No newline at end of file diff --git a/utilities/project-fields-validator/Justfile b/utilities/project-fields-validator/Justfile new file mode 100644 index 000000000..0a736ca11 --- /dev/null +++ b/utilities/project-fields-validator/Justfile @@ -0,0 +1,19 @@ +# use with https://github.com/casey/just +# + +# cspell: words prereqs, commitlog + +default: + @just --list --unsorted + +# Fix and Format Python files +format-code: + ruff check --select I --fix . + ruff format . + +# Lint Python files +lint: + ruff check . + +# Pre Push Checks - intended to be run by a git pre-push hook. +pre-push: format-code lint diff --git a/utilities/project-fields-validator/README.md b/utilities/project-fields-validator/README.md new file mode 100644 index 000000000..9e8ec9c62 --- /dev/null +++ b/utilities/project-fields-validator/README.md @@ -0,0 +1,6 @@ +# Project fields validator + +This module validates the project fields of a pull request. It: + +* auto-assigns the PR creator as assignee +* verifies that all required project fields are filled out, and fails if any are left unfilled. diff --git a/utilities/project-fields-validator/main.py b/utilities/project-fields-validator/main.py new file mode 100755 index 000000000..976cb150b --- /dev/null +++ b/utilities/project-fields-validator/main.py @@ -0,0 +1,588 @@ +#!/usr/bin/env python3 +import json as jsonlib +import os +import sys +import traceback +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from email.message import Message +from enum import Enum +from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set + + +class SafeOpener(urllib.request.OpenerDirector): + """An opener with configurable set of handlers.""" + + opener = None + + def __init__(self, handlers: Iterable = None): + """ + Instantiate an OpenDirector with selected handlers.
+ + Args: + handlers: an Iterable of handler classes + """ + super().__init__() + handlers = handlers or ( + urllib.request.UnknownHandler, + urllib.request.HTTPDefaultErrorHandler, + urllib.request.HTTPRedirectHandler, + urllib.request.HTTPSHandler, + urllib.request.HTTPErrorProcessor, + ) + + for handler_class in handlers: + handler = handler_class() + self.add_handler(handler) + + +class RequestException(Exception): + """There was an ambiguous exception that occurred while handling your + request. + """ + + def __init__(self, *args, **kwargs): + """Initialize RequestException with `request` and `response` objects.""" + response = kwargs.pop("response", None) + self.response = response + self.request = kwargs.pop("request", None) + if response is not None and not self.request and hasattr(response, "request"): + self.request = self.response.request + super().__init__(*args, **kwargs) + + +class Response(NamedTuple): + """Container for HTTP response.""" + + body: str + headers: Message + status: int + url: str + request: urllib.request.Request + + def json(self) -> Any: + """ + Decode body's JSON. + + Returns: + Pythonic representation of the JSON object + """ + try: + output = jsonlib.loads(self.body) + except jsonlib.JSONDecodeError as e: + raise RequestException(e, response=self) + return output + + def raise_for_status(self): + """Raise an exception if the response is not successful.""" + if self.status >= 400: + raise RequestException(Exception("Status Error"), response=self) + + +# only used by `request` +opener = SafeOpener() + + +def request( + method: str, + url: str, + json: dict = None, + params: dict = None, + headers: dict = None, + data_as_json: bool = True, +) -> Response: + """ + Perform HTTP request. + + Args: + url: url to fetch + json: dict of keys/values to be encoded and submitted + params: dict of keys/values to be encoded in URL query string + headers: optional dict of request headers + method: HTTP method , such as GET or POST + data_as_json: if True, data will be JSON-encoded + + Returns: + A dict with headers, body, status code, and, if applicable, object + rendered from JSON + """ + try: + method = method.upper() + request_data = None + headers = headers or {} + json = json or {} + params = params or {} + headers = {"Accept": "application/json", **headers} + httprequest = None + response = None + + if method == "GET": + params = {**params, **json} + json = None + + if params: + url += "?" 
+ urllib.parse.urlencode(params, doseq=True, safe="/") + + if json: + if data_as_json: + request_data = jsonlib.dumps(json).encode() + headers["Content-Type"] = "application/json; charset=UTF-8" + else: + request_data = urllib.parse.urlencode(json).encode() + + httprequest = urllib.request.Request( + url, + data=request_data, + headers=headers, + method=method, + ) + + with opener.open( + httprequest, + ) as httpresponse: + response = Response( + body=httpresponse.read().decode( + httpresponse.headers.get_content_charset("utf-8") + ), + headers=httpresponse.headers, + status=httpresponse.status, + url=httpresponse.url, + request=httprequest, + ) + except Exception as e: + raise RequestException(e, request=httprequest, response=response) + + return response + + +class FieldType(Enum): + TEXT = "text" + DATE = "date" + SELECT = "name" + NUMBER = "number" + ITERATION = "title" + + +@dataclass +class ProjectField: + name: str + value: Optional[str] = None + field_type: Optional[FieldType] = None + + +class GitHubAPIError(Exception): + """Exception for GitHub API errors""" + + def __init__(self, message: str, response_data: Optional[Dict] = None): + super().__init__(message) + self.response_data = response_data + + +class ProjectFieldsValidator: + BASE_URL = "https://api.github.com" + GRAPHQL_URL = f"{BASE_URL}/graphql" + + def __init__(self, GITHUB_PROJECTS_PAT: str): + if not GITHUB_PROJECTS_PAT: + raise ValueError("GitHub token is required but was empty") + + self.headers = { + "Authorization": f"Bearer {GITHUB_PROJECTS_PAT}", + "Accept": "application/vnd.github.v3+json", + } + self.required_fields = [ + ProjectField("Status"), + ProjectField("Area"), + ProjectField("Priority"), + ProjectField("Estimate"), + ProjectField("Iteration"), + ProjectField("Start"), + ProjectField("End"), + ] + + def _make_request(self, method: str, url: str, **kwargs) -> Dict[str, Any]: + """Generic method to make HTTP requests with error handling""" + try: + response = request(method, url, headers=self.headers, **kwargs) + response.raise_for_status() + + print(f"\nAPI Response Status: {response.status}") + + try: + data = response.json() + + if "errors" in data: + error_messages = "; ".join( + error.get("message", "Unknown error") + for error in data["errors"] + ) + raise GitHubAPIError(f"GraphQL API errors: {error_messages}", data) + + if "data" in data and data["data"] is None: + raise GitHubAPIError("API returned null data", data) + + return data + except jsonlib.JSONDecodeError as e: + raise GitHubAPIError( + f"Failed to parse API response: {str(e)} METHOD={method} URL={url} JSON={kwargs.get('json')}" + ) + + except RequestException as e: + raise GitHubAPIError( + f"GitHub API request failed: {str(e)} METHOD={method} URL={url} ARGS={kwargs}" + ) + + def run_query(self, query: str, variables: Dict[str, Any]) -> Dict[str, Any]: + """Execute a GraphQL query against GitHub's API.""" + return self._make_request( + "POST", self.GRAPHQL_URL, json={"query": query, "variables": variables} + ) + + def get_pr_details( + self, org_name: str, repo_name: str, pr_number: int + ) -> Dict[str, Any]: + """Get PR details including assignees.""" + query = """ + query($org: String!, $repo: String!, $number: Int!) 
{ + repository(owner: $org, name: $repo) { + pullRequest(number: $number) { + id + author { + login + } + assignees(first: 10) { + nodes { + login + } + } + } + } + } + """ + + print(f"\nFetching PR details for {org_name}/{repo_name}#{pr_number}") + + result = self.run_query( + query, {"org": org_name, "repo": repo_name, "number": pr_number} + ) + + if not result.get("data"): + raise GitHubAPIError("No data returned from API", result) + if not result["data"].get("repository"): + raise GitHubAPIError("Repository not found", result) + if not result["data"]["repository"].get("pullRequest"): + raise GitHubAPIError(f"PR #{pr_number} not found", result) + + return result["data"]["repository"]["pullRequest"] + + def assign_pr( + self, org_name: str, repo_name: str, pr_number: int, assignee: str + ) -> None: + """Assign PR to a user using REST API.""" + url = ( + f"{self.BASE_URL}/repos/{org_name}/{repo_name}/issues/{pr_number}/assignees" + ) + try: + self._make_request("POST", url, json={"assignees": [assignee]}) + print(f"✅ PR assigned to @{assignee}") + except GitHubAPIError as e: + print(f"❌ Failed to assign PR to @{assignee}: {str(e)}") + + def get_project_items( + self, org_name: str, project_number: int + ) -> List[Dict[str, Any]]: + """Fetch all items from the project with pagination.""" + query = """ + query($org: String!, $projectNumber: Int!, $cursor: String) { + organization(login: $org) { + projectV2(number: $projectNumber) { + items(first: 100, after: $cursor) { + pageInfo { + hasNextPage + endCursor + } + nodes { + id + content { + ... on PullRequest { + number + title + url + author { + login + } + repository { + name + } + } + } + fieldValues(first: 20) { + nodes { + ... on ProjectV2ItemFieldTextValue { + field { + ... on ProjectV2FieldCommon { + name + } + } + text + } + ... on ProjectV2ItemFieldDateValue { + field { + ... on ProjectV2FieldCommon { + name + } + } + date + } + ... on ProjectV2ItemFieldSingleSelectValue { + field { + ... on ProjectV2FieldCommon { + name + } + } + name + } + ... on ProjectV2ItemFieldNumberValue { + field { + ... on ProjectV2FieldCommon { + name + } + } + number + } + ... on ProjectV2ItemFieldIterationValue { + field { + ... on ProjectV2FieldCommon { + name + } + } + title + startDate + duration + } + } + } + } + } + } + } + } + """ + return self._paginate_items(query, org_name, project_number) + + def _paginate_items( + self, query: str, org_name: str, project_number: int + ) -> List[Dict[str, Any]]: + """Handle pagination for project items.""" + all_items = [] + cursor = None + total_items = 0 + + while True: + variables = { + "org": org_name, + "projectNumber": project_number, + "cursor": cursor, + } + + try: + result = self.run_query(query, variables) + if not result.get("data", {}).get("organization", {}).get("projectV2"): + raise GitHubAPIError("Could not access project data", result) + + project_data = result["data"]["organization"]["projectV2"]["items"] + valid_items = [ + item + for item in project_data["nodes"] + if item.get("content") and isinstance(item["content"], dict) + ] + + all_items.extend(valid_items) + total_items += len(valid_items) + + sys.stdout.write(f"\rFetching project items... 
{total_items} found") + sys.stdout.flush() + + if not project_data["pageInfo"]["hasNextPage"]: + break + + cursor = project_data["pageInfo"]["endCursor"] + + except GitHubAPIError as e: + print(f"\nError fetching project items: {str(e)}") + if e.response_data: + print("\nAPI Response data:") + print(jsonlib.dumps(e.response_data, indent=2)) + raise + + print("\n") + return all_items + + def validate_item(self, item: Dict[str, Any]) -> Set[str]: + """Validate required fields for an item.""" + field_values = self._extract_field_values(item) + + print("\nCurrent field values:") + print("=" * 50) + for field in self.required_fields: + value = field_values.get(field.name, "❌ empty") + print(f" • {field.name}: {value}") + + return { + field.name + for field in self.required_fields + if field.name not in field_values + } + + def _extract_field_values(self, item: Dict[str, Any]) -> Dict[str, str]: + """Extract field values from item data.""" + field_values = {} + + for field_value in item["fieldValues"]["nodes"]: + if not isinstance(field_value, dict) or "field" not in field_value: + continue + + try: + field_name = field_value["field"]["name"] + for field_type in FieldType: + if field_type.value in field_value: + value = field_value[field_type.value] + if isinstance(value, (int, float)): + value = str(value) + field_values[field_name] = value + break + except (KeyError, TypeError): + continue + + return field_values + + @staticmethod + def print_validation_results(empty_fields: Set[str]) -> None: + """Print validation results in a formatted way.""" + print("\n" + "=" * 50) + print("Validation Results:") + print("=" * 50) + + if not empty_fields: + print("✅ All required fields are filled. Validation passed!") + else: + print("❌ Validation failed. The following fields need to be filled:") + for field in sorted(empty_fields): + print(f" • {field}") + print("\nPlease fill in these fields in the project board.") + + print("=" * 50) + + +def clean_env_var(var: str) -> str: + """Clean environment variable by removing quotes and extra whitespace""" + if var is None: + return None + return var.strip().strip("\"'") + + +def main(): + try: + env_vars = { + "GITHUB_PROJECTS_PAT": clean_env_var(os.environ.get("GITHUB_PROJECTS_PAT")), + "GITHUB_REPOSITORY": clean_env_var(os.environ.get("GITHUB_REPOSITORY")), + "GITHUB_EVENT_NUMBER": clean_env_var(os.environ.get("GITHUB_EVENT_NUMBER")), + "PROJECT_NUMBER": clean_env_var(os.environ.get("PROJECT_NUMBER")), + } + + debug_vars = env_vars.copy() + debug_vars["GITHUB_PROJECTS_PAT"] = "[REDACTED]" if env_vars["GITHUB_PROJECTS_PAT"] else None + print("\nEnvironment variables:") + for key, value in debug_vars.items(): + print(f"{key}: {value}") + + missing_vars = [k for k, v in env_vars.items() if not v] + if missing_vars: + raise ValueError( + f"Missing required environment variables: {', '.join(missing_vars)}" + ) + + try: + pr_number = int(env_vars["GITHUB_EVENT_NUMBER"]) + project_number = int( + env_vars.get("PROJECT_NUMBER", "102") + ) # Default to 102 if not set + except ValueError as e: + raise ValueError( + f"Invalid numeric value in environment variables: {str(e)}" + ) + + github_repository = env_vars["GITHUB_REPOSITORY"] + try: + org_name, repo_name = github_repository.split("/") + except ValueError: + raise ValueError( + f"Invalid repository format: {github_repository}. 
Expected format: owner/repo" + ) + + print(f"\nValidating PR #{pr_number} in {github_repository}") + print(f"Project number: {project_number}") + print("=" * 50) + + validator = ProjectFieldsValidator(env_vars["GITHUB_PROJECTS_PAT"]) + + try: + pr_details = validator.get_pr_details(org_name, repo_name, pr_number) + author = pr_details["author"]["login"] + assignees = [node["login"] for node in pr_details["assignees"]["nodes"]] + + if not assignees: + print(f"\nAssigning PR to author @{author}") + validator.assign_pr(org_name, repo_name, pr_number, author) + + project_items = validator.get_project_items(org_name, project_number) + pr_items = [ + item + for item in project_items + if ( + item["content"].get("number") == pr_number + and item["content"].get("repository", {}).get("name") == repo_name + ) + ] + + if not pr_items: + print( + f"\nWarning: PR #{pr_number} is not linked to project #{project_number}" + ) + print("Please add it to the project using the following steps:") + print("1. Go to the project board") + print("2. Click '+ Add items'") + print("3. Search for this PR") + print("4. Click 'Add selected items'") + sys.exit(0) + + validation_errors = set() + for item in pr_items: + empty_fields = validator.validate_item(item) + validation_errors.update(empty_fields) + + validator.print_validation_results(validation_errors) + + if validation_errors: + sys.exit(1) + + except GitHubAPIError as e: + print(f"\nError accessing GitHub API: {str(e)}") + if e.response_data: + print("\nAPI Response data:") + print(jsonlib.dumps(e.response_data, indent=2)) + sys.exit(1) + + except ValueError as e: + print(f"Configuration error: {str(e)}") + sys.exit(1) + except Exception as e: + print(f"Error: {str(e)}") + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/utilities/scripts/python/db_ops.py b/utilities/scripts/python/db_ops.py index cea17277b..c1441a65a 100644 --- a/utilities/scripts/python/db_ops.py +++ b/utilities/scripts/python/db_ops.py @@ -7,11 +7,12 @@ import argparse import os +import tempfile +import threading import time from typing import Optional + import python.exec_manager as exec_manager -import tempfile -import threading DB_ARGUMENTS = [ ["dbhost", "DB_HOST", "localhost"], @@ -109,7 +110,7 @@ def init_database(self) -> exec_manager.Result: if res.ok(): with open(f"{self.args.dbpath}/pg_hba.conf", "a") as file: file.write(f"include_if_exists {self.args.dbpath}/pg_hba.extra.conf\n") - file.write(f"include_if_exists /sql/pg_hba.extra.conf\n") + file.write("include_if_exists /sql/pg_hba.extra.conf\n") return res @@ -203,7 +204,7 @@ def setup(self) -> exec_manager.Result: # WARNING: Will destroy all data in the DB return exec_manager.cli_run( - f"psql -v ON_ERROR_STOP=on" + "psql -v ON_ERROR_STOP=on" + f" -d {self.superuser_connection()} " + f" -f {self.args.setupdbsql}" + f' -v dbName="{self.args.dbname}"' @@ -224,7 +225,7 @@ def migrate_schema(self) -> exec_manager.Result: # Run schema migrations return exec_manager.cli_run( f"DATABASE_URL={self.user_connection()}" - + f" refinery migrate -e DATABASE_URL" + + " refinery migrate -e DATABASE_URL" + f" -c {self.args.dbrefinerytoml} " + f" -p {self.args.dbmigrations}", name="Migrate Schema", diff --git a/utilities/scripts/python/diff.py b/utilities/scripts/python/diff.py index 488df1599..3ddfab609 100644 --- a/utilities/scripts/python/diff.py +++ b/utilities/scripts/python/diff.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict @dataclass diff --git 
a/utilities/scripts/python/exec_manager.py b/utilities/scripts/python/exec_manager.py index 06cdb9660..05a4e78b0 100755 --- a/utilities/scripts/python/exec_manager.py +++ b/utilities/scripts/python/exec_manager.py @@ -1,15 +1,16 @@ # cspell: words rtype +import concurrent.futures +import multiprocessing import subprocess +import textwrap +import time +from dataclasses import dataclass from typing import Optional + from rich import print -from rich.text import Text from rich.table import Table -from dataclasses import dataclass -import textwrap -import time -import multiprocessing -import concurrent.futures +from rich.text import Text def status_for_rc(rc: int) -> str: @@ -150,7 +151,7 @@ def cli_run( log: bool = True, timeout=None, verbose=False, - env=None + env=None, ) -> Result: def procedure() -> ProcedureResult: result = subprocess.run( @@ -160,7 +161,7 @@ def procedure() -> ProcedureResult: stderr=subprocess.STDOUT, text=True, timeout=timeout, - env=env + env=env, ) return ProcedureResult(result.returncode, command, result.stdout) diff --git a/utilities/scripts/python/utils.py b/utilities/scripts/python/utils.py index 8bec32acb..a770d3a9e 100755 --- a/utilities/scripts/python/utils.py +++ b/utilities/scripts/python/utils.py @@ -32,7 +32,6 @@ def fix_quoted_earthly_args(): class TestProcessListWithQuotes(unittest.TestCase): - def test_process_list_with_quotes(self): sys.argv = [sys.argv[0]] + [ "this", @@ -42,7 +41,7 @@ def test_process_list_with_quotes(self): "this", "doesn't", ] - expected_result = ["this", 'has quoted strings in it', "this", "doesn't"] + expected_result = ["this", "has quoted strings in it", "this", "doesn't"] fix_quoted_earthly_args() self.assertEqual(sys.argv[1:], expected_result) diff --git a/utilities/scripts/python/vendor_files_check.py b/utilities/scripts/python/vendor_files_check.py index 155086574..bdd28054f 100644 --- a/utilities/scripts/python/vendor_files_check.py +++ b/utilities/scripts/python/vendor_files_check.py @@ -25,11 +25,12 @@ def toml_diff_check( f"{'' if strict else 'Non '}Strict Checking" + f" if Provided File {provided_file_path} == Vendored File {vendor_file_path}" ) - + try: - with open(vendor_file_path, "rb") as vendor_file, open( - provided_file_path, "rb" - ) as provided_file: + with ( + open(vendor_file_path, "rb") as vendor_file, + open(provided_file_path, "rb") as provided_file, + ): def procedure() -> exec_manager.ProcedureResult: vendor_obj = tomllib.load(vendor_file) @@ -52,8 +53,8 @@ def procedure() -> exec_manager.ProcedureResult: res = exec_manager.Result( 1, command_name, f"Exception caught: {exc}", 0.0, command_name ) - + if log: res.print(verbose_errors=True, verbose=False) - return res + return res
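The final `vendor_files_check.py` hunk adopts parenthesized context managers, available since Python 3.10. A minimal sketch of the pattern (file names are hypothetical):

```python
# Python 3.10+: parentheses let one `with` statement manage several context
# managers across multiple lines, without backslash continuations.
with (
    open("vendor.toml", "rb") as vendor_file,
    open("provided.toml", "rb") as provided_file,
):
    identical = vendor_file.read() == provided_file.read()
```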