diff --git a/.github/workflows/ruff-action.yaml b/.github/workflows/ruff-action.yaml
new file mode 100644
index 00000000..91a3e779
--- /dev/null
+++ b/.github/workflows/ruff-action.yaml
@@ -0,0 +1,26 @@
+name: ruff-action
+
+on: pull_request
+
+jobs:
+  ruff-format:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Run Ruff formatter (check only)
+        uses: astral-sh/ruff-action@v3
+        with:
+          args: "format --check --diff"
+
+  ruff-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Run Ruff check (check only)
+        uses: astral-sh/ruff-action@v3
+        with:
+          args: "check --diff"
diff --git a/irods_capability_automated_ingest/celery.py b/irods_capability_automated_ingest/celery.py
index 415a7c2f..b93fd950 100644
--- a/irods_capability_automated_ingest/celery.py
+++ b/irods_capability_automated_ingest/celery.py
@@ -1,5 +1,6 @@
-from . import custom_event_handler, sync_logging
+from . import sync_logging
+from billiard import current_process
 from celery import Celery
 from celery.signals import task_prerun, task_postrun
 
 
diff --git a/irods_capability_automated_ingest/char_map_util.py b/irods_capability_automated_ingest/char_map_util.py
index 773a5a29..86e02791 100644
--- a/irods_capability_automated_ingest/char_map_util.py
+++ b/irods_capability_automated_ingest/char_map_util.py
@@ -1,4 +1,3 @@
-import base64
 import hashlib
 import logging
 import re
@@ -16,9 +15,9 @@
 def _re_wrapper(regex):
     return lambda ch: type(ch) is str and len(ch) == 1 and regex.match(ch)
 
 
-_string_replace = lambda _string, _map: _string.translate(
-    {ord(k): v for k, v in _map.items() if v is not None}
-)
+def _string_replace(_string, _map):
+    return _string.translate({ord(k): v for k, v in _map.items() if v is not None})
+
 
 _logger = logging.getLogger("char_map_util")
@@ -72,10 +71,12 @@
     return h.digest()
 
 
-_change_encoding_test = lambda c: c
-_change_encoding_default = lambda c: (
-    chr(c).encode("utf8") if type(c) is int else c.encode("utf8")
-)
+def _change_encoding_test(c):
+    return c
+
+
+def _change_encoding_default(c):
+    return chr(c).encode("utf8") if isinstance(c, int) else c.encode("utf8")
 
 
 # must be called after first use of _encoded_differences()
@@ -109,7 +110,6 @@
 
 
 def _encoded_differences(filename, MapFn=None, xfunc=_change_encoding_default):
-    rx = _allowed_of_type("radixchars", map_fn=MapFn)
     newname = translate_string(filename, MapFn)
     gen = (
         (tuple(xfunc(_) for _ in a), b)
diff --git a/irods_capability_automated_ingest/examples/coll_create_pre_and_post.py b/irods_capability_automated_ingest/examples/coll_create_pre_and_post.py
index df13bcad..02cc9f27 100644
--- a/irods_capability_automated_ingest/examples/coll_create_pre_and_post.py
+++ b/irods_capability_automated_ingest/examples/coll_create_pre_and_post.py
@@ -1,5 +1,3 @@
-import os
-
 from irods_capability_automated_ingest.core import Core
 from irods_capability_automated_ingest.utils import Operation
 
diff --git a/irods_capability_automated_ingest/examples/coll_modify_pre_and_post.py b/irods_capability_automated_ingest/examples/coll_modify_pre_and_post.py
index f47df447..e498b7e2 100644
--- a/irods_capability_automated_ingest/examples/coll_modify_pre_and_post.py
+++ b/irods_capability_automated_ingest/examples/coll_modify_pre_and_post.py
@@ -1,5 +1,3 @@
-import os
-
 from irods_capability_automated_ingest.core import Core
 from irods_capability_automated_ingest.utils import Operation
 
diff --git a/irods_capability_automated_ingest/examples/data_obj_create_pre_and_post.py b/irods_capability_automated_ingest/examples/data_obj_create_pre_and_post.py
index f0839f65..6768fd44 100644
--- a/irods_capability_automated_ingest/examples/data_obj_create_pre_and_post.py
+++ b/irods_capability_automated_ingest/examples/data_obj_create_pre_and_post.py
@@ -1,5 +1,3 @@
-import os
-
 from irods_capability_automated_ingest.core import Core
 from irods_capability_automated_ingest.utils import Operation
 
diff --git a/irods_capability_automated_ingest/examples/data_obj_modify_pre_and_post.py b/irods_capability_automated_ingest/examples/data_obj_modify_pre_and_post.py
index e91d03d2..f6b5b06d 100644
--- a/irods_capability_automated_ingest/examples/data_obj_modify_pre_and_post.py
+++ b/irods_capability_automated_ingest/examples/data_obj_modify_pre_and_post.py
@@ -1,5 +1,3 @@
-import os
-
 from irods_capability_automated_ingest.core import Core
 from irods_capability_automated_ingest.utils import Operation
 
diff --git a/irods_capability_automated_ingest/examples/no_retry.py b/irods_capability_automated_ingest/examples/no_retry.py
index 0e71fb36..9a6ad69d 100644
--- a/irods_capability_automated_ingest/examples/no_retry.py
+++ b/irods_capability_automated_ingest/examples/no_retry.py
@@ -10,7 +10,6 @@ def operation(session, meta, **options):
 
     @staticmethod
     def pre_data_obj_create(hdlr_mod, logger, session, meta, *args, **options):
-        target = meta["target"]
         path = meta["path"]
 
         r = get_redis(meta["config"])
diff --git a/irods_capability_automated_ingest/examples/retry.py b/irods_capability_automated_ingest/examples/retry.py
index e313b13b..a8e94950 100644
--- a/irods_capability_automated_ingest/examples/retry.py
+++ b/irods_capability_automated_ingest/examples/retry.py
@@ -19,7 +19,6 @@ def operation(session, meta, **options):
     @staticmethod
     def pre_data_obj_create(hdlr_mod, logger, session, meta, *args, **options):
         path = meta["path"]
-        target = meta["target"]
 
         r = get_redis(meta["config"])
         failures = r.get("failures:" + path)
diff --git a/irods_capability_automated_ingest/irods/filesystem.py b/irods_capability_automated_ingest/irods/filesystem.py
index 4b33009e..5342d4a1 100644
--- a/irods_capability_automated_ingest/irods/filesystem.py
+++ b/irods_capability_automated_ingest/irods/filesystem.py
@@ -192,7 +192,6 @@ def update_metadata(hdlr_mod, logger, session, meta, **options):
 
     data_obj_info = {"objPath": dest_dataobj_logical_fullpath}
 
-    outdated_repl_nums = []
     found = False
 
     resc_name = event_handler.to_resource(session, **options)
@@ -412,7 +411,6 @@ def sync_dir_meta(hdlr_mod, logger, session, meta, **options):
 
 def sync_data_from_dir(hdlr_mod, meta, logger, content, **options):
     target = meta["target"]
-    path = meta["path"]
     event_handler = custom_event_handler.custom_event_handler(meta)
 
     session = irods_utils.irods_session(
diff --git a/irods_capability_automated_ingest/irods/irods_utils.py b/irods_capability_automated_ingest/irods/irods_utils.py
index d532deec..e07e2ee3 100644
--- a/irods_capability_automated_ingest/irods/irods_utils.py
+++ b/irods_capability_automated_ingest/irods/irods_utils.py
@@ -1,17 +1,17 @@
 from .. import custom_event_handler, sync_logging
 from ..redis_utils import get_redis
-from ..utils import DeleteMode, Operation
+from ..utils import DeleteMode
 
-from irods.exception import CollectionDoesNotExist, NetworkException
+from irods.exception import NetworkException
 from irods.models import Collection, DataObject, Resource
 from irods.session import iRODSSession
 
-import base64
 import json
 import os
 import redis_lock
 import ssl
 import threading
+import time
 
 irods_session_map = {}
 irods_session_timer_map = {}
@@ -195,7 +195,6 @@ def create_dirs(logger, session, meta, **options):
 
 
 def create_dir(hdlr_mod, logger, session, meta, **options):
     target = meta["target"]
-    path = meta["path"]
     logger.info("creating collection " + target)
     session.collections.create(target)
diff --git a/irods_capability_automated_ingest/irods/s3_bucket.py b/irods_capability_automated_ingest/irods/s3_bucket.py
index d882cdbf..0d603115 100644
--- a/irods_capability_automated_ingest/irods/s3_bucket.py
+++ b/irods_capability_automated_ingest/irods/s3_bucket.py
@@ -2,15 +2,19 @@
 
 from .. import custom_event_handler
 from ..utils import Operation
 
+from irods.parallel import _Multipart_close_manager
 from irods.models import Resource, DataObject, Collection
 import irods.keywords as kw
 from minio import Minio
 
 import base64
+import concurrent
 import hashlib
 import io
+import multiprocessing
 import os
+import threading
 
 
 def parallel_upload_from_S3(
@@ -390,17 +394,12 @@ def no_op(hdlr_mod, logger, session, meta, **options):
 
 def sync_file(hdlr_mod, logger, session, meta, scanner, op, **options):
     dest_dataobj_logical_fullpath = meta["target"]
-    source_physical_fullpath = meta["path"]
-    b64_path_str = meta.get("b64_path_str")
     event_handler = custom_event_handler.custom_event_handler(meta)
 
     resc_name = event_handler.to_resource(session, **options)
     if resc_name is not None:
         options["destRescName"] = resc_name
 
-    if b64_path_str is not None:
-        source_physical_fullpath = base64.b64decode(b64_path_str)
-
     logger.info(
         "syncing object %s, options = %s" % (dest_dataobj_logical_fullpath, options)
     )
@@ -441,7 +440,6 @@ def update_metadata(hdlr_mod, logger, session, meta, **options):
 
     data_obj_info = {"objPath": dest_dataobj_logical_fullpath}
 
-    outdated_repl_nums = []
     found = False
 
     resc_name = event_handler.to_resource(session, **options)
@@ -661,7 +659,6 @@ def sync_dir_meta(hdlr_mod, logger, session, meta, **options):
 
 def sync_data_from_dir(hdlr_mod, meta, logger, content, **options):
     target = meta["target"]
-    path = meta["path"]
     event_handler = custom_event_handler.custom_event_handler(meta)
 
     session = irods_utils.irods_session(
diff --git a/irods_capability_automated_ingest/irods_sync.py b/irods_capability_automated_ingest/irods_sync.py
index 6102514e..a9e1fd47 100644
--- a/irods_capability_automated_ingest/irods_sync.py
+++ b/irods_capability_automated_ingest/irods_sync.py
@@ -166,7 +166,7 @@ def add_arguments(parser):
 
 def handle_start(args):
     ex_file_arg = args.exclude_file_type
-    if ex_file_arg != None:
+    if ex_file_arg is not None:
         ex_arg_list = [x.strip() for x in ex_file_arg[0].split(",")]
 
     check_event_handler(args.event_handler)
diff --git a/irods_capability_automated_ingest/redis_key.py b/irods_capability_automated_ingest/redis_key.py
index 20564786..74c3e6e0 100644
--- a/irods_capability_automated_ingest/redis_key.py
+++ b/irods_capability_automated_ingest/redis_key.py
@@ -1,6 +1,4 @@
-import json
 import time
-import traceback
 
 MAX_RETRIES = 10
 
@@ -22,7 +20,7 @@ def retry(self, func, *args, max_retries=MAX_RETRIES):
         try:
             res = func(*args)
             return res
-        except Exception as err:
+        except Exception:
             retries += 1
             # logger.info('Retrying. retries=' + str(retries), max_retries=max_retries, func=func, args=args, err=err, stacktrace=traceback.extract_tb(err.__traceback__))
 
diff --git a/irods_capability_automated_ingest/sync_actions.py b/irods_capability_automated_ingest/sync_actions.py
index dbc54765..e51f2da8 100644
--- a/irods_capability_automated_ingest/sync_actions.py
+++ b/irods_capability_automated_ingest/sync_actions.py
@@ -138,10 +138,7 @@ def start_job(data):
     restart_queue = data["restart_queue"]
     sychronous = data["synchronous"]
     progress = data["progress"]
-    s3_region_name = data["s3_region_name"]
-    s3_endpoint_domain = data["s3_endpoint_domain"]
     s3_keypair = data["s3_keypair"]
-    s3_multipart_chunksize = data["s3_multipart_chunksize_in_mib"]
 
     logger = sync_logging.get_sync_logger(logging_config)
     data_copy = data.copy()
diff --git a/irods_capability_automated_ingest/tasks/filesystem_tasks.py b/irods_capability_automated_ingest/tasks/filesystem_tasks.py
index c6610545..e9cd2a50 100644
--- a/irods_capability_automated_ingest/tasks/filesystem_tasks.py
+++ b/irods_capability_automated_ingest/tasks/filesystem_tasks.py
@@ -11,8 +11,6 @@
 
 from irods.exception import (
     CollectionDoesNotExist,
-    DataObjectDoesNotExist,
-    PycommandsException,
 )
 
 # See https://github.com/celery/celery/issues/5362 for information about billiard and Celery.
diff --git a/irods_capability_automated_ingest/tasks/s3_bucket_tasks.py b/irods_capability_automated_ingest/tasks/s3_bucket_tasks.py
index 41b4ac9e..098c1fda 100644
--- a/irods_capability_automated_ingest/tasks/s3_bucket_tasks.py
+++ b/irods_capability_automated_ingest/tasks/s3_bucket_tasks.py
@@ -15,9 +15,7 @@
 import base64
 import datetime
 import os
-import re
 import redis_lock
-import stat
 import time
 import traceback
 
@@ -156,11 +154,6 @@ def s3_bucket_sync_path(self, meta):
         index=current_process().index,
     )
 
-    exclude_file_name = meta["exclude_file_name"]
-    exclude_directory_name = meta["exclude_directory_name"]
-    file_regex = [re.compile(r) for r in exclude_file_name]
-    dir_regex = [re.compile(r) for r in exclude_directory_name]
-
     for obj in itr:
         obj_stats = {}
 
diff --git a/irods_capability_automated_ingest/test/test_delete_modes.py b/irods_capability_automated_ingest/test/test_delete_modes.py
index c550e484..dfe13e70 100644
--- a/irods_capability_automated_ingest/test/test_delete_modes.py
+++ b/irods_capability_automated_ingest/test/test_delete_modes.py
@@ -8,17 +8,13 @@
 import textwrap
 import time
 
-from irods.data_object import irods_dirname, irods_basename
 from irods.exception import CollectionDoesNotExist
-from irods.meta import iRODSMeta
-from irods.models import Collection, DataObject
 from irods.session import iRODSSession
 
 from irods_capability_automated_ingest.celery import app
 from irods_capability_automated_ingest.redis_utils import get_redis
 from irods_capability_automated_ingest.sync_job import sync_job
 from irods_capability_automated_ingest.utils import DeleteMode, Operation
-import irods_capability_automated_ingest.examples
 
 from . import test_lib
 
@@ -559,10 +555,10 @@ def test_PUT_APPEND_and_DO_NOT_DELETE_does_not_delete_collections(self):
         # Case 20
         self.do_DO_NOT_DELETE_does_not_delete_collections(Operation.PUT_APPEND)
 
-    def test_PUT_SYNC_and_UNREGISTER_are_incompatible(self):
+    def test_PUT_APPEND_and_UNREGISTER_are_incompatible(self):
         # Case 21
         self.do_incompatible_operation_and_delete_mode(
-            Operation.PUT_SYNC, DeleteMode.UNREGISTER
+            Operation.PUT_APPEND, DeleteMode.UNREGISTER
         )
 
     def test_PUT_APPEND_and_TRASH_deletes_collections(self):
diff --git a/irods_capability_automated_ingest/test/test_irods_sync.py b/irods_capability_automated_ingest/test/test_irods_sync.py
index c0cd4ef8..884ea933 100644
--- a/irods_capability_automated_ingest/test/test_irods_sync.py
+++ b/irods_capability_automated_ingest/test/test_irods_sync.py
@@ -12,7 +12,7 @@
 )
 from shutil import rmtree
 from signal import SIGINT
-from tempfile import NamedTemporaryFile, mkdtemp
+from tempfile import NamedTemporaryFile
 import base64
 import glob
 import os
@@ -22,11 +22,9 @@
 import subprocess
 import sys
 import time
-import traceback
 import unittest
 
 from irods.data_object import irods_dirname, irods_basename
-from irods.meta import iRODSMeta
 from irods.models import Collection, DataObject
 from irods.session import iRODSSession
 import irods.keywords as kw
@@ -36,7 +34,6 @@
 from irods_capability_automated_ingest.redis_utils import get_redis
 from irods_capability_automated_ingest.sync_job import sync_job
 from irods_capability_automated_ingest.utils import Operation
-import irods_capability_automated_ingest.examples
 
 from . import test_lib
 
@@ -193,7 +190,10 @@ def read_file(path):
 
 def hierarchy_string_for_leaf(session, logical_path, leafName):
     ptn = re.compile(";" + leafName + "$")
-    equals_or_is_leaf_of = lambda leaf, hierstr: leaf == hierstr or ptn.search(hierstr)
+
+    def equals_or_is_leaf_of(leaf, hierstr):
+        return leaf == hierstr or ptn.search(hierstr)
+
     q = session.query(DataObject).filter(
         DataObject.name == irods_basename(logical_path),
         Collection.name == irods_dirname(logical_path),
@@ -1523,6 +1523,9 @@ def test_register_as_replica_with_resc_name_with_another_replica_in_hier(self):
 
     @unittest.skip("irods/irods#3517 - this is not allowed")
     def test_register_as_replica_root_with_resc_name_with_another_replica_in_hier(self):
+        job_name = (
+            "test_register_as_replica_root_with_resc_name_with_another_replica_in_hier"
+        )
         self.do_put_to_child()
         self.do_register_as_replica_no_assertions("replica_root_with_resc_name")
         self.do_assert_failed_queue("wrong paths", job_name=job_name)
@@ -1531,6 +1534,7 @@ def test_register_as_replica_non_leaf_non_root_with_resc_name_with_another_repli
         self,
     ):
+        job_name = "test_register_as_replica_non_leaf_non_root_with_resc_name_with_another_replica_in_hier"
         self.do_put_to_child()
         self.do_register_as_replica_no_assertions(
             "replica_with_non_root_non_leaf_resc_name"
         )
@@ -1540,12 +1544,14 @@
     # register with as replica event handler
     @unittest.skip("irods/irods#4623")
     def test_register_with_as_replica_event_handler_with_resc_name(self):
+        job_name = "test_register_with_as_replica_event_handler_with_resc_name"
         self.do_register("replica_with_resc_name", resc_name=[REGISTER_RESC2A])
         self.do_assert_failed_queue(count=None, job_name=job_name)
         self.do_assert_retry_queue(count=None, job_name=job_name)
 
     @unittest.skip("irods/irods#4623")
     def test_register_with_as_replica_event_handler_root_with_resc_name(self):
+        job_name = "test_register_with_as_replica_event_handler_root_with_resc_name"
         self.do_register(
             "replica_root_with_resc_name", resc_name=[REGISTER_RESC2A, REGISTER_RESC2B]
         )
@@ -1992,9 +1998,10 @@ class _Test_irods_sync_with_bad_filename:
     EH_SUFFIX = ""
     B64_CALCULATION = b64_calculation
     CHARACTER_MAPPING_TRANSFORM = None
-    LOGICAL_PATH_CALCULATION = lambda this: join(
-        this.dest_coll_path, this.unicode_error_filename
-    )
+
+    def LOGICAL_PATH_CALCULATION(this):
+        return join(this.dest_coll_path, this.unicode_error_filename)
+
     DETAILED_CHECK = True
     # Flag to indicate whether suffixes will be appended to data names for de-ambiguation
     ALLOW_LOGICAL_NAME_SUFFIX = False
@@ -2025,7 +2032,6 @@ def setUp(self):
         self.expected_logical_path = self.LOGICAL_PATH_CALCULATION()
 
     def noop_test(self):
-        x = 1
         pass
 
     def tearDown(self):
@@ -2328,9 +2334,10 @@ class Test_irods_sync_character_mapped_path(
     BAD_FILENAME = b"test-file~with@5!non.alphas"  # maybe add some unicode
     EH_SUFFIX = "_using_char_map"
     CHARACTER_MAPPING_TRANSFORM = character_mapping_transform
-    LOGICAL_PATH_CALCULATION = lambda this: join(
-        this.dest_coll_path, this.remapped_bad_filename
-    )
+
+    def LOGICAL_PATH_CALCULATION(this):
+        return join(this.dest_coll_path, this.remapped_bad_filename)
+
     ALLOW_LOGICAL_NAME_SUFFIX = True
     DETAILED_CHECK = False
     ANNOTATION_REASON = "character_map"
diff --git a/irods_capability_automated_ingest/test/test_s3_bucket_scan.py b/irods_capability_automated_ingest/test/test_s3_bucket_scan.py
index 6738d7e5..f7c808f0 100644
--- a/irods_capability_automated_ingest/test/test_s3_bucket_scan.py
+++ b/irods_capability_automated_ingest/test/test_s3_bucket_scan.py
@@ -3,23 +3,17 @@
 import io
 import os
 import signal
-import shutil
 import subprocess
 import tempfile
 import textwrap
 import time
 
-from irods.data_object import irods_dirname, irods_basename
-from irods.exception import CollectionDoesNotExist
-from irods.meta import iRODSMeta
-from irods.models import Collection, DataObject
 from irods.session import iRODSSession
 
 from irods_capability_automated_ingest.celery import app
 from irods_capability_automated_ingest.redis_utils import get_redis
 from irods_capability_automated_ingest.sync_job import sync_job
-from irods_capability_automated_ingest.utils import DeleteMode, Operation
-import irods_capability_automated_ingest.examples
+from irods_capability_automated_ingest.utils import Operation
 
 from minio import Minio
 
diff --git a/irods_capability_automated_ingest/utils.py b/irods_capability_automated_ingest/utils.py
index 8a2543ec..59aeea19 100644
--- a/irods_capability_automated_ingest/utils.py
+++ b/irods_capability_automated_ingest/utils.py
@@ -4,8 +4,6 @@
 
 from uuid import uuid1
 from enum import Enum
-import os
-import stat
 
 
 class Operation(Enum):
diff --git a/profile/profile.py b/profile/profile.py
index a3202a5c..21732e9c 100644
--- a/profile/profile.py
+++ b/profile/profile.py
@@ -1,18 +1,31 @@
-import sys
 import json
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
 import argparse
 
-parser = argparse.ArgumentParser(description='Ingest profile data into Elasticsearch')
-parser.add_argument('input_file', metavar='INPUT FILE', type=str,
-                    help='input file')
-parser.add_argument('--elasticsearch_host', metavar='ELASTICSEARCH HOST', type=str, default="localhost",
-                    help='elasticsearch host')
-parser.add_argument('elasticsearch_index', metavar='ELASTICSEARCH INDEX', type=str,
-                    help='elasticsearch index')
-parser.add_argument('--additional_key', dest='keys', action='store', nargs="*", default=[],
-                    help='additional key')
+parser = argparse.ArgumentParser(description="Ingest profile data into Elasticsearch")
+parser.add_argument("input_file", metavar="INPUT FILE", type=str, help="input file")
+parser.add_argument(
+    "--elasticsearch_host",
+    metavar="ELASTICSEARCH HOST",
+    type=str,
+    default="localhost",
+    help="elasticsearch host",
+)
+parser.add_argument(
+    "elasticsearch_index",
+    metavar="ELASTICSEARCH INDEX",
+    type=str,
+    help="elasticsearch index",
+)
+parser.add_argument(
+    "--additional_key",
+    dest="keys",
+    action="store",
+    nargs="*",
+    default=[],
+    help="additional key",
+)
 
 args = parser.parse_args()
 
@@ -24,28 +37,22 @@
 
 es = Elasticsearch(output)
 
 try:
-    es.indices.create(index, body={
-        "mappings": {
-            "document": {
-                "properties": {
-                    "hostname": {
-                        "type": "keyword"
-                    }
-                }
-            }
-        }
-    })
+    es.indices.create(
+        index,
+        body={
+            "mappings": {"document": {"properties": {"hostname": {"type": "keyword"}}}}
+        },
+    )
 except Exception as e:
     print(e)
 
-
-def task_action():
+
+def task_action():
     task_buf = {}
     task_counter = {}
     i = 0
     with open(input_file, "r") as f:
-
         line = f.readline().rstrip("\n")
         while line != "":
             obj = json.loads(line)
@@ -73,17 +80,13 @@ def task_action():
                     "event_name": event_name,
                     "event_id": obj["event_id"],
                     "path": obj.get("path"),
-                    "target": obj.get("target")
+                    "target": obj.get("target"),
                 }
                 for key in keys:
                     di[key] = obj[key]
 
-                d = {
-                    "_index": index,
-                    "_type": "document",
-                    "_source": di
-                }
+                d = {"_index": index, "_type": "document", "_source": di}
 
                 i += 1
                 print(i)
                 if event_name in task_counter:
diff --git a/ruff.toml b/ruff.toml
new file mode 100644
index 00000000..3a49f1c4
--- /dev/null
+++ b/ruff.toml
@@ -0,0 +1,58 @@
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+]
+
+# Same as Black.
+line-length = 88
+indent-width = 4
+
+[lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+select = ["E4", "E7", "E9", "F"]
+ignore = []
+
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"