def setup_oci_client(self, config, **kwargs):
    """Build the Lakesharing Object Storage client for this filesystem.

    The client is constructed from the arguments that are passed in, so a
    caller (or a subclass override) can supply a configuration other than
    the one stored on the instance.

    Parameters
    ----------
    config
        Configuration handed as the first positional argument to
        ``LakeSharingObjectStorageClient``.
    **kwargs
        Extra keyword arguments forwarded unchanged to the client
        constructor.

    Returns
    -------
    The newly created ``LakeSharingObjectStorageClient`` instance.

    Raises
    ------
    Exception
        Any exception raised by the client constructor is logged at error
        level and then re-raised unchanged.
    """
    # NOTE(review): the config is logged verbatim; if it can carry secrets
    # (e.g. a key pass phrase) consider redacting it here -- confirm.
    logger.debug(
        f"Lakesharing Object Storage Client is being set up for supporting data lake support and "
        f"interacting with object storage using the config passed in: {config}"
    )
    try:
        # Only the constructor call is guarded: it is the step that can fail.
        return LakeSharingObjectStorageClient(config, **kwargs)
    except Exception:
        logger.error(
            f"Exception encountered when attempting to initialize the Lakesharing Object Storage Client "
            f"using the config:{config}"
        )
        # Bare ``raise`` re-raises the active exception with its traceback intact.
        raise
def glob(self, path, maxdepth=None, **kwargs):
    """
    Find files by glob-matching.

    If the path ends with '/', only folders are returned.

    We support ``"**"``,
    ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.

    The `maxdepth` option is applied on the first `**` found in the path.

    Search path names that contain embedded characters special to this
    implementation of glob may not produce expected results;
    e.g., 'foo/bar/*starredfilename*'.

    kwargs are passed to ``ls``.

    Lake paths (the leading path component contains ``@ocid1.lake``) are
    resolved through ``split_path`` into a bucket/namespace path, globbed
    by the parent implementation, and every match is rewritten so that it
    starts with ``ocilake://<leading component>`` again. All other paths
    are delegated to the parent implementation untouched.
    """
    # Leading component of the protocol-less path, i.e. everything before
    # the first "/".
    full_bucket = self._strip_protocol(path).partition("/")[0]

    # Added the below check for lake support
    if "@ocid1.lake" not in full_bucket:
        return super().glob(path, maxdepth=maxdepth, **kwargs)

    lake_url = f"ocilake://{full_bucket}"
    bucket, namespace, key = self.split_path(path)
    bucket_full_path = _build_full_path(bucket, namespace, key)
    bucket_with_namespace_path = _build_full_path(bucket, namespace)
    matches = super().glob(bucket_full_path, maxdepth=maxdepth, **kwargs)

    # Strip the resolved bucket/namespace prefix (only when it is actually
    # present) and put the lake URL in front instead. The explicit
    # startswith/slice form behaves like ``str.removeprefix`` but also
    # works on interpreters older than Python 3.9.
    # NOTE(review): if the parent returns a mapping (e.g. when ``detail``
    # is requested) only its keys survive here -- confirm this is intended.
    prefix_len = len(bucket_with_namespace_path)
    return [
        lake_url
        + (
            match[prefix_len:]
            if match.startswith(bucket_with_namespace_path)
            else match
        )
        for match in matches
    ]
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/data_lake/lake_sharing_client.py b/ocifs/data_lake/lake_sharing_client.py index ae32440..ba12c3e 100644 --- a/ocifs/data_lake/lake_sharing_client.py +++ b/ocifs/data_lake/lake_sharing_client.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import logging import os diff --git a/ocifs/data_lake/lake_sharing_object_storage_client.py b/ocifs/data_lake/lake_sharing_object_storage_client.py index 04763ce..870830e 100644 --- a/ocifs/data_lake/lake_sharing_object_storage_client.py +++ b/ocifs/data_lake/lake_sharing_object_storage_client.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import logging import os diff --git a/ocifs/data_lake/lakehouse.py b/ocifs/data_lake/lakehouse.py index e43c583..6b9bbee 100644 --- a/ocifs/data_lake/lakehouse.py +++ b/ocifs/data_lake/lakehouse.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/data_lake/lakehouse_client.py b/ocifs/data_lake/lakehouse_client.py index 7a0db00..1236363 100644 --- a/ocifs/data_lake/lakehouse_client.py +++ b/ocifs/data_lake/lakehouse_client.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. 
+# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from __future__ import absolute_import diff --git a/ocifs/data_lake/managed_prefix_collection.py b/ocifs/data_lake/managed_prefix_collection.py index ea92d72..4497c45 100644 --- a/ocifs/data_lake/managed_prefix_collection.py +++ b/ocifs/data_lake/managed_prefix_collection.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/data_lake/managed_prefix_summary.py b/ocifs/data_lake/managed_prefix_summary.py index ea79401..677ee3f 100644 --- a/ocifs/data_lake/managed_prefix_summary.py +++ b/ocifs/data_lake/managed_prefix_summary.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/data_lake/mount_specification.py b/ocifs/data_lake/mount_specification.py index 7bf0cf0..1bfc06b 100644 --- a/ocifs/data_lake/mount_specification.py +++ b/ocifs/data_lake/mount_specification.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/data_lake/par_response.py b/ocifs/data_lake/par_response.py index 6588067..209f719 100644 --- a/ocifs/data_lake/par_response.py +++ b/ocifs/data_lake/par_response.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/data_lake/rename_object_details.py b/ocifs/data_lake/rename_object_details.py index bb472b6..ba8feae 100644 --- a/ocifs/data_lake/rename_object_details.py +++ b/ocifs/data_lake/rename_object_details.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from oci.util import ( formatted_flat_dict, diff --git a/ocifs/errors.py b/ocifs/errors.py index 702cc8b..6315f1f 100644 --- a/ocifs/errors.py +++ b/ocifs/errors.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import errno import functools diff --git a/ocifs/tests/__init__.py b/ocifs/tests/__init__.py index be6c0ec..ac2e4b4 100644 --- a/ocifs/tests/__init__.py +++ b/ocifs/tests/__init__.py @@ -1,4 +1,4 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from ..core import OCIFileSystem diff --git a/ocifs/tests/test_integration.py b/ocifs/tests/test_integration.py index 1705c9f..2950736 100644 --- a/ocifs/tests/test_integration.py +++ b/ocifs/tests/test_integration.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import pandas as pd import numpy as np diff --git a/ocifs/tests/test_integration_lake.py b/ocifs/tests/test_integration_lake.py index cb2c322..3e481eb 100644 --- a/ocifs/tests/test_integration_lake.py +++ b/ocifs/tests/test_integration_lake.py @@ -1,5 +1,5 @@ # coding: utf-8 -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import pandas as pd import os diff --git a/ocifs/tests/test_spec.py b/ocifs/tests/test_spec.py index 51546b5..0ae5af7 100644 --- a/ocifs/tests/test_spec.py +++ b/ocifs/tests/test_spec.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2023 Oracle and/or its affiliates. +# Copyright (c) 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from contextlib import contextmanager diff --git a/ocifs/tests/test_spec_lake.py b/ocifs/tests/test_spec_lake.py index 7d8bfac..4dd20ff 100644 --- a/ocifs/tests/test_spec_lake.py +++ b/ocifs/tests/test_spec_lake.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2023 Oracle and/or its affiliates. +# Copyright (c) 2024 Oracle and/or its affiliates. 
def test_glob(fs):
    """Check glob() results against the external mount's nested layout."""
    root = full_external_mount_name
    nested_file = f"{root}/nested/file1"

    # Patterns that stop above or beside the nested file must not return it.
    for suffix in ("/", "/*", "/nested"):
        assert nested_file not in fs.glob(root + suffix)

    # Patterns that reach into the nested folder must return it.
    for suffix in ("/nested/*", "/nested/file*", "/*/*"):
        assert nested_file in fs.glob(root + suffix)

    # A literal (wildcard-free) path globs to exactly itself.
    nested2 = f"{root}/nested/nested2"
    assert [nested2] == fs.glob(nested2)

    listing = fs.glob(f"{nested2}/*")
    expected = {f"{nested2}/file1", f"{nested2}/file2"}
    assert expected == set(listing)

    # Make sure glob() deals with the dot character (.) correctly.
    dot_pattern = f"{root}/file.*"
    assert f"{root}/file.dat" in fs.glob(dot_pattern)
    assert f"{root}/filexdat" not in fs.glob(dot_pattern)
+# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ ### File setup.py obsolete and must not be used. Please update pyproject.toml instead.