From 5e47e97e556382815730c52360e3e88cc145c780 Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Sun, 22 Dec 2024 21:01:12 +0000 Subject: [PATCH 1/6] feat: search current working directory for config file --- mkdocs/docs/configuration.md | 2 +- pyiceberg/utils/config.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 621b313613..5ccd51c113 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -28,7 +28,7 @@ hide: There are three ways to pass in configuration: -- Using the `~/.pyiceberg.yaml` configuration file +- Using the `.pyiceberg.yaml` configuration file stored in either the directory specified by the `PYICEBERG_HOME` environment variable, the home directory, or current working directory. - Through environment variables - By passing in credentials through the CLI or the Python API diff --git a/pyiceberg/utils/config.py b/pyiceberg/utils/config.py index 51ab200e10..94e0a178b4 100644 --- a/pyiceberg/utils/config.py +++ b/pyiceberg/utils/config.py @@ -84,12 +84,13 @@ def _load_yaml(directory: Optional[str]) -> Optional[RecursiveDict]: return file_config_lowercase return None - # Give priority to the PYICEBERG_HOME directory - if pyiceberg_home_config := _load_yaml(os.environ.get(PYICEBERG_HOME)): - return pyiceberg_home_config - # Look into the home directory - if pyiceberg_home_config := _load_yaml(os.path.expanduser("~")): - return pyiceberg_home_config + # Directories to search for the configuration file + search_dirs = [os.environ.get(PYICEBERG_HOME), os.path.expanduser("~"), os.getcwd()] + + for directory in search_dirs: + if config := _load_yaml(directory): + return config + # Didn't find a config return None From 3d85bf05469faafe4eae203be6bb94f86d92791c Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Sun, 22 Dec 2024 21:50:00 +0000 Subject: [PATCH 2/6] test: add config file search path for cwd --- tests/utils/test_config.py | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index 066e7d7cc0..c64b9a4776 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -93,3 +93,57 @@ def test_from_configuration_files_get_typed_value(tmp_path_factory: pytest.TempP assert Config().get_bool("legacy-current-snapshot-id") assert Config().get_int("max-workers") == 4 + + +@pytest.mark.parametrize( + "config_location, config_content, expected_result", + [ + ( + "config", + {"catalog": {"default": {"uri": "https://service.io/api"}}}, + {"catalog": {"default": {"uri": "https://service.io/api"}}}, + ), + ( + "home", + {"catalog": {"default": {"uri": "https://service.io/api"}}}, + {"catalog": {"default": {"uri": "https://service.io/api"}}}, + ), + ( + "current", + {"catalog": {"default": {"uri": "https://service.io/api"}}}, + {"catalog": {"default": {"uri": "https://service.io/api"}}}, + ), + ("none", None, None), + ], +) +def test_from_multiple_configuration_files( + monkeypatch: pytest.MonkeyPatch, tmp_path_factory: pytest.TempPathFactory, config_location, config_content, expected_result +): + def create_config_file(directory: str, content: dict) -> None: + config_file_path = os.path.join(directory, ".pyiceberg.yaml") + with open(config_file_path, "w", encoding="utf-8") as file: + yaml_str = as_document(content).as_yaml() if content else "" + file.write(yaml_str) + + config_path = str(tmp_path_factory.mktemp("config")) + home_path = str(tmp_path_factory.mktemp("home")) + current_path = str(tmp_path_factory.mktemp("current")) + + location_to_path = { + "config": config_path, + "home": home_path, + "current": current_path, + } + + if config_location in location_to_path and config_content: + create_config_file(location_to_path[config_location], config_content) + + monkeypatch.setenv("PYICEBERG_HOME", config_path) + monkeypatch.setattr(os.path, "expanduser", lambda _: home_path) + + if config_location == "current": + monkeypatch.chdir(current_path) + + assert ( + Config()._from_configuration_files() == expected_result + ), f"Unexpected configuration result for content: {config_content}" From 3ebbb8c898f1ec185becf2120167ee8c900fe577 Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Sun, 22 Dec 2024 21:57:03 +0000 Subject: [PATCH 3/6] docs: add docs on new .pyiceberg.yaml search path --- mkdocs/docs/api.md | 2 +- mkdocs/docs/cli.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 7aa4159016..aed1473e98 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -49,7 +49,7 @@ catalog: and loaded in python by calling `load_catalog(name="hive")` and `load_catalog(name="rest")`. -This information must be placed inside a file called `.pyiceberg.yaml` located either in the `$HOME` or `%USERPROFILE%` directory (depending on whether the operating system is Unix-based or Windows-based, respectively) or in the `$PYICEBERG_HOME` directory (if the corresponding environment variable is set). +This information must be placed inside a file called `.pyiceberg.yaml` located either in the `$HOME` or `%USERPROFILE%` directory (depending on whether the operating system is Unix-based or Windows-based, respectively), in the current working directory, or in the `$PYICEBERG_HOME` directory (if the corresponding environment variable is set). For more details on possible configurations refer to the [specific page](https://py.iceberg.apache.org/configuration/). diff --git a/mkdocs/docs/cli.md b/mkdocs/docs/cli.md index 28e44955d7..4e37ddb6e5 100644 --- a/mkdocs/docs/cli.md +++ b/mkdocs/docs/cli.md @@ -26,7 +26,7 @@ hide: Pyiceberg comes with a CLI that's available after installing the `pyiceberg` package. -You can pass the path to the Catalog using the `--uri` and `--credential` argument, but it is recommended to setup a `~/.pyiceberg.yaml` config as described in the [Catalog](configuration.md) section. +You can pass the path to the Catalog using the `--uri` and `--credential` argument, but it is recommended to setup a `~/.pyiceberg.yaml` or `./.pyiceberg.yaml` config as described in the [Catalog](configuration.md) section. ```sh ➜ pyiceberg --help From 2bd69139da4585c88df73dc7f7a36903fc7daa7a Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Sun, 22 Dec 2024 22:00:50 +0000 Subject: [PATCH 4/6] tests: fix up mypy issues on tests --- pyiceberg/utils/config.py | 1 + tests/utils/test_config.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pyiceberg/utils/config.py b/pyiceberg/utils/config.py index 94e0a178b4..0c162777d6 100644 --- a/pyiceberg/utils/config.py +++ b/pyiceberg/utils/config.py @@ -85,6 +85,7 @@ def _load_yaml(directory: Optional[str]) -> Optional[RecursiveDict]: return None # Directories to search for the configuration file + # The current search order is: PYICEBERG_HOME, home directory, then current directory search_dirs = [os.environ.get(PYICEBERG_HOME), os.path.expanduser("~"), os.getcwd()] for directory in search_dirs: diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index c64b9a4776..c706a150ad 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import os +from typing import Any, Dict, Optional from unittest import mock import pytest @@ -117,9 +118,13 @@ def test_from_configuration_files_get_typed_value(tmp_path_factory: pytest.TempP ], ) def test_from_multiple_configuration_files( - monkeypatch: pytest.MonkeyPatch, tmp_path_factory: pytest.TempPathFactory, config_location, config_content, expected_result -): - def create_config_file(directory: str, content: dict) -> None: + monkeypatch: pytest.MonkeyPatch, + tmp_path_factory: pytest.TempPathFactory, + config_location: str, + config_content: Optional[Dict[str, Any]], + expected_result: Optional[Dict[str, Any]], +) -> None: + def create_config_file(directory: str, content: Optional[Dict[str, Any]]) -> None: config_file_path = os.path.join(directory, ".pyiceberg.yaml") with open(config_file_path, "w", encoding="utf-8") as file: yaml_str = as_document(content).as_yaml() if content else "" @@ -144,6 +149,6 @@ def create_config_file(directory: str, content: dict) -> None: if config_location == "current": monkeypatch.chdir(current_path) - assert ( - Config()._from_configuration_files() == expected_result - ), f"Unexpected configuration result for content: {config_content}" + assert Config()._from_configuration_files() == expected_result, ( + f"Unexpected configuration result for content: {config_content}" + ) From fe029fe40340186363bd4031ebb07a2219336d72 Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Thu, 26 Dec 2024 15:07:49 +0000 Subject: [PATCH 5/6] tests: ensure multiple lookup resolution is valid and UTF8 usage --- tests/utils/test_config.py | 77 +++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index c706a150ad..010f9a7c9b 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -97,58 +97,73 @@ def test_from_configuration_files_get_typed_value(tmp_path_factory: pytest.TempP @pytest.mark.parametrize( - "config_location, config_content, expected_result", + "config_setup, expected_result", [ + # Validate lookup works with: config > home > cwd ( - "config", - {"catalog": {"default": {"uri": "https://service.io/api"}}}, + {"config_location": "config", "config_content": {"catalog": {"default": {"uri": "https://service.io/api"}}}}, {"catalog": {"default": {"uri": "https://service.io/api"}}}, ), ( - "home", - {"catalog": {"default": {"uri": "https://service.io/api"}}}, + {"config_location": "home", "config_content": {"catalog": {"default": {"uri": "https://service.io/api"}}}}, {"catalog": {"default": {"uri": "https://service.io/api"}}}, ), ( - "current", - {"catalog": {"default": {"uri": "https://service.io/api"}}}, + {"config_location": "current", "config_content": {"catalog": {"default": {"uri": "https://service.io/api"}}}}, {"catalog": {"default": {"uri": "https://service.io/api"}}}, ), - ("none", None, None), + ( + {"config_location": "none", "config_content": None}, + None, + ), + # Validate lookup order: home > cwd if present in both + ( + { + "config_location": "both", + "home_content": {"catalog": {"default": {"uri": "https://service.io/home"}}}, + "current_content": {"catalog": {"default": {"uri": "https://service.io/current"}}}, + }, + {"catalog": {"default": {"uri": "https://service.io/home"}}}, + ), ], ) def test_from_multiple_configuration_files( monkeypatch: pytest.MonkeyPatch, tmp_path_factory: pytest.TempPathFactory, - config_location: str, - config_content: Optional[Dict[str, Any]], + config_setup: Dict[str, Any], expected_result: Optional[Dict[str, Any]], ) -> None: - def create_config_file(directory: str, content: Optional[Dict[str, Any]]) -> None: - config_file_path = os.path.join(directory, ".pyiceberg.yaml") - with open(config_file_path, "w", encoding="utf-8") as file: - yaml_str = as_document(content).as_yaml() if content else "" - file.write(yaml_str) - - config_path = str(tmp_path_factory.mktemp("config")) - home_path = str(tmp_path_factory.mktemp("home")) - current_path = str(tmp_path_factory.mktemp("current")) - - location_to_path = { - "config": config_path, - "home": home_path, - "current": current_path, + def create_config_files( + paths: Dict[str, str], + contents: Dict[str, Optional[Dict[str, Any]]], + ) -> None: + """Helper to create configuration files in specified paths.""" + for location, content in contents.items(): + if content: + config_file_path = os.path.join(paths[location], ".pyiceberg.yaml") + with open(config_file_path, "w", encoding="UTF8") as file: + yaml_str = as_document(content).as_yaml() if content else "" + file.write(yaml_str) + + paths = { + "config": str(tmp_path_factory.mktemp("config")), + "home": str(tmp_path_factory.mktemp("home")), + "current": str(tmp_path_factory.mktemp("current")), } - if config_location in location_to_path and config_content: - create_config_file(location_to_path[config_location], config_content) + contents = { + "config": config_setup.get("config_content") if config_setup.get("config_location") == "config" else None, + "home": config_setup.get("home_content") if config_setup.get("config_location") in ["home", "both"] else None, + "current": config_setup.get("current_content") if config_setup.get("config_location") in ["current", "both"] else None, + } - monkeypatch.setenv("PYICEBERG_HOME", config_path) - monkeypatch.setattr(os.path, "expanduser", lambda _: home_path) + create_config_files(paths, contents) - if config_location == "current": - monkeypatch.chdir(current_path) + monkeypatch.setenv("PYICEBERG_HOME", paths["config"]) + monkeypatch.setattr(os.path, "expanduser", lambda _: paths["home"]) + if config_setup.get("config_location") in ["current", "both"]: + monkeypatch.chdir(paths["current"]) assert Config()._from_configuration_files() == expected_result, ( - f"Unexpected configuration result for content: {config_content}" + f"Unexpected configuration result for content: {expected_result}" ) From 6d14ecefad28970bbf3aeced19e4b50c1be1c184 Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:59:47 -0500 Subject: [PATCH 6/6] Update mkdocs/docs/cli.md Co-authored-by: Fokko Driesprong --- mkdocs/docs/cli.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/cli.md b/mkdocs/docs/cli.md index 4e37ddb6e5..28e44955d7 100644 --- a/mkdocs/docs/cli.md +++ b/mkdocs/docs/cli.md @@ -26,7 +26,7 @@ hide: Pyiceberg comes with a CLI that's available after installing the `pyiceberg` package. -You can pass the path to the Catalog using the `--uri` and `--credential` argument, but it is recommended to setup a `~/.pyiceberg.yaml` or `./.pyiceberg.yaml` config as described in the [Catalog](configuration.md) section. +You can pass the path to the Catalog using the `--uri` and `--credential` argument, but it is recommended to setup a `~/.pyiceberg.yaml` config as described in the [Catalog](configuration.md) section. ```sh ➜ pyiceberg --help