Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: search current working directory for config file #1464

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mkdocs/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ catalog:

and loaded in python by calling `load_catalog(name="hive")` and `load_catalog(name="rest")`.

This information must be placed inside a file called `.pyiceberg.yaml` located either in the `$HOME` or `%USERPROFILE%` directory (depending on whether the operating system is Unix-based or Windows-based, respectively) or in the `$PYICEBERG_HOME` directory (if the corresponding environment variable is set).
This information must be placed inside a file called `.pyiceberg.yaml` located either in the `$HOME` or `%USERPROFILE%` directory (depending on whether the operating system is Unix-based or Windows-based, respectively), in the current working directory, or in the `$PYICEBERG_HOME` directory (if the corresponding environment variable is set).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: include a warning about accidentally committing secrets to git when the config file is kept in the current working directory


For more details on possible configurations refer to the [specific page](https://py.iceberg.apache.org/configuration/).

Expand Down
2 changes: 1 addition & 1 deletion mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ hide:

There are three ways to pass in configuration:

- Using the `~/.pyiceberg.yaml` configuration file
- Using the `.pyiceberg.yaml` configuration file stored in either the directory specified by the `PYICEBERG_HOME` environment variable, the home directory, or the current working directory.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: move the extra info about where the file is located down to L37.
Also, I think it's valuable to include a warning about accidentally checking in secrets with git when using the current working directory.

- Through environment variables
- By passing in credentials through the CLI or the Python API

Expand Down
14 changes: 8 additions & 6 deletions pyiceberg/utils/config.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried adding a test here, but I wonder if there are opportunities to clean it up.

Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,14 @@ def _load_yaml(directory: Optional[str]) -> Optional[RecursiveDict]:
return file_config_lowercase
return None

# Give priority to the PYICEBERG_HOME directory
if pyiceberg_home_config := _load_yaml(os.environ.get(PYICEBERG_HOME)):
return pyiceberg_home_config
# Look into the home directory
if pyiceberg_home_config := _load_yaml(os.path.expanduser("~")):
return pyiceberg_home_config
# Directories to search for the configuration file
Fokko marked this conversation as resolved.
Show resolved Hide resolved
# The current search order is: PYICEBERG_HOME, home directory, then current directory
search_dirs = [os.environ.get(PYICEBERG_HOME), os.path.expanduser("~"), os.getcwd()]

for directory in search_dirs:
if config := _load_yaml(directory):
return config

# Didn't find a config
return None

Expand Down
74 changes: 74 additions & 0 deletions tests/utils/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import os
from typing import Any, Dict, Optional
from unittest import mock

import pytest
Expand Down Expand Up @@ -93,3 +94,76 @@ def test_from_configuration_files_get_typed_value(tmp_path_factory: pytest.TempP

assert Config().get_bool("legacy-current-snapshot-id")
assert Config().get_int("max-workers") == 4


@pytest.mark.parametrize(
"config_setup, expected_result",
[
# Validate lookup works with: config > home > cwd
(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: for test readability, use "PYICEBERG_HOME", "HOME", and "CURRENT" as the location values,
and replace "both" with a list: ["HOME", "CURRENT"]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the parametrized test is testing:

  1. PYICEBERG_HOME
  2. HOME
  3. CURRENT
  4. None
  5. "both" / ["HOME", "CURRENT"]

I'd add a test for all 3 locations together.

{"config_location": "config", "config_content": {"catalog": {"default": {"uri": "https://service.io/api"}}}},
{"catalog": {"default": {"uri": "https://service.io/api"}}},
),
(
{"config_location": "home", "config_content": {"catalog": {"default": {"uri": "https://service.io/api"}}}},
{"catalog": {"default": {"uri": "https://service.io/api"}}},
),
(
{"config_location": "current", "config_content": {"catalog": {"default": {"uri": "https://service.io/api"}}}},
{"catalog": {"default": {"uri": "https://service.io/api"}}},
),
(
{"config_location": "none", "config_content": None},
None,
),
# Validate lookup order: home > cwd if present in both
(
{
"config_location": "both",
"home_content": {"catalog": {"default": {"uri": "https://service.io/home"}}},
"current_content": {"catalog": {"default": {"uri": "https://service.io/current"}}},
},
{"catalog": {"default": {"uri": "https://service.io/home"}}},
),
],
)
def test_from_multiple_configuration_files(
IndexSeek marked this conversation as resolved.
Show resolved Hide resolved
monkeypatch: pytest.MonkeyPatch,
tmp_path_factory: pytest.TempPathFactory,
config_setup: Dict[str, Any],
expected_result: Optional[Dict[str, Any]],
) -> None:
def create_config_files(
    paths: Dict[str, str],
    contents: Dict[str, Optional[Dict[str, Any]]],
) -> None:
    """Write a ``.pyiceberg.yaml`` file into each location that has content.

    Args:
        paths: Maps a location name to the directory the file is written into.
        contents: Maps a location name to the configuration to serialize.
            Locations whose content is falsy (``None`` or empty) are skipped,
            so no file is created for them.
    """
    for location, content in contents.items():
        if not content:
            continue
        config_file_path = os.path.join(paths[location], ".pyiceberg.yaml")
        with open(config_file_path, "w", encoding="UTF8") as file:
            # content is truthy at this point, so no empty-string fallback is needed.
            file.write(as_document(content).as_yaml())

paths = {
"config": str(tmp_path_factory.mktemp("config")),
"home": str(tmp_path_factory.mktemp("home")),
"current": str(tmp_path_factory.mktemp("current")),
}

contents = {
"config": config_setup.get("config_content") if config_setup.get("config_location") == "config" else None,
"home": config_setup.get("home_content") if config_setup.get("config_location") in ["home", "both"] else None,
"current": config_setup.get("current_content") if config_setup.get("config_location") in ["current", "both"] else None,
}

create_config_files(paths, contents)

monkeypatch.setenv("PYICEBERG_HOME", paths["config"])
monkeypatch.setattr(os.path, "expanduser", lambda _: paths["home"])
if config_setup.get("config_location") in ["current", "both"]:
monkeypatch.chdir(paths["current"])

assert Config()._from_configuration_files() == expected_result, (
f"Unexpected configuration result for content: {expected_result}"
)
Loading