Skip to content

Commit 76b4949

Browse files
committed
Merge branch 'main' into manifest_compaction
# Conflicts: # pyiceberg/table/__init__.py # tests/integration/test_writes.py
2 parents 2609002 + 36b56eb commit 76b4949

27 files changed

+2641
-456
lines changed

.github/workflows/check-md-link.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
name: Check Markdown links
2+
3+
on:
4+
push:
5+
paths:
6+
- mkdocs/**
7+
8+
jobs:
9+
markdown-link-check:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@master
13+
- uses: gaurav-nelson/github-action-markdown-link-check@v1

mkdocs/docs/api.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,63 @@ with table.update_schema(allow_incompatible_changes=True) as update:
418418
update.delete_column("some_field")
419419
```
420420

421+
## Partition evolution
422+
423+
PyIceberg supports partition evolution. See the [partition evolution](https://iceberg.apache.org/spec/#partition-evolution)
424+
section of the Iceberg spec for more details.
425+
426+
The API to use when evolving partitions is the `update_spec` API on the table.
427+
428+
```python
429+
with table.update_spec() as update:
430+
update.add_field("id", BucketTransform(16), "bucketed_id")
431+
update.add_field("event_ts", DayTransform(), "day_ts")
432+
```
433+
434+
Updating the partition spec can also be done as part of a transaction with other operations.
435+
436+
```python
437+
with table.transaction() as transaction:
438+
with transaction.update_spec() as update_spec:
439+
update_spec.add_field("id", BucketTransform(16), "bucketed_id")
440+
update_spec.add_field("event_ts", DayTransform(), "day_ts")
441+
# ... Update properties etc
442+
```
443+
444+
### Add fields
445+
446+
New partition fields can be added via the `add_field` API which takes in the field name to partition on,
447+
the partition transform, and an optional partition name. If the partition name is not specified,
448+
one will be created.
449+
450+
```python
451+
with table.update_spec() as update:
452+
update.add_field("id", BucketTransform(16), "bucketed_id")
453+
update.add_field("event_ts", DayTransform(), "day_ts")
454+
# identity is a shortcut API for adding an IdentityTransform
455+
update.identity("some_field")
456+
```
457+
458+
### Remove fields
459+
460+
Partition fields can also be removed via the `remove_field` API if it no longer makes sense to partition on those fields.
461+
462+
```python
463+
with table.update_spec() as update:
464+
# Remove the partition field with the name some_partition_name
465+
update.remove_field("some_partition_name")
466+
```
467+
468+
### Rename fields
469+
470+
Partition fields can also be renamed via the `rename_field` API.
471+
472+
```python
473+
with table.update_spec() as update:
474+
# Rename the partition field with the name bucketed_id to sharded_id
475+
update.rename_field("bucketed_id", "sharded_id")
476+
```
477+
421478
## Table properties
422479

423480
Set and remove properties through the `Transaction` API:

mkdocs/docs/cli.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Options:
3636
--catalog TEXT
3737
--verbose BOOLEAN
3838
--output [text|json]
39+
--ugi TEXT
3940
--uri TEXT
4041
--credential TEXT
4142
--help Show this message and exit.

mkdocs/docs/configuration.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,27 @@ catalog:
148148
| Key | Example | Description |
149149
| ---------------------- | ----------------------- | -------------------------------------------------------------------------------------------------- |
150150
| uri | https://rest-catalog/ws | URI identifying the REST Server |
151+
| ugi | t-1234:secret | Hadoop UGI for Hive client. |
151152
| credential | t-1234:secret | Credential to use for OAuth2 credential flow when initializing the catalog |
152153
| token | FEW23.DFSDF.FSDF | Bearer token value to use for `Authorization` header |
153154
| rest.sigv4-enabled | true | Sign requests to the REST Server using AWS SigV4 protocol |
154155
| rest.signing-region | us-east-1 | The region to use when SigV4 signing a request |
155156
| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request |
156157
| rest.authorization-url | https://auth-service/cc | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') |
157158

159+
### Headers in RESTCatalog
160+
161+
To configure custom headers in RESTCatalog, include them in the catalog properties with the prefix `header.`. This
162+
ensures that all HTTP requests to the REST service include the specified headers.
163+
164+
```yaml
165+
catalog:
166+
default:
167+
uri: http://rest-catalog/ws/
168+
credential: t-1234:secret
169+
header.content-type: application/vnd.api+json
170+
```
171+
158172
## SQL Catalog
159173

160174
The SQL catalog requires a database for its backend. PyIceberg supports PostgreSQL and SQLite through psycopg2. The database connection has to be configured using the `uri` property. See SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls):

pyiceberg/catalog/hive.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,17 +130,21 @@ class _HiveClient:
130130

131131
_transport: TTransport
132132
_client: Client
133+
_ugi: Optional[List[str]]
133134

134-
def __init__(self, uri: str):
135+
def __init__(self, uri: str, ugi: Optional[str] = None):
135136
url_parts = urlparse(uri)
136137
transport = TSocket.TSocket(url_parts.hostname, url_parts.port)
137138
self._transport = TTransport.TBufferedTransport(transport)
138139
protocol = TBinaryProtocol.TBinaryProtocol(transport)
139140

140141
self._client = Client(protocol)
142+
self._ugi = ugi.split(':') if ugi else None
141143

142144
def __enter__(self) -> Client:
143145
self._transport.open()
146+
if self._ugi:
147+
self._client.set_ugi(*self._ugi)
144148
return self._client
145149

146150
def __exit__(
@@ -233,7 +237,7 @@ class HiveCatalog(Catalog):
233237

234238
def __init__(self, name: str, **properties: str):
235239
super().__init__(name, **properties)
236-
self._client = _HiveClient(properties["uri"])
240+
self._client = _HiveClient(properties["uri"], properties.get("ugi"))
237241

238242
def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table:
239243
properties: Dict[str, str] = table.parameters

pyiceberg/catalog/rest.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
Union,
2929
)
3030

31-
from pydantic import Field, ValidationError
31+
from pydantic import Field, ValidationError, field_validator
3232
from requests import HTTPError, Session
3333
from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt
3434

@@ -69,6 +69,7 @@
6969
)
7070
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder, assign_fresh_sort_order_ids
7171
from pyiceberg.typedef import EMPTY_DICT, UTF8, IcebergBaseModel
72+
from pyiceberg.types import transform_dict_value_to_str
7273

7374
if TYPE_CHECKING:
7475
import pyarrow as pa
@@ -115,6 +116,7 @@ class Endpoints:
115116
SIGV4_REGION = "rest.signing-region"
116117
SIGV4_SERVICE = "rest.signing-name"
117118
AUTH_URL = "rest.authorization-url"
119+
HEADER_PREFIX = "header."
118120

119121
NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
120122

@@ -127,7 +129,7 @@ def _retry_hook(retry_state: RetryCallState) -> None:
127129
_RETRY_ARGS = {
128130
"retry": retry_if_exception_type(AuthorizationExpiredError),
129131
"stop": stop_after_attempt(2),
130-
"before": _retry_hook,
132+
"before_sleep": _retry_hook,
131133
"reraise": True,
132134
}
133135

@@ -146,6 +148,8 @@ class CreateTableRequest(IcebergBaseModel):
146148
write_order: Optional[SortOrder] = Field(alias="write-order")
147149
stage_create: bool = Field(alias="stage-create", default=False)
148150
properties: Properties = Field(default_factory=dict)
151+
# validators
152+
transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str)
149153

150154

151155
class RegisterTableRequest(IcebergBaseModel):
@@ -156,8 +160,10 @@ class RegisterTableRequest(IcebergBaseModel):
156160
class TokenResponse(IcebergBaseModel):
157161
access_token: str = Field()
158162
token_type: str = Field()
159-
expires_in: int = Field()
160-
issued_token_type: str = Field()
163+
expires_in: Optional[int] = Field(default=None)
164+
issued_token_type: Optional[str] = Field(default=None)
165+
refresh_token: Optional[str] = Field(default=None)
166+
scope: Optional[str] = Field(default=None)
161167

162168

163169
class ConfigResponse(IcebergBaseModel):
@@ -231,9 +237,9 @@ def _create_session(self) -> Session:
231237

232238
# Sets the client side and server side SSL cert verification, if provided as properties.
233239
if ssl_config := self.properties.get(SSL):
234-
if ssl_ca_bundle := ssl_config.get(CA_BUNDLE): # type: ignore
240+
if ssl_ca_bundle := ssl_config.get(CA_BUNDLE):
235241
session.verify = ssl_ca_bundle
236-
if ssl_client := ssl_config.get(CLIENT): # type: ignore
242+
if ssl_client := ssl_config.get(CLIENT):
237243
if all(k in ssl_client for k in (CERT, KEY)):
238244
session.cert = (ssl_client[CERT], ssl_client[KEY])
239245
elif ssl_client_cert := ssl_client.get(CERT):
@@ -242,10 +248,7 @@ def _create_session(self) -> Session:
242248
self._refresh_token(session, self.properties.get(TOKEN))
243249

244250
# Set HTTP headers
245-
session.headers["Content-type"] = "application/json"
246-
session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION
247-
session.headers["User-Agent"] = f"PyIceberg/{__version__}"
248-
session.headers["X-Iceberg-Access-Delegation"] = "vended-credentials"
251+
self._config_headers(session)
249252

250253
# Configure SigV4 Request Signing
251254
if str(self.properties.get(SIGV4, False)).lower() == "true":
@@ -292,8 +295,9 @@ def _fetch_access_token(self, session: Session, credential: str) -> str:
292295
else:
293296
client_id, client_secret = None, credential
294297
data = {GRANT_TYPE: CLIENT_CREDENTIALS, CLIENT_ID: client_id, CLIENT_SECRET: client_secret, SCOPE: CATALOG_SCOPE}
295-
# Uses application/x-www-form-urlencoded by default
296-
response = session.post(url=self.auth_url, data=data)
298+
response = session.post(
299+
url=self.auth_url, data=data, headers={**session.headers, "Content-type": "application/x-www-form-urlencoded"}
300+
)
297301
try:
298302
response.raise_for_status()
299303
except HTTPError as exc:
@@ -447,17 +451,28 @@ def _response_to_table(self, identifier_tuple: Tuple[str, ...], table_response:
447451
catalog=self,
448452
)
449453

450-
def _refresh_token(self, session: Optional[Session] = None, new_token: Optional[str] = None) -> None:
454+
def _refresh_token(self, session: Optional[Session] = None, initial_token: Optional[str] = None) -> None:
451455
session = session or self._session
452-
if new_token is not None:
453-
self.properties[TOKEN] = new_token
456+
if initial_token is not None:
457+
self.properties[TOKEN] = initial_token
454458
elif CREDENTIAL in self.properties:
455459
self.properties[TOKEN] = self._fetch_access_token(session, self.properties[CREDENTIAL])
456460

457461
# Set Auth token for subsequent calls in the session
458462
if token := self.properties.get(TOKEN):
459463
session.headers[AUTHORIZATION_HEADER] = f"{BEARER_PREFIX} {token}"
460464

465+
def _config_headers(self, session: Session) -> None:
466+
session.headers["Content-type"] = "application/json"
467+
session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION
468+
session.headers["User-Agent"] = f"PyIceberg/{__version__}"
469+
session.headers["X-Iceberg-Access-Delegation"] = "vended-credentials"
470+
header_properties = self._extract_headers_from_properties()
471+
session.headers.update(header_properties)
472+
473+
def _extract_headers_from_properties(self) -> Dict[str, str]:
474+
return {key[len(HEADER_PREFIX) :]: value for key, value in self.properties.items() if key.startswith(HEADER_PREFIX)}
475+
461476
@retry(**_RETRY_ARGS)
462477
def create_table(
463478
self,

pyiceberg/catalog/sql.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,9 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper
567567
Raises:
568568
NoSuchNamespaceError: If a namespace with the given name does not exist.
569569
"""
570-
database_name = self.identifier_to_database(namespace, NoSuchNamespaceError)
570+
database_name = self.identifier_to_database(namespace)
571+
if not self._namespace_exists(database_name):
572+
raise NoSuchNamespaceError(f"Database {database_name} does not exists")
571573

572574
stmt = select(IcebergNamespaceProperties).where(
573575
IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == database_name

pyiceberg/cli/console.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,22 @@ def wrapper(*args: Any, **kwargs: Any): # type: ignore
5959
@click.option("--catalog")
6060
@click.option("--verbose", type=click.BOOL)
6161
@click.option("--output", type=click.Choice(["text", "json"]), default="text")
62+
@click.option("--ugi")
6263
@click.option("--uri")
6364
@click.option("--credential")
6465
@click.pass_context
65-
def run(ctx: Context, catalog: Optional[str], verbose: bool, output: str, uri: Optional[str], credential: Optional[str]) -> None:
66+
def run(
67+
ctx: Context,
68+
catalog: Optional[str],
69+
verbose: bool,
70+
output: str,
71+
ugi: Optional[str],
72+
uri: Optional[str],
73+
credential: Optional[str],
74+
) -> None:
6675
properties = {}
76+
if ugi:
77+
properties["ugi"] = ugi
6778
if uri:
6879
properties["uri"] = uri
6980
if credential:

pyiceberg/io/pyarrow.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
visit_with_partner,
126126
)
127127
from pyiceberg.table import PropertyUtil, TableProperties, WriteTask
128+
from pyiceberg.table.metadata import TableMetadata
128129
from pyiceberg.table.name_mapping import NameMapping
129130
from pyiceberg.transforms import TruncateTransform
130131
from pyiceberg.typedef import EMPTY_DICT, Properties, Record
@@ -1720,7 +1721,7 @@ def fill_parquet_file_metadata(
17201721
data_file.split_offsets = split_offsets
17211722

17221723

1723-
def write_file(table: Table, tasks: Iterator[WriteTask], file_schema: Optional[Schema] = None) -> Iterator[DataFile]:
1724+
def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
17241725
task = next(tasks)
17251726

17261727
try:
@@ -1730,15 +1731,15 @@ def write_file(table: Table, tasks: Iterator[WriteTask], file_schema: Optional[S
17301731
except StopIteration:
17311732
pass
17321733

1733-
parquet_writer_kwargs = _get_parquet_writer_kwargs(table.properties)
1734+
parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)
17341735

1735-
file_path = f'{table.location()}/data/{task.generate_data_file_filename("parquet")}'
1736-
file_schema = file_schema or table.schema()
1737-
arrow_file_schema = schema_to_pyarrow(file_schema)
1736+
file_path = f'{table_metadata.location}/data/{task.generate_data_file_filename("parquet")}'
1737+
schema = table_metadata.schema()
1738+
arrow_file_schema = schema_to_pyarrow(schema)
17381739

1739-
fo = table.io.new_output(file_path)
1740+
fo = io.new_output(file_path)
17401741
row_group_size = PropertyUtil.property_as_int(
1741-
properties=table.properties,
1742+
properties=table_metadata.properties,
17421743
property_name=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
17431744
default=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT,
17441745
)
@@ -1757,16 +1758,16 @@ def write_file(table: Table, tasks: Iterator[WriteTask], file_schema: Optional[S
17571758
# sort_order_id=task.sort_order_id,
17581759
sort_order_id=None,
17591760
# Just copy these from the table for now
1760-
spec_id=table.spec().spec_id,
1761+
spec_id=table_metadata.default_spec_id,
17611762
equality_ids=None,
17621763
key_metadata=None,
17631764
)
17641765

17651766
fill_parquet_file_metadata(
17661767
data_file=data_file,
17671768
parquet_metadata=writer.writer.metadata,
1768-
stats_columns=compute_statistics_plan(file_schema, table.properties),
1769-
parquet_column_mapping=parquet_path_to_id_mapping(file_schema),
1769+
stats_columns=compute_statistics_plan(schema, table_metadata.properties),
1770+
parquet_column_mapping=parquet_path_to_id_mapping(schema),
17701771
)
17711772
return iter([data_file])
17721773

0 commit comments

Comments
 (0)