Skip to content

Commit ca70442

Browse files
gardenia and Colm Dougan authored
Add hive.kerberos-service-name configuration option (#2032) (#2141)
<!-- Thanks for opening a pull request! -->
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual GitHub issue id. -->
<!-- Closes #2032 -->

# Rationale for this change

Adds a new configuration parameter, `hive.kerberos-service-name` (#2032), which sets the Kerberos service name used when connecting to the Hive metastore. It defaults to "hive".

# Are these changes tested?

Yes, a unit test was added.

# Are there any user-facing changes?

Yes. This change adds an optional configuration parameter for the Hive catalog (`hive.kerberos-service-name`), which defaults to "hive". The change includes documentation updates.

<!-- In the case of user-facing changes, please add the changelog label. -->

Co-authored-by: Colm Dougan <[email protected]>
1 parent c32aa04 commit ca70442

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

mkdocs/docs/configuration.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,11 @@ catalog:
439439
s3.secret-access-key: password
440440
```
441441

442-
| Key | Example | Description |
443-
|------------------------------| ------- | --------------------------------- |
444-
| hive.hive2-compatible | true | Using Hive 2.x compatibility mode |
445-
| hive.kerberos-authentication | true | Using authentication via Kerberos |
442+
| Key | Example | Description |
443+
|------------------------------| ------- | ------------------------------------ |
444+
| hive.hive2-compatible | true | Using Hive 2.x compatibility mode |
445+
| hive.kerberos-authentication | true | Using authentication via Kerberos |
446+
| hive.kerberos-service-name | hive | Kerberos service name (default hive) |
446447

447448
When using Hive 2.x, make sure to set the compatibility flag:
448449

pyiceberg/catalog/hive.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@
130130

131131
HIVE_KERBEROS_AUTH = "hive.kerberos-authentication"
132132
HIVE_KERBEROS_AUTH_DEFAULT = False
133+
HIVE_KERBEROS_SERVICE_NAME = "hive.kerberos-service-name"
134+
HIVE_KERBEROS_SERVICE_NAME_DEFAULT = "hive"
133135

134136
LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time"
135137
LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time"
@@ -149,9 +151,16 @@ class _HiveClient:
149151
_transport: TTransport
150152
_ugi: Optional[List[str]]
151153

152-
def __init__(self, uri: str, ugi: Optional[str] = None, kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT):
154+
def __init__(
155+
self,
156+
uri: str,
157+
ugi: Optional[str] = None,
158+
kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT,
159+
kerberos_service_name: Optional[str] = HIVE_KERBEROS_SERVICE_NAME,
160+
):
153161
self._uri = uri
154162
self._kerberos_auth = kerberos_auth
163+
self._kerberos_service_name = kerberos_service_name
155164
self._ugi = ugi.split(":") if ugi else None
156165
self._transport = self._init_thrift_transport()
157166

@@ -161,7 +170,7 @@ def _init_thrift_transport(self) -> TTransport:
161170
if not self._kerberos_auth:
162171
return TTransport.TBufferedTransport(socket)
163172
else:
164-
return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service="hive")
173+
return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service=self._kerberos_service_name)
165174

166175
def _client(self) -> Client:
167176
protocol = TBinaryProtocol.TBinaryProtocol(self._transport)
@@ -314,6 +323,7 @@ def _create_hive_client(properties: Dict[str, str]) -> _HiveClient:
314323
uri,
315324
properties.get("ugi"),
316325
property_as_bool(properties, HIVE_KERBEROS_AUTH, HIVE_KERBEROS_AUTH_DEFAULT),
326+
properties.get(HIVE_KERBEROS_SERVICE_NAME, HIVE_KERBEROS_SERVICE_NAME_DEFAULT),
317327
)
318328
except BaseException as e:
319329
last_exception = e

tests/catalog/test_hive.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
DO_NOT_UPDATE_STATS,
4949
DO_NOT_UPDATE_STATS_DEFAULT,
5050
HIVE_KERBEROS_AUTH,
51+
HIVE_KERBEROS_SERVICE_NAME,
5152
LOCK_CHECK_MAX_WAIT_TIME,
5253
LOCK_CHECK_MIN_WAIT_TIME,
5354
LOCK_CHECK_RETRIES,
@@ -1300,7 +1301,20 @@ def test_create_hive_client_success() -> None:
13001301

13011302
with patch("pyiceberg.catalog.hive._HiveClient", return_value=MagicMock()) as mock_hive_client:
13021303
client = HiveCatalog._create_hive_client(properties)
1303-
mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", False)
1304+
mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", False, "hive")
1305+
assert client is not None
1306+
1307+
1308+
def test_create_hive_client_with_kerberos_success() -> None:
1309+
properties = {
1310+
"uri": "thrift://localhost:10000",
1311+
"ugi": "user",
1312+
HIVE_KERBEROS_AUTH: "true",
1313+
HIVE_KERBEROS_SERVICE_NAME: "hiveuser",
1314+
}
1315+
with patch("pyiceberg.catalog.hive._HiveClient", return_value=MagicMock()) as mock_hive_client:
1316+
client = HiveCatalog._create_hive_client(properties)
1317+
mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", True, "hiveuser")
13041318
assert client is not None
13051319

13061320

@@ -1313,7 +1327,7 @@ def test_create_hive_client_multiple_uris() -> None:
13131327
client = HiveCatalog._create_hive_client(properties)
13141328
assert mock_hive_client.call_count == 2
13151329
mock_hive_client.assert_has_calls(
1316-
[call("thrift://localhost:10000", "user", False), call("thrift://localhost:10001", "user", False)]
1330+
[call("thrift://localhost:10000", "user", False, "hive"), call("thrift://localhost:10001", "user", False, "hive")]
13171331
)
13181332
assert client is not None
13191333

0 commit comments

Comments
 (0)