Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
387b634
feat: add gxhash Python bindings
winstxnhdw Nov 13, 2024
35e30d1
feat: add `async` and no GIL functions
winstxnhdw Nov 14, 2024
8000673
feat: remove async variants
winstxnhdw Nov 17, 2024
6afc73a
fix: remove unused imports
winstxnhdw Nov 18, 2024
af41cba
build: add release profile
winstxnhdw Nov 19, 2024
c918354
feat/perf: add performant async implementation
winstxnhdw Mar 26, 2025
95e3cae
feat: use class API instead
winstxnhdw Mar 26, 2025
01759fc
docs: improve docstring clarity
winstxnhdw Mar 26, 2025
2801561
docs: keep docstrings consistent
winstxnhdw Mar 26, 2025
57db9a1
fix: propagate mmap errors to Python
winstxnhdw Mar 27, 2025
d9346e8
docs: describe perfomance quirk at large input sizes
winstxnhdw Mar 28, 2025
ab61583
docs: add docstrings for each hasher class
winstxnhdw Mar 28, 2025
0024b7a
docs: populate README
winstxnhdw Mar 31, 2025
6e15f15
chore: remove redundant `drop`
winstxnhdw Apr 3, 2025
76e7ee5
feat: replace `hash_nogil` with `hash_async`
winstxnhdw May 15, 2025
957bc4c
docs/build: bump version
winstxnhdw May 15, 2025
94b48ac
fix: do not install `gxhash` as relative path
winstxnhdw May 15, 2025
ded1b91
build/docs: disable `hybrid` feature by default
winstxnhdw May 15, 2025
0c350b9
fix: use `PyBytes` to reference bytes in `future_into_py`
winstxnhdw Jun 11, 2025
831c208
feat!: accept file path instead of file
winstxnhdw Jun 11, 2025
7413902
docs: update file hashing examples
winstxnhdw Jun 11, 2025
0dd40dd
docs: remove `nogil` feature
winstxnhdw Jun 11, 2025
acbe602
docs: recommend installing with `hybrid`
winstxnhdw Jun 11, 2025
9b3fb6c
feat: release v0.2.2
winstxnhdw Nov 26, 2025
b65fb7d
perf: inline hasher
winstxnhdw Nov 26, 2025
f6c0bde
feat!: make `bytes` parameter positional-only
winstxnhdw Nov 26, 2025
5a0157d
feat: bump minor version
winstxnhdw Nov 26, 2025
01d3c1f
chore: remove redundant `Ok`
winstxnhdw Nov 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions py-gxhash/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[build]
rustflags = ["-C", "target-cpu=native"]
40 changes: 40 additions & 0 deletions py-gxhash/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
[profile.release]
codegen-units = 1
lto = "fat"
panic = "abort"
strip = "symbols"

[package]
name = "py-gxhash"
version = "0.2.3"
edition = "2024"

[lib]
name = "gxhash"
crate-type = ["cdylib"]

[features]
hybrid = ["gxhash/hybrid"]

[dependencies.pyo3]
version = "0.27.1"
default-features = false
features = ["macros"]

[dependencies.pyo3-async-runtimes]
version = "0.27.0"
default-features = false
features = ["tokio-runtime"]

[dependencies.gxhash]
version = "3.5.0"
default-features = false

[dependencies.tokio]
version = "1.44.1"
default-features = false

[build-dependencies.pyo3-build-config]
version = "0.27.1"
default-features = false
features = ["resolve-config"]
9 changes: 9 additions & 0 deletions py-gxhash/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# None of these targets produce a file with the target's name, so declare them
# phony; otherwise a stray file called e.g. `clean` would make the rule a no-op.
.PHONY: all publish clean

# Build a fresh source distribution into dist/.
all:
	rm -rf dist
	uv run maturin sdist --out dist

# Publish with uv.
publish:
	uv publish

# Remove build outputs and the local virtual environment.
clean:
	rm -rf dist target .venv
59 changes: 59 additions & 0 deletions py-gxhash/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# gxhash-py

Python bindings for [GxHash](https://github.com/ogxd/gxhash), a blazingly fast and robust non-cryptographic hashing algorithm.

## Features

- **Blazingly Fast**: Minimal-overhead binding to leverage the full speed of GxHash.
- **Zero Python**: Pure Rust backend with zero additional Python runtime overhead.
- **Async-Ready**: Tokio-powered async hashing for fast and efficient concurrency.
- **Fully Typesafe**: Predictable, clean API with complete type safety.

## Installation

You must have [rustup](https://rustup.rs/) installed and set to `nightly`.

```bash
pip install gxhash
```

For the best performance, you can enable the `hybrid` feature with the following.

```bash
pip install gxhash --config-settings build-args="--features hybrid"
```

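By default, `gxhash` is compiled with `-C target-cpu=native`, so it uses every vectorisation feature your CPU supports. You can override this by setting `RUSTFLAGS` yourself, for example to target a generic x86-64 CPU with only AES and AVX2 enabled.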

```bash
RUSTFLAGS="-C target-cpu=x86-64 -C target-feature=+aes,+avx2" pip install gxhash
```

## Usage

Hashing bytes.

```python
from gxhash import GxHash32

def main():
    gxhash = GxHash32(seed=0)
    result = gxhash.hash(b"Hello, world!")

if __name__ == "__main__":
    main()
```

Hashing bytes asynchronously.

```python
from asyncio import run
from gxhash import GxHash128

async def main():
    gxhash = GxHash128(seed=0)
    result = await gxhash.hash_async(b"Hello, world!")

if __name__ == "__main__":
    run(main())
```
3 changes: 3 additions & 0 deletions py-gxhash/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fn main() {
pyo3_build_config::use_pyo3_cfgs();
}
95 changes: 95 additions & 0 deletions py-gxhash/gxhash.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from typing import Protocol

class Hasher(Protocol):
    def __init__(self, *, seed: int) -> None:
        """
        Summary
        -------
        Create a hasher bound to `seed`.
        The hash of an input is fully determined by the `seed`, so keep it private.
        Leaking the `seed` puts your service at a higher risk of a DoS attack.

        Parameters
        ----------
        seed (`int`)
            the seed used for every hash computed by this instance

        Example
        -------
        ```python
        hasher = GxHash32(seed=1234)
        ```
        """

    def hash(self, bytes: bytes, /) -> int:
        """
        Summary
        -------
        Hash `bytes` and return the result as an `int`.
        When your input is already `bytes`, this is the most performant variant of the hasher.

        Parameters
        ----------
        bytes (`bytes`)
            the input to hash (positional-only)

        Returns
        -------
        hash (`int`)
            the hash of the input bytes

        Example
        -------
        ```python
        hasher = GxHash64(seed=1234)
        print(f"Hash is {hasher.hash(bytes([42] * 1000))}!")
        ```
        """

    # NOTE(review): the native implementation builds its return value with
    # `pyo3_async_runtimes::tokio::future_into_py`; confirm whether that object is a
    # coroutine before passing it to coroutine-only APIs such as `asyncio.create_task`.
    async def hash_async(self, bytes: bytes, /) -> int:
        """
        Summary
        -------
        Hash `bytes` asynchronously and return the result as an `int`.
        Awaiting several calls concurrently lets the hashes run with true multi-threaded parallelism,
        which gives the best raw multi-threaded performance.
        When calls are awaited one after another, this is slightly slower than the plain `hash` method.

        Parameters
        ----------
        bytes (`bytes`)
            the input to hash (positional-only)

        Returns
        -------
        hash (`int`)
            the hash of the input bytes

        Example
        -------
        ```python
        hasher = GxHash128(seed=1234)
        print(f"Hash is {await hasher.hash_async(bytes([42] * 1000))}!")
        ```
        """

class GxHash32(Hasher):
    """
    Summary
    -------
    Hasher exposing the 32-bit variant of GxHash.
    """

class GxHash64(Hasher):
    """
    Summary
    -------
    Hasher exposing the 64-bit variant of GxHash.
    """

class GxHash128(Hasher):
    """
    Summary
    -------
    Hasher exposing the 128-bit variant of GxHash.
    """
55 changes: 55 additions & 0 deletions py-gxhash/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
[project]
name = "gxhash"
description = "Python bindings for GxHash"
readme = "README.md"
license = "MIT"
requires-python = ">=3.8"
dynamic = ["version"]
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]

[dependency-groups]
dev = ["maturin==1.10.2", "pyright>=1.1.407", "ruff>=0.14.6"]

[build-system]
requires = ["maturin>=1.10.2, <2.0.0"]
build-backend = "maturin"

[tool.maturin]
profile = "release"
locked = true
strip = true
features = ["pyo3/extension-module"]
exclude = ["**/uv.lock", "Makefile"]

[tool.ruff]
line-length = 120

[tool.ruff.lint]
select = ["ALL"]
ignore = [
"PYI021",
"A002",
"COM812",
"D203",
"D205",
"D211",
"D212",
"D400",
"D413",
"D415",
"D417",
]

[tool.pyright]
typeCheckingMode = "strict"
62 changes: 62 additions & 0 deletions py-gxhash/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use pyo3::prelude::Bound;
use pyo3::prelude::PyResult;
use pyo3::prelude::Python;
use pyo3::prelude::pyclass;
use pyo3::prelude::pymethods;
use pyo3::types::PyModuleMethods;

// Python class backing the 32-bit hasher; it only stores the seed.
//
// `frozen` is always requested. `immutable_type` is gated on `Py_3_10`
// because PyO3's `Py_3_x` cfgs are cumulative ("3.x or newer"): a test such
// as `any(Py_3_8, Py_3_9)` is true on every interpreter from 3.8 upwards and
// would therefore never select the `immutable_type` branch.
#[cfg_attr(Py_3_10, pyclass(frozen, immutable_type))]
#[cfg_attr(not(Py_3_10), pyclass(frozen))]
struct GxHash32 {
    seed: i64,
}

// Python class backing the 64-bit hasher; it only stores the seed.
//
// `frozen` is always requested. `immutable_type` is gated on `Py_3_10`
// because PyO3's `Py_3_x` cfgs are cumulative ("3.x or newer"): a test such
// as `any(Py_3_8, Py_3_9)` is true on every interpreter from 3.8 upwards and
// would therefore never select the `immutable_type` branch.
#[cfg_attr(Py_3_10, pyclass(frozen, immutable_type))]
#[cfg_attr(not(Py_3_10), pyclass(frozen))]
struct GxHash64 {
    seed: i64,
}

// Python class backing the 128-bit hasher; it only stores the seed.
//
// `frozen` is always requested. `immutable_type` is gated on `Py_3_10`
// because PyO3's `Py_3_x` cfgs are cumulative ("3.x or newer"): a test such
// as `any(Py_3_8, Py_3_9)` is true on every interpreter from 3.8 upwards and
// would therefore never select the `immutable_type` branch.
#[cfg_attr(Py_3_10, pyclass(frozen, immutable_type))]
#[cfg_attr(not(Py_3_10), pyclass(frozen))]
struct GxHash128 {
    seed: i64,
}

// Generates the `#[pymethods]` block shared by every hasher class.
//
// Arguments:
// * `$Self`        - the pyclass struct (with a `seed: i64` field) to implement on.
// * `$return_type` - the integer type produced by `$hasher`.
// * `$hasher`      - path to a function invoked as `$hasher(bytes, seed)`.
//
// Plain `//` comments are used on the generated methods so that no Python
// docstrings are attached to them.
macro_rules! impl_gxhash_methods {
    ($Self:ident, $return_type:ty, $hasher:path) => {
        #[pymethods]
        impl $Self {
            // Python constructor: just records the seed.
            #[new]
            fn new(seed: i64) -> Self {
                Self { seed }
            }

            // Hashes `bytes` with the stored seed on the calling thread.
            fn hash(&self, bytes: &[u8]) -> $return_type {
                let seed = self.seed;
                $hasher(bytes, seed)
            }

            // Hashes `bytes` on tokio's blocking pool and returns the Python
            // awaitable created by `future_into_py`.
            fn hash_async<'py>(
                &self,
                py: Python<'py>,
                bytes: pyo3::prelude::Py<pyo3::types::PyBytes>,
            ) -> PyResult<Bound<'py, pyo3::prelude::PyAny>> {
                // Copy the seed out so the future does not borrow `self`.
                let seed = self.seed;

                let task = async move {
                    let worker = tokio::task::spawn_blocking(move || {
                        // The interpreter is attached only to obtain the byte
                        // slice; the slice borrows from `bytes`, which this
                        // closure owns, so hashing happens after `attach` returns.
                        let input = Python::attach(|token| bytes.as_bytes(token));
                        $hasher(input, seed)
                    });

                    // A failed join is reported to Python as a RuntimeError.
                    match worker.await {
                        Ok(digest) => Ok(digest),
                        Err(e) => Err(pyo3::exceptions::PyRuntimeError::new_err(format!(
                            "Task Join Error: {}",
                            e
                        ))),
                    }
                };

                pyo3_async_runtimes::tokio::future_into_py(py, task)
            }
        }
    };
}

// Instantiate the shared methods for each class, pairing it with the gxhash
// function and unsigned integer return type of the matching width.
impl_gxhash_methods!(GxHash32, u32, gxhash::gxhash32);
impl_gxhash_methods!(GxHash64, u64, gxhash::gxhash64);
impl_gxhash_methods!(GxHash128, u128, gxhash::gxhash128);

// Initialiser for the `gxhash` extension module: registers the three hasher
// classes on `m` in order, stopping at (and returning) the first error.
#[pyo3::prelude::pymodule(name = "gxhash")]
fn pygxhash(m: &Bound<'_, pyo3::prelude::PyModule>) -> PyResult<()> {
    m.add_class::<GxHash32>()
        .and_then(|()| m.add_class::<GxHash64>())
        .and_then(|()| m.add_class::<GxHash128>())
}
Loading
Loading