Skip to content
Open
46 changes: 44 additions & 2 deletions duckdb_engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import warnings
from collections.abc import Hashable
from functools import lru_cache
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -175,6 +176,26 @@ def fetchmany(self, size: Optional[int] = None) -> List:
else:
return self.__c.fetchmany(size)

@property
def description(self) -> Optional[List[Tuple[Any, ...]]]:
    """Return the wrapped cursor's ``description`` with hashable type codes.

    Every row is returned as a tuple. When a row's second element (the type
    code) cannot be hashed it is replaced by ``str(type_code)``; rows with
    fewer than two elements are passed through unchanged.

    Returns:
        ``None`` when the wrapped cursor reports no description, otherwise a
        list holding one tuple per description row.
    """
    raw = self.__c.description
    if raw is None:
        return None

    def _with_hashable_type(row: Any) -> Tuple[Any, ...]:
        """Return ``row`` as a tuple, stringifying an unhashable type code."""
        if len(row) <= 1:
            return tuple(row)

        type_code = row[1]
        try:
            # Hash for real instead of testing ``isinstance(..., Hashable)``:
            # that check only looks for a ``__hash__`` attribute, so e.g. a
            # tuple holding a list passes it yet still raises when hashed.
            hash(type_code)
        except TypeError:
            return (row[0], str(type_code), *row[2:])
        return tuple(row)

    return [_with_hashable_type(row) for row in raw]


class DuckDBEngineWarning(Warning):
    """Warning category declared by this module; adds nothing to ``Warning``."""
Expand Down Expand Up @@ -283,6 +304,7 @@ def type_descriptor(self, typeobj: Type[sqltypes.TypeEngine]) -> Any: # type: i
return res

def connect(self, *cargs: Any, **cparams: Any) -> "Connection":
print("duckdb version:", duckdb_version)
core_keys = get_core_config()
preload_extensions = cparams.pop("preload_extensions", [])
config = dict(cparams.get("config", {}))
Expand All @@ -297,8 +319,28 @@ def connect(self, *cargs: Any, **cparams: Any) -> "Connection":
config["custom_user_agent"] = user_agent

filesystems = cparams.pop("register_filesystems", [])

conn = duckdb.connect(*cargs, **cparams)
database = cparams.pop("database", None)
# ducklake handling
if database.startswith("ducklake:") and supports_attach:
alias_ducklake = cparams.pop("alias", "ducklake")
data_path = cparams.pop("data_path", None)
read_only = cparams.pop("read_only", False)

attach_sql = f"""
ATTACH '{database}' AS {alias_ducklake}
"""
if read_only and data_path is not None:
attach_sql += f" (DATA_PATH '{data_path}', READ_ONLY)"
elif read_only:
attach_sql += " (READ_ONLY)"
elif data_path is not None:
attach_sql += f" (DATA_PATH '{data_path}')"

conn = duckdb.connect(*cargs, **cparams)
conn.execute(attach_sql)
conn.execute(f"USE {alias_ducklake}")
else:
conn = duckdb.connect(database, *cargs, **cparams)

for extension in preload_extensions:
conn.execute(f"LOAD {extension}")
Expand Down
237 changes: 237 additions & 0 deletions duckdb_engine/tests/test_ducklake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import tempfile
from pathlib import Path
from typing import Generator, cast

import pytest
import sqlalchemy
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.dialects import registry # type: ignore
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
    """Yield a fresh temporary directory as a ``Path``.

    The directory is created by ``tempfile.TemporaryDirectory`` and is cleaned
    up when the context manager exits after the requesting test.
    """
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        yield root


@pytest.fixture
def ducklake_engine(temp_dir: Path) -> Generator[Engine, None, None]:
    """Provide an engine on a fresh DuckLake catalog under ``temp_dir``.

    The catalog file is ``test_catalog.ducklake``; ``connect_args`` carry the
    ``data`` subdirectory as ``data_path`` and ``test_ducklake`` as ``alias``.
    The engine is disposed once the requesting test has finished.
    """
    registry.register("duckdb", "duckdb_engine", "Dialect")

    data_path = temp_dir / "data"
    data_path.mkdir()
    catalog_path = temp_dir / "test_catalog.ducklake"

    engine = create_engine(
        f"duckdb:///ducklake:{catalog_path}",
        connect_args={"data_path": str(data_path), "alias": "test_ducklake"},
    )
    try:
        yield engine
    finally:
        # Release pooled connections so no open handle into temp_dir outlives
        # the directory (this fixture is torn down before temp_dir is).
        engine.dispose()


@pytest.fixture
def ducklake_session(ducklake_engine: Engine) -> Generator[Session, None, None]:
    """Provide a ``Session`` bound to ``ducklake_engine``.

    The session is closed after the requesting test so its connection is
    released instead of lingering until garbage collection.
    """
    session = sessionmaker(bind=ducklake_engine)()
    try:
        yield session
    finally:
        session.close()


def test_ducklake_attach_basic(ducklake_engine: Engine, temp_dir: Path) -> None:
    """Install and load ducklake, then check ``USE test_ducklake`` takes effect.

    A current database must be reported before the switch, and it must be
    ``test_ducklake`` afterwards.
    """
    current_db_query = text("SELECT current_database()")

    with ducklake_engine.connect() as connection:
        for statement in ("INSTALL ducklake", "LOAD ducklake"):
            connection.execute(text(statement))

        initial_db = connection.execute(current_db_query).scalar()
        assert initial_db is not None

        connection.execute(text("USE test_ducklake"))
        active_db = connection.execute(current_db_query).scalar()
        assert active_db == "test_ducklake"


def test_ducklake_query_table(ducklake_engine: Engine) -> None:
    """Create and fill ``test_table``, then check COUNT, a filtered SELECT and AVG."""
    # The SQL literals are kept flush-left so their text stays exactly as it was.
    create_table = text("""
CREATE TABLE test_table (
id INTEGER,
name VARCHAR,
value DOUBLE
)
""")
    insert_rows = text("""
INSERT INTO test_table VALUES
(1, 'Alice', 100.5),
(2, 'Bob', 200.7),
(3, 'Charlie', 300.9)
""")

    with ducklake_engine.connect() as connection:
        connection.execute(create_table)
        connection.execute(insert_rows)

        row_count = connection.execute(
            text("SELECT COUNT(*) FROM test_table")
        ).scalar()
        assert row_count == 3

        second_name = connection.execute(
            text("SELECT name FROM test_table WHERE id = 2")
        ).scalar()
        assert second_name == "Bob"

        mean_value = connection.execute(
            text("SELECT AVG(value) FROM test_table")
        ).scalar()
        assert mean_value is not None
        # The three inserted values average to 200.7; allow for float error.
        deviation = abs(cast(float, mean_value) - 200.7)
        assert deviation < 0.01


def test_ducklake_query_schema(ducklake_engine: Engine) -> None:
    """Create a schema and a table in it, then look both up via the inspector."""
    create_table = text("""
CREATE TABLE test_schema.schema_table (
id INTEGER,
description VARCHAR
)
""")

    with ducklake_engine.connect() as connection:
        connection.execute(text("CREATE SCHEMA test_schema"))
        connection.execute(create_table)

        # NOTE(review): the inspector section is assumed to run while the
        # connection is still open; confirm against the original indentation.
        inspector = inspect(ducklake_engine)

        schemas = inspector.get_schema_names()
        print(f"Schemas: {schemas}")
        assert "test_ducklake.test_schema" in schemas

        table_names = inspector.get_table_names(schema="test_ducklake.test_schema")
        assert "schema_table" in table_names

        found_columns = {
            column["name"]
            for column in inspector.get_columns("schema_table", schema="test_schema")
        }
        assert "id" in found_columns
        assert "description" in found_columns


def test_ducklake_query_view(ducklake_engine: Engine) -> None:
    """Build a table and an aggregating view, then query and inspect the view."""
    # Executed in order; the SQL literals are kept flush-left so their text
    # stays exactly as it was.
    setup_statements = (
        "CREATE SCHEMA test_schema",
        "use test_schema",
        """
CREATE TABLE test_schema.base_table (
id INTEGER,
category VARCHAR,
amount DOUBLE
)
""",
        """
INSERT INTO test_schema.base_table VALUES
(1, 'A', 100.0),
(2, 'B', 200.0),
(3, 'A', 150.0),
(4, 'B', 250.0)
""",
        """
CREATE VIEW category_summary AS
SELECT category, SUM(amount) as total_amount, COUNT(*) as count
FROM test_ducklake.test_schema.base_table
GROUP BY category
""",
    )
    total_for_a = text("""
SELECT total_amount FROM test_schema.category_summary WHERE category = 'A'
""")

    with ducklake_engine.connect() as connection:
        for statement in setup_statements:
            connection.execute(text(statement))

        category_count = connection.execute(
            text("SELECT COUNT(*) FROM test_schema.category_summary")
        ).scalar()
        assert category_count == 2

        # Category 'A' holds the rows with amounts 100.0 and 150.0.
        assert connection.execute(total_for_a).scalar() == 250.0

        # NOTE(review): the inspector section is assumed to run while the
        # connection is still open; confirm against the original indentation.
        view_names = inspect(ducklake_engine).get_view_names("test_schema")
        assert "category_summary" in view_names


@pytest.fixture
def readonly_ducklake_engine(temp_dir: Path) -> Generator[Engine, None, None]:
    """Provide a read-only engine on a DuckLake catalog seeded with one row.

    A writable engine first creates ``readonly_test`` and inserts a single row,
    and is disposed before the read-only engine is created on the same catalog.
    The read-only engine passes ``read_only=True`` in ``connect_args`` and is
    disposed once the requesting test has finished.
    """
    data_path = temp_dir / "data"
    data_path.mkdir()
    registry.register("duckdb", "duckdb_engine", "Dialect")
    catalog_url = f"duckdb:///ducklake:{temp_dir / 'test_catalog.ducklake'}"

    # First create some test data with a writable connection
    writable_engine = create_engine(
        catalog_url,
        connect_args={"data_path": str(data_path), "alias": "test_readonly"},
    )
    try:
        # begin() commits on exit, so the seed data is persisted on both
        # SQLAlchemy 1.x (autocommit) and 2.x (explicit commit required).
        with writable_engine.begin() as conn:
            conn.execute(
                text("""
CREATE TABLE readonly_test (
id INTEGER,
name VARCHAR
)
""")
            )
            conn.execute(text("INSERT INTO readonly_test VALUES (1, 'existing_data')"))
    finally:
        # Close the writer before the catalog is attached read-only.
        writable_engine.dispose()

    readonly_engine = create_engine(
        catalog_url,
        connect_args={
            "data_path": str(data_path),
            "alias": "test_readonly",
            "read_only": True,
        },
    )
    try:
        yield readonly_engine
    finally:
        readonly_engine.dispose()


def test_postgres(temp_dir: Path) -> None:
    """Smoke-test schema inspection on a Postgres-backed DuckLake catalog.

    The Postgres connection string is read from the ``DUCKLAKE_POSTGRES_DSN``
    environment variable (keyword/value form, e.g.
    ``dbname=... user=... password=... host=... port=...``). The test is
    skipped when the variable is unset, so no credentials live in the
    repository and ordinary runs need no external database.

    ``temp_dir`` is unused; it is kept so the signature stays unchanged.
    """
    import os  # local import: only this opt-in test needs it

    dsn = os.environ.get("DUCKLAKE_POSTGRES_DSN")
    if not dsn:
        pytest.skip("set DUCKLAKE_POSTGRES_DSN to run the Postgres-backed test")

    registry.register("duckdb", "duckdb_engine", "Dialect")
    ducklake_engine = create_engine(f"duckdb:///ducklake:postgres:{dsn}")
    try:
        # Keep a connection open while inspecting, as the original test did.
        with ducklake_engine.connect():
            schemas = inspect(ducklake_engine).get_schema_names()
        assert schemas, "expected at least one schema to be reported"
    finally:
        ducklake_engine.dispose()


def test_ducklake_readonly_prevents_writes(readonly_ducklake_engine: Engine) -> None:
    """Check that reads succeed and every write raises on a read-only attach."""
    rejected_statements = (
        "INSERT INTO readonly_test VALUES (2, 'new_data')",
        "DELETE FROM readonly_test WHERE id = 1",
        "UPDATE readonly_test SET name = 'updated' WHERE id = 1",
        "CREATE TABLE new_table (id INTEGER)",
        "DROP TABLE readonly_test",
    )

    with readonly_ducklake_engine.connect() as connection:
        # Reads must still work.
        row_count = connection.execute(
            text("SELECT COUNT(*) FROM readonly_test")
        ).scalar()
        assert row_count == 1

        seeded_name = connection.execute(
            text("SELECT name FROM readonly_test WHERE id = 1")
        ).scalar()
        assert seeded_name == "existing_data"

        # Writes must fail. DuckDB reports, for example:
        #   Invalid Input Error: Cannot execute statement of type "INSERT" on
        #   database "test_readonly" which is attached in read-only mode!
        # which SQLAlchemy surfaces as ProgrammingError.
        for statement in rejected_statements:
            with pytest.raises(sqlalchemy.exc.ProgrammingError):
                connection.execute(text(statement))
18 changes: 6 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
[project]
name = "duckdb_engine"
version = "0.17.0"
version = "0.18.0"
description = "SQLAlchemy driver for duckdb"
authors = [
{name = "Elliana May", email = "[email protected]"},
]
authors = [{ name = "Elliana May", email = "[email protected]" }]
license = "MIT"
license-files = ["LICENSE.txt"]
requires-python = ">=3.9,<4"
readme = "README.md"
dependencies = [
"duckdb>=0.5.0",
"sqlalchemy>=1.3.22",
"packaging>=21",
]
dependencies = ["duckdb>=1.4.0", "sqlalchemy>=1.3.22", "packaging>=21"]

[project.urls]
"Bug Tracker" = "https://github.com/Mause/duckdb_engine/issues"
Expand All @@ -28,8 +22,8 @@ mypy = "^1.4"
hypothesis = "^6.75.2"
pandas = "^1"
jupysql = "^0.10.0"
sqlalchemy = {version="^1.3.19", extras=['mypy']}
pytest-cov = {extras = ["coverage"], version = "^5.0.0"}
sqlalchemy = { version = "^1.3.19", extras = ['mypy'] }
pytest-cov = { extras = ["coverage"], version = "^5.0.0" }
pytest-remotedata = "^0.4.0"
pytest-snapshot = ">=0.9.0,<1"
toml = "^0.10.2"
Expand All @@ -43,7 +37,7 @@ pre-commit = { version = "^4.0.0", markers = "python_version >= '3.9'" }
duckdb = "duckdb_engine:Dialect"

[tool.pytest.ini_options]
addopts = "--hypothesis-show-statistics --strict --strict-markers"
# addopts = "--hypothesis-show-statistics --strict --strict-markers"
xfail_strict = true

[tool.mypy]
Expand Down
Loading