Skip to content

Commit

Permalink
partitioning by geo_id
Browse files Browse the repository at this point in the history
  • Loading branch information
bailliekova committed Jan 29, 2025
1 parent 147cca5 commit beba271
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 10 deletions.
9 changes: 8 additions & 1 deletion gerrydb_meta/crud/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
from datetime import datetime, timezone
from typing import Any, Collection, Tuple

from sqlalchemy import exc, insert, update
from sqlalchemy import exc, insert, update, text
from sqlalchemy.orm import Session

from gerrydb_meta import models, schemas
from gerrydb_meta.crud.base import NamespacedCRBase, normalize_path
from gerrydb_meta.enums import ColumnType
from gerrydb_meta.exceptions import ColumnValueTypeError, CreateValueError
from gerrydb_meta.utils import create_column_value_partition_text

log = logging.getLogger()

Expand Down Expand Up @@ -168,6 +169,7 @@ def set_values(
Raises:
ColumnValueTypeError: If column types do not match expected types.
"""
table_name=models.ColumnValue.__table__.name
val_column = COLUMN_TYPE_TO_VALUE_COLUMN[col.type]
now = datetime.now(timezone.utc)

Expand Down Expand Up @@ -202,6 +204,11 @@ def set_values(

# Add the new column values and invalidate the old ones where present.
geo_ids = [geo.geo_id for geo, _ in values]

#make sure partitions exist for all geos
for geo_id in set(geo_ids):
db.execute(create_column_value_partition_text(geo_id=geo_id))

with_tuples = (
db.query(
models.ColumnValue.col_id,
Expand Down
12 changes: 7 additions & 5 deletions gerrydb_meta/crud/geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
from typing import Collection

from geoalchemy2.elements import WKBElement
from sqlalchemy import and_, insert, or_, update
from sqlalchemy import and_, insert, or_, update, text
from sqlalchemy.exc import StatementError
from sqlalchemy.orm import Session

from gerrydb_meta import models, schemas
from gerrydb_meta.crud.base import NamespacedCRBase, normalize_path
from gerrydb_meta.exceptions import BulkCreateError, BulkPatchError
from gerrydb_meta.utils import create_column_value_partition_text

log = logging.getLogger()

Expand Down Expand Up @@ -61,9 +62,6 @@ def create_bulk(
paths=[path for path in paths if paths.count(path) > 1],
)

#THIS IS WHERE WE MAKE GEOIDS, ANNA!
#TODO: also create a column_value empty partition for the geoid. (after the foreign key exists, obvs)

with db.begin(nested=True):
geos = list(
db.scalars(
Expand All @@ -80,6 +78,8 @@ def create_bulk(
],
)
)
for geo in geos:
db.execute(create_column_value_partition_text(geo_id=geo.geo_id))

try:
geo_versions = list(
Expand Down Expand Up @@ -112,9 +112,11 @@ def create_bulk(
)
raise BulkCreateError(
"Failed to insert geometries. Geometries must be encoded in WKB format."
) from ex
) from ex

etag = self._update_etag(db, namespace)



db.flush()
return list(zip(geos, geo_versions)), etag
Expand Down
16 changes: 12 additions & 4 deletions gerrydb_meta/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from uuid import UUID, uuid4

from geoalchemy2 import Geography as SqlGeography
from sqlalchemy import JSON, BigInteger, Boolean, CheckConstraint, DateTime
from sqlalchemy import JSON, BigInteger, Boolean, CheckConstraint, DateTime, text, event
from sqlalchemy import Enum as SqlEnum
from sqlalchemy import (
ForeignKey,
Expand All @@ -17,8 +17,9 @@
UniqueConstraint,
)
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, Session
from sqlalchemy.sql import func
from sqlalchemy.types import CHAR

from gerrydb_meta.enums import (
ColumnKind,
Expand All @@ -27,8 +28,10 @@
ScopeType,
ViewRenderStatus,
)
from gerrydb_meta.utils import create_column_value_partition_text

metadata_obj = MetaData(schema="gerrydb")
SCHEMA= "gerrydb"
metadata_obj = MetaData(schema=SCHEMA)


class Base(DeclarativeBase):
Expand Down Expand Up @@ -371,6 +374,10 @@ def full_path(self):
"""Path with namespace prefix."""
return f"/{self.namespace.path}/{self.path}"

@event.listens_for(Geography, "after_insert")
def create_geo_partition_in_column_value(mapper, connection, geo):
    """Create the `column_value` partition for a newly inserted geography.

    Registered as a mapper-level ``after_insert`` hook on ``Geography``; it
    runs during the flush, once the row for ``geo`` has been inserted.

    Args:
        mapper: The mapper for ``Geography`` (unused).
        connection: The connection the flush is using to emit the INSERT.
        geo: The ``Geography`` instance that was just inserted.
    """
    # Emit the DDL on the connection handed to the hook rather than through
    # the owning Session: the Session is in the middle of a flush here, and
    # mapper-level flush events are only meant to run SQL on this connection.
    connection.execute(create_column_value_partition_text(geo_id=geo.geo_id))

class GeoImport(Base):
__tablename__ = "geo_import"
Expand Down Expand Up @@ -547,7 +554,8 @@ class ColumnSetMember(Base):

class ColumnValue(Base):
__tablename__ = "column_value"
__table_args__ = (UniqueConstraint("col_id", "geo_id", "valid_from"),)
__table_args__ = (UniqueConstraint("col_id", "geo_id", "valid_from"),
{"postgresql_partition_by": "LIST (geo_id)" })

col_id: Mapped[int] = mapped_column(
Integer,
Expand Down
7 changes: 7 additions & 0 deletions gerrydb_meta/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from sqlalchemy import text
from gerrydb_meta import models

def create_column_value_partition_text(geo_id: int):
    """Build the DDL that creates the `column_value` partition for one geography.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so it can be executed
    repeatedly for the same ``geo_id``.

    Args:
        geo_id: ID of the geography whose partition should exist.

    Returns:
        A SQLAlchemy ``text()`` clause wrapping the ``CREATE TABLE ...
        PARTITION OF ... FOR VALUES IN (...)`` statement.

    Raises:
        ValueError: If ``geo_id`` cannot be converted to an integer.
        TypeError: If ``geo_id`` is of a type ``int()`` does not accept.
    """
    # The value is interpolated directly into the statement text (it is not a
    # bind parameter), so coerce it to int first: nothing other than an
    # integer literal can end up in the generated SQL.
    geo_id = int(geo_id)
    table_name = models.ColumnValue.__table__.name
    # NOTE(review): the partition name is "<schema>_<table>_<geo_id>" with no
    # schema qualifier, so it is presumably created in the connection's
    # default schema rather than in `models.SCHEMA` -- confirm this is intended.
    # NOTE(review): `models` imports this function while this module imports
    # `models`; `models` is only dereferenced at call time here -- verify that
    # the import order used by the application keeps this cycle safe.
    sql = (
        f"CREATE TABLE IF NOT EXISTS {models.SCHEMA}_{table_name}_{geo_id} "
        f"PARTITION OF {models.SCHEMA}.{table_name} FOR VALUES IN ({geo_id})"
    )
    return text(sql)

0 comments on commit beba271

Please sign in to comment.