Skip to content

Commit 6198fd2

Browse files
fcollman and Copilot authored
improving query speed (#206)
* improving query speed
* adding session caching to cache
* Update materializationengine/blueprints/client/cache.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 2d78a50 commit 6198fd2

File tree

6 files changed

+74
-17
lines changed

6 files changed

+74
-17
lines changed

materializationengine/blueprints/client/api2.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
sql_query_warning,
3737
validate_table_args,
3838
)
39+
from materializationengine.blueprints.client.cache import get_cached_view_metadata
3940
from materializationengine.blueprints.client.common import (
4041
unhandled_exception as common_unhandled_exception,
4142
)
@@ -3150,8 +3151,8 @@ def get(
31503151
mat_db_name = f"{aligned_volume_name}"
31513152
else:
31523153
mat_db_name = f"{datastack_name}__mat{version}"
3153-
with request_db_session(mat_db_name) as meta_db:
3154-
md = meta_db.database.get_view_metadata(datastack_name, view_name)
3154+
3155+
md = get_cached_view_metadata(aligned_volume_name, datastack_name, view_name)
31553156

31563157
return md
31573158

@@ -3189,8 +3190,8 @@ def assemble_view_dataframe(datastack_name, version, view_name, data, args):
31893190
get_count = args.get("count", False)
31903191
if get_count:
31913192
limit = None
3192-
with request_db_session(mat_db_name) as mat_db:
3193-
md = mat_db.database.get_view_metadata(datastack_name, view_name)
3193+
3194+
md = get_cached_view_metadata(aligned_volume_name, datastack_name, view_name)
31943195

31953196
if not data.get("desired_resolution", None):
31963197
des_res = [
materializationengine/blueprints/client/cache.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""
2+
Cached metadata functions for MaterializationEngine.
3+
4+
This module provides cached versions of frequently accessed metadata functions
5+
to reduce database load and improve performance.
6+
"""
7+
8+
from cachetools import TTLCache, cached
9+
from materializationengine.database import dynamic_annotation_cache
10+
from materializationengine.request_db import request_db_session
11+
12+
# Backing store for get_cached_table_metadata.
_TABLE_METADATA_CACHE = TTLCache(maxsize=256, ttl=86400)  # 1 day TTL = 86400 seconds


def get_cached_table_metadata(meta_db_name: str, table_name: str):
    """Get table metadata with 1-day TTL cache.

    Only successful lookups are cached. A ``None`` result (callers treat this
    as "table not found") is returned but never stored, so a table that is
    registered after a failed lookup becomes visible on the next call instead
    of staying "missing" until the TTL expires.

    The cached object itself is handed to every caller; treat it as
    read-only, because any mutation would be seen by all later callers.

    Args:
        meta_db_name (str): The name of the metadata database
        table_name (str): The name of the table

    Returns:
        dict: Table metadata dictionary, or ``None`` if the underlying
            lookup returned ``None``.
    """
    key = (meta_db_name, table_name)
    try:
        return _TABLE_METADATA_CACHE[key]
    except KeyError:
        pass  # miss or expired entry: fall through to the database

    with request_db_session(meta_db_name) as meta_db:
        metadata = meta_db.database.get_table_metadata(table_name)

    if metadata is not None:
        _TABLE_METADATA_CACHE[key] = metadata
    return metadata


# Preserve the attributes the ``cachetools.cached`` wrapper exposed so that
# existing code can still inspect or clear the cache.
get_cached_table_metadata.cache = _TABLE_METADATA_CACHE
get_cached_table_metadata.cache_clear = _TABLE_METADATA_CACHE.clear
25+
26+
27+
# Backing store for get_cached_view_metadata.
_VIEW_METADATA_CACHE = TTLCache(maxsize=256, ttl=86400)  # 1 day TTL = 86400 seconds


def get_cached_view_metadata(meta_db_name: str, datastack_name: str, view_name: str):
    """Get view metadata with 1-day TTL cache.

    Only non-``None`` results are cached, so that a failed lookup (should the
    underlying call report a missing view as ``None``) is retried on the next
    call instead of being remembered for the whole TTL.

    The cached object itself is handed to every caller; treat it as
    read-only, because any mutation would be seen by all later callers.

    Args:
        meta_db_name (str): The name of the metadata database
        datastack_name (str): The name of the datastack
        view_name (str): The name of the view

    Returns:
        dict: View metadata dictionary, or ``None`` if the underlying
            lookup returned ``None``.
    """
    key = (meta_db_name, datastack_name, view_name)
    try:
        return _VIEW_METADATA_CACHE[key]
    except KeyError:
        pass  # miss or expired entry: fall through to the database

    with request_db_session(meta_db_name) as meta_db:
        metadata = meta_db.database.get_view_metadata(datastack_name, view_name)

    if metadata is not None:
        _VIEW_METADATA_CACHE[key] = metadata
    return metadata


# Preserve the attributes the ``cachetools.cached`` wrapper exposed so that
# existing code can still inspect or clear the cache.
get_cached_view_metadata.cache = _VIEW_METADATA_CACHE
get_cached_view_metadata.cache_clear = _VIEW_METADATA_CACHE.clear

materializationengine/blueprints/client/common.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
import traceback
33

44
import numpy as np
5-
from cachetools import LRUCache, cached
5+
from cachetools import LRUCache, TTLCache, cached
66
from cachetools.keys import hashkey
77
from dynamicannotationdb.models import AnalysisTable, AnalysisVersion
88
from flask import abort, current_app, g, request
99

10+
from materializationengine.blueprints.client.cache import get_cached_table_metadata
1011
from materializationengine.blueprints.client.query_manager import QueryManager
1112
from materializationengine.blueprints.client.utils import (
1213
collect_crud_columns,
@@ -197,8 +198,7 @@ def get_flat_model(datastack_name: str, table_name: str, version: int):
197198
if not analysis_version_dict.get("valid", False):
198199
abort(410, "This materialization version is not available")
199200

200-
db = dynamic_annotation_cache.get_db(aligned_volume_name)
201-
metadata = db.database.get_table_metadata(table_name)
201+
metadata = get_cached_table_metadata(aligned_volume_name, table_name)
202202
reference_table = metadata.get("reference_table")
203203
if reference_table:
204204
table_metadata = {"reference_table": reference_table}
@@ -209,6 +209,7 @@ def get_flat_model(datastack_name: str, table_name: str, version: int):
209209
if schema_type is None:
210210
abort(500, f"Schema not found for table {table_name} in version {version}")
211211

212+
db = dynamic_annotation_cache.get_db(aligned_volume_name)
212213
return db.schema.create_flat_model(
213214
table_name=table_name,
214215
schema_type=schema_type,
@@ -254,7 +255,7 @@ def generate_simple_query_dataframe(
254255
db = dynamic_annotation_cache.get_db(aligned_volume_name)
255256
check_read_permission(db, table_name)
256257

257-
ann_md = db.database.get_table_metadata(table_name)
258+
ann_md = get_cached_table_metadata(aligned_volume_name, table_name)
258259

259260
analysis_version_dict, analysis_table_dict = get_analysis_version_and_table(
260261
datastack_name, table_name, version, aligned_volume_name

materializationengine/blueprints/client/query.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,12 @@ def _execute_query(
328328
df = pd.DataFrame({"count": [count]})
329329
else:
330330
if direct_sql_pandas:
331-
with engine.connect() as connection:
332-
statement = str(query.statement.compile(engine, compile_kwargs={"literal_binds": True}))
333-
df = pd.read_sql(statement, connection.connection, coerce_float=False, index_col=index_col, dtype_backend='pyarrow')
331+
statement = str(query.statement.compile(engine, compile_kwargs={"literal_binds": True}))
332+
df = pd.read_sql(statement,
333+
session.connection().connection,
334+
coerce_float=False,
335+
index_col=index_col,
336+
dtype_backend='pyarrow')
334337
else:
335338
df = read_sql_tmpfile(
336339
query.statement.compile(engine, compile_kwargs={"literal_binds": True}),

materializationengine/blueprints/client/query_manager.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
from sqlalchemy.ext.declarative.api import DeclarativeMeta
1818
import datetime
1919

20+
from materializationengine.blueprints.client.cache import get_cached_table_metadata, get_cached_view_metadata
21+
from materializationengine.database import dynamic_annotation_cache
22+
2023
DEFAULT_SUFFIX_LIST = ["x", "y", "z", "xx", "yy", "zz", "xxx", "yyy", "zzz"]
2124
DEFAULT_LIMIT = 250000
2225

@@ -39,8 +42,11 @@ def __init__(
3942
self._db = dynamic_annotation_cache.get_db(db_name)
4043
if meta_db_name is None:
4144
self._meta_db = self._db
45+
self._meta_db_name = db_name
4246
else:
4347
self._meta_db = dynamic_annotation_cache.get_db(meta_db_name)
48+
self._meta_db_name = meta_db_name
49+
4450
self._segmentation_source = segmentation_source
4551
self._split_mode = split_mode
4652
self._random_sample = random_sample
@@ -77,7 +83,7 @@ def _get_split_model(self, table_name):
7783
if table_name in self._split_models.keys():
7884
return self._split_models[table_name]
7985
else:
80-
md = self._meta_db.database.get_table_metadata(table_name)
86+
md = get_cached_table_metadata(self._meta_db_name, table_name)
8187
if md is None:
8288
abort(404, f"Table {table_name} not found in metadata database")
8389
vox_res = np.array(
@@ -93,7 +99,7 @@ def _get_split_model(self, table_name):
9399
reference_table = md.get("reference_table")
94100
if reference_table:
95101
table_metadata = {"reference_table": reference_table}
96-
ref_md = self._meta_db.database.get_table_metadata(reference_table)
102+
ref_md = get_cached_table_metadata(self._meta_db_name, reference_table)
97103
_ = self._db.schema.get_split_models(
98104
reference_table,
99105
ref_md["schema_type"],
@@ -119,7 +125,7 @@ def _get_flat_model(self, table_name):
119125
return self._models[table_name]
120126
else:
121127
# schema = self._meta_db.database.get_table_schema(table_name)
122-
md = self._meta_db.database.get_table_metadata(table_name)
128+
md = get_cached_table_metadata(self._meta_db_name, table_name)
123129
vox_res = np.array(
124130
[
125131
md["voxel_resolution_x"],
@@ -148,7 +154,7 @@ def add_view(self, datastack_name, view_name):
148154
view_table = self._db.database.get_view_table(view_name)
149155
self._tables.add(view_table)
150156
self._models[view_name] = view_table
151-
md = self._meta_db.database.get_view_metadata(datastack_name, view_name)
157+
md = get_cached_view_metadata(self._meta_db_name, datastack_name, view_name)
152158
vox_res = np.array(
153159
[
154160
md["voxel_resolution_x"],

materializationengine/utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,10 @@ def check_write_permission(db, table_name):
128128

129129
@cached(cache=TTLCache(maxsize=100, ttl=600))
130130
def check_read_permission(db, table_name):
131-
metadata = db.database.get_table_metadata(table_name)
131+
from materializationengine.blueprints.client.cache import get_cached_table_metadata
132+
# Get aligned_volume_name from the db object
133+
aligned_volume_name = db._aligned_volume
134+
metadata = get_cached_table_metadata(aligned_volume_name, table_name)
132135
if metadata["read_permission"] == "GROUP":
133136
if not users_share_common_group(metadata["user_id"]):
134137
abort(
@@ -142,7 +145,10 @@ def check_read_permission(db, table_name):
142145

143146

144147
def check_ownership(db, table_name):
145-
metadata = db.database.get_table_metadata(table_name)
148+
from materializationengine.blueprints.client.cache import get_cached_table_metadata
149+
# Get aligned_volume_name from the db object
150+
aligned_volume_name = db._aligned_volume
151+
metadata = get_cached_table_metadata(aligned_volume_name, table_name)
146152
if metadata["user_id"] != str(g.auth_user["id"]):
147153
abort(401, "You cannot do this because you are not the owner of this table")
148154
return metadata

0 commit comments

Comments
 (0)