Skip to content

Commit 0217b11

Browse files
authored
Merge pull request #319 from hubmapconsortium/yuanzhou/optimization
Switch back to using Collection.datasets
2 parents 84fa133 + ab2fcfe commit 0217b11

File tree

3 files changed

+29
-93
lines changed

3 files changed

+29
-93
lines changed

src/schema/provenance_schema.yaml

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -192,19 +192,12 @@ ENTITIES:
192192
type: string
193193
description: "Free text description of the collection"
194194
###### Transient properties ######
195-
# Causing performance issue
196-
# datasets:
197-
# type: list
198-
# transient: true
199-
# generated: true
200-
# description: "The datasets that are contained in the collection."
201-
# on_read_trigger: get_collection_datasets
202-
dataset_uuids:
195+
datasets:
203196
type: list
204197
transient: true
205198
generated: true
206-
description: "The dataset uuids that are contained in the collection."
207-
on_read_trigger: get_collection_dataset_uuids
199+
description: "The datasets that are contained in the collection."
200+
on_read_trigger: get_collection_datasets
208201

209202
############################################# Dataset #############################################
210203
Dataset:

src/schema/schema_neo4j_queries.py

Lines changed: 7 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -361,43 +361,8 @@ def get_dataset_upload(neo4j_driver, uuid, property_key = None):
361361
return result
362362

363363

364-
# """
365-
# Get a list of associated dataset dicts for a given collection
366-
367-
# Parameters
368-
# ----------
369-
# neo4j_driver : neo4j.Driver object
370-
# The neo4j database connection pool
371-
# uuid : str
372-
# The uuid of collection
373-
374-
# Returns
375-
# -------
376-
# list
377-
# The list containing associated dataset dicts
378-
# """
379-
# def get_collection_datasets(neo4j_driver, uuid):
380-
# results = []
381-
382-
# query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
383-
# f"WHERE c.uuid = '{uuid}' "
384-
# f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")
385-
386-
# logger.info("======get_collection_datasets() query======")
387-
# logger.info(query)
388-
389-
# with neo4j_driver.session() as session:
390-
# record = session.read_transaction(_execute_readonly_tx, query)
391-
392-
# if record and record[record_field_name]:
393-
# # Convert the list of nodes to a list of dicts
394-
# results = _nodes_to_dicts(record[record_field_name])
395-
396-
# return results
397-
398-
399364
"""
400-
Get a list of associated dataset uuids for a given Collection
365+
Get a list of associated dataset dicts for a given collection
401366
402367
Parameters
403368
----------
@@ -409,24 +374,24 @@ def get_dataset_upload(neo4j_driver, uuid, property_key = None):
409374
Returns
410375
-------
411376
list
412-
The list of associated dataset uuids
377+
The list containing associated dataset dicts
413378
"""
414-
def get_collection_dataset_uuids(neo4j_driver, uuid):
379+
def get_collection_datasets(neo4j_driver, uuid):
415380
results = []
416381

417-
query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) "
382+
query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
418383
f"WHERE c.uuid = '{uuid}' "
419384
f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")
420385

421-
logger.info("======get_collection_dataset_uuids() query======")
386+
logger.info("======get_collection_datasets() query======")
422387
logger.info(query)
423388

424389
with neo4j_driver.session() as session:
425390
record = session.read_transaction(_execute_readonly_tx, query)
426391

427392
if record and record[record_field_name]:
428-
# Just return the list of uuids
429-
results = record[record_field_name]
393+
# Convert the list of nodes to a list of dicts
394+
results = _nodes_to_dicts(record[record_field_name])
430395

431396
return results
432397

src/schema/schema_triggers.py

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -564,43 +564,8 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing
564564
## Trigger methods specific to Collection - DO NOT RENAME
565565
####################################################################################################
566566

567-
# """
568-
# Trigger event method of getting a list of associated datasets for a given collection
569-
570-
# Parameters
571-
# ----------
572-
# property_key : str
573-
# The target property key of the value to be generated
574-
# normalized_type : str
575-
# One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset
576-
# user_token: str
577-
# The user's globus nexus token
578-
# existing_data_dict : dict
579-
# A dictionary that contains all existing entity properties
580-
# new_data_dict : dict
581-
# A merged dictionary that contains all possible input data to be used
582-
583-
# Returns
584-
# -------
585-
# str: The target property key
586-
# list: A list of associated dataset dicts with all the normalized information
587-
# """
588-
# def get_collection_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
589-
# if 'uuid' not in existing_data_dict:
590-
# raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_datasets()' trigger method.")
591-
592-
# datasets_list = schema_neo4j_queries.get_collection_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
593-
594-
# # Additional properties of the datasets to exclude
595-
# # We don't want to show too much nested information
596-
# properties_to_skip = ['direct_ancestors', 'collections']
597-
# complete_entities_list = schema_manager.get_complete_entities_list(user_token, datasets_list, properties_to_skip)
598-
599-
# return property_key, schema_manager.normalize_entities_list_for_response(complete_entities_list)
600-
601-
602567
"""
603-
Trigger event method of getting a list of associated dataset uuids for a given collection
568+
Trigger event method of getting a list of associated datasets for a given collection
604569
605570
Parameters
606571
----------
@@ -618,15 +583,28 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing
618583
Returns
619584
-------
620585
str: The target property key
621-
list: A list of associated dataset uuids
586+
list: A list of associated dataset dicts with all the normalized information
622587
"""
623-
def get_collection_dataset_uuids(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
588+
def get_collection_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
624589
if 'uuid' not in existing_data_dict:
625-
raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_dataset_uuids()' trigger method.")
590+
raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_datasets()' trigger method.")
591+
592+
datasets_list = schema_neo4j_queries.get_collection_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
593+
594+
# Additional properties of the datasets to exclude
595+
# We don't want to show too much nested information
596+
properties_to_skip = [
597+
'direct_ancestors',
598+
'collections',
599+
'upload',
600+
'title',
601+
'previous_revision_uuid',
602+
'next_revision_uuid'
603+
]
626604

627-
dataset_uuids = schema_neo4j_queries.get_collection_dataset_uuids(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
605+
complete_entities_list = schema_manager.get_complete_entities_list(user_token, datasets_list, properties_to_skip)
628606

629-
return property_key, dataset_uuids
607+
return property_key, schema_manager.normalize_entities_list_for_response(complete_entities_list)
630608

631609

632610
####################################################################################################

0 commit comments

Comments
 (0)