Merge pull request #319 from hubmapconsortium/yuanzhou/optimization

yuanzhou · web-flow · commit 0217b116a6f3 · 2022-03-28T15:41:42.000-04:00
Revert to using Collection.datasets
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
@@ -192,19 +192,12 @@ ENTITIES:
         type: string
         description: "Free text description of the collection"
       ###### Transient properties ######
-      # Causing performanc issue
-      # datasets:
-      #   type: list
-      #   transient: true
-      #   generated: true
-      #   description: "The datasets that are contained in the collection."
-      #   on_read_trigger: get_collection_datasets
-      dataset_uuids:
+      datasets:
         type: list
         transient: true
         generated: true
-        description: "The dataset uuids that are contained in the collection."
-        on_read_trigger: get_collection_dataset_uuids
+        description: "The datasets that are contained in the collection."
+        on_read_trigger: get_collection_datasets
 
   ############################################# Dataset #############################################
   Dataset:
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
@@ -361,43 +361,8 @@ def get_dataset_upload(neo4j_driver, uuid, property_key = None):
     return result
 
 
-# """
-# Get a list of associated dataset dicts for a given collection
-
-# Parameters
-# ----------
-# neo4j_driver : neo4j.Driver object
-#     The neo4j database connection pool
-# uuid : str
-#     The uuid of collection
-
-# Returns
-# -------
-# list
-#     The list containing associated dataset dicts
-# """
-# def get_collection_datasets(neo4j_driver, uuid):
-#     results = []
-
-#     query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
-#              f"WHERE c.uuid = '{uuid}' "
-#              f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")
-
-#     logger.info("======get_collection_datasets() query======")
-#     logger.info(query)
-
-#     with neo4j_driver.session() as session:
-#         record = session.read_transaction(_execute_readonly_tx, query)
-
-#         if record and record[record_field_name]:
-#             # Convert the list of nodes to a list of dicts
-#             results = _nodes_to_dicts(record[record_field_name])
-
-#     return results
-
-
 """
-Get a list of associated dataset uuids for a given Collection
+Get a list of associated dataset dicts for a given collection
 
 Parameters
 ----------
@@ -409,24 +374,24 @@ def get_dataset_upload(neo4j_driver, uuid, property_key = None):
 Returns
 -------
 list
-    The list of associated dataset uuids
+    The list containing associated dataset dicts
 """
-def get_collection_dataset_uuids(neo4j_driver, uuid):
+def get_collection_datasets(neo4j_driver, uuid):
     results = []
 
-    query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) "
+    query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
              f"WHERE c.uuid = '{uuid}' "
              f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")
 
-    logger.info("======get_collection_dataset_uuids() query======")
+    logger.info("======get_collection_datasets() query======")
     logger.info(query)
 
     with neo4j_driver.session() as session:
         record = session.read_transaction(_execute_readonly_tx, query)
 
         if record and record[record_field_name]:
-            # Just return the list of uuids
-            results = record[record_field_name]
+            # Convert the list of nodes to a list of dicts
+            results = _nodes_to_dicts(record[record_field_name])
 
     return results
 
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
@@ -564,43 +564,8 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing
 ## Trigger methods specific to Collection - DO NOT RENAME
 ####################################################################################################
 
-# """
-# Trigger event method of getting a list of associated datasets for a given collection
-
-# Parameters
-# ----------
-# property_key : str
-#     The target property key of the value to be generated
-# normalized_type : str
-#     One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset
-# user_token: str
-#     The user's globus nexus token
-# existing_data_dict : dict
-#     A dictionary that contains all existing entity properties
-# new_data_dict : dict
-#     A merged dictionary that contains all possible input data to be used
-
-# Returns
-# -------
-# str: The target property key
-# list: A list of associated dataset dicts with all the normalized information
-# """
-# def get_collection_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
-#     if 'uuid' not in existing_data_dict:
-#         raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_datasets()' trigger method.")
-
-#     datasets_list = schema_neo4j_queries.get_collection_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
-
-#     # Additional properties of the datasets to exclude 
-#     # We don't want to show too much nested information
-#     properties_to_skip = ['direct_ancestors', 'collections']
-#     complete_entities_list = schema_manager.get_complete_entities_list(user_token, datasets_list, properties_to_skip)
-
-#     return property_key, schema_manager.normalize_entities_list_for_response(complete_entities_list)
-
-
 """
-Trigger event method of getting a list of associated dataset uuids for a given collection
+Trigger event method of getting a list of associated datasets for a given collection
 
 Parameters
 ----------
@@ -618,15 +583,28 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing
 Returns
 -------
 str: The target property key
-list: A list of associated dataset uuids
+list: A list of associated dataset dicts with all the normalized information
 """
-def get_collection_dataset_uuids(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
+def get_collection_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
     if 'uuid' not in existing_data_dict:
-        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_dataset_uuids()' trigger method.")
+        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_datasets()' trigger method.")
+
+    datasets_list = schema_neo4j_queries.get_collection_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
+
+    # Additional properties of the datasets to exclude 
+    # We don't want to show too much nested information
+    properties_to_skip = [
+        'direct_ancestors', 
+        'collections', 
+        'upload',
+        'title', 
+        'previous_revision_uuid', 
+        'next_revision_uuid'
+    ]
 
-    dataset_uuids = schema_neo4j_queries.get_collection_dataset_uuids(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
+    complete_entities_list = schema_manager.get_complete_entities_list(user_token, datasets_list, properties_to_skip)
 
-    return property_key, dataset_uuids
+    return property_key, schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
 
 ####################################################################################################