@@ -76,22 +76,47 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
7676 organ_name = None
7777 donor_metadata = None
7878
79- query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) "
80- # Filter out the Lab entities
81- f"WHERE e.uuid='{ uuid } ' AND s.specimen_type='organ' AND EXISTS(s.organ) "
82- # COLLECT() returns a list
83- # apoc.coll.toSet() reruns a set containing unique nodes
84- f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata" )
79+ with neo4j_driver .session () as session :
80+ # Old time-consuming single query; it takes a significant number of DB hits
81+ # query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) "
82+ # f"WHERE e.uuid='{uuid}' AND s.specimen_type='organ' AND EXISTS(s.organ) "
83+ # f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata")
8584
86- logger .info ("======get_dataset_organ_and_donor_info() query======" )
87- logger .info (query )
85+ # logger.info("======get_dataset_organ_and_donor_info() query======")
86+ # logger.info(query)
8887
89- with neo4j_driver .session () as session :
90- record = session .read_transaction (_execute_readonly_tx , query )
88+ # with neo4j_driver.session() as session:
89+ # record = session.read_transaction(_execute_readonly_tx, query)
90+
91+ # if record:
92+ # organ_name = record['organ_name']
93+ # donor_metadata = record['donor_metadata']
94+
95+ # To improve query performance, we split the lookup into two queries, which drastically reduces the DB hits
96+ sample_query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample) "
97+ f"WHERE e.uuid='{ uuid } ' AND s.specimen_type='organ' AND EXISTS(s.organ) "
98+ f"RETURN DISTINCT s.organ AS organ_name, s.uuid AS sample_uuid" )
99+
100+ logger .info ("======get_dataset_organ_and_donor_info() sample_query======" )
101+ logger .info (sample_query )
102+
103+ sample_record = session .read_transaction (_execute_readonly_tx , sample_query )
104+
105+ if sample_record :
106+ organ_name = sample_record ['organ_name' ]
107+ sample_uuid = sample_record ['sample_uuid' ]
108+
109+ donor_query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(d:Donor) "
110+ f"WHERE s.uuid='{ sample_uuid } ' AND s.specimen_type='organ' AND EXISTS(s.organ) "
111+ f"RETURN DISTINCT d.metadata AS donor_metadata" )
112+
113+ logger .info ("======get_dataset_organ_and_donor_info() donor_query======" )
114+ logger .info (donor_query )
115+
116+ donor_record = session .read_transaction (_execute_readonly_tx , donor_query )
91117
92- if record :
93- organ_name = record ['organ_name' ]
94- donor_metadata = record ['donor_metadata' ]
118+ if donor_record :
119+ donor_metadata = donor_record ['donor_metadata' ]
95120
96121 return organ_name , donor_metadata
97122
@@ -336,8 +361,43 @@ def get_dataset_upload(neo4j_driver, uuid, property_key = None):
336361 return result
337362
338363
364+ # """
365+ # Get a list of associated dataset dicts for a given collection
366+
367+ # Parameters
368+ # ----------
369+ # neo4j_driver : neo4j.Driver object
370+ # The neo4j database connection pool
371+ # uuid : str
372+ # The uuid of collection
373+
374+ # Returns
375+ # -------
376+ # list
377+ # The list containing associated dataset dicts
378+ # """
379+ # def get_collection_datasets(neo4j_driver, uuid):
380+ # results = []
381+
382+ # query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
383+ # f"WHERE c.uuid = '{uuid}' "
384+ # f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")
385+
386+ # logger.info("======get_collection_datasets() query======")
387+ # logger.info(query)
388+
389+ # with neo4j_driver.session() as session:
390+ # record = session.read_transaction(_execute_readonly_tx, query)
391+
392+ # if record and record[record_field_name]:
393+ # # Convert the list of nodes to a list of dicts
394+ # results = _nodes_to_dicts(record[record_field_name])
395+
396+ # return results
397+
398+
339399"""
340- Get a list of associated dataset dicts for a given collection
400+ Get a list of associated dataset uuids for a given Collection
341401
342402Parameters
343403----------
@@ -349,24 +409,24 @@ def get_dataset_upload(neo4j_driver, uuid, property_key = None):
349409Returns
350410-------
351411list
352- The list containing associated dataset dicts
412+ The list of associated dataset uuids
353413"""
def get_collection_dataset_uuids(neo4j_driver, uuid):
    """
    Get the list of uuids of the datasets linked to a given Collection

    Parameters
    ----------
    neo4j_driver : neo4j.Driver object
        The neo4j database connection pool
    uuid : str
        The uuid of collection

    Returns
    -------
    list
        The unique dataset uuids, or an empty list when nothing is linked
    """
    results = []

    # Collect e.uuid rather than e: collecting the nodes themselves would hand
    # node objects back to the caller instead of the uuid strings promised here.
    # apoc.coll.toSet() removes duplicates from the collected list.
    query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) "
             f"WHERE c.uuid = '{uuid}' "
             f"RETURN apoc.coll.toSet(COLLECT(e.uuid)) AS {record_field_name}")

    logger.info("======get_collection_dataset_uuids() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(_execute_readonly_tx, query)

        # A missing record or an empty collection leaves the default empty list
        if record and record[record_field_name]:
            results = record[record_field_name]

    return results
372432
@@ -467,7 +527,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list):
467527
468528
469529"""
470- Get a list of associated dataset dicts for a given collection
530+ Get a list of associated dataset dicts for a given Upload
471531
472532Parameters
473533----------
@@ -484,7 +544,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list):
484544def get_upload_datasets (neo4j_driver , uuid ):
485545 results = []
486546
487- query = (f"MATCH (e:Entity )-[:IN_UPLOAD]->(s:Upload) "
547+ query = (f"MATCH (e:Dataset )-[:IN_UPLOAD]->(s:Upload) "
488548 f"WHERE s.uuid = '{ uuid } ' "
489549 f"RETURN apoc.coll.toSet(COLLECT(e)) AS { record_field_name } " )
490550
0 commit comments