Skip to content

Commit 4ed3365

Browse files
authored
Merge pull request #851 from hubmapconsortium/karlburke/DatasetTitleForUncommonCases
Initial release for entity-api#257. Revise Dataset title generation.
2 parents eadb7ea + 10c72ab commit 4ed3365

File tree

2 files changed

+289
-103
lines changed

2 files changed

+289
-103
lines changed

src/schema/schema_neo4j_queries.py

Lines changed: 22 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -605,55 +605,42 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None):
605605

606606
return results
607607

608-
609608
"""
610-
Get the sample organ name and donor metadata information of the given dataset uuid
609+
For every Sample organ associated with the given dataset_uuid, retrieve the
610+
organ information and organ Donor information for use in composing a title for the Dataset.
611611
612612
Parameters
613613
----------
614614
neo4j_driver : neo4j.Driver object
615615
The neo4j database connection pool
616-
uuid : str
617-
The uuid of target entity
616+
dataset_uuid : str
617+
The UUID of a Dataset
618618
619619
Returns
620620
-------
621-
str: The sample organ name
622-
str: The donor metadata (string representation of a Python dict)
621+
list : De-duplicated list of maps, one per distinct Donor/organ combination found for the Dataset, each with the keys
622+
'donor_uuid', 'donor_metadata' (string representation of a Python dict) and 'organ_type'; None if nothing matches.
623623
"""
624-
def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
625-
organ_name = None
626-
donor_metadata = None
624+
def get_dataset_donor_organs_info(neo4j_driver, dataset_uuid):
    """
    For every organ Sample upstream of the given Dataset, retrieve the organ
    and Donor information used to compose a title for the Dataset.

    Parameters
    ----------
    neo4j_driver : neo4j.Driver object
        The neo4j database connection pool
    dataset_uuid : str
        The UUID of a Dataset

    Returns
    -------
    list or None : De-duplicated list of maps, each with the keys 'donor_uuid',
        'donor_metadata' and 'organ_type', one per distinct Donor/organ
        combination matched by the query. None when the query yields no record
        or the collected set is empty.
    """
    # NOTE(review): dataset_uuid is interpolated straight into the Cypher text.
    # If it can ever come from untrusted input, switch to a query parameter;
    # that needs execute_readonly_tx to accept parameters -- confirm its signature.
    ds_donors_organs_query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(org:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor)"
                              f" WHERE e.uuid='{dataset_uuid}'"
                              f" AND org.sample_category IS NOT NULL"
                              f" AND org.sample_category='organ'"
                              f" AND org.organ IS NOT NULL"
                              f" RETURN apoc.coll.toSet(COLLECT({{donor_uuid: d.uuid"
                              f" , donor_metadata: d.metadata"
                              f" , organ_type: org.organ}})) AS donorOrganSet")

    logger.info("======get_dataset_donor_organs_info() ds_donors_organs_query======")
    logger.info(ds_donors_organs_query)

    # A single session is enough: the previous version opened a second, nested
    # session that shadowed the first and left the outer one unused.
    with neo4j_driver.session() as session:
        record = session.read_transaction(execute_readonly_tx, ds_donors_organs_query)

        # Normalize both "no record" and "empty set" to None for callers.
        if not record:
            return None
        return record['donorOrganSet'] or None
657644

658645
def get_entity_type(neo4j_driver, entity_uuid: str) -> str:
659646
query: str = f"Match (ent {{uuid: '{entity_uuid}'}}) return ent.entity_type"

0 commit comments

Comments
 (0)