Skip to content

Commit 831cbdb

Browse files
authored
Merge pull request #811 from hubmapconsortium/Derek-Furst/fix-duplicate-ancestors
Derek furst/fix duplicate ancestors
2 parents 5e8ff19 + e010d85 commit 831cbdb

File tree

1 file changed

+12
-14
lines changed

1 file changed

+12
-14
lines changed

src/schema/schema_neo4j_queries.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,8 @@ def get_children(neo4j_driver, uuid, property_key = None):
176176
query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT]->(:Activity)-[:ACTIVITY_OUTPUT]->(child:Entity) "
177177
# The target entity can't be a Lab
178178
f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' "
179-
# COLLECT() returns a list
180-
# apoc.coll.toSet() returns a set containing unique nodes
181-
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(child), apoc.map.removeKeys(properties(child), {fields_to_omit})))) AS {record_field_name}")
179+
f"WITH COLLECT(DISTINCT child) AS uniqueChildren "
180+
f"RETURN [a IN uniqueChildren | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")
182181

183182
logger.info("======get_children() query======")
184183
logger.info(query)
@@ -228,9 +227,8 @@ def get_parents(neo4j_driver, uuid, property_key = None):
228227
query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
229228
# Filter out the Lab entities
230229
f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
231-
# COLLECT() returns a list
232-
# apoc.coll.toSet() returns a set containing unique nodes
233-
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(parent), apoc.map.removeKeys(properties(parent), {fields_to_omit})))) AS {record_field_name}")
230+
f"WITH COLLECT(DISTINCT parent) AS uniqueParents "
231+
f"RETURN [a IN uniqueParents | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")
234232

235233
logger.info("======get_parents() query======")
236234
logger.info(query)
@@ -392,9 +390,8 @@ def get_ancestors(neo4j_driver, uuid, property_key = None):
392390
query = (f"MATCH (e:Entity)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(ancestor:Entity) "
393391
# Filter out the Lab entities
394392
f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
395-
# COLLECT() returns a list
396-
# apoc.coll.toSet() returns a set containing unique nodes
397-
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(ancestor), apoc.map.removeKeys(properties(ancestor), {fields_to_omit})))) AS {record_field_name}")
393+
f"WITH COLLECT(DISTINCT ancestor) AS uniqueAncestors "
394+
f"RETURN [a IN uniqueAncestors | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")
398395

399396
logger.info("======get_ancestors() query======")
400397
logger.info(query)
@@ -443,9 +440,8 @@ def get_descendants(neo4j_driver, uuid, property_key = None):
443440
query = (f"MATCH (e:Entity)-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(descendant:Entity) "
444441
# The target entity can't be a Lab
445442
f"WHERE e.uuid='{uuid}' AND e.entity_type <> 'Lab' "
446-
# COLLECT() returns a list
447-
# apoc.coll.toSet() returns a set containing unique nodes
448-
f"RETURN apoc.coll.toSet(COLLECT(apoc.create.vNode(labels(descendant), apoc.map.removeKeys(properties(descendant), {fields_to_omit})))) AS {record_field_name}")
443+
f"WITH COLLECT(DISTINCT descendant) AS uniqueDescendants "
444+
f"RETURN [a IN uniqueDescendants | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")
449445

450446
logger.info("======get_descendants() query======")
451447
logger.info(query)
@@ -1188,7 +1184,8 @@ def get_collection_datasets(neo4j_driver, uuid):
11881184
fields_to_omit = SchemaConstants.OMITTED_FIELDS
11891185
query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) "
11901186
f"WHERE c.uuid = '{uuid}' "
1191-
f"RETURN COLLECT(apoc.create.vNode(labels(e), apoc.map.removeKeys(properties(e), {fields_to_omit}))) AS {record_field_name}")
1187+
f"WITH COLLECT(DISTINCT e) AS uniqueDataset "
1188+
f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")
11921189

11931190
logger.info("======get_collection_datasets() query======")
11941191
logger.info(query)
@@ -1401,7 +1398,8 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None):
14011398
else:
14021399
query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) "
14031400
f"WHERE s.uuid = '{uuid}' "
1404-
f"RETURN COLLECT(apoc.create.vNode(labels(e), apoc.map.removeKeys(properties(e), {fields_to_omit}))) AS {record_field_name}")
1401+
f"WITH COLLECT(DISTINCT e) AS uniqueUploads "
1402+
f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}")
14051403

14061404
logger.info("======get_upload_datasets() query======")
14071405
logger.info(query)

0 commit comments

Comments
 (0)