Skip to content

Commit aff8eed

Browse files
authored
Merge pull request #935 from hubmapconsortium/Derek-Furst/avoid-unecessary-linkeages
Derek furst/avoid unecessary linkeages
2 parents aff5979 + c2655be commit aff8eed

File tree

3 files changed

+175
-35
lines changed

3 files changed

+175
-35
lines changed

src/schema/provenance_schema.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,6 @@ ENTITIES:
462462
exposed: false
463463
indexed: false
464464
description: "The uuids of source entities from which this new entity is derived. Used to pass source entity ids in on POST or PUT calls used to create the linkages."
465-
# Note: link_dataset_to_direct_ancestors() will always delete all the old linkages first
466465
after_create_trigger: link_dataset_to_direct_ancestors
467466
after_update_trigger: link_dataset_to_direct_ancestors
468467
direct_ancestors:
@@ -1034,7 +1033,6 @@ ENTITIES:
10341033
exposed: false
10351034
indexed: false
10361035
description: "The uuid of source entity from which this new entity is derived from. Used on creation or edit to create an action and relationship to the ancestor. The direct ancestor must be a Donor or Sample. If the direct ancestor is a Donor, the sample must be of type organ."
1037-
# Note: link_sample_to_direct_ancestor() will always delete all the old linkages first
10381036
after_create_trigger: link_sample_to_direct_ancestor
10391037
after_update_trigger: link_sample_to_direct_ancestor
10401038
before_property_update_validators:

src/schema/schema_neo4j_queries.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,72 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u
757757
tx.rollback()
758758

759759
raise TransactionError(msg)
760+
761+
"""
Link new direct ancestors to the Activity node of a target entity in neo4j.

When `create_activity` is True the Activity node is created first from
`activity_data_dict` and connected to the target entity with an
ACTIVITY_OUTPUT relationship; otherwise the Activity identified by
`activity_uuid` is expected to exist already. Everything runs in a single
transaction.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
new_ancestor_uuids : list
    The uuids of the new direct ancestors to be linked to the activity
activity_uuid : str
    The uuid of the activity node to link to
create_activity : bool
    Whether the activity node must be created before linking
activity_data_dict : dict
    The property values of the activity node, only used when `create_activity` is True
dataset_uuid : str
    The uuid of the target child entity (callers pass Dataset as well as Sample uuids)

Raises
------
TransactionError
    If the transaction fails; the original error is attached as the cause
"""
def add_new_ancestors_to_existing_activity(neo4j_driver, new_ancestor_uuids, activity_uuid, create_activity, activity_data_dict, dataset_uuid):
    # Bound up front so the except handler below never touches an unbound name
    # when the failure happens before the transaction has been opened
    tx = None

    try:
        with neo4j_driver.session() as session:
            tx = session.begin_transaction()

            if create_activity:
                # Create the Activity node and hang the target entity off it
                create_activity_tx(tx, activity_data_dict)
                create_relationship_tx(tx, activity_uuid, dataset_uuid, 'ACTIVITY_OUTPUT', '->')

            # Connect every new ancestor to the activity node
            create_outgoing_activity_relationships_tx(tx=tx
                                                      , source_node_uuids=new_ancestor_uuids
                                                      , activity_node_uuid=activity_uuid)

            tx.commit()
    except TransactionError as te:
        msg = "TransactionError from calling add_new_ancestors_to_existing_activity(): "
        logger.exception(msg)

        if tx is not None and not tx.closed():
            logger.error("Failed to commit add_new_ancestors_to_existing_activity() transaction, rollback")
            tx.rollback()

        # Same exception type as before, now chained to the root cause
        raise TransactionError(msg) from te
798+
"""
Get the uuid of the Activity node that has an ACTIVITY_OUTPUT relationship
pointing to the given entity.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
entity_uuid : str
    The uuid of the target entity node

Returns
-------
str
    The uuid of the direct ancestor Activity node, or None when no such
    Activity node is found
"""
def get_parent_activity_uuid_from_entity(neo4j_driver, entity_uuid):
    query = """
        MATCH (activity:Activity)-[:ACTIVITY_OUTPUT]->(entity:Entity {uuid: $entity_uuid})
        RETURN activity.uuid AS activity_uuid
    """

    with neo4j_driver.session() as session:
        # The entity uuid is passed as a query parameter, never interpolated
        result = session.run(query, entity_uuid=entity_uuid)

        # The record must be read while the session is still open
        record = result.single()

        return record["activity_uuid"] if record else None
760826

761827

762828
"""
@@ -1934,6 +2000,52 @@ def _delete_activity_node_and_linkages_tx(tx, uuid):
19342000

19352001
result = tx.run(query)
19362002

"""
Delete only the ACTIVITY_INPUT linkages between a target entity and a specific set of its direct ancestors.
The Activity node and the entity nodes remain intact.

Note: despite the `_tx` suffix this function takes the driver and opens
(and commits) its own transaction rather than receiving one.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
entity_uuid : str
    The uuid of the target child entity
ancestor_uuids : list
    A list of uuids of ancestors whose relationships should be deleted

Raises
------
TransactionError
    If the transaction fails; the original error is attached as the cause
"""
def delete_ancestor_linkages_tx(neo4j_driver, entity_uuid, ancestor_uuids):
    query = (
        "MATCH (a:Entity)-[r:ACTIVITY_INPUT]->(activity:Activity)-[:ACTIVITY_OUTPUT]->(t:Entity {uuid: $entity_uuid}) "
        "WHERE a.uuid IN $ancestor_uuids "
        "DELETE r"
    )

    # Header and query are logged at the same level so they always appear together
    logger.debug("======delete_ancestor_linkages_tx() query======")
    logger.debug(query)

    # Bound up front so the except handler below never touches an unbound name
    # when the failure happens before the transaction has been opened
    tx = None

    try:
        with neo4j_driver.session() as session:
            tx = session.begin_transaction()

            # DELETE returns nothing of interest, so the result is not kept
            tx.run(
                query,
                entity_uuid=entity_uuid,
                ancestor_uuids=ancestor_uuids
            )

            tx.commit()
    except TransactionError as te:
        msg = "TransactionError from calling delete_ancestor_linkages_tx(): "
        logger.exception(msg)

        if tx is not None and not tx.closed():
            logger.error("Failed to commit delete_ancestor_linkages_tx() transaction, rollback")
            tx.rollback()

        # Same exception type as before, now chained to the root cause
        raise TransactionError(msg) from te
2048+
19372049
"""
19382050
Delete linkages between a publication and its associated collection
19392051

src/schema/schema_triggers.py

Lines changed: 63 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -898,25 +898,39 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use
898898

899899
if 'direct_ancestor_uuids' not in new_data_dict:
900900
raise KeyError("Missing 'direct_ancestor_uuids' key in 'new_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.")
901-
901+
create_activity = False
902+
activity_data_dict = {}
902903
dataset_uuid = existing_data_dict['uuid']
903904
direct_ancestor_uuids = new_data_dict['direct_ancestor_uuids']
904905

905-
# Generate property values for Activity node
906-
activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict)
906+
existing_dataset_ancestor_uuids = schema_neo4j_queries.get_dataset_direct_ancestors(schema_manager.get_neo4j_driver_instance(), dataset_uuid, "uuid")
907+
new_ancestors = set(direct_ancestor_uuids)-set(existing_dataset_ancestor_uuids)
908+
ancestors_to_unlink = set(existing_dataset_ancestor_uuids)-set(direct_ancestor_uuids)
909+
activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(schema_manager.get_neo4j_driver_instance(), dataset_uuid)
907910

908-
try:
909-
# Create a linkage (via one Activity node) between the dataset node and its direct ancestors in neo4j
910-
schema_neo4j_queries.link_entity_to_direct_ancestors(schema_manager.get_neo4j_driver_instance(), dataset_uuid, direct_ancestor_uuids, activity_data_dict)
911-
912-
# Delete the cache of this dataset if any cache exists
913-
# Because the `Dataset.direct_ancestors` field
914-
schema_manager.delete_memcached_cache([dataset_uuid])
915-
except TransactionError:
916-
# No need to log
917-
raise
911+
if not activity_uuid:
912+
activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict)
913+
activity_uuid = activity_data_dict['uuid']
914+
create_activity = True
918915

916+
if new_ancestors:
917+
logger.info(f"Linking the following new ancestors: {', '.join(new_ancestors)}")
918+
try:
919+
schema_neo4j_queries.add_new_ancestors_to_existing_activity(schema_manager.get_neo4j_driver_instance(), list(new_ancestors), activity_uuid, create_activity, activity_data_dict, dataset_uuid)
920+
except TransactionError:
921+
raise
922+
923+
if ancestors_to_unlink:
924+
logger.info(f"Unlinking the following ancestors: {', '.join(ancestors_to_unlink)}")
925+
try:
926+
schema_neo4j_queries.delete_ancestor_linkages_tx(schema_manager.get_neo4j_driver_instance(), dataset_uuid, list(ancestors_to_unlink))
927+
except TransactionError:
928+
raise
929+
930+
if not(ancestors_to_unlink or new_ancestors):
931+
logger.info("No new ancestors linked, nor old ancestors unlinked")
919932

933+
920934
"""
921935
TriggerTypeEnum.AFTER_CREATE and TriggerTypeEnum.AFTER_UPDATE
922936
@@ -1913,30 +1927,46 @@ def delete_metadata_files(property_key, normalized_type, request, user_token, ex
19131927
def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict):
    # Reconcile the Sample's direct ancestor in neo4j with the requested one:
    # link the requested ancestor if it is not linked yet and unlink every other
    # existing parent, leaving already-correct linkages untouched.
    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.")

    if 'direct_ancestor_uuid' not in new_data_dict:
        raise KeyError("Missing 'direct_ancestor_uuid' key in 'new_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.")

    sample_uuid = existing_data_dict['uuid']

    # A Sample has exactly one direct ancestor, so this is a single uuid
    direct_ancestor_uuid = new_data_dict['direct_ancestor_uuid']

    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    existing_sample_ancestor_uuids = schema_neo4j_queries.get_parents(neo4j_driver, sample_uuid, "uuid")

    # Work out the delta without mutating the list returned by the query layer
    if direct_ancestor_uuid in existing_sample_ancestor_uuids:
        new_ancestors = []
    else:
        new_ancestors = [direct_ancestor_uuid]

    ancestors_to_unlink = [uuid for uuid in existing_sample_ancestor_uuids if uuid != direct_ancestor_uuid]

    if not (ancestors_to_unlink or new_ancestors):
        logger.info("No new ancestors linked, nor old ancestors unlinked")
        return

    # Any TransactionError raised below propagates to the caller unchanged
    if new_ancestors:
        # Reuse the Sample's Activity node when there is one, otherwise
        # generate the data for a new Activity to be created with the linkage
        activity_data_dict = {}
        activity_uuid = schema_neo4j_queries.get_parent_activity_uuid_from_entity(neo4j_driver, sample_uuid)
        create_activity = not activity_uuid

        if create_activity:
            activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict)
            activity_uuid = activity_data_dict['uuid']

        logger.info(f"Linking the following new ancestors: {new_ancestors}")
        schema_neo4j_queries.add_new_ancestors_to_existing_activity(neo4j_driver, new_ancestors, activity_uuid, create_activity, activity_data_dict, sample_uuid)

    if ancestors_to_unlink:
        logger.info(f"Unlinking the following ancestor: {ancestors_to_unlink}")
        schema_neo4j_queries.delete_ancestor_linkages_tx(neo4j_driver, sample_uuid, ancestors_to_unlink)

    # Delete the cache of sample if any cache exists
    # Because the `Sample.direct_ancestor` field can be updated
    # NOTE(review): confirm the caller does not already invalidate this cache
    schema_manager.delete_memcached_cache([sample_uuid])
1969+
19401970

19411971
"""
19421972
TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE

0 commit comments

Comments
 (0)