Skip to content

Commit 3e9fef1

Browse files
committed
SnapShooter: Bugfix - cross entity dependencies.
Simplified the attribute paths when specifying hook dependencies. Required for correct execution when hooks run on different entites, but depend on each other.
1 parent 18ce44e commit 3e9fef1

File tree

2 files changed

+15
-97
lines changed

2 files changed

+15
-97
lines changed

dp3/snapshots/snapshot_hooks.py

+10-81
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from collections import defaultdict
77
from collections.abc import Hashable
88
from dataclasses import dataclass, field
9-
from itertools import combinations
109
from typing import Callable, Union
1110

1211
from dp3.common.attrspec import AttrType
@@ -116,11 +115,8 @@ def register(
116115
self.used_links |= self._validate_attr_paths(entity_type, depends_on)
117116
self.used_links |= self._validate_attr_paths(entity_type, may_change)
118117

119-
depends_on = self._expand_path_backlinks(entity_type, depends_on)
120-
may_change = self._expand_path_backlinks(entity_type, may_change)
121-
122-
depends_on = self._embed_base_entity(entity_type, depends_on)
123-
may_change = self._embed_base_entity(entity_type, may_change)
118+
depends_on = self._get_attr_path_destinations(entity_type, depends_on)
119+
may_change = self._get_attr_path_destinations(entity_type, may_change)
124120

125121
hook_id = (
126122
f"{get_func_name(hook)}("
@@ -161,25 +157,18 @@ def _validate_attr_paths(
161157
position = entity_attributes[position.relation_to]
162158
return used_links
163159

164-
def _expand_path_backlinks(self, base_entity: str, paths: list[list[str]]):
160+
def _get_attr_path_destinations(self, base_entity: str, paths: list[list[str]]) -> list[str]:
165161
"""
166-
Returns a list of all possible subpaths considering the path backlinks.
167-
168-
With user defined entities, attributes and dependency paths, presence of backlinks (cycles)
169-
in specified dependency paths must be expected. To fully track dependencies,
170-
we assume any entity repeated in the path can be referenced multiple times,
171-
effectively making a cycle in the path, which can be ignored.
162+
Normalize paths to contain only destination attributes.
163+
Returns:
164+
List of destination attributes as tuples (entity, attr).
172165
"""
173-
expanded_paths = []
166+
destinations = []
174167
for path in paths:
175168
resolved_path = self._resolve_entities_in_path(base_entity, path)
176-
expanded = [resolved_path] + self._catch_them_all(resolved_path)
177-
expanded_paths.extend(
178-
self._extract_path_from_resolved(resolved) for resolved in expanded
179-
)
180-
unique_paths = {tuple(path) for path in expanded_paths}
181-
expanded_paths = sorted([list(path) for path in unique_paths], key=lambda x: len(x))
182-
return expanded_paths
169+
dest_entity, dest_attr = resolved_path[-1]
170+
destinations.append(f"{dest_entity}.{dest_attr}")
171+
return destinations
183172

184173
def _resolve_entities_in_path(self, base_entity: str, path: list[str]) -> list[tuple[str, str]]:
185174
"""
@@ -203,66 +192,6 @@ def _resolve_entities_in_path(self, base_entity: str, path: list[str]) -> list[t
203192
position = entity_attributes[position.relation_to]
204193
return resolved_path
205194

206-
@staticmethod
207-
def _extract_path_from_resolved(path: list[tuple[str, str]]) -> list[str]:
208-
"""Transform list[(entity_name, attr_name)] to list[attr_name]."""
209-
return [attr for entity, attr in path]
210-
211-
def _catch_them_all(self, path: list[tuple[str, str]]) -> list[list[tuple[str, str]]]:
212-
"""
213-
Recursively searches for all possible path cycles and returns
214-
Args:
215-
path: A resolved link path of tuples (entity_name, attr_name).
216-
Returns:
217-
A list of all possible path permutations.
218-
"""
219-
root_cycles = self._get_root_cycles(path)
220-
out = []
221-
for beg, end in root_cycles:
222-
pre = path[:beg]
223-
post = path[end:]
224-
225-
inner_cycles = self._catch_them_all(path[beg:end])
226-
out.extend(pre + inner + post for inner in inner_cycles)
227-
out.append(pre + post)
228-
return out
229-
230-
@staticmethod
231-
def _get_root_cycles(path: list[tuple[str, str]]) -> list[tuple[int, int]]:
232-
"""
233-
Collects indexes of entities on the path, and returns a list of "root cycles"
234-
A root cycle is defined using tuple of (start_index, end_index) in the path.
235-
Examines all possible combinations of backlink cycles,
236-
but returns only ones not completely inside any other existing cycle.
237-
238-
Args:
239-
path: A resolved link path of tuples (entity_name, attr_name).
240-
Returns:
241-
Empty list if path contains no cycles,
242-
a list of all possible "root cycle" combinations otherwise.
243-
"""
244-
entity_indexes: defaultdict[str, list[int]] = defaultdict(list)
245-
for i, (entity, _attr) in enumerate(path):
246-
entity_indexes[entity].append(i)
247-
248-
if not any(len(indexes) > 1 for indexes in entity_indexes.values()):
249-
return []
250-
251-
possible_backlinks = [
252-
combination
253-
for indexes in entity_indexes.values()
254-
for combination in combinations(indexes, 2)
255-
]
256-
return [
257-
(curr_beg, curr_end)
258-
for curr_beg, curr_end in possible_backlinks
259-
if not any(beg < curr_beg and curr_end < end for beg, end in possible_backlinks)
260-
]
261-
262-
@staticmethod
263-
def _embed_base_entity(base_entity: str, paths: list[list[str]]):
264-
return ["->".join([base_entity] + path) for path in paths]
265-
266195
def run(self, entities: dict) -> list[DataPointTask]:
267196
"""Runs registered hooks."""
268197
entity_types = {etype for etype, _ in entities}

tests/test_common/test_snapshots.py

+5-16
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,9 @@ def validate_expanded_paths(self, hook_base, path, expected_expansions):
5959
def test_backlinks(self):
6060
hook_base = {"hook": dummy_hook_abc, "entity_type": "A", "depends_on": [["data2"]]}
6161
paths = [
62-
(["bs", "as", "data1"], ["A->bs->as->data1", "A->data1"]),
63-
(["bs", "as", "bs", "data1"], ["A->bs->as->bs->data1", "A->bs->data1"]),
64-
(["bs", "cs", "ds", "cs", "data1"], ["A->bs->cs->ds->cs->data1", "A->bs->cs->data1"]),
65-
(
66-
["bs", "as", "ds", "as", "data1"],
67-
["A->bs->as->ds->as->data1", "A->bs->as->data1", "A->ds->as->data1", "A->data1"],
68-
),
62+
(["bs", "as", "bs", "data1"], ["B.data1"]),
63+
(["bs", "cs", "ds", "cs", "data1"], ["C.data1"]),
64+
(["bs", "as", "ds", "as", "data1"], ["A.data1"]),
6965
]
7066
for path, expected_expansions in paths:
7167
with self.subTest(path=path, expected_expansions=expected_expansions):
@@ -74,15 +70,8 @@ def test_backlinks(self):
7470
def test_nested_backlinks(self):
7571
hook_base = {"hook": dummy_hook_abc, "entity_type": "A", "depends_on": [["data2"]]}
7672
paths = [
77-
(["bs", "as", "bs", "data1"], ["A->bs->as->bs->data1", "A->bs->data1"]),
78-
(
79-
["bs", "cs", "bs", "as", "data1"],
80-
["A->bs->cs->bs->as->data1", "A->data1", "A->bs->as->data1"],
81-
),
82-
(
83-
["bs", "as", "bs", "as", "data1"],
84-
["A->bs->as->bs->as->data1", "A->data1", "A->bs->as->data1"],
85-
),
73+
(["bs", "as", "bs", "data1"], ["B.data1"]),
74+
(["bs", "cs", "bs", "as", "data1"], ["A.data1"]),
8675
]
8776
for path, expected_expansions in paths:
8877
with self.subTest(path=path, expected_expansions=expected_expansions):

0 commit comments

Comments
 (0)