Skip to content

Commit a895a2b

Browse files
committed
Refactor the model to make it usable without the server
Currently, we can only effectively use the `SemanticMatch` and `EquivalenceTable` classes with the server. This creates a new and improved `SemanticMatchDictStore` and refactors the service to use it. Furthermore, we clean up the service to use it in a more pythonic way, eliminating the need for the `service_model` module.
1 parent ce50050 commit a895a2b

File tree

4 files changed

+227
-55
lines changed

4 files changed

+227
-55
lines changed

semantic_matcher/model.py

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import Dict, List
1+
import json
2+
from typing import Dict, List, Set, Optional, Iterable
23

34
from pydantic import BaseModel
45

class SemanticMatch(BaseModel):
    """
    A semantic match, mapping two semanticIDs with a matching score. Can be imagined as a weighted graph with

    `base_semantic_id` ---`score`---> `match_semantic_id`

    :ivar base_semantic_id: The semantic ID the match originates from
    :ivar match_semantic_id: The semantic ID that `base_semantic_id` is matched to
    :ivar score: Matching score of the edge between the two semantic IDs
    :ivar meta_information: Free-form meta information about the match (e.g. its source)
    """
    base_semantic_id: str
    match_semantic_id: str
    score: float
    meta_information: Dict

    def __hash__(self):
        # `meta_information` may contain unhashable values such as lists or nested dicts
        # (e.g. when loaded from JSON via `SemanticMatchDictStore.from_file`), so hashing
        # `frozenset(self.meta_information.items())` would raise a TypeError. Instead we
        # hash a canonical (sorted-keys) JSON serialization, which is deterministic and
        # consistent with pydantic's field-based `__eq__`.
        return hash((
            self.base_semantic_id,
            self.match_semantic_id,
            self.score,
            json.dumps(self.meta_information, sort_keys=True),
        ))
21+
22+
class SemanticMatchDictStore:
    """
    A collection of `SemanticMatch`es, stored in a Dict, where the Key is the `base_semantic_id` and the Value is
    the Set of `SemanticMatch`es with that `base_semantic_id`. This allows for efficient resolution of the
    `SemanticMatch`es of a given `base_semantic_id`.
    """
    def __init__(self, matches: Iterable[SemanticMatch]):
        """
        :param matches: Initial `SemanticMatch`es to populate the store with
        """
        self._store: Dict[str, Set[SemanticMatch]] = {}
        for match in matches:
            self.add(match)

    def add(self, match: SemanticMatch) -> None:
        """
        Add a `SemanticMatch` to the store
        """
        self._store.setdefault(match.base_semantic_id, set()).add(match)

    def discard(self, match: SemanticMatch) -> None:
        """
        Discard a `SemanticMatch` from the store.

        Like `set.discard`, this is a no-op if the `SemanticMatch` is not in the store.
        """
        # Guard the lookup: unconditionally indexing `self._store[match.base_semantic_id]`
        # would raise a KeyError for an unknown `base_semantic_id`, contradicting the
        # no-raise `discard` semantics used for the inner set below.
        matches_for_id = self._store.get(match.base_semantic_id)
        if matches_for_id is None:
            return
        # First remove the `SemanticMatch` from the set of matches for that `base_semantic_id`
        matches_for_id.discard(match)
        # Then, if no `SemanticMatch`es remain for that `base_semantic_id`, remove the Dict entry completely
        if not matches_for_id:
            del self._store[match.base_semantic_id]

    def get_all_matches(self) -> Set[SemanticMatch]:
        """
        Return a set of all `SemanticMatch`es currently inside the store
        """
        all_matches: Set[SemanticMatch] = set()
        for matches_for_id in self._store.values():
            all_matches.update(matches_for_id)
        return all_matches

    def get_matches(self, semantic_id: str, min_score: Optional[float] = None) -> Set[SemanticMatch]:
        """
        Return all `SemanticMatch`es of the given `semantic_id` currently inside the store that have a score
        higher than or equal to `min_score`. If `min_score` is None, all matches for the id are returned.

        :param semantic_id: The `base_semantic_id` to look up
        :param min_score: Optional minimum score; matches scoring below it are filtered out
        """
        matches = self._store.get(semantic_id, set())
        if min_score is None:
            # Return a copy so callers cannot mutate the internal set
            return set(matches)
        return {match for match in matches if match.score >= min_score}

    def to_file(self, filename: str) -> None:
        """
        Serialize all `SemanticMatch`es currently in the store to the given JSON file
        """
        matches: List[Dict] = [match.model_dump() for match in self.get_all_matches()]
        with open(filename, "w") as file:
            json.dump(matches, file, indent=4)

    @classmethod
    def from_file(cls, filename: str) -> "SemanticMatchDictStore":
        """
        Alternate constructor: build a store from a JSON file previously written by `to_file`
        """
        with open(filename, "r") as file:
            matches_data = json.load(file)
        return cls(SemanticMatch(**match_dict) for match_dict in matches_data)

    def __len__(self) -> int:
        # Total number of `SemanticMatch`es across all `base_semantic_id` entries
        return sum(len(matches) for matches in self._store.values())
86+
1987

2088
class EquivalenceTable(BaseModel):
2189
matches: Dict[str, List[SemanticMatch]]

semantic_matcher/service.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,26 @@
1-
from typing import List
1+
from typing import Optional, List
22

3+
from pydantic import BaseModel
34
import requests
45
from fastapi import APIRouter
56

6-
from semantic_matcher import model, service_model
7+
from semantic_matcher import model
8+
9+
10+
class MatchRequest(BaseModel):
    """
    Request body for the :func:`service.SemanticMatchingService.get_matches`

    :ivar semantic_id: The semantic ID that we want to find matches for
    :ivar score_limit: Minimum matching score; only matches with a score greater than or equal to this are returned
    :ivar local_only: If `True`, only check at the local service and do not request other services
    :ivar name: Optional name of the resolved semantic ID for NLP matching
    :ivar definition: Optional definition of the resolved semantic ID for NLP matching
    """
    semantic_id: str
    score_limit: float
    local_only: bool = True
    name: Optional[str] = None
    definition: Optional[str] = None
724

825

926
class SemanticMatchingService:
@@ -27,7 +44,7 @@ class SemanticMatchingService:
2744
def __init__(
2845
self,
2946
endpoint: str,
30-
equivalences: model.EquivalenceTable
47+
matches: model.SemanticMatchDictStore
3148
):
3249
"""
3350
Initializer of :class:`~.SemanticMatchingService`
@@ -46,48 +63,42 @@ def __init__(
4663
self.router.add_api_route(
4764
"/get_matches",
4865
self.get_matches,
66+
response_model=List[model.SemanticMatch],
4967
methods=["GET"]
5068
)
5169
self.router.add_api_route(
5270
"/post_matches",
5371
self.post_matches,
5472
methods=["POST"]
5573
)
56-
self.router.add_api_route(
57-
"/clear",
58-
self.remove_all_matches,
59-
methods=["POST"]
60-
)
6174
self.endpoint: str = endpoint
62-
self.equivalence_table: model.EquivalenceTable = equivalences
75+
self.matches: model.SemanticMatchDictStore = matches
6376

6477
def get_all_matches(self):
    """
    Return all `SemanticMatch`es stored in the local `SemanticMatchDictStore`.

    (The previous docstring still referred to the removed equivalence table.)
    """
    return self.matches.get_all_matches()
7083

71-
def remove_all_matches(self):
72-
self.equivalence_table.remove_all_semantic_matches()
7384

7485
def get_matches(
7586
self,
76-
request_body: service_model.MatchRequest
77-
) -> service_model.MatchesList:
87+
request_body: MatchRequest
88+
) -> List[model.SemanticMatch]:
7889
"""
7990
A query to match two SubmodelElements semantically.
8091
8192
Returns a matching score
8293
"""
8394
# Try first local matching
84-
matches: List[model.SemanticMatch] = self.equivalence_table.get_local_matches(
95+
matches: List[model.SemanticMatch] = list(self.matches.get_matches(
8596
semantic_id=request_body.semantic_id,
86-
score_limit=request_body.score_limit
87-
)
97+
min_score=request_body.score_limit
98+
))
8899
# If the request asks us to only locally look, we're done already
89100
if request_body.local_only:
90-
return service_model.MatchesList(matches=matches)
101+
return matches
91102
# Now look for remote matches:
92103
additional_remote_matches: List[model.SemanticMatch] = []
93104
for match in matches:
@@ -97,7 +108,7 @@ def get_matches(
97108
remote_matching_service = self._get_matcher_from_semantic_id(match.match_semantic_id)
98109
if remote_matching_service is None:
99110
continue
100-
remote_matching_request = service_model.MatchRequest(
111+
remote_matching_request = MatchRequest(
101112
semantic_id=match.match_semantic_id,
102113
# This is a simple "Ungleichung"
103114
# Unified score is multiplied: score(A->B) * score(B->C)
@@ -112,20 +123,20 @@ def get_matches(
112123
definition=request_body.definition
113124
)
114125
url = f"{remote_matching_service}/get_matches"
126+
# Todo: Break recursion loop here
115127
new_matches_response = requests.get(url, json=remote_matching_request.model_dump_json())
116-
match_response = service_model.MatchesList.model_validate_json(new_matches_response.text)
117-
additional_remote_matches.extend(match_response.matches)
128+
response_matches = [model.SemanticMatch(**match) for match in new_matches_response.json()]
129+
additional_remote_matches.extend(response_matches)
118130
# Finally, put all matches together and return
119131
matches.extend(additional_remote_matches)
120-
res = service_model.MatchesList(matches=matches)
121-
return res
132+
return matches
122133

123134
def post_matches(
        self,
        request_body: List[model.SemanticMatch]
) -> None:
    """
    Add every `SemanticMatch` from the request body to the local store.
    """
    for new_match in request_body:
        self.matches.add(new_match)
    # Todo: Figure out how to properly return 200
130141

131142
def _get_matcher_from_semantic_id(self, semantic_id: str) -> str:

semantic_matcher/service_model.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

test/test_model.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import unittest
2+
from typing import Dict
3+
import os
4+
5+
from semantic_matcher.model import SemanticMatch, SemanticMatchDictStore
6+
7+
8+
class TestSemanticMatch(unittest.TestCase):
    """Tests for the `SemanticMatch` model."""

    def test_creation(self):
        # Construct a match and check that every field round-trips unchanged
        expected_meta: Dict = {"source": "test"}
        created = SemanticMatch(
            base_semantic_id="id1",
            match_semantic_id="id2",
            score=0.9,
            meta_information=expected_meta,
        )
        self.assertEqual("id1", created.base_semantic_id)
        self.assertEqual("id2", created.match_semantic_id)
        self.assertEqual(0.9, created.score)
        self.assertEqual(expected_meta, created.meta_information)
22+
23+
class TestSemanticMatchDictStore(unittest.TestCase):
    """Tests for `SemanticMatchDictStore`."""

    def setUp(self):
        # Three matches: two sharing base id "id1", one with base id "id2"
        self.match1 = SemanticMatch(
            base_semantic_id="id1",
            match_semantic_id="id2",
            score=0.9,
            meta_information={"source": "test1"}
        )
        self.match2 = SemanticMatch(
            base_semantic_id="id1",
            match_semantic_id="id3",
            score=0.8,
            meta_information={"source": "test2"}
        )
        self.match3 = SemanticMatch(
            base_semantic_id="id2",
            match_semantic_id="id4",
            score=0.7,
            meta_information={"source": "test3"}
        )
        self.store = SemanticMatchDictStore([self.match1, self.match2, self.match3])

    def test_add(self):
        match4 = SemanticMatch(
            base_semantic_id="id3",
            match_semantic_id="id5",
            score=0.85,
            meta_information={"source": "test4"}
        )
        self.store.add(match4)
        # Use the public API rather than poking the private `_store`
        self.assertIn(match4, self.store.get_matches("id3"))

    def test_discard(self):
        self.store.discard(self.match1)
        self.assertNotIn(self.match1, self.store.get_matches("id1"))

        self.store.discard(self.match2)
        # id1 had only match1 and match2, so its dict entry should be removed completely.
        # NOTE: this deliberately inspects the internal `_store` to verify the cleanup.
        self.assertNotIn("id1", self.store._store)

    def test_get_all_matches(self):
        all_matches = self.store.get_all_matches()
        self.assertEqual(len(all_matches), 3)
        self.assertIn(self.match1, all_matches)
        self.assertIn(self.match2, all_matches)
        self.assertIn(self.match3, all_matches)

    def test_get_matches(self):
        # Without min_score: both matches for id1 are returned
        matches = self.store.get_matches("id1")
        self.assertEqual(len(matches), 2)
        self.assertIn(self.match1, matches)
        self.assertIn(self.match2, matches)

        # With min_score: only match1 (score 0.9) clears 0.85
        matches = self.store.get_matches("id1", min_score=0.85)
        self.assertEqual(len(matches), 1)
        self.assertIn(self.match1, matches)
        self.assertNotIn(self.match2, matches)

        # Non-existing semantic_id yields an empty set
        matches = self.store.get_matches("non_existing")
        self.assertEqual(len(matches), 0)

        # min_score higher than all scores yields an empty set
        matches = self.store.get_matches("id1", min_score=1.0)
        self.assertEqual(len(matches), 0)

    def test_to_file_and_from_file(self):
        import tempfile

        # Write into a temporary directory so the JSON file is cleaned up even when an
        # assertion fails (the previous version left `test_store.json` in the CWD on failure).
        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = os.path.join(tmp_dir, "test_store.json")

            self.store.to_file(filename)
            self.assertTrue(os.path.exists(filename))

            loaded_store = SemanticMatchDictStore.from_file(filename)

            self.assertEqual(len(loaded_store), len(self.store))
            self.assertEqual(self.store.get_all_matches(), loaded_store.get_all_matches())

    def test_length(self):
        self.assertEqual(len(self.store), 3)
        match4 = SemanticMatch(
            base_semantic_id="id3",
            match_semantic_id="id5",
            score=0.85,
            meta_information={"source": "test4"}
        )
        self.store.add(match4)
        self.assertEqual(len(self.store), 4)
        self.store.discard(match4)
        self.assertEqual(len(self.store), 3)
115+
116+
117+
# Allow running this test module directly: `python test/test_model.py`
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)