Skip to content

Commit 29aa870

Browse files
IRI in search working (#78)
* Fix ModuleNotFoundError for url_utils in PyPI distribution
* Fixed search not working respecting languages
* IRI in search doesn't use search but goes directly to concept
* formatting

---------

Co-authored-by: Valentin Starlinger <[email protected]>
1 parent fa8fe0d commit 29aa870

File tree

2 files changed

+180
-28
lines changed

2 files changed

+180
-28
lines changed

src/py_semantic_taxonomy/adapters/routers/web_router.py

Lines changed: 76 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pathlib import Path as PathLib
33
from urllib.parse import quote, unquote, urlencode
44

5+
import rfc3987
56
import structlog
67
from fastapi import APIRouter, Depends, HTTPException, Path, Request
78
from fastapi.responses import HTMLResponse, RedirectResponse
@@ -22,6 +23,15 @@
2223
router = APIRouter(prefix="/web", include_in_schema=False)
2324

2425

26+
def _is_iri(query: str) -> bool:
    """Report whether ``query`` parses as an IRI with an ``http``/``https`` scheme.

    Leading and trailing whitespace is stripped before parsing. A
    ``ValueError`` from ``rfc3987.parse`` is treated as "not an IRI" and
    yields ``False`` instead of propagating.
    """
    candidate = query.strip()
    try:
        components = rfc3987.parse(candidate, rule="IRI")
    except ValueError:
        return False
    # Only web-style IRIs qualify; any other scheme is handled as plain text.
    return components.get("scheme") in ("http", "https")
33+
34+
2535
def value_for_language(value: list[dict[str, str]], lang: str) -> str:
2636
"""Get the `@value` for a list of multilingual strings with correct `@language` value"""
2737
for dct in value:
@@ -189,10 +199,16 @@ async def web_concept_scheme_view(
189199
},
190200
)
191201
except de.ConceptSchemeNotFoundError:
192-
raise HTTPException(status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found")
202+
raise HTTPException(
203+
status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found"
204+
)
193205
except de.ConceptSchemesNotInDatabase as e:
194-
logger.error("Database error while fetching concept scheme", iri=iri, error=str(e))
195-
raise HTTPException(status_code=500, detail="Database error while fetching concept scheme")
206+
logger.error(
207+
"Database error while fetching concept scheme", iri=iri, error=str(e)
208+
)
209+
raise HTTPException(
210+
status_code=500, detail="Database error while fetching concept scheme"
211+
)
196212

197213

198214
def concept_view_url(
@@ -264,7 +280,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
264280
except de.ConceptNotFoundError:
265281
return iri, iri
266282

267-
relationships = await service.relationships_get(iri=decoded_iri, source=True, target=True)
283+
relationships = await service.relationships_get(
284+
iri=decoded_iri, source=True, target=True
285+
)
268286
broader = [
269287
(await get_concept_and_link(obj.target))
270288
for obj in relationships
@@ -277,7 +295,8 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
277295
]
278296

279297
scheme_list = [
280-
(request.url_for("web_concept_view", iri=quote(s["@id"])), s) for s in concept.schemes
298+
(request.url_for("web_concept_view", iri=quote(s["@id"])), s)
299+
for s in concept.schemes
281300
]
282301

283302
associations = await service.association_get_all(source_concept_iri=concept.id_)
@@ -286,29 +305,27 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
286305
for target in obj.target_concepts:
287306
try:
288307
url, assoc_concept = await get_concept_and_link(target["@id"])
289-
formatted_associations.append(
290-
{
291-
"url": url,
292-
"obj": assoc_concept,
293-
"conditional": None,
294-
"conversion": target.get(
295-
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
296-
),
297-
}
298-
)
308+
formatted_associations.append({
309+
"url": url,
310+
"obj": assoc_concept,
311+
"conditional": None,
312+
"conversion": target.get(
313+
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
314+
),
315+
})
299316
except de.ConceptNotFoundError:
300-
formatted_associations.append(
301-
{
302-
"url": target["@id"],
303-
"obj": target["@id"],
304-
"conditional": None,
305-
"conversion": target.get(
306-
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
307-
),
308-
}
309-
)
310-
311-
languages = [(request.url, Language.get(language).display_name(language).title())] + [
317+
formatted_associations.append({
318+
"url": target["@id"],
319+
"obj": target["@id"],
320+
"conditional": None,
321+
"conversion": target.get(
322+
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
323+
),
324+
})
325+
326+
languages = [
327+
(request.url, Language.get(language).display_name(language).title())
328+
] + [
312329
(
313330
concept_view_url(
314331
request,
@@ -343,7 +360,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
343360
except de.ConceptNotFoundError:
344361
raise HTTPException(status_code=404, detail=f"Concept with IRI `{iri}` not found")
345362
except de.ConceptSchemesNotInDatabase as e:
346-
logger.error("Database error while fetching concept", iri=decoded_iri, error=str(e))
363+
logger.error(
364+
"Database error while fetching concept", iri=decoded_iri, error=str(e)
365+
)
347366
raise HTTPException(status_code=500, detail="Database error while fetching concept")
348367

349368

@@ -357,9 +376,38 @@ async def web_search(
357376
language: str = "en",
358377
semantic: bool = True,
359378
search_service=Depends(get_search_service),
379+
graph_service=Depends(get_graph_service),
360380
settings=Depends(get_settings),
361381
) -> HTMLResponse:
362382
"""Search for concepts."""
383+
# Check if query is an IRI and attempt direct lookup
384+
if query and _is_iri(query):
385+
# Try to get concept directly
386+
try:
387+
concept = await graph_service.concept_get(iri=query)
388+
# If found, redirect to concept page
389+
return RedirectResponse(
390+
url=concept_view_url(
391+
request,
392+
concept.id_,
393+
concept.schemes[0]["@id"],
394+
language,
395+
),
396+
status_code=303, # See Other
397+
)
398+
except de.ConceptNotFoundError:
399+
# Not a concept, try concept scheme
400+
try:
401+
concept_scheme = await graph_service.concept_scheme_get(iri=query)
402+
# If found, redirect to concept scheme page
403+
return RedirectResponse(
404+
url=concept_scheme_view_url(request, concept_scheme.id_, language),
405+
status_code=303, # See Other
406+
)
407+
except de.ConceptSchemeNotFoundError:
408+
# IRI not found in database, fall through to regular search
409+
pass
410+
363411
try:
364412
results = []
365413
if query:

tests/integration/test_web_ui.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,107 @@ async def test_web_search_empty_query(sqlite, anonymous_client):
8585

8686
html_content = response.text
8787
assert "Start searching" in html_content or "Search" in html_content
88+
89+
90+
@pytest.mark.postgres
async def test_web_search_with_concept_iri_redirects_to_concept(
    postgres, anonymous_client, cn_db_engine, cn
):
    """A query that is exactly a stored concept IRI yields a 303 to that concept's page."""
    target_iri = cn.concept_top["@id"]
    search_params = {"query": target_iri, "language": "de"}

    # Redirects are not followed so the 303 response itself can be inspected.
    response = await anonymous_client.get(
        "/web/search/", params=search_params, follow_redirects=False
    )
    assert response.status_code == 303

    # The Location header must point at the concept view, contain the
    # percent-encoded IRI, and keep the requested language.
    location = response.headers["location"]
    for expected_part in ("/web/concept/", quote(target_iri), "language=de"):
        assert expected_part in location
111+
112+
113+
@pytest.mark.postgres
async def test_web_search_with_concept_scheme_iri_redirects(
    postgres, anonymous_client, cn_db_engine, cn
):
    """A query that is exactly a stored concept scheme IRI yields a 303 to the scheme page."""
    target_iri = cn.scheme["@id"]
    search_params = {"query": target_iri, "language": "en"}

    # Redirects are not followed so the 303 response itself can be inspected.
    response = await anonymous_client.get(
        "/web/search/", params=search_params, follow_redirects=False
    )
    assert response.status_code == 303

    # The Location header must point at the concept scheme view, contain the
    # percent-encoded IRI, and keep the requested language.
    location = response.headers["location"]
    for expected_part in ("/web/concept_scheme/", quote(target_iri), "language=en"):
        assert expected_part in location
134+
135+
136+
@pytest.mark.postgres
async def test_web_search_with_nonexistent_iri_shows_search_page(
    postgres, anonymous_client, cn_db_engine
):
    """An IRI-shaped query that matches nothing stored must not trigger a redirect.

    The request should fall through to the regular search path, which answers
    200 when a search engine is configured and 503 when it is not.
    """
    nonexistent_iri = "http://example.com/nonexistent/concept/12345"

    # Redirects are deliberately NOT followed: if they were, an unwanted 303
    # would be hidden behind whatever status its target returns, and the
    # "no redirect" requirement would never really be checked.
    response = await anonymous_client.get(
        "/web/search/",
        params={"query": nonexistent_iri, "language": "en"},
        follow_redirects=False,
    )

    # 200: search page rendered; 503: search engine not configured.
    # A 303 here would mean the unknown IRI was wrongly treated as a hit.
    assert response.status_code in (200, 503)
    if response.status_code == 503:
        assert "Search engine not available" in response.text or "503" in response.text
155+
156+
157+
@pytest.mark.postgres
@pytest.mark.typesense
async def test_web_search_with_nonexistent_iri_falls_back_to_search(
    postgres, typesense, anonymous_client, cn_db_engine, cn
):
    """An IRI-shaped query that matches nothing stored falls back to text search.

    With the search engine fixture available, the search page itself must be
    returned (status 200) rather than a redirect.
    """
    nonexistent_iri = "http://example.com/nonexistent/concept/12345"

    # Redirects are deliberately NOT followed, so that a stray 303 fails the
    # status assertion below instead of being silently resolved by the client.
    response = await anonymous_client.get(
        "/web/search/",
        params={"query": nonexistent_iri, "language": "en"},
        follow_redirects=False,
    )

    # The search page is served directly since the IRI is unknown.
    assert response.status_code == 200
    # The search interface is shown (text search may or may not find results).
    assert "Search" in response.text or "search" in response.text
175+
176+
177+
async def test_web_search_with_regular_text_not_treated_as_iri(anonymous_client):
    """Plain search text must not be treated as an IRI (no database required).

    The request should go down the regular search path: 503 when no search
    engine is configured, 200 when one is. It must never redirect.
    """
    # Redirects are deliberately NOT followed, so an unexpected 303 shows up
    # as a failing status code instead of being resolved by the client.
    response = await anonymous_client.get(
        "/web/search/",
        params={"query": "test query", "language": "en"},
        follow_redirects=False,
    )

    # 503: search engine not configured; 200: search page rendered.
    assert response.status_code in (200, 503)
    if response.status_code == 503:
        assert "Search engine not available" in response.text or "503" in response.text
191+

0 commit comments

Comments
 (0)