Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 76 additions & 28 deletions src/py_semantic_taxonomy/adapters/routers/web_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path as PathLib
from urllib.parse import quote, unquote, urlencode

import rfc3987
import structlog
from fastapi import APIRouter, Depends, HTTPException, Path, Request
from fastapi.responses import HTMLResponse, RedirectResponse
Expand All @@ -22,6 +23,15 @@
router = APIRouter(prefix="/web", include_in_schema=False)


def _is_iri(query: str) -> bool:
    """Check if query string is a valid HTTP/HTTPS IRI.

    Leading and trailing whitespace is ignored. The scheme is compared
    case-insensitively, because URI schemes are case-insensitive
    (RFC 3986, section 3.1); ``HTTP://example.com/a`` is therefore
    accepted just like ``http://example.com/a``.

    Returns:
        ``True`` when the text parses under the ``IRI`` rule and its scheme
        is ``http`` or ``https``; ``False`` otherwise.
    """
    try:
        parsed = rfc3987.parse(query.strip(), rule="IRI")
    except ValueError:
        # The parser rejected the text, so it is not an IRI at all.
        return False
    # Guard against a missing/None scheme before normalising its case.
    scheme = parsed.get("scheme") or ""
    return scheme.lower() in ("http", "https")


def value_for_language(value: list[dict[str, str]], lang: str) -> str:
"""Get the `@value` for a list of multilingual strings with correct `@language` value"""
for dct in value:
Expand Down Expand Up @@ -189,10 +199,16 @@ async def web_concept_scheme_view(
},
)
except de.ConceptSchemeNotFoundError:
raise HTTPException(status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found")
raise HTTPException(
status_code=404, detail=f"Concept Scheme with IRI `{iri}` not found"
)
except de.ConceptSchemesNotInDatabase as e:
logger.error("Database error while fetching concept scheme", iri=iri, error=str(e))
raise HTTPException(status_code=500, detail="Database error while fetching concept scheme")
logger.error(
"Database error while fetching concept scheme", iri=iri, error=str(e)
)
raise HTTPException(
status_code=500, detail="Database error while fetching concept scheme"
)


def concept_view_url(
Expand Down Expand Up @@ -264,7 +280,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
except de.ConceptNotFoundError:
return iri, iri

relationships = await service.relationships_get(iri=decoded_iri, source=True, target=True)
relationships = await service.relationships_get(
iri=decoded_iri, source=True, target=True
)
broader = [
(await get_concept_and_link(obj.target))
for obj in relationships
Expand All @@ -277,7 +295,8 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
]

scheme_list = [
(request.url_for("web_concept_view", iri=quote(s["@id"])), s) for s in concept.schemes
(request.url_for("web_concept_view", iri=quote(s["@id"])), s)
for s in concept.schemes
]

associations = await service.association_get_all(source_concept_iri=concept.id_)
Expand All @@ -286,29 +305,27 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
for target in obj.target_concepts:
try:
url, assoc_concept = await get_concept_and_link(target["@id"])
formatted_associations.append(
{
"url": url,
"obj": assoc_concept,
"conditional": None,
"conversion": target.get(
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
),
}
)
formatted_associations.append({
"url": url,
"obj": assoc_concept,
"conditional": None,
"conversion": target.get(
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
),
})
except de.ConceptNotFoundError:
formatted_associations.append(
{
"url": target["@id"],
"obj": target["@id"],
"conditional": None,
"conversion": target.get(
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
),
}
)

languages = [(request.url, Language.get(language).display_name(language).title())] + [
formatted_associations.append({
"url": target["@id"],
"obj": target["@id"],
"conditional": None,
"conversion": target.get(
"http://qudt.org/3.0.0/schema/qudt/conversionMultiplier"
),
})

languages = [
(request.url, Language.get(language).display_name(language).title())
] + [
(
concept_view_url(
request,
Expand Down Expand Up @@ -343,7 +360,9 @@ async def get_concept_and_link(iri: str) -> (str, de.Concept | str):
except de.ConceptNotFoundError:
raise HTTPException(status_code=404, detail=f"Concept with IRI `{iri}` not found")
except de.ConceptSchemesNotInDatabase as e:
logger.error("Database error while fetching concept", iri=decoded_iri, error=str(e))
logger.error(
"Database error while fetching concept", iri=decoded_iri, error=str(e)
)
raise HTTPException(status_code=500, detail="Database error while fetching concept")


Expand All @@ -357,9 +376,38 @@ async def web_search(
language: str = "en",
semantic: bool = True,
search_service=Depends(get_search_service),
graph_service=Depends(get_graph_service),
settings=Depends(get_settings),
) -> HTMLResponse:
"""Search for concepts."""
# Check if query is an IRI and attempt direct lookup
if query and _is_iri(query):
# Try to get concept directly
try:
concept = await graph_service.concept_get(iri=query)
# If found, redirect to concept page
return RedirectResponse(
url=concept_view_url(
request,
concept.id_,
concept.schemes[0]["@id"],
language,
),
status_code=303, # See Other
)
except de.ConceptNotFoundError:
# Not a concept, try concept scheme
try:
concept_scheme = await graph_service.concept_scheme_get(iri=query)
# If found, redirect to concept scheme page
return RedirectResponse(
url=concept_scheme_view_url(request, concept_scheme.id_, language),
status_code=303, # See Other
)
except de.ConceptSchemeNotFoundError:
# IRI not found in database, fall through to regular search
pass

try:
results = []
if query:
Expand Down
104 changes: 104 additions & 0 deletions tests/integration/test_web_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,107 @@ async def test_web_search_empty_query(sqlite, anonymous_client):

html_content = response.text
assert "Start searching" in html_content or "Search" in html_content


@pytest.mark.postgres
async def test_web_search_with_concept_iri_redirects_to_concept(
    postgres, anonymous_client, cn_db_engine, cn
):
    """A search whose query is a stored concept IRI answers with a redirect to that concept."""
    iri = cn.concept_top["@id"]
    search_params = {"query": iri, "language": "de"}

    resp = await anonymous_client.get(
        "/web/search/", params=search_params, follow_redirects=False
    )

    # The redirect itself is inspected, so it must not be followed: 303 See Other
    assert resp.status_code == 303

    # Target must be the concept view, carrying the quoted IRI and the requested language
    location = resp.headers["location"]
    assert "/web/concept/" in location
    assert quote(iri) in location
    assert "language=de" in location

@pytest.mark.postgres
async def test_web_search_with_concept_scheme_iri_redirects(
    postgres, anonymous_client, cn_db_engine, cn
):
    """A search whose query is a stored concept scheme IRI answers with a redirect to that scheme."""
    iri = cn.scheme["@id"]
    search_params = {"query": iri, "language": "en"}

    resp = await anonymous_client.get(
        "/web/search/", params=search_params, follow_redirects=False
    )

    # The redirect itself is inspected, so it must not be followed: 303 See Other
    assert resp.status_code == 303

    # Target must be the concept scheme view, carrying the quoted IRI and the language
    location = resp.headers["location"]
    assert "/web/concept_scheme/" in location
    assert quote(iri) in location
    assert "language=en" in location

@pytest.mark.postgres
async def test_web_search_with_nonexistent_iri_shows_search_page(
    postgres, anonymous_client, cn_db_engine
):
    """Test that searching for an IRI that doesn't exist falls back to search (or error if not configured)."""
    nonexistent_iri = "http://example.com/nonexistent/concept/12345"

    # Redirects are deliberately NOT followed: if the handler wrongly issued a
    # 303 for an unknown IRI, following it would hide the redirect behind the
    # final page's status code and the assertion below could not catch it.
    response = await anonymous_client.get(
        "/web/search/",
        params={"query": nonexistent_iri, "language": "en"},
        follow_redirects=False,
    )

    # Should show search page (200) if search engine configured
    # or error (503) if search engine not configured
    # Important: should NOT redirect (303) since concept doesn't exist
    assert response.status_code in (200, 503)
    if response.status_code == 503:
        assert "Search engine not available" in response.text or "503" in response.text

@pytest.mark.postgres
@pytest.mark.typesense
async def test_web_search_with_nonexistent_iri_falls_back_to_search(
    postgres, typesense, anonymous_client, cn_db_engine, cn
):
    """Test that searching for an IRI that doesn't exist falls back to text search."""
    nonexistent_iri = "http://example.com/nonexistent/concept/12345"

    # Redirects are deliberately NOT followed so that an unexpected 303 would
    # surface as a failing status code instead of being silently resolved.
    response = await anonymous_client.get(
        "/web/search/",
        params={"query": nonexistent_iri, "language": "en"},
        follow_redirects=False,
    )

    # Should show search page (no redirect) since concept doesn't exist
    assert response.status_code == 200
    # Should show the search interface (may or may not have results from text search)
    assert "Search" in response.text or "search" in response.text

async def test_web_search_with_regular_text_not_treated_as_iri(anonymous_client):
    """Test that regular search text is not treated as an IRI (no database required)."""
    # Redirects are deliberately NOT followed: the point of this test is that
    # plain text never triggers the IRI redirect, which can only be observed
    # on the first response.
    response = await anonymous_client.get(
        "/web/search/",
        params={"query": "test query", "language": "en"},
        follow_redirects=False,
    )

    # Should get error because search engine not configured (503)
    # or show search page if engine is configured (200)
    # The important thing is it doesn't try to treat it as an IRI and redirect
    assert response.status_code in (200, 503)
    if response.status_code == 503:
        assert "Search engine not available" in response.text or "503" in response.text