Skip to content

Commit 2a3eaa1

Browse files
authored
Index resources in the UrlDispatcher to avoid linear search for most cases (#7829)
1 parent 40a5197 commit 2a3eaa1

File tree

5 files changed

+191
-24
lines changed

5 files changed

+191
-24
lines changed

CHANGES/7829.misc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improved URL handler resolution time by indexing resources in the UrlDispatcher.
2+
For applications with a large number of handlers, this should increase performance significantly.
3+
-- by :user:`bdraco`

aiohttp/web_urldispatcher.py

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -716,13 +716,20 @@ class PrefixedSubAppResource(PrefixResource):
716716
def __init__(self, prefix: str, app: "Application") -> None:
717717
super().__init__(prefix)
718718
self._app = app
719-
for resource in app.router.resources():
720-
resource.add_prefix(prefix)
719+
self._add_prefix_to_resources(prefix)
721720

722721
def add_prefix(self, prefix: str) -> None:
723722
super().add_prefix(prefix)
724-
for resource in self._app.router.resources():
723+
self._add_prefix_to_resources(prefix)
724+
725+
def _add_prefix_to_resources(self, prefix: str) -> None:
726+
router = self._app.router
727+
for resource in router.resources():
728+
# Since the canonical path of a resource is about
729+
# to change, we need to unindex it and then reindex
730+
router.unindex_resource(resource)
725731
resource.add_prefix(prefix)
732+
router.index_resource(resource)
726733

727734
def url_for(self, *args: str, **kwargs: str) -> URL:
728735
raise RuntimeError(".url_for() is not supported " "by sub-application root")
@@ -731,11 +738,6 @@ def get_info(self) -> _InfoDict:
731738
return {"app": self._app, "prefix": self._prefix}
732739

733740
async def resolve(self, request: Request) -> _Resolve:
734-
if (
735-
not request.url.raw_path.startswith(self._prefix2)
736-
and request.url.raw_path != self._prefix
737-
):
738-
return None, set()
739741
match_info = await self._app.router.resolve(request)
740742
match_info.add_app(self._app)
741743
if isinstance(match_info.http_exception, HTTPMethodNotAllowed):
@@ -974,27 +976,55 @@ def __contains__(self, route: object) -> bool:
974976

975977
class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]):
976978
NAME_SPLIT_RE = re.compile(r"[.:-]")
979+
HTTP_NOT_FOUND = HTTPNotFound()
977980

978981
def __init__(self) -> None:
979982
super().__init__()
980983
self._resources: List[AbstractResource] = []
981984
self._named_resources: Dict[str, AbstractResource] = {}
985+
self._resource_index: dict[str, list[AbstractResource]] = {}
986+
self._matched_sub_app_resources: List[MatchedSubAppResource] = []
982987

983988
async def resolve(self, request: Request) -> UrlMappingMatchInfo:
984-
method = request.method
989+
resource_index = self._resource_index
985990
allowed_methods: Set[str] = set()
986991

987-
for resource in self._resources:
992+
# Walk the url parts looking for candidates. We walk the url backwards
993+
# to ensure the most explicit match is found first. If there are multiple
994+
# candidates for a given url part because there are multiple resources
995+
# registered for the same canonical path, we resolve them in a linear
996+
# fashion to ensure registration order is respected.
997+
url_part = request.rel_url.raw_path
998+
while url_part:
999+
for candidate in resource_index.get(url_part, ()):
1000+
match_dict, allowed = await candidate.resolve(request)
1001+
if match_dict is not None:
1002+
return match_dict
1003+
else:
1004+
allowed_methods |= allowed
1005+
if url_part == "/":
1006+
break
1007+
url_part = url_part.rpartition("/")[0] or "/"
1008+
1009+
#
1010+
# We didn't find any candidates, so we'll try the matched sub-app
1011+
# resources which we have to walk in a linear fashion because they
1012+
# have regex/wildcard match rules and we cannot index them.
1013+
#
1014+
# For most cases we do not expect there to be many of these since
1015+
# currently they are only added by `add_domain`
1016+
#
1017+
for resource in self._matched_sub_app_resources:
9881018
match_dict, allowed = await resource.resolve(request)
9891019
if match_dict is not None:
9901020
return match_dict
9911021
else:
9921022
allowed_methods |= allowed
9931023

9941024
if allowed_methods:
995-
return MatchInfoError(HTTPMethodNotAllowed(method, allowed_methods))
996-
else:
997-
return MatchInfoError(HTTPNotFound())
1025+
return MatchInfoError(HTTPMethodNotAllowed(request.method, allowed_methods))
1026+
1027+
return MatchInfoError(self.HTTP_NOT_FOUND)
9981028

9991029
def __iter__(self) -> Iterator[str]:
10001030
return iter(self._named_resources)
@@ -1050,6 +1080,30 @@ def register_resource(self, resource: AbstractResource) -> None:
10501080
self._named_resources[name] = resource
10511081
self._resources.append(resource)
10521082

1083+
if isinstance(resource, MatchedSubAppResource):
1084+
# We cannot index match sub-app resources because they have match rules
1085+
self._matched_sub_app_resources.append(resource)
1086+
else:
1087+
self.index_resource(resource)
1088+
1089+
def _get_resource_index_key(self, resource: AbstractResource) -> str:
1090+
"""Return a key to index the resource in the resource index."""
1091+
# strip at the first { to allow for variables
1092+
return resource.canonical.partition("{")[0].rstrip("/") or "/"
1093+
1094+
def index_resource(self, resource: AbstractResource) -> None:
1095+
"""Add a resource to the resource index."""
1096+
resource_key = self._get_resource_index_key(resource)
1097+
# There may be multiple resources for a canonical path
1098+
# so we keep them in a list to ensure that registration
1099+
# order is respected.
1100+
self._resource_index.setdefault(resource_key, []).append(resource)
1101+
1102+
def unindex_resource(self, resource: AbstractResource) -> None:
1103+
"""Remove a resource from the resource index."""
1104+
resource_key = self._get_resource_index_key(resource)
1105+
self._resource_index[resource_key].remove(resource)
1106+
10531107
def add_resource(self, path: str, *, name: Optional[str] = None) -> Resource:
10541108
if path and not path.startswith("/"):
10551109
raise ValueError("path should be started with / or be empty")

docs/web_reference.rst

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,20 +1865,38 @@ unique *name* and at least one :term:`route`.
18651865

18661866
:term:`web-handler` lookup is performed in the following way:
18671867

1868-
1. Router iterates over *resources* one-by-one.
1869-
2. If *resource* matches to requested URL the resource iterates over
1870-
own *routes*.
1871-
3. If route matches to requested HTTP method (or ``'*'`` wildcard) the
1872-
route's handler is used as found :term:`web-handler`. The lookup is
1873-
finished.
1874-
4. Otherwise router tries next resource from the *routing table*.
1875-
5. If the end of *routing table* is reached and no *resource* /
1876-
*route* pair found the *router* returns special :class:`~aiohttp.abc.AbstractMatchInfo`
1868+
1. The router splits the URL and checks the index from longest to shortest.
1869+
For example, '/one/two/three' will first check the index for
1870+
'/one/two/three', then '/one/two', then '/one' and finally '/'.
1871+
2. If the URL part is found in the index, the list of routes for
1872+
that URL part is iterated over. If a route matches the requested HTTP
1873+
method (or ``'*'`` wildcard) the route's handler is used as the chosen
1874+
:term:`web-handler`. The lookup is finished.
1875+
3. If the route is not found in the index, the router tries to find
1876+
the route in the list of :class:`~aiohttp.web.MatchedSubAppResource`,
1877+
(currently only created from :meth:`~aiohttp.web.Application.add_domain`),
1878+
and will iterate over the list of
1879+
:class:`~aiohttp.web.MatchedSubAppResource` in a linear fashion
1880+
until a match is found.
1881+
4. If no *resource* / *route* pair was found, the *router*
1882+
returns the special :class:`~aiohttp.abc.AbstractMatchInfo`
18771883
instance whose :attr:`aiohttp.abc.AbstractMatchInfo.http_exception` is not ``None``
18781884
but :exc:`HTTPException` with either *HTTP 404 Not Found* or
18791885
*HTTP 405 Method Not Allowed* status code.
18801886
Registered :meth:`~aiohttp.abc.AbstractMatchInfo.handler` raises this exception on call.
18811887

1888+
Fixed paths are preferred over variable paths. For example,
1889+
if you have two routes ``/a/b`` and ``/a/{name}``, then the first
1890+
route will always be preferred over the second one.
1891+
1892+
If there are multiple dynamic paths with the same fixed prefix,
1893+
they will be resolved in order of registration.
1894+
1895+
For example, if you have two dynamic routes that are prefixed
1896+
with the fixed ``/users`` path such as ``/users/{x}/{y}/z`` and
1897+
``/users/{x}/y/z``, the first one will be preferred over the
1898+
second one.
1899+
18821900
User should never instantiate resource classes but give it by
18831901
:meth:`UrlDispatcher.add_resource` call.
18841902

@@ -1900,7 +1918,10 @@ Resource classes hierarchy::
19001918
Resource
19011919
PlainResource
19021920
DynamicResource
1921+
PrefixResource
19031922
StaticResource
1923+
PrefixedSubAppResource
1924+
MatchedSubAppResource
19041925

19051926

19061927
.. class:: AbstractResource

tests/test_urldispatch.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,10 +1264,17 @@ async def test_prefixed_subapp_overlap(app: Any) -> None:
12641264
subapp2.router.add_get("/b", handler2)
12651265
app.add_subapp("/ss", subapp2)
12661266

1267+
subapp3 = web.Application()
1268+
handler3 = make_handler()
1269+
subapp3.router.add_get("/c", handler3)
1270+
app.add_subapp("/s/s", subapp3)
1271+
12671272
match_info = await app.router.resolve(make_mocked_request("GET", "/s/a"))
12681273
assert match_info.route.handler is handler1
12691274
match_info = await app.router.resolve(make_mocked_request("GET", "/ss/b"))
12701275
assert match_info.route.handler is handler2
1276+
match_info = await app.router.resolve(make_mocked_request("GET", "/s/s/c"))
1277+
assert match_info.route.handler is handler3
12711278

12721279

12731280
async def test_prefixed_subapp_empty_route(app: Any) -> None:

tests/test_web_urldispatcher.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from aiohttp import web
1212
from aiohttp.pytest_plugin import AiohttpClient
13-
from aiohttp.web_urldispatcher import SystemRoute
13+
from aiohttp.web_urldispatcher import Resource, SystemRoute
1414

1515

1616
@pytest.mark.parametrize(
@@ -142,7 +142,6 @@ async def test_access_to_the_file_with_spaces(
142142
r = await client.get(url)
143143
assert r.status == 200
144144
assert (await r.text()) == data
145-
await r.release()
146145

147146

148147
async def test_access_non_existing_resource(
@@ -544,3 +543,86 @@ async def handler(request: web.Request) -> web.Response:
544543
r = await client.get(yarl.URL(urlencoded_path, encoded=True))
545544
assert r.status == expected_http_resp_status
546545
await r.release()
546+
547+
548+
async def test_order_is_preserved(aiohttp_client: AiohttpClient) -> None:
549+
"""Test route order is preserved.
550+
551+
Note that fixed/static paths are always preferred over a regex path.
552+
"""
553+
app = web.Application()
554+
555+
async def handler(request: web.Request) -> web.Response:
556+
assert isinstance(request.match_info._route.resource, Resource)
557+
return web.Response(text=request.match_info._route.resource.canonical)
558+
559+
app.router.add_get("/first/x/{b}/", handler)
560+
app.router.add_get(r"/first/{x:.*/b}", handler)
561+
562+
app.router.add_get(r"/second/{user}/info", handler)
563+
app.router.add_get("/second/bob/info", handler)
564+
565+
app.router.add_get("/third/bob/info", handler)
566+
app.router.add_get(r"/third/{user}/info", handler)
567+
568+
app.router.add_get(r"/forth/{name:\d+}", handler)
569+
app.router.add_get("/forth/42", handler)
570+
571+
app.router.add_get("/fifth/42", handler)
572+
app.router.add_get(r"/fifth/{name:\d+}", handler)
573+
574+
client = await aiohttp_client(app)
575+
576+
r = await client.get("/first/x/b/")
577+
assert r.status == 200
578+
assert await r.text() == "/first/x/{b}/"
579+
580+
r = await client.get("/second/frank/info")
581+
assert r.status == 200
582+
assert await r.text() == "/second/{user}/info"
583+
584+
# Fixed/static paths are always preferred over regex paths
585+
r = await client.get("/second/bob/info")
586+
assert r.status == 200
587+
assert await r.text() == "/second/bob/info"
588+
589+
r = await client.get("/third/bob/info")
590+
assert r.status == 200
591+
assert await r.text() == "/third/bob/info"
592+
593+
r = await client.get("/third/frank/info")
594+
assert r.status == 200
595+
assert await r.text() == "/third/{user}/info"
596+
597+
r = await client.get("/forth/21")
598+
assert r.status == 200
599+
assert await r.text() == "/forth/{name}"
600+
601+
# Fixed/static paths are always preferred over regex paths
602+
r = await client.get("/forth/42")
603+
assert r.status == 200
604+
assert await r.text() == "/forth/42"
605+
606+
r = await client.get("/fifth/21")
607+
assert r.status == 200
608+
assert await r.text() == "/fifth/{name}"
609+
610+
r = await client.get("/fifth/42")
611+
assert r.status == 200
612+
assert await r.text() == "/fifth/42"
613+
614+
615+
async def test_url_with_many_slashes(aiohttp_client: AiohttpClient) -> None:
616+
app = web.Application()
617+
618+
class MyView(web.View):
619+
async def get(self) -> web.Response:
620+
return web.Response()
621+
622+
app.router.add_routes([web.view("/a", MyView)])
623+
624+
client = await aiohttp_client(app)
625+
626+
r = await client.get("///a")
627+
assert r.status == 200
628+
await r.release()

0 commit comments

Comments
 (0)