Skip to content

Commit

Permalink
🕸 remove infinite redirect
Browse files Browse the repository at this point in the history
  • Loading branch information
Joshix-1 committed Sep 30, 2023
1 parent 78cc988 commit 9af3635
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 40 deletions.
27 changes: 27 additions & 0 deletions an_website/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from tornado.httpserver import HTTPServer
from tornado.log import LogFormatter
from tornado.web import Application, RedirectHandler
from typed_stream import Stream

from . import (
DIR,
Expand Down Expand Up @@ -342,6 +343,31 @@ def ignore_modules(config: BetterConfigParser) -> None:
)


def get_normed_paths_from_module_infos(
    module_infos: Iterable[ModuleInfo],
) -> tuple[str, ...]:
    """Get all normalized paths from the module infos.

    Collects every path, alias and sub-page path of every module info,
    keeps only absolute paths (starting with "/"), strips the slashes,
    lower-cases them and removes duplicates while preserving the order
    of first occurrence.

    Paths that are a single character after stripping (e.g. "/z") are
    deliberately excluded, so they never become redirect targets.

    Returns a tuple of normalized paths without leading or trailing "/".
    """
    # dict keys preserve insertion order and deduplicate in one step
    normed: dict[str, None] = {}
    for info in module_infos:
        candidates: list[str | None] = [info.path, *info.aliases]
        candidates.extend(page.path for page in info.sub_pages)
        for candidate in candidates:
            # skip missing paths and non-absolute paths (e.g. "relative")
            if not candidate or not candidate.startswith("/"):
                continue
            stripped = candidate.strip("/")
            if len(stripped) > 1:  # ignore "/" and one-char paths like "/z"
                normed.setdefault(stripped.lower(), None)
    return tuple(normed)


def make_app(config: ConfigParser) -> str | Application:
"""Create the Tornado application and return it."""
module_infos, duration = time_function(get_module_infos)
Expand All @@ -356,6 +382,7 @@ def make_app(config: ConfigParser) -> str | Application:
return Application(
handlers, # type: ignore[arg-type]
MODULE_INFOS=module_infos,
NORMED_PATHS=get_normed_paths_from_module_infos(module_infos),
HANDLERS=handlers,
# General settings
autoreload=False,
Expand Down
14 changes: 10 additions & 4 deletions an_website/utils/base_request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from datetime import date, datetime, timedelta, timezone, tzinfo
from functools import cached_property, partial, reduce
from typing import TYPE_CHECKING, Any, ClassVar, Final, cast
from urllib.parse import SplitResult, quote, urlsplit, urlunsplit
from urllib.parse import SplitResult, urlsplit, urlunsplit
from zoneinfo import ZoneInfo

import elasticapm # type: ignore[import]
Expand Down Expand Up @@ -299,8 +299,14 @@ def fix_url(
if isinstance(url, str):
url = urlsplit(url)
if url.netloc and url.netloc.lower() != self.request.host.lower():
url = urlsplit(f"/redirect?to={quote(url.geturl())}")
path = url.path if new_path is None else new_path # the path of the url
path = "/redirect"
query_args["to"] = url.geturl()
url = urlsplit(self.request.full_url())
else:
path = url.path if new_path is None else new_path
path = f"/{path.strip('/')}".lower()
if path == "/lolwut":
path = path.upper()
if path.startswith("/soundboard/files/") or path in FILE_HASHES_DICT:
query_args.update(
{key: None for key in self.user_settings.iter_option_names()}
Expand All @@ -322,7 +328,7 @@ def fix_url(
(
self.request.protocol,
self.request.host,
path.rstrip("/"),
"" if path == "/" else path,
url.query,
url.fragment,
)
Expand Down
41 changes: 5 additions & 36 deletions an_website/utils/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from .base_request_handler import BaseRequestHandler
from .utils import (
SUS_PATHS,
normalized_levenshtein,
get_close_matches,
remove_suffix_ignore_case,
replace_umlauts,
)
Expand Down Expand Up @@ -110,41 +110,10 @@ async def prepare(self) -> None:
if len(this_path_normalized) == 1:
return self.redirect(self.fix_url(new_path="/"))

distances: list[tuple[float, str]] = []
max_dist = 0.5

for module_info in self.get_module_infos():
if module_info.path is not None:
dist = min( # get the smallest distance with the aliases
normalized_levenshtein(
this_path_normalized, path.strip("/").lower()
)
for path in (*module_info.aliases, module_info.path)
if path != "/z" # do not redirect to /z
)
if dist <= max_dist:
# only if the distance is less than or equal {max_dist}
distances.append((dist, module_info.path))
if len(module_info.sub_pages) > 0:
distances.extend(
(
normalized_levenshtein(
this_path_normalized,
sub_page.path.strip("/").lower(),
),
sub_page.path,
)
for sub_page in module_info.sub_pages
if sub_page.path is not None
)

if len(distances) > 0:
# sort to get the one with the smallest distance in index 0
distances.sort()
dist, path = distances[0] # pylint: disable=redefined-outer-name
# redirect only if the distance is less than or equal {max_dist}
if dist <= max_dist:
return self.redirect(self.fix_url(new_path=path), False)
paths: tuple[str, ...] = self.settings.get("NORMED_PATHS") or ()
matches = get_close_matches(this_path_normalized, paths, count=1)
if matches:
return self.redirect(self.fix_url(new_path=matches[0]), False)

self.set_status(404)
self.write_error(404)
Expand Down
36 changes: 36 additions & 0 deletions an_website/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import argparse
import asyncio
import contextlib
import heapq
import logging
import os
import pathlib
Expand Down Expand Up @@ -577,6 +578,41 @@ def normalized_levenshtein(string1: str, string2: str) -> float:
return float(distance(string1, string2)) / max(len(string1), len(string2))


def get_close_matches(  # based on difflib.get_close_matches
    word: str,
    possibilities: Iterable[str],
    count: int = 3,
    cutoff: float = 0.5,
) -> tuple[str, ...]:
    """Use normalized_levenshtein to return the best "good enough" matches.

    word is the string for which close matches are desired.
    possibilities is an iterable of strings against which to match word.
    Optional arg count (default 3) is the maximum number of close matches
    to return; count must be > 0.
    Optional arg cutoff (default 0.5) is a float in [0, 1]. Because
    normalized_levenshtein is a distance (0 == identical, 1 == completely
    different), possibilities whose distance to word is greater than
    cutoff are ignored.
    The best (no more than count) matches among the possibilities are
    returned in a tuple, sorted by distance, most similar first.
    """
    if count <= 0:
        raise ValueError(f"count must be > 0: {count}")
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff}")
    # (distance, possibility) pairs; smaller distance == more similar
    scored: list[tuple[float, str]] = [
        (distance_, possibility)
        for possibility in possibilities
        if (distance_ := normalized_levenshtein(possibility, word)) <= cutoff
    ]
    # Strip distances from the best count matches
    return tuple(match for _, match in heapq.nsmallest(count, scored))


def parse_bumpscosity(value: str | int | None) -> BumpscosityValue:
"""Parse a string to a valid bumpscosity value."""
if isinstance(value, str):
Expand Down
6 changes: 6 additions & 0 deletions tests/test_request_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ async def test_json_apis(fetch: FetchCallable) -> None: # noqa: F811
async def test_not_found_handler(fetch: FetchCallable) -> None: # noqa: F811
"""Check if the NotFoundHandler works."""
assert_valid_html_response(await fetch("/qwertzuiop"), {404})
assert_valid_html_response(
await fetch(
"/https:/github.com/asozialesnetzwerk/vertauschtewoerterplugin"
),
{404},
)

await assert_valid_redirect(fetch, "/services.html", "/services", {308})
await assert_valid_redirect(fetch, "/services/", "/services", {308})
Expand Down

0 comments on commit 9af3635

Please sign in to comment.