Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fonctionnalité : ajoute la possibilité de rechercher dans les guides de contribution #42

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions geotribu_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
parser_latest_content,
parser_open_result,
parser_search_content,
parser_search_guides,
parser_search_image,
parser_upgrade,
)
Expand Down Expand Up @@ -183,6 +184,17 @@ def main(args: list[str] = None):
add_common_arguments(subcmd_search_content)
parser_search_content(subcmd_search_content)

# Search contribution guides
subcmd_search_guides = subparsers.add_parser(
"search-guides",
aliases=["guides", "contribution"],
help="Rechercher dans les guides de contribution",
formatter_class=main_parser.formatter_class,
prog="search-guides",
)
add_common_arguments(subcmd_search_guides)
parser_search_guides(subcmd_search_guides)

# Search Image
subcmd_search_image = subparsers.add_parser(
"search-image",
Expand Down
12 changes: 12 additions & 0 deletions geotribu_cli/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ class GeotribuDefaults:
cdn_base_url: str = "https://cdn.geotribu.fr/"
cdn_base_path: str = "img"
cdn_search_index: str = "search-index.json"
# Guides de contribution
guides_base_url: str = "https://contribuer.geotribu.fr/"
guides_search_index: str = "search/search_index.json"
# comments
comments_base_url: str = "https://comments.geotribu.fr/"
# RSS
Expand All @@ -41,6 +44,15 @@ def cdn_search_index_full_url(self) -> str:
"""
return f"{self.cdn_base_url}{self.cdn_base_path}/{self.cdn_search_index}"

@property
def guides_search_index_full_url(self) -> str:
    """Full URL of the MkDocs search index published on contribuer.geotribu.fr.

    Built by concatenating `guides_base_url` (trailing slash included) and
    `guides_search_index`.

    Returns:
        str: URL as string
    """
    return f"{self.guides_base_url}{self.guides_search_index}"

@property
def rss_created_full_url(self) -> str:
"""Returns website RSS full URL for latest created contents.
Expand Down
1 change: 1 addition & 0 deletions geotribu_cli/subcommands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
from .open_result import parser_open_result # noqa: F401
from .rss_reader import parser_latest_content # noqa: F401
from .search_content import parser_search_content # noqa: F401
from .search_guides import parser_search_guides # noqa: F401
from .search_image import parser_search_image # noqa: F401
from .upgrade import parser_upgrade # noqa: F401
318 changes: 318 additions & 0 deletions geotribu_cli/subcommands/search_guides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
#! python3 # noqa: E265

# ############################################################################
# ########## IMPORTS #############
# ################################

# standard library
import argparse
import logging
import sys
from pathlib import Path

# 3rd party
import orjson
from lunr import lunr
from lunr.index import Index
from rich import print
from rich.table import Table

# package
from geotribu_cli.__about__ import __title__, __version__
from geotribu_cli.constants import GeotribuDefaults
from geotribu_cli.utils.file_downloader import download_remote_file_to_local
from geotribu_cli.utils.file_stats import is_file_older_than
from geotribu_cli.utils.formatters import convert_octets

# ############################################################################
# ########## GLOBALS #############
# ################################

logger = logging.getLogger(__name__)
defaults_settings = GeotribuDefaults()

# ############################################################################
# ########## FUNCTIONS ###########
# ################################


def format_output_result(
    result: list[dict], search_term: str = None, format_type: str = None, count: int = 5
) -> str:
    """Format result according to output option.

    Args:
        result (list[dict]): result to format. Each item is expected to carry
            the keys `titre`, `score` and `url` (all strings).
        search_term (str, optional): term used for search, echoed in the table
            title. Defaults to None.
        format_type (str, optional): format output option ("table" for a rich
            Table, anything else returns the raw list). Defaults to None.
        count (int, optional): maximum number of rows to display in the table.
            Defaults to 5.

    Returns:
        str: formatted result ready to print (a rich Table when
            format_type == "table", otherwise the unmodified `result` list)
    """

    if format_type == "table":
        # never advertise more displayed results than actually exist
        displayed = min(count, len(result))
        table = Table(
            title=f"Recherche dans les guides - {displayed}/{len(result)} résultats "
            f"avec le terme : {search_term}",
            show_lines=True,
            highlight=True,
            caption=f"{__title__} {__version__}",
        )

        # columns
        table.add_column(header="Titre", justify="left", style="default")
        table.add_column(header="Score", style="magenta")
        table.add_column(header="URL", justify="right", style="blue")

        # iterate over results, truncated to the requested count
        for r in result[:count]:
            table.add_row(
                r.get("titre"),
                r.get("score"),
                r.get("url"),
            )

        return table
    else:
        return result


def generate_index_from_docs(
    input_documents_to_index: dict,
    index_ref_id: str,
    index_configuration: dict,
    index_fieds_definition: list[dict],
) -> Index:
    """Build a lunr search index from a set of documents.

    Args:
        input_documents_to_index (dict): documents to index
        index_ref_id (str): field to use as index primary key
        index_configuration (dict): index configuration; the `lang` key selects
            the indexing language (falls back to "fr")
        index_fieds_definition (list[dict]): fields settings (boost, etc.)

    Returns:
        Index: ready-to-query lunr Index
    """
    index_language = index_configuration.get("lang", "fr")

    return lunr(
        ref=index_ref_id,
        fields=index_fieds_definition,
        documents=input_documents_to_index,
        languages=index_language,
    )


# ############################################################################
# ########## CLI #################
# ################################


def parser_search_guides(
    subparser: argparse.ArgumentParser,
) -> argparse.ArgumentParser:
    """Set the argument parser for the search-guides subcommand.

    Args:
        subparser (argparse.ArgumentParser): parser to set up

    Returns:
        argparse.ArgumentParser: parser ready to use
    """
    # positional: the term to look up in the guides index
    subparser.add_argument(
        "search_term",
        metavar="search-term",
        type=str,
        help="Terme de recherche.",
    )

    # remote search index location (defaults to contribuer.geotribu.fr)
    subparser.add_argument(
        "-r",
        "--remote-index-file",
        dest="remote_index_file",
        type=str,
        default=defaults_settings.guides_search_index_full_url,
        help="Emplacement du fichier distant.",
    )

    # local cache of the built index
    subparser.add_argument(
        "-l",
        "--local-index-file",
        dest="local_index_file",
        type=Path,
        default=Path.home() / ".geotribu" / "search" / "contribuer_search_index.json",
        help="Emplacement du fichier local.",
    )

    # how many results to show
    subparser.add_argument(
        "-n",
        "--results-number",
        dest="results_number",
        type=int,
        default=5,
        help="Nombre de résultats à retourner.",
    )

    # cache expiration (defaults to one week)
    subparser.add_argument(
        "-x",
        "--expiration-rotating-hours",
        dest="expiration_rotating_hours",
        type=int,
        default=24 * 7,
        help="Nombre d'heures à partir duquel considérer le fichier local comme périmé.",
    )

    # output rendering
    subparser.add_argument(
        "-o",
        "--format-output",
        dest="format_output",
        choices=["table", "brut"],
        default="table",
        help="Format de sortie.",
    )

    subparser.set_defaults(func=run)

    return subparser


# ############################################################################
# ########## MAIN ################
# ################################


def run(args: argparse.Namespace):
    """Run the sub command logic.

    There are 2 files involved (names can vary):

    - `contribuer_content_listing.json`: the downloaded file from the website which is \
    just a listing of contents
    - `contribuer_search_index.json` (= args.local_index_file): the file with the indexed \
    contents with lunr built locally from the listing file.

    Process:

    #. Check if the local index file exists and is up to date
    #. If not:
        #. Download the website contents listing from remote
        #. Generate a local index from the contents listing
    #. Load the local index
    #. Perform the search

    Args:
        args (argparse.Namespace): arguments passed to the subcommand
    """
    logger.debug(f"Running {args.command} with {args}")

    # make sure the cache folder exists before any download/write
    args.local_index_file.parent.mkdir(parents=True, exist_ok=True)

    # local contents listing file (raw download, kept next to the built index)
    local_listing_file = Path(
        args.local_index_file.parent / "contribuer_content_listing.json"
    )

    # check local file index
    if not args.local_index_file.exists() or is_file_older_than(
        args.local_index_file, args.expiration_rotating_hours
    ):
        # if the local index doesn't exist or exists but it's outdated: download the
        # listing from website
        get_local_contents_listing = download_remote_file_to_local(
            remote_url_to_download=args.remote_index_file,
            local_file_path=local_listing_file,
            expiration_rotating_hours=args.expiration_rotating_hours,
        )
        if not isinstance(get_local_contents_listing, Path):
            logger.error(
                f"Le téléchargement du fichier distant {args.remote_index_file} "
                f"ou la récupération du fichier local {local_listing_file} a échoué."
            )
            if isinstance(get_local_contents_listing, Exception):
                logger.error(get_local_contents_listing)
            # exit with a non-zero code so callers can detect the failure
            # (bare sys.exit() exits 0, hiding the error from shells/CI)
            sys.exit(1)
        logger.info(
            f"Local listing file: {local_listing_file}, "
            f"{convert_octets(local_listing_file.stat().st_size)}"
        )

        # NOTE(fix): keep the listing under the same name in both branches —
        # previously it was only bound in the `else` branch, so the first run
        # (fresh download) crashed with NameError during result enrichment.
        with local_listing_file.open(mode="rb") as j:
            contents_listing: dict = orjson.loads(j.read())

        # build index from contents listing
        idx = generate_index_from_docs(
            input_documents_to_index=contents_listing.get("docs"),
            index_ref_id="location",
            index_configuration=contents_listing.get("config", {}),
            index_fieds_definition=[
                dict(field_name="title", boost=10),
                dict(field_name="tags", boost=5),
                dict(field_name="text"),
            ],
        )

        # save it as JSON file for next time
        serialized_idx = idx.serialize()

        # export into a JSON file (remove any stale copy first)
        args.local_index_file.unlink(missing_ok=True)
        with args.local_index_file.open(mode="wb") as fd:
            fd.write(orjson.dumps(serialized_idx))

        logger.info(
            f"Local index generated into {args.local_index_file} "
            f"from contents listing ({local_listing_file})."
        )
    else:
        # load the cached listing: it is required to map search refs back to
        # document titles below
        with local_listing_file.open("rb") as fd:
            contents_listing = orjson.loads(fd.read())

        # load previously built index
        logger.info(
            f"Local index file ({args.local_index_file}) exists and is not "
            f"older than {args.expiration_rotating_hours} hour(s). "
            "Lets use it to perform search."
        )
        with args.local_index_file.open("rb") as fd:
            serialized_idx = orjson.loads(fd.read())
        idx = Index.load(serialized_idx)

    # NOTE(fix): a MkDocs search_index.json is {"config": ..., "docs": [...]};
    # lunr refs are the docs' "location" values, so build a location -> doc map
    # instead of looking refs up directly on the top-level dict (always None).
    docs_by_location: dict = {
        doc.get("location"): doc for doc in contents_listing.get("docs", [])
    }

    # perform the search
    search_results: list[dict] = idx.search(f"{args.search_term}*")

    # results: enrichment and filtering
    final_results = []

    for result in search_results:
        mapped_content = docs_by_location.get(result.get("ref"), {})

        # build an output result; the guides are published under
        # contribuer.geotribu.fr (guides_base_url), not the main website
        out_result = {
            "titre": mapped_content.get("title"),
            "score": f"{result.get('score'):.3}",
            "url": f"{defaults_settings.guides_base_url}{result.get('ref')}",
        }

        final_results.append(out_result)

    # output formatting
    print(
        format_output_result(
            result=final_results,
            search_term=args.search_term,
            format_type=args.format_output,
            count=args.results_number,
        )
    )


# -- Stand alone execution
if __name__ == "__main__":
    # intentional no-op: this module is only invoked through the geotribu CLI
    pass