From da2563b510682afbf783c564bfc57094fa19a4e3 Mon Sep 17 00:00:00 2001 From: Dennis Priskorn <68460690+dpriskorn@users.noreply.github.com> Date: Sat, 1 Jan 2022 13:17:24 +0100 Subject: [PATCH 1/4] config.example.py: Use generic username --- config.example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.example.py b/config.example.py index deed9b8..3854a3c 100644 --- a/config.example.py +++ b/config.example.py @@ -7,11 +7,11 @@ password = "" # Global settings +wiki_user = "User:Username" # Change this to your username logging.basicConfig(level=logging.WARNING) version = "0.2" # Don't touch this. wd_prefix = "http://www.wikidata.org/entity/" endpoint = "https://query.wikidata.org/sparql" -wiki_user = "User:So9q" # Change this to your username user_agent = f"ItemSubjector/{version} (https://github.com/dpriskorn/ItemSubjector), {wiki_user}" tool_url = "https://github.com/dpriskorn/ItemSubjector" tool_wikipage = "Wikidata:Tools/ItemSubjector" From fab148ad33ac709a1635ba02afe0ed0f7db080e6 Mon Sep 17 00:00:00 2001 From: Dennis Priskorn <68460690+dpriskorn@users.noreply.github.com> Date: Sat, 1 Jan 2022 15:19:09 +0100 Subject: [PATCH 2/4] config.example.py: Use generic username & new variable list_of_allowed_aliases README.md: More information about the botpassword permissions. suggestion.py: extract_search_strings(): Honor the new variable --- README.md | 6 +++++- config.example.py | 1 + src/models/suggestion.py | 6 +++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7ca54b4..5b6d5cb 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,11 @@ See [Kubernetes_HOWTO.md](Kubernetes_HOWTO.md) # Setup Like my other tools, copy config.example.py -> config.py and enter the botusername -(e.g. So9q@itemsubjector) and password +(e.g. So9q@itemsubjector) and password +(first [create a botpassword](https://www.wikidata.org/wiki/Special:BotPasswords) +for your account +and make sure you give it the *edit page permission* +and *high volume permissions*) * e.g. `cp config.example.py config.py && nano config.py` *GNU Nano is an editor, press `ctrl+x` when you are done and `y` to save your changes* diff --git a/config.example.py b/config.example.py index 3854a3c..256149d 100644 --- a/config.example.py +++ b/config.example.py @@ -8,6 +8,7 @@ # Global settings wiki_user = "User:Username" # Change this to your username +list_of_allowed_aliases = [] # Add elements like this ["API"] logging.basicConfig(level=logging.WARNING) version = "0.2" # Don't touch this. wd_prefix = "http://www.wikidata.org/entity/" diff --git a/src/models/suggestion.py b/src/models/suggestion.py index 02e7f03..d1cac86 100644 --- a/src/models/suggestion.py +++ b/src/models/suggestion.py @@ -5,6 +5,7 @@ from wikibaseintegrator.datatypes import Item as ItemType +import config from src.helpers.calculations import calculate_random_editgroups_hash from src.helpers.cleaning import clean_rich_formatting from src.helpers.console import print_search_strings_table, console @@ -112,8 +113,11 @@ def clean_special_symbols(string: str): ): for alias in self.item.aliases: # logger.debug(f"extracting alias:{alias}") - if len(alias) < 5: + if len(alias) < 5 and alias not in config.list_of_allowed_aliases: console.print(f"Skipping short alias '{alias}' to avoid false positives", style="#FF8000") + elif alias in config.list_of_allowed_aliases: + console.print(f"Found {alias} in the allow list") + self.search_strings.append(clean_special_symbols(alias)) else: self.search_strings.append(clean_special_symbols(alias)) # logger.debug(f"search_strings:{self.search_strings}") From 9e0dd67e9685683fdc41b3c330ab8df25b7852f6 Mon Sep 17 00:00:00 2001 From: Dennis Priskorn <68460690+dpriskorn@users.noreply.github.com> Date: Sat, 1 Jan 2022 16:24:03 +0100 Subject: [PATCH 3/4] __init__.py: fix formatting jobs.py: get_validated_main_subjects_as_jobs(): add return typing, enable task selection and pass on the chosen task. menus.py: Comment out old code and sort the functions wikidata.py: Improve the debugging and check the task object. --- src/__init__.py | 1 + src/helpers/jobs.py | 18 ++++++--- src/helpers/menus.py | 86 +++++++++++++++++++++--------------------- src/models/wikidata.py | 8 +++- 4 files changed, 64 insertions(+), 49 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index 0a0623e..b537c09 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -87,6 +87,7 @@ def match_main_subjects_from_sparql(args: argparse.Namespace = None, else: console.print("Got 0 results. Try another query or debug it using --debug") + def export_jobs_to_quickstatements(): logger = logging.getLogger(__name__) logger.info("Exporting jobs to QuickStatements V1 commands. One file for each job.") diff --git a/src/helpers/jobs.py b/src/helpers/jobs.py index 7e70f0d..047e5d6 100644 --- a/src/helpers/jobs.py +++ b/src/helpers/jobs.py @@ -8,11 +8,12 @@ from src import strip_prefix, print_best_practice, console, ask_yes_no_question, \ TaskIds, print_found_items_table, ask_add_to_job_queue, print_keep_an_eye_on_wdqs_lag, print_running_jobs, \ print_finished, print_job_statistics +from src.helpers.menus import select_task from src.models.academic_journals import AcademicJournalItems from src.models.riksdagen_documents import RiksdagenDocumentItems from src.models.scholarly_articles import ScholarlyArticleItems from src.models.thesis import ThesisItems -from src.tasks import tasks +from src.tasks import tasks, Task if TYPE_CHECKING: from src import Task, BatchJob @@ -138,9 +139,11 @@ def handle_job_preparation_or_run_directly_if_any_jobs(args: argparse.Namespace run_jobs(jobs) -def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None, - main_subjects: List[str] = None, - jobs: List[BatchJob] = None): +def get_validated_main_subjects_as_jobs( + args: argparse.Namespace = None, + main_subjects: List[str] = None, + jobs: List[BatchJob] = None +) -> List[BatchJob]: """This function randomly picks a subject and present it for validation""" # logger = logging.getLogger(__name__) if jobs is None: @@ -151,6 +154,11 @@ def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None, raise ValueError("args was None") if main_subjects is None: raise ValueError("main subjects was None") + task: Task = select_task() + if task is None: + raise ValueError("Got no task") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") # TODO implement better check for duplicates to avoid wasting resources picked_before = [] while True: @@ -159,7 +167,7 @@ def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None, if qid not in picked_before: job = process_qid_into_job(qid=qid, # The scientific article task is hardcoded for now - task=tasks[0], + task=task, args=args, confirmation=args.no_confirmation) if job is not None: diff --git a/src/helpers/menus.py b/src/helpers/menus.py index bd16101..13300f2 100644 --- a/src/helpers/menus.py +++ b/src/helpers/menus.py @@ -4,49 +4,8 @@ from consolemenu import SelectionMenu from src.models.suggestion import Suggestion -from src.models.wikidata import WikimediaLanguageCode, Item - -# def select_lexical_category(): -# logger = logging.getLogger(__name__) -# menu = SelectionMenu(WikidataLexicalCategory.__members__.keys(), "Select a lexical category") -# menu.show() -# menu.join() -# selected_lexical_category_index = menu.selected_option -# category_mapping = {} -# for index, item in enumerate(WikidataLexicalCategory): -# category_mapping[index] = item -# selected_lexical_category = category_mapping[selected_lexical_category_index] -# logger.debug(f"selected:{selected_lexical_category_index}=" -# f"{selected_lexical_category}") -# return selected_lexical_category -from src.tasks import tasks - - -def select_language(): - logger = logging.getLogger(__name__) - menu = SelectionMenu(WikimediaLanguageCode.__members__.keys(), "Select a language") - menu.show() - menu.join() - selected_language_index = menu.selected_option - mapping = {} - for index, item in enumerate(WikimediaLanguageCode): - mapping[index] = item - selected_language = mapping[selected_language_index] - logger.debug(f"selected:{selected_language_index}=" - f"{selected_language}") - return selected_language - - -def select_task(): - logger = logging.getLogger(__name__) - menu = SelectionMenu(tasks, "Select a task") - menu.show() - menu.join() - task_index = menu.selected_option - selected_task = tasks[task_index] - logger.debug(f"selected:{task_index}=" - f"{selected_task}") - return selected_task +from src.models.wikidata import Item +from src.tasks import tasks, Task def select_suggestion(suggestions: List[Suggestion] = None, @@ -66,3 +25,44 @@ def select_suggestion(suggestions: List[Suggestion] = None, logger.debug(f"selected:{selected_index}=" f"{selected_suggestion}") return selected_suggestion + + +def select_task() -> Task: + logger = logging.getLogger(__name__) + menu = SelectionMenu(tasks, "Select a task") + menu.show() + menu.join() + task_index = menu.selected_option + selected_task = tasks[task_index] + logger.debug(f"selected:{task_index}=" + f"{selected_task}") + return selected_task + + +# def select_language(): +# logger = logging.getLogger(__name__) +# menu = SelectionMenu(WikimediaLanguageCode.__members__.keys(), "Select a language") +# menu.show() +# menu.join() +# selected_language_index = menu.selected_option +# mapping = {} +# for index, item in enumerate(WikimediaLanguageCode): +# mapping[index] = item +# selected_language = mapping[selected_language_index] +# logger.debug(f"selected:{selected_language_index}=" +# f"{selected_language}") +# return selected_language + +# def select_lexical_category(): +# logger = logging.getLogger(__name__) +# menu = SelectionMenu(WikidataLexicalCategory.__members__.keys(), "Select a lexical category") +# menu.show() +# menu.join() +# selected_lexical_category_index = menu.selected_option +# category_mapping = {} +# for index, item in enumerate(WikidataLexicalCategory): +# category_mapping[index] = item +# selected_lexical_category = category_mapping[selected_lexical_category_index] +# logger.debug(f"selected:{selected_lexical_category_index}=" +# f"{selected_lexical_category}") +# return selected_lexical_category \ No newline at end of file diff --git a/src/models/wikidata.py b/src/models/wikidata.py index 5f47b01..f4a25ea 100644 --- a/src/models/wikidata.py +++ b/src/models/wikidata.py @@ -805,7 +805,11 @@ def __init__(self, if id is not None: self.id = str(EntityID(id)) if description is None and label is None and aliases is None: - logging.debug("here now") + logging.debug("No of description, label or aliases received") + if task is None: + raise ValueError("Got no task") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") self.fetch_label_and_description_and_aliases(task=task) elif label is None or aliases is None: raise ValueError("This is not supported. " @@ -847,6 +851,8 @@ def fetch_label_and_description_and_aliases(self, """Fetch label and aliases in the task language from the Wikidata API""" if task is None: raise ValueError("task was None") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") from src.helpers.console import console with console.status(f"Fetching {task.language_code.name.title()} label and aliases from the Wikidata API..."): wbi = WikibaseIntegrator() From 4e370a106a447970b96cd4f2d66d82ef45cae03c Mon Sep 17 00:00:00 2001 From: Dennis Priskorn <68460690+dpriskorn@users.noreply.github.com> Date: Mon, 3 Jan 2022 10:06:20 +0100 Subject: [PATCH 4/4] menus.py: Fix clean exit from menus --- src/helpers/menus.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/helpers/menus.py b/src/helpers/menus.py index 13300f2..a35973e 100644 --- a/src/helpers/menus.py +++ b/src/helpers/menus.py @@ -18,13 +18,13 @@ def select_suggestion(suggestions: List[Suggestion] = None, menu.join() selected_index = menu.selected_option selected_suggestion = None - if selected_index == len(suggestions) + 1: + if selected_index > (len(suggestions) - 1): logger.debug("The user choose to skip") else: selected_suggestion = tasks[selected_index] logger.debug(f"selected:{selected_index}=" f"{selected_suggestion}") - return selected_suggestion + return selected_suggestion def select_task() -> Task: @@ -33,6 +33,9 @@ def select_task() -> Task: menu.show() menu.join() task_index = menu.selected_option + if task_index > (len(tasks) - 1): + logger.info("Got exit") + exit(0) selected_task = tasks[task_index] logger.debug(f"selected:{task_index}=" f"{selected_task}")