diff --git a/README.md b/README.md index 7ca54b4..5b6d5cb 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,11 @@ See [Kubernetes_HOWTO.md](Kubernetes_HOWTO.md) # Setup Like my other tools, copy config.example.py -> config.py and enter the botusername -(e.g. So9q@itemsubjector) and password +(e.g. So9q@itemsubjector) and password +(first [create a botpassword](https://www.wikidata.org/wiki/Special:BotPasswords) +for your account +and make sure you give it the *edit page permission* +and *high volume permissions*) * e.g. `cp config.example.py config.py && nano config.py` *GNU Nano is an editor, press `ctrl+x` when you are done and `y` to save your changes* diff --git a/config.example.py b/config.example.py index deed9b8..256149d 100644 --- a/config.example.py +++ b/config.example.py @@ -7,11 +7,12 @@ password = "" # Global settings +wiki_user = "User:Username" # Change this to your username +list_of_allowed_aliases = [] # Add elements like this ["API"] logging.basicConfig(level=logging.WARNING) version = "0.2" # Don't touch this. wd_prefix = "http://www.wikidata.org/entity/" endpoint = "https://query.wikidata.org/sparql" -wiki_user = "User:So9q" # Change this to your username user_agent = f"ItemSubjector/{version} (https://github.com/dpriskorn/ItemSubjector), {wiki_user}" tool_url = "https://github.com/dpriskorn/ItemSubjector" tool_wikipage = "Wikidata:Tools/ItemSubjector" diff --git a/src/__init__.py b/src/__init__.py index 0a0623e..b537c09 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -87,6 +87,7 @@ def match_main_subjects_from_sparql(args: argparse.Namespace = None, else: console.print("Got 0 results. Try another query or debug it using --debug") + def export_jobs_to_quickstatements(): logger = logging.getLogger(__name__) logger.info("Exporting jobs to QuickStatements V1 commands. One file for each job.") diff --git a/src/helpers/jobs.py b/src/helpers/jobs.py index 4c808de..b0ed45a 100644 --- a/src/helpers/jobs.py +++ b/src/helpers/jobs.py @@ -9,11 +9,12 @@ from src import strip_prefix, print_best_practice, console, ask_yes_no_question, \ TaskIds, print_found_items_table, ask_add_to_job_queue, print_keep_an_eye_on_wdqs_lag, print_running_jobs, \ print_finished, print_job_statistics +from src.helpers.menus import select_task from src.models.academic_journals import AcademicJournalItems from src.models.riksdagen_documents import RiksdagenDocumentItems from src.models.scholarly_articles import ScholarlyArticleItems from src.models.thesis import ThesisItems -from src.tasks import tasks +from src.tasks import tasks, Task if TYPE_CHECKING: from src import Task, BatchJob @@ -139,9 +140,11 @@ def handle_job_preparation_or_run_directly_if_any_jobs(args: argparse.Namespace run_jobs(jobs) -def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None, - main_subjects: List[str] = None, - jobs: List[BatchJob] = None): +def get_validated_main_subjects_as_jobs( + args: argparse.Namespace = None, + main_subjects: List[str] = None, + jobs: List[BatchJob] = None +) -> List[BatchJob]: """This function randomly picks a subject and present it for validation""" logger = logging.getLogger(__name__) if jobs is None: @@ -152,8 +155,12 @@ def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None, raise ValueError("args was None") if main_subjects is None: raise ValueError("main subjects was None") - jobs = jobs subjects_not_picked_yet = main_subjects + task: Task = select_task() + if task is None: + raise ValueError("Got no task") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") while True: # Check if we have any subjects left in the list if len(subjects_not_picked_yet) > 0: @@ -162,7 +169,7 @@ def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None, subjects_not_picked_yet.remove(qid) job = process_qid_into_job(qid=qid, # The scientific article task is hardcoded for now - task=tasks[0], + task=task, args=args, confirmation=args.no_confirmation) if job is not None: diff --git a/src/helpers/menus.py b/src/helpers/menus.py index bd16101..a35973e 100644 --- a/src/helpers/menus.py +++ b/src/helpers/menus.py @@ -4,65 +4,68 @@ from consolemenu import SelectionMenu from src.models.suggestion import Suggestion -from src.models.wikidata import WikimediaLanguageCode, Item - -# def select_lexical_category(): -# logger = logging.getLogger(__name__) -# menu = SelectionMenu(WikidataLexicalCategory.__members__.keys(), "Select a lexical category") -# menu.show() -# menu.join() -# selected_lexical_category_index = menu.selected_option -# category_mapping = {} -# for index, item in enumerate(WikidataLexicalCategory): -# category_mapping[index] = item -# selected_lexical_category = category_mapping[selected_lexical_category_index] -# logger.debug(f"selected:{selected_lexical_category_index}=" -# f"{selected_lexical_category}") -# return selected_lexical_category -from src.tasks import tasks +from src.models.wikidata import Item +from src.tasks import tasks, Task -def select_language(): +def select_suggestion(suggestions: List[Suggestion] = None, + item: Item = None): + if item is None or suggestions is None: + raise ValueError("Did not get what we need") logger = logging.getLogger(__name__) - menu = SelectionMenu(WikimediaLanguageCode.__members__.keys(), "Select a language") + menu = SelectionMenu(suggestions, f"Does any of these fit the label \n'{item.label}'") menu.show() menu.join() - selected_language_index = menu.selected_option - mapping = {} - for index, item in enumerate(WikimediaLanguageCode): - mapping[index] = item - selected_language = mapping[selected_language_index] - logger.debug(f"selected:{selected_language_index}=" - f"{selected_language}") - return selected_language + selected_index = menu.selected_option + selected_suggestion = None + if selected_index > (len(suggestions) - 1): + logger.debug("The user choose to skip") + else: + selected_suggestion = tasks[selected_index] + logger.debug(f"selected:{selected_index}=" + f"{selected_suggestion}") + return selected_suggestion -def select_task(): +def select_task() -> Task: logger = logging.getLogger(__name__) menu = SelectionMenu(tasks, "Select a task") menu.show() menu.join() task_index = menu.selected_option + if task_index > (len(tasks) - 1): + logger.info("Got exit") + exit(0) selected_task = tasks[task_index] logger.debug(f"selected:{task_index}=" f"{selected_task}") return selected_task -def select_suggestion(suggestions: List[Suggestion] = None, - item: Item = None): - if item is None or suggestions is None: - raise ValueError("Did not get what we need") - logger = logging.getLogger(__name__) - menu = SelectionMenu(suggestions, f"Does any of these fit the label \n'{item.label}'") - menu.show() - menu.join() - selected_index = menu.selected_option - selected_suggestion = None - if selected_index == len(suggestions) + 1: - logger.debug("The user choose to skip") - else: - selected_suggestion = tasks[selected_index] - logger.debug(f"selected:{selected_index}=" - f"{selected_suggestion}") - return selected_suggestion +# def select_language(): +# logger = logging.getLogger(__name__) +# menu = SelectionMenu(WikimediaLanguageCode.__members__.keys(), "Select a language") +# menu.show() +# menu.join() +# selected_language_index = menu.selected_option +# mapping = {} +# for index, item in enumerate(WikimediaLanguageCode): +# mapping[index] = item +# selected_language = mapping[selected_language_index] +# logger.debug(f"selected:{selected_language_index}=" +# f"{selected_language}") +# return selected_language + +# def select_lexical_category(): +# logger = logging.getLogger(__name__) +# menu = SelectionMenu(WikidataLexicalCategory.__members__.keys(), "Select a lexical category") +# menu.show() +# menu.join() +# selected_lexical_category_index = menu.selected_option +# category_mapping = {} +# for index, item in enumerate(WikidataLexicalCategory): +# category_mapping[index] = item +# selected_lexical_category = category_mapping[selected_lexical_category_index] +# logger.debug(f"selected:{selected_lexical_category_index}=" +# f"{selected_lexical_category}") +# return selected_lexical_category \ No newline at end of file diff --git a/src/models/suggestion.py b/src/models/suggestion.py index 02e7f03..d1cac86 100644 --- a/src/models/suggestion.py +++ b/src/models/suggestion.py @@ -5,6 +5,7 @@ from wikibaseintegrator.datatypes import Item as ItemType +import config from src.helpers.calculations import calculate_random_editgroups_hash from src.helpers.cleaning import clean_rich_formatting from src.helpers.console import print_search_strings_table, console @@ -112,8 +113,11 @@ def clean_special_symbols(string: str): ): for alias in self.item.aliases: # logger.debug(f"extracting alias:{alias}") - if len(alias) < 5: + if len(alias) < 5 and alias not in config.list_of_allowed_aliases: console.print(f"Skipping short alias '{alias}' to avoid false positives", style="#FF8000") + elif alias in config.list_of_allowed_aliases: + console.print(f"Found {alias} in the allow list") + self.search_strings.append(clean_special_symbols(alias)) else: self.search_strings.append(clean_special_symbols(alias)) # logger.debug(f"search_strings:{self.search_strings}") diff --git a/src/models/wikidata.py b/src/models/wikidata.py index 5f47b01..f4a25ea 100644 --- a/src/models/wikidata.py +++ b/src/models/wikidata.py @@ -805,7 +805,11 @@ def __init__(self, if id is not None: self.id = str(EntityID(id)) if description is None and label is None and aliases is None: - logging.debug("here now") + logging.debug("No of description, label or aliases received") + if task is None: + raise ValueError("Got no task") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") self.fetch_label_and_description_and_aliases(task=task) elif label is None or aliases is None: raise ValueError("This is not supported. " @@ -847,6 +851,8 @@ def fetch_label_and_description_and_aliases(self, """Fetch label and aliases in the task language from the Wikidata API""" if task is None: raise ValueError("task was None") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") from src.helpers.console import console with console.status(f"Fetching {task.language_code.name.title()} label and aliases from the Wikidata API..."): wbi = WikibaseIntegrator()