Skip to content
This repository has been archived by the owner on Jan 23, 2024. It is now read-only.

Commit

Permalink
Merge branch 'master' into remove_picked_subject_from_list
Browse files Browse the repository at this point in the history
  • Loading branch information
dpriskorn authored Feb 9, 2022
2 parents 6ac76bd + 4e370a1 commit ee453fc
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 54 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ See [Kubernetes_HOWTO.md](Kubernetes_HOWTO.md)
# Setup
Like my other tools, copy config.example.py ->
config.py and enter the botusername
(e.g. So9q@itemsubjector) and password
(e.g. So9q@itemsubjector) and password
(first [create a botpassword](https://www.wikidata.org/wiki/Special:BotPasswords)
for your account
and make sure you give it the *edit page permission*
and *high volume permissions*)
* e.g. `cp config.example.py config.py && nano config.py`

*GNU Nano is an editor, press `ctrl+x` when you are done and `y` to save your changes*
Expand Down
3 changes: 2 additions & 1 deletion config.example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
password = ""

# Global settings
wiki_user = "User:Username" # Change this to your username
list_of_allowed_aliases = [] # Add elements like this ["API"]
logging.basicConfig(level=logging.WARNING)
version = "0.2" # Don't touch this.
wd_prefix = "http://www.wikidata.org/entity/"
endpoint = "https://query.wikidata.org/sparql"
wiki_user = "User:So9q" # Change this to your username
user_agent = f"ItemSubjector/{version} (https://github.com/dpriskorn/ItemSubjector), {wiki_user}"
tool_url = "https://github.com/dpriskorn/ItemSubjector"
tool_wikipage = "Wikidata:Tools/ItemSubjector"
Expand Down
1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def match_main_subjects_from_sparql(args: argparse.Namespace = None,
else:
console.print("Got 0 results. Try another query or debug it using --debug")


def export_jobs_to_quickstatements():
logger = logging.getLogger(__name__)
logger.info("Exporting jobs to QuickStatements V1 commands. One file for each job.")
Expand Down
19 changes: 13 additions & 6 deletions src/helpers/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
from src import strip_prefix, print_best_practice, console, ask_yes_no_question, \
TaskIds, print_found_items_table, ask_add_to_job_queue, print_keep_an_eye_on_wdqs_lag, print_running_jobs, \
print_finished, print_job_statistics
from src.helpers.menus import select_task
from src.models.academic_journals import AcademicJournalItems
from src.models.riksdagen_documents import RiksdagenDocumentItems
from src.models.scholarly_articles import ScholarlyArticleItems
from src.models.thesis import ThesisItems
from src.tasks import tasks
from src.tasks import tasks, Task

if TYPE_CHECKING:
from src import Task, BatchJob
Expand Down Expand Up @@ -139,9 +140,11 @@ def handle_job_preparation_or_run_directly_if_any_jobs(args: argparse.Namespace
run_jobs(jobs)


def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None,
main_subjects: List[str] = None,
jobs: List[BatchJob] = None):
def get_validated_main_subjects_as_jobs(
args: argparse.Namespace = None,
main_subjects: List[str] = None,
jobs: List[BatchJob] = None
) -> List[BatchJob]:
"""This function randomly picks a subject and present it for validation"""
logger = logging.getLogger(__name__)
if jobs is None:
Expand All @@ -152,8 +155,12 @@ def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None,
raise ValueError("args was None")
if main_subjects is None:
raise ValueError("main subjects was None")
jobs = jobs
subjects_not_picked_yet = main_subjects
task: Task = select_task()
if task is None:
raise ValueError("Got no task")
if not isinstance(task, Task):
raise ValueError("task was not a Task object")
while True:
# Check if we have any subjects left in the list
if len(subjects_not_picked_yet) > 0:
Expand All @@ -162,7 +169,7 @@ def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None,
subjects_not_picked_yet.remove(qid)
job = process_qid_into_job(qid=qid,
# Use the task the user picked via select_task() above
task=tasks[0],
task=task,
args=args,
confirmation=args.no_confirmation)
if job is not None:
Expand Down
91 changes: 47 additions & 44 deletions src/helpers/menus.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,65 +4,68 @@
from consolemenu import SelectionMenu

from src.models.suggestion import Suggestion
from src.models.wikidata import WikimediaLanguageCode, Item

# def select_lexical_category():
# logger = logging.getLogger(__name__)
# menu = SelectionMenu(WikidataLexicalCategory.__members__.keys(), "Select a lexical category")
# menu.show()
# menu.join()
# selected_lexical_category_index = menu.selected_option
# category_mapping = {}
# for index, item in enumerate(WikidataLexicalCategory):
# category_mapping[index] = item
# selected_lexical_category = category_mapping[selected_lexical_category_index]
# logger.debug(f"selected:{selected_lexical_category_index}="
# f"{selected_lexical_category}")
# return selected_lexical_category
from src.tasks import tasks
from src.models.wikidata import Item
from src.tasks import tasks, Task


def select_suggestion(suggestions: List[Suggestion] = None,
                      item: Item = None):
    """Ask the user which suggestion, if any, fits the label of *item*.

    A console menu listing *suggestions* is shown with the item label in
    the title.

    Args:
        suggestions: The candidates to present in the menu.
        item: The item whose ``label`` is shown in the menu title.

    Returns:
        The element of *suggestions* at the selected index, or ``None``
        when the selected index lies past the end of the list (treated as
        the user choosing to skip).

    Raises:
        ValueError: If *item* or *suggestions* is ``None``.
    """
    if item is None or suggestions is None:
        raise ValueError("Did not get what we need")
    logger = logging.getLogger(__name__)
    menu = SelectionMenu(suggestions, f"Does any of these fit the label \n'{item.label}'")
    menu.show()
    menu.join()
    selected_index = menu.selected_option
    selected_suggestion = None
    if selected_index > (len(suggestions) - 1):
        # Any index past the listed suggestions is not a suggestion;
        # presumably it is the menu's own exit entry -- TODO confirm.
        logger.debug("The user choose to skip")
    else:
        # Index into the list the menu was built from. The previous code
        # indexed the unrelated module-level `tasks` list here.
        selected_suggestion = suggestions[selected_index]
        logger.debug(f"selected:{selected_index}="
                     f"{selected_suggestion}")
    return selected_suggestion


def select_task() -> Task:
    """Show a console menu of the available tasks and return the chosen one.

    The menu is built from the module-level ``tasks`` list.

    Returns:
        The element of ``tasks`` at the selected index.

    Raises:
        SystemExit: With status 0 when the selected index lies past the
            end of ``tasks`` (logged as "Got exit").
    """
    logger = logging.getLogger(__name__)
    menu = SelectionMenu(tasks, "Select a task")
    menu.show()
    menu.join()
    task_index = menu.selected_option
    if task_index > (len(tasks) - 1):
        # An index past the listed tasks is not a task; presumably it is
        # the menu's own exit entry -- TODO confirm.
        logger.info("Got exit")
        # Raise SystemExit directly rather than calling the `exit()`
        # helper, which only exists when the `site` module is loaded.
        raise SystemExit(0)
    selected_task = tasks[task_index]
    logger.debug(f"selected:{task_index}="
                 f"{selected_task}")
    return selected_task


def select_suggestion(suggestions: List[Suggestion] = None,
                      item: Item = None):
    """Ask the user which suggestion, if any, fits the label of *item*.

    A console menu listing *suggestions* is shown with the item label in
    the title.

    Args:
        suggestions: The candidates to present in the menu.
        item: The item whose ``label`` is shown in the menu title.

    Returns:
        The element of *suggestions* at the selected index, or ``None``
        when the selected index lies past the end of the list (treated as
        the user choosing to skip).

    Raises:
        ValueError: If *item* or *suggestions* is ``None``.
    """
    if item is None or suggestions is None:
        raise ValueError("Did not get what we need")
    logger = logging.getLogger(__name__)
    menu = SelectionMenu(suggestions, f"Does any of these fit the label \n'{item.label}'")
    menu.show()
    menu.join()
    selected_index = menu.selected_option
    selected_suggestion = None
    if selected_index >= len(suggestions):
        # Treat every index past the listed suggestions as "skip". The
        # previous `== len(suggestions) + 1` test missed the entry
        # directly after the list, so a skip fell through to the lookup.
        logger.debug("The user choose to skip")
    else:
        # Index into the list the menu was built from. The previous code
        # indexed the unrelated module-level `tasks` list here.
        selected_suggestion = suggestions[selected_index]
        logger.debug(f"selected:{selected_index}="
                     f"{selected_suggestion}")
    return selected_suggestion
# def select_language():
# logger = logging.getLogger(__name__)
# menu = SelectionMenu(WikimediaLanguageCode.__members__.keys(), "Select a language")
# menu.show()
# menu.join()
# selected_language_index = menu.selected_option
# mapping = {}
# for index, item in enumerate(WikimediaLanguageCode):
# mapping[index] = item
# selected_language = mapping[selected_language_index]
# logger.debug(f"selected:{selected_language_index}="
# f"{selected_language}")
# return selected_language

# def select_lexical_category():
# logger = logging.getLogger(__name__)
# menu = SelectionMenu(WikidataLexicalCategory.__members__.keys(), "Select a lexical category")
# menu.show()
# menu.join()
# selected_lexical_category_index = menu.selected_option
# category_mapping = {}
# for index, item in enumerate(WikidataLexicalCategory):
# category_mapping[index] = item
# selected_lexical_category = category_mapping[selected_lexical_category_index]
# logger.debug(f"selected:{selected_lexical_category_index}="
# f"{selected_lexical_category}")
# return selected_lexical_category
6 changes: 5 additions & 1 deletion src/models/suggestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from wikibaseintegrator.datatypes import Item as ItemType

import config
from src.helpers.calculations import calculate_random_editgroups_hash
from src.helpers.cleaning import clean_rich_formatting
from src.helpers.console import print_search_strings_table, console
Expand Down Expand Up @@ -112,8 +113,11 @@ def clean_special_symbols(string: str):
):
for alias in self.item.aliases:
# logger.debug(f"extracting alias:{alias}")
if len(alias) < 5:
if len(alias) < 5 and alias not in config.list_of_allowed_aliases:
console.print(f"Skipping short alias '{alias}' to avoid false positives", style="#FF8000")
elif alias in config.list_of_allowed_aliases:
console.print(f"Found {alias} in the allow list")
self.search_strings.append(clean_special_symbols(alias))
else:
self.search_strings.append(clean_special_symbols(alias))
# logger.debug(f"search_strings:{self.search_strings}")
Expand Down
8 changes: 7 additions & 1 deletion src/models/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,11 @@ def __init__(self,
if id is not None:
self.id = str(EntityID(id))
if description is None and label is None and aliases is None:
logging.debug("here now")
logging.debug("No of description, label or aliases received")
if task is None:
raise ValueError("Got no task")
if not isinstance(task, Task):
raise ValueError("task was not a Task object")
self.fetch_label_and_description_and_aliases(task=task)
elif label is None or aliases is None:
raise ValueError("This is not supported. "
Expand Down Expand Up @@ -847,6 +851,8 @@ def fetch_label_and_description_and_aliases(self,
"""Fetch label and aliases in the task language from the Wikidata API"""
if task is None:
raise ValueError("task was None")
if not isinstance(task, Task):
raise ValueError("task was not a Task object")
from src.helpers.console import console
with console.status(f"Fetching {task.language_code.name.title()} label and aliases from the Wikidata API..."):
wbi = WikibaseIntegrator()
Expand Down

0 comments on commit ee453fc

Please sign in to comment.