diff --git a/config.example.py b/config.example.py
index 256149d..6080917 100644
--- a/config.example.py
+++ b/config.example.py
@@ -3,13 +3,15 @@ from pathlib import Path
 # Add your botpassword and login here:
+from typing import List
+
 username = ""
 password = ""
 # Global settings
+loglevel = logging.WARNING
 wiki_user = "User:Username" # Change this to your username
-list_of_allowed_aliases = [] # Add elements like this ["API"]
-logging.basicConfig(level=logging.WARNING)
+list_of_allowed_aliases: List[str] = [] # Add elements like this ["API"]
 version = "0.2" # Don't touch this.
 wd_prefix = "http://www.wikidata.org/entity/"
 endpoint = "https://query.wikidata.org/sparql"
diff --git a/diagrams/classes.puml b/diagrams/classes.puml
new file mode 100644
index 0000000..3bf280d
--- /dev/null
+++ b/diagrams/classes.puml
@@ -0,0 +1,169 @@
+@startuml
+'https://plantuml.com/class-diagram
+
+abstract class BaseModel
+
+package wikimedia {
+    enum WikimediaLanguageCode {
+        BASQUE
+        BENGALI
+        BOKMÅL
+        CZECH
+        DANISH
+        ENGLISH
+        ESTONIAN
+        FRENCH
+        GERMAN
+        HEBREW
+        LATIN
+        MALAYALAM
+        RUSSIAN
+        SWEDISH
+    }
+    enum WikimediaLanguageQID {
+        BASQUE = "Q8752"
+        BENGALI = "Q9610"
+        BOKMÅL = "Q25167"
+        CZECH = "Q9056"
+        DANISH = "Q9035"
+        ENGLISH = "Q1860"
+        ESTONIAN = "Q9072"
+        FRENCH = "Q150"
+        GERMAN = "Q188"
+        HEBREW = "Q9288"
+        LATIN = "Q397"
+        MALAYALAM = "Q36236"
+        RUSSIAN = "Q7737"
+        SWEDISH = "Q9027"
+    }
+    package wikidata {
+        class Entity {
+            id: Optional[str]
+            label: str
+            upload_one_statement_to_wikidata()
+            url()
+        }
+        class EntityID{
+            letter: WikidataNamespaceLetters
+            rest: str
+            __init__()
+            __str__()
+        }
+        class ForeignID{
+            __init__()
+        }
+        class SparqlItem{
+            item: Value
+            itemLabel: Value
+            validate_qid_and_copy_label()
+        }
+        class Item{
+            label: Optional[str] = None
+            description: Optional[str] = None
+            aliases: Optional[List[str]] = None
+            __init__()
+            __str__()
+            parse_json()
+            parse_from_wdqs_json()
+            fetch_label_and_description_and_aliases()
+        }
+        enum WikidataGrammaticalFeature {
+            ACTIVE_VOICE
+            DEFINITE
+            GENITIVE_CASE
+            IMPERATIVE
+            INDEFINITE
+            INFINITIVE
+            NOMINATIVE_CASE
+            PASSIVE_VOICE
+            PLURAL
+            PRESENT_TENSE
+            PRETERITE
+            SIMPLE_PRESENT
+            SINGULAR
+            SUPINE
+            THIRD_PERSON_SINGULAR
+        }
+        enum WikidataLexicalCategory {
+            ADJECTIVE
+            ADVERB
+            AFFIX
+            NOUN
+            PROPER_NOUN
+            VERB
+        }
+        enum WikidataNamespaceLetters {
+            ITEM
+            LEXEME
+            PROPERTY
+        }
+    }
+}
+package items {
+    abstract class Items
+    class AcademicJournalItems {
+        fetch_based_on_label()
+    }
+    class RiksdagenDocumentItems {
+        +list
+        +fetch_based_on_label()
+    }
+
+    class ScholarlyArticleItems {
+        +list
+        +fetch_based_on_label()
+    }
+    class ThesisItems {
+        list
+        fetch_based_on_label()
+    }
+}
+class Suggestion {
+    item: Item = None
+    search_strings: List[str] = None
+    task: Task = None
+    args: argparse.Namespace = None
+    __init__()
+    __str__()
+    add_to_items()
+    extract_search_strings()
+    search_urls()
+}
+
+class Task {
+    best_practice_information: Union[str, None] = None
+    id: TaskIds = None
+    label: str = None
+    language_code: SupportedLanguageCode = None
+    number_of_queries_per_search_string = 1
+    __init__()
+    __str__()
+}
+
+class BatchJobs {
+job_count
+jobs: List[BatchJob]
+print_running_jobs()
+run_jobs()
+}
+
+class BatchJob {
+    +suggestion: Suggestion
+    +items: Items
+    run()
+}
+
+Items <|-- AcademicJournalItems
+Items <|-- RiksdagenDocumentItems
+Items <|-- ScholarlyArticleItems
+Items <|-- ThesisItems
+BaseModel <|-- Entity
+BaseModel <|-- Task
+BaseModel <|-- Suggestion
+BaseModel <|-- BatchJob
+BaseModel <|-- BatchJobs
+BaseModel <|-- Items
+Entity <|-- Item
+Item <|-- SparqlItem
+
+@enduml
\ No newline at end of file
diff --git a/diagrams/sequence_sparql.puml b/diagrams/sequence_sparql.puml
new file mode 100644
index 0000000..3f94e25
--- /dev/null
+++ b/diagrams/sequence_sparql.puml
@@ -0,0 +1,50 @@
+@startuml
+'https://plantuml.com/sequence-diagram
+
+autonumber
+actor User
+'cloud Wikidata
+User -> ItemSubjector : start script
+alt "arguments: sparql && limit"
+    ItemSubjector -> Wikidata : fetch subjects
+    Wikidata -> ItemSubjector : response
+    loop "for each item in list"
+        alt "below limit"
+            ItemSubjector -> Wikidata : fetch details about the item
+            Wikidata -> ItemSubjector : response
+            ItemSubjector -> Wikidata : fetch scientific articles according to SPARQL query built based on the details
+            Wikidata -> ItemSubjector : response
+            ItemSubjector -> User : present max 50 items
+            ItemSubjector -> User : ask for approval of batch
+            ItemSubjector -> User : show count of batches and matches in the job list in memory
+        end
+        alt "above limit"
+            ItemSubjector -> User : ask before continuing
+        end
+    end
+    alt "user chooses not to continue"
+        ItemSubjector -> Wikidata : Upload main subjects to all matches
+    end
+end
+alt "arguments: sparql && limit && prepare-jobs"
+    ItemSubjector -> Wikidata : fetch subjects
+    Wikidata -> ItemSubjector : response
+    loop "for each item in list"
+        alt "below limit"
+            ItemSubjector -> Wikidata : fetch details about the item
+            Wikidata -> ItemSubjector : response
+            ItemSubjector -> Wikidata : fetch scientific articles according to SPARQL query built based on the details
+            Wikidata -> ItemSubjector : response
+            ItemSubjector -> User : present max 50 items
+            ItemSubjector -> User : ask for approval of batch
+            ItemSubjector -> User : show count of batches and matches in the job list in memory
+        end
+        alt "above limit"
+            ItemSubjector -> User : ask before continuing
+        end
+    end
+    alt "user chooses not to continue"
+        ItemSubjector -> Wikidata : save to job list on disk
+    end
+end
+@enduml
\ No newline at end of file
diff --git a/fetch_main_subjects.py b/fetch_main_subjects.py
deleted file mode 100644
index 27e83cd..0000000
--- a/fetch_main_subjects.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import logging
-import random
-
-from wikibaseintegrator import wbi_config
-from wikibaseintegrator.wbi_helpers import execute_sparql_query
-
-import config
-from src import console
-from src.helpers.cleaning import strip_prefix
-from src.helpers.pickle import add_to_main_subject_pickle
-
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger(__name__)
-wbi_config.config["USER_AGENT_DEFAULT"] = config.user_agent
-console.print("Fetching 100,000 main subjects")
-console.input("Press enter to continue")
-subjects = []
-# This offset ensures that we don't get
-# the same subset of subjects every time we run it
-randomizing_offset: int = random.randint(1, 500000)
-console.print(f"Random offset used: {randomizing_offset} for this run")
-for i in range(0+randomizing_offset, 100000+randomizing_offset, 10000):
-    print(i)
-    # title: Get main subjects used at least once on scholarly articles
-    results = execute_sparql_query(f"""
-SELECT ?subject
-WHERE
-{{
-{{
-SELECT DISTINCT ?subject WHERE {{
-  hint:Query hint:optimizer "None".
-  ?item wdt:P31 wd:Q13442814;
-        wdt:P921 ?subject.
-}}
-offset {i}
-limit 10000
-}}
-MINUS{{
-?item wdt:P31 wd:Q8054. # protein
-}}
-MINUS{{
-?item wdt:P279 wd:Q8054. # protein
-}}
-MINUS{{
-?item wdt:P31 wd:Q7187. # gene
-}}
-MINUS{{
-?item wdt:P279 wd:Q7187. # gene
-}}
-}}
-    """)
-    if len(results) == 0:
-        raise ValueError("No main subjects found")
-    else:
-        # print("adding lexemes to list")
-        # pprint(results.keys())
-        # pprint(results["results"].keys())
-        # pprint(len(results["results"]["bindings"]))
-        for result in results["results"]["bindings"]:
-            # print(result)
-            subjects.append(strip_prefix(result["subject"]["value"]))
-        # exit(0)
-console.print(f"{len(subjects)} fetched")
-console.print("Filtering out duplicates")
-subjects_without_duplicates = set()
-for subject in subjects:
-    subjects_without_duplicates.add(subject)
-console.print(f"Saving {len(subjects_without_duplicates)} "
-              f"to pickle '{config.main_subjects_pickle_file_path}' (overwriting)")
-add_to_main_subject_pickle(subjects)
-console.print("Done")
\ No newline at end of file
diff --git a/itemsubjector.py b/itemsubjector.py
index 344ed19..182c802 100644
--- a/itemsubjector.py
+++ b/itemsubjector.py
@@ -1,6 +1,6 @@
 import logging
-from src import *
+import src
 logging.basicConfig(level=logging.DEBUG)
-main()
\ No newline at end of file
+src.main()
diff --git a/requirements.txt b/requirements.txt
index bef4108..404a2cd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 console-menu
 git+git://github.com/LeMyst/WikibaseIntegrator@v0.12.0.dev5#egg=wikibaseintegrator
 rich~=10.9.0
-SPARQLWrapper~=1.8.5
\ No newline at end of file
+SPARQLWrapper~=1.8.5
+pydantic
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
index 7223475..6b1a561 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1,30 +1,32 @@
 import argparse
 import logging
-from typing import List
-from wikibaseintegrator import wbi_login, wbi_config
-from wikibaseintegrator.wbi_helpers import execute_sparql_query
+import pandas as pd  # type: ignore
+from wikibaseintegrator import wbi_login, wbi_config  # type: ignore
+from wikibaseintegrator.wbi_helpers import execute_sparql_query  # type: ignore
 import config
 from src.helpers.argparse_setup import setup_argparse_and_return_args
 from src.helpers.cleaning import strip_prefix
-from src.helpers.console import console, print_found_items_table, ask_add_to_job_queue, print_running_jobs, \
+from src.helpers.console import console, print_found_items_table, ask_add_to_job_queue, \
     ask_yes_no_question, print_finished, \
     print_keep_an_eye_on_wdqs_lag, print_best_practice, print_job_statistics, ask_discard_existing_job_pickle
 from src.helpers.enums import TaskIds
-from src.helpers.jobs import process_qid_into_job, process_user_supplied_qids_into_batch_jobs, run_jobs, \
+from src.helpers.jobs import process_qid_into_job, process_user_supplied_qids_into_batch_jobs, \
     handle_job_preparation_or_run_directly_if_any_jobs, get_validated_main_subjects_as_jobs
 from src.helpers.menus import select_task
 from src.helpers.migration import migrate_pickle_detection
 from src.helpers.pickle import parse_job_pickle, remove_job_pickle, add_to_job_pickle, check_if_pickle_exists, \
-    parse_main_subjects_pickle, get_hash_of_job_pickle
+    get_hash_of_job_pickle
 from src.models.batch_job import BatchJob
-from src.models.quickstatements import QuickStatementsCommandVersion1
+from src.models.batch_jobs import BatchJobs
 from src.models.suggestion import Suggestion
 from src.models.task import Task
-from src.models.wikidata import Item, EntityID
+from src.models.wikimedia.wikidata.entiyt_id import EntityId
 from src.tasks import tasks
+logging.basicConfig(level=config.loglevel)
+
+
 def login():
     with console.status("Logging in with WikibaseIntegrator..."):
@@ -38,30 +40,12 @@ def login():
     wbi_config.config["USER_AGENT_DEFAULT"] = config.user_agent
-def match_existing_main_subjects(args: argparse.Namespace = None,
-                                 jobs: List[BatchJob] = None):
-    if jobs is None:
-        raise ValueError("jobs was None")
-    if not isinstance(jobs, List):
-        raise ValueError("jobs was not a list")
-    with console.status("Reading the main subjects file into memory"):
-        main_subjects = parse_main_subjects_pickle()
-    # raise Exception("debug exit")
-    jobs = get_validated_main_subjects_as_jobs(args=args,
-                                               main_subjects=main_subjects,
-                                               jobs=jobs)
-    handle_job_preparation_or_run_directly_if_any_jobs(args=args, jobs=jobs)
-
-
-def match_main_subjects_from_sparql(args: argparse.Namespace = None,
-                                    jobs: List[BatchJob] = None):
+def match_main_subjects_from_sparql(args: argparse.Namespace = None):
     """Collect subjects via SPARQL and call get_validated_main_subjects()
     If we get any validated jobs we handle them"""
     logger = logging.getLogger(__name__)
-    if jobs is None:
-        raise ValueError("jobs was None")
-    if not isinstance(jobs, List):
-        raise ValueError("jobs was not a list")
+    if args is None or args.sparql is None:
+        raise ValueError("args.sparql was None")
     if "P1889" not in args.sparql:
         console.print("Your SPARQL did not contain P1889 (different from). "
                       "Please include 'MINUS {?item wdt:P1889 [].}' "
@@ -78,12 +62,9 @@ def match_main_subjects_from_sparql(args: argparse.Namespace = None,
             main_subjects.append(item_json["item"]["value"])
     if len(main_subjects) > 0:
         console.print(f"Got {len(main_subjects)} results")
-        jobs = get_validated_main_subjects_as_jobs(
-            args=args,
-            main_subjects=main_subjects,
-            jobs=jobs
-        )
-        handle_job_preparation_or_run_directly_if_any_jobs(args=args, jobs=jobs)
+        batchjobs = get_validated_main_subjects_as_jobs(args=args,
+                                                        main_subjects=main_subjects)
+        handle_job_preparation_or_run_directly_if_any_jobs(args=args, batchjobs=batchjobs)
     else:
         console.print("Got 0 results. Try another query or debug it using --debug")
@@ -91,16 +72,15 @@ def export_jobs_to_dataframe():
     logger = logging.getLogger(__name__)
     logger.info("Exporting jobs to DataFrame. All jobs are appended to one frame")
-    jobs = parse_job_pickle()
-    if jobs is not None:
-        number_of_jobs = len(jobs)
-        if jobs is not None and number_of_jobs > 0:
-            logger.info(f"Found {number_of_jobs} jobs")
+    batchjobs = parse_job_pickle()
+    if batchjobs is not None:
+        if batchjobs is not None and batchjobs.job_count > 0:
+            logger.info(f"Found {batchjobs.job_count} jobs")
             df = pd.DataFrame()
             count = 1
-            for job in jobs:
+            for job in batchjobs.jobs:
                 count += 1
-                logger.info(f"Working on job {count}/{number_of_jobs}")
+                logger.info(f"Working on job {count}/{batchjobs.job_count}")
                 job_df = pd.DataFrame()
                 for item in job.items.list:
                     job_df = job_df.append(pd.DataFrame(data=[dict(
@@ -117,36 +97,11 @@
     else:
         console.print("No jobs found. Create a job list first by using '--prepare-jobs'")
-def export_jobs_to_quickstatements():
-    logger = logging.getLogger(__name__)
-    logger.info("Exporting jobs to QuickStatements V1 commands. One file for each job.")
-    jobs = parse_job_pickle()
-    if jobs is not None and len(jobs) > 0:
-        for job in jobs:
-            # Convert all items
-            lines = []
-            for item in job.items.list:
-                line = QuickStatementsCommandVersion1(
-                    target=EntityID(item.id),
-                    property=EntityID("P921"),
-                    value=EntityID(job.suggestion.item.id),
-                )
-                lines.append(line)
-            logger.debug(f"Got {len(lines)} QS lines to export")
-            filename = (f"quickstatements-export-"
-                        f"{job.suggestion.item.id}-"
-                        f"{job.suggestion.item.label}.csv")
-            with open(filename, "w") as file:
-                for line in lines:
-                    file.write(f"{str(line)}\n")
-            console.print(f"Wrote to {filename} in the current directory")
-
 def main():
     """This is the main function that makes everything else happen"""
     logger = logging.getLogger(__name__)
     migrate_pickle_detection()
-    jobs: List[BatchJob] = []
     args = setup_argparse_and_return_args()
     # console.print(args.list)
     if args.remove_prepared_jobs is True:
@@ -156,33 +111,22 @@ def main():
     if args.prepare_jobs is True:
         logger.info("Preparing jobs")
         if check_if_pickle_exists(config.job_pickle_file_path):
-            if not ask_discard_existing_job_pickle():
-                # the default is yes
-                # to avoid running batches multiple times by
-                # mistake (which does not harm Wikidata, but waste
-                # precious computing resources which we want to avoid.)
-                jobs = parse_job_pickle(silent=True)
-                if len(jobs) > 0:
-                    console.print(f"Found and loaded {len(jobs)} "
-                                  f"jobs with a total of "
-                                  f"{sum(len(job.items.list) for job in jobs)} items")
-            remove_job_pickle(silent=True)
+            if ask_discard_existing_job_pickle():
+                remove_job_pickle(silent=True)
+            else:
+                console.print("Quitting.")
     if args.run_prepared_jobs is True:
         logger.info("Running prepared jobs")
-        jobs = parse_job_pickle()
-        if jobs is not None and len(jobs) > 0:
+        batchjobs = parse_job_pickle()
+        if batchjobs is not None and len(batchjobs.jobs) > 0:
             file_hash = get_hash_of_job_pickle()
-            run_jobs(jobs)
+            batchjobs.run_jobs()
             # Remove the pickle afterwards
             remove_job_pickle(hash=file_hash)
-    if args.export_job_list_to_quickstatements:
-        export_jobs_to_quickstatements()
     elif args.export_jobs_to_dataframe:
         export_jobs_to_dataframe()
-    elif args.match_existing_main_subjects is True:
-        match_existing_main_subjects(args=args, jobs=jobs)
     elif args.sparql:
-        match_main_subjects_from_sparql(args=args, jobs=jobs)
+        match_main_subjects_from_sparql(args=args)
     else:
         # if not args.run_prepared_jobs:
         if args.add is None:
@@ -191,8 +135,10 @@ def main():
             task: Task = select_task()
             if task is None:
                 raise ValueError("Got no task")
+            jobs = []
             jobs.extend(process_user_supplied_qids_into_batch_jobs(args=args, task=task))
-            handle_job_preparation_or_run_directly_if_any_jobs(args=args, jobs=jobs)
+            batchjobs = BatchJobs(jobs=jobs)
+            handle_job_preparation_or_run_directly_if_any_jobs(args=args, batchjobs=batchjobs)
 if __name__ == "__main__":
diff --git a/src/helpers/argparse_setup.py b/src/helpers/argparse_setup.py
index 3b9cfb0..2d24c52 100644
--- a/src/helpers/argparse_setup.py
+++ b/src/helpers/argparse_setup.py
@@ -98,12 +98,6 @@ def setup_argparse_and_return_args():
         type=int,
         help='When working on SPARQL queries of e.g. galaxies, match more until this many matches are in the job list'
     )
-    parser.add_argument(
-        '--export-job-list-to-quickstatements', '-qs',
-        action='store_true',
-        help='Export the prepared job list to QuickStatements.',
-        default=False
-    )
     parser.add_argument(
         '--export-jobs-to-dataframe',
         action='store_true',
diff --git a/src/helpers/cleaning.py b/src/helpers/cleaning.py
index b15ffec..12de0a9 100644
--- a/src/helpers/cleaning.py
+++ b/src/helpers/cleaning.py
@@ -5,17 +5,17 @@ def strip_bad_chars(string):
     # https://stackoverflow.com/questions/3411771/best-way-to-replace-multiple-characters-in-a-string
     return (
         string
-        # Needed for matching backslashes e.g. "Dmel\CG5330" on Q29717230
-        .replace("\\", "\\\\")
-        # Needed for when labels contain apostrophe
-        .replace("'", "\\'")
-        .replace(",", "")
-        .replace(":", "")
-        .replace(";", "")
-        .replace("(", "")
-        .replace(")", "")
-        .replace("[", "")
-        .replace("]", "")
+            # Needed for matching backslashes e.g. "Dmel\CG5330" on Q29717230
+            .replace("\\", "\\\\")
+            # Needed for when labels contain apostrophe
+            .replace("'", "\\'")
+            .replace(",", "")
+            .replace(":", "")
+            .replace(";", "")
+            .replace("(", "")
+            .replace(")", "")
+            .replace("[", "")
+            .replace("]", "")
     )
@@ -30,4 +30,4 @@ def strip_prefix(qid):
     if "http://www.wikidata.org/entity/" in qid:
         qid = qid[31:]
     # logger.debug(f"qid:{qid}")
-    return qid
\ No newline at end of file
+    return qid
diff --git a/src/helpers/console.py b/src/helpers/console.py
index 6a19f23..8d11e89 100644
--- a/src/helpers/console.py
+++ b/src/helpers/console.py
@@ -1,5 +1,7 @@
+from __future__ import annotations
+
 import argparse
-from typing import List
+from typing import List, TYPE_CHECKING
 from urllib.parse import quote
 from rich.console import Console
@@ -7,8 +9,11 @@
 from src.helpers.cleaning import clean_rich_formatting
 from src.models.batch_job import BatchJob
-from src.models.task import Task
-from src.models.wikidata import Items
+from src.models.batch_jobs import BatchJobs
+
+if TYPE_CHECKING:
+    from src.models.items import Items
+    from src.models.task import Task
 console = Console()
@@ -74,6 +79,8 @@ def print_found_items_table(args: argparse.Namespace = None,
         raise ValueError("args was None")
     if items is None:
         raise ValueError("items was None")
+    if items.list is None:
+        raise ValueError("items.list was None")
     table = Table(title="Matched items found")
     if len(items.list) < 1000:
         list_to_show = items.list[0:50]
@@ -89,6 +96,8 @@
     if args.show_item_urls:
         table.add_column(f"Wikidata URL")
     for item in list_to_show:
+        if item.label is None:
+            raise ValueError("item.label was None")
         if args.show_item_urls:
             label = clean_rich_formatting(item.label)
             table.add_row(label, item.url())
@@ -98,37 +107,41 @@
 def ask_add_to_job_queue(job: BatchJob = None):
+    if job is None:
+        raise ValueError("job was None")
+    if job.suggestion.item is None:
+        raise ValueError("job.suggestion.item was None")
+    if job.suggestion.item.label is None:
+        raise ValueError("job.suggestion.item.label was None")
+    if job.suggestion.item.description is None:
+        raise ValueError("job.suggestion.item.description was None")
+    if job.items.list is None:
+        raise ValueError("job.items.list was None")
     return ask_yes_no_question(f"Do you want to add this job for "
                                f"[magenta]{job.suggestion.item.label}: "
                                f"{job.suggestion.item.description}[/magenta] with "
                                f"{len(job.items.list)} items to the queue? (see {job.suggestion.item.url()})")
-def print_running_jobs(jobs: List[BatchJob] = None):
-    if jobs is None:
-        raise ValueError("jobs was None")
-    console.print(f"Running {len(jobs)} job(s) with a total of "
-                  f"{sum(len(job.items.list) for job in jobs)} items "
-                  f"non-interactively now. You can take a "
-                  f"coffee break and lean back :)")
-
-
 def print_finished():
     console.print("All jobs finished successfully")
-def print_job_statistics(jobs: List[BatchJob] = None):
-    if jobs is None:
+def print_job_statistics(batchjobs: BatchJobs = None):
+    if batchjobs is None:
         raise ValueError("jobs was None")
-    if len(jobs) == 0:
+    if batchjobs.jobs is None:
+        raise ValueError("batchjobs.jobs was None")
+    if not isinstance(batchjobs.jobs, list):
+        raise ValueError("jobs was not a list")
+    if len(batchjobs.jobs) == 0:
         console.print("The jobs list is empty")
     else:
-        console.print(f"The jobs list now contain a total of {len(jobs)} "
+        console.print(f"The jobs list now contains a total of {len(batchjobs.jobs)} "
                       f"jobs with a total of "
-                      f"{sum(len(job.items.list) for job in jobs)} items")
+                      f"{sum(len(job.items.list) for job in batchjobs.jobs if batchjobs.jobs is not None and job is not None)} items")
 def ask_discard_existing_job_pickle():
     return ask_yes_no_question("A prepared list of jobs already exist, "
-                               "do you want to overwrite it? "
-                               "(pressing no will append to it)")
+                               "do you want to delete it?")
diff --git a/src/helpers/enums.py b/src/helpers/enums.py
index bb7fff0..be9cd65 100644
--- a/src/helpers/enums.py
+++ b/src/helpers/enums.py
@@ -10,4 +10,4 @@ class TaskIds(Enum):
     SCHOLARLY_ARTICLES = auto()
     RIKSDAGEN_DOCUMENTS = auto()
     THESIS = auto()
-    ACADEMIC_JOURNALS = auto()
\ No newline at end of file
+    ACADEMIC_JOURNALS = auto()
diff --git a/src/helpers/jobs.py b/src/helpers/jobs.py
index b0ed45a..761df9c 100644
--- a/src/helpers/jobs.py
+++ b/src/helpers/jobs.py
@@ -4,38 +4,42 @@
 import logging
 import random
 from datetime import datetime
-from typing import Union, List, TYPE_CHECKING
+from typing import Union, List, TYPE_CHECKING, Optional
 from src import strip_prefix, print_best_practice, console, ask_yes_no_question, \
-    TaskIds, print_found_items_table, ask_add_to_job_queue, print_keep_an_eye_on_wdqs_lag, print_running_jobs, \
-    print_finished, print_job_statistics
+    TaskIds, print_found_items_table, ask_add_to_job_queue, print_keep_an_eye_on_wdqs_lag, print_finished, \
+    print_job_statistics
 from src.helpers.menus import select_task
-from src.models.academic_journals import AcademicJournalItems
-from src.models.riksdagen_documents import RiksdagenDocumentItems
-from src.models.scholarly_articles import ScholarlyArticleItems
-from src.models.thesis import ThesisItems
-from src.tasks import tasks, Task
+from src.models.batch_jobs import BatchJobs
+from src.models.items import Items
+from src.models.items.academic_journals import AcademicJournalItems
+from src.models.items.riksdagen_documents import RiksdagenDocumentItems
+from src.models.items.scholarly_articles import ScholarlyArticleItems
+from src.models.items.thesis import ThesisItems
+from src.tasks import Task
 if TYPE_CHECKING:
     from src import Task, BatchJob
+# TODO rewrite as OOP
+logger = logging.getLogger(__name__)
+
 def process_qid_into_job(qid: str = None,
                          task: Task = None,
                          args: argparse.Namespace = None,
                          confirmation: bool = False) -> Union[BatchJob, None]:
-    # logger = logging.getLogger(__name__)
     if qid is None:
         raise ValueError("qid was None")
     if args is None:
         raise ValueError("args was None")
     if task is None:
         raise ValueError("task was None")
-    from src import Item
+    from src.models.wikimedia.wikidata.item import Item
     item = Item(
         id=strip_prefix(qid),
-        task=task
     )
+    item.fetch_label_and_description_and_aliases(task=task)
     if item.label is not None:
         console.print(f"Working on {item}")
         # generate suggestion with all we need
@@ -49,10 +53,14 @@ def process_qid_into_job(qid: str = None,
             answer = ask_yes_no_question("Do you want to continue?")
             if not answer:
                 return None
+        suggestion.extract_search_strings()
+        if suggestion.search_strings is None:
+            raise ValueError("suggestion.search_strings was None")
         with console.status(f'Fetching items with labels that have one of '
                             f'the search strings by running a total of '
                             f'{len(suggestion.search_strings) * task.number_of_queries_per_search_string} '
                             f'queries on WDQS...'):
+            items: Optional[Items] = None
             if task.id == TaskIds.SCHOLARLY_ARTICLES:
                 items = ScholarlyArticleItems()
             elif task.id == TaskIds.RIKSDAGEN_DOCUMENTS:
@@ -65,7 +73,13 @@ def process_qid_into_job(qid: str = None,
                 raise ValueError(f"{task.id} was not recognized")
             items.fetch_based_on_label(suggestion=suggestion,
                                        task=task)
+        if items.list is None:
+            raise ValueError("items.list was None")
         if len(items.list) > 0:
+            # Remove duplicates
+            logger.warning(f"{len(items.list)} before duplicate removal")
+            items.list = list(set(items.list))
+            logger.warning(f"{len(items.list)} after duplicate removal")
             # Randomize the list
             items.random_shuffle_list()
             print_found_items_table(args=args,
@@ -75,14 +89,13 @@ def process_qid_into_job(qid: str = None,
                 items=items,
                 suggestion=suggestion
             )
-            answer = ask_add_to_job_queue(job)
-            if answer:
-                return job
+            return job
         else:
             console.print("No matching items found")
             return None
     else:
         console.print(f"Label for {task.language_code} was None on {item.url()}, skipping")
+        return None
 def process_user_supplied_qids_into_batch_jobs(args: argparse.Namespace = None,
@@ -105,52 +118,33 @@ def process_user_supplied_qids_into_batch_jobs(args: argparse.Namespace = None,
     return jobs
-def run_jobs(jobs: List[BatchJob] = None):
-    if jobs is None:
-        raise ValueError("jobs was None")
-    print_keep_an_eye_on_wdqs_lag()
-    from src import login
-    login()
-    print_running_jobs(jobs)
-    count = 0
-    start_time = datetime.now()
-    for job in jobs:
-        count += 1
-        job.run(jobs=jobs, job_count=count)
-        console.print(f"runtime until now: {datetime.now() - start_time}")
-    print_finished()
-    end_time = datetime.now()
-    console.print(f'Total runtime: {end_time - start_time}')
 def handle_job_preparation_or_run_directly_if_any_jobs(args: argparse.Namespace = None,
-                                                        jobs: List[BatchJob] = None):
-    if len(jobs) > 0:
+                                                        batchjobs: BatchJobs = None):
+    if batchjobs is None:
+        raise ValueError("batchjobs was None")
+    if args is None:
+        raise ValueError("args was None")
+    if len(batchjobs.jobs) > 0:
         if args.prepare_jobs:
-            console.print(f"Adding {len(jobs)} job(s) to the jobs file")
-            for job in jobs:
+            console.print(f"Adding {len(batchjobs.jobs)} job(s) to the jobs file")
+            for job in batchjobs.jobs:
                 from src import add_to_job_pickle
                 add_to_job_pickle(job)
-            print_job_statistics(jobs=jobs)
+            print_job_statistics(batchjobs=batchjobs)
             console.print(f"You can run the jobs "
                           f"non-interactively e.g. on the Toolforge "
                           f"Kubernetes cluster using -r or --run-prepared-jobs. "
                           f"See Kubernetes_HOWTO.md for details.")
         else:
-            run_jobs(jobs)
+            batchjobs.run_jobs()
-def get_validated_main_subjects_as_jobs(
-        args: argparse.Namespace = None,
-        main_subjects: List[str] = None,
-        jobs: List[BatchJob] = None
-) -> List[BatchJob]:
+def get_validated_main_subjects_as_jobs(args: argparse.Namespace = None,
+                                        main_subjects: List[str] = None) -> BatchJobs:
     """This function randomly picks a subject and present it for validation"""
     logger = logging.getLogger(__name__)
-    if jobs is None:
-        raise ValueError("jobs was None")
-    if not isinstance(jobs, List):
-        raise ValueError("jobs was not a list")
     if args is None:
         raise ValueError("args was None")
     if main_subjects is None:
@@ -161,6 +155,7 @@ def get_validated_main_subjects_as_jobs(
         raise ValueError("Got no task")
     if not isinstance(task, Task):
         raise ValueError("task was not a Task object")
+    batchjobs = BatchJobs(jobs=[])
     while True:
         # Check if we have any subjects left in the list
         if len(subjects_not_picked_yet) > 0:
@@ -173,13 +168,21 @@ def get_validated_main_subjects_as_jobs(
                 args=args,
                 confirmation=args.no_confirmation)
             if job is not None:
-                jobs.append(job)
-                logger.debug(f"joblist now has {len(jobs)} jobs")
-            print_job_statistics(jobs=jobs)
+                if args.no_ask_match_more_limit is None:
+                    answer = ask_add_to_job_queue(job)
+                    if answer:
+                        batchjobs.jobs.append(job)
+                else:
+                    batchjobs.jobs.append(job)
+                logger.debug(f"joblist now has {len(batchjobs.jobs)} jobs")
+            print_job_statistics(batchjobs=batchjobs)
             if len(subjects_not_picked_yet) > 0:
                 if (
                         args.no_ask_match_more_limit is None or
-                        args.no_ask_match_more_limit < sum(len(job.items.list) for job in jobs)
+                        args.no_ask_match_more_limit < sum(
+                            len(job.items.list) for job in batchjobs.jobs
+                            if job.items.list is not None
+                        )
                 ):
                     answer_was_yes = ask_yes_no_question("Match one more?")
                     if not answer_was_yes:
@@ -190,4 +193,11 @@ def get_validated_main_subjects_as_jobs(
         else:
             console.print("No more subjects in the list. Exiting.")
             break
-    return jobs
+    if args.no_ask_match_more_limit is not None:
+        batchjobs_limit = BatchJobs(jobs=[])
+        for job in batchjobs.jobs:
+            answer = ask_add_to_job_queue(job)
+            if answer:
+                batchjobs_limit.jobs.append(job)
+        return batchjobs_limit
+    return batchjobs
diff --git a/src/helpers/menus.py b/src/helpers/menus.py
index a35973e..26ee8b2 100644
--- a/src/helpers/menus.py
+++ b/src/helpers/menus.py
@@ -1,11 +1,11 @@
 import logging
 from typing import List
-from consolemenu import SelectionMenu
+from consolemenu import SelectionMenu  # type: ignore
 from src.models.suggestion import Suggestion
-from src.models.wikidata import Item
-from src.tasks import tasks, Task
+from src.models.wikimedia.wikidata.item import Item
+from src.tasks import Task
 def select_suggestion(suggestions: List[Suggestion] = None,
@@ -20,7 +20,9 @@ def select_suggestion(suggestions: List[Suggestion] = None,
     selected_suggestion = None
     if selected_index > (len(suggestions) - 1):
         logger.debug("The user choose to skip")
+        return None
     else:
+        from src.tasks import tasks
         selected_suggestion = tasks[selected_index]
         logger.debug(f"selected:{selected_index}="
                      f"{selected_suggestion}")
@@ -29,7 +31,10 @@ def select_suggestion(suggestions: List[Suggestion] = None,
 def select_task() -> Task:
     logger = logging.getLogger(__name__)
-    menu = SelectionMenu(tasks, "Select a task")
+    from src.tasks import tasks
+    labels = list([task.label for task in tasks])
+    # console.print(labels)
+    menu = SelectionMenu(labels, "Select a task")
     menu.show()
     menu.join()
     task_index = menu.selected_option
@@ -41,7 +46,6 @@ def select_task() -> Task:
                  f"{selected_task}")
     return selected_task
-
 # def select_language():
 #     logger = logging.getLogger(__name__)
 #     menu = SelectionMenu(WikimediaLanguageCode.__members__.keys(), "Select a language")
@@ -68,4 +72,4 @@ def select_task() -> Task:
 #     selected_lexical_category = category_mapping[selected_lexical_category_index]
 #     logger.debug(f"selected:{selected_lexical_category_index}="
 #                  f"{selected_lexical_category}")
-#     return selected_lexical_category
\ No newline at end of file
+#     return selected_lexical_category
diff --git a/src/helpers/migration.py b/src/helpers/migration.py
index 084b6c4..bbcbc88 100644
--- a/src/helpers/migration.py
+++ b/src/helpers/migration.py
@@ -6,9 +6,6 @@ def migrate_pickle_detection():
         if config.job_pickle_file_path is None:
             raise ValueError("the variable job_pickle_file_path in config "
                              "has to contain a string like 'pickle.dat'")
-        if config.main_subjects_pickle_file_path is None:
-            raise ValueError("The variable main_subjects_pickle_file_path"
-                             "is None, see config.example.py")
     except AttributeError:
         raise ValueError("You need to migrate the new pickle variables"
                          "in config.example.py to your config.py before "
diff --git a/src/helpers/pickle.py b/src/helpers/pickle.py
index f2a5085..42bb448 100644
--- a/src/helpers/pickle.py
+++ b/src/helpers/pickle.py
@@ -1,12 +1,15 @@
-import os
 import hashlib
+import os
 import pickle
-from typing import List
+from typing import List, Optional
 import config
 from src.helpers.console import console
 from src.models.batch_job import BatchJob
+# TODO rewrite as OOP
+from src.models.batch_jobs import BatchJobs
+
 def add_to_job_pickle(job: BatchJob = None):
     if job is None:
@@ -16,12 +19,6 @@ def add_to_job_pickle(job: BatchJob = None):
         pickle.dump(job, file, pickle.DEFAULT_PROTOCOL)
-def add_to_main_subject_pickle(subjects: List[str] = None):
-    with open(config.main_subjects_pickle_file_path, 'wb') as file:
-        for qid in subjects:
-            pickle.dump(qid, file, pickle.DEFAULT_PROTOCOL)
-
-
 def read_from_pickle(path):
     with open(path, 'rb') as file:
         try:
@@ -38,7 +35,7 @@ def check_if_pickle_exists(path):
         return False
-def parse_job_pickle(silent: bool = False) -> List[BatchJob]:
+def parse_job_pickle(silent: bool = False) -> Optional[BatchJobs]:
     """Reads the pickle into a list of batch jobs"""
     if check_if_pickle_exists(config.job_pickle_file_path):
         jobs: List[BatchJob] = []
@@ -47,28 +44,13 @@ def parse_job_pickle(silent: bool = False) -> List[BatchJob]:
         if len(jobs) == 0:
             if not silent:
                 console.print("No prepared jobs found")
+            return None
         else:
-            return jobs
+            return BatchJobs(jobs=jobs)
     else:
         if not silent:
             console.print("No pickle file found")
-
-
-def parse_main_subjects_pickle() -> List[str]:
-    """Reads the pickle into a list of main subjects"""
-    if check_if_pickle_exists(config.main_subjects_pickle_file_path):
-        subjects = []
-        for subject in read_from_pickle(config.main_subjects_pickle_file_path):
-            subjects.append(subject)
-        if len(subjects) == 0:
-            console.print("No qids found in the pickle.")
-        else:
-            # print(f"found:{subjects}")
-            return subjects
-    else:
-        console.print("No main subjects pickle file found. "
-                      "Create it by running 'python fetch_main_subjects.py'")
-        exit(0)
+        return None
 def remove_job_pickle(silent: bool = False,
diff --git a/src/models/batch_job.py b/src/models/batch_job.py
index b96c5b4..e5cf6ec 100644
--- a/src/models/batch_job.py
+++ b/src/models/batch_job.py
@@ -1,21 +1,13 @@
-from __future__ import annotations
-from dataclasses import dataclass
-from typing import List, TYPE_CHECKING
+from typing import List
-if TYPE_CHECKING:
-    from src.models.suggestion import Suggestion
-    from src.models.wikidata import Items
+from pydantic import BaseModel
+from src.models.items import Items
+from src.models.suggestion import Suggestion
-@dataclass
-class BatchJob:
+
+class BatchJob(BaseModel):
     """Models a batch job intended to be run non-interactively"""
     suggestion: Suggestion
     items: Items
-    def run(self, jobs: List[BatchJob], job_count: int = None):
-        if jobs is None:
-            raise ValueError("jobs was None")
-        if job_count is None:
-            raise ValueError("job count was None")
-        self.suggestion.add_to_items(items=self.items, jobs=jobs, job_count=job_count)
diff --git a/src/models/batch_jobs.py b/src/models/batch_jobs.py
new file mode 100644
index 0000000..820e931
--- /dev/null
+++ b/src/models/batch_jobs.py
@@ -0,0 +1,38 @@
+from datetime import datetime
+from typing import List
+
+from pydantic import BaseModel
+
+from src.models.batch_job import BatchJob
+
+
+class BatchJobs(BaseModel):
+    jobs: List[BatchJob]
+
+    @property
+    def job_count(self):
+        return len(self.jobs)
+
+    def print_running_jobs(self):
+        if not isinstance(self.jobs, list):
+            raise ValueError("jobs is not a list")
+        from src.helpers.console import console
+        console.print(f"Running {len(self.jobs)} job(s) with a total of "
+                      f"{sum(len(job.items.list) for job in self.jobs if job.items.list is not None)} items "
+                      f"non-interactively now. You can take a "
+                      f"coffee break and lean back :)")
+
+    def run_jobs(self):
+        from src.helpers.console import console, print_keep_an_eye_on_wdqs_lag, print_finished
+        if self.jobs is None or len(self.jobs) == 0:
+            raise ValueError("did not get what we need")
+        print_keep_an_eye_on_wdqs_lag()
+        from src import login
+        login()
+        self.print_running_jobs()
+        start_time = datetime.now()
+        for job in self.jobs:
+            job.suggestion.add_to_items(items=job.items, jobs=self.jobs, job_count=self.job_count)
+        print_finished()
+        end_time = datetime.now()
+        console.print(f'Total runtime: {end_time - start_time}')
diff --git a/src/models/items/__init__.py b/src/models/items/__init__.py
new file mode 100644
index 0000000..1b5dd9e
--- /dev/null
+++ b/src/models/items/__init__.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+import random
+from typing import List, TYPE_CHECKING, Optional
+
+from pydantic import BaseModel
+
+from src.models.task import Task
+from src.models.wikimedia.wikidata.sparql_item import SparqlItem
+
+if TYPE_CHECKING:
+    from src.models.suggestion import Suggestion
+
+
+class Items(BaseModel):
+    list: Optional[List[SparqlItem]]
+
+    def fetch_based_on_label(self,
+                             suggestion: Suggestion = None,
+                             task: Task = None):
+        pass
+
+    def random_shuffle_list(self):
+        random.shuffle(self.list)
diff --git a/src/models/academic_journals.py b/src/models/items/academic_journals.py
similarity index 70%
rename from src/models/academic_journals.py
rename to src/models/items/academic_journals.py
index 4ecf2cb..1b14772 100644
--- a/src/models/academic_journals.py
+++ b/src/models/items/academic_journals.py
@@ -1,22 +1,14 @@
 import logging
-from wikibaseintegrator.wbi_helpers import execute_sparql_query
+from wikibaseintegrator.wbi_helpers import execute_sparql_query  # type: ignore
 import config
 from src.helpers.cleaning import strip_bad_chars
 from src.helpers.console import console
 from src.models.suggestion import Suggestion
 from src.models.task import Task
-from src.models.wikidata import Items, Item
-
-
-def process_results(results):
-    items = []
-    for item_json in results["results"]["bindings"]:
-        logging.debug(f"item_json:{item_json}")
-        item = Item(json=item_json)
-        items.append(item)
-    return items
+from src.models.wikimedia.wikidata.item import Item
+from src.models.items import Items
 class AcademicJournalItems(Items):
@@ -25,11 +17,30 @@ class AcademicJournalItems(Items):
     def fetch_based_on_label(self,
                              suggestion: Suggestion = None,
                              task: Task = None):
+        def process_results(results):
+            # TODO refactor into private method
+            items = []
+            for item_json in results["results"]["bindings"]:
+                logging.debug(f"item_json:{item_json}")
+                item = Item(json=item_json)
+                items.append(item)
+            return items
+
         # logger = logging.getLogger(__name__)
         if suggestion is None:
             raise ValueError("suggestion was None")
         if task is None:
             raise ValueError("task was None")
+        if task.language_code is None:
+            raise ValueError("task.language_code was None")
+        if suggestion.search_strings is None:
+            raise ValueError("suggestion.search_strings was None")
+        if suggestion.item is None:
+            raise ValueError("suggestion.item was None")
+        if suggestion.item.id is None:
+            raise ValueError("suggestion.item.id was None")
+        if suggestion.args is None:
+            raise ValueError("suggestion.args was None")
         # Fetch all items matching the search strings
         self.list = []
         for search_string in suggestion.search_strings:
diff --git a/src/models/riksdagen_documents.py b/src/models/items/riksdagen_documents.py
similarity index 82%
rename from src/models/riksdagen_documents.py
rename to src/models/items/riksdagen_documents.py
index d2fb8df..c9ac4be 100644
--- a/src/models/riksdagen_documents.py
+++ b/src/models/items/riksdagen_documents.py
@@ -1,12 +1,13 @@
 import logging
-from wikibaseintegrator.wbi_helpers import execute_sparql_query
+from wikibaseintegrator.wbi_helpers import execute_sparql_query  # type: ignore
 import config
 from src.helpers.console import console
+from src.models.items import Items
 from src.models.suggestion import Suggestion
 from src.models.task import Task
-from src.models.wikidata import Items, Item
+from src.models.wikimedia.wikidata.sparql_item import SparqlItem
 class RiksdagenDocumentItems(Items):
@@ -16,11 +17,19 @@ def fetch_based_on_label(self,
         # logger = logging.getLogger(__name__)
         if suggestion is None:
             raise ValueError("suggestion was None")
+        if suggestion.item is None:
+            raise ValueError("suggestion.item was None")
+        if suggestion.args is None:
+            raise ValueError("suggestion.args was None")
         if suggestion.args.limit_to_items_without_p921:
             raise Exception("Limiting to items without P921 is not "
                             "supported yet for this task.")
+        if suggestion.search_strings is None:
+            raise ValueError("suggestion.search_strings was None")
         if task is None:
             raise ValueError("task was None")
+        if task.language_code is None:
+            raise ValueError("task.language_code was None")
         # Fetch all items maching the search strings
         self.list = []
         # Include spaces around the n-gram to avoid edits like this one
@@ -53,8 +62,7 @@ def fetch_based_on_label(self,
             ''', debug=suggestion.args.debug_sparql)
             for item_json in results["results"]["bindings"]:
                 logging.debug(f"item_json:{item_json}")
-                item = Item(json=item_json,
-                            task=task)
+                item = SparqlItem(**item_json)
                 self.list.append(item)
             logging.info(f'Got {len(results["results"]["bindings"])} items from '
                          f'WDQS using the search string {search_string}')
diff --git a/src/models/items/scholarly_articles.py b/src/models/items/scholarly_articles.py
new file mode 100644
index 0000000..3001563
--- /dev/null
+++ b/src/models/items/scholarly_articles.py
@@ -0,0 +1,147 @@
+import logging
+
+from wikibaseintegrator.wbi_helpers import execute_sparql_query  # type: ignore
+
+import config
+from src.helpers.cleaning import strip_bad_chars
+from src.helpers.console import console
+from src.models.suggestion import Suggestion
+from src.models.task import Task
+from src.models.items import Items
+from src.models.wikimedia.wikidata.sparql_item import SparqlItem
+
+
+class ScholarlyArticleItems(Items):
+    """This supports both published peer reviewed articles and preprints"""
+
+    def fetch_based_on_label(self,
+                             suggestion: Suggestion = None,
+                             task: Task = None):
+        def build_query(suggestion: Suggestion = None,
+                        search_string: str = None,
+                        task: Task = None,
+                        cirrussearch_parameters: str = None):
+            # TODO refactor
+            if suggestion is None:
+                raise ValueError("suggestion was None")
+            if suggestion.item is None:
+                raise ValueError("suggestion.item was None")
+            if search_string is None:
+                raise ValueError("search_string was None")
+            if task is None:
+                raise ValueError("task was None")
+            if task.language_code is None:
+                raise ValueError("task.language_code was None")
+            if cirrussearch_parameters is None:
+                raise ValueError("cirrussearch_parameters was None")
+            # This query uses https://www.w3.org/TR/sparql11-property-paths/ to
+            # find subjects that are subclass of one another up to 3 hops away
+            # This query also uses the https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
+            # which has a hardcoded limit of 10,000 items so you will never get more matches than that
+            # This query use regex to match beginning, middle and end of the label of matched items
+            # The replacing lines should match the similar python replacements in cleaning.py
+            # The replacing with "\\\\\\\\" becomes "\\\\" after leaving python and then it works in
+            # SPARQL where it becomes "\\" and thus match a single backslash
+            return (f"""
+            #{config.user_agent}
+            SELECT DISTINCT ?item ?itemLabel
+            WHERE {{
+              hint:Query hint:optimizer "None".
+              BIND(STR('{cirrussearch_parameters} \"{search_string}\"') as ?search_string)
+              SERVICE wikibase:mwapi {{
+                bd:serviceParam wikibase:api "Search";
+                                wikibase:endpoint "www.wikidata.org";
+                                mwapi:srsearch ?search_string.
+                ?title wikibase:apiOutput mwapi:title.
+              }}
+              BIND(IRI(CONCAT(STR(wd:), ?title)) AS ?item)
+              ?item rdfs:label ?label.
+              BIND(REPLACE(LCASE(?label), ",", "") as ?label1)
+              BIND(REPLACE(?label1, ":", "") as ?label2)
+              BIND(REPLACE(?label2, ";", "") as ?label3)
+              BIND(REPLACE(?label3, "\\\\(", "") as ?label4)
+              BIND(REPLACE(?label4, "\\\\)", "") as ?label5)
+              BIND(REPLACE(?label5, "\\\\[", "") as ?label6)
+              BIND(REPLACE(?label6, "\\\\]", "") as ?label7)
+              BIND(REPLACE(?label7, "\\\\\\\\", "") as ?label8)
+              BIND(?label8 as ?cleaned_label)
+              FILTER(CONTAINS(?cleaned_label, ' {search_string.lower()} '@{task.language_code.value}) ||
+                     REGEX(?cleaned_label, '.* {search_string.lower()}$'@{task.language_code.value}) ||
+                     REGEX(?cleaned_label, '^{search_string.lower()} .*'@{task.language_code.value}))
+              MINUS {{?item wdt:P921/wdt:P279 wd:{suggestion.item.id}. }}
+              MINUS {{?item wdt:P921/wdt:P279/wdt:P279 wd:{suggestion.item.id}. }}
+              MINUS {{?item wdt:P921/wdt:P279/wdt:P279/wdt:P279 wd:{suggestion.item.id}. }}
+              SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
+            }}
+            """)
+
+        def process_results(results):
+            # TODO refactor
+            items = []
+            for item_json in results["results"]["bindings"]:
+                logging.debug(f"item_json:{item_json}")
+                item = SparqlItem(**item_json)
+                item.validate_qid_and_copy_label()
+                items.append(item)
+            return items
+
+        # logger = logging.getLogger(__name__)
+        if suggestion is None:
+            raise ValueError("suggestion was None")
+        if suggestion.item is None:
+            raise ValueError("suggestion.item was None")
+        if suggestion.args is None:
+            raise ValueError("suggestion.args was None")
+        if suggestion.args.limit_to_items_without_p921:
+            raise Exception("Limiting to items without P921 is not "
+                            "supported yet for this task.")
+        if suggestion.search_strings is None:
+            raise ValueError("suggestion.search_strings was None")
+        if task is None:
+            raise ValueError("task was None")
+        if task.language_code is None:
+            raise ValueError("task.language_code was None")
+        if suggestion.args.limit_to_items_without_p921:
+            console.print("Limiting to scholarly articles without P921 main subject only")
+            cirrussearch_parameters = f"haswbstatement:P31=Q13442814 -haswbstatement:P921"
+        else:
+            cirrussearch_parameters = f"haswbstatement:P31=Q13442814 -haswbstatement:P921={suggestion.item.id}"
+        # Fetch all items matching the search strings
+        self.list = []
+        for search_string in suggestion.search_strings:
+            search_string = strip_bad_chars(search_string)
+            results = execute_sparql_query(
+                build_query(
+                    cirrussearch_parameters=cirrussearch_parameters,
+                    suggestion=suggestion,
+                    search_string=search_string,
+                    task=task)
+            )
+            logging.info(f'Got {len(results["results"]["bindings"])} scholarly items from '
+                         f'WDQS using the search string {search_string}')
+            self.list.extend(process_results(results))
+            # preprints
+            # We don't use CirrusSearch in this query because we can do it more easily in
+            # SPARQL on a small subgraph like this
+            # find all items that are ?item wdt:P31/wd:P279* wd:Q1266946
+            # minus the QID we want to add
+            results_preprint = execute_sparql_query(f'''
+            #{config.user_agent}
+            SELECT DISTINCT ?item ?itemLabel
+            WHERE {{
+              ?item wdt:P31/wd:P279* wd:Q580922. # preprint
+              MINUS {{
+                ?item wdt:P921 wd:{suggestion.item.id};
+              }}
+              ?item rdfs:label ?label.
+              FILTER(CONTAINS(LCASE(?label), " {search_string.lower()} "@{task.language_code.value}) ||
+                     REGEX(LCASE(?label), ".* {search_string.lower()}$"@{task.language_code.value}) ||
+                     REGEX(LCASE(?label), "^{search_string.lower()} .*"@{task.language_code.value}))
+              MINUS {{?item wdt:P921/wdt:P279 wd:{suggestion.item.id}. }}
+              SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
+            }}
+            ''', debug=suggestion.args.debug_sparql)
+            logging.info(f'Got {len(results["results"]["bindings"])} preprint items from '
+                         f'WDQS using the search string {search_string}')
+            self.list.extend(process_results(results_preprint))
+        console.print(f"Got a total of {len(self.list)} items")
diff --git a/src/models/thesis.py b/src/models/items/thesis.py
similarity index 85%
rename from src/models/thesis.py
rename to src/models/items/thesis.py
index 5569ae9..db205a4 100644
--- a/src/models/thesis.py
+++ b/src/models/items/thesis.py
@@ -1,13 +1,13 @@
 import logging
-from wikibaseintegrator.wbi_helpers import execute_sparql_query
+from wikibaseintegrator.wbi_helpers import execute_sparql_query  # type: ignore
 from src.helpers.console import console
+from src.models.items import Items
 from src.models.suggestion import Suggestion
 from src.models.task import Task
-from src.models.wikidata import Items, Item
-
 # There were ~16.000 thesis' in WD when this was written
+from src.models.wikimedia.wikidata.sparql_item import SparqlItem
 class ThesisItems(Items):
@@ -17,11 +17,15 @@ def fetch_based_on_label(self,
         # logger = logging.getLogger(__name__)
         if suggestion is None:
             raise ValueError("suggestion was None")
+        if suggestion.search_strings is None:
+            raise ValueError("suggestion.search_strings was None")
         if suggestion.args.limit_to_items_without_p921:
             raise Exception("Limiting to items without P921 is not "
                             "supported yet for this task.")
         if task is None:
             raise ValueError("task was None")
+        if task.language_code is None:
+            raise ValueError("task.language_code was None")
         # Fetch all items maching the search strings
         self.list = []
         for search_string in suggestion.search_strings:
@@ -54,8 +58,7 @@ def fetch_based_on_label(self,
             ''', debug=suggestion.args.debug_sparql)
             for item_json in results["results"]["bindings"]:
                 logging.debug(f"item_json:{item_json}")
-                item = Item(json=item_json,
-                            task=task)
+                item = SparqlItem(**item_json)
                 self.list.append(item)
             logging.info(f'Got {len(results["results"]["bindings"])} items from '
                          f'WDQS using the search string {search_string}')
diff --git a/src/models/quickstatements.py b/src/models/quickstatements.py
deleted file mode 100644
index e5313fa..0000000
--- a/src/models/quickstatements.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from dataclasses import dataclass
-
-from src.models.wikidata import EntityID
-
-
-@dataclass
-class QuickStatementsCommandVersion1:
-    """This models the simple line-based QS commands
-
-    For now we only support QID-values
-
-    Q1\tP1\tQ1"""
-    target: EntityID = None
-    property: EntityID = None
-    value: EntityID = None
-
-    def __str__(self):
-        return f"{self.target}\t{self.property}\t{self.value}"
diff --git a/src/models/scholarly_articles.py b/src/models/scholarly_articles.py
deleted file mode 100644
index 812ffba..0000000
--- a/src/models/scholarly_articles.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import logging
-
-from wikibaseintegrator.wbi_helpers import execute_sparql_query
-
-import config
-from src.helpers.cleaning import strip_bad_chars
-from src.helpers.console import console
-from src.models.suggestion import Suggestion
-from src.models.task import Task
-from src.models.wikidata import Items, Item
-
-
-def build_query(suggestion: Suggestion = None,
-                search_string: str = None,
-                task: Task = None,
-                cirrussearch_parameters: str = None):
-    if suggestion is None:
-        raise ValueError("suggestion was None")
-    if search_string is None:
-        raise ValueError("search_string was None")
-    if task is None:
-        raise ValueError("task was None")
-    if cirrussearch_parameters is None:
-        raise ValueError("cirrussearch_parameters was None")
-    # This query uses https://www.w3.org/TR/sparql11-property-paths/ to
-    # find subjects that are subclass of one another up to 3 hops away
-    # This query also uses the https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
-    # which has a hardcoded limit of 10,000 items so you will never get more matches than that
-    # This query use regex to match beginning, middle and end of the label of matched items
-    # The replacing lines should match the similar python replacements in cleaning.py
-    # The replacing with "\\\\\\\\" becomes "\\\\" after leaving python and then it works in
-    # SPARQL where it becomes "\\" and thus match a single backslash
-    return (f"""
-    #{config.user_agent}
-    SELECT DISTINCT ?item ?itemLabel
-    WHERE {{
-      hint:Query hint:optimizer "None".
-      BIND(STR('{cirrussearch_parameters} \"{search_string}\"') as ?search_string)
-      SERVICE wikibase:mwapi {{
-        bd:serviceParam wikibase:api "Search";
-                        wikibase:endpoint "www.wikidata.org";
-                        mwapi:srsearch ?search_string.
-        ?title wikibase:apiOutput mwapi:title.
-      }}
-      BIND(IRI(CONCAT(STR(wd:), ?title)) AS ?item)
-      ?item rdfs:label ?label.
-      BIND(REPLACE(LCASE(?label), ",", "") as ?label1)
-      BIND(REPLACE(?label1, ":", "") as ?label2)
-      BIND(REPLACE(?label2, ";", "") as ?label3)
-      BIND(REPLACE(?label3, "\\\\(", "") as ?label4)
-      BIND(REPLACE(?label4, "\\\\)", "") as ?label5)
-      BIND(REPLACE(?label5, "\\\\[", "") as ?label6)
-      BIND(REPLACE(?label6, "\\\\]", "") as ?label7)
-      BIND(REPLACE(?label7, "\\\\\\\\", "") as ?label8)
-      BIND(?label8 as ?cleaned_label)
-      FILTER(CONTAINS(?cleaned_label, ' {search_string.lower()} '@{task.language_code.value}) ||
-             REGEX(?cleaned_label, '.* {search_string.lower()}$'@{task.language_code.value}) ||
-             REGEX(?cleaned_label, '^{search_string.lower()} .*'@{task.language_code.value}))
-      MINUS {{?item wdt:P921/wdt:P279 wd:{suggestion.item.id}. }}
-      MINUS {{?item wdt:P921/wdt:P279/wdt:P279 wd:{suggestion.item.id}. }}
-      MINUS {{?item wdt:P921/wdt:P279/wdt:P279/wdt:P279 wd:{suggestion.item.id}. }}
-      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
-    }}
-    """)
-
-
-def process_results(results):
-    items = []
-    for item_json in results["results"]["bindings"]:
-        logging.debug(f"item_json:{item_json}")
-        item = Item(json=item_json)
-        items.append(item)
-    return items
-
-
-class ScholarlyArticleItems(Items):
-    """This supports both published peer reviewed articles and preprints"""
-    def fetch_based_on_label(self,
-                             suggestion: Suggestion = None,
-                             task: Task = None):
-        # logger = logging.getLogger(__name__)
-        if suggestion is None:
-            raise ValueError("suggestion was None")
-        if task is None:
-            raise ValueError("task was None")
-        if suggestion.args.limit_to_items_without_p921:
-            console.print("Limiting to scholarly articles without P921 main subject only")
-            cirrussearch_parameters = f"haswbstatement:P31=Q13442814 -haswbstatement:P921"
-        else:
-            cirrussearch_parameters = f"haswbstatement:P31=Q13442814 -haswbstatement:P921={suggestion.item.id}"
-        # Fetch all items matching the search strings
-        self.list = []
-        for search_string in suggestion.search_strings:
-            search_string = strip_bad_chars(search_string)
-            results = execute_sparql_query(
-                build_query(
-                    cirrussearch_parameters=cirrussearch_parameters,
-                    suggestion=suggestion,
-                    search_string=search_string,
-                    task=task)
-            )
-            logging.info(f'Got {len(results["results"]["bindings"])} scholarly items from '
-                         f'WDQS using the search string {search_string}')
-            self.list.extend(process_results(results))
-            # preprints
-            # We don't use CirrusSearch in this query because we can do it more easily in
-            # SPARQL on a small subgraph like this
-            # find all items that are ?item wdt:P31/wd:P279* wd:Q1266946
-            # minus the QID we want to add
-            results_preprint = execute_sparql_query(f'''
-            #{config.user_agent}
-            SELECT DISTINCT ?item ?itemLabel
-            WHERE {{
-              ?item wdt:P31/wd:P279* wd:Q580922. # preprint
-              MINUS {{
-                ?item wdt:P921 wd:{suggestion.item.id};
-              }}
-              ?item rdfs:label ?label.
-              FILTER(CONTAINS(LCASE(?label), " {search_string.lower()} "@{task.language_code.value}) ||
-                     REGEX(LCASE(?label), ".* {search_string.lower()}$"@{task.language_code.value}) ||
-                     REGEX(LCASE(?label), "^{search_string.lower()} .*"@{task.language_code.value}))
-              MINUS {{?item wdt:P921/wdt:P279 wd:{suggestion.item.id}. }}
-              SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
-            }}
-            ''', debug=suggestion.args.debug_sparql)
-            logging.info(f'Got {len(results["results"]["bindings"])} preprint items from '
-                         f'WDQS using the search string {search_string}')
-            self.list.extend(process_results(results_preprint))
-        console.print(f"Got a total of {len(self.list)} items")
diff --git a/src/models/suggestion.py b/src/models/suggestion.py
index d1cac86..c55689e 100644
--- a/src/models/suggestion.py
+++ b/src/models/suggestion.py
@@ -1,40 +1,32 @@
+from __future__ import annotations
+
 import argparse
 import logging
-from typing import List
+from typing import List, Optional, TYPE_CHECKING
 from urllib.parse import quote
-from wikibaseintegrator.datatypes import Item as ItemType
+from pydantic import BaseModel
+from wikibaseintegrator.datatypes import Item as ItemType  # type: ignore
 import config
 from src.helpers.calculations import calculate_random_editgroups_hash
 from src.helpers.cleaning import clean_rich_formatting
-from src.helpers.console import print_search_strings_table, console
-from src.helpers.enums import TaskIds
-from src.models.batch_job import BatchJob
+from src.models.items import Items
 from src.models.task import Task
-from src.models.wikidata import Item, Items
+from src.models.wikimedia.wikidata.item import Item
+if TYPE_CHECKING:
+    from src.models.batch_job import BatchJob
-class Suggestion:
-    item: Item = None
-    search_strings: List[str] = None
-    task: Task = None
-    args: argparse.Namespace = None
-    def __init__(self,
-                 item: Item = None,
-                 task: Task = None,
-                 args=None):
-        if item is None:
-            raise ValueError("item was None")
-        else:
-            self.item = item
-        if task is None:
-            raise ValueError("task was None")
-        else:
-            self.task = task
-        self.args = args
-        self.extract_search_strings()
+class Suggestion(BaseModel):
+    item: Item
+    task: Task
+    args: argparse.Namespace
+    search_strings: Optional[List[str]] = None
+
+    class Config:
+        arbitrary_types_allowed = True
     def __str__(self):
         """Return label and description, the latter cut to 50 chars"""
@@ -61,6 +53,8 @@ def add_to_items(self,
         This function is non-interactive"""
         if items is None:
             raise ValueError("Items was None")
+        if items.list is None:
+            raise ValueError("items.list was None")
         if jobs is None:
             raise ValueError("jobs was None")
         if job_count is None:
@@ -69,6 +63,7 @@ def add_to_items(self,
         count = 0
         for target_item in items.list:
             count += 1
+            from src import console
             with console.status(f"Uploading main subject "
                                 f"[green]{clean_rich_formatting(self.item.label)}[/green] "
                                 f"to {clean_rich_formatting(target_item.label)}"):
@@ -96,20 +91,22 @@ def extract_search_strings(self):
         def clean_special_symbols(string: str):
             return string.replace("®", "").replace("™", "")
+        from src.helpers.console import console
         logger = logging.getLogger(__name__)
         if self.args is None:
             raise ValueError("args was None")
         else:
             logger.debug(f"args:{self.args}")
             if self.args.no_aliases is True:
+                from src import console
                 console.print("Alias matching is turned off")
                 no_aliases = True
             else:
                 no_aliases = False
         self.search_strings: List[str] = [clean_special_symbols(self.item.label)]
         if (
-                self.item.aliases is not None and
-                no_aliases is False
+            self.item.aliases is not None and
+            no_aliases is False
         ):
             for alias in self.item.aliases:
                 # logger.debug(f"extracting alias:{alias}")
@@ -121,10 +118,13 @@ def clean_special_symbols(string: str):
                 else:
                     self.search_strings.append(clean_special_symbols(alias))
         # logger.debug(f"search_strings:{self.search_strings}")
+        from src.helpers.console import print_search_strings_table
         print_search_strings_table(args=self.args,
                                    search_strings=self.search_strings)
     def search_urls(self) -> List[str]:
+        if self.search_strings is None:
+            raise ValueError("self.search_strings was None")
         urls = []
         for search_string in self.search_strings:
             search_term = quote(f'"{search_string}"')
diff --git a/src/models/task.py b/src/models/task.py
index 1260025..3a4858b 100644
--- a/src/models/task.py
+++ b/src/models/task.py
@@ -1,37 +1,18 @@
 from typing import Union
+from pydantic import BaseModel
+
 from src.helpers.enums import SupportedLanguageCode, TaskIds
-# console-menu does not support dataclass (yet)
-# @dataclass
-class Task:
+class Task(BaseModel):
     """This class holds the tasks presented to the user in the menu
     and related data"""
-    best_practice_information: Union[str, None] = None
-    id: TaskIds = None
-    label: str = None
-    language_code: SupportedLanguageCode = None
+    best_practice_information: Union[str, None]
+    id: TaskIds
+    label: str
+    language_code: SupportedLanguageCode
     number_of_queries_per_search_string = 1
-    def __init__(self,
-                 best_practice_information: str = None,
-                 id: TaskIds = None,
-                 label: str = None,
-                 language_code: SupportedLanguageCode = None,
-                 number_of_queries_per_search_string: int = None):
-        if id is None:
-            raise ValueError("Got no id")
-        if label is None:
-            raise ValueError("Got no label")
-        if language_code is None:
-            raise ValueError("Got no language_code")
-        self.id = id
-        self.label = label
-        self.language_code = language_code
-        self.best_practice_information = best_practice_information
-        if number_of_queries_per_search_string is not None:
-            self.number_of_queries_per_search_string = number_of_queries_per_search_string
-
     def __str__(self):
         return f"{self.label}"
diff --git a/src/models/wikidata.py b/src/models/wikidata.py
deleted file mode 100644
index f4a25ea..0000000
--- a/src/models/wikidata.py
+++ /dev/null
@@ -1,883 +0,0 @@
-"""
-Model from LexUtils
-"""
-import logging
-import random
-from enum import Enum
-from typing import List
-
-from wikibaseintegrator import wbi_config, WikibaseIntegrator
-from wikibaseintegrator.datatypes import BaseDataType
-from wikibaseintegrator.models import Alias
-from wikibaseintegrator.wbi_enums import ActionIfExists
-
-import config
-# We get the URL for the Wikibase from here
-from src.models.task import Task
-
-wbi_config.config['USER_AGENT'] = config.user_agent
-
-
-class WikidataGrammaticalFeature(Enum):
-    # Swedish
-    ACTIVE_VOICE = "Q1317831"
-    PRETERITE = "Q442485"
-    INFINITIVE = "Q179230"
-    PRESENT_TENSE = "Q192613"
-    SUPINE = "Q548470"
-    IMPERATIVE = "Q22716"
-    PASSIVE_VOICE = "Q1194697"
-    SINGULAR = "Q110786"
-    NOMINATIVE_CASE = "Q131105"
-    INDEFINITE = "Q53997857"
-    DEFINITE = "Q53997851"
-    PLURAL = "Q146786"
-    GENITIVE_CASE = "Q146233"
-    # English
-    SIMPLE_PRESENT = "Q3910936"
-    THIRD_PERSON_SINGULAR = "Q51929447"
-
-
-class WikidataLexicalCategory(Enum):
-    NOUN = "Q1084"
-    VERB = "Q24905"
-    ADVERB = "Q380057"
-    ADJECTIVE = "Q34698"
-    AFFIX = "Q62155"
-    PROPER_NOUN = "Q147276"
-
-
-class WikimediaLanguageCode(Enum):
-    DANISH = "da"
-    SWEDISH = "sv"
-    BOKMÅL = "nb"
-    ENGLISH = "en"
-    FRENCH = "fr"
-    RUSSIAN = "ru"
-    ESTONIAN = "et"
-    MALAYALAM = "ml"
-    LATIN = "la"
-    HEBREW = "he"
-    BASQUE = "eu"
-    GERMAN = "de"
-    BENGALI = "bn"
-    CZECH = "cs"
-
-
-class WikimediaLanguageQID(Enum):
-    DANISH = "Q9035"
-    SWEDISH = "Q9027"
-    BOKMÅL = "Q25167"
-    ENGLISH = "Q1860"
-    FRENCH = "Q150"
-    RUSSIAN = "Q7737"
-    ESTONIAN = "Q9072"
-    MALAYALAM = "Q36236"
-    LATIN = "Q397"
-    HEBREW = "Q9288"
-    BASQUE = "Q8752"
-    GERMAN = "Q188"
-    BENGALI = "Q9610"
-    CZECH = "Q9056"
-
-
-class WikidataNamespaceLetters(Enum):
-    PROPERTY = "P"
-    ITEM = "Q"
-    LEXEME = "L"
-    #FORM = "F"
-    #SENSE = "S"
-
-
-class EntityID:
-    letter: WikidataNamespaceLetters
-    # This can be e.g. "32698-F1" in the case of a lexeme
-    rest: str
-
-    def __init__(self,
-                 entity_id: str):
-        logger = logging.getLogger(__name__)
-        if entity_id is not None:
-            # Remove prefix if found
-            if config.wd_prefix in entity_id:
-                logger.debug("Removing prefix")
-                entity_id = entity_id.replace(config.wd_prefix, "")
-            if len(entity_id) > 1:
-                logger.info(f"entity_id:{entity_id}")
-                self.letter = WikidataNamespaceLetters(entity_id[0])
-                self.rest = entity_id[1:]
-            else:
-                raise ValueError("Entity ID was too short.")
-        else:
-            raise ValueError("Entity ID was None")
-
-    def __str__(self):
-        return f"{self.letter.value}{self.rest}"
-
-    # def extract_wdqs_json_entity_id(self, json: Dict, sparql_variable: str):
-    #     self.__init__(json[sparql_variable]["value"].replace(
-    #         config.wd_prefix, ""
-    #     ))
-
-
-class ForeignID:
-    id: str
-    property: str  # This is the property with type ExternalId
-    source_item_id: str  # This is the Q-item for the source
-
-    def __init__(self,
-                 id: str = None,
-                 property: str = None,
-                 source_item_id: str = None):
-        self.id = id
-        self.property = str(EntityID(property))
-        self.source_item_id = str(EntityID(source_item_id))
-
-
-class Form:
-    """
-    Model for a Wikibase form
-    """
-    id: str
-    representation: str
-    grammatical_features: List[WikidataGrammaticalFeature]
-    # We store these on the form because they are needed
-    # to determine if an example fits or not
-    lexeme_id: str
-    lexeme_category: str
-
-    def __init__(self, json):
-        """Parse the form json"""
-        logger = logging.getLogger(__name__)
-        try:
-            logger.info(json["lexeme"])
-            self.id = str(EntityID(json["lexeme"]["value"]))
-        except KeyError:
-            pass
-        try:
-            logger.info(json["form"])
-            self.id = str(EntityID(json["form"]["value"]))
-        except KeyError:
-            pass
-        try:
-            self.representation: str = json["form_representation"]["value"]
-        except KeyError:
-            pass
-        try:
-            self.lexeme_category: WikidataLexicalCategory = WikidataLexicalCategory(
-                str(EntityID(json["category"]["value"]))
-            )
-        except:
-            raise ValueError(f'Could not find lexical category from '
-                             f'{json["category"]["value"]}')
-        try:
-            self.grammatical_features = []
-            logger.info(json["grammatical_features"])
-            for feature in json["grammatical_features"]["value"].split(","):
-                # TODO parse features with Enum
-                feature_id = WikidataGrammaticalFeature(str(EntityID(feature)))
-                self.grammatical_features.append(feature_id)
-        except KeyError:
-            pass
-
-
-class Sense:
-    pass
-
-
-class Entity:
-    """Base entity with code that is the same for both items and lexemes"""
-    id: str
-    label: str
-
-    def upload_one_statement_to_wikidata(self,
-                                         statement: BaseDataType = None,
-                                         summary: str = None,
-                                         editgroups_hash: str = None):
-        """Upload one statement and always append
-        This mandates an editgroups hash to be supplied"""
-        logger = logging.getLogger(__name__)
-        if self.id is None:
-            raise ValueError("no id on item")
-        if statement is None:
-            raise ValueError("Statement was None")
-        if summary is None:
-            raise ValueError("summary was None")
-        if editgroups_hash is None:
-            raise ValueError("editgroup_hash was None")
-        if config.login_instance is None:
-            raise ValueError("No login instance in config.login_instance")
-        wbi = WikibaseIntegrator(login=config.login_instance)
-        item = wbi.item.get(self.id)
-        item.add_claims(
-            [statement],
- action_if_exists=ActionIfExists.APPEND) - result = item.write( - summary=f"Added {summary} with [[{config.tool_wikipage}]] " - f"([[:toolforge:editgroups/b/CB/{editgroups_hash}|details]])" - ) - logger.debug(f"result from WBI:{result}") - - def url(self): - return f"http://www.wikidata.org/entity/{self.id}" - - -# class Lexeme(Entity): -# id: str -# lemma: str -# lexical_category: WikidataLexicalCategory -# forms: List[Form] -# senses: List[Sense] -# # Needed for duplicate lookup -# language_code: WikimediaLanguageCode -# -# def __init__(self, -# id: str = None, -# lemma: str = None, -# lexical_category: str = None, -# language_code: WikimediaLanguageCode = None): -# if id is not None: -# self.id = str(EntityID(id)) -# self.lemma = lemma -# if lexical_category is None: -# raise ValueError("Lexical category was None") -# if isinstance(lexical_category, WikidataLexicalCategory): -# self.lexical_category = lexical_category -# else: -# self.lexical_category = WikidataLexicalCategory(EntityID(lexical_category)) -# if language_code is not None: -# self.language_code: WikimediaLanguageCode = language_code -# -# def create(self): -# if self.id is not None: -# raise ValueError("Lexeme already has an id, aborting") -# lexeme = wbi_core.LexemeEngine() -# -# def parse_from_wdqs_json(self, json): -# self.forms = [] -# self.senses = [] -# for variable in json: -# logging.debug(variable) -# if variable == "form": -# form = Form(variable) -# self.forms.append(form) -# if variable == "sense": -# sense = Sense(variable) -# self.senses.append(sense) -# if variable == "category": -# self.lexical_category = EntityID(wdqs.extract_wikibase_value(variable)) -# -# def url(self): -# return f"{config.wd_prefix}{self.id}" -# -# def upload_foreign_id_to_wikidata(self, -# foreign_id: ForeignID = None): -# """Upload to enrich the wonderful Wikidata <3""" -# logger = logging.getLogger(__name__) -# if foreign_id is None: -# raise Exception("Foreign id was None") -# print(f"Uploading {foreign_id.id} to {self.id}: {self.lemma}") -# statement = wbi_datatype.ExternalID( -# prop_nr=foreign_id.property, -# value=foreign_id.id, -# ) -# described_by_source = wbi_datatype.ItemID( -# prop_nr="P1343", # stated in -# value=foreign_id.source_item_id -# ) -# # TODO does this overwrite or append? -# item = wbi_core.ItemEngine( -# data=[statement, -# described_by_source], -# item_id=self.id -# ) -# # debug WBI error -# # print(item.get_json_representation()) -# result = item.write( -# config.login_instance, -# edit_summary=f"Added foreign identifier with [[{config.tool_url}]]" -# ) -# logger.debug(f"result from WBI:{result}") -# print(self.url()) -# # exit(0) -# -# def count_number_of_senses_with_P5137(self): -# """Returns an int""" -# result = (execute_sparql_query(f''' -# SELECT -# (COUNT(?sense) as ?count) -# WHERE {{ -# VALUES ?l {{wd:{self.id}}}. -# ?l ontolex:sense ?sense. -# ?sense skos:definition ?gloss. -# # Exclude lexemes without a linked QID from at least one sense -# ?sense wdt:P5137 []. 
-# }}''')) -# count: int = wdqs.extract_count(result) -# logging.debug(f"count:{count}") -# return count -# -# def add_usage_example( -# document_id=None, -# sentence=None, -# lid=None, -# form_id=None, -# sense_id=None, -# word=None, -# publication_date=None, -# language_style=None, -# type_of_reference=None, -# source=None, -# line=None, -# ): -# # TODO convert to use OOP -# logger = logging.getLogger(__name__) -# # Use WikibaseIntegrator aka wbi to upload the changes in one edit -# link_to_form = wbi_datatype.Form( -# prop_nr="P5830", -# value=form_id, -# is_qualifier=True -# ) -# link_to_sense = wbi_datatype.Sense( -# prop_nr="P6072", -# value=sense_id, -# is_qualifier=True -# ) -# if language_style == "formal": -# style = "Q104597585" -# else: -# if language_style == "informal": -# style = "Q901711" -# else: -# print(_("Error. Language style {} ".format(language_style) + -# "not one of (formal,informal). Please report a bug at " + -# "https://github.com/egils-consulting/LexUtils/issues")) -# sleep(config.sleep_time) -# return "error" -# logging.debug("Generating qualifier language_style " + -# f"with {style}") -# language_style_qualifier = wbi_datatype.ItemID( -# prop_nr="P6191", -# value=style, -# is_qualifier=True -# ) -# # oral or written -# if type_of_reference == "written": -# medium = "Q47461344" -# else: -# if type_of_reference == "oral": -# medium = "Q52946" -# else: -# print(_("Error. Type of reference {} ".format(type_of_reference) + -# "not one of (written,oral). Please report a bug at " + -# "https://github.com/egils-consulting/LexUtils/issues")) -# sleep(config.sleep_time) -# return "error" -# logging.debug(_("Generating qualifier type of reference " + -# "with {}".format(medium))) -# type_of_reference_qualifier = wbi_datatype.ItemID( -# prop_nr="P3865", -# value=medium, -# is_qualifier=True -# ) -# if source == "riksdagen": -# if publication_date is not None: -# publication_date = datetime.fromisoformat(publication_date) -# else: -# print(_("Publication date of document {} ".format(document_id) + -# "is missing. We have no fallback for that at the moment. 
" + -# "Abort adding usage example.")) -# return "error" -# stated_in = wbi_datatype.ItemID( -# prop_nr="P248", -# value="Q21592569", -# is_reference=True -# ) -# # TODO lookup if we have a QID for the source -# document_id = wbi_datatype.ExternalID( -# prop_nr="P8433", # Riksdagen Document ID -# value=document_id, -# is_reference=True -# ) -# reference = [ -# stated_in, -# document_id, -# wbi_datatype.Time( -# prop_nr="P813", # Fetched today -# time=datetime.utcnow().replace( -# tzinfo=timezone.utc -# ).replace( -# hour=0, -# minute=0, -# second=0, -# ).strftime("+%Y-%m-%dT%H:%M:%SZ"), -# is_reference=True, -# ), -# wbi_datatype.Time( -# prop_nr="P577", # Publication date -# time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"), -# is_reference=True, -# ), -# type_of_reference_qualifier, -# ] -# elif source == "europarl": -# stated_in = wbi_datatype.ItemID( -# prop_nr="P248", -# value="Q5412081", -# is_reference=True -# ) -# reference = [ -# stated_in, -# wbi_datatype.Time( -# prop_nr="P813", # Fetched today -# time=datetime.utcnow().replace( -# tzinfo=timezone.utc -# ).replace( -# hour=0, -# minute=0, -# second=0, -# ).strftime("+%Y-%m-%dT%H:%M:%SZ"), -# is_reference=True, -# ), -# wbi_datatype.Time( -# prop_nr="P577", # Publication date -# time="+2012-05-12T00:00:00Z", -# is_reference=True, -# ), -# wbi_datatype.Url( -# prop_nr="P854", # reference url -# value="http://www.statmt.org/europarl/v7/sv-en.tgz", -# is_reference=True, -# ), -# # filename in archive -# wbi_datatype.String( -# (f"europarl-v7.{config.language_code}" + -# f"-en.{config.language_code}"), -# "P7793", -# is_reference=True, -# ), -# # line number -# wbi_datatype.String( -# str(line), -# "P7421", -# is_reference=True, -# ), -# type_of_reference_qualifier, -# ] -# elif source == "ksamsok": -# # No date is provided unfortunately, so we set it to unknown value -# stated_in = wbi_datatype.ItemID( -# prop_nr="P248", -# value="Q7654799", -# is_reference=True -# ) -# document_id = wbi_datatype.ExternalID( -# # K-Samsök URI -# prop_nr="P1260", -# value=document_id, -# is_reference=True -# ) -# reference = [ -# stated_in, -# document_id, -# wbi_datatype.Time( -# prop_nr="P813", # Fetched today -# time=datetime.utcnow().replace( -# tzinfo=timezone.utc -# ).replace( -# hour=0, -# minute=0, -# second=0, -# ).strftime("+%Y-%m-%dT%H:%M:%SZ"), -# is_reference=True, -# ), -# wbi_datatype.Time( -# # We don't know the value of the publication dates unfortunately -# prop_nr="P577", # Publication date -# time="", -# snak_type="somevalue", -# is_reference=True, -# ), -# type_of_reference_qualifier, -# ] -# else: -# raise ValueError(f"Did not recognize the source {source}") -# if reference is None: -# raise ValueError(_("No reference defined, cannot add usage example")) -# else: -# # This is the usage example statement -# claim = wbi_datatype.MonolingualText( -# sentence, -# "P5831", -# language=config.language_code, -# # Add qualifiers -# qualifiers=[ -# link_to_form, -# link_to_sense, -# language_style_qualifier, -# ], -# # Add reference -# references=[reference], -# ) -# if config.debug_json: -# logging.debug(f"claim:{claim.get_json_representation()}") -# item = wbi_core.ItemEngine( -# item_id=lid, -# ) -# # Updating appends by default in v0.11.0 -# item.update(data=[claim]) -# # if config.debug_json: -# # print(item.get_json_representation()) -# if config.login_instance is None: -# # Authenticate with WikibaseIntegrator -# print("Logging in with Wikibase Integrator") -# config.login_instance = wbi_login.Login( -# user=config.username, 
pwd=config.password -# ) -# result = item.write( -# config.login_instance, -# edit_summary=( -# _("Added usage example " + -# "with [[Wikidata:Tools/LexUtils]] v{}".format(config.version)) -# ) -# ) -# if config.debug_json: -# logging.debug(f"result from WBI:{result}") -# # TODO add handling of result from WBI and return True == Success or False -# return result -# -# def find_duplicates(self): -# """Lookup duplicates using the -# Wikidata Lexeme Forms Duplicate API""" -# url = ("https://lexeme-forms.toolforge.org/api/v1/duplicates/www/" -# f"{self.language_code.value}/{self.lemma}") -# response = requests.get(url, headers={"Accept": "application/json"}) -# if response.status_code == 204: -# return None -# elif response.status_code == 200: -# return response.json() -# else: -# raise Exception(f"Got {response.status_code}: {response.text}") -# -# -# class LexemeLanguage: -# lexemes: List[Lexeme] -# language_code: WikimediaLanguageCode -# language_qid: WikimediaLanguageQID -# senses_with_P5137_per_lexeme: float -# senses_with_P5137: int -# forms: int -# forms_with_an_example: int -# forms_without_an_example: List[Form] -# lexemes_count: int -# -# def __init__(self, language_code: str): -# self.language_code = WikimediaLanguageCode(language_code) -# self.language_qid = WikimediaLanguageQID[self.language_code.name] -# -# def fetch_forms_missing_an_example(self): -# logger = logging.getLogger(__name__) -# results = execute_sparql_query(f''' -# #title:Forms that have no example demonstrating them -# select ?lexeme ?form ?form_representation ?category -# (group_concat(distinct ?feature; separator = ",") as ?grammatical_features) -# WHERE {{ -# ?lexeme dct:language wd:{self.language_qid.value}; -# wikibase:lemma ?lemma; -# wikibase:lexicalCategory ?category; -# ontolex:lexicalForm ?form. -# ?form ontolex:representation ?form_representation; -# wikibase:grammaticalFeature ?feature. -# MINUS {{ -# ?lexeme p:P5831 ?statement. -# ?statement ps:P5831 ?example; -# pq:P6072 []; -# pq:P5830 ?form_with_example. -# }} -# }} -# group by ?lexeme ?form ?form_representation ?category -# limit 50''') -# self.forms_without_an_example = [] -# logger.info("Got the data") -# logger.info(f"data:{results.keys()}") -# try: -# #logger.info(f"data:{results['results']['bindings']}") -# for entry in results["results"]['bindings']: -# logger.info(f"data:{entry.keys()}") -# logging.info(f"lexeme_json:{entry}") -# f = Form(entry) -# self.forms_without_an_example.append(f) -# except KeyError: -# logger.error("Got no results") -# logger.info(f"Got {len(self.forms_without_an_example)} " -# f"forms from WDQS for language {self.language_code.name}") -# -# def fetch_lexemes(self): -# # TODO port to use the Lexeme class instead of heavy dataframes which we don't need -# raise Exception("This is deprecated.") -# results = execute_sparql_query(f''' -# SELECT DISTINCT -# ?entity_lid ?form ?word (?categoryLabel as ?category) -# (?grammatical_featureLabel as ?feature) ?sense ?gloss -# WHERE {{ -# ?entity_lid a ontolex:LexicalEntry; dct:language wd:{self.language_qid.value}. -# VALUES ?excluded {{ -# # exclude affixes and interfix -# wd:Q62155 # affix -# wd:Q134830 # prefix -# wd:Q102047 # suffix -# wd:Q1153504 # interfix -# }} -# MINUS {{?entity_lid wdt:P31 ?excluded.}} -# ?entity_lid wikibase:lexicalCategory ?category. -# -# # We want only lexemes with both forms and at least one sense -# ?entity_lid ontolex:lexicalForm ?form. -# ?entity_lid ontolex:sense ?sense. 
-# -# # Exclude lexemes without a linked QID from at least one sense -# ?sense wdt:P5137 []. -# ?sense skos:definition ?gloss. -# # Get only the swedish gloss, exclude otherwise -# FILTER(LANG(?gloss) = "{self.language_code.value}") -# -# # This remove all lexemes with at least one example which is not -# # ideal -# MINUS {{?entity_lid wdt:P5831 ?example.}} -# ?form wikibase:grammaticalFeature ?grammatical_feature. -# # We extract the word of the form -# ?form ontolex:representation ?word. -# SERVICE wikibase:label -# {{ bd:serviceParam wikibase:language "{self.language_code.value},en". }} -# }} -# limit {config.sparql_results_size} -# offset {config.sparql_offset} -# ''') -# self.lexemes = [] -# for lexeme_json in results: -# logging.debug(f"lexeme_json:{lexeme_json}") -# l = Lexeme.parse_wdqs_json(lexeme_json) -# self.lexemes.append(l) -# logging.info(f"Got {len(self.lexemes)} lexemes from " -# f"WDQS for language {self.language_code.name}") -# -# def count_number_of_lexemes(self): -# """Returns an int""" -# logger = logging.getLogger(__name__) -# result = (execute_sparql_query(f''' -# SELECT -# (COUNT(?l) as ?count) -# WHERE {{ -# ?l dct:language wd:{self.language_qid.value}. -# }}''')) -# logger.debug(f"result:{result}") -# count: int = wdqs.extract_count(result) -# logging.debug(f"count:{count}") -# return count -# -# def count_number_of_senses_with_p5137(self): -# """Returns an int""" -# logger = logging.getLogger(__name__) -# result = (execute_sparql_query(f''' -# SELECT -# (COUNT(?sense) as ?count) -# WHERE {{ -# ?l dct:language wd:{self.language_qid.value}. -# ?l ontolex:sense ?sense. -# ?sense skos:definition ?gloss. -# # Exclude lexemes without a linked QID from at least one sense -# ?sense wdt:P5137 []. -# }}''')) -# logger.debug(f"result:{result}") -# count: int = wdqs.extract_count(result) -# logging.debug(f"count:{count}") -# return count -# -# def count_number_of_forms_without_an_example(self): -# """Returns an int""" -# # TODO fix this to count all senses in a given language -# result = (execute_sparql_query(f''' -# SELECT -# (COUNT(?form) as ?count) -# WHERE {{ -# ?l dct:language wd:{self.language_qid.value}. -# ?l ontolex:lexicalForm ?form. -# ?l ontolex:sense ?sense. -# # exclude lexemes that already have at least one example -# MINUS {{?l wdt:P5831 ?example.}} -# # Exclude lexemes without a linked QID from at least one sense -# ?sense wdt:P5137 []. 
-# }}''')) -# count: int = wdqs.extract_count(result) -# logging.debug(f"count:{count}") -# self.forms_without_an_example = count -# -# def count_number_of_forms_with_examples(self): -# pass -# -# def count_number_of_forms(self): -# pass -# -# def calculate_statistics(self): -# self.lexemes_count: int = self.count_number_of_lexemes() -# self.senses_with_P5137: int = self.count_number_of_senses_with_p5137() -# self.calculate_senses_with_p5137_per_lexeme() -# -# def calculate_senses_with_p5137_per_lexeme(self): -# self.senses_with_P5137_per_lexeme = round( -# self.senses_with_P5137 / self.lexemes_count, 3 -# ) -# -# def print(self): -# print(f"{self.language_code.name} has " -# f"{self.senses_with_P5137} senses with linked QID in " -# f"total on {self.lexemes_count} lexemes " -# f"which is {self.senses_with_P5137_per_lexeme} per lexeme.") -# -# # TODO decide where to put this code -# class LexemeStatistics: -# total_lexemes: int -# -# def __init__(self): -# self.calculate_total_lexemes() -# self.rank_languages_based_on_statistics() -# -# def calculate_total_lexemes(self) -> int: -# """Calculate how many lexemes exists in Wikidata""" -# result = (execute_sparql_query(f''' -# SELECT -# (COUNT(?l) as ?count) -# WHERE {{ -# ?l a ontolex:LexicalEntry. -# }}''')) -# count: int = wdqs.extract_count(result) -# logging.debug(f"count:{count}") -# self.total_lexemes = count -# -# def rank_languages_based_on_statistics(self): -# logger = logging.getLogger(__name__) -# language_objects = [] -# print("Fetching data...") -# for language_code in WikimediaLanguageCode: -# logger.info(f"Working on {language_code.name}") -# language = LexemeLanguage(language_code) -# language.calculate_statistics() -# language_objects.append(language) -# sorted_by_senses_with_p5137_per_lexeme = sorted( -# language_objects, -# key=lambda language: language.senses_with_P5137_per_lexeme, -# reverse=True -# ) -# print("Languages ranked by most senses linked to items:") -# for language in sorted_by_senses_with_p5137_per_lexeme: -# language.print() -# # Generator expression -# total_lexemes_among_supported_languages: int = sum( -# language.lexemes_count for language in language_objects -# ) -# # logger.debug(f"total:{total_lexemes_among_supported_languages}") -# percent = round( -# total_lexemes_among_supported_languages * 100 / self.total_lexemes -# ) -# print(f"These languages have {total_lexemes_among_supported_languages} " -# f"lexemes out of {self.total_lexemes} in total ({percent}%)") -# -# -class Item(Entity): - """This represents an item in Wikidata - We always work on one language at a time, - so don't bother with languages here and keep to simple strings""" - id: str = None - label: str = None - description: str = None - aliases: List[str] = None - - def __init__(self, - id: str = None, - json: str = None, - label: str = None, - description: str = None, - aliases: List[str] = None, - task: Task = None): - if json is not None: - self.parse_json(json) - else: - if id is not None: - self.id = str(EntityID(id)) - if description is None and label is None and aliases is None: - logging.debug("No of description, label or aliases received") - if task is None: - raise ValueError("Got no task") - if not isinstance(task, Task): - raise ValueError("task was not a Task object") - self.fetch_label_and_description_and_aliases(task=task) - elif label is None or aliases is None: - raise ValueError("This is not supported. 
" - "Either both state the label and " - "aliases or None of them") - else: - self.label = label - self.aliases = aliases - self.description = description - - def __str__(self): - return f"{self.label}, see {self.url()}" - - def parse_json(self, json): - """Parse the WDQS json""" - logger = logging.getLogger(__name__) - try: - logger.debug(f'item_json:{json["item"]}') - self.id = str(EntityID(json["item"]["value"])) - except KeyError: - pass - try: - logger.debug(json["itemLabel"]) - self.label = (json["itemLabel"]["value"]) - except KeyError: - logger.info(f"no label found") - - def parse_from_wdqs_json(self, json): - """Parse the json into the object""" - for variable in json: - logging.debug(variable) - if variable == "item": - self.id = variable - if variable == "itemLabel": - self.label = variable - - def fetch_label_and_description_and_aliases(self, - task: Task = None): - """Fetch label and aliases in the task language from the Wikidata API""" - if task is None: - raise ValueError("task was None") - if not isinstance(task, Task): - raise ValueError("task was not a Task object") - from src.helpers.console import console - with console.status(f"Fetching {task.language_code.name.title()} label and aliases from the Wikidata API..."): - wbi = WikibaseIntegrator() - item = wbi.item.get(self.id) - label = item.labels.get(task.language_code.value) - if label is not None: - self.label = str(label) - description = item.descriptions.get(task.language_code.value) - if description is not None: - self.description = str(description) - aliases: List[Alias] = item.aliases.get(task.language_code.value) - # logging.debug(f"aliases from wbi:{item.aliases.get('en')}") - if aliases is not None: - self.aliases = [] - for alias in aliases: - self.aliases.append(str(alias)) - # logging.debug(f"appended:{alias.value}") - # logging.debug(f"aliases:{self.aliases}") - - -class Items: - list: List[Item] = [] - - def fetch_based_on_label(self): - pass - - def random_shuffle_list(self): - random.shuffle(self.list) diff --git a/src/models/wikimedia/__init__.py b/src/models/wikimedia/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/wikimedia/enum.py b/src/models/wikimedia/enum.py new file mode 100644 index 0000000..ef8afeb --- /dev/null +++ b/src/models/wikimedia/enum.py @@ -0,0 +1,35 @@ +from enum import Enum + + +class WikimediaLanguageCode(Enum): + BASQUE = "eu" + BENGALI = "bn" + BOKMÅL = "nb" + CZECH = "cs" + DANISH = "da" + ENGLISH = "en" + ESTONIAN = "et" + FRENCH = "fr" + GERMAN = "de" + HEBREW = "he" + LATIN = "la" + MALAYALAM = "ml" + RUSSIAN = "ru" + SWEDISH = "sv" + + +class WikimediaLanguageQID(Enum): + BASQUE = "Q8752" + BENGALI = "Q9610" + BOKMÅL = "Q25167" + CZECH = "Q9056" + DANISH = "Q9035" + ENGLISH = "Q1860" + ESTONIAN = "Q9072" + FRENCH = "Q150" + GERMAN = "Q188" + HEBREW = "Q9288" + LATIN = "Q397" + MALAYALAM = "Q36236" + RUSSIAN = "Q7737" + SWEDISH = "Q9027" \ No newline at end of file diff --git a/src/models/wikimedia/wikidata/__init__.py b/src/models/wikimedia/wikidata/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/wikimedia/wikidata/entity.py b/src/models/wikimedia/wikidata/entity.py new file mode 100644 index 0000000..89a91e9 --- /dev/null +++ b/src/models/wikimedia/wikidata/entity.py @@ -0,0 +1,57 @@ +import logging +from typing import Optional + +from pydantic import BaseModel +from wikibaseintegrator import WikibaseIntegrator # type: ignore +from wikibaseintegrator import wbi_config +from wikibaseintegrator.datatypes import 
BaseDataType # type: ignore +from wikibaseintegrator.wbi_enums import ActionIfExists # type: ignore + +import config + +wbi_config.config['USER_AGENT'] = config.user_agent + + +class Entity(BaseModel): + """Base entity with code that is the same for both items and lexemes""" + id: Optional[str] + label: Optional[str] + + def __eq__(self, other): + """This helps in removing duplicates + https://stackoverflow.com/questions/4169252/remove-duplicates-in-list-of-object-with-python""" + return self.id == other.id + + def __hash__(self): + return hash(('id', self.id)) + + def upload_one_statement_to_wikidata(self, + statement: BaseDataType = None, + summary: str = None, + editgroups_hash: str = None): + """Upload one statement and always append + This mandates an editgroups hash to be supplied""" + logger = logging.getLogger(__name__) + if self.id is None: + raise ValueError("no id on item") + if statement is None: + raise ValueError("Statement was None") + if summary is None: + raise ValueError("summary was None") + if editgroups_hash is None: + raise ValueError("editgroup_hash was None") + if config.login_instance is None: + raise ValueError("No login instance in config.login_instance") + wbi = WikibaseIntegrator(login=config.login_instance) + item = wbi.item.get(self.id) + item.add_claims( + [statement], + action_if_exists=ActionIfExists.APPEND) + result = item.write( + summary=f"Added {summary} with [[{config.tool_wikipage}]] " + f"([[:toolforge:editgroups/b/CB/{editgroups_hash}|details]])" + ) + logger.debug(f"result from WBI:{result}") + + def url(self): + return f"http://www.wikidata.org/entity/{self.id}" diff --git a/src/models/wikimedia/wikidata/entiyt_id.py b/src/models/wikimedia/wikidata/entiyt_id.py new file mode 100644 index 0000000..fe86931 --- /dev/null +++ b/src/models/wikimedia/wikidata/entiyt_id.py @@ -0,0 +1,35 @@ +import logging + +import config +from src.models.wikimedia.wikidata.enums import WikidataNamespaceLetters + +# TODO convert this to special constr type with a validator +class EntityId: + letter: WikidataNamespaceLetters + # This can be e.g. 
"32698-F1" in the case of a lexeme + rest: str + + def __init__(self, + entity_id: str): + logger = logging.getLogger(__name__) + if entity_id is not None: + # Remove prefix if found + if config.wd_prefix in entity_id: + logger.debug("Removing prefix") + entity_id = entity_id.replace(config.wd_prefix, "") + if len(entity_id) > 1: + logger.info(f"entity_id:{entity_id}") + self.letter = WikidataNamespaceLetters(entity_id[0]) + self.rest = entity_id[1:] + else: + raise ValueError("Entity ID was too short.") + else: + raise ValueError("Entity ID was None") + + def __str__(self): + return f"{self.letter.value}{self.rest}" + + # def extract_wdqs_json_entity_id(self, json: Dict, sparql_variable: str): + # self.__init__(json[sparql_variable]["value"].replace( + # config.wd_prefix, "" + # )) diff --git a/src/models/wikimedia/wikidata/enums.py b/src/models/wikimedia/wikidata/enums.py new file mode 100644 index 0000000..1e40e46 --- /dev/null +++ b/src/models/wikimedia/wikidata/enums.py @@ -0,0 +1,36 @@ +from enum import Enum + + +class WikidataGrammaticalFeature(Enum): + ACTIVE_VOICE = "Q1317831" + DEFINITE = "Q53997851" + GENITIVE_CASE = "Q146233" + IMPERATIVE = "Q22716" + INDEFINITE = "Q53997857" + INFINITIVE = "Q179230" + NOMINATIVE_CASE = "Q131105" + PASSIVE_VOICE = "Q1194697" + PLURAL = "Q146786" + PRESENT_TENSE = "Q192613" + PRETERITE = "Q442485" + SIMPLE_PRESENT = "Q3910936" + SINGULAR = "Q110786" + SUPINE = "Q548470" + THIRD_PERSON_SINGULAR = "Q51929447" + + +class WikidataLexicalCategory(Enum): + ADJECTIVE = "Q34698" + ADVERB = "Q380057" + AFFIX = "Q62155" + NOUN = "Q1084" + PROPER_NOUN = "Q147276" + VERB = "Q24905" + + +class WikidataNamespaceLetters(Enum): + ITEM = "Q" + LEXEME = "L" + PROPERTY = "P" + # FORM = "F" + # SENSE = "S" diff --git a/src/models/wikimedia/wikidata/foreign_id.py b/src/models/wikimedia/wikidata/foreign_id.py new file mode 100644 index 0000000..85e5c79 --- /dev/null +++ b/src/models/wikimedia/wikidata/foreign_id.py @@ -0,0 +1,21 @@ +from typing import Optional + +from src.models.wikimedia.wikidata.entiyt_id import EntityId + + +class ForeignID: + id: Optional[str] + property: Optional[str] # This is the property with type ExternalId + source_item_id: Optional[str] # This is the Q-item for the source + + def __init__(self, + id: Optional[str] = None, + property: Optional[str] = None, + source_item_id: Optional[str] = None): + self.id = id + if property is None: + raise ValueError("property was None") + self.property = str(EntityId(property)) + if source_item_id is None: + raise ValueError("source_item_id was None") + self.source_item_id = str(EntityId(source_item_id)) diff --git a/src/models/wikimedia/wikidata/item.py b/src/models/wikimedia/wikidata/item.py new file mode 100644 index 0000000..1e041d0 --- /dev/null +++ b/src/models/wikimedia/wikidata/item.py @@ -0,0 +1,50 @@ +from typing import List, Optional + +from wikibaseintegrator import WikibaseIntegrator # type: ignore +from wikibaseintegrator import wbi_config # type: ignore +from wikibaseintegrator.models import Alias # type: ignore + +import config +from src.models.task import Task +from src.models.wikimedia.wikidata.entity import Entity + +wbi_config.config['USER_AGENT'] = config.user_agent + + +class Item(Entity): + """This represents an item in Wikidata + We always work on one language at a time, + so we don't bother with languages here and keep to simple strings""" + description: Optional[str] = None + aliases: Optional[List[str]] = None + + def __str__(self): + return f"{self.label}, see {self.url()}" + 
+ def fetch_label_and_description_and_aliases(self, + task: Task = None): + """Fetch label and aliases in the task language from the Wikidata API""" + if task is None: + raise ValueError("task was None") + if not isinstance(task, Task): + raise ValueError("task was not a Task object") + if task.language_code is None: + raise ValueError("task.language_code was None") + from src.helpers.console import console + with console.status(f"Fetching {task.language_code.name.title()} label and aliases from the Wikidata API..."): + wbi = WikibaseIntegrator() + item = wbi.item.get(self.id) + label = item.labels.get(task.language_code.value) + if label is not None: + self.label = str(label) + description = item.descriptions.get(task.language_code.value) + if description is not None: + self.description = str(description) + aliases: List[Alias] = item.aliases.get(task.language_code.value) + # logging.debug(f"aliases from wbi:{item.aliases.get('en')}") + if aliases is not None: + self.aliases = [] + for alias in aliases: + self.aliases.append(str(alias)) + # logging.debug(f"appended:{alias.value}") + # logging.debug(f"aliases:{self.aliases}") diff --git a/src/models/wikimedia/wikidata/sparql_item.py b/src/models/wikimedia/wikidata/sparql_item.py new file mode 100644 index 0000000..e486a13 --- /dev/null +++ b/src/models/wikimedia/wikidata/sparql_item.py @@ -0,0 +1,18 @@ +from pydantic import BaseModel + +from src.models.wikimedia.wikidata.entiyt_id import EntityId +from src.models.wikimedia.wikidata.item import Item + + +class Value(BaseModel): + value: str + + +class SparqlItem(Item): + """This class models the data we get from SPARQL""" + item: Value + itemLabel: Value + + def validate_qid_and_copy_label(self): + self.id = str(EntityId(self.item.value)) + self.label = self.itemLabel.value diff --git a/src/tasks.py b/src/tasks.py index 5b80b5e..1e1a7a9 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -4,7 +4,7 @@ # When adding a new task, also add it in the enum tasks = [ - Task( + Task(**dict( id=TaskIds.SCHOLARLY_ARTICLES, label="Add main subject to scholarly articles and preprints", language_code=SupportedLanguageCode.ENGLISH, @@ -22,14 +22,14 @@ "sub forms of screening have been matched." ), number_of_queries_per_search_string=2 - ), - Task( + )), + Task(**dict( id=TaskIds.RIKSDAGEN_DOCUMENTS, label="Add main subject to documents from Riksdagen", language_code=SupportedLanguageCode.SWEDISH, best_practice_information=None - ), - Task( + )), + Task(**dict( id=TaskIds.THESIS, label="Add main subject to thesis' and technical reports", language_code=SupportedLanguageCode.ENGLISH, @@ -46,11 +46,11 @@ "avoid the more general 'cancer screening' until all " "sub forms of screening have been matched." ), - ), - Task( + )), + Task(**dict( id=TaskIds.ACADEMIC_JOURNALS, label="Add main subject to academic journals", language_code=SupportedLanguageCode.ENGLISH, best_practice_information=None - ), + )), ]
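
The CirrusSearch filter built in ScholarlyArticleItems.fetch_based_on_label restricts the search to instances of scholarly article (P31=Q13442814) that do not yet carry the suggested main subject (P921). A minimal sketch of that string-building logic follows; the function name build_cirrussearch_parameters is illustrative and not part of the patch.

# Sketch of the CirrusSearch filter used when fetching scholarly article candidates.
def build_cirrussearch_parameters(main_subject_qid: str,
                                  limit_to_items_without_p921: bool = False) -> str:
    if limit_to_items_without_p921:
        # Exclude anything that already has some P921 (main subject) statement.
        return "haswbstatement:P31=Q13442814 -haswbstatement:P921"
    # Exclude only items that already carry this particular main subject.
    return f"haswbstatement:P31=Q13442814 -haswbstatement:P921={main_subject_qid}"


print(build_cirrussearch_parameters("Q1234"))
# haswbstatement:P31=Q13442814 -haswbstatement:P921=Q1234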
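
Suggestion is now a pydantic model that carries an argparse.Namespace, which is why its Config sets arbitrary_types_allowed. Below is a sketch of construction and of the validation that replaces the old hand-written None checks; the Namespace attributes shown are placeholders, not the full CLI surface.

import argparse

from pydantic import ValidationError

from src.models.suggestion import Suggestion
from src.models.wikimedia.wikidata.item import Item
from src.tasks import tasks

# argparse.Namespace is not a type pydantic can validate on its own;
# arbitrary_types_allowed lets it pass with a plain isinstance check.
args = argparse.Namespace(no_aliases=False, limit_to_items_without_p921=False, debug_sparql=False)
suggestion = Suggestion(item=Item(id="Q1234", label="example subject"), task=tasks[0], args=args)
# Note: extract_search_strings() is no longer invoked by an __init__, so callers run it themselves.

# The old __init__ raised ValueError("item was None"); pydantic now rejects
# a missing item with a ValidationError.
try:
    Suggestion(task=tasks[0], args=args)
except ValidationError as error:
    print(error)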
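
src/tasks.py now builds each Task as Task(**dict(...)); with Task being a pydantic BaseModel this is equivalent to passing the keyword arguments directly, and required fields are enforced by pydantic rather than by the removed __init__ checks. A small sketch using the enums from the diff:

from pydantic import ValidationError

from src.helpers.enums import SupportedLanguageCode, TaskIds
from src.models.task import Task

# Same effect as the Task(**dict(...)) calls in src/tasks.py.
task = Task(
    id=TaskIds.RIKSDAGEN_DOCUMENTS,
    label="Add main subject to documents from Riksdagen",
    language_code=SupportedLanguageCode.SWEDISH,
    best_practice_information=None,
)
print(task)  # __str__ prints the label

# Omitting a required field now raises ValidationError instead of ValueError("Got no id").
try:
    Task(label="incomplete task", best_practice_information=None)
except ValidationError as error:
    print(error)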
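
The new Entity base model defines __eq__ and __hash__ on the Wikidata id alone, so items fetched by more than one search string can be de-duplicated by QID. A sketch; the dict.fromkeys de-duplication is illustrative and not taken from the patch.

from src.models.wikimedia.wikidata.item import Item

first = Item(id="Q1", label="Universe")
second = Item(id="Q1", label="universe")  # same QID, different label casing
third = Item(id="Q2", label="Earth")

assert first == second  # equality looks only at the id
deduplicated = list(dict.fromkeys([first, second, third]))
print(len(deduplicated))  # 2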
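
SparqlItem models one row of a WDQS result; validate_qid_and_copy_label() strips the http://www.wikidata.org/entity/ prefix via EntityId and copies the label into the plain Item fields. A sketch with a hand-written binding (the dict literal is an assumed example, not output captured from the tool):

from src.models.wikimedia.wikidata.sparql_item import SparqlItem

binding = {
    "item": {"value": "http://www.wikidata.org/entity/Q13442814"},
    "itemLabel": {"value": "scholarly article"},
}

item = SparqlItem(**binding)  # nested dicts are coerced into Value models
item.validate_qid_and_copy_label()
print(item.id)     # Q13442814
print(item.label)  # scholarly article
print(item.url())  # http://www.wikidata.org/entity/Q13442814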