Skip to content
This repository has been archived by the owner on Jan 23, 2024. It is now read-only.

Commit

Permalink
Merge pull request #42 from dpriskorn/ask_when_limit_reached
Browse files Browse the repository at this point in the history
Ask when limit reached and use pydantic's BaseModel
  • Loading branch information
dpriskorn authored Feb 24, 2022
2 parents ec0209a + 92632e3 commit c619cdb
Show file tree
Hide file tree
Showing 37 changed files with 935 additions and 1,412 deletions.
6 changes: 4 additions & 2 deletions config.example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from pathlib import Path

# Add your botpassword and login here:
from typing import List

username = ""
password = ""

# Global settings
loglevel = logging.WARNING
wiki_user = "User:Username" # Change this to your username
list_of_allowed_aliases = [] # Add elements like this ["API"]
logging.basicConfig(level=logging.WARNING)
list_of_allowed_aliases: List[str] = [] # Add elements like this ["API"]
version = "0.2" # Don't touch this.
wd_prefix = "http://www.wikidata.org/entity/"
endpoint = "https://query.wikidata.org/sparql"
Expand Down
169 changes: 169 additions & 0 deletions diagrams/classes.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
@startuml
'https://plantuml.com/class-diagram

abstract class BaseModel

package wikimedia {
enum WikimediaLanguageCode {
BASQUE
BENGALI
BOKMÅL
CZECH
DANISH
ENGLISH
ESTONIAN
FRENCH
GERMAN
HEBREW
LATIN
MALAYALAM
RUSSIAN
SWEDISH
}
enum WikimediaLanguageQID {
BASQUE = "Q8752"
BENGALI = "Q9610"
BOKMÅL = "Q25167"
CZECH = "Q9056"
DANISH = "Q9035"
ENGLISH = "Q1860"
ESTONIAN = "Q9072"
FRENCH = "Q150"
GERMAN = "Q188"
HEBREW = "Q9288"
LATIN = "Q397"
MALAYALAM = "Q36236"
RUSSIAN = "Q7737"
SWEDISH = "Q9027"
}
package wikidata {
class Entity {
id: Optional[str]
label: str
upload_one_statement_to_wikidata()
url()
}
class EntityID{
letter: WikidataNamespaceLetters
rest: str
__init__()
__str__()
}
class ForeignID{
__init__()
}
class SparqlItem{
item: Value
itemLabel: Value
validate_qid_and_copy_label()
}
class Item{
label: Optional[str] = None
description: Optional[str] = None
aliases: Optional[List[str]] = None
__init__()
__str__()
parse_json()
parse_from_wdqs_json()
fetch_label_and_description_and_aliases()
}
enum WikidataGrammaticalFeature {
ACTIVE_VOICE
DEFINITE
GENITIVE_CASE
IMPERATIVE
INDEFINITE
INFINITIVE
NOMINATIVE_CASE
PASSIVE_VOICE
PLURAL
PRESENT_TENSE
PRETERITE
SIMPLE_PRESENT
SINGULAR
SUPINE
THIRD_PERSON_SINGULAR
}
enum WikidataLexicalCategory {
ADJECTIVE
ADVERB
AFFIX
NOUN
PROPER_NOUN
VERB
}
enum WikidataNamespaceLetters {
ITEM
LEXEME
PROPERTY
}
}
}
package items {
abstract class Items
class AcademicJournalItems {
fetch_based_on_label()
}
class RiksdagenDocumentItems {
+list
+fetch_based_on_label()
}

class ScholarlyArticleItems {
+list
+fetch_based_on_label()
}
class ThesisItems {
list
fetch_based_on_label()
}
}
class Suggestion {
item: Item = None
search_strings: List[str] = None
task: Task = None
args: argparse.Namespace = None
__init__()
__str__()
add_to_items()
extract_search_strings()
search_urls()
}

class Task {
best_practice_information: Union[str, None] = None
id: TaskIds = None
label: str = None
language_code: SupportedLanguageCode = None
number_of_queries_per_search_string = 1
__init__()
__str__()
}

class BatchJobs {
job_count
jobs: List[BatchJob]
print_running_jobs()
run_jobs()
}

class BatchJob {
+suggestion: Suggestion
+items: Items
run()
}

Items <|-- AcademicJournalItems
Items <|-- RiksdagenDocumentItems
Items <|-- ScholarlyArticleItems
Items <|-- ThesisItems
BaseModel <|-- Entity
BaseModel <|-- Task
BaseModel <|-- Suggestion
BaseModel <|-- BatchJob
BaseModel <|-- BatchJobs
BaseModel <|-- Items
Entity <|-- Item
Item <|-- SparqlItem

@enduml
50 changes: 50 additions & 0 deletions diagrams/sequence_sparql.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
@startuml
'https://plantuml.com/sequence-diagram

autonumber
actor User
'cloud Wikidata
User -> ItemSubjector : start script
alt "arguments: sparql && limit"
ItemSubjector -> Wikidata : fetch subjects
Wikidata -> ItemSubjector : response
loop "for each item in list"
alt "below limit"
ItemSubjector -> Wikidata : fetch details about the item
Wikidata -> ItemSubjector : response
ItemSubjector -> Wikidata : fetch scientific articles according to SPARQL query built based on the details
Wikidata -> ItemSubjector : response
ItemSubjector -> User : present max 50 items
ItemSubjector -> User : ask for approval of batch
ItemSubjector -> User : show count of batches and matches in the job list in memory
end
alt "above limit"
ItemSubjector -> User : ask before continuing
end
end
alt "user chooses not to continue"
ItemSubjector -> Wikidata : Upload main subjects to all matches
end
end
alt "arguments: sparql && limit && prepare-jobs"
ItemSubjector -> Wikidata : fetch subjects
Wikidata -> ItemSubjector : response
loop "for each item in list"
alt "below limit"
ItemSubjector -> Wikidata : fetch details about the item
Wikidata -> ItemSubjector : response
ItemSubjector -> Wikidata : fetch scientific articles according to SPARQL query built based on the details
Wikidata -> ItemSubjector : response
ItemSubjector -> User : present max 50 items
ItemSubjector -> User : ask for approval of batch
ItemSubjector -> User : show count of batches and matches in the job list in memory
end
alt "above limit"
ItemSubjector -> User : ask before continuing
end
end
alt "user chooses not to continue"
ItemSubjector -> Wikidata : save to job list on disk
end
end
@enduml
71 changes: 0 additions & 71 deletions fetch_main_subjects.py

This file was deleted.

4 changes: 2 additions & 2 deletions itemsubjector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from src import *
import src

logging.basicConfig(level=logging.DEBUG)
main()
src.main()
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
console-menu
git+git://github.com/LeMyst/[email protected]#egg=wikibaseintegrator
rich~=10.9.0
SPARQLWrapper~=1.8.5
SPARQLWrapper~=1.8.5
pydantic
Loading

0 comments on commit c619cdb

Please sign in to comment.