This repository has been archived by the owner on Jan 23, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #42 from dpriskorn/ask_when_limit_reached
Ask when limit reached and use pydantic's BaseModel
- Loading branch information
Showing
37 changed files
with
935 additions
and
1,412 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
@startuml | ||
'https://plantuml.com/class-diagram | ||
|
||
abstract class BaseModel | ||
|
||
package wikimedia { | ||
enum WikimediaLanguageCode { | ||
BASQUE | ||
BENGALI | ||
BOKMÅL | ||
CZECH | ||
DANISH | ||
ENGLISH | ||
ESTONIAN | ||
FRENCH | ||
GERMAN | ||
HEBREW | ||
LATIN | ||
MALAYALAM | ||
RUSSIAN | ||
SWEDISH | ||
} | ||
enum WikimediaLanguageQID { | ||
BASQUE = "Q8752" | ||
BENGALI = "Q9610" | ||
BOKMÅL = "Q25167" | ||
CZECH = "Q9056" | ||
DANISH = "Q9035" | ||
ENGLISH = "Q1860" | ||
ESTONIAN = "Q9072" | ||
FRENCH = "Q150" | ||
GERMAN = "Q188" | ||
HEBREW = "Q9288" | ||
LATIN = "Q397" | ||
MALAYALAM = "Q36236" | ||
RUSSIAN = "Q7737" | ||
SWEDISH = "Q9027" | ||
} | ||
package wikidata { | ||
class Entity { | ||
id: Optional[str] | ||
label: str | ||
upload_one_statement_to_wikidata() | ||
url() | ||
} | ||
class EntityID{ | ||
letter: WikidataNamespaceLetters | ||
rest: str | ||
__init__() | ||
__str__() | ||
} | ||
class ForeignID{ | ||
__init__() | ||
} | ||
class SparqlItem{ | ||
item: Value | ||
itemLabel: Value | ||
validate_qid_and_copy_label() | ||
} | ||
class Item{ | ||
label: Optional[str] = None | ||
description: Optional[str] = None | ||
aliases: Optional[List[str]] = None | ||
__init__() | ||
__str__() | ||
parse_json() | ||
parse_from_wdqs_json() | ||
fetch_label_and_description_and_aliases() | ||
} | ||
enum WikidataGrammaticalFeature { | ||
ACTIVE_VOICE | ||
DEFINITE | ||
GENITIVE_CASE | ||
IMPERATIVE | ||
INDEFINITE | ||
INFINITIVE | ||
NOMINATIVE_CASE | ||
PASSIVE_VOICE | ||
PLURAL | ||
PRESENT_TENSE | ||
PRETERITE | ||
SIMPLE_PRESENT | ||
SINGULAR | ||
SUPINE | ||
THIRD_PERSON_SINGULAR | ||
} | ||
enum WikidataLexicalCategory { | ||
ADJECTIVE | ||
ADVERB | ||
AFFIX | ||
NOUN | ||
PROPER_NOUN | ||
VERB | ||
} | ||
enum WikidataNamespaceLetters { | ||
ITEM | ||
LEXEME | ||
PROPERTY | ||
} | ||
} | ||
} | ||
package items { | ||
abstract class Items | ||
class AcademicJournalItems { | ||
fetch_based_on_label() | ||
} | ||
class RiksdagenDocumentItems { | ||
+list | ||
+fetch_based_on_label() | ||
} | ||
|
||
class ScholarlyArticleItems { | ||
+list | ||
+fetch_based_on_label() | ||
} | ||
class ThesisItems { | ||
list | ||
fetch_based_on_label() | ||
} | ||
} | ||
class Suggestion { | ||
item: Item = None | ||
search_strings: List[str] = None | ||
task: Task = None | ||
args: argparse.Namespace = None | ||
__init__() | ||
__str__() | ||
add_to_items() | ||
extract_search_strings() | ||
search_urls() | ||
} | ||
|
||
class Task { | ||
best_practice_information: Union[str, None] = None | ||
id: TaskIds = None | ||
label: str = None | ||
language_code: SupportedLanguageCode = None | ||
number_of_queries_per_search_string = 1 | ||
__init__() | ||
__str__() | ||
} | ||
|
||
class BatchJobs { | ||
job_count | ||
jobs: List[BatchJob] | ||
print_running_jobs() | ||
run_jobs() | ||
} | ||
|
||
class BatchJob { | ||
+suggestion: Suggestion | ||
+items: Items | ||
run() | ||
} | ||
|
||
Items <|-- AcademicJournalItems | ||
Items <|-- RiksdagenDocumentItems | ||
Items <|-- ScholarlyArticleItems | ||
Items <|-- ThesisItems | ||
BaseModel <|-- Entity | ||
BaseModel <|-- Task | ||
BaseModel <|-- Suggestion | ||
BaseModel <|-- BatchJob | ||
BaseModel <|-- BatchJobs | ||
BaseModel <|-- Items | ||
Entity <|-- Item | ||
Item <|-- SparqlItem | ||
|
||
@enduml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
@startuml | ||
'https://plantuml.com/sequence-diagram | ||
|
||
autonumber | ||
actor User | ||
'cloud Wikidata | ||
User -> ItemSubjector : start script | ||
alt "arguments: sparql && limit" | ||
ItemSubjector -> Wikidata : fetch subjects | ||
Wikidata -> ItemSubjector : response | ||
loop "for each item in list" | ||
alt "below limit" | ||
ItemSubjector -> Wikidata : fetch details about the item | ||
Wikidata -> ItemSubjector : response | ||
ItemSubjector -> Wikidata : fetch scientific articles according to SPARQL query built based on the details | ||
Wikidata -> ItemSubjector : response | ||
ItemSubjector -> User : present max 50 items | ||
ItemSubjector -> User : ask for approval of batch | ||
ItemSubjector -> User : show count of batches and matches in the job list in memory | ||
end | ||
alt "above limit" | ||
ItemSubjector -> User : ask before continuing | ||
end | ||
end | ||
alt "user chooses not to continue" | ||
ItemSubjector -> Wikidata : Upload main subjects to all matches | ||
end | ||
end | ||
alt "arguments: sparql && limit && prepare-jobs" | ||
ItemSubjector -> Wikidata : fetch subjects | ||
Wikidata -> ItemSubjector : response | ||
loop "for each item in list" | ||
alt "below limit" | ||
ItemSubjector -> Wikidata : fetch details about the item | ||
Wikidata -> ItemSubjector : response | ||
ItemSubjector -> Wikidata : fetch scientific articles according to SPARQL query built based on the details | ||
Wikidata -> ItemSubjector : response | ||
ItemSubjector -> User : present max 50 items | ||
ItemSubjector -> User : ask for approval of batch | ||
ItemSubjector -> User : show count of batches and matches in the job list in memory | ||
end | ||
alt "above limit" | ||
ItemSubjector -> User : ask before continuing | ||
end | ||
end | ||
alt "user chooses not to continue" | ||
ItemSubjector -> Wikidata : save to job list on disk | ||
end | ||
end | ||
@enduml |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
import logging | ||
|
||
from src import * | ||
import src | ||
|
||
logging.basicConfig(level=logging.DEBUG) | ||
main() | ||
src.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
console-menu | ||
git+git://github.com/LeMyst/[email protected]#egg=wikibaseintegrator | ||
rich~=10.9.0 | ||
SPARQLWrapper~=1.8.5 | ||
SPARQLWrapper~=1.8.5 | ||
pydantic |
Oops, something went wrong.