diff --git a/README.md b/README.md
index 5a9fcff..5f9afdd 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
# Word-Hoarder
-A program for creating a searchable local language dictionary based (mainly) on dumped wiktionary data. Allows users to collect definitions which can be exported as a machine readable flashcard file. Currently supports Latin, Ancient Greek and Old English.
+A program for creating a searchable local language dictionary based (mainly) on extracted wiktionary data. Allows users to collect definitions which can be exported as a machine readable flashcard file. Currently supports Latin, Ancient Greek and Old English.
## Parsing Data
-### dump_parser.py
-This module processes wiktionary dump files which can be found at kaikki.org
+### convert_file_utilities.py
+This module processes extracted wiktionary data files which can be found at kaikki.org
- https://kaikki.org/dictionary/Latin/
- https://kaikki.org/dictionary/Ancient%20Greek/index.html
@@ -11,7 +11,7 @@ This module processes wiktionary dump files which can be found at kaikki.org
See https://github.com/tatuylonen/wiktextract
-The module looks for the files in a subfolder of main directory containing the source files: "dumps_unsorted".
+The module looks for the files in a subfolder of main directory containing the source files: "kaikki_json_files".
The module organizes the data into a standard data structure used in this program.
@@ -36,7 +36,7 @@ Definitions are made of standard python data structures.
Senses:
{
"gloss": string containing a word sense you would find in a single line of a definition in a typical dictionary,
-"tags": tags related to a specific word sense such as "Pre-classical" or "transitive"
+"tags": [tags related to a specific word sense such as "Pre-classical" or "transitive"]
}
### dictionary_LSJ.py and dictionary_Middle_Liddell.py
diff --git a/src/combine_entries.py b/src/combine_entries.py
deleted file mode 100644
index 8b72030..0000000
--- a/src/combine_entries.py
+++ /dev/null
@@ -1,63 +0,0 @@
-
-import pickle
-from load_dict import change_path
-
-change_path('dictionaries')
-with open('new_dictionary_unjoined.txt','rb') as file:
- dictionary = pickle.load(file)
-
-alpha = {}
-for i in range(0,26):
- alpha[chr(i+97)] = 0
-for i in range(len(dictionary['definitions'])):
- if dictionary['definitions'][i]['handle'][0].lower() in alpha:
- alpha[dictionary['definitions'][i]['handle'][0].lower()] = i
-
-'''
-prev = 0
-counter = 0
-for key in alpha:
- print(f"processing {key}s")
- for i in range(prev,alpha[key]):
- if dictionary['definitions'][i] in dictionary['definitions'][i + 1:i + 10]:
- dictionary['definitions'][i]['tags'].append('DUPE')
- prev = alpha[key]
- print(f"{key}s completed")
-
-offset = 0
-for i in range(len(dictionary['definitions'])):
- if "DUPE" in dictionary['definitions'][i - offset]['tags']:
- print(f"deleting {dictionary['definitions'][i - offset]['handle']}")
- del dictionary['definitions'][i - offset]
- offset += 1
-'''
-prev = 0
-counter = 0
-comp = ''
-for key in alpha:
- handles = []
- print(f"processing {key}s")
- for i in range(prev,alpha[key]):
- if comp == dictionary['definitions'][i]:
- continue
- else:
- comp = dictionary['definitions'][i]
- for j in range(1,len(dictionary['definitions'][i + 1:i+20])):
- if dictionary['definitions'][i + j]['handle'] == dictionary['definitions'][i]['handle']:
- print(f"i={i},j={j}; adding {dictionary['definitions'][i]['handle']} to {dictionary['definitions'][i + j]['handle']}")
- dictionary['definitions'][i]['entries'].extend(dictionary['definitions'][i + j]['entries'])
- dictionary['definitions'][i]['roots'].extend(dictionary['definitions'][i + j]['roots'])
- dictionary['definitions'][i + j]['tags'].append('DUPE')
- prev = alpha[key]
- print(f"{key}s completed")
-
-offset = 0
-for i in range(len(dictionary['definitions'])):
- if "DUPE" in dictionary['definitions'][i - offset]['tags']:
- print(f"deleting {dictionary['definitions'][i - offset]['handle']}")
- del dictionary['definitions'][i - offset]
- offset += 1
-
-
-with open(dictionary['file'],mode = 'wb') as openFile:
- pickle.dump(dictionary, openFile)
diff --git a/src/dump_parser.py b/src/convert_file_utilities.py
similarity index 81%
rename from src/dump_parser.py
rename to src/convert_file_utilities.py
index 83462ee..cb1f319 100644
--- a/src/dump_parser.py
+++ b/src/convert_file_utilities.py
@@ -10,7 +10,7 @@
import difflib
import parser_shell
-from load_dict import change_path, pick_language
+from load_dict import change_path, pick_language, KAIKKI_JSON_FILES, SUPPLEMENTARY_LANGUAGE_FILES
import edit_all
from get_selection import get_selection
from language_splitter import split_language
@@ -41,7 +41,9 @@ def filter_tags(gloss_parts, existing_tags, Test):
return new_tags, gloss_parts
def paren_cut(gloss, tags):
-
+ ''' Attempts to cut a parenthetical from the beginning of a sense if it
+ duplicates the tags.
+ '''
if gloss[0] != "(":
return gloss, tags
@@ -59,7 +61,10 @@ def paren_cut(gloss, tags):
-def add_def(senses,new_gloss,gloss_tags):
+def add_def(senses, new_gloss, gloss_tags):
+ ''' Attempts to read item from line 'senses' in json file to find
+ all non-duplicate glosses and collect tags corresponding to each gloss
+ '''
if ")" in new_gloss:
for d in senses:
if new_gloss[new_gloss.find(")") + 2:] == d['gloss']:
@@ -81,6 +86,9 @@ def add_def(senses,new_gloss,gloss_tags):
senses.append({'gloss':new_gloss,'tags':copy.deepcopy(gloss_tags)})
def create_senses(line_senses, tag_list):
+ ''' Iterate through line senses and collect all non-duplicate senses
+ and tags corresponding to each sense.
+ '''
senses = []
dupe_list = []
@@ -132,14 +140,21 @@ def process_glosses(glosses, gloss_tags):
return senses
def get_file_selection(Test, test_file, test_language):
- change_path('dumps_unsorted')
+ ''' User input function to select from available kaikki files
+ '''
+ change_path(KAIKKI_JSON_FILES)
+
if Test:
return test_file, test_language
+
else:
+ # find all .json files in KAIKKI_JSON_FILES
myFiles = glob.glob('*.json')
if myFiles == []:
print("\nSorry no saved dictionaries")
return None, None
+
+ # prompt user to choose a file
else:
options = {'0':f"\nChoose from the following files: (0 to go back)\n"}
for index in range(len(myFiles)):
@@ -150,10 +165,15 @@ def get_file_selection(Test, test_file, test_language):
return None, None
else:
file = myFiles[int(user_input)-1]
+
+ # user must also tell the program what the language is
language = pick_language()
+
return file, language
def print_debug_info(line, counter):
+ ''' Function for viewing unprocessed json data
+ '''
print('\n')
print(f"\tline: {counter}, word: {line['word']}")
print("WORD ITEMS >>>>>>>>>>>>>>")
@@ -164,37 +184,56 @@ def print_debug_info(line, counter):
print(item)
def handle_pos(line):
+ ''' Matches json pos abbreviations with those used in
+ word-hoarder definitions.
+ '''
pos_mapping = {
'adv': 'adverb',
'adj': 'adjective',
'prep': 'preposition',
'intj': 'interjection'
}
-
pos = line['pos']
line['pos'] = pos_mapping.get(pos, pos)
return line['pos']
def handle_senses(line,tag_list):
+ ''' json file contains edge cases where expected parts of the
+ line entry are blank or missing.
+ '''
if 'tags' in line['senses'][0]:
+
tag = line['senses'][0]['tags']
+
+ # the dictionary may contain a few words with 'no-senses' etc. as
+ # the only sense. The user should still have these available because
+ # it is easier to modify than creating a new definition from scratch.
if 'no-senses' in tag or 'no-gloss' in tag or 'empty-gloss' in tag:
if isinstance(tag,list):
return [{'gloss': ", ".join(tag), 'tags': []}]
else:
return [{'gloss': tag, 'tags': []}]
else:
+
+ # normal case when the line has a full complement of senses
return create_senses(line['senses'], tag_list)
else:
+ # line may have senses but missing the 'tags' key
return create_senses(line['senses'], tag_list)
def handle_etymology(line):
+ ''' Check if 'etymology' key is present
+ '''
if 'etymology_text' in line:
return line['etymology_text']
else:
return ''
def handle_parts(line,get_simple=None):
+ ''' Will attempt to convert information in json line to simpleParts
+ if the necessary ingredients are present. Otherwise defaults to
+ line['word']
+ '''
if get_simple:
return get_simple(line['pos'], line['head_templates'][0]['expansion'], line['word']) if 'head_templates' in line else line['word']
else:
@@ -202,6 +241,8 @@ def handle_parts(line,get_simple=None):
def handle_word_entry(line,tag_list,get_simple=None):
+ ''' Retrieve data from json line and assign to word-hoarder entry
+ '''
return {
'partOfSpeech': handle_pos(line),
'principleParts': line['head_templates'][0]['expansion'] if 'head_templates' in line else line['word'],
@@ -213,6 +254,9 @@ def handle_word_entry(line,tag_list,get_simple=None):
def handle_word(line,tag_list,language,get_simple=None):
+ ''' Retrieve data from json line and assign to word-hoarder definition.
+ Definitions have only one entry until de-duplication.
+ '''
return {
'heading': line['word'],
'handle': unidecode(line['word']) if language in ["Latin", "Italian"] else line['word'],
@@ -222,6 +266,9 @@ def handle_word(line,tag_list,language,get_simple=None):
}
def ui_template(new_dictionary,dict_str,shrt_str,cite,cite_2='',dict_f=""):
+ ''' Standard ui prompts to add supplemental dictionaries and display
+ information about data sources.
+ '''
user_input = input(f"\nAdd definitiions from: \"{dict_str}\"?"\
+ "\nType 'y' to add definitions, Press 'Enter' to continue: " )
@@ -234,6 +281,8 @@ def ui_template(new_dictionary,dict_str,shrt_str,cite,cite_2='',dict_f=""):
thank(dict_str,shrt_str,n,cite)
def thank(dict_str,shrt_str,length,cite,cite_2='',dict_f=""):
+ ''' Standard prompt to display attribution/information about data sources.
+ '''
print(f"\nYour dictionary now contains ( {length:,} ) unique definitions after adding {shrt_str}.")
print(f"Data files courtesy of {cite}.",end='')
if cite_2:
@@ -245,6 +294,9 @@ def thank(dict_str,shrt_str,length,cite,cite_2='',dict_f=""):
input(f"\n(Press 'Enter' to continue)")
def parse_lines(input_file,tag_list,language,get_simple=None):
+ ''' Receives an open kaikki.org json input_file and reads lines
+ to decode json objects into simplified word hoarder data structure.
+ '''
definitions_dict = {}
counter = 0
for line in input_file:
@@ -285,28 +337,14 @@ def parse_lines(input_file,tag_list,language,get_simple=None):
thank(dict_str,shrt_str,n,cite,cite_2)
return definitions
-# duplicate of a function found in tables.py
-'''
-def replace_greek(word):
- alt_letters = {
- 'Ἀ':'Α',
- 'ά':'α', 'ἀ':'α', 'ἄ':'α', 'ἅ':'α', 'ἆ':'α', 'ᾰ':'α', 'ᾱ':'α', 'ᾴ':'α',
- 'έ':'ε', 'ἐ':'ε', 'ἑ':'ε', 'ἔ':'ε', 'ἕ':'ε',
- 'ή':'η','ἡ':'η', 'ἤ':'η', 'ἥ':'η', 'ῆ':'η',
- 'ί':'ι','ἰ':'ι', 'ἱ':'ι', 'ἴ':'ι', 'ἵ':'ι', 'ἶ':'ι', 'ῐ':'ι', 'ῑ':'ι', 'ῖ':'ι',
- 'ό':'ο','ὀ':'ο', 'ὁ':'ο', 'ὄ':'ο', 'ὅ':'ο',
- 'ῥ':'ρ',
- 'ύ':'υ','ὐ':'υ', 'ὑ':'υ', 'ὔ':'υ', 'ὕ':'υ', 'ὖ':'υ', 'ὗ':'υ','ῠ':'υ', 'ῡ':'υ', 'ῦ':'υ',
- 'ώ':'ω', 'ὧ':'ω','ῶ':'ω', 'ῷ':'ω'
- }
- for x in word:
- if x in alt_letters:
- word = word.replace(x,alt_letters[x])
- return word
-'''
-def sort_dump():
+def convert_files():
+ ''' Converts kaikki.org json files to smaller files containing
+ just the necessary information for students looking for quick
+ reference.
+ '''
+ # This module can be run independently for debugging purposes
print_mode = Test
save_mode = not Test
test_language = 'Ancient Greek'
@@ -314,11 +352,12 @@ def sort_dump():
# Load list of tags that are not descriptive/overly general
try:
- change_path('texts')
+ change_path(SUPPLEMENTARY_LANGUAGE_FILES)
with open('ignore_tags.txt','r') as f:
tag_list = json.load(f)
+
except FileNotFoundError:
- print("'ignore_tags.txt' not found in 'texts' directory.")
+ print(f"'ignore_tags.txt' not found in '{SUPPLEMENTARY_LANGUAGE_FILES}' directory.")
input("Enter to continue")
tag_list = []
@@ -333,23 +372,25 @@ def sort_dump():
else:
simple = None
+ # create file string and initialize new dictionary
sorted_file = language.replace(" ", '') + "Dump.txt"
new_dictionary = {'definitions':[], 'file': sorted_file, 'language':language}
print(f"Parsing {file}")
# attempt to parse kaikki.org json file
- change_path('dumps_unsorted')
+ change_path(KAIKKI_JSON_FILES)
try:
with open(file, 'r') as input_file:
new_dictionary['definitions'] = parse_lines(input_file, tag_list,language,simple)
# save a list of all tags that were encountered
- change_path('texts')
+ change_path(SUPPLEMENTARY_LANGUAGE_FILES)
with open(language + '_new_tag_list.txt', mode='w') as f:
json.dump(tag_list, f)
+
except FileNotFoundError:
- print(f"'{file}' not found in 'dumps_unsorted' directory")
+ print(f"'{file}' not found in '{KAIKKI_JSON_FILES}' directory")
input("Enter to continue")
return
@@ -388,8 +429,6 @@ def sort_dump():
from dictionary_MLJohnson import Johnson_OED
ui_template(new_dictionary,dict_str,shrt_str,cite,cite_2,Johnson_OED)
- # Last few sections may leave CWD == 'texts', change to 'dumps_sorted'
- change_path('dumps_sorted')
# sort by handle string
new_dictionary['definitions'].sort(key=lambda item: item.get('handle').lower())
diff --git a/src/create_word.py b/src/create_word.py
index 2eae3be..d297c05 100644
--- a/src/create_word.py
+++ b/src/create_word.py
@@ -3,7 +3,7 @@
create word:
create a new word based on user input
- intended to be used if valid latin word cannot be retrieved by wiktionary parser
+ intended to be used if valid word cannot be retrieved from language dictionary
create entry:
create a new entry within a word
@@ -49,11 +49,11 @@ def create_word(current_dict,tags):
new_word['entries'][0], dummy = edit_entry.edit_entry(new_word['entries'][0],new_word)
# call word options, from here return
- load_dict.change_path('dumps_sorted')
+ load_dict.change_path(SORTED_LANGUAGE_FILES)
if current_dict['language'] == 'Latin' or current_dict['language'] == "Ancient Greek":
wiki_dump = parser_shell.load_big_language(new_word['heading'][0],current_dict['language'])
else:
- wiki_dump = parser_shell.load_dump(current_dict['language'])
+ wiki_dump = parser_shell.load_sorted_language(current_dict['language'])
parser_shell.save_word(new_word,wiki_dump,2)
current_dict = parser_shell.save_word(new_word,current_dict)
return current_dict
diff --git a/src/dict_utilities.py b/src/dict_utilities.py
deleted file mode 100644
index 1a278e1..0000000
--- a/src/dict_utilities.py
+++ /dev/null
@@ -1,11 +0,0 @@
-
-
-# Print Progress
-def printpr(counter, modulo=10000):
- if counter % modulo == 0:
- print(".",end='',flush=True)
- if counter % (modulo*100) == 0:
- print(f' {counter:,} lines parsed',flush=True)
-
-
-
diff --git a/src/dictionary_LSJ.py b/src/dictionary_LSJ.py
index 952f06e..effa8c1 100644
--- a/src/dictionary_LSJ.py
+++ b/src/dictionary_LSJ.py
@@ -1,219 +1,60 @@
+from load_dict import change_path, SUPPLEMENTARY_LANGUAGE_FILES
+from perseus_xml_utilities import printpr, cut_text, translate_greek, smart_join, configure_parts, process_entry, get_def
-import parser_shell
-from load_dict import change_path
-from copy import deepcopy
-import edit_all
-import pickle
-import beta_code
-from language_splitter import split_language
-from dict_utilities import printpr
-
-
-interval = 1000
-level = 0
-start = (interval * level)
-stop = (interval) * (level + 1)
-debug_print = False
progress_print = True
-def cut_text(text,start,stop):
- p = text.find(start) + len(start)
- text = text[p:]
- text = text[:text.find(stop)]
- return text
-
-def translate_greek(text,bold):
- text = list(text)
- text = [x for x in text if not x.isnumeric()]
- text = "".join(text)
- if bold:
- return "" + beta_code.beta_code_to_greek(text) + ""
- else:
- return beta_code.beta_code_to_greek(text)
-
-
-def smart_join(text):
- s = ""
- for i in range(len(text)):
- if i == 0:
- s = text[i]
- else:
- if s[-1] != " " and s[-1] not in ['('] and text[i][0] != " " and text[i][0] not in [',','.',';',')','?']:
- s += " " + text[i]
- else:
- s += text[i]
- return s.replace(" .",".").replace(" ,",",").replace(" :",":")
-
-
-def configure_parts(senses):
- count = 0
-
- for i in senses[0]['gloss']:
- if i == "(":
- count += 1
-
- if i == ")" and count != 0:
- count -= 1
-
- if count < 0:
- # Error too many )s, unbalanced parens
- print(senses[0]['gloss'])
- print(senses[1]['gloss'])
- break
-
- if count != 0:
- parens = 0
-
- for i in range(len(senses[1]['gloss'])):
- if senses[1]['gloss'][i] == ")":
- parens += 1
-
- if senses[1]['gloss'][i] == "(":
- parens -= 1
-
- if parens == count:
- break
-
- if i < len(senses[1]['gloss']) - 1:
- senses[0]['gloss'] = smart_join([senses[0]['gloss'],senses[1]['gloss'][: i + 1]])
- senses[1]['gloss'] = senses[1]['gloss'][i + 1 :]
-
- for i in range(len(senses[1]['gloss'])):
- if senses[1]['gloss'][i].isalpha() or senses[1]['gloss'][i] == "=":
- break
- senses[1]['gloss'] = senses[1]['gloss'][i:]
- else:
- senses[0]['gloss'] += ")"
-
- return senses
-
-
-def process_entry(text):
- definition = {'heading':'',
- 'handle':'',
- 'tags':set(),
- 'entries':[]}
-
- entry = {'senses':[],
- 'partOfSpeech':'',
- 'principleParts':'',
- 'simpleParts':'',
- 'etymology':''}
-
- handle = cut_text(text,"key=\"","\"")
- definition['heading'] = definition['handle'] = translate_greek(handle,False)
-
- if debug_print:
- print("@"*5000 + f"\nheading \"{definition['heading']}\"")
-
- while text != "":
- text, senses = get_def(text)
-
- for x in senses:
- if debug_print:
- print(f"definition: {x['gloss']}")
- entry['senses'].append(deepcopy(x))
-
- if len(entry['senses']) > 1:
- entry['senses'] = configure_parts(entry['senses'])
- entry['simpleParts'] = entry["principleParts"] = entry['senses'][0]['gloss']
- entry['senses'].pop(0)
- else:
- entry['simpleParts'] = entry["principleParts"] = definition['heading']
-
- definition['entries'].append(deepcopy(entry))
- definition['tags'].add("LSJ")
- return definition
-
-def get_def(text):
- m = 0
- gloss = []
- senses = []
- greek = False
- candidate_tag = ''
- while True:
- pull = text[:text.find("<")]
- brac = text[text.find("<"):text.find(">")+1]
- text = text[text.find(">") + 1:]
-
- if greek and pull != "":
- pull = translate_greek(pull,True)
- greek = False
-
- if 'lang=\"greek\"' in brac:
- greek = True
-
- if "" or text == "":
- gloss = smart_join(gloss).strip(",. ")
- if gloss != "":
- senses.append({'gloss':gloss,'tags':[]})
- break
-
- return text, senses
-
-def extract_dictionary(perseus, dictionary):
- line_list = []
- ignition = False
-
- for line in f.readlines():
- if "" + beta_code.beta_code_to_greek(text) + ""
- else:
- return beta_code.beta_code_to_greek(text)
-
-
-def smart_join(text):
- if debug_print:
- print(text)
- s = ""
- for i in range(len(text)):
- if i == 0:
- s = text[i]
- else:
- if s[-1] != " " and s[-1] not in ['('] and text[i][0] != " " and text[i][0] not in [',','.',';',')','?']:
- s += " " + text[i]
- else:
- s += text[i]
- return s.replace(" .",".").replace(" ,",",").replace(" :",":")
-
-def configure_parts(senses):
- count = 0
- for i in senses[0]['gloss']:
- if i == "(":
- count += 1
- if i == ")" and count != 0:
- count -= 1
-
- if count < 0:
- # Unbalance parens
- if debug_print:
- # need db printing function for line and function info
- print(senses[0]['gloss'])
- print(senses[1]['gloss'])
- break
-
- if count != 0:
- if debug_print:
- # need db printing function
- print(senses[0]['gloss'])
- print(senses[1]['gloss'])
-
- parens = 0
- for i in range(len(senses[1]['gloss'])):
- if senses[1]['gloss'][i] == ")":
- parens += 1
- if senses[1]['gloss'][i] == "(":
- parens -= 1
- if parens == count:
- break
- if i < len(senses[1]['gloss']) - 1:
- senses[0]['gloss'] = smart_join([senses[0]['gloss'],senses[1]['gloss'][: i + 1]])
- senses[1]['gloss'] = senses[1]['gloss'][i + 1 :]
- for i in range(len(senses[1]['gloss'])):
- if senses[1]['gloss'][i].isalpha() or senses[1]['gloss'][i] == "=":
- break
- senses[1]['gloss'] = senses[1]['gloss'][i:]
- else:
- senses[0]['gloss'] += ")"
-
- if debug_print:
- # These statements are all useless
- print(senses[0]['gloss'])
- print(senses[1]['gloss'])
- return senses
-
-
-def process_entry(text):
- definition = {'heading':'',
- 'handle':'',
- 'tags':set("Middle Liddell"),
- 'entries':[]}
-
- entry = {'senses':[],
- 'partOfSpeech':'',
- 'principleParts':'',
- 'simpleParts':'',
- 'etymology':''}
-
- handle = cut_text(text,"key=\"","\"")
- if debug_print:
- print(handle)
-
- definition['heading'] = definition['handle'] = translate_greek(handle,False)
-
- if debug_print:
- print("@"*5000 + f"\nheading \"{definition['heading']}\"")
-
- while text != "":
- text, senses = get_def(text)
-
- for x in senses:
- if debug_print:
- print(f"definition: {x['gloss']}")
- entry['senses'].append(deepcopy(x))
-
- if len(entry['senses']) > 1:
- entry['senses'] = configure_parts(entry['senses'])
- entry['simpleParts'] = entry["principleParts"] = entry['senses'][0]['gloss']
- entry['senses'].pop(0)
- else:
- entry['simpleParts'] = entry["principleParts"] = definition['heading']
- definition['entries'].append(deepcopy(entry))
- definition['tags'].add("Middle Liddell")
-
- if debug_print:
- print(definition)
- return definition
-
-def get_def(text):
- m = 0
- gloss = []
- senses = []
- greek = quote = author = False
- candidate_tag = ''
- while True:
- pull = text[:text.find("<")]
- brac = text[text.find("<"):text.find(">")+1]
- text = text[text.find(">") + 1:]
-
- if greek and pull != "":
- pull = translate_greek(pull,True)
- greek = False
-
- if 'lang=\"greek\"' in brac:
- greek = True
-
- if "" or text == "":
- gloss = smart_join(gloss).strip(",. ")
- if gloss != "":
- senses.append({'gloss':gloss,'tags':[]})
- break
-
- return text, senses
-
-def extract_dictionary(perseus, dictionary):
- line_list = []
- ignition = False
- for i in range(len(perseus)):
-
- if "{current_dict['definitions'][i]['heading'].strip()}{c}"
+ # loop through the entries in the definition and print only those matching 'pos'
for j in range(len(current_dict['definitions'][i]['entries'])):
entry = current_dict['definitions'][i]['entries'][j]
partOfSpeech = entry['partOfSpeech']
- if function2(partOfSpeech,pos) or not split:
- pass
- else:
+
+ # if pos test returns False and split == True, skip this particular entry
+ if pos_test_entries(partOfSpeech,pos) == False and split == True:
continue
- entry = current_dict['definitions'][i]['entries'][j]
+ # include etymology string if less than 126 characters
if 'etymology' in entry:
if len(entry['etymology']) <= 125:
- word_string += f'{entry["etymology"]}
'
+ word_string += f'{entry["etymology"]}
'
+
+ # returns the rest of the entry with proper formatting
word_string += get_entry_string(entry,current_dict['language'])
+ # add separator character between end of back of card - beginning of tags
word_string += c
+ # add tags as a comma separated list in double quotes
for tag in current_dict['definitions'][i]['tags']:
word_string += '"' + tag + '"; '
word_string.strip('; ')
+ # print formatted flashcard string to file
print(word_string)
@@ -404,31 +439,38 @@ def print_dict(current_dict,mode='Quizlet'):
# END PRINT DICT
def split_tags(tags,next_index,previous_tags):
+ ''' This function attempts to group tags together when multiple
+ senses have a set of common tags. If a sense differs from a
+ group by a single tag the spoiler is inserted into the line
+ for the sense to preserve the grouping.
+ '''
current_index = next_index - 1
- ''' If previous (current) tags and current tags both exist '''
+ # If previous (current) tags and current tags both exist
if previous_tags != [] and tags[current_index] != []:
match = True
+ # previous tags longer than current tags
if len(previous_tags) > len(tags[current_index]):
match = False
else:
+ # check if first n tags match between previous and current tag sets
for i in range(len(previous_tags)):
if previous_tags[i] != tags[current_index][i]:
match = False
- ''' If all previous (common) tags match with first n current tags '''
+ # If all previous (common) tags match with first n current tags
if match:
- ''' Seperate current into common and distinct tags '''
+ # Separate current into common and distinct tags
return tags[current_index][:i+1], tags[current_index][i+1:]
- ''' Current did not match previous common tags, inspect next tags '''
+ # Current did not match previous common tags, inspect next tags
if len(tags) > next_index:
if tags[next_index] != []:
- ''' Next tags exist and are not empty '''
+ # Next tags exist and are not empty
if tags[next_index] == tags[next_index - 1]:
- ''' Tags are exactly the same, all current tags will be common tags '''
+ # Tags are exactly the same, all current tags will be common tags
return tags[current_index], []
- ''' Find the smaller of the two lists '''
+ # Find the smaller of the two lists
if len(tags[current_index]) <= len(tags[next_index]):
shorter = tags[current_index]
longer = tags[next_index]
@@ -437,109 +479,109 @@ def split_tags(tags,next_index,previous_tags):
longer = tags[current_index]
for i in range(len(shorter)):
if shorter[i] != longer[i]:
- ''' Once lists are no longer the same we have common and distict tags '''
+ # Once lists are no longer the same we have common and distinct tags
if i == 0:
- ''' If no matches, all tags are common '''
+ # If no matches, all tags are common
return tags[current_index], []
- ''' Else use index to seperate common and distinct '''
+ # Else use index to separate common and distinct
return tags[current_index][:i], tags[current_index][i:]
- ''' If next tags don't exist or are empty, all tags are common tags '''
+ # If next tags don't exist or are empty, all tags are common tags
return tags[current_index], []
def get_entry_string(entry,language):
- entry_string = ''
- entry_string += f'' + f"{entry['simpleParts'].strip()}
"
- text = [definition['gloss'] for definition in entry['senses']]
- def_tags = [definition['tags'] for definition in entry['senses']]
+ ''' Functionality to format a definition with html tags so the
+ card will have left alignment, bolded heading and two level ordered lists
+ when displayed in Anki flashcard manager.
+ '''
+
+ # start with bolded simpleParts for entry heading
+ entry_string = f'
' + f"{entry['simpleParts'].strip()}
"
+
+ # change senses from list of dictionary to two lists
+ sense_glosses = [definition['gloss'] for definition in entry['senses']]
+ sense_tags = [definition['tags'] for definition in entry['senses']]
+
+ # short_senses attempts to trim down overly long senses for more readable flashcards
if language == "Latin":
- text = short_senses(text,def_tags)
- bank = ["*","^","†","∆"]
- custom = []
- while text[0][-1] in bank:
- custom.append(text[0][-1])
- text[0] = text[0][:-1]
-
- if custom:
- if len(custom) == 4:
- entry_string += f'
' #style="color:#750265;">'
- elif len(custom) >= 2:
- entry_string += f'' # style="color:#5C4033;">'
- elif len(custom) == 1:
- if custom[0] == f'*':
- entry_string += f'' # style="color:#00008B";">'
- elif custom[0] == '^':
- entry_string += f'' # style="color:#006400";">'
- elif custom[0] == '†':
- entry_string += f'' # style="color:#483C32";">'
- elif custom[0] == '∆':
- entry_string += f'' # style="color:#7d022f";">'
- entry_string += ''
- else:
-
- entry_string += f''
- special_tags = ['LTRG','Oxford','Liddell & Scott','Athenaze']
+ sense_glosses = short_senses(sense_glosses)
- line_tags = def_tags[0]
+ # open first order list
+ entry_string += f''
+
+ # start with tags belonging to the first sense
+ line_tags = sense_tags[0]
+ # no previous tags
previous_tags = []
- common_tags, distinct_tags = split_tags(def_tags,1,previous_tags)
+
+ # determines if current sense should continue to be grouped under same
+ # tags as previous and whether current grouping should be closed
+ common_tags, distinct_tags = split_tags(sense_tags,1,previous_tags)
+
+ # determine if a sublist should be started for the first sense
open_sublist = False
if line_tags:
open_sublist = True
entry_string += pretty_print_tags(common_tags,-1)
+
+ # next index is an argument for split_tags
next_index = 1
- for line, tags in zip(text,def_tags):
- common_tags, distinct_tags = split_tags(def_tags,next_index,previous_tags)
+
+ # iterate through tuples of sense glosses and tags
+ for gloss, tags in zip(sense_glosses,sense_tags):
+
+ # determine if senses are compatible for grouping under common tags
+ common_tags, distinct_tags = split_tags(sense_tags,next_index,previous_tags)
previous_tags = common_tags
+
+ # indicates the current grouping must be closed
if common_tags != line_tags:
- tag_break = False
- for tag in line_tags:
- if tag in special_tags:
- tag_break = True
- for tag in tags:
- if tag in special_tags:
- tag_break = False
line_tags = common_tags
+
+ # close previous sublist (grouped senses)
if open_sublist:
entry_string += "
"
- if tag_break:
- entry_string += '
'
+
+ # if next/current sense has tags, start a new sublist
if common_tags != []:
open_sublist = True
entry_string += pretty_print_tags(common_tags,-1)
+ # other senses will print to list level one
else:
open_sublist = False
+
+ # 'spoiler' tags are added into the line for the sense
if distinct_tags:
- entry_string += '- ' + "(" + ",".join(distinct_tags) + ") " + line.strip(";,. ").strip("†∆*^") + '
'
+ entry_string += '- ' + "(" + ",".join(distinct_tags) + ") " + gloss.strip(";,. ").strip("†∆*^") + '
'
else:
- entry_string += '- ' + line.strip(";,. ").strip("†∆*^") + '
'
+ entry_string += '- ' + gloss.strip(";,. ").strip("†∆*^") + '
'
+
next_index += 1
+
+ # close both list levels, add breaks to give space before next entry starts on multi-entry cards
if open_sublist:
entry_string += "
"
- entry_string += '
'
+ entry_string += '
'
+
+ # return formated string to flashcard printing function
return entry_string
# PRINT GLOSS SETUP
# # # # # # # # # # # #
def print_gloss_setup(current_dict):
-
+ ''' Functionality to pretty print a list of word definitions so that it will fit well
+ into a standard page. The idea is to allow the user to create study sheets for a
+ particular set of tags such as a textbook chapter or a section of a Latin text.
+ '''
# change file path to 'prints' folder
- load_dict.change_path('glosses')
+ change_path('glosses')
myFiles = glob.glob('*.txt')
while True:
+ # give user option to select from all tags found in the current dictionary
master_list = word_methods.get_master_list(current_dict)
tags = word_methods.getTags([],'filter',master_list)
@@ -661,7 +703,7 @@ def print_gloss(current_dict,tags,partOfSpeech=None,tag_mode='1'):
# check if definition exceeds desired length
text = [d['gloss'] for d in word['entries'][x]['senses']]
dtags = [d['tags'] for d in word['entries'][x]['senses']]
- #text = short_senses(text, dtags)
+
if len(text) == 1:
entry_string += f"~) " + text[0].strip('*^†∆') + "; "
else:
@@ -686,6 +728,8 @@ def print_gloss(current_dict,tags,partOfSpeech=None,tag_mode='1'):
elif current_dict['language'] == "Ancient Greek":
entry_string = word['entries'][x]['simpleParts'][:word['entries'][x]['simpleParts'].find(')')+1].strip()
+
+ # TODO: this should be replaced with the get_visible_length function from the get_selection module
length_string = entry_string.lower().replace("θ",'t')
length_string = length_string.replace("χ",'k')
length_string = length_string.replace('φ','f')
@@ -710,92 +754,80 @@ def print_gloss(current_dict,tags,partOfSpeech=None,tag_mode='1'):
return counter
# END PRINT GLOSS
+''' The rest of these functions attempt to shorten senses that are overly long
+ to make more useful flashcards.
+'''
+
# CHOP LINE
# # # # # # # # # # # # #
-def chop_line(text,tags):
- size = sum([len(line) for line in text])
- if len(text) < 3:
+def chop_line(senses):
+ ''' Determines how much each sense should be shortened based on
+ the total number of senses and the total characters
+ '''
+ size = sum([len(sense) for sense in senses])
+
+ if len(senses) < 3:
limit = 5
- elif len(text) == 3:
+ elif len(senses) == 3:
limit = 4
- elif len(text) > 3:
+ elif len(senses) > 3:
limit = 3
- custom = []
- bank = ["*","^","†","∆"]
- special_tags = ['LTRG','Oxford','Liddell & Scott','Athenaze']
- special = False
- for i in range(len(text)):
- if text[i] == "":
- continue
- for tag in tags[i]:
- if tag in special_tags:
- special = True
- while text[i][-1] in bank:
- if text[i][-1] not in custom:
- custom.append(text[i][-1])
- text[i] = text[i][:-1]
- if custom:
- c_string = ''
- if "*" in custom:
- c_string += "*"
- if "^" in custom:
- c_string += "^"
- if "†" in custom:
- c_string += "†"
- if "∆" in custom:
- c_string += "∆"
- text[0] += c_string
- elif special:
- pass
- elif size > 150:
- for i in range(len(text)):
- text[i] = short_line(text[i],limit)
- return text
+ if size > 150:
+ for i in range(len(senses)):
+ senses[i] = short_line(senses[i],limit)
+ return senses
# SHORT LINE
# # # # # # # # # #
def short_line(line,limit):
+ ''' Attempts to shorten line, aborts under certain conditions such as
+ parens at the end (usually contain something important)
+ '''
- #print(f"PRINT SHORT LINE WHILE TOP PRE SPLIT:\n{line}")
line = re.split(",|;",line)
stop = orstop = parstop = limit
+
+ # if line contains an 'or', avoid cutting before the or
for i in range(len(line)):
orlist = [x for x in line[i:] if " or " in x]
if orlist != []:
orstop = i + 1
continue
else:
- #stop = max(i,limit)
break
+ # if line contains parens, avoid cutting before the last closing paren
for i in range(len(line)):
parlist = [x for x in line[i:] if ")" in x or "(" in x]
if parlist != []:
parstop = i + 1
continue
else:
- #stop = max(i,limit)
break
+
+ # determine the safest cutting point (keep the longest required prefix)
stop = max(orstop,parstop,limit)
line = line[:stop]
- new_text = ''
- for i in range(len(line)):
- new_text += line[i].strip() + ', '
- line = new_text.strip(", ")
- #print(f"PRINT END SHORT LINE:\n{line}")
+
+ # reconstruct string
+ new_text = ", ".join(word.strip() for word in line)
+
return line
# SHORT senses
# # # # # # # # # #
-def short_senses(text,tags):
+def short_senses(text):
+ ''' removes empty strings before and after calling chop_line
+ '''
while '' in text:
text.remove('')
- text = chop_line(text,tags)
- size = sum([len(line) for line in text])
+
+ text = chop_line(text)
while '' in text:
text.remove('')
+
for i in range(len(text)):
text[i] = text[i].strip(',;')
return text
diff --git a/src/edit_entry.py b/src/edit_entry.py
index dc0f2ca..79ad270 100644
--- a/src/edit_entry.py
+++ b/src/edit_entry.py
@@ -23,39 +23,40 @@
from tables_greek_ext import auto_parts
from pyfiglet import figlet_format
+
+# string used multiple times
+confirm_str = "'1' to confirm, any other key to cancel: "
+
# EDIT ENRTY
# # # # # # # #
entry_string = ''
def edit_entry(entry,new_word):
-
- # string used multiple times
- confirm_str = "'1' to confirm, any other key to cancel: "
-
- # rest of function contained in loop
+ ''' Menu for user options to edit an entry of a definition.
+ TODO: each option should be moved to a separate function.
+ '''
while True:
# flag if only one definition exists
- if len(entry['senses']) == 1:
- singleton = True
- else:
- singleton = False
+ singleton = True if len(entry['senses']) == 1 else False
# display entry
entry_string = get_entry(entry)
- # get user selection
+ # define user options
options = {
'1':"\n==================================\nEntry Options:\n>'1' to add definition\n",
'2':">'2' to change definition\n"}
if singleton:
options.update({
'3':">'3' to replace definition\n"})
- # only display if more than one definition
+
+ # only display these options if more than one definition
if not singleton:
options.update({
'3':">'3' to replace all\n",
'4':">'4' to move definitions\n",
'5':">'5' to delete definitions\n"})
+
# more options
options.update({
'tag':">'tag' to tag defintions",
@@ -65,6 +66,7 @@ def edit_entry(entry,new_word):
'etym':">'etym' to change etymology\n",
'ps':">'ps' to change part of speech\n",
'0':">'0' to go back ",'00':">'00' to finish and save\n"})
+
user_input = get_selection(options,entry_string)
# Option to finish/go back
@@ -76,211 +78,48 @@ def edit_entry(entry,new_word):
# Option to add new
elif user_input == '1':
- while True:
- print("\nChoose postion of new definition (1-n)")
- try:
- place = int(input(": "))-1
- except:
- print("Invalid")
- continue
- break
- if place < 0:
- continue
- print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)")
- new_definition = {'gloss':input(': ')}
-
- if new_definition['gloss'] != '0':
- new_definition['tags'] = []
- print("Enter definition tags ('0' to finish)")
- new_tag = input(": ")
- if new_tag != '0':
- new_definition['tags'].append(new_tag)
- entry['senses'].insert(place,new_definition)
+ add_new(entry)
+ # apply a new tag to some of the senses within the definition
elif user_input.lower() == 'tag':
- exit_loop = False
- while not exit_loop:
- print("Enter the tag you want to apply ('0' to go back)")
- new_tag = input(": ")
- if new_tag == '0':
- exit_loop = True
- else:
- exit_inner_loop = False
- while exit_inner_loop == False:
- message = "\n==================================\nChoose the definition you want to tag\n'0' to go back"
- selection = select_definition(entry,message)
- if selection == None:
- exit_inner_loop = True
- else:
- entry['senses'][selection]['tags'].append(new_tag)
+ add_new_tag(entry)
+ # remove all tags, currently an invisible option
elif user_input.lower() == "untag_all":
for i in entry['senses']:
i['tags'] = []
+ # remove a tag from some of the senses within the definition
elif user_input.lower() == 'untag':
- exit_loop = False
- while not exit_loop:
- message = "\n==================================\nChoose the definition you want to untag\n'0' to go back"
- selection = select_definition(entry,message)
- if selection == None:
- exit_loop = True
- elif entry['senses'][selection]['tags']:
- exit_inner_loop = False
- while exit_inner_loop == False:
- for i in range(len(entry['senses'][selection]['tags'])):
- print(f"{i+1}. {entry['senses'][selection]['tags'][i]}")
- print("Select the tag you want to remove ('0' to go back)")
- tag_no = input(": ")
- if tag_no == '0':
- exit_inner_loop = True
- elif int(tag_no) - 1 in range(len(entry['senses'][selection]['tags'])):
- del entry['senses'][selection]['tags'][int(tag_no) - 1]
- if entry['senses'][selection]['tags'] == []:
- print("\ndefinitions has no more tags")
- exit_inner_loop = True
- else:
- print("\ninvalid selection")
- else:
- print('\ndefinition has no tags')
-
+ remove_tag(entry)
# Option to change definition
elif user_input == '2':
- exit_loop = False
- while not exit_loop:
- if singleton:
- selection = 0
- else:
- message = "\n==================================\nChoose the definition you want to change\n'0' to go back"
- selection = select_definition(entry,message)
- if selection == None:
- exit_loop = True
- definition_string = ''
- while selection != None:
- definition_string += f"Definition: {entry['senses'][selection]['gloss']}\nTags: {', '.join(entry['senses'][selection]['tags'])}\n"
- options = {'0':f"Change Definition Options:\n>'0' to go back",'00':">'00' to finish\n",'1':">'1' to remove words\n",
- '2':">'2' to add text to the end\n",
- '3':">'3' to add text to the beginning\n",
- '4':">'4' to write new definition\n"}
- user_input = get_selection(options,definition_string)
- definition_string = ""
- if user_input == '0':
- selection = None
- if singleton:
- exit_loop = True
- elif user_input == '00':
- selection = None
- exit_loop == True
- elif user_input == '1':
- entry['senses'][selection]['gloss'] = remove_words(entry['senses'][selection]['gloss'])
- elif user_input == '2':
- print("\nEnter text to add to definition ('0' to go back) (ā, ē, ī, ō, ū)")
- new_text = input(': ')
- if new_text != '0':
- entry['senses'][selection]['gloss'] += new_text
- elif user_input == '3':
- print("\nEnter text to add to definition ('0' to go back) (ā, ē, ī, ō, ū)")
- new_text = input(': ')
- if new_text != '0':
- entry['senses'][selection]['gloss'] = new_text + entry['senses'][selection]['gloss']
- elif user_input == '4':
- print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)")
- new_definition = input(': ')
- if new_definition != '0':
- entry['senses'][selection]['gloss'] = new_definition
- entry['senses'][selection]['tags'] = []
- while True:
- print("Enter definition tags ('0' to finish)")
- new_tag = input(": ")
- if new_tag == '0':
- break
- else:
- entry['senses'][selection]['tags'].append(new_tag)
+ change_definition(entry,singleton)
# Option to move definition
elif user_input == '4' and not singleton:
+ move_definition(entry)
- if len(entry['senses']) == 2:
- entry['senses'] = move_entries(entry['senses'],1,0)
- else:
- exit_inner_loop = False
- while not exit_inner_loop:
- message = "\n==================================\nChoose the definition you want to move\n'0' to go back"
- take = select_definition(entry,message)
-
- if take != None:
- message = "\nMove to what position?\n'0' to go back"
- put = select_definition(entry,message)
-
- if put != None:
- entry['senses'] = move_entries(entry['senses'],take,put)
-
- else:
- exit_inner_loop = True
-
- # Option to delete definition
+ # Option to delete definition
elif user_input == '5' and not singleton:
- exit_inner_loop = False
- while not exit_inner_loop:
- message = "\n==================================\nChoose the definition you want to delete\n'0' to go back"
- selection = select_definition(entry,message)
+ delete_definition(entry)
- if selection != None:
- print(f"\nAre you sure to want to delete {selection+1}?")
- user_input = input(confirm_str)
-
- if user_input == '1':
- del entry['senses'][selection]
-
- else:
- exit_inner_loop = True
- if len(entry['senses']) == 1:
- exit_inner_loop = singleton = True
-
- # Options to replace all definitions
+ # Options to replace all definitions
elif user_input == '3':
- print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)")
- new_definition = {'gloss':input(': ')}
-
- if new_definition != '0':
- new_definition['tags'] = []
-
- print("Enter definition tags ('0' to finish)")
- new_tag = input(": ")
- if new_tag != '0':
- new_definition['tags'].append(new_tag)
- entry['senses'] = [new_definition]
+ replace_all(entry)
- # Option to rewrite principle parts
+ # Option to rewrite principle parts
elif user_input.lower() == 'parts':
- print("'1' to auto retreieve verb parts (Greek only), any other key to proceed")
- user_input = input(": ")
- if user_input == '1':
- entry['simpleParts'] = auto_parts(entry['simpleParts'],True)
- else:
- print("\nEnter your new principal parts ('0' to go back) (ā, ē, ī, ō, ū)")
- new_definition = input(': ')
+ change_principle_parts(entry)
- if new_definition != '0':
- entry['simpleParts'] = new_definition
-
- # Option to rewrite principle parts
+ # Option to rewrite etymology
elif user_input.lower() == 'etym':
- print("\nEnter your new etymology ('0' to go back) (ā, ē, ī, ō, ū) ('X' to delete)")
- user_input = input(': ')
- if user_input.upper() == "X":
- entry['etymology'] = ''
- elif user_input != '0':
- entry['etymology'] = user_input
+ change_etymology(entry)
+ # option to change the partOfSpeech for a definition
elif user_input.lower() == 'ps':
- print("\nEnter your new part of speech ('0' to go back)")
- user_input = input(': ')
- if user_input != '0':
- entry['partOfSpeech'] = user_input
-
-
+ change_pos(entry)
# END EDIT ENTRY
@@ -288,40 +127,70 @@ def edit_entry(entry,new_word):
# REMOVE WORDS
# # # # # # # # # # #
def remove_words(text):
+ ''' This function allows the user to delete a substring from a definition
+ by specifying the start and end point to 'snip'. The function displays
+ the definition on a grid showing the user how to specify the start and
+ end point.
+ The word is divided into rows of 26 characters where each character in
+ the row is under a column heading with the 26 letters of the alphabet.
+ '''
text = list(text)
- #print(f'text = {text}')
invalid = False
while True:
clear_screen()
+
+ # compute number of rows
rows = 1+len(text)//26
- #print(f"Rows == {1+len(text)//26}")
+
+ # print top of box that surrounds grid
print('*' * 35 + " |")
+
for i in range(rows):
+
+ # print letters for row indices
print(f"Row ({chr(i + 65)}): ",end='')
- #print(f"len(text[ 26 * i : 26 * (i + 1) ]) == {len(text[ 26 * i : 26 * (i + 1) ])}")
- #print(f"text[ 26 * i : 26 * (i + 1) ] == {text[ 26 * i : 26 * (i + 1) ]}")
+
+ # print letters for columns indices
for j in range(len(text[ 26 * i : 26 * (i + 1) ])):
print(chr(j+65),end='')
+
+ # print '|'s to box in grid
pad = 26 - len(text[ 26 * i : 26 * (i + 1) ])
print(' ' * pad + " |")
print(' ' * 9,end='')
+
+ # print portion of sense that belongs in this row
for j in range(26 * i,(26 * i) + len(text[ 26 * i : 26 * (i + 1) ])):
- #print(f"range(26 * i,len(text[ 26 * i : 26 * (i + 1) ]) == {range(26 * i,len(text[ 26 * i : 26 * (i + 1) ]))}")
print(text[j],end='')
+
+ # print '-'s and '|'s to box in grid
pad = 26 - len(text[ 26 * i : 26 * (i + 1) ])
print(' ' * pad + " |")
print('-' * 35 + " |")
+
+ # print '*'s and '|' to complete box
print('*' * 35 + " |")
+
+ # print message with UI instructions
print("To cut enter 'Start Row, Start Col, Stop Row, Stop Col':'RC,RC' ('0' to stop): ")
+
+ # if previous entry was invalid
if invalid:
print("\nInvalid entry\n\n")
invalid = False
+
user_input = input(": ")
+
+ # choice to exit
if user_input == '0':
+
+ # construct return string
return_text = ''
for i in range(len(text)):
return_text += text[i]
return return_text
+
+ # try to decode user input and cut substring
else:
try:
user_input = user_input.split(',')
@@ -334,12 +203,241 @@ def remove_words(text):
text = text[:start] + text[stop:]
except:
invalid = True
-# # # # # # # # # # # # # # #
+# END remove_words
+
+def add_new(entry):
+ ''' Add a new sense to the entry
+ '''
+ while True:
+ print("\nChoose postion of new definition (1-n)")
+ try:
+ place = int(input(": "))-1
+ except:
+ print("Invalid")
+ continue
+ break
+ if place < 0:
+ return
+ print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)")
+ new_definition = {'gloss':input(': ')}
+
+ if new_definition['gloss'] != '0':
+ new_definition['tags'] = []
+ print("Enter definition tags ('0' to finish)")
+ new_tag = input(": ")
+ if new_tag != '0':
+ new_definition['tags'].append(new_tag)
+ entry['senses'].insert(place,new_definition)
+
+def add_new_tag(entry):
+ ''' Add a new tag, apply tag to multiple senses until finished
+ '''
+ exit_loop = False
+ while not exit_loop:
+ print("Enter the tag you want to apply ('0' to go back)")
+ new_tag = input(": ")
+ if new_tag == '0':
+ exit_loop = True
+ else:
+ exit_inner_loop = False
+ while exit_inner_loop == False:
+ message = "\n==================================\nChoose the definition you want to tag\n'0' to go back"
+ selection = select_definition(entry,message)
+ if selection == None:
+ exit_inner_loop = True
+ else:
+ entry['senses'][selection]['tags'].append(new_tag)
+
+def remove_tag(entry):
+ ''' User selects a sense and tags to remove
+ '''
+ exit_loop = False
+ while not exit_loop:
+ message = "\n==================================\nChoose the definition you want to untag\n'0' to go back"
+ selection = select_definition(entry,message)
+ if selection == None:
+ exit_loop = True
+ elif entry['senses'][selection]['tags']:
+ exit_inner_loop = False
+ while exit_inner_loop == False:
+ for i in range(len(entry['senses'][selection]['tags'])):
+ print(f"{i+1}. {entry['senses'][selection]['tags'][i]}")
+ print("Select the tag you want to remove ('0' to go back)")
+ tag_no = input(": ")
+ if tag_no == '0':
+ exit_inner_loop = True
+ elif int(tag_no) - 1 in range(len(entry['senses'][selection]['tags'])):
+ del entry['senses'][selection]['tags'][int(tag_no) - 1]
+ if entry['senses'][selection]['tags'] == []:
+ print("\ndefinitions has no more tags")
+ exit_inner_loop = True
+ else:
+ print("\ninvalid selection")
+ else:
+ print('\ndefinition has no tags')
+
+def change_definition(entry,singleton):
+ ''' User is given the option to remove text from any part of the sense
+ or add text to the beginning or ending of the sense.
+ '''
+ while True:
+
+ # get user selection for sense to modify
+ if singleton:
+ selection = 0
+ else:
+ message = "\n==================================\nChoose the definition you want to change\n'0' to go back"
+ selection = select_definition(entry,message)
+ if selection == None:
+ return
+
+
+ while selection != None:
+
+ # display gloss, tags and user options
+ definition_string = f"Definition: {entry['senses'][selection]['gloss']}\nTags: {', '.join(entry['senses'][selection]['tags'])}\n"
+ options = {'0':f"Change Definition Options:\n>'0' to go back",'00':">'00' to finish\n",'1':">'1' to remove words\n",
+ '2':">'2' to add text to the end\n",
+ '3':">'3' to add text to the beginning\n",
+ '4':">'4' to write new definition\n"}
+ user_input = get_selection(options,definition_string)
+
+ # option to go back (exit if singleton)
+ if user_input == '0':
+ selection = None
+ if singleton:
+ return
+ # option to exit back to edit entry menu
+ elif user_input == '00':
+ return
+
+ # option to remove text from any part of sense
+ elif user_input == '1':
+ entry['senses'][selection]['gloss'] = remove_words(entry['senses'][selection]['gloss'])
+
+ # option to add to end of sense
+ elif user_input == '2':
+ print("\nEnter text to add to definition ('0' to go back) (ā, ē, ī, ō, ū)")
+ new_text = input(': ')
+ if new_text != '0':
+ entry['senses'][selection]['gloss'] += new_text
+
+ # option to add to beginning of sense
+ elif user_input == '3':
+ print("\nEnter text to add to definition ('0' to go back) (ā, ē, ī, ō, ū)")
+ new_text = input(': ')
+ if new_text != '0':
+ entry['senses'][selection]['gloss'] = new_text + entry['senses'][selection]['gloss']
+
+ # option to overwrite definition
+ elif user_input == '4':
+ print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)")
+ new_definition = input(': ')
+ if new_definition != '0':
+ entry['senses'][selection]['gloss'] = new_definition
+ entry['senses'][selection]['tags'] = []
+ while True:
+ print("Enter definition tags ('0' to finish)")
+ new_tag = input(": ")
+ if new_tag == '0':
+ break
+ else:
+ entry['senses'][selection]['tags'].append(new_tag)
+
+def change_principle_parts(entry):
+ ''' principal parts set to requested user input
+ '''
+ print("'1' to auto retreieve verb parts (Greek only), any other key to proceed")
+ user_input = input(": ")
+ if user_input == '1':
+ entry['simpleParts'] = auto_parts(entry['simpleParts'],True)
+ else:
+ print("\nEnter your new principal parts ('0' to go back) (ā, ē, ī, ō, ū)")
+ new_definition = input(': ')
+
+ if new_definition != '0':
+ entry['simpleParts'] = new_definition
+
+def change_etymology(entry):
+ ''' etymology set to requested user input
+ '''
+ print("\nEnter your new etymology ('0' to go back) (ā, ē, ī, ō, ū) ('X' to delete)")
+ user_input = input(': ')
+ if user_input.upper() == "X":
+ entry['etymology'] = ''
+ elif user_input != '0':
+ entry['etymology'] = user_input
+
+def change_pos(entry):
+ ''' pos set to requested user input
+ '''
+ print("\nEnter your new part of speech ('0' to go back)")
+ user_input = input(': ')
+ if user_input != '0':
+ entry['partOfSpeech'] = user_input
+
+def move_definition(entry):
+ ''' validate user input for move entries function
+ '''
+ if len(entry['senses']) == 2:
+ entry['senses'] = move_entries(entry['senses'],1,0)
+ else:
+ exit_inner_loop = False
+ while not exit_inner_loop:
+ message = "\n==================================\nChoose the definition you want to move\n'0' to go back"
+ take = select_definition(entry,message)
+
+ if take != None:
+ message = "\nMove to what position?\n'0' to go back"
+ put = select_definition(entry,message)
+
+ if put != None:
+ entry['senses'] = move_entries(entry['senses'],take,put)
+
+ else:
+ exit_inner_loop = True
+
+def delete_definition(entry):
+ ''' validate user input to delete a sense
+ '''
+ exit_inner_loop = False
+ while not exit_inner_loop:
+ message = "\n==================================\nChoose the definition you want to delete\n'0' to go back"
+ selection = select_definition(entry,message)
+
+ if selection != None:
+ print(f"\nAre you sure to want to delete {selection+1}?")
+ user_input = input(confirm_str)
+
+ if user_input == '1':
+ del entry['senses'][selection]
+
+ else:
+ exit_inner_loop = True
+ if len(entry['senses']) == 1:
+ exit_inner_loop = singleton = True
+
+def replace_all(entry):
+ ''' senses replaced with a single sense from user input
+ '''
+ print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)")
+ new_definition = {'gloss':input(': ')}
+
+ if new_definition != '0':
+ new_definition['tags'] = []
+
+ print("Enter definition tags ('0' to finish)")
+ new_tag = input(": ")
+ if new_tag != '0':
+ new_definition['tags'].append(new_tag)
+ entry['senses'] = [new_definition]
# MOVE ENTRIES
# # # # # # # # # # # # # # # # #
def move_entries(entries,selection,new_position):
+ ''' Rearrange object in list from selection to new_position
+ '''
if selection == new_position:
return
else:
@@ -353,10 +451,12 @@ def move_entries(entries,selection,new_position):
# END MOVE ENTRIES
def pretty_print_tags(tags,mode=[]):
-
+ ''' Utility function for printing senses in a two-level list
+ based on tag groupings
+ '''
+
''' -1 is mode for html printing '''
''' else mode corresponds to counter '''
-
if mode != -1:
string = f'{mode}. ('
else:
@@ -376,6 +476,12 @@ def pretty_print_tags(tags,mode=[]):
def split_tags(senses,next_index,previous_tags):
+ ''' This function tries to group common tags between senses
+ so that the entry displays more cleanly. If most of the
+ tags match, the senses will be grouped in a sublist.
+ Any individual tags that would break up a logical grouping
+ may be added to the individual lines.
+ '''
current_index = next_index - 1
''' If previous (current) tags and current tags both exist '''
@@ -419,27 +525,13 @@ def split_tags(senses,next_index,previous_tags):
''' If next tags don't exist or are empty, all tags are common tags '''
return senses[current_index]['tags'], []
-'''
-
-1A. If first n tags match with next list
-
- Common tags become common_tags, distinct tags become distinct tags
-1B. If no tags match OR next list does not exist
- All tags become common_tags, no tags become distict tags
-
-2A. If previous common_tags matches with first n tags of current tags
-
- identify distinct tags as distinct_tags
-
-2B. If common_tags does not match first n tags of current tags
-
- Go back to Step 1
-
-
-'''
def convert_message(message,string):
+ ''' this function joins a new string to the message being built
+ by the get_entry function. Keeps the strings to < 129
+ characters before separating into a new line.
+ '''
modulus = 129
if len(message) < modulus:
string += message + "\n"
@@ -458,6 +550,14 @@ def convert_message(message,string):
return string
def get_entry(entry,mode='',trunc=False):
+ ''' Get a formatted string representing an entire word entry.
+ Prints the entry etymology, partOfSpeech and simpleParts,
+ followed by the senses for the entry.
+ The senses are printed in two list levels.
+ Level 1: arabic numerals, any untagged senses or a list of tags.
+ Level 2: lower-case roman numerals, one or more sense under a
+ common set of tags
+ '''
string = ''
iv = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi', 'xvii', 'xviii', 'xix', 'xx', 'xxi', 'xxii', 'xxiii', 'xxiv', 'xxv', 'xxvi', 'xxvii', 'xxviii', 'xxix', 'xxx', 'xxxi', 'xxxii', 'xxxiii', 'xxxiv', 'xxxv', 'xxxvi', 'xxxvii', 'xxxviii', 'xxxix', 'xl', 'xli', 'xlii', 'xliii', 'xliv', 'xlv', 'xlvi', 'xlvii', 'xlviii', 'xlix', 'l']
@@ -539,7 +639,9 @@ def print_entry(entry,mode=''):
# SELECT DEFINITIONS
# # # # # # # # # # # # # #
def select_definition(entry,message):
- # whole function contained in loop
+ ''' user input function for selecting a definition
+ for modification, deletion, etc.
+ '''
invalid = False
while True:
clear_screen()
@@ -555,8 +657,6 @@ def select_definition(entry,message):
# Option to go back
if user_input == '0':
return None
- #elif user_input == '00':
- # return None, True
# confirm input is numeric
elif "-" in user_input:
diff --git a/src/flashcard_html_utilities.py b/src/flashcard_html_utilities.py
index afaab66..bb053a3 100644
--- a/src/flashcard_html_utilities.py
+++ b/src/flashcard_html_utilities.py
@@ -1,3 +1,8 @@
+'''
+Functions used by tables.py to print flashcards with tables containing
+verb conjugations and noun/adjective cases
+'''
+
def set_styles(body_string):
body_string += '