From 667ed5c76b41b67719ce6793404141b305b483da Mon Sep 17 00:00:00 2001
From: itincknell <151386927+itincknell@users.noreply.github.com>
Date: Sat, 18 Nov 2023 12:49:00 -0500
Subject: [PATCH] Add files via upload

---
 combine_entries.py           |   63 ++
 create_word.py               |  157 ++++
 dictionary_LSJ.py            |  220 ++++
 dictionary_Lewis.py          |  216 ++++
 dictionary_MLJohnson.py      |  386 ++++++
 dictionary_Middle_Liddell.py |  229 ++++
 dump_parser.py               |  390 ++++++
 edit_all.py                  |  658 +++++++++++++++++
 edit_dictionary.py           |  812 +++++++++++++++++++++
 edit_entry.py                |  577 +++++++++++++++
 get_selection.py             |  159 ++++
 get_simple.py                |  452 ++++++++++++
 html_x.py                    |   64 ++
 language_splitter.py         |   38 +
 load_dict.py                 |  282 ++++++++
 main.py                      |  151 ++++
 parser_shell.py              |  376 ++++++++++
 requirements.txt             |    9 +
 tables.py                    |  640 +++++++++++++++
 tables_greek_ext.py          | 1316 ++++++++++++++++++++++
 tables_latin_ext.py          | 1113 ++++++++++++++++++++
 tables_oe_ext.py             |  316 ++++
 word_methods.py              |  429 +++++++++++
 word_print_edit.py           |  381 ++++++++++
 24 files changed, 9434 insertions(+)
 create mode 100644 combine_entries.py
 create mode 100644 create_word.py
 create mode 100644 dictionary_LSJ.py
 create mode 100644 dictionary_Lewis.py
 create mode 100644 dictionary_MLJohnson.py
 create mode 100644 dictionary_Middle_Liddell.py
 create mode 100644 dump_parser.py
 create mode 100644 edit_all.py
 create mode 100644 edit_dictionary.py
 create mode 100644 edit_entry.py
 create mode 100644 get_selection.py
 create mode 100644 get_simple.py
 create mode 100644 html_x.py
 create mode 100644 language_splitter.py
 create mode 100644 load_dict.py
 create mode 100644 main.py
 create mode 100644 parser_shell.py
 create mode 100644 requirements.txt
 create mode 100644 tables.py
 create mode 100644 tables_greek_ext.py
 create mode 100644 tables_latin_ext.py
 create mode 100644 tables_oe_ext.py
 create mode 100644 word_methods.py
 create mode 100644 word_print_edit.py

diff --git a/combine_entries.py b/combine_entries.py
new file mode 100644
index 0000000..8b72030
--- /dev/null
+++ b/combine_entries.py
@@ -0,0 +1,63 @@
+
+import pickle
+from load_dict import change_path
+
+change_path('dictionaries')
+with open('new_dictionary_unjoined.txt','rb') as file:
+    dictionary = pickle.load(file)
+
+alpha = {}
+for i in range(0,26):
+    alpha[chr(i+97)] = 0
+for i in range(len(dictionary['definitions'])):
+    if dictionary['definitions'][i]['handle'][0].lower() in alpha:
+        alpha[dictionary['definitions'][i]['handle'][0].lower()] = i
+
+'''
+prev = 0
+counter = 0
+for key in alpha:
+    print(f"processing {key}s")
+    for i in range(prev,alpha[key]):
+        if dictionary['definitions'][i] in dictionary['definitions'][i + 1:i + 10]:
+            dictionary['definitions'][i]['tags'].append('DUPE')
+    prev = alpha[key]
+    print(f"{key}s completed")
+
+offset = 0
+for i in range(len(dictionary['definitions'])):
+    if "DUPE" in dictionary['definitions'][i - offset]['tags']:
+        print(f"deleting {dictionary['definitions'][i - offset]['handle']}")
+        del dictionary['definitions'][i - offset]
+        offset += 1
+'''
+prev = 0
+counter = 0
+comp = ''
+for key in alpha:
+    handles = []
+    print(f"processing {key}s")
+    for i in range(prev,alpha[key]):
+        if comp == dictionary['definitions'][i]:
+            continue
+        else:
+            comp = dictionary['definitions'][i]
+        for j in range(1,len(dictionary['definitions'][i + 1:i+20])):
+            if dictionary['definitions'][i + j]['handle'] == dictionary['definitions'][i]['handle']:
+                print(f"i={i},j={j}; adding {dictionary['definitions'][i]['handle']} to {dictionary['definitions'][i + j]['handle']}")
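+                # Fold the later duplicate's entries and roots into this first occurrence,
+                # then tag the duplicate 'DUPE' so the clean-up pass below can delete it.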
dictionary['definitions'][i]['entries'].extend(dictionary['definitions'][i + j]['entries']) + dictionary['definitions'][i]['roots'].extend(dictionary['definitions'][i + j]['roots']) + dictionary['definitions'][i + j]['tags'].append('DUPE') + prev = alpha[key] + print(f"{key}s completed") + +offset = 0 +for i in range(len(dictionary['definitions'])): + if "DUPE" in dictionary['definitions'][i - offset]['tags']: + print(f"deleting {dictionary['definitions'][i - offset]['handle']}") + del dictionary['definitions'][i - offset] + offset += 1 + + +with open(dictionary['file'],mode = 'wb') as openFile: + pickle.dump(dictionary, openFile) diff --git a/create_word.py b/create_word.py new file mode 100644 index 0000000..45a0295 --- /dev/null +++ b/create_word.py @@ -0,0 +1,157 @@ +''' +Description: + + create word: + create a new word based on user input + intended to be used if valid latin word cannot be retrieved by wiktionary parser + + create entry: + create a new entry within a word + used by create_word and by word_print_edit.edit_entries +''' + +from unidecode import unidecode +import pickle + +import parser_shell +import word_print_edit +import edit_entry +import load_dict +# CREATE WORD +# # # # # # # # # # # +def create_word(current_dict,tags): + + # Create new word, first tag is file name - .txt + new_word = {'tags':tags} + + # begin user input loop + exit_loop = False + while not exit_loop: + print("\nEnter new word heading ('0' to exit) (ā, ē, ī, ō, ū)") + user_input = input(': ') + + # option to go back, return to calling function + if user_input == '0': + return current_dict + + # assign input to heading; un-macroned version to handle + new_word['heading'] = user_input + new_word['handle'] = unidecode(user_input) + + # create empty entries list + new_word['entries'] = [] + + # call create entry, returns False if creation not completed + new_word, exit_loop = create_entry(new_word) + + # End while Loop only if complete word is created + # Send new word to edit entry to add additional definitons etc. 
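+    # At this point new_word holds {'tags': [...], 'heading': ..., 'handle': ..., 'entries': [entry]};
+    # edit_entry returns the (possibly revised) first entry plus a second value that is discarded here.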
+ new_word['entries'][0], dummy = edit_entry.edit_entry(new_word['entries'][0],new_word) + + # call word options, from here return + load_dict.change_path('dumps sorted') + if current_dict['language'] == 'Latin' or current_dict['language'] == "Ancient Greek": + wiki_dump = parser_shell.load_big_language(new_word['heading'][0],current_dict['language']) + else: + wiki_dump = parser_shell.load_dump(current_dict['language']) + parser_shell.save_word(new_word,wiki_dump,2) + current_dict = parser_shell.save_word(new_word,current_dict) + return current_dict +# END CREATE WORD + + +# CREATE ENTRY +# # # # # # # # # # # +def create_entry(new_word): + + # copy heading to shorter name + heading = new_word['heading'] + # create empty entry to be added to + entry = {} + + # Start series of ascending loops + # # # # # # # # # # # # # # # # # + + # Begin Loop LEVEL ONE: part of speech + while True: + + print(f"\nEnter part of speech for '{heading}'") + print("'0' to go back") + user_input = input(': ') + + # go back, word cancelled + if user_input == '0': + return new_word, False + + # add to entry, proceed to next inner loop + entry['partOfSpeech'] = user_input + + # Begin Loop LEVEL TWO: etymology + while True: + + print(f"\nEnter etymology for '{heading}'") + print("'0' to go back\n'1' to go back\n'2' to skip") + user_input = input(': ') + + # go back, word cancelled + if user_input == '0': + return new_word, False + + elif user_input == '1': + break + + # add to entry, proceed to next inner loop + elif user_input != '2': + entry['etymology'] = user_input + else: + entry['etymology'] = '' + + # begin loop LEVEL THREE: principal parts + while True: + + print(f"\nEnter principle parts for '{heading}' (ā, ē, ī, ō, ū)") + print("'0' to exit") + print("'1' to go back") + user_input = input(': ') + + # exit, word cancelled + if user_input == '0': + return new_word, False + # go back to previous level + elif user_input == '1': + break + + # simple/principal the same for created word + entry['principleParts'] = user_input + entry['simpleParts'] = user_input + + # Begin Loop LEVEL FOUR: 1st definition + while True: + + print(f"\nEnter '{heading}' definition") + print("'0' to exit") + print("'1' to go back") + user_input = {'gloss':input(': ')} + + # exit, word cancelled + if user_input == '0': + return new_word, False + # go back to previous level + elif user_input == '1': + break + + user_input['tags'] = [] + while True: + print("Enter definition tags ('0' to finish)") + new_tag = input(": ") + if new_tag == '0': + break + else: + user_input['tags'].append(new_tag) + # assign to entry defintions list + entry['defs'] = [user_input] + + # append new entry to new word, return True + new_word['entries'].append(entry) + return new_word, True +# END CREATE ENTRY diff --git a/dictionary_LSJ.py b/dictionary_LSJ.py new file mode 100644 index 0000000..52dd73b --- /dev/null +++ b/dictionary_LSJ.py @@ -0,0 +1,220 @@ + + +import parser_shell +from load_dict import change_path +from copy import deepcopy +import edit_all +import pickle +import beta_code +from language_splitter import split_language +from dict_utilities import printpr + + +interval = 1000 +level = 0 +start = (interval * level) +stop = (interval) * (level + 1) +debug_print = False +progress_print = True + +def cut_text(text,start,stop): + p = text.find(start) + len(start) + text = text[p:] + text = text[:text.find(stop)] + return text + +def translate_greek(text,bold): + text = list(text) + text = [x for x in text if not x.isnumeric()] + text = "".join(text) 
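+    # beta_code.beta_code_to_greek converts Perseus beta code (e.g. 'lo/gos') to Unicode Greek;
+    # any digits in the key have already been stripped above.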
+ if bold: + return "" + beta_code.beta_code_to_greek(text) + "" + else: + return beta_code.beta_code_to_greek(text) + + +def smart_join(text): + s = "" + for i in range(len(text)): + if i == 0: + s = text[i] + else: + if s[-1] != " " and s[-1] not in ['('] and text[i][0] != " " and text[i][0] not in [',','.',';',')','?']: + s += " " + text[i] + else: + s += text[i] + return s.replace(" .",".").replace(" ,",",").replace(" :",":") + + +def configure_parts(defs): + count = 0 + + for i in defs[0]['gloss']: + if i == "(": + count += 1 + + if i == ")" and count != 0: + count -= 1 + + if count < 0: + # Error too many )s, unbalanced parens + print(defs[0]['gloss']) + print(defs[1]['gloss']) + break + + if count != 0: + parens = 0 + + for i in range(len(defs[1]['gloss'])): + if defs[1]['gloss'][i] == ")": + parens += 1 + + if defs[1]['gloss'][i] == "(": + parens -= 1 + + if parens == count: + break + + if i < len(defs[1]['gloss']) - 1: + defs[0]['gloss'] = smart_join([defs[0]['gloss'],defs[1]['gloss'][: i + 1]]) + defs[1]['gloss'] = defs[1]['gloss'][i + 1 :] + + for i in range(len(defs[1]['gloss'])): + if defs[1]['gloss'][i].isalpha() or defs[1]['gloss'][i] == "=": + break + defs[1]['gloss'] = defs[1]['gloss'][i:] + else: + defs[0]['gloss'] += ")" + + return defs + + +def process_entry(text): + definition = {'heading':'', + 'handle':'', + 'tags':set(), + 'entries':[]} + + entry = {'defs':[], + 'partOfSpeech':'', + 'principleParts':'', + 'simpleParts':'', + 'etymology':''} + + handle = cut_text(text,"key=\"","\"") + definition['heading'] = definition['handle'] = translate_greek(handle,False) + + if debug_print: + print("@"*5000 + f"\nheading \"{definition['heading']}\"") + + while text != "": + text, defs = get_def(text) + + for x in defs: + if debug_print: + print(f"definition: {x['gloss']}") + entry['defs'].append(deepcopy(x)) + + if len(entry['defs']) > 1: + entry['defs'] = configure_parts(entry['defs']) + entry['simpleParts'] = entry["principleParts"] = entry['defs'][0]['gloss'] + entry['defs'].pop(0) + else: + entry['simpleParts'] = entry["principleParts"] = definition['heading'] + + definition['entries'].append(deepcopy(entry)) + definition['tags'].add("LSJ") + return definition + +def get_def(text): + m = 0 + gloss = [] + defs = [] + greek = False + candidate_tag = '' + while True: + pull = text[:text.find("<")] + brac = text[text.find("<"):text.find(">")+1] + text = text[text.find(">") + 1:] + + if greek and pull != "": + pull = translate_greek(pull,True) + greek = False + + if 'lang=\"greek\"' in brac: + greek = True + + if "" or text == "": + gloss = smart_join(gloss).strip(",. 
") + if gloss != "": + defs.append({'gloss':gloss,'tags':[]}) + break + + return text, defs + +def extract_dictionary(perseus, dictionary): + line_list = [] + ignition = False + + for line in f.readlines(): + if " 5: + exit() + else: + counter_senses += 1 + + if 'greek_word' in i: + + if debug_print: + print("\n\n\n\t\t\t" + "*"*1000 + f"\n\nLINE: {current_line_number()}") + print(f"['greek_word'] == {i['greek_word']}") + + entry['defs'].append({'gloss':i['greek_word'],'tags':['greek_word']}) + print(f"entry == {entry}") + if counter_grk > 1: + print(f"Greek Word Break") + exit() + else: + counter_grk += 1 + else: + if i['entry_type'] in {'spur','gloss'}: + return None + if debug_print: + print("\n\n\n\t\t\t" + "*"*1000 + f"\n\nLINE: {current_line_number()}") + print(f"i['entry_type'] == 'main' == {i['entry_type'] == 'main'}") + print(f"i['entry_type'] == {i['entry_type']}") + print(i) + + #exit() + if 'principleParts' not in entry: + + defs = entry['defs'][0]['gloss'] + defs = defs.split() + gloss = '' + tag = '' + flag = False + for x, q in enumerate(defs): + if debug_print: + print("\n\n\n\t\t\t" + "*"*1000 + f"\n\nLINE: {current_line_number()}") + print(f"defs = {defs}") + print(f"q = {q}") + print(f"q[-1] = {q[-1]}") + if (x and q[0].isupper()) or q[0] == '(' or q[0].isnumeric(): + break + if ((q[-1] == ',' and not flag) or (x == len(defs)-1 and not flag) or x == 0) and not (q[-1] == '.'): + gloss += q.strip(":") + ' ' + else: + tag += q + ' ' + if q[-1] == ':': + if debug_print: + print("\n\n\n\t\t\t" + "*"*1000 + f"\n\nLINE: {current_line_number()}") + print(f"End in colon is True = {q[-1]}") + break + if q[-1] != ',': + flag = True + continue + + + if tag.strip(): + entry['principleParts'] = gloss.strip(", ") + " (" + tag.strip() + ")" + else: + entry['principleParts'] = gloss.strip(", ") + if debug_print: + print("\n\n\n\t\t\t" + "*"*1000 + f"\n\nLINE: {current_line_number()}") + if counter_princ_parts > 4: + print("'principleParts' not in entry") + print(i['entry_type']) + print(f"entry = {entry}") + print(f"final {entry['principleParts']}",flush=True) + exit() + else: + for x in entry['defs']: + if isinstance(x['gloss'],list): + print(f"Gloss = List {x['gloss']}",flush=True) + print(f"i = {i}") + exit() + print("'principleParts' not in entry") + print(f"increment {entry['principleParts']}") + counter_princ_parts += 1 + return entry + for i in lewis: + entry = get_entry(i,debug_print) + if entry: + dictionary['definitions'].append({'entries':[entry],'heading':i['key'],'handle':unidecode(i['key']),'tags':set()}) + +import json + +def Lewis(new_dictionary={'definitions':[]}): + + + + change_path('texts') + change_path('lewis-short-json-master') + + dictionary = {'file':'','definitions':[],"language":''} + + for i in range(0,1): + json_file = 'ls_' + chr(i+65) + '.json' + with open(json_file,'r') as f: + print(f"Successfully opened '{'ls_' + chr(i+65) + '.json'}'",flush=True) + lewis = json.load(f) + extract_dictionary(lewis,dictionary) + + new_dictionary['definitions'].extend(dictionary['definitions']) + + return new_dictionary +print(Lewis()) + + diff --git a/dictionary_MLJohnson.py b/dictionary_MLJohnson.py new file mode 100644 index 0000000..0385eb0 --- /dev/null +++ b/dictionary_MLJohnson.py @@ -0,0 +1,386 @@ + + + +from load_dict import change_path +from copy import deepcopy +import edit_all +import pickle + +from dict_utilities import printpr + +interval = 30 +level = 1 +start = interval * level +stop = (interval) * (level + 1) + +debug_print = False +progress_print = True + 
+# From here on this is the most contrived code ever written + +tenses = ['pres','past','ptp'] +genders = ['m','f','n','pl','?'] +numbers = [f'{i + 1}.' for i in range(30)] +other_pos = {'adj':'adjective', + 'noun':'noun', + 'pronoun':'pronoun', + 'prep':'preposition', + 'conj':'conjuction', + 'interrog. adv':'interrogative', + 'interrog':'interrogative', + 'adv':'adverb', + 'prefix':'prefix', + 'interj':'interjection', + 'pron':'pronoun', + 'suffix':'suffix', + 'num':'number', + 'article':'article', + 'infl':'inlfection', + 'indecl':'indecllinable noun'} + +def Johnson_OED(new_dictionary): + + def pos_test(line,line_0 = ''): + if debug_print: + print(f"TEST LINE: {line}") + + index = None + if 'ptp' in line: + full_verb = True + for n in numbers: + if n in line: + if line.index(n) < line.index('ptp'): + full_verb = False + else: + full_verb = False + + if full_verb: + entry = {'partOfSpeech':'verb','etymology':''} + index = line.index('ptp')+2 + parts = line_0 + ' '.join(line[:index]).strip(',.;') + entry['simpleParts'] = entry['principleParts'] = parts + + if debug_print: + print('\n>>>>> FULL VERB') + return entry, index + + else: + if debug_print: + print('NOT FV',end='\t') + + see = False + for word in line: + if word == 'see': + see = True + break + if word.strip('.,') in other_pos \ + or word in numbers \ + or word.strip(',.') in genders: + break + + if see: + index = line.index('see') + entry = {'partOfSpeech':'form','etymology':''} + entry['simpleParts'] = entry['principleParts'] = line_0 + ' '.join(line[:index]).strip(',.;') + entry['defs'] = [{'gloss':'alternative form of ' + ' '.join(line[index+1:]).strip(',.;'),'tags':[]}] + new_word['entries'] = entry + + if debug_print: + print('\n>>>>> FORM OF') + return entry, index + + if debug_print: + print('NOT FORM',end='\t') + + noun = flag = stop = False + for i in range(len(line)): + if line[i].strip(".,") in genders and not stop: + noun = flag = True + index = i + 1 + continue + elif noun and not flag: + stop = True + if flag: + if '(' in line[i]: + if ')' in line[i]: + index = i + 1 + flag = False + else: + continue + else: + flag = False + if ')' in line[i]: + index = i + 1 + flag = False + if line[i] in numbers: + break + + if noun: + entry = {'partOfSpeech':'noun','etymology':''} + parts = line_0 + ' '.join(line[:index]).strip(',.;') + entry['simpleParts'] = entry['principleParts'] = parts + if debug_print: + print('\n>>>>> NOUN') + return entry,index + + if debug_print: + print('NOT NOUN',end='\t') + + found_pos = False + for i in range(len(line)): + if line[i].strip('.,') in other_pos: + index = i + pos = other_pos[line[i].strip('.,')] + found_pos = True + break + if line[i] in numbers: + break + + if found_pos: + entry = {'partOfSpeech':pos,'etymology':''} + if pos == 'noun' or pos == 'pronoun': + if debug_print: + print('%'*1000) + if debug_print: + print(line) + flag = suspicious = False + og_index = index + for i in range(index,len(line)): + if line[i].strip('(,.') in genders: + if '(' in line[i]: + flag = True + else: + index = i + 1 + if flag: + if ')' in line[i]: + flag = False + index = i + 1 + if line[i].strip(',.') in ['suffix','prefix']: + entry['partOfSpeech'] = line[i].strip(',.') + if line[i].strip(',.') in other_pos: + suspicious = True + if line[i].strip(',.') == 'or' and suspicious: + entry['partOfSpeech'] = '' + index = 1 + + if entry['partOfSpeech'] == 'noun' and index > og_index: + parts = ' '.join(line[:index]) + entry['simpleParts'] = entry['principleParts'] = parts + + if entry['partOfSpeech'] == 
'suffix' or entry['partOfSpeech'] == 'prefix' : + entry['simpleParts'] = entry['principleParts'] = line[0] + index = 1 + + else: + parts = line_0 + ' '.join(line[:index]).strip(',.;') + entry['simpleParts'] = entry['principleParts'] = parts + index += 1 + if debug_print: + print('\n>>>>> OTHER POS') + return entry, index + if debug_print: + print('POS NOT FOUND',end='\t') + + form = False + for i in range(len(line)): + if line[i] == 'of': + form = True + if line[i].strip('.,') in other_pos \ + or line[i] in numbers \ + or line[i].strip(',.') in genders: + form = False + break + if form: + entry = {'partOfSpeech':'form','etymology':''} + entry['simpleParts'] = entry['principleParts'] = line_0 + index = 0 + return entry, index + prin("") + return None, None + + def return_defs(line,index): + sub_nums = [] + for i in range(len(numbers)): + if numbers[i] in line[index:]: + sub_nums.append(numbers[i]) + defs = [] + if debug_print: + print(line) + for i in range(len(sub_nums)): + start = line.index(sub_nums[i]) + 1 + if sub_nums[i] == sub_nums[-1]: + stop = None + else: + stop = line.index(sub_nums[i+1]) + if stop: + defs.append({'gloss':' '.join(line[start:stop]).strip(',.;'),'tags':[]}) + else: + defs.append({'gloss':' '.join(line[start:]).strip(',.;'),'tags':[]}) + return defs + + def return_multi_entry(line): + + sub_nums = [] + for i in range(len(numbers)): + if numbers[i] in line: + sub_nums.append(numbers[i]) + + entries = [] + if debug_print: + print(sub_nums) + for i in range(len(sub_nums)): + if debug_print: + print(sub_nums[i]) + start = line.index(sub_nums[i]) + 1 + if sub_nums[i] == sub_nums[-1]: + stop = None + else: + stop = line.index(sub_nums[i+1]) + if debug_print: + print(line[start:stop]) + if stop: + entry, index = pos_test(line[start:stop],line[0] + ' ') + else: + entry, index = pos_test(line[start:],line[0] + ' ') + if entry == None or index == None: + if debug_print: + print("\t\tAAAAAAAA") + entry = {'partOfSpeech':'','etymology':''} + parts = ' '.join(line[:line.index('1.')]).strip(',.;') + entry['simpleParts'] = entry['principleParts'] = parts + if stop: + entry['defs'] = [{'gloss':' '.join(line[start:stop]).strip(',.;'),'tags':[]}] + else: + entry['defs'] = [{'gloss':' '.join(line[start:]).strip(',.;'),'tags':[]}] + else: + if debug_print: + print("\t\tBBBBBBBB") + if stop: + if 'defs' not in entry: + entry['defs'] = [{'gloss':' '.join(line[start+index:stop]).strip(',.;'),'tags':[]}] + else: + if 'defs' not in entry: + entry['defs'] = [{'gloss':' '.join(line[start+index:]).strip(',.;'),'tags':[]}] + entries.append(deepcopy(entry)) + return entries + + # Start of actual MLJohnson code + # # # # # # # # # # # # # # # # + + change_path('texts') + + definitions = [] + counter = 0 + line_counter = 0 + + with open('MLJohnson_OEDictionary.txt','r') as f: + if progress_print: + print(f"Parsing 'MLJohnson_OEDictionary.txt': ",flush=True,end='') + + for line in f.readlines(): + line = line.split() + + if '[]' in line: + line.remove('[]') + + offset = 0 + for i in range(len(line)): + i = i - offset + if len(line[i]) > 2: + line[i] = line[i].rstrip('1234') + if len(line[i]) > 1: + line[i] = line[i].strip('?') + if line[i].rstrip('1234') == '': + del line[i] + offset += 1 + i -= 1 + + if line[i].strip('.') in other_pos and line[i-1].strip('.') == 'interrog': + line[i-1] = 'interrog. adv.' 
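+                    # 'interrog.' followed by another part-of-speech token is collapsed into the
+                    # single token 'interrog. adv.'; drop the leftover token and bump the offset.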
+ del line[i] + offset += 1 + + if line: + new_word = {} + new_word['heading'] = new_word['handle'] = line[0] + + # PART OF SPEECH TESTS + entry, index = pos_test(line) + if debug_print: + print(f'\nPOS TEST: {entry} {index}') + + if entry: + if entry['partOfSpeech'] == 'suffix' or entry['partOfSpeech'] == 'prefix': + for i in range(len(line)): + if entry['partOfSpeech'] in line[i]: + line[i] = line[i].strip(",.;") + ":" + + + one_entry = True + for n in numbers: + if n in line: + one_entry = False + else: + break + if debug_print: + print(f"ONE ENTRY = {one_entry}") + + if index == None and not one_entry: + new_word['entries'] = return_multi_entry(line) + + elif index and not one_entry: + entry['defs'] = return_defs(line,index) + new_word['entries'] = [entry] + + elif index == None and one_entry: + verb = False + for i in range(len(line)): + if '/' in line[i] or line[i] == 'verb': + index = i + 1 + verb = True + if verb: + entry = {'partOfSpeech':'verb','etymology':''} + parts = ' '.join(line[:index]).strip(',.;') + entry['simpleParts'] = entry['principleParts'] = parts + + + elif 'of' in line: + entry = {'partOfSpeech':'form','etymology':''} + entry['simpleParts'] = entry['principleParts'] = line[0] + index = 1 + elif index == None: + entry = {'partOfSpeech':'','etymology':''} + entry['simpleParts'] = entry['principleParts'] = line[0] + index = 1 + counter += 1 + entry['defs'] = [{'gloss':' '.join(line[index:]).strip(',.;'),'tags':[]}] + new_word['entries'] = [entry] + else: + if 'defs' not in entry: + entry['defs'] = [{'gloss':' '.join(line[index:]).strip(',.;'),'tags':[]}] + new_word['entries'] = [entry] + new_word['tags'] = set('MLJ') + if debug_print: + print(f"\n\tFINAL WORD: {new_word}\n") + if counter > 2: + if debug_print: + print("COUNTER EXCEEDED") + break + definitions.append(new_word) + + line_counter += 1 + if progress_print: + printpr(line_counter) + + print(f' {line_counter:,} lines parsed',flush=True) + + + new_dictionary['definitions'].extend(definitions) + return new_dictionary + + + + + + + diff --git a/dictionary_Middle_Liddell.py b/dictionary_Middle_Liddell.py new file mode 100644 index 0000000..776e443 --- /dev/null +++ b/dictionary_Middle_Liddell.py @@ -0,0 +1,229 @@ + + +import parser_shell +from load_dict import change_path +from copy import deepcopy +import edit_all +import pickle +import beta_code +from language_splitter import split_language +from dict_utilities import printpr + +debug_print = False +progress_print = True + +interval = 1000 +level = 0 +start = (interval * level) +stop = (interval) * (level + 1) + +def cut_text(text,start,stop): + p = text.find(start) + len(start) + text = text[p:] + text = text[:text.find(stop)] + return text + +def translate_greek(text,bold): + text = list(text) + text = [x for x in text if not x.isnumeric()] + text = "".join(text) + if bold: + return "" + beta_code.beta_code_to_greek(text) + "" + else: + return beta_code.beta_code_to_greek(text) + + +def smart_join(text): + if debug_print: + print(text) + s = "" + for i in range(len(text)): + if i == 0: + s = text[i] + else: + if s[-1] != " " and s[-1] not in ['('] and text[i][0] != " " and text[i][0] not in [',','.',';',')','?']: + s += " " + text[i] + else: + s += text[i] + return s.replace(" .",".").replace(" ,",",").replace(" :",":") + +def configure_parts(defs): + count = 0 + for i in defs[0]['gloss']: + if i == "(": + count += 1 + if i == ")" and count != 0: + count -= 1 + + if count < 0: + # Unbalance parens + if debug_print: + # need db printing function for line and 
function info + print(defs[0]['gloss']) + print(defs[1]['gloss']) + break + + if count != 0: + if debug_print: + # need db printing function + print(defs[0]['gloss']) + print(defs[1]['gloss']) + + parens = 0 + for i in range(len(defs[1]['gloss'])): + if defs[1]['gloss'][i] == ")": + parens += 1 + if defs[1]['gloss'][i] == "(": + parens -= 1 + if parens == count: + break + if i < len(defs[1]['gloss']) - 1: + defs[0]['gloss'] = smart_join([defs[0]['gloss'],defs[1]['gloss'][: i + 1]]) + defs[1]['gloss'] = defs[1]['gloss'][i + 1 :] + for i in range(len(defs[1]['gloss'])): + if defs[1]['gloss'][i].isalpha() or defs[1]['gloss'][i] == "=": + break + defs[1]['gloss'] = defs[1]['gloss'][i:] + else: + defs[0]['gloss'] += ")" + + if debug_print: + # These statements are all useless + print(defs[0]['gloss']) + print(defs[1]['gloss']) + return defs + + +def process_entry(text): + definition = {'heading':'', + 'handle':'', + 'tags':set("Middle Liddell"), + 'entries':[]} + + entry = {'defs':[], + 'partOfSpeech':'', + 'principleParts':'', + 'simpleParts':'', + 'etymology':''} + + handle = cut_text(text,"key=\"","\"") + if debug_print: + print(handle) + + definition['heading'] = definition['handle'] = translate_greek(handle,False) + + if debug_print: + print("@"*5000 + f"\nheading \"{definition['heading']}\"") + + while text != "": + text, defs = get_def(text) + + for x in defs: + if debug_print: + print(f"definition: {x['gloss']}") + entry['defs'].append(deepcopy(x)) + + if len(entry['defs']) > 1: + entry['defs'] = configure_parts(entry['defs']) + entry['simpleParts'] = entry["principleParts"] = entry['defs'][0]['gloss'] + entry['defs'].pop(0) + else: + entry['simpleParts'] = entry["principleParts"] = definition['heading'] + definition['entries'].append(deepcopy(entry)) + definition['tags'].add("Middle Liddell") + + if debug_print: + print(definition) + return definition + +def get_def(text): + m = 0 + gloss = [] + defs = [] + greek = quote = author = False + candidate_tag = '' + while True: + pull = text[:text.find("<")] + brac = text[text.find("<"):text.find(">")+1] + text = text[text.find(">") + 1:] + + if greek and pull != "": + pull = translate_greek(pull,True) + greek = False + + if 'lang=\"greek\"' in brac: + greek = True + + if "" or text == "": + gloss = smart_join(gloss).strip(",. 
") + if gloss != "": + defs.append({'gloss':gloss,'tags':[]}) + break + + return text, defs + +def extract_dictionary(perseus, dictionary): + line_list = [] + ignition = False + for i in range(len(perseus)): + + if "= .5 + +def filter_tags(gloss_parts, existing_tags, Test): + """Filter existing tags by comparing them to the gloss_parts.""" + new_tags = existing_tags.copy() + for tag in existing_tags: + gloss_parts = [part for part in gloss_parts if not similar_enough(part, tag)] + if any(similar_enough(part, tag) for part in gloss_parts): + new_tags.remove(tag) + return new_tags, gloss_parts + +def paren_cut(gloss, tags): + + if gloss[0] != "(": + return gloss, tags + + gloss_parts = gloss[1:gloss.find(")")].split(", ") + remaining_gloss = gloss[gloss.find(")") + 2:] + + debug_print(Test, f"g = {gloss_parts}", f"gloss = {remaining_gloss}", f"split g = {gloss_parts}", f"tags = {tags}") + + new_tags, gloss_parts = filter_tags(gloss_parts, tags, Test) + + if len(gloss_parts) == 0: + return remaining_gloss, new_tags + else: + return "(" + ", ".join(gloss_parts) + ") " + remaining_gloss, new_tags + + + +def add_def(defs,new_gloss,gloss_tags): + if ")" in new_gloss: + for d in defs: + if new_gloss[new_gloss.find(")") + 2:] == d['gloss']: + d['gloss'] = new_gloss + for tag in gloss_tags: + if tag not in d['tags']: + d['tags'].append(tag) + d['gloss'], d['tags'] = paren_cut(d['gloss'],d['tags']) + return + + if ":" in new_gloss: + for d in defs: + if ":" in d['gloss'] and new_gloss[:new_gloss.find(':')] == d['gloss'][:d['gloss'].find(':')]: + if new_gloss[new_gloss.find(':') + 2:].isspace() or not new_gloss[new_gloss.find(':') + 2:]: + return + elif new_gloss[new_gloss.find(':') + 2:] == d['gloss'][d['gloss'].find(':') + 2:]: + return + if new_gloss: + defs.append({'gloss':new_gloss,'tags':copy.deepcopy(gloss_tags)}) + +def create_defs(senses, tag_list): + defs = [] + dupe_list = [] + + def process_glosses(glosses, gloss_tags): + for gloss in glosses: + new_gloss = gloss.strip(". ") + split_gloss = new_gloss.split('\n##') if '\n##' in new_gloss else [new_gloss] + + # Apply the loop only if we have split the gloss + if len(split_gloss) > 1: + for i in range(1, len(split_gloss)): + new_gloss, gloss_tags = paren_cut(split_gloss[0] + split_gloss[i], gloss_tags) + if new_gloss not in dupe_list: + add_def(defs, new_gloss, gloss_tags) + dupe_list.append(new_gloss) + else: + new_gloss, gloss_tags = paren_cut(new_gloss, gloss_tags) + if new_gloss not in dupe_list: + add_def(defs, new_gloss, gloss_tags) + dupe_list.append(new_gloss) + + return gloss_tags + + for sense in senses: + gloss_tags = [] + if 'form_of' in sense: + gloss_tags.append('form of ' + sense['form_of'][0]['word']) + + if 'tags' in sense: + gloss_tags.extend([tag for tag in sense['tags'] if tag not in tag_list]) + + if 'english' in sense and sense['english'] not in tag_list: + gloss_tags.append(sense['english']) + + if 'qualifier' in sense and sense['qualifier'] not in tag_list: + gloss_tags.append(sense['qualifier']) + + if 'glosses' in sense: + gloss_tags = process_glosses([sense['glosses'][0].strip(". 
")], gloss_tags) + + if 'raw_glosses' in sense: + gloss_tags = process_glosses(sense['raw_glosses'], gloss_tags) + + if Test: + print(f"dupe_list = {dupe_list}") + + if Test: + print(f"defs: {defs}") + return defs + +def get_file_selection(Test, test_file, test_language): + change_path('dumps unsorted') + if Test: + return test_file, test_language + else: + myFiles = glob.glob('*.json') + if myFiles == []: + print("\nSorry no saved dictionaries") + return None, None + else: + options = {'0':f"\nChoose from the following files: (0 to go back)\n"} + for index in range(len(myFiles)): + options[f"{str(index + 1)}"] = f"{index + 1}. {myFiles[index]}\n" + user_input = get_selection(options) + + if user_input == '0': + return None, None + else: + file = myFiles[int(user_input)-1] + language = pick_language() + return file, language + +def print_debug_info(line, counter): + print('\n') + print(f"\tline: {counter}, word: {line['word']}") + print("WORD ITEMS >>>>>>>>>>>>>>") + for item in line.items(): + print(item) + print("SENSES ITEMS >>>>>>>>>>>>") + for item in line['senses'][0].items(): + print(item) + +def handle_pos(line): + pos_mapping = { + 'adv': 'adverb', + 'adj': 'adjective', + 'prep': 'preposition', + 'intj': 'interjection' + } + + pos = line['pos'] + line['pos'] = pos_mapping.get(pos, pos) + return line['pos'] + +def handle_defs(line,tag_list): + if 'tags' in line['senses'][0]: + tag = line['senses'][0]['tags'] + if 'no-senses' in tag or 'no-gloss' in tag or 'empty-gloss' in tag: + if isinstance(tag,list): + return [{'gloss': ", ".join(tag), 'tags': []}] + else: + return [{'gloss': tag, 'tags': []}] + else: + return create_defs(line['senses'], tag_list) + else: + return create_defs(line['senses'], tag_list) + +def handle_etymology(line): + if 'etymology_text' in line: + return line['etymology_text'] + else: + return '' + +def handle_parts(line,get_simple=None): + if get_simple: + return get_simple(line['pos'], line['head_templates'][0]['expansion'], line['word']) if 'head_templates' in line else line['word'] + else: + return line['head_templates'][0]['expansion'] if 'head_templates' in line else line['word'] + + +def handle_word_entry(line,tag_list,get_simple=None): + return { + 'partOfSpeech': handle_pos(line), + 'principleParts': line['head_templates'][0]['expansion'] if 'head_templates' in line else line['word'], + 'simpleParts': handle_parts(line,get_simple), + 'defs': handle_defs(line, tag_list), + 'etymology': handle_etymology(line) + } + + + +def handle_word(line,tag_list,language,get_simple=None): + return { + 'heading': line['word'], + 'handle': unidecode(line['word']) if language in ["Latin", "Italian"] else line['word'], + 'entries': [handle_word_entry(line, tag_list,get_simple)], + 'tags': set(), + 'roots': [line['senses'][0][root_type][0]['word'] for root_type in ['alt_of', 'form_of'] if root_type in line['senses'][0]] + } + +def ui_template(new_dictionary,dict_str,shrt_str,cite,cite_2='',dict_f=""): + user_input = input(f"\nAdd definitiions from: \"{dict_str}\"?"\ + + "\nType 'y' to add definitions, Press 'Enter' to continue: " ) + + if user_input.lower() == 'y': + user_input = '0' + new_dictionary = dict_f(new_dictionary) + + new_dictionary = edit_all.deduplicate(new_dictionary) + n = len(new_dictionary['definitions']) + thank(dict_str,shrt_str,n,cite) + +def thank(dict_str,shrt_str,length,cite,cite_2='',dict_f=""): + print(f"\nYour dictionary now contains ( {length:,} ) unique definitions after adding {shrt_str}.") + print(f"Data files courtesy of {cite}.",end='') + if 
cite_2: + i = input(f"\nType 'i' for more info, Press 'Enter' to continue: ") + if i.lower() == 'i': + print(f"\n{cite_2}") + input(f"Press 'Enter' to continue ") + else: + input(f"\n(Press 'Enter' to continue)") + +def parse_lines(input_file,tag_list,language,get_simple=None): + definitions_dict = {} + counter = 0 + for line in input_file: + line = json.loads(line) + counter += 1 + if progress_print or Test: + if counter % 1000 == 0: + print(".",end='',flush=True) + if counter % 100000 == 0: + print(f' {counter:,} lines parsed',flush=True) + if Test: + print_debug_info(line,counter) + + new_definition = handle_word(line, tag_list,language,get_simple) + if new_definition['heading'] in definitions_dict: + definitions_dict[new_definition['heading']]['entries'].extend(new_definition['entries']) + else: + definitions_dict[new_definition['heading']] = new_definition + input_file.close() + + print(f' {counter:,} lines parsed',flush=True) + print("De-duplicating definitions...") + + # convert dict values to list + definitions = list(definitions_dict.values()) + + dict_str = f"{language} Wiktionary" + shrt_str = f"{language} Wiktionary" + cite = "Tatu Ylonen, see kaikki.org for more information" + cite_2 = "Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022." + n = len(definitions) + thank(dict_str,shrt_str,n,cite,cite_2) + return definitions + + + +def replace_greek(word): + alt_letters = { + 'Ἀ':'Α', + 'ά':'α', 'ἀ':'α', 'ἄ':'α', 'ἅ':'α', 'ἆ':'α', 'ᾰ':'α', 'ᾱ':'α', 'ᾴ':'α', + 'έ':'ε', 'ἐ':'ε', 'ἑ':'ε', 'ἔ':'ε', 'ἕ':'ε', + 'ή':'η','ἡ':'η', 'ἤ':'η', 'ἥ':'η', 'ῆ':'η', + 'ί':'ι','ἰ':'ι', 'ἱ':'ι', 'ἴ':'ι', 'ἵ':'ι', 'ἶ':'ι', 'ῐ':'ι', 'ῑ':'ι', 'ῖ':'ι', + 'ό':'ο','ὀ':'ο', 'ὁ':'ο', 'ὄ':'ο', 'ὅ':'ο', + 'ῥ':'ρ', + 'ύ':'υ','ὐ':'υ', 'ὑ':'υ', 'ὔ':'υ', 'ὕ':'υ', 'ὖ':'υ', 'ὗ':'υ','ῠ':'υ', 'ῡ':'υ', 'ῦ':'υ', + 'ώ':'ω', 'ὧ':'ω','ῶ':'ω', 'ῷ':'ω' + } + for x in word: + if x in alt_letters: + word = word.replace(x,alt_letters[x]) + return word + +def sort_dump(): + + print_mode = Test + save_mode = not Test + test_language = 'Ancient Greek' + test_file = "kaikki.org-dictionary-AncientGreek.json" + + change_path('texts') + with open('newtaglist.txt','r') as f: + tag_list = json.load(f) + + language = pick_language() + file = "kaikki.org-dictionary-" + language.replace(' ','') + ".json" + + if language == "Latin": + from get_simple import get_simple + simple = get_simple + else: + simple = None + + sorted_file = language.replace(" ", '') + "Dump.txt" + + print(f"Parsing {file}") + new_dictionary = {'definitions':[], 'file': sorted_file, 'language':language} + + change_path('dumps unsorted') + with open(file, 'r') as input_file: + new_dictionary['definitions'] = parse_lines(input_file, tag_list,language,simple) + + with open('newtaglist.txt', mode='w') as f: + json.dump(tag_list, f) + + # Latin fixes + if True: + if new_dictionary['language'] == 'Latin': + if progress_print: + print(f"Fixing participles...") + new_dictionary = edit_all.fix_participles(new_dictionary) + if progress_print: + print(f"Fixing verbs...") + new_dictionary = edit_all.fix_verbs(new_dictionary) + if progress_print: + print(f"Fixing etymologies...") + new_dictionary = edit_all.fix_etymology(new_dictionary) + + # Greek Supplement + + if new_dictionary['language'] == "Ancient Greek": + + dict_str = ["Liddell & Scott, An Intermediate Greek-English Lexicon","Liddell & Scott, A Greek-English Lexicon (LSJ)"] + 
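+        # Offer to fold the Perseus Middle Liddell and LSJ lexica into the freshly parsed
+        # Ancient Greek Wiktionary data via the ui_template calls below.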
shrt_str = ["\"Middle\" Liddell",'\"LSJ\"'] + cite = "Tufts University Perseus Digital Library" + cite_2='' + from dictionary_Middle_Liddell import middle_liddell + from dictionary_LSJ import LSJ + dict_f = [middle_liddell,LSJ] + + for i in range(2): + ui_template(new_dictionary,dict_str[i],shrt_str[i],cite,cite_2,dict_f[i]) + + # Old English Supplement + + if new_dictionary['language'] == "Old English": + dict_str = "Mary Lynch Johnson, A Modern English - Old English Dictionary" + shrt_str = "M.L. Johnson OE Dictionary" + cite = "Richard Zimmermann, https://old-engli.sh/dictionary.php" + cite_2='' + from dictionary_MLJohnson import Johnson_OED + ui_template(new_dictionary,dict_str,shrt_str,cite,cite_2,Johnson_OED) + + change_path('dumps sorted') + new_dictionary['definitions'].sort(key=lambda item: item.get('handle').lower()) + + print("Converting dictionary to trie") + split_language(new_dictionary) + input("Extraction successful, press enter to continue") + return + + +if Test: + sort_dump() + + + + + diff --git a/edit_all.py b/edit_all.py new file mode 100644 index 0000000..fb60d11 --- /dev/null +++ b/edit_all.py @@ -0,0 +1,658 @@ + + + + +import get_selection +import word_methods + +import glob +import os +import sys +import pickle, json +from iteration_utilities import unique_everseen +from copy import deepcopy +from unidecode import unidecode +import load_dict +import word_print_edit +import edit_entry +import edit_dictionary +from tables_greek_ext import auto_parts + + +# EDIT ALL +# # # # # # # # # # # # +def edit_all(current_dict): + + while True: + + options = {'1':"Edit All Options:\n==================================\n>'1' to replace a tag\n", + 'q':">'q' other_unknown\n", + '2':">'2' special option\n", + 'a':">'a' change file name\n", + 'b':">'b' special option II\n", + 'c':">'c' convert to gloss/tags defs\n", + 'd':">'d' match dictionaries\n", + '3':">'3' to edit a subset by tag\n", + '4':">'4' to remove punctuation\n", + '5':">'5' to fix participles\n", + 'p':">'p' to fix pronuncitations\n", + '6':">'6' deduplicate\n", + '7':">'7' to fix verbs\n", + '8':">'8' remove periods\n", + '9':">'9' fix sort handles\n", + '10':">'10' special greek option\n", + #'q':">'q' to cut parens\n", + '0':">'0' to go back\n", + 'r':">'r' to reset template indicators\n", + 't':">'t' to tag splits\n", + 's':">'s' remove spaces\n", + 'x':">'x' to convert to set\n"} + user_input = get_selection.get_selection(options) + + if user_input == '0': + return current_dict + elif user_input == 'r': + for i in current_dict['definitions']: + if 'template' in i: + del i['template'] + elif user_input == '1': + current_dict = replace_tag(current_dict) + elif user_input == 'a': + current_dict = change_file_name(current_dict) + elif user_input == 't': + for i in current_dict['definitions']: + if i['entries'][0]['partOfSpeech'] == 'verb': + i['entries'][0]['simpleParts'] = auto_parts(i['entries'][0]['simpleParts'],True) + + load_dict.change_path('dictionaries') + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + + elif user_input == '2': + current = special(current_dict) + elif user_input == 'c': + current_dict = replace_defs(current_dict) + elif user_input == 'd': + match_dictionaries(current_dict) + elif user_input == 'p': + current_dict = fix_pronunciations(current_dict) + #elif user_input == 'q': + # cut_parens_access(current_dict['language']) + elif user_input == '3': + current_dict = edit_subset(current_dict) + elif user_input == '4': + current_dict 
== remove_all_punct(current_dict) + elif user_input == 'b': + find_paren(current_dict) + elif user_input == '5': + current_dict = fix_participles(current_dict) + elif user_input == '6': + current_dict = deduplicate(current_dict) + elif user_input == '7': + current_dict = fix_verbs(current_dict) + elif user_input == '8': + current_dict = remove_periods(current_dict) + elif user_input == '9': + current_dict = fix_sort_handles(current_dict) + elif user_input == '10': + current_dict = fix_etymology(current_dict) + #elif user_input == '10': + # special_greek(current_dict) + elif user_input == 's': + current_dict = remove_spaces(current_dict) + elif user_input == 'x': + convert_to_set(current_dict) + elif user_input == 'q': + other_unknown(current_dict) + +# END EDIT ALL + +import tables, tables_greek_ext + +def other_unknown(current_dict): + print('re-doing all verbs') + load_dict.change_path('templates') + myFiles = glob.glob('*.txt') + template_file = "AncientGreek_templates.txt" + if template_file not in myFiles: + print("No '_templates.txt' files found in directory") + templates = [] + else: + with open(template_file,'r') as f: + templates = json.load(f) + in_out_list = [] + for t in templates: + if t['POS'] == 'verb': + user_input = input(f"{t['title']} = contracted (y/n)?: ") + if user_input.lower() == 'y': + in_out_list.append(True) + else: + in_out_list.append(False) + print(f"in_out_list = : {in_out_list}") + for i, t in enumerate(templates): + if t['POS'] == 'verb': + if in_out_list[i]: + t = tables_greek_ext.get_forms(t) + print(t['principal']) + user_input = input("Save new verb data (y/n)?") + if user_input.lower() == 'y': + + load_dict.change_path('templates') + sort_tables(templates,language) + with open(template_file,'w') as f: + json.dump(templates,f) + + print('re-doing all verbs succesful') + else: + print('re-doing all verbs aborted succesfully') + return +def special_greek(current_dict): + letters = { + 'Ἀ':'Α', + 'ά':'α', 'ἀ':'α', 'ἄ':'α', 'ἅ':'α', 'ἆ':'α', 'ᾰ':'α', 'ᾱ':'α', 'ᾴ':'α', + 'έ':'ε', 'ἐ':'ε', 'ἑ':'ε', 'ἔ':'ε', 'ἕ':'ε', + 'ή':'η','ἡ':'η', 'ἤ':'η', 'ἥ':'η', 'ῆ':'η', + 'ί':'ι','ἰ':'ι', 'ἱ':'ι', 'ἴ':'ι', 'ἵ':'ι', 'ἶ':'ι', 'ῐ':'ι', 'ῑ':'ι', 'ῖ':'ι', + 'ό':'ο','ὀ':'ο', 'ὁ':'ο', 'ὄ':'ο', 'ὅ':'ο', + 'ῥ':'ρ', + 'ύ':'υ','ὐ':'υ', 'ὑ':'υ', 'ὔ':'υ', 'ὕ':'υ', 'ὖ':'υ', 'ὗ':'υ','ῠ':'υ', 'ῡ':'υ', 'ῦ':'υ', + 'ώ':'ω', 'ὧ':'ω','ῶ':'ω', 'ῷ':'ω' + } + letters = list(letters.keys()) + for x in current_dict['definitions']: + for y in x['heading']: + if y not in letters: + letters.append(y) + letters.sort() + print("&" * 100) + print(letters) + +def remove_spaces(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + text = current_dict['definitions'][i]['entries'][j]['defs'][k] + text['gloss'] = text['gloss'].replace(" .",".").replace(" ,",",").replace(" :",":") + current_dict['definitions'][i]['entries'][j]['defs'][k] = text + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + return current_dict + +def fix_etymology(current_dict): + for i in range(len(current_dict['definitions'])): + for x in range(len(current_dict['definitions'][i]['entries'])): + current_dict['definitions'][i]['entries'][x]['etymology'] = current_dict['definitions'][i]['entries'][x]['etymology'].strip('\n') + current_dict['definitions'][i]['entries'][x]['etymology'] = 
current_dict['definitions'][i]['entries'][x]['etymology'].replace("\n"," ") + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + +def fix_sort_handles(current_dict): + for i in range(len(current_dict['definitions'])): + if 'sort_handle' in current_dict['definitions'][i]: + current_dict['definitions'][i]['handle'] = current_dict['definitions'][i]['heading'] + del current_dict['definitions'][i]['sort_handle'] + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + +def fix_pronunciations(current_dict): + for i in range(len(current_dict['definitions'])): + if 'Pronunciation' in current_dict['definitions'][i]['tags']: + current_dict['definitions'][i]['heading'] = "Pronunciation: " + current_dict['definitions'][i]['heading'] + current_dict['definitions'][i]['handle'] = current_dict['definitions'][i]['heading'] + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + +def change_file_name(current_dict): + print(f"Enter new name for {current_dict['file']} ('0' to go back)") + user_input = input(": ") + if user_input == '0': + return current_dict + else: + load_dict.change_path('dictionaries') + current_dict['file'] = user_input + '.txt' + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + +def replace_defs(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + current_dict['definitions'][i]['entries'][j]['defs'][k] = {'gloss':current_dict['definitions'][i]['entries'][j]['defs'][k],'tags':[]} + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + +def load_latin(index_letter): + if index_letter.lower() not in 'abcdefghijklmnopqrstuvwxyz': + index_letter = 'misc' + load_dict.change_path("dumps sorted") + with open('Latin-' + index_letter.lower() + '.txt','rb') as openFile: + wiki_dump = pickle.load(openFile) + return wiki_dump + + +def match_dictionaries(current_dict): + load_dict.change_path("dumps sorted") + trie_file = current_dict['language'].replace(" ","") + '-trie.txt' + + print(f"Loading {trie_file}") + with open(trie_file, 'rb') as openFile: + current_trie = pickle.load(openFile)['definitions'] + + for word_data in current_dict['definitions']: + heading = unidecode(word_data['heading'].lower()) + if heading in current_trie: + if isinstance(current_trie[heading],list): + for i in range(len(current_trie[heading])): + if current_trie[heading][i]['heading'] == word_data['heading']: + print(f"{word_data['heading']} updated") + current_trie[heading][i]['tags'].update(word_data['tags']) + else: + print(f"{word_data['heading']} updated") + current_trie[heading]['tags'].update(word_data['tags']) + else: + print(f"\t{word_data['heading']} added") + current_trie[heading] = deepcopy(word_data) + + with open(current_dict['language'].replace(" ","") + '-trie.txt', mode = 'wb') as openFile: + pickle.dump({'file': trie_file, 'language': current_dict['language'], 'definitions': current_trie}, openFile, protocol=pickle.HIGHEST_PROTOCOL) + + +def remove_all_punct(current_dict): + for i in range(len(current_dict['definitions'])): + for j in 
range(len(current_dict['definitions'][i]['entries'])): + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + if '\n' in current_dict['definitions'][i]['entries'][j]['defs'][k]: + text = current_dict['definitions'][i]['entries'][j]['defs'][k] + text = text[:text.find('\n')] + current_dict['definitions'][i]['entries'][j]['defs'][k] = text + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + +def remove_periods(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + text = current_dict['definitions'][i]['entries'][j]['defs'][k] + text['gloss'] = text['gloss'].strip(",.; ") + current_dict['definitions'][i]['entries'][j]['defs'][k] = text + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + return current_dict + + +def prune(entry): + while True: + edit_entry.print_entry(entry) + user_input = int(input("Prune definitions starting from ('0' to finish): ")) + if user_input == 0: + return entry + else: + entry['defs'] = entry['defs'][:user_input] + +def remove_punct(entry): + while True: + edit_entry.print_entry(entry) + user_input = input("Choose character to remove ('0' to finish): ") + if user_input == '0': + return entry + else: + for i in range(len(entry['defs'])): + text = entry['defs'][i] + text = list(text) + while user_input in text: + text.remove(user_input) + new_text = '' + for c in text: + new_text += c + entry['defs'][i] = new_text + + +# SPECIAL +# # # # # # # # # # # +def special(current_dict,start=0): + + for i in range(start,len(current_dict['definitions'])): + while True: + edit_entry.print_entry(current_dict['definitions'][i]['entries'][0],'length') + print(f"DEFINITION # {i}\n") + options = {'0':">'0' to stop\n", + '1':">'1' to prune definitions\n", + '2':">'2' to remove punctuation\n", + '3':">'3' to continue\n", + '4':">'4' to skip forward\n", + '5':">'5' to change order\n", + '6':">'6' to change definition\n"} + user_input = get_selection.get_selection(options) + if user_input == '0': + return current_dict + elif user_input == '1': + current_dict['definitions'][i]['entries'][0] = prune(current_dict['definitions'][i]['entries'][0]) + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + elif user_input == '2': + current_dict['definitions'][i]['entries'][0] = remove_punct(current_dict['definitions'][i]['entries'][0]) + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + elif user_input == '3': + break + elif user_input == '4': + user_input = int(input("Choose start position: ")) + if user_input != 0: + special(current_dict,user_input) + return current_dict + elif user_input == '5': + while True: + user_input = input("Enter selection and new position seperated by commas ('0' to stop): ") + if user_input == '0': + break + else: + user_input = user_input.split(',') + current_dict['definitions'][i]['entries'][0]['defs'] = edit_entry.move_entries(current_dict['definitions'][i]['entries'][0]['defs'],int(user_input[0]),int(user_input[1])) + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + elif user_input == '6': + message = "\nChoose the definition you want to change" + selection = edit_entry.select_definition(current_dict['definitions'][i]['entries'][0],message) + if selection != 
None: + current_dict['definitions'][i]['entries'][0]['defs'][selection] = edit_entry.remove_words(current_dict['definitions'][i]['entries'][0]['defs'][selection]) + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + return current_dict +# END SPECIAL + + +# REPLACE TAG +# # # # # # # # # # # +def replace_tag(current_dict): + + # Set directory + load_dict.change_path('dictionaries') + + while True: + tags = word_methods.get_master_list(current_dict) + + options = {'0':"Select the tag you want to replace ('0' to go back)\n"} + + for index in range(len(tags)): + options[f"{index + 1}"] = f"{index + 1}. {tags[index]}\n" + user_input = get_selection.get_selection(options) + + if user_input == '0': + return current_dict + + else: + tag_selection = tags[int(user_input) - 1] + + print(f"Enter the replacement tag for {tag_selection} ('0' to go back)") + user_input = input(": ") + + if user_input == '0': + continue + else: + for index in range(len(current_dict['definitions'])): + offset = 0 + for inner in range(len(current_dict['definitions'][index]['tags'])): + inner = inner - offset + if current_dict['definitions'][index]['tags'][inner] == tag_selection: + if user_input == '': + del current_dict['definitions'][index]['tags'][inner] + offset += 1 + else: + current_dict['definitions'][index]['tags'][inner] = user_input + + # open file, pickle.dump definitions list to fine, close file + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + + print(f"All instances of {tag_selection} replaced with {user_input}\n") +# REPLACE TAG + +def convert_to_set(current_dict): + + # Set directory + load_dict.change_path('dictionaries') + + # Convert all tags from lists to sets + for index in range(len(current_dict['definitions'])): + current_dict['definitions'][index]['tags'] = set(current_dict['definitions'][index]['tags']) + + # open file, pickle.dump definitions list to fine, close file + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + + + +# EDIT SUBSET +# # # # # # # # # # # # # +def edit_subset(current_dict): + + # Set directory + load_dict.change_path('dictionaries') + while True: + + # option to set tags + master_list = word_methods.get_master_list(current_dict) + tags = word_methods.getTags([],'subset',master_list) + + options = {'0':"Subset options:\n>'0' to go back\n", + '1':">'1' to add tags to subset\n",'2':">'2' to remove tags from subset\n"} + user_input = get_selection.get_selection(options) + + if user_input == '0': + return current_dict + + elif user_input == '1': + mode = 'add' + from_to = 'to' + elif user_input == '2': + mode = 'remove' + from_to = 'from' + + options = {'1':">'1' for exact match\n",'2':">'2' for any word with selected tags\n"} + user_input = get_selection.get_selection(options) + + if user_input == '1': + exact = True + elif user_input == '2': + exact = False + + exit_loop = False + while not exit_loop: + print(f"Enter the tag your want to {mode} {from_to} your subset ('0' to go back)") + user_input = input(": ") + + if user_input == '0': + exit_loop = True + + # Loop to create sub-list to select from + offset = 0 + for index in range(len(current_dict['definitions'])): + index = index - offset + + # assign word to shorten name + current_tags = current_dict['definitions'][index]['tags'] + # test if tags match; always 'yes' for empty tags + if exact: + tag_test = current_tags == tags + else: + tag_test = 
set(tags).issubset(set(current_tags)) + + if tag_test: + if mode == 'add': + current_dict['definitions'][index]['tags'].append(user_input) + elif mode == 'remove': + if user_input in current_dict['definitions'][index]['tags']: + current_dict['definitions'][index]['tags'].remove(user_input) + offset += 1 + + # open file, pickle.dump definitions list to fine, close file + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + + print(f"\n{user_input} {mode[:5]}ed {from_to} subset\n") + + return current_dict +# END EDIT SUBSET + +def fix_participles(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + if current_dict['definitions'][i]['entries'][j]['partOfSpeech'] == 'verb': + simpleParts = current_dict['definitions'][i]['entries'][j]['simpleParts'] + simpleParts = simpleParts.split(',') + if len(simpleParts) >= 3: + if simpleParts[0][-2:] == 'us' and simpleParts[1][-1:] == 'a' and simpleParts[2][-2:] == 'um': + current_dict['definitions'][i]['entries'][j]['partOfSpeech'] = 'participle' + current_dict['definitions'][i]['entries'][j]['simpleParts'] = simpleParts[0].strip() + ", " + simpleParts[1].strip() + ", " + simpleParts[2].strip() + if len(simpleParts) >= 2: + if simpleParts[0][-2:] == 'ns' and simpleParts[1][-4:] == 'ntis': + current_dict['definitions'][i]['entries'][j]['partOfSpeech'] = 'participle' + current_dict['definitions'][i]['entries'][j]['simpleParts'] = simpleParts[0].strip() + ", " + simpleParts[1].strip() + if current_dict['definitions'][i]['entries'][j]['partOfSpeech'] == 'participle': + first = False + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + text = current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] + text, first = word_methods.participle_edit(text,first) + current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] = text + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + + +def fix_verbs(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + if current_dict['definitions'][i]['entries'][j]['partOfSpeech'] == 'verb': + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + text = current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] + text = word_methods.verb_edit(text) + current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] = text + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + return current_dict + +import collections +from copy import deepcopy + +def deduplicate(current_dict): + entries = collections.defaultdict(list) + language = current_dict['language'] + + for item in current_dict['definitions']: + if language == "Latin": + key = unidecode(item['heading']) + else: + key = item['handle'] + entries[key].append(item) + + new_definitions = [] + for key, values in entries.items(): + main_item = deepcopy(values[0]) + if len(values) > 1: + for value in values[1:]: + main_item['entries'].extend(deepcopy(value['entries'])) + new_definitions.append(main_item) + + current_dict['definitions'] = new_definitions + + with open(current_dict['file'], mode='wb') as openFile: + pickle.dump(current_dict, openFile) + print("De-Duplicating Successful") + + return current_dict + + +def cut_parens_access(language): + if language == "Latin": + alpha = 
{'a':[], + 'b':[], + 'c':[], + 'd':[], + 'e':[], + 'f':[], + 'g':[], + 'h':[], + 'i':[], + 'j':[], + 'k':[], + 'l':[], + 'm':[], + 'n':[], + 'o':[], + 'p':[], + 'q':[], + 'r':[], + 's':[], + 't':[], + 'u':[], + 'v':[], + 'w':[], + 'x':[], + 'y':[], + 'z':[], + 'misc':[]} + for key in alpha: + alpha[key] = load_latin(key) + for key in alpha: + cut_parens(alpha[key]) + else: + load_dict.change_path("dumps sorted") + with open(language.replace(" ","") + 'Dump.txt','rb') as openFile: + wiki_dump = pickle.load(openFile) + cut_parens(wiki_dump) + +def cut_parens(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + for k in range(len(current_dict['definitions'][i]['entries'][j]['defs'])): + g = current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] + if g[0] == "(": + tags = ", ".join(current_dict['definitions'][i]['entries'][j]['defs'][k]['tags']) + p = g[g.find("(") + 1:g.find(")")] + if tags in p or tags.replace("-"," ") in p: + current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] = g[g.find(")") + 1:].strip(": ") + else: + for tag in current_dict['definitions'][i]['entries'][j]['defs'][k]['tags']: + if tag in p or tag.replace("-"," ") in p: + current_dict['definitions'][i]['entries'][j]['defs'][k]['gloss'] = g[g.find(")") + 1:].strip(": ") + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + return current_dict + +def find_paren(current_dict): + original_stdout = sys.stdout + sys.stdout = open("parenthesis.txt",'w') + for definition in current_dict['definitions']: + for defs in definition['entries'][0]['defs']: + if ')' in defs: + text = defs[defs.find("("):defs.find(")")+1] + print(text) + sys.stdout = original_stdout + +''' +def fix_etymology(current_dict): + for i in range(len(current_dict['definitions'])): + for j in range(len(current_dict['definitions'][i]['entries'])): + if 'etymology' in current_dict['definitions'][i]['entries'][j]: + current_dict['definitions'][i]['entries'][j]['etymology'] = current_dict['definitions'][i]['entries'][j]['etymology'].replace('\n',' ') + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + return current_dict +''' + + diff --git a/edit_dictionary.py b/edit_dictionary.py new file mode 100644 index 0000000..c36f28d --- /dev/null +++ b/edit_dictionary.py @@ -0,0 +1,812 @@ +''' +Description: + + edit_dictionary: + options to route control to different functions within this file + + word_look_up: + the function searches for a word input within the current dictionary + if found, send the word and current dictionary to existing_entry_options + + word list: + print a list of all entries (or filtered by tags) + user selection is sent to existing_enntry_options + + print_dict: + print the dictionary or a subset of tags to a file +''' +import glob +import os +import sys +import pickle +from unidecode import unidecode +import re +import random + +import word_print_edit +import parser_shell +import word_methods +import get_selection +import edit_all +import load_dict +from edit_entry import pretty_print_tags + +# EDIT DICTIONARY +# # # # # # # # # # # # +def edit_dictionary(current_dict): + + while True: + + # dictionary options + options = { + '1':"\nEdit Dictionary Options:\n==================================\n>'1' to look up words by name\n", + '2':">'2' to select from all entries\n", + '3':">'3' for edit all options\n", + 'tags':">'tags' to select from a subset by tag\n", 
+ '0':">'0' to go back\n" + } + user_input = get_selection.get_selection(options) + + # options to go back + if user_input == '0': + return current_dict + + elif user_input == '1': + current_dict = word_look_up(current_dict) + + + # Option to set tags + elif user_input.lower() == 'tags': + master_list = word_methods.get_master_list(current_dict) + tags = word_methods.getTags([],'filter',master_list) + user_input = '2' + else: + tags = [] + + # Proceed to next step + if user_input == '2': + current_dict = word_list(current_dict, tags) + + if user_input == '3': + current_dict = edit_all.edit_all(current_dict) +# END EDIT DICTIONARY + + +# WORD LOOK UP +# # # # # # # # # # # # # # # # # # # +def word_look_up(current_dict,look_up_word=None): + + while True: + # set/reset flag + word_found = False + if not look_up_word: + # prompt user to input word + print("Enter the word you want to look up ('0' to go back)") + user_input = input(": ") + else: + user_input = look_up_word + + # option to go back + if user_input == '0': + return current_dict + else: + # compare with handles in current dictionary + for word in current_dict['definitions']: + if user_input == word['handle']: + word_found = True + print(f"\nEntry for '{user_input}' found in {current_dict['file']}") + word_print_edit.print_entries(word['entries']) + current_dict = existing_word_options(current_dict,word) + if look_up_word: + invalid_selection = not word_found + return current_dict, invalid_selection + if not word_found: + print(f"\n\t'{user_input}' not found in {current_dict['file']}\n") + +# END WORD LOOK UP + +def subset_test(tags,word_tags): + result = True + for i in tags: + if i not in word_tags: + result = False + return result + + +# WORD LIST +# # # # # # # # # # # # # # +def word_list(current_dict,tags): + # whole function contained in loop + # flag begins set to false + invalid_selection = False + while True: + + counter = 0 + sublist = [] + + # Print heading for dictionary entries display + print(f"\n{current_dict['file']}\n") + labels = [" Headings:","Parts:","Definitions:"] + print(f"{labels[0]:<25}{labels[1]:<55}{labels[2]}\n") + + # Loop to create sub-list to select from + for index in range(len(current_dict['definitions'])): + + # assign word to shorten name + word = current_dict['definitions'][index] + + # test if tags match; always 'yes' for empty tags + + if subset_test(tags,word['tags']): + + # Create and print formatted string + entry_string = f"{counter + 1:>3}. {word['heading']}:" + # print with desired alignment + print(f"{entry_string:<25}",end='') + + # check if part exceeds desired length + entry_len = len(word['entries'][0]['simpleParts']) + entry_string = f"{word['entries'][0]['simpleParts'][:50]}" + + # if over length attach elipses + if entry_len > 50: + entry_string = entry_string[:-3] + entry_string += "..." + # print with desired alignment + print(f"{entry_string:<55}", end='') + + # check if definition exceeds desired length + entry_len = len(word['entries'][0]['defs'][0]['gloss']) + entry_string = f"{word['entries'][0]['defs'][0]['gloss'][:50]}" + + # if over length attach elipses + if entry_len > 50: + entry_string = entry_string[:-3] + entry_string += "..." 
+ print(f"{entry_string}") + + # save word as a dictionary entry with index values + sublist.append({'word':word}) + sublist[counter]['index'] = index + counter += 1 + + # if sublist is empty + if counter == 0: + print("\nno definitions found with these tags") + return current_dict + + if invalid_selection: + print("\ninvalid selection\n") + invalid_selection = False + + # prompt selection + print("Select definition ('0' to go back)") + user_input = input(": ") + + + + # Option to go back to edit dictionary + if user_input == '0': + return current_dict + # validate selection is numeric + if not user_input.isnumeric(): + current_dict, invalid_selection = word_look_up(current_dict,user_input) + else: + # validate selection is in range + if int(user_input) - 1 not in range(len(sublist)): + invalid_selection = True + else: + # assign saved index from sublist + i = sublist[int(user_input) - 1]['index'] + word = current_dict['definitions'][i] + current_dict = existing_word_options(current_dict,word) +# END WORD LIST + + +# EXISTING WORD OPTIONS +# # # # # # # # # # # # # # # # # # # +def existing_word_options(current_dict,word): + + + # Set directory + load_dict.change_path('dictionaries') + + # whole function contained in loop + while True: + + # retreive heading from sublist + heading = word['heading'] + # display options + options = { + 'α':"Options:\n", + '1':f">'1' to Edit '{heading}'\n", + '2':f">'2' to Delete '{heading}'\n", + '3':f">'3' look-up '{heading}'\n", + '4':f">'4' split word\n", + '0':">'0' to go back\n" + } + user_input = get_selection.get_selection(options) + + # Option to go back + if user_input == '0': + return current_dict + + # Option to edit + elif user_input == '1': + # call edit word + language = current_dict['language'] + word, result = word_print_edit.edit_entries(word,current_dict) + parser_shell.save_word(word,current_dict) + + # back to calling function + return current_dict + + # Option to delete word + elif user_input == '2': + # confirm deletion request + print(f"Delete {heading}?\n'1' to confirm, any other key to cancel") + user_input = input(": ") + + if user_input == '1': + # retrieve correct index from sublist + for i in range(len(current_dict['definitions'])): + if heading == current_dict['definitions'][i]['heading']: + del current_dict['definitions'][i] + break + + # Open file an save dictionary after deleting + openFile = open(current_dict['file'],mode = 'wb') + pickle.dump(current_dict, openFile) + openFile.close() + + # announce word was deleted + print(f"\n'{heading}' deleted from {current_dict['file']}\n") + # back calling function + return current_dict + elif user_input == '3': + parser_shell.look_up_word(heading,current_dict,word['tags'],skip_check=True) + return current_dict + + elif user_input == '4': + word_print_edit.split_word(word,current_dict) + return current_dict +# END EXISTING WORD OPTIONS + +def pos_test_part_latin(definition,pos): + for entry in definition['entries']: + if pos_test_part_latin_2(entry['partOfSpeech'],pos): + return True + return False + +def pos_test_part_latin_2(entry_pos,pos): + if pos == 'adjective': + if entry_pos == 'adjective' or entry_pos == 'participle': + return True + else: + return False + elif pos == 'noun': + if entry_pos == 'noun' or entry_pos == 'name' or entry_pos.lower() == 'proper noun' : + return True + else: + return False + elif pos == 'verb': + if entry_pos == 'verb': + return True + else: + return False + elif pos == 'other': + if entry_pos == 'adjective' or entry_pos == 'participle' or entry_pos 
== 'noun' or entry_pos == 'verb': + return False + else: + return True + +def pos_test_part_greek(definition,pos): + for entry in definition['entries']: + if pos_test_part_greek_2(entry['partOfSpeech'],pos): + return True + return False + +def pos_test_part_greek_2(entry_pos,pos): + if pos == 'verb': + if entry_pos == 'verb': + return True + else: + return False + elif pos == 'other': + if entry_pos == 'verb': + return False + else: + return True + + +# PRINT DICT +# function prints a loaded dictionary as text to a file with ':' separators +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +def print_dict(current_dict,mode='Quizlet'): + + # change file path to 'prints' folder + load_dict.change_path('flashcards') + myFiles = glob.glob('*.txt') + if current_dict['language'] == "Latin": + split = True + pos_list = ['noun','verb','adjective','other'] + function, function2 = pos_test_part_latin, pos_test_part_latin_2 + elif current_dict['language'] == "Ancient Greek": + split = True + pos_list = ['verb','other'] + function, function2 = pos_test_part_greek, pos_test_part_greek_2 + else: + split = False + pos_list = ['other'] + # Special separator character + c = '|' + counter = 0 + limit = 49000 + files = 1 + + while True: + tags = [] + tag_mode = '2' + + # Save a reference to the original standard output + original_stdout = sys.stdout + + for pos in pos_list: + + if split: + file_name = current_dict['language'].replace(' ','') + "-" + pos.upper() + '-Flashcards.txt' + else: + file_name = current_dict['language'].replace(' ','') + '-Flashcards.txt' + + # assign user selected file to output + sys.stdout = open(file_name, 'w') + + dict_range = list(range(len(current_dict['definitions']))) + random.shuffle(dict_range) + + # Loop through words in current_dict + for i in dict_range: + if function(current_dict['definitions'][i],pos) or not split: + pass + else: + continue + + word_string = '

{current_dict['definitions'][i]['heading'].strip()}

{c}" + + for j in range(len(current_dict['definitions'][i]['entries'])): + entry = current_dict['definitions'][i]['entries'][j] + partOfSpeech = entry['partOfSpeech'] + if function2(partOfSpeech,pos) or not split: + pass + else: + continue + + entry = current_dict['definitions'][i]['entries'][j] + if 'etymology' in entry: + if len(entry['etymology']) <= 125: + word_string += f'

{entry["etymology"]}

' + word_string += get_entry_string(entry,current_dict['language']) + + word_string += c + + for tag in current_dict['definitions'][i]['tags']: + word_string += '"' + tag + '"; ' + word_string.strip('; ') + + print(word_string) + + + # re-assign orinigal output + sys.stdout = original_stdout + + print("\n**********************\n\nPrint to file complete\n\n**********************\n") + return +# END PRINT DICT + +def split_tags(tags,next_index,previous_tags): + current_index = next_index - 1 + + ''' If previous (current) tags and current tags both exist ''' + if previous_tags != [] and tags[current_index] != []: + match = True + if len(previous_tags) > len(tags[current_index]): + match = False + else: + for i in range(len(previous_tags)): + if previous_tags[i] != tags[current_index][i]: + match = False + ''' If all previous (common) tags match with first n current tags ''' + if match: + ''' Seperate current into common and distinct tags ''' + return tags[current_index][:i+1], tags[current_index][i+1:] + + ''' Current did not match previous common tags, inspect next tags ''' + if len(tags) > next_index: + if tags[next_index] != []: + ''' Next tags exist and are not empty ''' + if tags[next_index] == tags[next_index - 1]: + ''' Tags are exactly the same, all current tags will be common tags ''' + return tags[current_index], [] + + ''' Find the smaller of the two lists ''' + if len(tags[current_index]) <= len(tags[next_index]): + shorter = tags[current_index] + longer = tags[next_index] + else: + shorter = tags[next_index] + longer = tags[current_index] + for i in range(len(shorter)): + if shorter[i] != longer[i]: + ''' Once lists are no longer the same we have common and distict tags ''' + if i == 0: + ''' If no matches, all tags are common ''' + return tags[current_index], [] + ''' Else use index to seperate common and distinct ''' + return tags[current_index][:i], tags[current_index][i:] + + ''' If next tags don't exist or are empty, all tags are common tags ''' + return tags[current_index], [] + + +def get_entry_string(entry,language): + entry_string = '' + entry_string += f'

' + f"{entry['simpleParts'].strip()}
" + text = [definition['gloss'] for definition in entry['defs']] + def_tags = [definition['tags'] for definition in entry['defs']] + if language == "Latin": + text = short_defs(text,def_tags) + bank = ["*","^","†","∆"] + custom = [] + while text[0][-1] in bank: + custom.append(text[0][-1]) + text[0] = text[0][:-1] + + if custom: + if len(custom) == 4: + entry_string += f'

    ' #style="color:#750265;">' + elif len(custom) >= 2: + entry_string += f'
      ' # style="color:#5C4033;">' + elif len(custom) == 1: + if custom[0] == f'*': + entry_string += f'
        ' # style="color:#00008B";">' + elif custom[0] == '^': + entry_string += f'
          ' # style="color:#006400";">' + elif custom[0] == '†': + entry_string += f'
            ' # style="color:#483C32";">' + elif custom[0] == '∆': + entry_string += f'
              ' # style="color:#7d022f";">' + entry_string += '' + else: + + entry_string += f'
                ' + special_tags = ['LTRG','Oxford','Liddell & Scott','Athenaze'] + + line_tags = def_tags[0] + previous_tags = [] + common_tags, distinct_tags = split_tags(def_tags,1,previous_tags) + open_sublist = False + if line_tags: + open_sublist = True + entry_string += pretty_print_tags(common_tags,-1) + next_index = 1 + for line, tags in zip(text,def_tags): + common_tags, distinct_tags = split_tags(def_tags,next_index,previous_tags) + previous_tags = common_tags + if common_tags != line_tags: + tag_break = False + for tag in line_tags: + if tag in special_tags: + tag_break = True + for tag in tags: + if tag in special_tags: + tag_break = False + line_tags = common_tags + if open_sublist: + entry_string += "
              " + if tag_break: + entry_string += '
              ' + if common_tags != []: + open_sublist = True + entry_string += pretty_print_tags(common_tags,-1) + else: + open_sublist = False + if distinct_tags: + entry_string += '
            1. ' + "(" + ",".join(distinct_tags) + ") " + line.strip(";,. ").strip("†∆*^") + '
            2. ' + else: + entry_string += '
            3. ' + line.strip(";,. ").strip("†∆*^") + '
            4. ' + next_index += 1 + if open_sublist: + entry_string += "
            " + entry_string += '



          ' + return entry_string + + +# PRINT GLOSS SETUP +# # # # # # # # # # # # +def print_gloss_setup(current_dict): + + + # change file path to 'prints' folder + load_dict.change_path('glosses') + myFiles = glob.glob('*.txt') + + while True: + master_list = word_methods.get_master_list(current_dict) + tags = word_methods.getTags([],'filter',master_list) + + options= {'1':"Choose tag option: '1' for strict match",'2':", '2' for loose match\n"} + tag_mode = get_selection.get_selection(options) + + + # User input Loop + exit_inner_loop = False + while not exit_inner_loop: + + # Input name of print file + print("What to you want to name your print file?") + user_input = input("Enter name (0 to go back): ") + + # return control + if user_input == '0': + exit_inner_loop = True + continue + + file_name = user_input + '.txt' + + # Check with user before overwriting existing file + if file_name in myFiles: + print(f"\n{file_name} already exists, ok to overwrite?") + print("'1' to proceed, any other key to go back") + user_input = input(': ') + + if user_input != '1': + continue + + filter_gloss(current_dict,tags,file_name,tag_mode) + + print("\n**********************\n\nPrint to file complete\n\n**********************\n") + return +# END PRINT GLOSS SETUP + + + +# FILTER GLOSS +# # # # # # # # # # # # # # +def filter_gloss(current_dict,tags,output_file=None,tag_mode='1'): + count = 0 + + if output_file: + # Save a reference to the original standard output + original_stdout = sys.stdout + + # assign user selected file to output + sys.stdout = open(output_file, 'w') + + if tags: + print(f"\n\t{str(tags)}\n") + + parts_list = ["noun", + "proper noun","verb","adjective","participle", "adverb", "determiner", + "article", "preposition", "conjunction","pronoun","letter", "character", + "phrase", "proverb", "idiom","symbol", "syllable", "numeral", "initialism", + "interjection","definitions"] + for part in parts_list: + count += print_gloss(current_dict,tags,part,tag_mode) + + if output_file: + # re-assign orinigal output + sys.stdout = original_stdout + return count +# END FILTER GLOSS + + + +# PRINT GLOSS +# # # # # # # # # # # # # # +def print_gloss(current_dict,tags,partOfSpeech=None,tag_mode='1'): + strings = {} + first_run = True + + counter = 0 + + # Loop to create sub-list to select from + for index in range(len(current_dict['definitions'])): + + # assign word to shorten name + word = current_dict['definitions'][index] + + for x in range(len(word['entries'])): + # test if tags match; always 'yes' for empty tags + if tag_mode == '1': + print_flag = set(tags).issubset(set(word['tags'])) + elif tag_mode == '2': + print_flag = False + for tag in tags: + if tag in word['tags']: + print_flag = True + # test if part of speech match if using part of speech mode + if print_flag and partOfSpeech: + print_flag = word['entries'][x]['partOfSpeech'].lower() == partOfSpeech.lower() + + # print word in desired subset + if print_flag: + + if partOfSpeech and first_run: + print(f"\n{partOfSpeech.upper()}S:\n") + first_run = False + + # increment counter + counter += 1 + + # print with desired alignment + if current_dict['language'] == 'Latin': + simpleParts = word['entries'][x]['simpleParts'] + if partOfSpeech == 'verb': + if len(simpleParts) > 50: + simpleParts = simpleParts[:49] + "-" + entry_string = f"{simpleParts:.<50} | " + else: + if len(simpleParts) > 30: + simpleParts = simpleParts[:29] + "-" + entry_string = f"{simpleParts:.<30} | " + # check if definition exceeds desired length + text = [d['gloss'] 
for d in word['entries'][x]['defs']] + dtags = [d['tags'] for d in word['entries'][x]['defs']] + #text = short_defs(text, dtags) + if len(text) == 1: + entry_string += f"~) " + text[0].strip('*^†∆') + "; " + else: + for i in range(len(text)): + entry_string += f"{i+1}) " + text[i].strip('*^†∆') + "; " + entry_string = entry_string.strip("; ") + + if len(entry_string) > 130: + print(entry_string[:entry_string[:130].rfind(' ')]) + if partOfSpeech == 'verb': + second_line = entry_string[entry_string[:130].rfind(' '):] + if len(second_line) > 80: + second_line = second_line[:77] + "..." + print(f"{'.':.<50} | {second_line}") + else: + second_line = entry_string[entry_string[:150].rfind(' '):] + if len(second_line) > 100: + second_line = second_line[:97] + "..." + print(f"{'.':.<30} | {second_line}") + else: + print(f"{entry_string}") + elif current_dict['language'] == "Ancient Greek": + + entry_string = word['entries'][x]['simpleParts'][:word['entries'][x]['simpleParts'].find(')')+1].strip() + length_string = entry_string.lower().replace("θ",'t') + length_string = length_string.replace("χ",'k') + length_string = length_string.replace('φ','f') + length_string = length_string.replace('ψ','c') + entry_string += ' ' * (30 - len(unidecode(length_string))) + " | " + # check if definition exceeds desired length + text = [line['gloss'] for line in word['entries'][x]['defs']] + text = short_defs(text) + if len(text) == 1: + entry_string += text[0] + else: + for i in range(len(text)): + entry_string += f"{i+1}) " + text[i].strip('*^†∆') + "; " + entry_string = entry_string.strip("; ") + + if len(entry_string) > 130: + print(entry_string[:entry_string[:130].rfind(' ')]) + print(f"{' ':<30} | {entry_string[entry_string[:130].rfind(' '):entry_string[:225].rfind(',')]}") + else: + print(f"{entry_string}") + + return counter +# END PRINT GLOSS + +# CHOP LINE +# # # # # # # # # # # # # +def chop_line(text,tags): + size = sum([len(line) for line in text]) + if len(text) < 3: + limit = 5 + elif len(text) == 3: + limit = 4 + elif len(text) > 3: + limit = 3 + + custom = [] + bank = ["*","^","†","∆"] + special_tags = ['LTRG','Oxford','Liddell & Scott','Athenaze'] + special = False + for i in range(len(text)): + if text[i] == "": + continue + for tag in tags[i]: + if tag in special_tags: + special = True + while text[i][-1] in bank: + if text[i][-1] not in custom: + custom.append(text[i][-1]) + text[i] = text[i][:-1] + if custom: + c_string = '' + if "*" in custom: + c_string += "*" + if "^" in custom: + c_string += "^" + if "†" in custom: + c_string += "†" + if "∆" in custom: + c_string += "∆" + text[0] += c_string + elif special: + pass + elif size > 150: + for i in range(len(text)): + text[i] = short_line(text[i],limit) + return text + + +# SHORT LINE +# # # # # # # # # # +def short_line(line,limit): + + #print(f"PRINT SHORT LINE WHILE TOP PRE SPLIT:\n{line}") + line = re.split(",|;",line) + stop = orstop = parstop = limit + for i in range(len(line)): + orlist = [x for x in line[i:] if " or " in x] + if orlist != []: + orstop = i + 1 + continue + else: + #stop = max(i,limit) + break + for i in range(len(line)): + parlist = [x for x in line[i:] if ")" in x or "(" in x] + if parlist != []: + parstop = i + 1 + continue + else: + #stop = max(i,limit) + break + stop = max(orstop,parstop,limit) + line = line[:stop] + new_text = '' + for i in range(len(line)): + new_text += line[i].strip() + ', ' + line = new_text.strip(", ") + #print(f"PRINT END SHORT LINE:\n{line}") + return line + +# SHORT DEFS +# # # # # # # # # # 
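+# short_defs() below drops empty glosses, lets chop_line() abbreviate the long
+# ones, and strips stray ',;' from each line. Illustrative call on hypothetical
+# input (not taken from any dictionary file):
+#   short_defs(["to love, cherish;", ""], [[], []])  ->  ["to love, cherish"]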
+def short_defs(text,tags): + while '' in text: + text.remove('') + text = chop_line(text,tags) + size = sum([len(line) for line in text]) + + while '' in text: + text.remove('') + for i in range(len(text)): + text[i] = text[i].strip(',;') + return text +# END SHORT DEFS + +# CHOP PARENS +# # # # # # # # # # # # +def chop_parens(text): + for i in range(len(text)): + if text[i].strip()[:1] == "(": + text[i] = text[i][text[i].find(')')+1:].strip(';, ') + if text[i][-1:] == ")": + text[i] = text[i][:text[i].rfind('(')].strip(':, ') + return text diff --git a/edit_entry.py b/edit_entry.py new file mode 100644 index 0000000..db67436 --- /dev/null +++ b/edit_entry.py @@ -0,0 +1,577 @@ +''' +DESCRIPTION: + + edit_entry: + allows user to edit an entry: change parts, change definitions, + move definitions, delete definitions or replace all + + move_entries: + take a list and two positionn as arguments and move + item from position one to position two + + print_entry: + prints parts and definition of a single entry + + select_definition: + validates the selection of a definition +''' + +from unidecode import unidecode +from get_selection import get_selection, clear_screen,visible_len +import edit_dictionary +from copy import deepcopy +from tables_greek_ext import auto_parts +from pyfiglet import figlet_format + +# EDIT ENRTY +# # # # # # # # +entry_string = '' +def edit_entry(entry,new_word): + + # string used multiple times + confirm_str = "'1' to confirm, any other key to cancel: " + + # rest of function contained in loop + while True: + + # flag if only one definition exists + if len(entry['defs']) == 1: + singleton = True + else: + singleton = False + + # display entry + entry_string = get_entry(entry) + + # get user selection + options = { + '1':"\n==================================\nEntry Options:\n>'1' to add definition\n", + '2':">'2' to change definition\n"} + if singleton: + options.update({ + '3':">'3' to replace definition\n"}) + # only display if more than one definition + if not singleton: + options.update({ + '3':">'3' to replace all\n", + '4':">'4' to move definitions\n", + '5':">'5' to delete definitions\n"}) + # more options + options.update({ + 'tag':">'tag' to tag defintions", + 'untag_all':'', + 'untag':">'untag' to remove tags\n", + 'parts':">'parts' to change principal parts\n", + 'etym':">'etym' to change etymology\n", + 'ps':">'ps' to change part of speech\n", + '0':">'0' to go back ",'00':">'00' to finish and save\n"}) + user_input = get_selection(options,entry_string) + + # Option to finish/go back + if user_input == '0': + return entry, False + + elif user_input == '00': + return entry, True + + # Option to add new + elif user_input == '1': + while True: + print("\nChoose postion of new definition (1-n)") + try: + place = int(input(": "))-1 + except: + print("Invalid") + continue + break + if place < 0: + continue + print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)") + new_definition = {'gloss':input(': ')} + + if new_definition['gloss'] != '0': + new_definition['tags'] = [] + print("Enter definition tags ('0' to finish)") + new_tag = input(": ") + if new_tag != '0': + new_definition['tags'].append(new_tag) + entry['defs'].insert(place,new_definition) + + elif user_input.lower() == 'tag': + exit_loop = False + while not exit_loop: + print("Enter the tag you want to apply ('0' to go back)") + new_tag = input(": ") + if new_tag == '0': + exit_loop = True + else: + exit_inner_loop = False + while exit_inner_loop == False: + message = 
"\n==================================\nChoose the definition you want to tag\n'0' to go back" + selection = select_definition(entry,message) + if selection == None: + exit_inner_loop = True + else: + entry['defs'][selection]['tags'].append(new_tag) + + elif user_input.lower() == "untag_all": + for i in entry['defs']: + i['tags'] = [] + + elif user_input.lower() == 'untag': + exit_loop = False + while not exit_loop: + message = "\n==================================\nChoose the definition you want to untag\n'0' to go back" + selection = select_definition(entry,message) + if selection == None: + exit_loop = True + elif entry['defs'][selection]['tags']: + exit_inner_loop = False + while exit_inner_loop == False: + for i in range(len(entry['defs'][selection]['tags'])): + print(f"{i+1}. {entry['defs'][selection]['tags'][i]}") + print("Select the tag you want to remove ('0' to go back)") + tag_no = input(": ") + if tag_no == '0': + exit_inner_loop = True + elif int(tag_no) - 1 in range(len(entry['defs'][selection]['tags'])): + del entry['defs'][selection]['tags'][int(tag_no) - 1] + if entry['defs'][selection]['tags'] == []: + print("\ndefinitions has no more tags") + exit_inner_loop = True + else: + print("\ninvalid selection") + else: + print('\ndefinition has no tags') + + + # Option to change definition + elif user_input == '2': + exit_loop = False + while not exit_loop: + if singleton: + selection = 0 + else: + message = "\n==================================\nChoose the definition you want to change\n'0' to go back" + selection = select_definition(entry,message) + if selection == None: + exit_loop = True + definition_string = '' + while selection != None: + definition_string += f"Definition: {entry['defs'][selection]['gloss']}\nTags: {', '.join(entry['defs'][selection]['tags'])}\n" + options = {'0':f"Change Definition Options:\n>'0' to go back",'00':">'00' to finish\n",'1':">'1' to remove words\n", + '2':">'2' to add text to the end\n", + '3':">'3' to add text to the beginning\n", + '4':">'4' to write new definition\n"} + user_input = get_selection(options,definition_string) + definition_string = "" + if user_input == '0': + selection = None + if singleton: + exit_loop = True + elif user_input == '00': + selection = None + exit_loop == True + elif user_input == '1': + entry['defs'][selection]['gloss'] = remove_words(entry['defs'][selection]['gloss']) + elif user_input == '2': + print("\nEnter text to add to definition ('0' to go back) (ā, ē, ī, ō, ū)") + new_text = input(': ') + if new_text != '0': + entry['defs'][selection]['gloss'] += new_text + elif user_input == '3': + print("\nEnter text to add to definition ('0' to go back) (ā, ē, ī, ō, ū)") + new_text = input(': ') + if new_text != '0': + entry['defs'][selection]['gloss'] = new_text + entry['defs'][selection]['gloss'] + elif user_input == '4': + print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)") + new_definition = input(': ') + if new_definition != '0': + entry['defs'][selection]['gloss'] = new_definition + entry['defs'][selection]['tags'] = [] + while True: + print("Enter definition tags ('0' to finish)") + new_tag = input(": ") + if new_tag == '0': + break + else: + entry['defs'][selection]['tags'].append(new_tag) + + # Option to move definition + elif user_input == '4' and not singleton: + + if len(entry['defs']) == 2: + entry['defs'] = move_entries(entry['defs'],1,0) + else: + exit_inner_loop = False + while not exit_inner_loop: + message = "\n==================================\nChoose the definition you want to 
move\n'0' to go back" + take = select_definition(entry,message) + + if take != None: + message = "\nMove to what position?\n'0' to go back" + put = select_definition(entry,message) + + if put != None: + entry['defs'] = move_entries(entry['defs'],take,put) + + else: + exit_inner_loop = True + + # Option to delete definition + elif user_input == '5' and not singleton: + exit_inner_loop = False + while not exit_inner_loop: + message = "\n==================================\nChoose the definition you want to delete\n'0' to go back" + selection = select_definition(entry,message) + + if selection != None: + print(f"\nAre you sure to want to delete {selection+1}?") + user_input = input(confirm_str) + + if user_input == '1': + del entry['defs'][selection] + + else: + exit_inner_loop = True + if len(entry['defs']) == 1: + exit_inner_loop = singleton = True + + # Options to replace all definitions + elif user_input == '3': + print("\nEnter your new definition ('0' to go back) (ā, ē, ī, ō, ū)") + new_definition = {'gloss':input(': ')} + + if new_definition != '0': + new_definition['tags'] = [] + + print("Enter definition tags ('0' to finish)") + new_tag = input(": ") + if new_tag != '0': + new_definition['tags'].append(new_tag) + entry['defs'] = [new_definition] + + # Option to rewrite principle parts + elif user_input.lower() == 'parts': + print("'1' to auto retreieve verb parts (Greek only), any other key to proceed") + user_input = input(": ") + if user_input == '1': + entry['simpleParts'] = auto_parts(entry['simpleParts'],True) + else: + print("\nEnter your new principal parts ('0' to go back) (ā, ē, ī, ō, ū)") + new_definition = input(': ') + + if new_definition != '0': + entry['simpleParts'] = new_definition + + # Option to rewrite principle parts + elif user_input.lower() == 'etym': + print("\nEnter your new etymology ('0' to go back) (ā, ē, ī, ō, ū) ('X' to delete)") + user_input = input(': ') + if user_input.upper() == "X": + entry['etymology'] = '' + elif user_input != '0': + entry['etymology'] = user_input + + elif user_input.lower() == 'ps': + print("\nEnter your new part of speech ('0' to go back)") + user_input = input(': ') + if user_input != '0': + entry['partOfSpeech'] = user_input + + + +# END EDIT ENTRY + + +# REMOVE WORDS +# # # # # # # # # # # +def remove_words(text): + text = list(text) + #print(f'text = {text}') + invalid = False + while True: + clear_screen() + rows = 1+len(text)//26 + #print(f"Rows == {1+len(text)//26}") + print('*' * 35 + " |") + for i in range(rows): + print(f"Row ({chr(i + 65)}): ",end='') + #print(f"len(text[ 26 * i : 26 * (i + 1) ]) == {len(text[ 26 * i : 26 * (i + 1) ])}") + #print(f"text[ 26 * i : 26 * (i + 1) ] == {text[ 26 * i : 26 * (i + 1) ]}") + for j in range(len(text[ 26 * i : 26 * (i + 1) ])): + print(chr(j+65),end='') + pad = 26 - len(text[ 26 * i : 26 * (i + 1) ]) + print(' ' * pad + " |") + print(' ' * 9,end='') + for j in range(26 * i,(26 * i) + len(text[ 26 * i : 26 * (i + 1) ])): + #print(f"range(26 * i,len(text[ 26 * i : 26 * (i + 1) ]) == {range(26 * i,len(text[ 26 * i : 26 * (i + 1) ]))}") + print(text[j],end='') + pad = 26 - len(text[ 26 * i : 26 * (i + 1) ]) + print(' ' * pad + " |") + print('-' * 35 + " |") + print('*' * 35 + " |") + print("To cut enter 'Start Row, Start Col, Stop Row, Stop Col':'RC,RC' ('0' to stop): ") + if invalid: + print("\nInvalid entry\n\n") + invalid = False + user_input = input(": ") + if user_input == '0': + return_text = '' + for i in range(len(text)): + return_text += text[i] + return return_text + else: 
+ try: + user_input = user_input.split(',') + row1 = user_input[0][0].upper() + col1 = user_input[0][1].upper() + row2 = user_input[1][0].upper() + col2 = user_input[1][1].upper() + start = ( ( ord(row1) - 65 ) * 26 ) + ( ord(col1) - 65 ) + stop = ( ( ord(row2) - 65 ) * 26 ) + ( ord(col2) - 65 ) + 1 + text = text[:start] + text[stop:] + except: + invalid = True +# # # # # # # # # # # # # # # + + +# MOVE ENTRIES +# # # # # # # # # # # # # # # # # +def move_entries(entries,selection,new_position): + if selection == new_position: + return entries + else: + popped = entries.pop(selection) + # avoid going out of bounds + if new_position == len(entries): + entries.append(popped) + else: + entries.insert(int(new_position),popped) + return entries +# END MOVE ENTRIES + +def pretty_print_tags(tags,mode=[]): + + ''' -1 is mode for html printing ''' + ''' else mode corresponds to counter ''' + + if mode != -1: + string = f'{mode}. (' + else: + ''' begin html list ''' + string = '
        1. (' + string += ", ".join(tags) + + ''' if -1 start sublist in hierarchy, edit_dictionary.get_entry_string takes care of capping the ordered list ''' + if mode == -1: + string += ')
            ' + + ''' else simply cap tags with ")", print_entry takes care of sublist ''' + else: + string += ")\n" + return string + + +def split_tags(defs,next_index,previous_tags): + current_index = next_index - 1 + + ''' If previous (current) tags and current tags both exist ''' + if previous_tags != [] and defs[current_index]['tags'] != []: + match = True + if len(previous_tags) > len(defs[current_index]['tags']): + match = False + else: + for i in range(len(previous_tags)): + if previous_tags[i] != defs[current_index]['tags'][i]: + match = False + ''' If all previous (common) tags match with first n current tags ''' + if match: + ''' Seperate current into common and distinct tags ''' + return defs[current_index]['tags'][:i+1], defs[current_index]['tags'][i+1:] + + ''' Current did not match previous common tags, inspect next tags ''' + if len(defs) > next_index: + if defs[next_index]['tags'] != []: + ''' Next tags exist and are not empty ''' + if defs[next_index]['tags'] == defs[next_index - 1]['tags']: + ''' Tags are exactly the same, all current tags will be common tags ''' + return defs[current_index]['tags'], [] + + ''' Find the smaller of the two lists ''' + if len(defs[current_index]['tags']) <= len(defs[next_index]['tags']): + shorter = defs[current_index]['tags'] + longer = defs[next_index]['tags'] + else: + shorter = defs[next_index]['tags'] + longer = defs[current_index]['tags'] + for i in range(len(shorter)): + if shorter[i] != longer[i]: + ''' Once lists are no longer the same we have common and distict tags ''' + if i == 0: + ''' If no matches, all tags are common ''' + return defs[current_index]['tags'], [] + ''' Else use index to seperate common and distinct ''' + return defs[current_index]['tags'][:i], defs[current_index]['tags'][i:] + + ''' If next tags don't exist or are empty, all tags are common tags ''' + return defs[current_index]['tags'], [] + +''' + +1A. If first n tags match with next list + + Common tags become common_tags, distinct tags become distinct tags + +1B. If no tags match OR next list does not exist + + All tags become common_tags, no tags become distict tags + +2A. If previous common_tags matches with first n tags of current tags + + identify distinct tags as distinct_tags + +2B. 
If common_tags does not match first n tags of current tags + + Go back to Step 1 + + +''' +def convert_message(message,string): + modulus = 129 + if len(message) < modulus: + string += message + "\n" + return string + + last = 0 + for i in range((len(message)//modulus)+1): + first = last + if len(message) > (i+1) * modulus: + last = message[0: (i+1) * modulus].rstrip(" ").rfind(" ") + if last == -1: # No space found, force break + last = len(message) + string += message[first:last].replace("\n","") + '\n' + else: + string += message[first:].replace("\n","") + '\n' + return string + +def get_entry(entry,mode='',trunc=False): + string = '' + iv = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi', 'xvii', 'xviii', 'xix', 'xx', 'xxi', 'xxii', 'xxiii', 'xxiv', 'xxv', 'xxvi', 'xxvii', 'xxviii', 'xxix', 'xxx', 'xxxi', 'xxxii', 'xxxiii', 'xxxiv', 'xxxv', 'xxxvi', 'xxxvii', 'xxxviii', 'xxxix', 'xl', 'xli', 'xlii', 'xliii', 'xliv', 'xlv', 'xlvi', 'xlvii', 'xlviii', 'xlix', 'l'] + + ''' Print Heading ''' + if mode == '': + string += figlet_format("ENTRY:",font='cybermedium',width=150) + string += "**************\n\n" + + ''' Print Etymology if Applicable ''' + if 'etymology' in entry and entry['etymology'] != '' and mode != 'choice': + string = convert_message(f"\n{entry['etymology']}\n\n",string) + if entry['partOfSpeech'] != "": + string += f"{entry['partOfSpeech']}\n\n" + + string = convert_message(entry['simpleParts'],string) + + previous_tags = [] + char_count = 0 + etym = len(string) + counter = 1 + offset = 0 + + ''' Loop through defs ''' + for index in range(len(entry['defs'])): + if( (index > 3 or char_count > 400) and trunc): + string += f"{index + 1}. ...\n" + break + + ''' "choice" mode involves simpler formatting ''' + if mode != "choice": + + ''' compare current, previous and next tags for next common tags ''' + common_tags, distinct_tags = split_tags(entry['defs'],index + 1,previous_tags) + + ''' If a new set of tags ''' + if common_tags != previous_tags: + previous_tags = common_tags + if common_tags != []: + ''' Create a new numerical list item ''' + string += pretty_print_tags(common_tags,counter) + offset = index + counter += 1 + ''' If no tags, simply print defs as numerical list item ''' + last = 0 + if common_tags == []: + string = convert_message(f"{counter}. {entry['defs'][index]['gloss']}",string) + counter += 1 + ''' Skip the bottom block for printing defs with roman numerals ''' + char_count = len(string) - etym + continue + + ''' Choice mode uses a simple list w/o hierarchy ''' + last = 0 + if mode == 'choice': + string = convert_message(f"{index + 1}. {entry['defs'][index]['gloss']}",string) + ''' Print definitions with roman numeral in the event there are tags ''' + else: + if distinct_tags: + string = convert_message(f"{iv[index - offset]:>4}. ({', '.join(distinct_tags)}) {entry['defs'][index]['gloss']}",string) + string.strip('\n') + else: + string = convert_message(f"{iv[index - offset]:>4}. 
{entry['defs'][index]['gloss']}",string) + string.strip('\n') + char_count = len(string) - etym + x = 800 + if mode != '' and char_count - etym > x: + string = string[:x-1] +"...\n" + return string + + + +# PRINT ENTRY +# # # # # # # # # +def print_entry(entry,mode=''): + print(get_entry(entry,mode,False)) +# END PRINT ENTRY + + +# SELECT DEFINITIONS +# # # # # # # # # # # # # # +def select_definition(entry,message): + # whole function contained in loop + invalid = False + while True: + clear_screen() + print_entry(entry,'choice') + print(message) + + if invalid: + print('\ninvalid selection') + invalid = False + + user_input = input(': ') + + # Option to go back + if user_input == '0': + return None + #elif user_input == '00': + # return None, True + + # confirm input is numeric + elif "-" in user_input: + user_input = user_input.split("-") + if user_input[0].isnumeric() and user_input[1].isnumeric(): + user_input[0] = int(user_input[0])-1 + user_input[1] = int(user_input[1])-1 + if user_input[0] in range(len(entry['defs'])) and user_input[1] in range(len(entry['defs'])): + return user_input + elif user_input.isnumeric(): + # convert to string + user_input = int(user_input)-1 + #confirm in range + if user_input in range(len(entry['defs'])): + return user_input + # repeat loop after invalid selection + invalid = True +# END SELECT DEFINITIONS \ No newline at end of file diff --git a/get_selection.py b/get_selection.py new file mode 100644 index 0000000..e95f712 --- /dev/null +++ b/get_selection.py @@ -0,0 +1,159 @@ +''' +Description: + + get_selection: + takes message and a list and/or dict + requests, validates, and returns a user input +''' +from unidecode import unidecode +import unicodedata +import os + +# CLEAR SCREEN +def clear_screen(): + if os.name == 'posix': # For UNIX or Linux or MacOS + os.system('clear') + elif os.name == 'nt': # For Windows + os.system('cls') + +def visible_len(s): + return len([c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn']) + +def get_selection(options, message=""): + invalid = False + while True: + clear_screen() # Uncomment this if you have a clear_screen function + if message: + print(message) + + if len(options) > 30: + print_columns(options) + else: + for key, value in options.items(): + if value != '': + print(value.rstrip('\n')) + + if invalid: + print("\nInvalid selection\n\n") + invalid = False + + user_input = unidecode(input(": ")).lower() + + if user_input in options: + return user_input + else: + invalid = True + +def print_columns(options,field=25): + limit = 40 + keys = list(options.keys()) + special_keys = ['0', '00'] + + # Separate special keys from the rest + regular_keys = [k for k in keys if k not in special_keys] + + # Calculate the number of columns needed + num_columns = len(regular_keys) // limit + 1 + row_string = '' + for i in range(limit): + for col in range(num_columns): + idx = i + limit * col + if idx >= len(regular_keys): + continue + + option_str = options[regular_keys[idx]].rstrip('\n') + while visible_len(option_str) < field: + option_str += " " + if visible_len(option_str) > field and field != 40: + clear_screen() + print_columns(options,40) + return + row_string += f"{option_str}" + + print(row_string) + row_string = '' + + # Print special keys at the end + for key in special_keys: + if key in options: + print(options[key], end='') + + +''' +# GET SELECTION +# # # # # # # # # # # # # # +def get_selection(options,message=""): + # whole function contained in loop + invalid = False + while 
True: + clear_screen() + if message: + print(message) + + if len(list(options.keys())) > 30: + print_columns(options) + else: + for key in options: + print(f"{options[key]}",end='') + if invalid: + print("\ninvalid selection\n\n") + invalid = False + user_input = unidecode(input(": ")) + + # check if input matches and keys in options + if user_input.lower() in options: + return user_input + else: + invalid = True + + +# END GET SELECTION + +def print_columns(options): + limit = 50 + keys = list(options.keys()) + if '0' in keys: + end = ['0'] + keys.remove('0') + if '00' in keys: + end.append('00') + keys.remove('00') + L = len(keys) + if L > 1000: + print_bigger_columns() + else: + step = L // limit + 1 + for i in range(0,limit): + for x in range(step): + q = i + limit * x + if q >= L: + continue + else: + fill_char = ' ' + # using .format() + print_str = "{message:{fill}<30}".format(message=options[keys[q]], fill=fill_char) + def multi_replace(string): + replacements = [ ('θ','t'), ('ψ','s'), ('φ','f'), ('̓',''), ] + for old, new in replacements: + string = string.replace(old, new) + return string + len_str = multi_replace(print_str) + while len(len_str) < 25: + len_str += "#" + print_str += "#" + while len(print_str) > 25: + len_str = len_str[:-1] + print_str = print_str[:-1] + print(print_str, end='') + + print() + for key in end: + print(options[key],end="") + +''' + + + + +def print_bigger_columns(options): + pass \ No newline at end of file diff --git a/get_simple.py b/get_simple.py new file mode 100644 index 0000000..5d547b6 --- /dev/null +++ b/get_simple.py @@ -0,0 +1,452 @@ +''' +Description: + + get_simple: + takes principle parts line from a wiktionary entry and + chops out all extraneous word + + i_stem_test: + look up the i-stem version of the gen plural of a + third declension noun to check if the noun is + i-stem or not; returns true or false +''' + +#from wiktionaryparser import WiktionaryParser +from unidecode import unidecode +import load_dict +import pickle + +def load_i_stem_trie(): + load_dict.change_path("dumps sorted") + with open("Latin" + '-i_stem_nouns-trie.txt','rb') as openFile: + t = pickle.load(openFile) + #print("Load I-stem trie success") + #for i in t['definitions'].values(): + # print(i) + return t['definitions'] + +def load_dump(): + print("Loading previous Latin trie...") + load_dict.change_path("dumps sorted") + with open("Latin" + '-trie.txt','rb') as openFile: + t = pickle.load(openFile) + return t['definitions'] + +quickmode = True +i_stem_mode = False + +if quickmode: + t = load_i_stem_trie() +else: + t = load_dump() + +# CHOP PARTS +# # # # # # # # # # +def chop_parts(parts): + ''' Chop parts down to simple list ''' + # cut off tail after closing paratheses + if ';' in parts: + parts = parts[:parts.find(';')] + + # separate into list + parts = parts.split(' ') + + # remove punctuation + for index in range(len(parts)): + parts[index] = parts[index].strip(" ,()") + + return parts +# END CHOP PARTS + + +# REMOVE WORDS +# # # +def remove_words(parts,remove_words): + """ Loop to remove extra words not part of principal parts """ + # need to reverse for special case of Latin word 'neuter' + parts.reverse() + + # remove extraneous word + for word in remove_words: + if word in parts: + parts.remove(word) + + # reverse back + parts.reverse() + return parts + +# END REMOVE WORDS + + +# COMBINE DEPONENT +# # # # # # # # # # # # # # # +def combine_deponent(parts): + ''' Loop for perfect active form of deponent verbs ''' + offset = 0 + for number in 
range(1,len(parts)): + + # when items combined index shrinks + # very unlikely that this is needed twice + index = number - offset + + # combine parts with '__ sum' + if parts[index] == 'sum': + popped = parts.pop(index) + parts[index - 1] += f" {popped}" + offset += 1 + + index = number - offset + + # combine parts with '__ est' + if parts[index] == 'est': + popped = parts.pop(index) + parts[index - 1] += f" {popped}" + offset += 1 + + return parts +# END COMBINE DEPONENT + + +# COMBINE 'OR' +# # # # # # # # # # # # # # # +def combine_or(parts): + """ Loop to combine part with __ or __ """ + offset = 0 + for number in range(len(parts)): + + # when items combined index shrinks + index = number - offset + + # combine parts with __ or __ + if parts[index] == 'or': + popped = parts.pop(index) + parts[index - 1] += f" {popped}" + popped = parts.pop(index) + parts[index - 1] += f" {popped}" + offset += 2 + + return parts +# END COMBINE 'OR' + + +# DOUBLE NOUN +# # # # # # # # # # # +def double_noun(parts): + #print("\n\n\t\tACTIVE IF DOUBLE NOUN >>>>>>>>>>>") + #print(parts) + ''' catches the special case of a two word noun''' + if len(parts) == 1: + parts[0] = parts[0].strip('\xa0') + return parts + if '\xa0' in parts[1]: + parts[0] += ' ' + parts.pop(1) + if len(parts) > 2: + parts[1] += ' ' + parts.pop(2) + #parts = parts[:2] + return parts +# END DOUBLE NOUN + + +# MOVE GENDER +# # # # # # # # # # # # # # # +def move_gender(parts,partOfSpeech): + for index in range(len(parts)): + # wiktionary entries contain special character + # between nominative and gender of nouns + if '\xa0' in parts[index]: + #print("\n\n\t\tACTIVE IF \xa0 >>>>>>>>>>>") + # split nom into two or three parts + word_plus_gdr = parts[index].split('\xa0') + + # reassign nom w/o gdr to 0 in parts + parts[index] = word_plus_gdr[0] + + # and gender to the end of parts; don't do this for pronouns + if partOfSpeech == 'noun': + # add gender to end of parts + parts.append(word_plus_gdr[1]) + + # if two-part gender, + # add second part to gender already appended at end of parts + if len(word_plus_gdr) == 3: + parts[-1] += ' ' + parts[-1] += word_plus_gdr[-1] + elif partOfSpeech == 'noun': + #print("\n\n\t\tACTIVE ELIF >>>>>>>>>>>") + #print(parts) + offset = 0 + for i in range(len(parts)): + i = i - offset + letters = ['n','f','m','sg','pl','m or f'] + if parts[i] in letters: + parts.append(parts.pop(i)) + offset += 1 + #print(parts) + return parts +# END MOVE GENDER + + +# BUILD STRING +# # # # # # # # # # # # # # # # # +def build_string(num_parts,comma_stop,parts,partOfSpeech,i_stem=False): + ''' Loop to build simple parts string ''' + simpleParts = '' + for index in range(min(num_parts,len(parts))): + # Last part of nouns should be gender in () + if partOfSpeech == 'noun' and index == num_parts-1: + simpleParts += f"({parts[index]})" + else: + # append part to string + simpleParts += parts[index] + + # rule for when to insert comma + if index < num_parts - comma_stop: + simpleParts += ', ' + else: + # insert i-stem label if flagged + if i_stem == True and index == num_parts - 2: + simpleParts += ' -ium' + # insert space w/o comma + simpleParts += ' ' + + return simpleParts + +# END BUILD STRING + + +# GET SIMPLE LA 'LATIN' +# # # # # # # # # # # # # # # +def get_simple(partOfSpeech,parts,heading): + + # Only works on noun, verb, adjective, pronoun, determiner + # adverbs, conjunctions, interjections, etc. 
remain the same + + # in case proper noun or other subset of noun + if partOfSpeech.lower() == 'proper noun': + partOfSpeech = 'noun' + + # not a, b, or c then return + if partOfSpeech not in ['noun','verb','adjective','participle','adverb','pronoun','determiner','numeral']: + return parts + + # set some flags ['thid_decl', 'adjective_parts', 'deponent', 'defective', 'verb_label'] + flags = set_flags_la(parts,partOfSpeech) + + # if not comparable for adverb return parts + if 'not comparable' in parts and partOfSpeech == 'adverb': + return parts + + # Chop parts down to simple list + parts = chop_parts(parts) + + # abort here len(part) == 1, (not a main entry) + if len(parts) == 1 or partOfSpeech == 'adverb': + return parts[0] + + # List of words to remove + remove_words_la = ['present','infinitive','perfect','active', + 'future','participle','supine','genitive','feminine','neuter','irregular', + 'no','indeclinable','variously','declined','comparative','superlative' + ] + + # remove extra words not part of principal parts + parts = remove_words(parts,remove_words_la) + + # Loop for perfect active form of deponent verbs + parts = combine_deponent(parts) + + # Loop to combine part with __ or __ + parts = combine_or(parts) + + # for noun list should only contain nominative & genitive + if partOfSpeech == 'noun': + parts = double_noun(parts) + + # Loop to find gender, separate from nominative + parts = move_gender(parts,partOfSpeech) + + # test for i-stem + i_stem = i_stem_test(parts,heading,flags) + + # wrap up, create simpleParts string + + # add blanks for verbs without four principle parts + if (partOfSpeech == 'verb' and not flags['deponent']) and len(parts) < 4 and flags['no_perfect']: + parts.insert(2,"____") + + while (partOfSpeech == 'verb' and not flags['deponent']) and len(parts) < 4: + parts.append("____") + + # Verbs and nouns all parts are used + # adjectives are cutoff at 2 or 3 + if partOfSpeech in ['adjective','participle','numeral'] and not flags['indeclinable']: + num_parts = flags['adjective_parts'] + else: + num_parts = len(parts) + + # No comma between parts and gender for nouns + if partOfSpeech == 'noun': + comma_stop = 2 + else: + # Comma between each part until last for non-noun + comma_stop = 1 + + simpleParts = build_string(num_parts,comma_stop,parts,partOfSpeech,i_stem) + + # add verb label label if any + if flags['verb_label'] and partOfSpeech == 'verb': + simpleParts += flags['verb_label'] + + if flags['indeclinable']: + simpleParts += '(indeclinable)' + + if flags['indeclinable portion']: + simpleParts += '(indeclinable portion)' + + # return string + + if i_stem_mode: + if i_stem and partOfSpeech == 'noun': + return simpleParts + else: + return False + else: + return simpleParts + +# END GET SIMPLE 'LATIN' + + +# SET FLAGS 'LATIN' +# # # # # # # # # # # # # # # # # +def set_flags_la(parts,partOfSpeech): + + flags = {} + + # flag one/two vs. 
three termination adjective + if 'one-termination' in parts or 'two-termination' in parts or 'third declension' in parts: + flags['adjective_parts'] = 2 + else: + flags['adjective_parts'] = 3 + + if 'comparative' in parts and 'superlative' in parts: + flags['adjective_parts'] += 2 + + # flag third declension noun for i-stem test + if 'third declension' in parts and partOfSpeech == 'noun': + flags['third_decl'] = True + else: + flags['third_decl'] = False + + # flag defective verb + if 'highly defective' in parts and partOfSpeech == 'verb': + flags['defective'] = 'highly defective' + elif 'defective' in parts and partOfSpeech == 'verb': + flags['defective'] = 'defective' + elif 'perfect forms have present meaning' in parts and partOfSpeech == 'verb': + flags['defective'] = 'perfect forms have present meaning' + else: flags['defective'] = False + + if 'no perfect stem' in parts: + flags['no_perfect'] = True + else: + flags['no_perfect'] = False + + # flag deponent verb + if 'semi-deponent' in parts and partOfSpeech == 'verb': + flags['deponent'] = 'semi-deponent' + elif 'optionally deponent' in parts and partOfSpeech == 'verb': + flags['deponent'] = 'optionally deponent' + elif 'deponent' in parts and partOfSpeech == 'verb': + flags['deponent'] = 'deponent' + else: + flags['deponent'] = False + + deponent = flags['deponent'] + defective = flags['defective'] + + # combine labels into one + if defective and deponent and deponent != 'deponent': + flags['verb_label'] = f"({deponent}, {defective})" + elif defective: + flags['verb_label'] = f"({defective})" + elif deponent and deponent != 'deponent': + flags['verb_label'] = f"({deponent})" + else: + flags['verb_label'] = False + + if 'indeclinable portion' in parts: + flags['indeclinable portion'] = True + else: + flags['indeclinable portion'] = False + + if 'indeclinable' in parts and not flags['indeclinable portion']: + flags['indeclinable'] = True + else: + flags['indeclinable'] = False + + return flags +# END SET FLAGS 'LATIN' + + +# I-STEM TEST +# # # # # # # # # # # # # +def i_stem_test(parts,heading,flags): + if quickmode: + if parts[0] in t and flags['third_decl']: + #print(f"{parts[0]},{parts[1]} is i_stem") + return True + else: + return False + else: + ''' test to determine if i'stem label is needed''' + if flags['third_decl']: + + # chop 's' off genitive to get stem + istem = parts[1][:-1] + # add 'um' for gen plur (of i-stem) + istem += 'um' + # remove macrons + istem = unidecode(istem) + else: + return False + + # remove macrons for comparison + handle = unidecode(heading) + + + if istem in t: + value = t[istem] + if isinstance(value,list): + entries_list = [] + for e in value: + entries_list.extend(e['entries']) + else: + entries_list = value['entries'] + for entry in entries_list: + for d in entry['defs']: + compare = unidecode(d['gloss']) + if 'genitive' in compare and handle in compare: + #print(f"{parts[0]},{parts[1]} is i_stem") + return True + return False +# END I-STEM TEST + +# Combine adjective parts +# # # # # # # # # # # # # # # # +def combine_adjective_parts(parts): + ''' combine masculine and feminine for spanish adjectives ''' + offset = 0 + for number in range(1,len(parts)): + index = number - offset + if parts[index][-1] == 'a' and parts[index - 1][-1] == 'o': + if parts[index][:-1] == parts[index - 1][:-1]: + parts[index - 1] += "/-a" + del parts[index] + offset += 1 + elif parts[index][-2:] == 'as' and parts[index - 1][-2:] == 'os': + if parts[index][:-2] == parts[index - 1][:-2]: + parts[index - 1] += "/-as" + del 
parts[index] + offset += 1 + return parts +# END COMBINE ADJECTIVE PARTS diff --git a/html_x.py b/html_x.py new file mode 100644 index 0000000..afaab66 --- /dev/null +++ b/html_x.py @@ -0,0 +1,64 @@ + +def set_styles(body_string): + body_string += '' + return body_string + +def create_style(body_string,width): + if width == 2: + body_string += f'\ + ' + elif width == 1: + body_string += f'
            \ + ' + body_string += '' + return body_string + +def create_header(body_string,parts,header=''): + body_string += f'' + column_labels = parts[list(parts.keys())[0]] + for label in column_labels: + body_string += f'' + body_string += "" + return body_string + +def create_body(body_string,parts,t_type): + for key in parts: + body_string += '' + if t_type == 'noun': + body_string += f'' + elif t_type == 'conj': + body_string += f'' + else: + body_string += f'' + if type(parts[key]) == dict: + for row_item in parts[key]: + body_string += f'' + else: + body_string += f'' + body_string += '' + body_string += '
            {header}{label}
            {key[:3]}.{key[:3]}{key}{parts[key][row_item]}{parts[key]}

            ' + return body_string + +def create_table(body_string,parts,t_type,width): + body_string = create_style(body_string,width) + body_string = create_header(body_string,parts) + body_string = create_body(body_string,parts,t_type) + + return body_string + +def create_box(body_string,label,content): + body_string = create_style(body_string,1) + body_string += f'{label}' + body_string += f'{content}' + body_string += '
            ' + return body_string + + + + diff --git a/language_splitter.py b/language_splitter.py new file mode 100644 index 0000000..52c5bd6 --- /dev/null +++ b/language_splitter.py @@ -0,0 +1,38 @@ + + +from load_dict import change_path +from unidecode import unidecode +import pickle +import datrie + +def split_language(new_dictionary): + + alpha = 'abcdefghijklmnopqrstuvwxyz-' + + t = datrie.Trie(alpha) + #counter = 0 + for i in range(len(new_dictionary['definitions'])): + if any(entry['simpleParts'] != False for entry in new_dictionary['definitions'][i]['entries']): + #counter += 1 + try: + key = unidecode(new_dictionary['definitions'][i]['heading']).lower() + if key not in t: + t[key] = new_dictionary['definitions'][i] + else: + if isinstance(t[key],list): + t[key].append(new_dictionary['definitions'][i]) + else: + t[key] = [t[key],new_dictionary['definitions'][i]] + + except KeyError: + print(f"{new_dictionary['definitions'][i]} could not be added") + #print(counter) + #if counter == 0: + # return + new_dictionary['file'] = new_dictionary['language'].replace(' ','') + "-trie.txt" + new_dictionary['definitions'] = t + change_path("dumps sorted") + with open(new_dictionary['file'],mode = 'wb') as openFile: + pickle.dump(new_dictionary, openFile,protocol=pickle.HIGHEST_PROTOCOL) + + return diff --git a/load_dict.py b/load_dict.py new file mode 100644 index 0000000..d97661d --- /dev/null +++ b/load_dict.py @@ -0,0 +1,282 @@ +''' +Description: + + find_dict: + allow user to select a previously saved dictionary + + open_dict: + open a saved dictionary, copy definitions into + dictionary object taken as argument, + + create_dict: + name a new dictionary to be created +''' +import glob +import os +import pickle +import copy +from unidecode import unidecode + +from get_selection import get_selection +#import parser_shell + +# save cwd into global var +global CWD +CWD = os.path.dirname(os.getcwd()) + +# CHANGE PATH +# # # # # # # # # +def change_path(folder=''): + path = os.path.join(CWD,folder) + if not os.path.isdir(path): + os.mkdir(path) + os.chdir(path) +# END CHANGE PATH + + + + +# FIND DICT +# # # # # # # +def find_dict(): + change_path('dictionaries') + myFiles = glob.glob('*.txt') + if myFiles == []: + print("\nSorry no saved dictionaries") + return create_dict() + else: + options = {'0':f"\nChoose from the following files: (0 to go back)\n==================================\n"} + for index in range(len(myFiles)): + options[f"{str(index + 1)}"] = f"{index + 1}. 
{myFiles[index]}\n" + user_input = get_selection(options) + if user_input == '0': + return None + current_dict = {'file':myFiles[int(user_input)-1]} + with open(current_dict['file'],mode= 'rb') as openFile: + current_dict = pickle.load(openFile) + return current_dict +# END FIND DICT + + +# CREATE DICT +# # # # # # # # +def create_dict(): + change_path('dictionaries') + myFiles = glob.glob('*.txt') + while True: + print("What do you want to name new dictionary?: (0 to go back)\n==================================\n") + user_input = input(': ') + if user_input == '0': + return + user_input += '.txt' + exit_loop = True + while exit_loop: + if user_input in myFiles: + print(f'\n{user_input} already exists, do you want to add to {user_input}?') + options = {'1':"(1=yes, ",'1':"0 to go back): "} + choice = get_selection(options) + if choice == '1': + current_dict = {'file':user_input} + openFile = open(current_dict['file'],mode= 'rb') + current_dict = pickle.load(openFile) + openFile.close() + return current_dict + elif choice == '0': + exit_loop = True + else: + current_dict = {'file':user_input} + user_input = pick_language() + if user_input != None: + current_dict['language'] = user_input + current_dict['definitions'] = [] + return current_dict + else: + exit_loop = True +# END CREATE DICT + + + + + +# COMBINE DICT +# # # # # # # # # # # # +def combine_dict(current_dict,combo_dict=[]): + if combo_dict == []: + change_path('dictionaries') + myFiles = glob.glob('*.txt') + if current_dict['file'] in myFiles: + myFiles.remove(current_dict['file']) + if myFiles == []: + print("\nSorry no other saved dictionaries") + return current_dict + else: + options = {'0':f"\nChoose from the following files: (0 to go back)\n==================================\n"} + for index in range(len(myFiles)): + options[f"{str(index + 1)}"] = f"{index + 1}. 
{myFiles[index]}\n" + user_input = get_selection(options) + if user_input == '0': + return current_dict + combine_file = myFiles[int(user_input) - 1] + with open(combine_file,mode= 'rb') as openFile: + combo_dict = pickle.load(openFile) + + # FIX DROPPED SOURCE TAGS + bank = ["*","^","†","∆"] + counter = 0 + for i in range(len(combo_dict['definitions'])): + ''' + for j in range(len(combo_dict['definitions'][i]['entries'])): + def_tags = [] + for k in range(len(combo_dict['definitions'][i]['entries'][j]['defs'])): + if combo_dict['definitions'][i]['entries'][j]['defs'][k][-1] in bank: + def_tags.append(combo_dict['definitions'][i]['entries'][j]['defs'][k][-1]) + + if def_tags: + handle = combo_dict['definitions'][i]['handle'] + for x in range(len(current_dict['definitions'])): + if current_dict['definitions'][x]['handle'] == handle: + if j in range(len(current_dict['definitions'][x]['entries'])): + if len(current_dict['definitions'][x]['entries'][j]['defs']) == len(combo_dict['definitions'][i]['entries'][j]['defs']): + for tag in def_tags: + current_dict['definitions'][x]['entries'][j]['defs'][0]['gloss'] += tag + ''' + handle = combo_dict['definitions'][i]['handle'] + found = False + for k in range(len(current_dict['definitions'])): + if current_dict['definitions'][k]['handle'] == handle: + found = True + current_dict['definitions'][k]['tags'].extend(combo_dict['definitions'][i]['tags']) + break + if found == False: + current_dict['definitions'].append(combo_dict['definitions'][i]) + counter += 1 + + current_dict['definitions'].sort(key=lambda item: item.get('handle').lower()) + with open("test" + current_dict['file'] ,mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + print(f"\n{combo_dict['file']} successfully combined with {current_dict['file']}") + print(f"{counter} new words added\n") + return current_dict +# END COMBINE DICT + + +def pick_language(): + + language_options = ["Latin","Ancient Greek","Old English"] + + options = {'0':"\nChoose the language for your new dictionary ('0' to go back)\n==================================\n"} + for i in range(len(language_options)): + options.update({f"{i+1}":f"{i+1}. {language_options[i]}\n"}) + user_input = get_selection(options) + if user_input == '0': + return None + else: + return language_options[int(user_input)-1] + +# EXTRACT LIST +# # # # # # # # # # # # # # # +def extract_list(current_dict,mode=0): + + change_path('lists') + myFiles = glob.glob('*.txt') + if myFiles == []: + print("\nSorry no saved lists") + return + else: + options = {'0':f"\nChoose from the following files: (0 to go back)\n"} + for index in range(len(myFiles)): + options[f"{str(index + 1)}"] = f"{index + 1}. 
{myFiles[index]}\n" + user_input = get_selection(options) + if user_input == '0': + return + else: + with open(myFiles[int(user_input)-1],'r') as file: + word_list = [unidecode(line).strip("\n\t, ") for line in file.readlines()] + + new_dictionary = {'definitions':[],'file':'','language':'Latin'} + while True: + if mode == 1: + file_name = input("Enter name of new dictionary ('0' to go back): ") + if file_name == '0': + return + else: + user_input = pick_language() + if user_input == None: + continue + else: + new_dictionary['language'] = user_input + new_dictionary['file'] = file_name + ".txt" + else: + new_dictionary['language'] = current_dict['language'] + new_dictionary['file'] = myFiles[int(user_input)-1] + + #new_dictionary['file'] = file_name + ".txt" + tag = '' + user_input = input("Enter a tag to apply to dictionary entries (max=1,'0' to skip): ") + if user_input != '0': + tag = user_input + + + + if new_dictionary['language'] != 'Latin': + #dump_dict = parser_shell.load_dump(new_dictionary['language']) + for word in word_list: + for i in range(len(dump_dict['definitions'])): + if dump_dict['definitions'][i]['handle'] == word: + dump_dict['definitions'][i]['tags'].append(tag) + new_dictionary['definitions'].append(copy.deepcopy(dump_dict['definitions'][i])) + else: + alpha = {'a':[], + 'b':[], + 'c':[], + 'd':[], + 'e':[], + 'f':[], + 'g':[], + 'h':[], + 'i':[], + 'j':[], + 'k':[], + 'l':[], + 'm':[], + 'n':[], + 'o':[], + 'p':[], + 'q':[], + 'r':[], + 's':[], + 't':[], + 'u':[], + 'v':[], + 'w':[], + 'x':[], + 'y':[], + 'z':[], + 'misc':[]} + for key in alpha: + alpha[key] = parser_shell.load_big_language(key,current_dict['language']) + + for word in word_list: + if word[0].lower() not in 'abcdefghijklmnopqrstuvwxyz': + key = 'misc' + else: + key = word[0].lower() + for i in range(len(alpha[key]['definitions'])): + if unidecode(alpha[key]['definitions'][i]['handle']) == unidecode(word): + alpha[key]['definitions'][i]['tags'].append(tag) + new_dictionary['definitions'].append(copy.deepcopy(alpha[key]['definitions'][i])) + if mode == 1: + change_path('dictionaries') + with open(new_dictionary['file'],mode = 'wb') as openFile: + pickle.dump(new_dictionary, openFile) + print(f"{new_dictionary['file']} successfully saved") + else: + combine_dict(current_dict,new_dictionary) + return + + + + + + + diff --git a/main.py b/main.py new file mode 100644 index 0000000..0c2c659 --- /dev/null +++ b/main.py @@ -0,0 +1,151 @@ + +''' + dictionary_maker.py + + Ian Tincknell + +Description: + + Shell program to extract wiktionary entries from json dump files created by Wiktextract + + https://kaikki.org/index.html + + Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. 
+ + Supplementary definitions for Greek come from the Perseus Digital Library + + https://www.perseus.tufts.edu/ + + Links to xml texts: + + https://github.com/blinskey/middle-liddell/blob/master/Perseus_text_1999.04.0058.xml + https://github.com/gcelano/LSJ_GreekUnicode/blob/master/grc.lsj.perseus-eng6.xml + + Supplementary definitions for Old English from Mary Lynch Johnson's "A Modern English - Old English Dictionary" + + Posted Online by: + https://www.richardzimmermann.com/ + https://old-engli.sh/ + + + Pulls definitions from Latin, Greek or Old Enlgish wiktionary + Creates dictionary pickle file to store definitions + + Can print a formatted list to upload as flashcards using Anki + + https://apps.ankiweb.net/ + +'''# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + +import parser_shell +from load_dict import * +import word_methods +import edit_dictionary +from get_selection import get_selection, clear_screen +import dump_parser +import os +import tables +from pyfiglet import figlet_format + +# Print welcome message +# # # # # # # # # # # # +clear_screen() +message = '' +for i in range(5): + if i%2 == 0: + message += "\n" + "Ξ " * (75) + else: + message += "\n" + "Σ " * (75) + +middle = '\n\n' +middle += figlet_format(" Word-Hoarder +",font='epic',width=150) +message += middle +for i in range(5): + if i%2 == 0: + message += "\n" + "Ξ " * (75) + else: + message += "\n" + "Σ " * (75) +print(message) +input("\n\n\n\t\t\tMake sure the display fits your screen\n\t\t\tPress \'Enter\' to continue\n") + +# Whole program contained in loop +# # # # # # # # # # # # # # # # # +exit_loop_1 = False +user_input = None +current_dict = None +while not exit_loop_1: + + # set/reset flag for second loop + exit_loop_2 = False + + options = { + '1':"\nMain Menu:\n==================================\n>'1' Open saved dictionary\n", + '2':">'2' Create new dictionary\n", + '3':">'3' Data files\n", + '0':">'0' to exit\n" + } + user_input = get_selection(options,message) + message = '' + # Terminate program + if user_input == '0': + exit_loop_1 = exit_loop_2 = True + continue + + # function calls to load or create a dictionary + # # # # # # # # # # # # # # # # # # # # # # # # + elif user_input == '1': + current_dict = find_dict() + elif user_input == '2': + current_dict = create_dict() + elif user_input == '3': + dump_parser.sort_dump() + + # find/create will return {} if dict not loaded + # must have a valid dictionary to proceed to new loop + if current_dict == None: + continue + + # inner Loop to get valid user input + # # # # # # # # # # # # # # # # # # + + while not exit_loop_2: + # Print Options + options = { + '1':f"\n{current_dict['file']} options:\n==================================\n>'1' to look up words\n", + '2':">'2' to save formatted flashcards\n", + '3':">'3' to save gloss\n", + '4':">'4' to edit dictionary\n", + '5':">'5' to combine dictionaries\n", + '6':">'6' add words from list\n", + '0':">'0' to go back\n" + } + user_input = get_selection(options) + + # Return previous loop: dictionary selection + if user_input == '0': + exit_loop_2 = True + current_dict = None + + # functions calls to add words to dictionary or print dictionary + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + elif user_input == '1': + current_dict, quit = parser_shell.add_word_options(current_dict) + if quit: + exit_loop_1 = exit_loop_2 = True + elif user_input == '2': + tables_list = tables.get_tables(current_dict['language']) + 
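+            # Flashcard export: the saved inflection tables for this language
+            # are printed first as table cards, then print_dict() writes the
+            # dictionary entries themselves. The result is a plain-text file
+            # suitable for Anki's text import (one card per line, with front
+            # and back separated by whatever delimiter print_dict() emits).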
tables.print_tables(tables_list,current_dict['language']) + edit_dictionary.print_dict(current_dict) + input("Press enter to Continue") + elif user_input == '3': + edit_dictionary.print_gloss_setup(current_dict) + elif user_input == '4': + current_dict = edit_dictionary.edit_dictionary(current_dict) + elif user_input == '5': + current_dict = combine_dict(current_dict) + elif user_input == '6': + extract_list(current_dict) + +print("\n\nGood Bye") +# End Main diff --git a/parser_shell.py b/parser_shell.py new file mode 100644 index 0000000..203fb3e --- /dev/null +++ b/parser_shell.py @@ -0,0 +1,376 @@ + + +import pickle +import os +from unidecode import unidecode +from copy import deepcopy + +from create_word import create_word +import word_methods +from word_print_edit import get_entry_string, edit_entries, word_combo +from get_selection import get_selection, clear_screen +import edit_dictionary +import load_dict +import tables +import unicodedata +import inspect +from pyfiglet import figlet_format + +theme = '\u2624' +theme = "|" + +def current_line_number(): + return inspect.currentframe().f_back.f_lineno + +# ADD WORDS OPTIONS +# # # # # # # # # # # +def add_word_options(current_dict): + tags = set() + while(True): + clear_screen() + options = {'1':"\nWord Search Options:\n==================================\n>'1' word search\n", + '2':">'2' to set tags\n", + '3':">'3' to display current gloss\n", + '4':">'4' to create word\n", + 't':">'T' for tables\n", + '0':">'0' to exit ('00' to quit)\n"} + if not tags: + options['0'] += "** No tags selected **\n" + else: + options['0'] += f"* {', '.join(tags)} *\n" + user_input = get_selection(options) + if user_input == '0': + return current_dict, False + if user_input == '00': + return current_dict, True + if user_input == '1': + current_dict = word_search(current_dict,tags) + continue + elif user_input.lower() == '2': + master_list = word_methods.get_master_list(current_dict) + tags = word_methods.getTags(tags,'',master_list) + continue + elif user_input.lower() == '3': + tag_mode = '1' if tags == set() else '2' + count = edit_dictionary.filter_gloss(current_dict,tags,output_file=None,tag_mode=tag_mode) + print(f"\n\t{count} items with current tags\n") + continue + if user_input == '4': + current_dict = create_word(current_dict,tags) + continue + elif user_input == 't': + tables.table_options(current_dict['language']) + else: + current_dict = look_up_word(user_input,current_dict,tags) +# END ADD WORD OPTIONS + + +# WORD OPTIONS +# # # # # # # # # # # # # +def word_options(new_word,current_dict,backup,existing_word,t): + language = current_dict['language'] + handle = new_word['handle'] + heading = new_word['heading'] + entry_string = '' + while True: + entry_string = get_entry_string(new_word['entries']) + if existing_word == True: + + options = {'0':f"\n===================================================\n{heading} already exists in '{current_dict['file']}'\n", + '1':f"Do you want to save, edit or discard '{heading}'?\n", + '2':">>>(1=save, 2=edit, 0=discard)"} + #if 'template' not in new_word: + # options.update({'t':"'T' to add templates\n"}) + #else: + # options.update({'t':"\n"}) + options.update({'3':"'3' to see original wiktionary\n"}) + else: + options = { + '1':f"\n===================================================\nDo you want to save, edit or discard '{heading}'?\n", + '2':">>>(1=save, 2=edit, 0=discard)"} + #if 'template' not in new_word: + # options.update({'t':" 'T' to add templates\n"}) + #else: + # options.update({'t':"\n"}) + 
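+            # get_selection() (defined in get_selection.py) appears to take a
+            # dict whose keys are the accepted inputs and whose values are the
+            # prompt text to display, returning the key the user typed. The
+            # empty string registered for '0' just below keeps discard
+            # available as a choice without printing an extra prompt line.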
options.update({'0':''}) + user_input = get_selection(options,entry_string) + + if user_input == '0': + return current_dict + + elif user_input.lower() == 't': + tables.auto_template(current_dict,new_word) + if 'template' in new_word: + save_word(new_word,current_dict) + + elif user_input == '1': + save_word(new_word,current_dict) + return current_dict + + elif user_input == '2': + clear_screen() + new_word, finish_and_save = edit_entries(new_word,current_dict,t) + if finish_and_save: + save_word(new_word,current_dict) + return current_dict + + elif user_input == '3': + template = True if 'template' in new_word else False + new_word['entries'] = backup['entries'] + if template: + new_word['template'] = True + clear_screen() +# END WORD OPTIONS + +# SAVE WORD +# # # # # # # # # # # +def save_word(new_word,current_dict,mode=1): + if mode == 2: + load_dict.change_path('dumps sorted') + else: + load_dict.change_path('dictionaries') + for i in range(len(current_dict['definitions'])): + if current_dict['definitions'][i]['handle'] == new_word['handle']: + current_dict['definitions'][i] = deepcopy(new_word) + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + input(f"\n\"{new_word['heading']}\" was updated; press enter to continue") + return current_dict + current_dict['definitions'].append(deepcopy(new_word)) + current_dict['definitions'].sort(key=lambda item: tables.replace_greek(item.get('handle').lower())) + with open(current_dict['file'],mode = 'wb') as openFile: + pickle.dump(current_dict, openFile) + + input(f"\n\"{new_word['heading']}\" was saved; press enter to continue") + return current_dict +# END SAVE WORD + + +def load_dump(language): + print(f"Loading {language}...") + + load_dict.change_path("dumps sorted") + with open(language.replace(" ","") + '-trie.txt','rb') as openFile: + t = pickle.load(openFile) + return t['definitions'] + + +def word_search(current_dict,tags): + + t = load_dump(current_dict['language']) + + while True: + """ Retrieve use selection from dictionary """ + result = choose_from_trie(t,current_dict['language']) + clear_screen() + + """ 'end' will be returned is user choose to end querying """ + if result == None: + return current_dict + + else: + """ Test if selected word already exists in saved dictionary """ + existing_word = False + backup = deepcopy(result) + for i in range(len(current_dict['definitions'])): + if current_dict['definitions'][i]['handle'] == result['handle']: + existing_word = True + result = deepcopy(current_dict['definitions'][i]) + result['tags'] = result['tags'] | tags + current_dict = word_options(result,current_dict,backup,existing_word,t) + +def center_text(text, total_width): + padding_each_side = (total_width - len(text.split('\n')[0])) // 2 + lines = text.split('\n') + centered_lines = [f"{theme} " * ((padding_each_side - 2)//2) + line + f" {theme}" * ( (padding_each_side + 2)//2) + f" {theme}" for line in lines if line != ''] + + return '\n'.join(centered_lines) + +def choose_from_trie(t,lang,debug_print=True): + prefix = '' + items = [] + empty = False + while True: + clear_screen() + + + # print heading + message = f"{theme} " * (75) + "\n" + asci_art = lang.upper() + " WORD SEARCH" + #if len(asci_art)%2 != 0: + # asci_art += "!" 
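+        # Input handling further down in this loop: typed letters extend the
+        # current prefix filter; '0' deletes its last character; '00' clears it
+        # (or exits when it is already empty); '000' always exits; input
+        # starting with '0' replaces the filter; a bare number selects a
+        # result; and trailing '*' characters cap the matched heading length
+        # (e.g. 'am**' keeps only headings of three characters or fewer).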
+ asci_art = figlet_format(asci_art,font='digital',width=150) + asci_art = center_text(asci_art,150) + message += asci_art + "\n" + message += f"{theme} " * (75) + message += "\n" + " "*148 + theme + message += "\n" + " "*148 + theme + print(message) + + + if items: + print_word_list(items) + + if prefix: + print(" " * 148 + theme) + message = f"Current filter: {prefix}-" + print("{:<148}".format(message) + theme) + + if empty: + print(" " * 148 + theme) + print("{:<148}".format("! No items found with this combination") + theme) + print(" " * 148 + theme) + empty = False + + + print("{:<148}".format("Options:") + theme) + + if prefix: + print("{:<148}".format("Enter a number to select definition") + theme) + + print("{:<148}".format("Enter a search word or partial word (add one or more trailing '*'s to limit characters)") + theme) + message = "Enter '0' to go back, '00' to " + if prefix: + message += "clear, '000' to " + message += "end" + print("{:<148}".format(message) + theme) + user_input = input(": ") + + if user_input == '': + continue + + if user_input == '0': + prefix = prefix[:-1] + + elif user_input == '00' and prefix: + prefix = '' + items = [] + if debug_print: + print("\n" + theme * 15 + f"\n\nLINE: {current_line_number()}") + print(f"Prefix == {prefix}") + if prefix: + print(f"Prefix != ''") + exit() + continue + + elif user_input == '00' or user_input == '000': + return None + + elif user_input[0] == '0': + prefix = user_input[1:] + + elif user_input.isnumeric(): + result = get_word(items,int(user_input)) + if result: + return result + else: + continue + else: + prefix += user_input + + prefix = unidecode(prefix).lower() + + while not t.keys(prefix.replace('*','')): + prefix = prefix.replace('*','')[:-1] + + if not prefix: + items = [] + empty = True + continue + + items = t.values(prefix.replace('*','')) + items = flatten_sublists(items) + + if '*' in prefix: + max_length = len(prefix) - 1 + items = [item for item in items if len(item['heading']) <= max_length] + + + if not items: + empty = True + + + +def flatten_sublists(lst): + i = 0 + while i < len(lst): + while isinstance(lst[i], list): + if not lst[i]: + lst.pop(i) + i -= 1 + break + else: + lst[i:i + 1] = lst[i] + i += 1 + return lst + +def get_word(items,user_input): + i = user_input - 1 + if i in range(len(items)): + return items[i] + else: + print("\n\t" + theme*3 + " Selection out of range " + theme*3 + "\n") + return None + +def visible_len(s): + return len([c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn']) + +def print_word_list(items): + clear_screen() + + # print heading + print(f"{theme} " * 75) + print(f"{theme} " * 33 + "SEARCH RESULTS " + f" {theme}" * 34) + print(" " * 148 + theme) + print(" " * 148 + theme) + labels = [" Headings:","Definitions:"] + message = f"{labels[0]:<30}{labels[1]}" + print("{:<148}".format(message) + theme) + print(" " * 148 + theme) + + # Loop to create sub-list to select from + for i in range(len(items)): + + if i >= 100: + message = f"{theme} " * 29 + "Limit reached " + f"{theme} " * 29 + print("{:<148}".format(message) + theme) + break + + # assign word to shorten name + word = items[i] + + # Create and print formatted string + message = '' + for j in range(len(word['entries'])): + if j == 0: + entry_string = f"{i + 1:>6}. {word['heading']}:" + # print with desired alignment + #print(f"{entry_string:<30}",end='') + else: + entry_string = f"{i + 1:>6}{chr(j + 97)}. 
{word['heading']}:" + message += f"{entry_string:<30}" + # print with desired alignment + #print(f"{entry_string:<30}",end='') + + text = word['entries'][j]['defs'] + if len(text) == 1: + entry_len = len(text[0]['gloss']) + entry_string = text[0]['gloss'] + else: + entry_len = sum([len(f"0) " + text[i]['gloss'] + " ") for i in range(len(text))]) + entry_string = '' + for line in [f"{i + 1}) " + text[i]['gloss'] + " " for i in range(len(text))]: + entry_string += line + entry_string = entry_string[:100] + + # if over length attach elipses + if entry_len > 100: + entry_string = entry_string[:-3] + entry_string += "..." + message += f"{entry_string}" + while visible_len(message) < 148: + message += " " + print(message + theme) + message = '' + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2306e5a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +beautifulsoup4==4.12.2 +beta_code==1.1.0 +datrie==0.8.2 +iteration_utilities==0.12.0 +lxml==4.9.3 +pyfiglet==1.0.2 +Requests==2.31.0 +smartypants==2.0.1 +Unidecode==1.3.6 diff --git a/tables.py b/tables.py new file mode 100644 index 0000000..d7a6e6f --- /dev/null +++ b/tables.py @@ -0,0 +1,640 @@ +import json, pickle +import glob +import os +from load_dict import change_path +from get_selection import get_selection +from unidecode import unidecode +from copy import deepcopy +import requests +import re +import tables_greek_ext + +DEBUG = True +import inspect + +def debug_print(message): + line_number = inspect.currentframe().f_back.f_lineno + if DEBUG: + print(f"[Line {line_number}] - {message}") + +def get_tables(language): + change_path('tables') + myFiles = glob.glob('*.txt') + table_file = language.replace(" ","") + "-tables.txt" + if table_file not in myFiles: + return [] + else: + with open(table_file,'r') as f: + tables_list = json.load(f) + tables_list = sort_tables(tables_list,language) + return tables_list + +def get_forms(language): + change_path('tables') + myFiles = glob.glob('*.txt') + form_file = language.replace(" ","") + "-forms.txt" + if form_file not in myFiles: + return [] + else: + with open(form_file,'r') as f: + forms_list = json.load(f) + forms_list = sort_tables(forms_list,language) + return forms_list + +def sort_tables(tables_list,language): + if language == 'Ancient Greek': + tables_list.sort(key=lambda item: greek_sort_name(item.get('title'))) + else: + tables_list.sort(key=lambda item: find_sort_name(item.get('title'))) + return tables_list + +def greek_sort_name(word): + if ": " in word: + word = word[word.find(": ")+2] + return replace_greek(word.lower()) + +def find_sort_name(word): + if ": " in word: + word = word[word.find(": ")+2] + return unidecode(word.lower()) + +def replace_greek(word): + alt_letters = { + 'Ἀ':'Α', + 'ά':'α', 'ἀ':'α', 'ἄ':'α', 'ἅ':'α', 'ἆ':'α', 'ᾰ':'α', 'ᾱ':'α', 'ᾴ':'α', + 'έ':'ε', 'ἐ':'ε', 'ἑ':'ε', 'ἔ':'ε', 'ἕ':'ε', + 'ή':'η','ἡ':'η', 'ἤ':'η', 'ἥ':'η', 'ῆ':'η', + 'ί':'ι','ἰ':'ι', 'ἱ':'ι', 'ἴ':'ι', 'ἵ':'ι', 'ἶ':'ι', 'ῐ':'ι', 'ῑ':'ι', 'ῖ':'ι', + 'ό':'ο','ὀ':'ο', 'ὁ':'ο', 'ὄ':'ο', 'ὅ':'ο', + 'ῥ':'ρ', + 'ύ':'υ','ὐ':'υ', 'ὑ':'υ', 'ὔ':'υ', 'ὕ':'υ', 'ὖ':'υ', 'ὗ':'υ','ῠ':'υ', 'ῡ':'υ', 'ῦ':'υ', + 'ώ':'ω', 'ὧ':'ω','ῶ':'ω', 'ῷ':'ω' + } + for x in word: + if x in alt_letters: + word = word.replace(x,alt_letters[x]) + return word + +def replace_greek_ii(word): + alt_letters = { + 'Ᾰ̓':'Ἀ', + + 'Ᾰ̔':'Ἁ', + 'ᾰ̀':'ὰ', + 'ᾰ́':'ά', + 'ᾰ̓':'ἀ', + 'ᾰ̔':'ἁ', + 'ἀ̆':'ἀ', + 'ἄ̆':'ἄ', + 'ά̆':'ά', + 'Ῐ̓':'Ἰ', + 'Ῐ̔':'Ἱ', + 'ῐ̈':'ι', + 'ϊ':'ι', + 'ῐ̓':'ἰ', + 'ῐ̔':'ἱ', + 
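+        # The keys above and below are vowels carrying combining length marks
+        # (macron or breve, sometimes stacked with breathings, accents or a
+        # diaeresis). Wiktionary headword lines and tables use these, while
+        # page titles typically use plain precomposed letters, so the search
+        # word is run through this map before the page is fetched or compared.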
'ί̆':'ί', + 'Ῠ̔':'Υ', + 'ύ̆':'ύ', + 'ῠ́':'ύ', + 'ῠ̈':'υ', + 'ῠ̔':'ὑ', + + # added for kukao + 'ῠ':'υ', + + # removed for εὐνοϊκός + # 'ϊ̄́':'ί', + + + 'ί':'ί', + 'ύ':'ύ', + + 'ἄ':'ἄ', + 'ἴ':'ἴ', + + 'ᾰ̔':'ἁ', + 'ᾰ́':'ά', + 'ᾰ':'α', + + + + 'Ᾱ̓́':'Ἄ', + 'Ᾱ̓':'Ἀ', + 'Ᾱ̔':'Ἁ', + 'Ᾱ':'Α', + 'ᾱ̔́':'ἅ', + 'ᾱ̓́':'ἄ', + 'ᾱ̀':'ὰ', + 'ᾱ́':'ά', + 'ᾱ̆':'ᾶ', + 'ᾱ̔':'ἁ', + 'ᾱ̓':'ἀ', + 'ᾱ':'α', + 'Ῑ̓́':'Ἴ', + 'Ῑ̓':'Ἰ', + 'Ῑ̔':'Ἱ', + 'Ῑ':'Ι', + 'ῑ́̔':'ἵ', + 'ῑ̈́':'ί', + 'ῑ̓́':'ἴ', + 'ῑ̔́':'ἵ', + 'ῑ̔':'ἱ', + 'ῑ́':'ί', + 'ῑ̆':'ῖ', + 'ῐ':"ι", + 'ῑ̈':'ι', + 'ῐ́':'ί', + 'ῑ̓':'ἰ', + 'ῑ':'ι', + 'ῡ̈́':'ύ', + 'ῡ̔́':'ὕ', + 'ῡ́':'ύ', + 'ῡ̈':'υ', + 'ῡ̔':'ὑ', + 'ῡ':'υ', + 'Ῡ̔':'Υ' + } + + for x in alt_letters: + if x in word: + word = word.replace(x,alt_letters[x]) + return word + + +def save_tables(tables_list,file): + change_path('tables') + with open(file,'w') as f: + json.dump(tables_list,f) + +def add_table(tables_list,table_info): + found = False + for i in range(len(tables_list)): + if tables_list[i]['title'] == table_info['title']: + debug_print(f"Match found {table_info['title']} == {tables_list[i]['title']}") + tables_list[i] = deepcopy(table_info) + debug_print(f"{table_info['title']} updated") + found = True + break + if not found: + debug_print(f"No Match Found: New entry made for {table_info['title']}") + tables_list.append(deepcopy(table_info)) + debug_print(f"{table_info['title']} added successfully") + return tables_list + +def table_options(language): + while True: + + options = {'1':"\nTable Options:\n==================================\n>'1' to add a table\n", + '2':">'2' to edit tables\n", + '3':">'3' to print table flashcards\n", + '4':">'4' for template options\n", + '0':">'0' to go back\n"} + user_input = get_selection(options) + if user_input == '1': + add_tables(language) + elif user_input == '2': + edit_tables(language) + elif user_input == '3': + tables_list = get_tables(language) + print_tables(tables_list,language) + elif user_input == '4': + template_options(language) + elif user_input == '0': + return +''' +def template_options(language): + print('re-doing all templates') + change_path('templates') + myFiles = glob.glob('*.txt') + template_file = language.replace(" ","") + "_templates.txt" + if template_file not in myFiles: + templates = [] + else: + with open(template_file,'r') as f: + templates = json.load(f) + for t in templates: + if t['POS'] == 'verb': + t = tables_greek_ext.get_forms(t) + + change_path('templates') + sort_tables(templates,language) + with open(template_file,'w') as f: + json.dump(templates,f) + + print('re-doing all templates succesful') + return +''' +def template_options(language): + while True: + change_path('templates') + myFiles = glob.glob('*.txt') + template_file = language.replace(" ","") + "_templates.txt" + if template_file not in myFiles: + templates = [] + else: + with open(template_file,'r') as f: + templates = json.load(f) + + while True: + if templates == []: + user_input = "00" + else: + + options, keys = get_template_options(templates) + user_input = get_selection(options,"\nSelect template:\n==================================\n") + if user_input == '0': + break + if user_input == '00': + template = new_template(language) + if template: + templates.append(template) + #if user_input == "x": + # templates = redo_templates(templates,language) + else: + for i in range(len(templates)): + if templates[i]['title'] == keys[user_input]['title']: + edit_template(templates[i],language) + break + + change_path('templates') + sort_tables(templates,language) + 
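+        # Templates are persisted per language as plain JSON in the 'templates'
+        # folder (e.g. templates/AncientGreek_templates.txt), sorted the same
+        # way the tables are. Each entry is shaped roughly like the example at
+        # the top of tables_greek_ext.py, e.g. a hypothetical verb:
+        #   {'title': 'capio', 'POS': 'verb', 'definition': 'to take',
+        #    'principal': 'capio, capere, cepi, captum',
+        #    'parts': {...mood/tense/person nesting...}, 'forms': [...]}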
with open(template_file,'w') as f: + json.dump(templates,f) + return + +def edit_template(template,language): + if language == 'Ancient Greek': + return tables_greek_ext.edit_template(template) + elif language == "Latin": + return tables_latin_ext.edit_template(template) + +def new_template(language,heading='',handle='',definition='',partOfSpeech=''): + if language == 'Ancient Greek': + return tables_greek_ext.new_template(heading,handle,definition,partOfSpeech) + elif language == 'Latin': + return tables_latin_ext.new_template(heading,handle,definition,partOfSpeech) + elif language == 'Old English': + return tables_oe_ext.new_template(heading,handle,definition,partOfSpeech) + +def redo_templates(templates,language): + if language == 'Ancient Greek': + return tables_greek_ext.redo(templates) + +def get_template_options(templates): + counter = 1 + options = {} + keys = {} + for t in templates: + fill_char = '#' + options[str(counter)] = f"{counter:>4}. {t['title']}" + keys[str(counter)] = t + counter += 1 + options.update({'00':" '00' to create new\n"}) + options.update({'0':"'0' to go back"}) + #options.update({"x":"'x' to re-do existing templates"}) + return options, keys + +def auto_template(dictionary,word): + change_path('templates') + myFiles = glob.glob('*.txt') + language = dictionary['language'] + template_file = language.replace(" ","") + "_templates.txt" + if template_file not in myFiles: + templates = [] + else: + with open(template_file,'r') as f: + templates = json.load(f) + + for i in range(len(word['entries'])): + entry = word['entries'][i] + def_string = [x['gloss'] for x in entry['defs']] + def_string = "; ".join(chop_line(def_string[:5])) + print(f"\n{word['heading']} entry #{i+1} info:") + print("{:<18}".format("Principle Parts:") + entry['simpleParts']) + print("{:<18}".format("Part of Speech:") + entry['partOfSpeech']) + print("{:<18}".format("Definition:") + def_string) + if 'etymology' in entry: + if len(entry['etymology']) <= 125: + print("{:<18}".format("Etymology:") + entry['etymology']) + print(f"Create template ('1' for yes, '0' to finish, any other key to skip)") + user_input = input(": ") + if user_input == '0': + break + elif user_input == '1': + word['template'] = True + template = new_template(language,word['heading'],word['handle'],def_string,word['entries'][i]['partOfSpeech']) + if template: + templates.append(template) + + change_path('templates') + sort_tables(templates,language) + with open(template_file,'w') as f: + json.dump(templates,f) + return + +def chop_line(text): + bank = ["*","^","†","∆"] + for x in bank: + for i in range(len(text)): + text[i] = text[i].replace(x,"") + + if len(text) < 3: + limit = 5 + elif len(text) == 3: + limit = 4 + elif len(text) == 4: + limit = 3 + elif len(text) == 5: + limit = 2 + + size = sum([len(line) for line in text]) + if size > 100: + for i in range(len(text)): + text[i] = short_line(text[i],limit) + return text + + +# SHORT LINE +# # # # # # # # # # +def short_line(line,limit): + + #print(f"PRINT SHORT LINE WHILE TOP PRE SPLIT:\n{line}") + line = re.split(",|;",line) + stop = orstop = parstop = limit + for i in range(len(line)): + orlist = [x for x in line[i:] if " or " in x] + if orlist != []: + orstop = i + 1 + continue + else: + #stop = max(i,limit) + break + for i in range(len(line)): + parlist = [x for x in line[i:] if ")" in x or "(" in x] + if parlist != []: + parstop = i + 1 + continue + else: + #stop = max(i,limit) + break + stop = max(orstop,parstop,limit) + line = line[:stop] + new_text = '' + for i 
in range(len(line)): + new_text += line[i].strip() + ', ' + line = new_text.strip(", ") + + return line + +def get_template(language): + change_path('templates') + myFiles = glob.glob('*.txt') + template_file = language.replace(" ","") + "_templates.txt" + if template_file not in myFiles: + return None + else: + with open(template_file,'r') as f: + templates = json.load(f) + while True: + print("Choose template ('0' to go back)") + for i in range(len(templates)): + print(f"{i + 1}. {templates[i]['title']}") + user_input = input(": ") + if user_input == '0': + return None + elif user_input == '': + print("Invalid selection") + elif user_input[0] == '-': + if user_input[1:].isnumeric(): + if int(user_input[1:]) - 1 in range(len(templates)): + del templates[int(user_input[1:]) - 1] + elif int(user_input) - 1 in range(len(templates)): + return templates[int(user_input) - 1 ] + else: + print("Invalid selection") + +def get_table_types(tables_list): + counter = 1 + options = {} + types = {} + for t in tables_list: + if t['type'] not in types.values(): + options[str(counter)] = f"{counter:>3}. {t['type']}\n" + types[str(counter)] = t['type'] + counter += 1 + options.update({'0':"'0' to go back"}) + return options, types + +def get_table_family(tables_list,table_type): + counter = 1 + families = {} + options = {} + for t in tables_list: + if t['type'] == table_type: + word = t['title'][t['title'].find(": ")+2:] + if word not in families.values(): + options[str(counter)] = f"{counter:>3}. {t['title'][t['title'].find(': ')+2:]}" + families[str(counter)] = t['title'][t['title'].find(': ')+2:] + counter += 1 + options.update({'0':"'0' to go back"}) + return options, families + +def get_table_options(tables_list,table_type,family=None): + counter = 1 + positions = {} + options = {} + for i in range(len(tables_list)): + t = tables_list[i] + if t['type'] == table_type: + word = t['title'][t['title'].find(": ")+2:] + if family == None or family == word: + positions[str(counter)] = i + options[str(counter)] = f"{counter:>3}. {t['title']}" + counter += 1 + options.update({'0':"'0' to go back"}) + return options, positions + + +def edit_tables(language): + tables_list = get_tables(language) + while True: + + options, types = get_table_types(tables_list) + options['1'] = "\nSelect table group:\n==================================\n" + options['1'] + user_input = get_selection(options) + if user_input == '0': + return + table_type = types[user_input] + family = None + if table_type == 'conj': + + options, families = get_table_family(tables_list,'conj') + options['1'] = "\nSelect table family:\n==================================\n" + options['1'] + user_input = get_selection(options) + if user_input == '0': + continue + family = families[user_input] + exit_loop = False + while not exit_loop: + options, positions = get_table_options(tables_list,table_type,family) + options['1'] = "\nSelect table:\n==================================\n" + options['1'] + user_input = get_selection(options) + if user_input == '0': + exit_loop = True + continue + elif user_input in positions: + index = positions[user_input] + if index in range(len(tables_list)): + print("'X' to delete, any other key to edit") + user_input = input(': ') + if user_input == 'X': + del tables_list[index] + else: + tables_list[index] = edit_table(tables_list[index]) + save_tables(tables_list,language) + continue + else: + print("Invalid entry") + +def get_person(x,y): + if x == '1st Person': + return '1st Sg.' if y == 'Singular' else '1st Pl.' 
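+    # (The branches below repeat this for 2nd and 3rd person.) These short
+    # labels ('1st Sg.', '3rd Pl.', ...) are what edit_table() shows when it
+    # flattens a nested conjugation table into a numbered list for editing.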
+ elif x == '2nd Person': + return '2nd Sg.' if y == 'Singular' else '2nd Pl.' + elif x == '3rd Person': + return '3rd Sg.' if y == 'Singular' else '3rd Pl.' + +def edit_table(table): + while True: + table_list = [['definition','',table['definition']]] + table_list.append(['title','',table['title']]) + for part in table['parts']: + if type(table['parts'][part]) != dict: + table_list.append([part,'',table['parts'][part]]) + else: + for x in table['parts'][part]: + if type(table['parts'][part][x]) != dict: + table_list.append([part,x,table['parts'][part][x]]) + else: + for y in table['parts'][part][x]: + person = get_person(x,y) + table_list.append([part,person,table['parts'][part][x][y]]) + for i in range(len(table_list)): + if table['type'] != 'parts': + print(f"{i + 1:>2}. {table_list[i][0]:>11} {table_list[i][1]:>8}: {table_list[i][2]}") + else: + print(f"{i + 1:>2}. {table_list[i][0]:>15}: {table_list[i][2]}") + user_input = input("('0' to go back): ") + if user_input == '0': + return table + elif user_input.isnumeric(): + if int(user_input) - 1 in range(len(table_list)): + if table_list[int(user_input) - 1][0] == 'definition': + print("Enter replacement definition ('0' to cancel)") + user_input = input(": ") + if user_input != '0': + table['definition'] = user_input + elif table_list[int(user_input) - 1][0] == 'title': + print("Enter replacement title ('0' to cancel)") + user_input = input(": ") + if user_input != '0': + table['title'] = user_input + else: + part = table_list[int(user_input)-1][0] + number = table_list[int(user_input)-1][1] + print(f"Enter replacement for {table['parts'][part][number]} ('0' to cancel) (ᾱ)") + user_input = input(": ") + if user_input != '0': + table['parts'][part][number] = user_input + else: + print("Invalid entry") + +import tables_latin_ext, tables_greek_ext, tables_oe_ext + +def add_tables(language): + if language == 'Ancient Greek': + tables_greek_ext.add_tables() + elif language == 'Latin': + tables_latin_ext.add_tables() + elif language == 'Old English': + tables_oe_ext.add_tables() + +def get_html(word): + try: + search_string = "https://en.wiktionary.org/wiki/" + word + page_obj = requests.get(search_string) + html_doc = page_obj.content + except: + print("\tERROR: Search String Not Found ") + return + print(f"{word} retreived successfully") + return html_doc + +def create_style(body_string,columns): + if columns == 2: + body_string += '' + f'\ + \ + \ + \ + ' + elif columns == 1: + body_string += '' + f'
            \ + \ + \ + ' + body_string += '' + return body_string + +def create_table(body_string,parts,t_type,columns,header=''): + body_string = create_style(body_string,columns) + body_string += f'' + columns = parts[list(parts.keys())[0]] + for col in columns: + body_string += f'' + body_string += "" + for row in parts: + body_string += '' + if t_type == 'noun': + body_string += f'' + elif t_type == 'conj': + body_string += f'' + else: + body_string += f'' + for col in parts[row]: + body_string += f'' + body_string += '' + body_string += '
            {header}{col}
            {row[:3]}.{row[:3]}{row}{parts[row][col]}
            ' + return body_string + +def print_tables(tables_list,language): + if language == 'Ancient Greek': + tables_greek_ext.print_tables(tables_list) + elif language == 'Latin': + tables_latin_ext.print_tables(tables_list) + elif language == 'Old English': + tables_oe_ext.print_tables(tables_list) + print("\n**************************\n\nFlashcards printed to file\n\n**************************\n") +''' + +#import parser_shell, word_print_edit, edit_dictionary, edit_entry +def add_definition(language): + + while True: + change_path('dumps sorted') + wiki_dump = parser_shell.load_dump(language) + combo_word = parser_shell.choose_from_alpha(wiki_dump,[],language) + if combo_word == 'end' or combo_word == 'return to top' or combo_word == 'back': + return None + else: + if len(combo_word['entries']) > 1: + selection = word_print_edit.select_entry(combo_word['entries'],"Choose the definition you want to use:") + else: + selection = 0 + number = edit_entry.select_definition(combo_word['entries'][selection],"Choose the line you want to use:") + definition = combo_word['entries'][selection]['defs'][number]['gloss'] + if selection == None: + return None + else: + return definition +''' \ No newline at end of file diff --git a/tables_greek_ext.py b/tables_greek_ext.py new file mode 100644 index 0000000..6c0bc0a --- /dev/null +++ b/tables_greek_ext.py @@ -0,0 +1,1316 @@ + +from bs4 import BeautifulSoup +import tables +import html_x +from load_dict import change_path +from get_selection import get_selection +from copy import deepcopy, copy +import random +import sys +import glob, json + +''' +{'title': capio +'search word': +'definition #': +'POS': verb +'definition': 'to take' +'principle' : capio, capere, capi ,captum +'parts':[mood][tense]etc..."capere" +'forms':[{'form':'capere','article':'','tense':'present',etc...}} +] } + +''' + +DEBUG = True +import inspect + +def debug_print(message): + line_number = inspect.currentframe().f_back.f_lineno + if DEBUG: + print(f"[Line {line_number}] - {message}") + +def new_template(heading="",handle="",definition="",POS=""): + template = {} + while True: + x = 'POS' + options = {'0':f"\nEnter choose template {x} ('0' to go back)\n==================================\n", + '1':"'1'> noun\n",'2':"'2'> verb\n", + '00':"'00' to quit\n"} + user_input = get_selection(options) + if user_input == '0': + break + elif user_input == '00': + return + else: + template[x] = 'noun' if user_input == '1' else 'verb' + if template[x] == 'noun': + x = 'gender' + options = {'0':f"\nEnter choose template {x} ('0' to go back)\n==================================\n", + '1':"'1'> (m)\n", + '2':"'2'> (f)\n", + '3':"'3'> (m or f)\n", + '4':"'4'> (n)\n", + '5':"'5'> (m,f,n)\n", + '6':"'6'> (m/f,n)\n", + '7':"'7'> (m/f/n)\n", + '00':"'00' to quit\n"} + user_input = get_selection(options) + if user_input == '0': + break + elif user_input == '00': + return + else: + template[x] = options[user_input][options[user_input].find("("):].strip("\n") + print("Proper noun? 
('Y' for yes, any other key for no)") + user_input = input(": ") + template['proper'] = True if user_input.upper() == 'Y' else False + while True: + x = 'title' + if heading == "": + print(f"Enter template {x} ('0' to go back)") + user_input = input(": ") + if user_input == '0': + break + elif user_input == '00': + return + else: + template[x] = user_input + else: + template[x] = heading + while True: + x = 'search word' + if handle == "": + print(f"Enter template {x} ('0' to go back '00' to quit)") + user_input = input(": ") + if user_input == '0': + break + elif user_input == '00': + return + else: + template[x] = tables.replace_greek_ii(user_input) + else: + template[x] = handle + while True: + print("Enter definition number ('0' if unspecified)") + try: + template['defno'] = int(input(": ")) + except: + print("Invalid entry") + continue + + template = get_forms(template) + x = 'definition' + print(f"Enter template {x} ('0' to go back '00' to quit)") + user_input = input(": ") + if user_input == '0': + continue + elif user_input == '00': + return + else: + template[x] = user_input + print(template) + return template + +def edit_template(template): + while True: + options = {'1':f"\nTemplate \"{template['title']}\" options:\n==================================\n'1' change definition\n", + '2':"'2' to display forms\n", + '3':"'3' to re-do forms\n", + '4':"'4' to delete\n", + '0':"'0' to go back\n"} + user_input = get_selection(options) + + if user_input == '1': + print(f"Current definition: {template['definition']}") + user_input = input("Enter new definition ('0' to cancel): ") + if user_input == '0': + continue + else: + template['definition'] == user_input + elif user_input == '2': + print(template['principal']) + print_parts(template['parts']) + input("\n\nPress Enter to continue\n") + elif user_input == '3': + template = get_forms(template) + elif user_input == '4': + del template + return + elif user_input == '0': + return + +def print_parts(chunk,string='',outer_key=''): + if string != '' or outer_key != '': + string += " " + outer_key + if type(chunk) == dict: + for inner_key in chunk: + print_parts(chunk[inner_key],string,inner_key) + else: + if chunk != '---': + final_str = f"{string.lstrip():_<70}" + chunk + print(final_str) + +def get_forms(template): + if template['POS'] == 'noun': + if template['gender'] == '(multi)': + while True: + print(f"{template['title']} (m,f,n)>1 or (m/f,n)>2?") + user_input = input(": ") + if user_input == '1': + template['gender'] = '(m,f,n)' + break + elif user_input == '2': + template['gender'] = '(m/f,n)' + break + else: + print("Invalid selection") + template['parts'] = retreive_noun_forms(template) + if template['gender'] == '(m,f,n)': + parts = template['parts']['Nominative']['Singular'] + template['principal'] = ", ".join([x for x in parts.values()]) + template['forms'] = inside_out_multi_noun(template['parts']) + elif template['gender'] == '(m/f,n)': + print(template['parts']['Nominative']['Singular']) + template['parts'] = fix_two_ending(template['parts']) + parts = template['parts']['Nominative']['Singular'] + template['principal'] = ", ".join([y for x,y in parts.items() if x != 'Feminine']) + template['forms'] = inside_out_multi_noun(template['parts']) + else: + parts = template['parts']['Nominative']['Singular'] + nom = parts['article'] + " " + parts['form'] + parts = template['parts']['Genitive']['Singular'] + gen = parts['article'] + " " + parts['form'] + template['principal'] = nom + ", " + gen + template['forms'] = 
inside_out_noun(template['parts']) + elif template['POS'] == 'verb': + template['parts'] = retreive_verb_forms(template) + result = assign_principle_parts({},template['parts']) + result = [y['Principal Part'] for y in result['parts'].values()] + template['principal'] = ", ".join(result) + template['forms'] = inside_out_verb(template['parts']) + return template + +def redo(templates): + for template in templates: + template = get_forms(template) + return templates + +def fix_two_ending(parts): + for case in parts: + for number in parts[case]: + print(f"{case} {number} Masculine = {parts[case][number]['Masculine']}") + parts[case][number]["Feminine"] = parts[case][number]["Masculine"] + print(f"{case} {number} Feminine now = {parts[case][number]['Feminine']}") + print(parts) + return parts + + +def nu_movable(x): + wo_nu = x[:x.find("(")] + letter = x[x.find("(")+1:-1] + if len(letter) > 1: + return [wo_nu] + with_nu = wo_nu + letter + return [wo_nu,with_nu] + +def perfectMiddleSubjuctive(part): + parts = part.split() + if len(parts) == 2: + parts[1] = parts[1].split("/") + if len(parts[1]) == 2: + new_parts = [parts[0] + " " + parts[1][0], parts[0] + " " + parts[1][1]] + return new_parts + return parts + +def inside_out_multi_noun(parts): + forms = [] + print(parts) + for case in parts: + for number in parts[case]: + for gender in parts[case][number]: + if parts[case][number][gender] != "—": + if "," in parts[case][number][gender]: + for x in parts[case][number][gender].split(","): + if x.strip()[-1] == ')': + for y in nu_movable(x): + form = {'form':y.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + elif "/" in parts[case][number][gender]: + for x in parts[case][number][gender].split("/"): + if x.strip()[-1] == ')': + for y in nu_movable(x): + form = {'form':y.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + else: + print(f"{case} {number} {gender} = {parts[case][number][gender]}") + if parts[case][number][gender][-1] == ")": + for x in nu_movable(parts[case][number][gender]): + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + else: + form = {'form':parts[case][number][gender]} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + return forms + +def inside_out_noun(parts): + forms = [] + for case in parts: + for number in parts[case]: + if parts[case][number]['form'] != "—": + if "," in parts[case][number]['form']: + for x in parts[case][number]['form'].split(","): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['case'] = case + form['number'] = number + form['article'] = parts[case][number]['article'] + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['article'] = parts[case][number]['article'] + forms.append(deepcopy(form)) + elif "/" in parts[case][number]['form']: + for x in parts[case][number]['form'].split("/"): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['case'] = case + form['number'] = 
number + form['article'] = parts[case][number]['article'] + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['article'] = parts[case][number]['article'] + forms.append(deepcopy(form)) + else: + if parts[case][number]['form'][-1] == ")": + for x in nu_movable(parts[case][number]['form']): + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['article'] = parts[case][number]['article'] + forms.append(deepcopy(form)) + form = {'form':parts[case][number]['form']} + form['case'] = case + form['number'] = number + form['article'] = parts[case][number]['article'] + forms.append(deepcopy(form)) + return forms + +def inside_out_verb(parts): + forms = [] + for tense in parts: + for voice in parts[tense]: + for mood in parts[tense][voice]: + if mood == 'Infinitive': + if parts[tense][voice][mood] != "---": + if "," in parts[tense][voice][mood]: + for x in parts[tense][voice][mood].split(","): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + elif "/" in parts[tense][voice][mood]: + for x in parts[tense][voice][mood].split("/"): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + if parts[tense][voice][mood][-1] == ")": + for x in nu_movable(parts[tense][voice][mood]): + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':parts[tense][voice][mood]} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + elif mood == 'Participle': + for gender in parts[tense][voice][mood]: + if parts[tense][voice][mood][gender] != "---": + if "," in parts[tense][voice][mood][gender]: + for x in parts[tense][voice][mood][gender].split(","): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['gender'] = gender + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['gender'] = gender + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + elif "/" in parts[tense][voice][mood][gender]: + for x in parts[tense][voice][mood][gender].split("/"): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['gender'] = gender + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['gender'] = gender + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + if parts[tense][voice][mood][gender][-1] == ")": + for x in nu_movable(parts[tense][voice][mood][gender]): + form = {'form':x.strip()} + form['gender'] = gender + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':parts[tense][voice][mood][gender]} + form['gender'] = gender + form['tense'] = 
tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + for person in parts[tense][voice][mood]: + if parts[tense][voice][mood][person] != "---": + if "," in parts[tense][voice][mood][person]: + for x in parts[tense][voice][mood][person].split(","): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + elif "/" in parts[tense][voice][mood][person]: + perfmidsubj_flag = False + for x in parts[tense][voice][mood][person].split("/"): + if x in ['ὦ','ᾖς','ᾖ','ἦτον','εἴην','εἴης','εἴη','εἴητον','εἶτον','εἰήτην','εἴτην','ὦμεν','ἦτε','ὦσῐ','ὦσῐ(ν)','εἴημεν','εἶμεν','εἴητε','εἶτε','εἴησᾰν','εἶεν']: + perfmidsubj_flag = True + if perfmidsubj_flag: + for x in perfectMiddleSubjuctive(parts[tense][voice][mood][person]): + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + else: + for x in parts[tense][voice][mood][person].split("/"): + if x.strip()[-1] == ")": + for y in nu_movable(x): + form = {'form':y.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + else: + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + else: + if parts[tense][voice][mood][person][-1] == ")": + for x in nu_movable(parts[tense][voice][mood][person]): + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + else: + form = {'form':parts[tense][voice][mood][person]} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + return forms + + + + +def auto_parts(parts,rapid_fire=False): + if not rapid_fire: + print("Enter look up word") + user_input = input(": ") + else: + if " " in parts: + user_input = parts[:parts.find(" ")+1] + else: + user_input = parts + user_input = tables.replace_greek_ii(user_input) + print(user_input) + #try: + result = retreive_verb_forms({'search word':user_input,'title':''})#'defno':0}) + result = assign_principle_parts(result,result) + result = [y['Principal Part'] for y in result['parts'].values()] + result = ", ".join(result) + while True: + print("Does this look right ('1' to except '0' to discard)?") + print(result) + user_input = input(": ") + if user_input == '1': + return result + elif user_input == '0': + return parts + else: + print("Invalid entry") + #except: + # print("Something went wrong") + return parts + +def assign_principle_parts(table_info,tenses,template=None): + table_info['parts'] = {} + # 1 + + part = copy(tenses['Present']['Active']['Indicative']['1st Person Singular']) + if part == "---": + part = copy(tenses['Present']['Middle/Passive']['Indicative']['1st Person Singular']) + if part == "---": + part = copy(tenses['Present']['Middle']['Indicative']['1st Person Singular']) + + table_info['parts'].update({'Present':{'Principal Part':part}}) + + # 2 + part = copy(tenses['Future']['Active']['Indicative']['1st Person Singular']) + if part == "---": + part = 
copy(tenses['Future']['Middle']['Indicative']['1st Person Singular']) + + table_info['parts'].update({'Future':{'Principal Part':part}}) + + part = copy(tenses['Perfect']['Middle/Passive']['Indicative']['1st Person Singular']) + if part == '---': + part = copy(tenses['Perfect']['Middle']['Indicative']['1st Person Singular']) + + table_info['parts'].update( + # 3 + {'Aorist Act.':{'Principal Part':tenses['Aorist']['Active']['Indicative']['1st Person Singular']},\ + # 4 + 'Perfect Act.':{'Principal Part':tenses['Perfect']['Active']['Indicative']['1st Person Singular']},\ + # 5 + 'Perf. M./P.':{'Principal Part':part},\ + # 6 + 'Aorist Pas.':{'Principal Part':tenses['Aorist']['Passive']['Indicative']['1st Person Singular']}}) + if template: + table_info['title'] = f"Principal Parts: {template['title']}" + return table_info + +def count_indent(a): + return len(a) - len(a.lstrip()) + +def retreive_verb_forms(template): + if 'defno' in template: + defno = template['defno'] + else: + defno = 0 + html_doc = tables.get_html(template['search word']) + if html_doc == None: + return + soup = BeautifulSoup(html_doc, 'html.parser') + page_list = soup.prettify().split('\n') + if template['title'] == 'εἰμῐ́': + print("exception detected") + exception = True + else: + exception = False + page_list = clean_page_list(page_list,exception) + + tenses = {"Present":'','Imperfect':'','Future':'','Aorist':'','Perfect':'','Pluperfect':''} + voices = {"Active":'',"Middle/Passive":'',"Middle":'','Passive':''} + moods = {'Indicative':[],'Subjunctive':[],'Optative':[],'Imperative':[],"Infinitive":[],"Participle":[]} + mood_list = ['Indicative','Subjunctive','Optative','Imperative'] + persons = {'1st Person Singular':'---','2nd Person Singular':'---','3rd Person Singular':'---','2nd Person Dual':'---','3rd Person Dual':'---','1st Person Plural':'---','2nd Person Plural':'---','3rd Person Plural':'---'} + genders = {'Masculine':'---','Feminine':'---','Neuter':'---'} + codec = {'m':'Masculine','f':'Feminine','n':'Neuter'} + + for tense in tenses: + tenses[tense] = deepcopy(voices) + for voice in tenses[tense]: + tenses[tense][voice] = deepcopy(moods) + for mood in tenses[tense][voice]: + if mood in mood_list: + tenses[tense][voice][mood] = deepcopy(persons) + elif mood == 'Participle': + tenses[tense][voice][mood] = deepcopy(genders) + elif mood == 'Infinitive': + tenses[tense][voice][mood] = '---' + tense = False + mood = False + voice = False + + indent = 0 + uncontracted = {} + unc_flag = False + counter = 0 + + + def build(uncontracted,tense,voice,mood): + if tense not in uncontracted: + uncontracted[tense] = {} + if voice not in uncontracted[tense]: + uncontracted[tense][voice] = {} + if mood not in uncontracted[tense][voice]: + uncontracted[tense][voice][mood] = {} + if mood in mood_list: + tenses[tense][voice][mood] = deepcopy(persons) + elif mood == 'Participle': + tenses[tense][voice][mood] = deepcopy(genders) + elif mood == 'Infinitive': + tenses[tense][voice][mood] = '---' + + + + for i in range(len(page_list)): + if counter < defno: + if page_list[i].strip(": ").title() in tenses: + if not indent: + indent = count_indent(page_list[i]) + if count_indent(page_list[i]) < indent - 2: + indent = 0 + counter += 1 + else: + if count_indent(page_list[i]) < indent - 2: + if counter < defno: + indent = 0 + tense = False + mood = False + voice = False + counter += 1 + else: + break + if page_list[i].strip(": ").title() in tenses: + if not indent: + indent = count_indent(page_list[i]) + tense = page_list[i].strip(": 
").title() + voice = mood = gender = indent = unc_flag = False + voices = [] + elif page_list[i].strip() == '(Uncontracted)': + unc_flag = True + elif tense: + if page_list[i].strip().title() in tenses[tense]: + voice = page_list[i].strip().title() + voices.append(page_list[i].strip().title()) + index = 0 + elif voice: + if page_list[i].strip().title() in moods: + mood = page_list[i].strip().title() + index = 0 + elif mood: + if page_list[i].strip() == "Notes:": + voice = mood = gender = False + else: + if mood in mood_list: + if index not in range(len(list(persons.keys()))): + mood = False + else: + if mood == 'Imperative' and (index == 0 or index == 5): + index += 1 + if tenses[tense][voice][mood][list(persons.keys())[index]] == '---': + if not unc_flag: + tenses[tense][voice][mood][list(persons.keys())[index]] = page_list[i].strip() + else: + build(uncontracted,tense,voice,mood) + uncontracted[tense][voice][mood][list(persons.keys())[index]] = page_list[i].strip() + index += 1 + elif mood == 'Infinitive': + if index >= len(voices): + index %= len(voices) + if tenses[tense][voices[index]][mood] == '---': + if not unc_flag: + tenses[tense][voices[index]][mood] = page_list[i].strip() + else: + build(uncontracted,tense,voice,mood) + uncontracted[tense][voices[index]][mood] = page_list[i].strip() + index += 1 + elif mood == 'Participle': + if index >= len(voices): + index %= len(voices) + if page_list[i].strip() in codec: + gender = codec[page_list[i].strip()] + continue + elif gender and tenses[tense][voices[index]][mood][gender] == '---': + if unc_flag == False: + tenses[tense][voices[index]][mood][gender] = page_list[i].strip() + else: + build(uncontracted,tense,voices[index],mood) + uncontracted[tense][voices[index]][mood][gender] = page_list[i].strip() + index += 1 + for tense in uncontracted.keys(): + tenses[f'{tense} (Uncontracted)'] = uncontracted[tense] + return tenses + +def retreive_noun_forms(template): + html_doc = tables.get_html(template['search word']) + print(f"template['gender'] = {template['gender']}") + three_ending = True if template['gender'] == '(m,f,n)' else False + two_ending = True if template['gender'] == '(m/f,n)' else False + print(f" three_ending = {three_ending}, two_ending = {two_ending}") + if 'proper' in template: + proper = True if template['proper'] else False + else: + proper = False + if html_doc == None: + return + soup = BeautifulSoup(html_doc, 'html.parser') + page_list = soup.prettify().split('\n') + page_list = clean_page_list(page_list) + + cases = {"Nominative":'','Genitive':'','Dative':'','Accusative':'','Vocative':''} + numbers = {'Singular':'---','Dual':'---',"Plural":'---'} + genders = {'Masculine':'---','Feminine':'---','Neuter':'---'} + article = {'article':'','form':'---'} + if three_ending or two_ending: + for n in numbers: + numbers[n] = deepcopy(genders) + else: + for n in numbers: + numbers[n] = deepcopy(article) + for case in cases: + cases[case] = deepcopy(numbers) + + case = False + indent = 0 + for i in range(len(page_list)): + if count_indent(page_list[i]) < indent - 2: + break + if page_list[i].strip() in cases: + if not indent: + indent = count_indent(page_list[i]) + case = page_list[i].strip() + index = 0 + if case: + emdash = True if page_list[i].strip() == '—' else False + if three_ending: + if index % 2 != 0 or emdash: + if index <= 6: + number = 'Singular' + elif index <= 12: + number = 'Dual' + elif index <= 18: + number = 'Plural' + else: + if case == "Vocative": + break + case = False + continue + key_list = 
['Masculine','Feminine','Neuter'] + cases[case][number][key_list[index//2%3]] = page_list[i].strip() + if two_ending: + if index % 2 != 0 or emdash: + if index <= 4: + number = 'Singular' + elif index <= 8: + number = 'Dual' + elif index <= 12: + number = 'Plural' + else: + if case == "Vocative": + break + case = False + continue + key_list = ['Masculine','Neuter'] + cases[case][number][key_list[index//2%2]] = page_list[i].strip() + elif not two_ending and not three_ending: + if index % 3 != 0 or emdash: + if index <= 3: + number = 'Singular' + elif index <= 6: + number = 'Dual' + elif index <= 9: + number = 'Plural' + else: + if case == "Vocative": + break + case = False + continue + if case == "Vocative": + if "notes" in page_list[i].lower(): + break + elif proper and number != 'Singular': + break + cases[case][number]['form'] = page_list[i].strip() + index += 1 + elif proper: + cases[case][number]['form'] = page_list[i].strip() + index += 1 + else: + key_list = ['article','form'] + cases[case][number][key_list[index%3-1]] = page_list[i].strip() + index += 2 if emdash else 1 + return cases + +def add_tables(): + tables_list = tables.get_tables('Ancient Greek') + + table_info = {'title':'','type':''} + table_file = "AncientGreek-tables.txt" + + while True: + print("Use table template? ('1' for yes, '0' to go back, any other key for no) 'AUTO' to auto-add all forms") + user_input = input(": ") + if user_input == '1': + template = tables.get_template('Ancient Greek') + if template == None: + continue + table_info['title'] = template['title'] + elif user_input.upper() == 'AUTO': + auto_add([],table_info,table_file) + return + elif user_input == '0': + return + else: + template = None + print(f"Enter table title ('0' to go back)") + table_info['title'] = input(': ') + if table_info['title'] == '0': + continue + + exit_second_loop = False + while not exit_second_loop: + complete = False + print("Select table type") + options = {'1':"'1'> noun\n",'2':"'2'> verb principal parts\n",'3':"'3'> verb conjugation\n",'4':"'4'> noun + dual\n",'5':"'5'> complete verb system\n",'6':"'6'> complete forms\n"} + options.update({'0':"'0'> to go back"}) + user_input = get_selection(options) + if user_input == '0': + exit_second_loop = True + continue + elif user_input == '1': + table_info['type'] = 'noun' + elif user_input == '2': + table_info['type'] = 'parts' + elif user_input == '3': + table_info['type'] = 'conj' + elif user_input == '4': + table_info['type'] = 'dual' + elif user_input == '5': + table_info['type'] = 'conj' + complete = True + elif user_input == '6': + table_info['type'] = 'form' + complete = True + + exit_third_loop = False + while not exit_third_loop: + if template: + table_info['definition'] = template['definition'] + else: + definition = input("Enter definition ('0' to go back): ") + if definition == '0': + exit_third_loop = True + continue + table_info['definition'] = definition + while True: + print("Enter definition number (1-n) ('0' to go back)") + try: + user_input = int(input(": ")) + except: + print("Invalid entry") + continue + if user_input == 0: + break + else: + table_info['defno'] = user_input + + if complete: + if table_info['type'] == 'conj': + add_complete_verb_system(template,table_info,tables_list,table_file) + elif table_info['type'] == 'form': + add_complete_forms(template) + exit_second_loop = exit_third_loop = True + print("tables added succesfully") + continue + + exit_loop_four = False + while not exit_loop_four: + print(f"Do you want to auto-retreive 
{table_info['title']}? ('1' for yes, '0' to go back, '00' to exit)") + user_input = input(": ") + if user_input == '1': + if table_info['type'] in ['parts','conj']: + table_info = auto_retreive_verb(table_info,template) + else: + table_info = auto_retreive_noun(table_info,template) + if result: + tables_list = tables.add_table(tables_list) + tables.save_tables(tables_list,'AncientGreektables.txt') + exit_loop_four = exit_third_loop = exit_second_loop = True + continue + elif user_input == '0': + exit_loop_four = exit_third_loop = exit_second_loop = True + continue + elif user_input == '00': + return + +def auto_add(tables_list,table_info,table_file): + change_path('templates') + myFiles = glob.glob('*.txt') + template_file = "AncientGreek_templates.txt" + if template_file not in myFiles: + print(f"file: {template_file} not found") + return + else: + with open(template_file,'r') as f: + templates = json.load(f) + + for template in templates: + debug_print(f"Begin: {template['title']}") + i = template + #add_complete_forms(template) + + table_info['definition'] = template['definition'] + if template["POS"] == 'verb': + + table_info['type'] = 'conj' + #for template in templates: + + debug_print(f"Running add_complete_verb_system") + add_complete_verb_system(template,table_info,tables_list,table_file) + debug_print(f"add_complete_verb_system successful") + + if template["POS"] == 'noun': + + table_info['type'] = 'noun' + if template['gender'] == "(m,f,n)" or template['gender'] == "(m/f,n)": + continue + + debug_print(f"Running auto_retreive_noun") + table_info = auto_retreive_noun(table_info,template) + debug_print(f"auto_retreive_noun successful") + + debug_print(f"Running add_table") + tables_list = tables.add_table(tables_list,table_info) + debug_print(f"add_table successful") + tables.save_tables(tables_list,table_file) + + +def empty(item): + if type(item) != dict: + return True if item == '---' or item == '—' else False + for key in item: + if not empty(item[key]): + return False + return True + +""" +[{'title':'capere','instances':[ +{'root':template['title'], +'features':{x:y for x,y in template['forms'] if x != 'form'} +'principal:template['principal'], +'definition':template['definition'], +} +]} +] +""" + +def add_complete_forms(template): + forms_list = tables.get_forms("Ancient Greek") + for i in range(len(template['forms'])): + if not debug_print: + print('*',end='',flush=True) + found = False + for x in range(len(forms_list)): + if forms_list[x]['title'] == template['forms'][i]['form']: + found = True + instance = {'form':template['forms'][i]['form']} + instance['root'] = template['title'] + instance['features'] = {x:y for x,y in template['forms'][i].items() if x != 'form'} + + same = True + for y in forms_list[x]['instances']: + for key in y['features'].keys(): + if key not in instance['features']: + same = False + break + else: + if y['features'][key] != instance['features'][key]: + if debug_print: + print(f"NOT SAME {y['features']} AND {instance['features']}") + same = False + break + if not same: + break + if same: + debug_print(f"rejected DUPLICATE") + break + if 'tense' in instance['features']: + if instance['features']['tense'] not in ["Present",'Imperfect','Future','Aorist']: + debug_print(f"rejected {instance['features']['tense']}") + break + if 'mood' in instance['features']: + if instance['features']['mood'] in ["Subjunctive",'Imperative','Optative']: + debug_print(f"rejected {instance['features']['mood']}") + break + if 'number' in instance['features']: + if 
instance['features']['number'] == 'Dual': + debug_print(f"rejected {instance['features']['number']}") + break + if 'person' in instance['features']: + if instance['features']['person'] in ['2nd Person Dual','3rd Person Dual']: + debug_print(f"rejected {instance['features']['person']}") + break + instance['principal'] = template['principal'] + instance['definition'] = template['definition'] + + + debug_print("ADDING TO EXISTING") + debug_print(instance) + if not DEBUG: + print('<',end='',flush=True) + forms_list[x]['instances'].append(deepcopy(instance)) + + + if not found: + form = {'title':template['forms'][i]['form']} + instance = {'form':template['forms'][i]['form']} + instance['root'] = template['title'] + instance['features'] = {x:y for x,y in template['forms'][i].items() if x != 'form'} + if 'tense' in instance['features']: + if instance['features']['tense'] not in ["Present",'Imperfect','Future','Aorist']: + debug_print(f"rejected {instance['features']['tense']}") + continue + if 'mood' in instance['features']: + if instance['features']['mood'] in ["Subjunctive",'Imperative','Optative']: + debug_print(f"rejected {instance['features']['mood']}") + continue + if 'number' in instance['features']: + if instance['features']['number'] == 'Dual': + debug_print(f"rejected {instance['features']['number']}") + continue + if 'person' in instance['features']: + if instance['features']['person'] in ['2nd Person Dual','3rd Person Dual']: + debug_print(f"rejected {instance['features']['person']}") + continue + instance['principal'] = template['principal'] + instance['definition'] = template['definition'] + form['instances'] = [deepcopy(instance)] + + + debug_print("ADDING NEW") + debug_print(instance) + if not debug_print: + print('+',end='',flush=True) + forms_list.append(deepcopy(form)) + + file = 'AncientGreek_forms.txt' + + tables.add_tables(forms_list) + + + +def add_complete_verb_system(template,table_info,tables_list,table_file): + + table_info['principal'] = template['principal'] + tenses = template['parts'] + for tense in ["Present",'Imperfect','Future','Aorist']: + debug_print(f"Tense: {tense}") + for voice in ["Active","Middle/Passive","Middle",'Passive']: + debug_print(f"Voice: {voice}") + + debug_print("Running assign_table_info") + table_info = assign_table_info(table_info,template,tenses,tense,voice,'Indicative') + debug_print("assign_table_info run succesfully") + + if empty(table_info['parts']): + debug_print(f"\t\t{table_info['title']} was blank") + continue + else: + tables_list = tables.add_table(tables_list,table_info) + debug_print(f"\t\t{table_info['title']} saved") + table_info['type'] = 'parts' + table_info = assign_principle_parts(table_info,tenses,template) + tables_list = tables.add_table(tables_list,table_info) + +def auto_retreive_verb(table_info,template): + tenses = template['parts'] + if table_info['type'] == 'parts': + table_info = assign_principle_parts(table_info,tenses,template) + return table_info + elif table_info['type'] == 'conj': + options1 = {'1':"Present",'2':'Imperfect','3':'Future','4':'Aorist','5':'Perfect'} + user_input = get_selection(options1,2) + tense = options1[user_input] + options2 = {'1':"Active",'2':"Middle/Passive",'3':"Middle",'4':'Passive'} + user_input = get_selection(options2,2) + voice = options2[user_input] + options3 = {'1':'Indicative','2':'Subjunctive','3':'Optative','4':'Imperative'} + user_input = get_selection(options3,2) + mood = options3[user_input] + table_info = assign_table_info(table_info,template,tenses,tense,voice,mood) 
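+ # table_info now holds the chosen tense/voice/mood paradigm (person/number cells plus the infinitive and participle rows) and the generated table title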
+ return table_info + +def assign_table_info(table_info,template,tenses,tense,voice,mood): + table_info['parts'] = { + '1st Person':{ + 'Singular':tenses[tense][voice][mood]['1st Person Singular'],\ + 'Plural':tenses[tense][voice][mood]['1st Person Plural']},\ + '2nd Person':{ + 'Singular':tenses[tense][voice][mood]['2nd Person Singular'],\ + 'Plural':tenses[tense][voice][mood]['2nd Person Plural']},\ + '3rd Person':{ + 'Singular':tenses[tense][voice][mood]['3rd Person Singular'],\ + 'Plural':tenses[tense][voice][mood]['3rd Person Plural']}} + table_info['parts'].update({'Infinitive':tenses[tense][voice]['Infinitive']}) + table_info['parts'].update({'Participle':{ + 'Masculine':tenses[tense][voice]['Participle']['Masculine'],\ + 'Feminine':tenses[tense][voice]['Participle']['Feminine'],\ + 'Neuter':tenses[tense][voice]['Participle']['Neuter']}}) + if template: + table_info['title'] = f"{tense.title()} {voice.title()} {mood.title()}: {template['title']}" + return table_info + +def auto_retreive_noun(table_info,template): + table_info['principal'] = template['principal'] + parts = template['parts'] + case_keys = ["Nominative",'Genitive','Dative','Accusative','Vocative'] + number_keys = ['Singular','Dual',"Plural"] + table_info['parts'] = {} + for case in case_keys: + table_info['parts'][case] = {} + for number in number_keys: + if number == 'Dual' and table_info['type'] != 'dual': + continue + if 'article' in parts[case][number]: + if parts[case][number]['article'] != "": + table_info['parts'][case][number] = parts[case][number]['article'] + " " + parts[case][number]['form'] + else: + table_info['parts'][case][number] = parts[case][number]['form'] + elif 'Masculine' in parts[case][number]: + table_info['parts'][case][number] = ", ".join([parts[case][number]['Masculine'], parts[case][number]['Feminine'], parts[case][number]['Neuter']]) + if template: + table_info['title'] = f"{template['title']}" + return table_info + +def clean_page_list(page_list,exception=False): + offset = 0 + for i in range(1,len(page_list)): + i = i - offset + if ' ' in page_list[i]: + page_list[i] = page_list[i].replace(' ',' ') + if page_list[i].strip()[0] == "<": + del page_list[i] + offset += 1 + + offset = 0 + perfmidsubj = ['ὦ','ᾖς','ᾖ','ἦτον','εἴην','εἴης','εἴη','εἴητον','εἶτον','εἰήτην','εἴτην','ὦμεν','ἦτε','ὦσῐ','ὦσῐ(ν)','εἴημεν','εἶμεν','εἴητε','εἶτε','εἴησᾰν','εἶεν'] + #articles = ["ὁ, ἡ" , "οἱ, αἱ" , "τοῦ, τῆς" , "τῷ, τῇ" , "τοῖς, ταῖς" , "τὸν, τὴν" , "τοὺς, τᾱ̀ς"] + for i in range(1,len(page_list)): + i = i - offset + if page_list[i].strip(' ') == 'passive' and page_list[i - 1].strip(' ') == 'middle/': + page_list[i] = 'middle/passive' + del page_list[i - 1] + offset += 1 + elif page_list[i].strip(' ') == ',': + page_list[i - 1] = page_list[i - 1].rstrip(' ') + ", " + page_list[i + 1].strip(' ') + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i].strip(' ') == 'ν': + if page_list[i - 1].strip(' ') == '(': + page_list[i - 2] = page_list[i - 2].rstrip(' ') + '(ν)' + del page_list[i + 1] + del page_list[i] + del page_list[i - 1] + offset += 3 + else: + page_list[i - 1] = page_list[i - 1].rstrip(' ') + '(ν)' + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i].strip(' ') == 'σ': + if page_list[i - 1].strip(' ') == '(': + page_list[i - 2] = page_list[i - 2].rstrip(' ') + '(σ)' + page_list[i + 2].strip(' ') + del page_list[i + 2] + del page_list[i + 1] + del page_list[i] + del page_list[i - 1] + offset += 4 + else: + page_list[i - 1] = page_list[i - 1].rstrip(' ') + 
'(σ)' + page_list[i + 2].strip(' ') + del page_list[i + 2] + del page_list[i + 1] + del page_list[i] + offset += 3 + elif page_list[i].strip(' ') == '/': + page_list[i - 1] = page_list[i - 1].rstrip(' ') + "/" + page_list[i + 1].strip(' ') + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i].strip(' ')[:1] == ')' and page_list[i - 2].strip(' ')[-1] == '(': + page_list[i - 2] = page_list[i - 2].rstrip(" ") + page_list[i - 1].strip(" ") + page_list[i].strip(' ') + for x in range(2): + del page_list[i - 1] + offset += 2 + elif page_list[i].strip(' ') in perfmidsubj and previous not in perfmidsubj and not exception: + page_list[i - 1] = page_list[i - 1].rstrip(' ') + ' ' + page_list[i].strip(' ') + previous = page_list[i].strip(' ') + del page_list[i] + offset += 1 + elif page_list[i].strip()[0] == "<" or (len(page_list[i].strip()) == 1 and page_list[i].strip().isnumeric()): + del page_list[i] + offset += 1 + else: + previous = '' + return page_list + +""" +[{'title':'capere','instances':[ +{'root':template['title'], +'features':{x:y for x,y in template['forms'] if x != 'form'} +'principal:template['principal'], +'definition':template['definition'], +} +]} +] +""" + +def features_join(features): + join = "" + if 'gender' in features: + join += " " + features['gender'] + "" + for x,y in features.items(): + if x != 'article': + if x == "mood" and (y != "Infinitive" and y != 'Participle'): + join += " " + y + "" + elif x == 'gender': + pass + else: + join += " " + y + "" + return join.strip() + +def print_forms(): + out_file = 'AncientGreek-FormCards.txt' + original_stdout = sys.stdout + change_path('flashcards') + sys.stdout = open(out_file,'w') + forms_list = tables.get_forms("Ancient Greek") + random_list = list(range(len(forms_list))) + random.shuffle(random_list) + for i in random_list: + print('

            · ' + forms_list[i]['title'] + '

            |',end='') + for instance in forms_list[i]['instances']: + features = features_join(instance['features']) + if 'article' in instance['features']: + print('

            ' + instance['features']['article'] + " " + instance['form'] + '
            ',end='') + else: + print('

            ',end='') + print(features,end='') + print(' form of: ' + instance['principal'] + '
            ',end='') + print("Definition: " + instance['definition'] + '


            ',end='') + print('|"forms"') + sys.stdout = original_stdout + + +def print_tables(tables_list): + print_forms() + out_file = 'AncientGreek-TableCards.txt' + original_stdout = sys.stdout + change_path('flashcards') + sys.stdout = open(out_file,'w') + random_list = list(range(len(tables_list))) + random.shuffle(random_list) + for i in random_list: + + if tables_list[i]['type'] != 'noun': + print('

            ' + tables_list[i]['title'] + '

            |',end='') + else: + print('

            Forms: ' + tables_list[i]['title'] + '

            |',end='') + + if tables_list[i]['definition']: + body_string = '

            ' + tables_list[i]['definition'] + '
            ' + else: + body_string = '' + if tables_list[i]['principal'] and tables_list[i]['type'] != 'parts': + body_string += "Parts: " + tables_list[i]['principal'] + '
            ' + body_string += '

            ' + + body_string = html_x.set_styles(body_string) + + if tables_list[i]['type'] == 'parts': + body_string = html_x.create_table(body_string,tables_list[i]['parts'],'parts',1) + elif tables_list[i]['type'] == 'noun': + body_string = html_x.create_table(body_string,tables_list[i]['parts'],'noun',2) + elif tables_list[i]['type'] == 'conj': + parts = {k:v for k,v in tables_list[i]['parts'].items() if k != "Infinitive" and k != "Participle"} + body_string = html_x.create_table(body_string,parts,'conj',2) + body_string = html_x.create_style(body_string,1) + body_string = html_x.create_box(body_string,'Infinitive',tables_list[i]["parts"]["Infinitive"]) + parts = {k:v for k,v in tables_list[i]['parts'].items() if k == "Participle"} + parts = {k:{'Participle':v} for k,v in parts['Participle'].items()} + body_string = html_x.create_table(body_string,parts,'Participle',1) + + print(body_string + '|"table"') + + sys.stdout = original_stdout \ No newline at end of file diff --git a/tables_latin_ext.py b/tables_latin_ext.py new file mode 100644 index 0000000..c0f5c0b --- /dev/null +++ b/tables_latin_ext.py @@ -0,0 +1,1113 @@ + +from bs4 import BeautifulSoup + +import tables +from load_dict import change_path +from get_selection import get_selection +from copy import deepcopy +from unidecode import unidecode +import html_x +import random +import sys +import json +import glob + + +def new_template(heading="",handle="",definition="",POS=""): + template = {} + while True: + x = 'POS' + if POS != "verb" and POS != "noun" and POS != "adjective": + print("Enter choose template {x} ('0' to go back)") + print("'1'> noun/adj.") + print("'2'> verb") + print("type description for other (e.g. 'particle', 'not comparable', 'indeclinable')") + user_input = input(": ") + if user_input == '0': + break + elif user_input == '00': + return + elif user_input == '1': + template[x] = 'noun/adj.' + elif user_input == '2': + template[x] = 'verb' + else: + template[x] = user_input + elif POS == "noun" or POS == "adjective": + template[x] = 'noun/adj.' + elif POS == "verb": + template[x] = 'verb' + + if template[x] == 'noun/adj.': + x = 'gender' + options = {'0':f"Enter choose template {x} ('0' to go back)\n", + '1':"'1'> (m)\n", + '2':"'2'> (f)\n", + '3':"'3'> (m or f)\n", + '4':"'4'> (n)\n", + '5':"'5'> (m,f,n)\n", + '6':"'6'> (m/f,n)\n", + '7':"'7'> (m/f/n)\n", + '00':"'00' to quit\n"} + user_input = get_selection(options) + if user_input == '0': + break + elif user_input == '00': + return + else: + template[x] = options[user_input][options[user_input].find("("):].strip("\n") + print("Proper noun? ('Y' for yes, any other key for no)") + user_input = input(": ") + template['proper'] = True if user_input.upper() == 'Y' else False + while True: + x = 'title' + if heading == "": + print(f"Enter template {x} ('0' to go back)") + user_input = input(": ") + if user_input == '0': + break + elif user_input == '00': + return + else: + template[x] = user_input + else: + template[x] = heading + x = 'search word' + if template['POS'] != 'noun/adj.' and template['POS'] != 'verb': + template['search word'] = None + elif handle == "": + template[x] = unidecode(user_input) + else: + template[x] = handle + while True: + if template['POS'] != 'noun/adj.' 
and template['POS'] != 'verb': + template['defno'] = None + else: + print("Enter definition number ('0' if unspecified)") + try: + template['defno'] = int(input(": ")) + except: + print("Invalid entry") + continue + + template = get_forms(template) + print(template['principal']) + print_parts(template['parts']) + x = 'definition' + if definition == "": + print(f"Enter template {x} ('0' to go back, '00' to quit)") + user_input = input(": ") + if user_input == '0': + continue + elif user_input == '00': + return + else: + template[x] = user_input + else: + template[x] = definition + return template + +def edit_template(template): + while True: + options = {'1':f"Template \"{template['title']}\" options:\n'1' change definition\n", + '2':"'2' to display forms\n", + '3':"'3' to re-do forms\n", + '4':"'4' to delete\n", + '0':"'0' to go back\n"} + user_input = get_selection(options) + + if user_input == '1': + print(f"Current definition: {template['definition']}") + user_input = input("Enter new definition ('0' to cancel): ") + if user_input == '0': + continue + else: + template['definition'] = user_input + elif user_input == '2': + print(template['principal']) + print_parts(template['parts']) + input("\n\nPress Enter to continue\n") + elif user_input == '3': + template = get_forms(template) + elif user_input == '4': + del template + return + elif user_input == '0': + return + + + +def print_parts(chunk,string='',outer_key=''): + if string != '' or outer_key != '': + string += " " + outer_key + if type(chunk) == dict: + for inner_key in chunk: + print_parts(chunk[inner_key],string,inner_key) + else: + if chunk != '---' and chunk != "–": + final_str = f"{string.lstrip():_<70}" + chunk + print(final_str) + +def get_forms(template): + if template['POS'] != 'noun/adj.' 
and template['POS'] != 'verb': + template['forms'] = [{'form':template['title']}] + template['principal'] = template['title'] + " (" + template['POS'] + ")" + template['parts'] = {'single form':template['title']} + return template + if template['POS'] == 'noun/adj.': + template['parts'] = retreive_noun_forms(template) + if template['gender'] == '(m,f,n)': + template['parts'] = fix_two_ending(template['parts']) + parts = template['parts']['Nominative']['Singular'] + template['principal'] = ", ".join([x for x in parts.values()]) + template['forms'] = inside_out_multi_noun(template['parts']) + elif template['gender'] == '(m/f,n)': + #print(template['parts']['Nominative']['Singular']) + template['parts'] = fix_two_ending(template['parts']) + parts = template['parts']['Nominative']['Singular'] + template['principal'] = ", ".join([y for x,y in parts.items() if x != 'Feminine']) + template['forms'] = inside_out_multi_noun(template['parts']) + elif template['gender'] == '(m/f/n)': + template['parts'] = fix_two_ending(template['parts']) + part = template['parts']['Nominative']['Singular']["Masculine"] + nom = part + part = template['parts']['Genitive']['Singular']["Masculine"] + gen = part + template['principal'] = nom + ", " + gen + template['forms'] = inside_out_noun(template['parts']) + else: + part = template['parts']['Nominative']['Singular'] + nom = part + part = template['parts']['Genitive']['Singular'] + gen = part + template['principal'] = nom + ", " + gen + template['gender'] + template['forms'] = inside_out_noun(template['parts']) + elif template['POS'] == 'verb': + template['parts'] = retreive_verb_forms(template) + result = assign_principle_parts({},template['parts']) + result = [y['Principal Part'] for y in result['parts'].values()] + template['principal'] = ", ".join(result) + print("GETTING FORMS") + template['forms'] = inside_out_verb(template['parts']) + return template + +def redo(templates): + for template in templates: + template = get_forms(template) + return templates + +def fix_two_ending(parts): + for case in parts: + for number in parts[case]: + if parts[case][number]["Feminine"] == "---": + parts[case][number]["Feminine"] = deepcopy(parts[case][number]["Masculine"]) + if parts[case][number]["Neuter"] == "---": + parts[case][number]["Neuter"] = deepcopy(parts[case][number]["Masculine"]) + return parts + + +def remove_emdash(forms): + for form in forms: + if form['form'] == '—': + del form + + +def inside_out_multi_noun(parts): + forms = [] + print(parts) + for case in parts: + for number in parts[case]: + for gender in parts[case][number]: + if parts[case][number][gender] != "—": + if "," in parts[case][number][gender]: + for x in parts[case][number][gender].split(","): + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + elif "/" in parts[case][number][gender]: + for x in parts[case][number][gender].split("/"): + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + else: + print(f"{case} {number} {gender} = {parts[case][number][gender]}") + form = {'form':parts[case][number][gender]} + form['case'] = case + form['number'] = number + form['gender'] = gender + forms.append(deepcopy(form)) + remove_emdash(forms) + return forms + +def inside_out_noun(parts): + forms = [] + for case in parts: + for number in parts[case]: + if parts[case][number] != "—": + if "," in parts[case][number]: + for x in 
parts[case][number].split(","): + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + forms.append(deepcopy(form)) + elif "/" in parts[case][number]: + for x in parts[case][number].split("/"): + form = {'form':x.strip()} + form['case'] = case + form['number'] = number + forms.append(deepcopy(form)) + else: + form = {'form':parts[case][number]} + form['case'] = case + form['number'] = number + forms.append(deepcopy(form)) + remove_emdash(forms) + return forms + +def inside_out_verb(parts): + change_path("texts") + with open('sum_forms.txt','r') as f: + sum_forms = json.load(f) + if parts['Present']['Active']['Indicative']['FPS'] == "sum": + exception = True + else: + exception = False + forms = [] + for tense in parts: + for voice in parts[tense]: + for mood in parts[tense][voice]: + if mood == 'Infinitive': + if parts[tense][voice][mood] != "---": + if "," in parts[tense][voice][mood]: + split = parts[tense][voice][mood].split(",") + for x in split: + if x.strip() in sum_forms and not exception: + x = split[0][:split[0].find(" ")+1] + x.strip() + print(x) + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + elif "/" in parts[tense][voice][mood]: + for x in parts[tense][voice][mood].split("/"): + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':parts[tense][voice][mood]} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + elif mood == 'Participle': + if parts[tense][voice][mood] != "---": + if "," in parts[tense][voice][mood]: + split = parts[tense][voice][mood].split(",") + for x in split: + if x.strip() in sum_forms and not exception: + x = split[0][:split[0].find(" ")+1] + x.strip() + print(x) + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + elif "/" in parts[tense][voice][mood]: + for x in parts[tense][voice][mood].split("/"): + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + form = {'form':parts[tense][voice][mood]} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + forms.append(deepcopy(form)) + else: + for person in parts[tense][voice][mood]: + if parts[tense][voice][mood][person] != "---": + if "," in parts[tense][voice][mood][person]: + split = parts[tense][voice][mood][person].split(",") + for x in split: + if x.strip() in sum_forms and not exception: + x = split[0][:split[0].find(" ")+1] + x.strip() + print(x) + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + elif "/" in parts[tense][voice][mood][person]: + for x in parts[tense][voice][mood][person].split("/"): + form = {'form':x.strip()} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + else: + form = {'form':parts[tense][voice][mood][person]} + form['tense'] = tense + form['voice'] = voice + form['mood'] = mood + form['person'] = person + forms.append(deepcopy(form)) + remove_emdash(forms) + return forms + +def auto_parts(parts): + print("Enter look up word") + user_input = input(": ") + result = retreive_verb_forms({'search word':user_input,'title':''}) + result = assign_principle_parts(result,result) + 
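# join the retrieved principal parts into a comma-separated string for the user to confirm +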
result = [y['Principal Part'] for y in result['parts'].values()] + result = ", ".join(result) + print("Does this look right ('1' to except '0' to discard)?") + print(result) + user_input = input(": ") + if user_input == '1': + return result + else: + return parts + +def assign_principle_parts(table_info,tenses,template=None): + table_info['parts'] = { + 'Present':{'Principal Part':tenses['Present']['Active']['Indicative']['FPS']},\ + 'Infinitive':{'Principal Part':tenses['Present']['Active']['Infinitive']},\ + 'Perfect Act.':{'Principal Part':tenses['Perfect']['Active']['Indicative']['FPS']},\ + 'Perfect Pas.':{'Principal Part':tenses['Perfect']['Passive']['Participle']}} + if template: + table_info['title'] = f"Principal Parts: {template['title']}" + return table_info + +def retreive_verb_forms(template): + if 'specified' in template: + specified = template['specified'] + else: + specified = 0 + if template['search word'] == 'sum': + exception = True + else: + exception = False + + html_doc = tables.get_html(template['search word']) + if html_doc == None: + return + soup = BeautifulSoup(html_doc, 'html.parser') + page_list = soup.prettify().split('\n') + page_list = clean_page_list(page_list,exception) + + tenses = {"Present":'','Imperfect':'','Future':'','Future Perfect':'','Perfect':'','Pluperfect':''} + voices = {"Active":'','Passive':''} + moods = {'Indicative':[],'Subjunctive':[],'Imperative':[],"Infinitive":[],"Participle":[]} + mood_list = ['Indicative','Subjunctive','Imperative'] + persons = {'FPS':'---','SPS':'---','TPS':'---','FPP':'---','SPP':'---','TPP':'---'} + + for tense in tenses: + tenses[tense] = deepcopy(voices) + for voice in tenses[tense]: + tenses[tense][voice] = deepcopy(moods) + for mood in tenses[tense][voice]: + if mood in mood_list: + tenses[tense][voice][mood] = deepcopy(persons) + elif mood == 'Participle' or mood == 'Infinitive': + tenses[tense][voice][mood] = '---' + + tense = False + mood = False + voice = False + count = 0 + specified = 0 + for i in range(len(page_list)): + if count < specified: + if page_list[i].strip() == 'Conjugation of': + count += 1 + elif page_list[i].strip() == "verbal nouns": + break + elif page_list[i].strip(": ").title() in mood_list: + mood = page_list[i].strip(": ").title() + voice = False + tense = False + elif page_list[i].strip(": ")[:-1].title() in ['Infinitive','Participle']: + mood = page_list[i].strip(": ")[:-1].title() + columns = ['Active','Passive'] + rows = ['Present','Perfect','Future','Present','Perfect','Future'] + index = 0 + continue + elif mood in mood_list: + if page_list[i].strip().title() in voices: + voice = page_list[i].strip().title() + index = 0 + elif voice: + if page_list[i].strip().title() in tenses: + tense = page_list[i].strip().title() + index = 0 + elif tense: + if index not in range(6): + tense = False + else: + if tenses[tense][voice][mood][list(persons.keys())[index]] == '---': + tenses[tense][voice][mood][list(persons.keys())[index]] = page_list[i].strip() + index += 1 + elif mood in ['Infinitive','Participle']: + if index == 6: + mood = False + else: + tenses[rows[index]][columns[index//3]][mood] = page_list[i].strip() + index += 1 + return tenses + +def count_indent(a): + return len(a) - len(a.lstrip()) + +def manual_sort(cases,case,holder,multi=True): + print(f"\nSorting needed for {case} case") + genders = {"M":"Masculine","F":"Feminine","N":"Neuter"} + numbers = {"S":"Singular","P":"Plural"} + while True: + for word in holder: + user_input = "" + if multi: + while True: + print(f"Select 
gender for {word} ({case}) ('M', 'F', 'N') ('X' to discard)") + user_input = input(": ") + if user_input.upper() == "X": + break + if len(user_input) == 2: + user_input = list(user_input) + if user_input[0].upper() not in genders and user_input[1].upper() not in numbers: + print("Invalid entry") + continue + else: + gender = genders[user_input[0].upper()] + number = numbers[user_input[1].upper()] + break + if user_input.upper() not in genders: + print("Invalid entry") + continue + else: + gender = genders[user_input.upper()] + break + if user_input == "X": + break + while True: + if len(user_input) == 2: + break + print(f"Select number for {word} ({case}) ('S', 'P')") + user_input = input(": ") + if user_input.upper() not in numbers: + print("Invalid entry") + continue + else: + number = numbers[user_input.upper()] + break + if multi: + if cases[case][number][gender] != "---": + cases[case][number][gender] += ", " + word + else: + cases[case][number][gender] = word + else: + if cases[case][number] != "---": + cases[case][number] += ", " + word + else: + cases[case][number] = word + print("Like this? ('1' to accept, '2' to manually edit, any other key to try again)") + print_parts(cases[case]) + user_input = input(": ") + if user_input == '1': + break + elif user_input == '2': + return manual_edit(cases,case) + else: + reset(cases[case]) + return cases + +def reset(case): + for key in case: + if type(case[key]) == dict: + reset(case[key]) + else: + case[key] = "---" + +def manual_edit(parts,case): + while True: + print("Select term to edit ('0' to finish)") + index = 1 + key_holder = {} + for number in parts[case]: + if type(parts[case][number]) == dict: + for gender in parts[case][number]: + print(f"{index}. {case} {number} {gender} {parts[case][number][gender]}") + key_holder[str(index)] = {'number':number,'gender':gender} + index += 1 + else: + print(f"{index}. 
{case} {number} {parts[case][number]}") + key_holder[str(index)] = {'number':number} + index += 1 + user_input = input(": ") + if user_input == '0': + return parts + elif user_input in key_holder: + keys = key_holder[user_input] + print(f"Enter correct form for {case} {' '.join([x for x in key_holder[user_input].values()])} ('0' to go back)") + user_input = input(": ") + if user_input == '0': + continue + if 'gender' in keys: + parts[case][keys['number']][keys['gender']] = user_input + else: + parts[case][keys['number']] = user_input + + +def retreive_noun_forms(template): + html_doc = tables.get_html(template['search word']) + if html_doc == None: + return + soup = BeautifulSoup(html_doc, 'html.parser') + page_list = soup.prettify().split('\n') + page_list = clean_page_list(page_list) + + multi = True if template['gender'] in ["(m,f,n)","(m/f,n)","(m/f/n)"] else False + one_ending = True if template['gender'] == "(m/f/n)" else False + two_ending = True if template['gender'] == "(m/f,n)" else False + three_endiing = True if template['gender'] == "(m,f,n)" else False + + + cases = {"Nominative":'','Genitive':'','Dative':'','Accusative':'','Ablative':'','Vocative':''} + genders = {'Masculine':'---','Feminine':'---','Neuter':'---'} + numbers = {'Singular':'---',"Plural":'---'} + if multi: + for n in numbers: + numbers[n] = deepcopy(genders) + for case in cases: + cases[case] = deepcopy(numbers) + + case = False + start = 0 + last = False + holder = [] + indent = 0 + count = 0 + specified = template['defno'] - 1 + stop = False + for i in range(len(page_list)): + if count_indent(page_list[i]) < indent - 2: + stop = True + if page_list[i].strip().title() == "Case" or page_list[i].strip().title() == "Number": + start = 1 + if page_list[i].strip().title() == "Singular" and start == 1: + start = 2 + if (page_list[i].strip().title() == "Plural" or page_list[i].strip().title() == "Case / Gender") and start == 2: + start = 3 + if count < specified: + count += 1 + start = 0 + if page_list[i].strip().title() in cases and start == 3 or stop: + if not indent: + indent = count_indent(page_list[i]) + if case or stop: + if multi: + if len(holder) > 6: + cases = manual_sort(cases,case,holder) + elif len(holder) == 6: + if three_endiing: + cases[case]['Singular']['Masculine'] = holder[0] + cases[case]['Singular']["Feminine"] = holder[1] + cases[case]['Singular']['Neuter'] = holder[2] + cases[case]['Plural']['Masculine'] = holder[3] + cases[case]['Plural']["Feminine"] = holder[4] + cases[case]['Plural']['Neuter'] = holder[5] + else: + cases = manual_sort(cases,case,holder) + elif len(holder) == 5: + cases = manual_sort(cases,case,holder) + elif len(holder) == 4: + if two_ending: + cases[case]['Singular']['Masculine'] = holder[0] + cases[case]['Singular']['Neuter'] = holder[1] + cases[case]['Plural']['Masculine'] = holder[2] + cases[case]['Plural']['Neuter'] = holder[3] + else: + cases = manual_sort(cases,case,holder) + elif len(holder) == 3: + cases = manual_sort(cases,case,holder) + elif len(holder) == 2: + cases[case]['Singular']['Masculine'] = holder[0] + cases[case]['Plural']['Masculine'] = holder[1] + else: + if len(holder) > 2: + cases = manual_sort(cases,case,holder,False) + else: + cases[case]['Singular'] = holder[0] + cases[case]['Plural'] = holder[1] + if stop: + indent = 0 + stop = False + break + holder = [] + case = page_list[i].strip() + elif case: + holder.append(page_list[i].strip()) + + return cases + +def add_sum_forms(page_list,i,alt=''): + change_path('texts') + with 
open('sum_parts.txt','r') as f: + sum_forms = json.load(f) + if 'Imperfect' in page_list[i - 1].title(): + tense = 'Imperfect' + elif 'Future' in page_list[i - 1].title(): + tense = 'Future' + elif 'Present' in page_list[i - 1].title(): + tense = 'Present' + if 'Indicative' in page_list[i - 1].title(): + mood = 'indicative' + elif 'Subjunctive' in page_list[i - 1].title(): + mood = 'subjunctive' + voice = 'active' + keys = ['tpp','spp','fpp','tps','sps','fps'] + for key in keys: + if alt: + page_list.insert(i + 1,alt + "/" + page_list[i - 2].strip() + " " + sum_forms[tense][voice][mood][key]) + else: + page_list.insert(i + 1,page_list[i - 2] + " " + sum_forms[tense][voice][mood][key]) + return page_list + + +def add_tables(): + + tables_list = tables.get_tables('Latin') + table_info = {'title':'','type':''} + table_file = "Latin-tables.txt" + + while True: + print("Use table template? ('1' for yes, '0' to go back, any other key for no) 'AUTO' to auto-add all forms") + user_input = input(": ") + if user_input == '1': + template = tables.get_template('Latin') + if template == None: + continue + table_info['title'] = template['title'] + elif user_input.upper() == 'AUTO': + auto_add(tables_list,table_info,table_file) + return + elif user_input == '0': + return + else: + template = None + print(f"Enter table title ('0' to go back)") + table_info['title'] = input(': ') + if table_info['title'] == '0': + continue + + exit_second_loop = False + while not exit_second_loop: + complete = False + print("Select table type") + options = {'1':"'1'> noun\n",'2':"'2'> verb principal parts\n",'3':"'3'> verb conjugation\n",'4':"'4'> complete verb system\n",'5':"'5'> complete forms\n"} + + options.update({'4':"'4'> complete verb system\n"}) + options.update({'0':"'0'> to go back"}) + user_input = get_selection(options) + if user_input == '0': + exit_second_loop = True + continue + elif user_input == '1': + table_info['type'] = 'noun' + elif user_input == '2': + table_info['type'] = 'parts' + elif user_input == '3': + table_info['type'] = 'conj' + elif user_input == '4': + table_info['type'] = 'conj' + complete = True + elif user_input == '5': + table_info['type'] = 'form' + complete = True + + exit_third_loop = False + while not exit_third_loop: + if template: + table_info['definition'] = template['definition'] + else: + definition = input("Enter definition ('0' to go back): ") + if definition == '0': + exit_third_loop = True + continue + table_info['definition'] = definition + while True: + print("Enter definition number (1-n) ('0' to go back)") + try: + user_input = int(input(": ")) + except: + print("Invalid entry") + continue + if user_input == 0: + break + else: + table_info['specified'] = user_input + + if complete: + if table_info['type'] == 'conj': + add_complete_verb_system(template,table_info,tables_list,table_file) + elif table_info['type'] == 'form': + add_complete_forms(template) + exit_second_loop = exit_third_loop = True + prtin("tables added succesfully") + continue + + exit_loop_four = False + while not exit_loop_four: + print(f"Do you want to auto-retreive {table_info['title']}? 
('1' for yes, '0' to go back, '00' to exit)") + user_input = input(": ") + if user_input == '1': + if table_info['type'] in ['parts','conj']: + result = auto_retreive_verb(table_info,template) + else: + result = auto_retreive_noun(table_info,template) + if result: + tables_list = tables.add_table(tables_list,table_info) + tables.save_tables(tables_list,table_file) + exit_loop_four = exit_third_loop = exit_second_loop = True + continue + elif user_input == '0': + exit_loop_four = exit_third_loop = exit_second_loop = True + continue + elif user_input == '00': + return + +def auto_add(tables_list,table_info,table_file): + change_path('templates') + myFiles = glob.glob('*.txt') + template_file = "Latin_templates.txt" + if template_file not in myFiles: + return + else: + with open(template_file,'r') as f: + templates = json.load(f) + for template in templates: + print(template['title']) + i = template + add_complete_forms(template) + table_info['definition'] = template['definition'] + if template["POS"] == 'verb': + table_info['type']= 'conj' + add_complete_verb_system(template,table_info,tables_list,table_file) + if template["POS"] == 'noun/adj.': + table_info['type']= 'noun' + if template['gender'] == "(m,f,n)" or template['gender'] == "(m/f,n)" or template['gender'] == '(m/f/n)': + continue + table_info = auto_retreive_noun(table_info,template) + tables_list = tables.add_table(tables_list,table_info) + tables.save_tables(tables_list,table_file) + +def empty(item): + if type(item) != dict: + return True if item == '---' or item == '—' else False + for key in item: + if not empty(item[key]): + return False + return True + +def add_complete_forms(template): + return + forms_list = tables.get_forms("Latin") + for i in range(len(template['forms'])): + found = False + for x in range(len(forms_list)): + if forms_list[x]['title'] == unidecode(template['forms'][i]['form']): + found = True + instance = {'form':template['forms'][i]['form']} + instance['root'] = template['title'] + instance['features'] = {x:y for x,y in template['forms'][i].items() if x != 'form'} + for y in forms_list[x]['instances']: + same = True + for key in y['features'].keys(): + if key not in instance['features']: + same = False + break + else: + if y['features'][key] != instance['features'][key]: + #print(f"NOT SAME {y['features']} AND {instance['features']}") + same = False + break + if same: + break + if same: + print(f"rejected DUPLICATE") + break + ''' + if 'tense' in instance['features']: + if instance['features']['tense'] not in ["Present",'Imperfect','Future']: + #print(f"rejected {instance['features']['tense']}") + break + if 'mood' in instance['features']: + if instance['features']['mood'] in ["Subjunctive",'Imperative']: + #print(f"rejected {instance['features']['mood']}") + break + ''' + instance['principal'] = template['principal'] + instance['definition'] = template['definition'] + print("ADDING TO EXISTING") + print(instance) + forms_list[x]['instances'].append(deepcopy(instance)) + if not found: + form = {'title':unidecode(template['forms'][i]['form'])} + instance = {'form':template['forms'][i]['form']} + instance['root'] = template['title'] + instance['features'] = {x:y for x,y in template['forms'][i].items() if x != 'form'} + ''' + if 'tense' in instance['features']: + if instance['features']['tense'] not in ["Present",'Imperfect','Future']: + #print(f"rejected {instance['features']['tense']}") + continue + if 'mood' in instance['features']: + if instance['features']['mood'] in ["Subjunctive",'Imperative']: + 
#print(f"rejected {instance['features']['mood']}") + continue + ''' + instance['principal'] = template['principal'] + instance['definition'] = template['definition'] + form['instances'] = [deepcopy(instance)] + print("ADDING NEW") + print(instance) + forms_list.append(deepcopy(form)) + tables.save_tables(forms_list,'Latin_forms.txt') + +def add_complete_verb_system(template,table_info,tables_list,table_file): + table_info['principal'] = template['principal'] + tenses = template['parts'] + for tense in ["Present",'Imperfect','Future','Future Perfect','Perfect',"Pluperfect"]: + for voice in ["Active",'Passive']: + table_info = assign_table_info(table_info,template,tenses,tense,voice) + if empty(table_info['parts']): + #print(f"\t\t\t\t\t\t\t\t{table_info['title']} was blank") + continue + else: + tables_list = tables.add_table(tables_list,table_info) + tables.save_tables(tables_list,table_file) + #print(f"\t\t\t\t\t\t\t\t{table_info['title']} saved") + table_info['type'] = 'parts' + table_info = assign_principle_parts(table_info,tenses,template) + tables_list = tables.add_table(tables_list,table_info) + tables.save_tables(tables_list,table_file) + +def auto_retreive_verb(table_info,template): + tenses = template['parts'] + if table_info['type'] == 'parts': + table_info = assign_principle_parts(table_info,tenses,template) + return table_info + elif table_info['type'] == 'conj': + options1 = {'1':"Present",'2':'Imperfect','3':'Future','4':'Perfect','5':'Pluperfect','6':'Future Perfect'} + user_input = get_selection(options1,2) + tense = options1[user_input] + options2 = {'1':"Active",'2':'Passive'} + user_input = get_selection(options2,2) + voice = options2[user_input] + options3 = {'1':'Indicative','2':'Subjunctive','3':'Imperative'} + user_input = get_selection(options3,2) + mood = options3[user_input] + table_info = assign_table_info(table_info,template,tenses,tense,voice,mood) + return table_info + + + +def assign_table_info(table_info,template,tenses,tense,voice): + x = 'Indicative' + table_info['parts'] = {'1st Person':{'Singular':tenses[tense][voice][x]['FPS'],'Plural':tenses[tense][voice][x]['FPP']},\ + '2nd Person':{'Singular':tenses[tense][voice][x]['SPS'],'Plural':tenses[tense][voice][x]['SPP']},\ + '3rd Person':{'Singular':tenses[tense][voice][x]['TPS'],'Plural':tenses[tense][voice][x]['TPP']}} + table_info['parts'].update({'Infinitive':tenses[tense][voice]['Infinitive']}) + table_info['parts'].update({'Participle':tenses[tense][voice]['Participle']}) + x = 'Imperative' + table_info['parts'].update({'Imperative':{'1st Person':{'Singular':tenses[tense][voice][x]['FPS'],'Plural':tenses[tense][voice][x]['FPP']},\ + '2nd Person':{'Singular':tenses[tense][voice][x]['SPS'],'Plural':tenses[tense][voice][x]['SPP']},\ + '3rd Person':{'Singular':tenses[tense][voice][x]['TPS'],'Plural':tenses[tense][voice][x]['TPP']}}}) + x = 'Subjunctive' + table_info['parts'].update({'Subjunctive':{'1st Person':{'Singular':tenses[tense][voice][x]['FPS'],'Plural':tenses[tense][voice][x]['FPP']},\ + '2nd Person':{'Singular':tenses[tense][voice][x]['SPS'],'Plural':tenses[tense][voice][x]['SPP']},\ + '3rd Person':{'Singular':tenses[tense][voice][x]['TPS'],'Plural':tenses[tense][voice][x]['TPP']}}}) + if template: + table_info['title'] = f"{tense.title()} {voice.title()}: {template['title']}" + return table_info + +def auto_retreive_noun(table_info,template): + table_info['principal'] = template['principal'] + parts = template['parts'] + case_keys = 
["Nominative",'Genitive','Dative','Accusative','Ablative','Vocative'] + number_keys = ['Singular',"Plural"] + table_info['parts'] = {} + for case in case_keys: + table_info['parts'][case] = {} + for number in number_keys: + if 'Masculine' not in parts[case][number]: + table_info['parts'][case][number] = parts[case][number] + elif 'Masculine' in parts[case][number]: + table_info['parts'][case][number] = ", ".join([parts[case][number]['Masculine'], parts[case][number]['Feminine'], parts[case][number]['Neuter']]) + if template: + table_info['title'] = f"{template['title']}" + return table_info + + + + + +def clean_page_list(page_list,exception=False): + offset = 0 + for i in range(1,len(page_list)): + i = i - offset + if ' ' in page_list[i]: + page_list[i] = page_list[i].replace(' ',' ') + if page_list[i].strip()[0] == "<": + del page_list[i] + offset += 1 + + offset = 0 + sum_infinitive = ['īrī','esse'] + for i in range(1,len(page_list)): + i = i - offset + if page_list[i].strip(' ') == ',': + page_list[i - 1] = page_list[i - 1].strip(' ') + ", " + page_list[i + 1].strip(' ') + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i].strip(' ') == 'sum' and page_list[i - 1].strip(' ')[0] == '+' and page_list[i - 1].strip(' ')[-2:] == 'of': + if page_list[i - 3].strip() == 'or': + page_list = add_sum_forms(page_list,i,page_list[i - 4].strip()) + for x in range(5): + del page_list[i - 4] + offset += 5 + else: + page_list = add_sum_forms(page_list,i) + for x in range(3): + del page_list[i - 2] + offset += 3 + elif page_list[i].strip(' ') == '/': + page_list[i - 1] = page_list[i - 1].strip(' ') + "/" + page_list[i + 1].strip(' ') + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i].strip(' ') in sum_infinitive and previous not in sum_infinitive and not exception: + page_list[i - 1] = page_list[i - 1].strip(' ') + ' ' + page_list[i].strip(' ') + previous = page_list[i].strip(' ') + del page_list[i] + offset += 1 + elif page_list[i].strip()[0] == "<" or (len(page_list[i].strip()) == 1 and page_list[i].strip().isnumeric()): + del page_list[i] + offset += 1 + else: + previous = '' + return page_list + +def filter_cards(tables_list,random_list): + filters = { + 'Pluperfect Passive':97, + 'Future Perfect Passive':97, + 'Pluperfect':75, + 'Future Perfect':75, + 'Perfect Passive':75, + 'Imperfect Passive':90, + 'Imperfect Active':65, + 'Present Passive':50, + 'Future Passive':65, + 'Future Active':25, + } + for i in range(len(tables_list)): + r = random.randrange(0,100,1) + for x in filters: + if x in tables_list[i]['title']: + if r < filters[x]: + random_list.remove(i) + break + return random_list + +def features_join(features): + join = "" + if 'gender' in features: + join += " " + features['gender'] + "" + for x,y in features.items(): + if x == "mood" and (y != "Infinitive" and y != 'Participle'): + join += " " + y + "" + elif x == 'gender': + pass + else: + join += " " + y + "" + return join.strip() + +def print_forms(): + out_file = 'Latin-FormCards.txt' + original_stdout = sys.stdout + change_path('flashcards') + sys.stdout = open(out_file,'w') + forms_list = tables.get_forms("Latin") + random_list = list(range(len(forms_list))) + random.shuffle(random_list) + for i in random_list: + print('

            · ' + unidecode(forms_list[i]['title']) + '

            |',end='') + for instance in forms_list[i]['instances']: + features = features_join(instance['features']) + if features: + print('

            ' + instance['form'] + '
            ',end='') + else: + print('

            ',end='') + print(features,end='') + print(' form of: ' + instance['principal'] + '
            ',end='') + print("Definition: " + instance['definition'] + '


            ',end='') + print('|"forms"') + sys.stdout = original_stdout + +def print_tables(tables_list): + print_forms() + out_file = 'Latin-TableCards.txt' + original_stdout = sys.stdout + change_path('flashcards') + sys.stdout = open(out_file,'w') + #random_list = list(range(len(tables_list))) + #random_list = filter_cards(tables_list,random_list) + #random.shuffle(random_list) + for i in range(len(tables_list)): + + + if tables_list[i]['type'] != 'noun': + print('

            ' + tables_list[i]['title'] + '

            |',end='') + else: + print('

            Forms: ' + tables_list[i]['title'] + '

            |',end='') + + if tables_list[i]['definition']: + body_string = '

            ' + tables_list[i]['definition'] + '

            ' + else: + body_string = '' + + body_string = html_x.set_styles(body_string) + + if tables_list[i]['type'] == 'parts': + body_string = html_x.create_table(body_string,tables_list[i]['parts'],'parts',1) + elif tables_list[i]['type'] == 'noun': + body_string = html_x.create_table(body_string,tables_list[i]['parts'],'noun',2) + elif tables_list[i]['type'] == 'conj': + parts = {k:v for k,v in tables_list[i]['parts'].items() if k != "Infinitive" and k != "Participle" and k != "Imperative" and k != "Subjunctive"} + body_string = html_x.create_table(body_string,parts,'conj',2) + body_string = html_x.create_box(body_string,'Infinitive',tables_list[i]["parts"]["Infinitive"]) + body_string = html_x.create_box(body_string,'Participle',tables_list[i]["parts"]["Participle"]) + if 'Subjunctive' in tables_list[i]['parts']: + body_string += '

            Subjunctive:

            ' + body_string = html_x.create_table(body_string,tables_list[i]['parts']['Subjunctive'],'conj',2) + if 'Imperative' in tables_list[i]['parts']: + body_string += '

            Imperative:

            ' + body_string = html_x.create_table(body_string,tables_list[i]['parts']['Imperative'],'conj',2) + + + print(body_string + '|"table"') + + sys.stdout = original_stdout \ No newline at end of file diff --git a/tables_oe_ext.py b/tables_oe_ext.py new file mode 100644 index 0000000..50e2ae4 --- /dev/null +++ b/tables_oe_ext.py @@ -0,0 +1,316 @@ + +from bs4 import BeautifulSoup +import tables +import html_x +from load_dict import change_path +from get_selection import get_selection +from copy import deepcopy +import random +import sys + +def add_tables(): + + tables_list = tables.get_tables('Old English') + table_info = {'title':'','type':''} + table_file = "OldEnglishtables.txt" + + while True: + print("Use table template? ('1' for yes, '0' to go back, any other key for no)") + user_input = input(": ") + if user_input == '1': + template = tables.get_template('Old English') + if template == None: + continue + table_info['title'] = template['title'] + elif user_input == '0': + return + else: + template = None + print(f"Enter table title ('0' to go back)") + table_info['title'] = input(': ') + if table_info['title'] == '0': + continue + + exit_second_loop = False + while not exit_second_loop: + complete = False + print("Select table type") + options = {'1':"'1'> noun\n",'2':"'2'> verb conjugation\n"} + options.update({'0':"'0'> to go back"}) + user_input = get_selection(options) + if user_input == '0': + exit_second_loop = True + continue + elif user_input == '1': + table_info['type'] = 'noun' + elif user_input == '2': + table_info['type'] = 'conj' + + exit_third_loop = False + while not exit_third_loop: + if template: + table_info['definition'] = template['definition'] + else: + definition = input("Enter definition ('0' to go back): ") + if definition == '0': + exit_third_loop = True + continue + table_info['definition'] = definition + while True: + print("Enter definition number (1-n) ('0' to go back)") + try: + user_input = int(input(": ")) + except: + print("Invalid entry") + continue + if user_input == 0: + break + else: + table_info['specified'] = user_input + + exit_loop_four = False + while not exit_loop_four: + print(f"Do you want to auto-retreive {table_info['title']}? 
('1' for yes, '0' to go back, '00' to exit)") + user_input = input(": ") + if user_input == '1': + if table_info['type'] in ['parts','conj']: + result = auto_retreive_verb(table_info,template) + else: + result = auto_retreive_noun(table_info,template) + if result: + tables_list = tables.save_table(tables_list,table_info,'Old English') + tables.pop_template(template,'Old English') + exit_loop_four = exit_third_loop = exit_second_loop = True + continue + elif user_input == '0': + exit_loop_four = exit_third_loop = exit_second_loop = True + continue + elif user_input == '00': + return + +def empty(item): + if type(item) != dict: + return True if item == '---' or item == '—' else False + for key in item: + if not empty(item[key]): + return False + return True + +def auto_retreive_verb(table_info,template): + moods = retreive_verb_forms(template) + if moods == None: + return None + table_info['parts'] = moods + return True + +def auto_retreive_noun(table_info,template): + cases = retreive_noun_forms(template) + if cases == None: + return None + case_keys = ["Nominative",'Genitive','Dative','Accusative'] + number_keys = ['Singular',"Plural"] + parts = {} + for case in case_keys: + parts[case] = {} + for number in number_keys: + parts[case][number] = cases[case.lower()][number] + table_info['parts'] = parts + if template: + table_info['title'] = f"{template['title']}" + return True + +def retreive_verb_forms(template): + if 'specified' in template: + specified = template['specified'] + else: + specified = 0 + html_doc = tables.get_html(template) + if html_doc == None: + return + soup = BeautifulSoup(html_doc, 'html.parser') + page_list = soup.prettify().split('\n') + page_list = clean_page_list(page_list) + + tenses = {"present tense":'','past tense':''} + tenses_2 = {"present":'','past':''} + moods = {"infinitive":[],'indicative':[],'subjunctive':[],'imperative':[],"participle":[]} + mood_list = ["infinitive",'indicative mood','subjunctive','imperative',"participle"] + persons_1 = {'1p sing.':'---','2p sing.':'---','3p sing.':'---','plural':'---'} + person_list = {'first person singular':'1p sing.','second person singular':'2p sing.','third person singular':'3p sing.','plural':'plural'} + persons_2 = {'singular':'---','plural':'---'} + + moods['infinitive'] = [] + moods['indicative'] = deepcopy(persons_1) + for key in persons_1: + moods['indicative'][key] = deepcopy(tenses) + moods['subjunctive'] = deepcopy(persons_2) + for key in persons_2: + moods['subjunctive'][key] = deepcopy(tenses) + moods['imperative'] = deepcopy(persons_2) + moods['participle'] = deepcopy(tenses_2) + + mood = False + for i in range(len(page_list)): + if page_list[i].strip(" ") in mood_list: + mood = page_list[i].strip(" ") + if mood == 'indicative mood': + mood = 'indicative' + person = tense = False + index = 0 + elif mood == 'infinitive': + moods[mood].append(page_list[i].strip(" ")) + elif mood == 'indicative': + if page_list[i].strip(" ") in person_list: + person = person_list[page_list[i].strip(" ")] + t_list = list(tenses.keys()) + index = 0 + elif index < 2 and person: + moods[mood][person][t_list[index]] = page_list[i].strip(" ") + index += 1 + elif index > 1: + mood = person = False + elif mood == 'subjunctive': + if page_list[i].strip(" ") in persons_2: + person = page_list[i].strip(" ") + t_list = list(tenses.keys()) + index = 0 + elif index < 2 and person: + moods[mood][person][t_list[index]] = page_list[i].strip(" ") + index += 1 + elif index > 1: + mood = person = False + elif mood == 'imperative': + if 
page_list[i].strip(" ") in persons_2: + person = page_list[i].strip(" ") + index = 0 + elif index < 2 and person: + moods[mood][person] = page_list[i].strip(" ") + index += 1 + elif index > 1: + mood = person = False + elif mood == 'participle': + t_list = list(tenses_2.keys()) + if page_list[i].strip(" ") in tenses_2: + pass + elif index < 2: + moods[mood][t_list[index]] = page_list[i].strip(" ") + index += 1 + elif index > 1: + mood = tense = False + return moods + +def retreive_noun_forms(template): + html_doc = tables.get_html(template) + if html_doc == None: + return + soup = BeautifulSoup(html_doc, 'html.parser') + page_list = soup.prettify().split('\n') + page_list = clean_page_list(page_list) + + cases = {"nominative":'','genitive':'','dative':'','accusative':''} + numbers = {'Singular':'---',"Plural":'---'} + for case in cases: + cases[case] = deepcopy(numbers) + + start = 0 + case = False + for i in range(len(page_list)): + if page_list[i].strip() == "Case": + start = 1 + if page_list[i].strip() == "Singular" and start == 1: + start = 2 + if page_list[i].strip() == "Plural" and start == 2: + start = 3 + if page_list[i].strip() in cases and start == 3: + case = page_list[i].strip() + index = 0 + elif case: + if index == 0: + number = 'Singular' + elif index == 1: + number = 'Plural' + elif index > 1: + index = 0 + case = False + continue + if cases[case][number] == "---": + cases[case][number] = page_list[i].strip() + index += 1 + return cases + +def clean_page_list(page_list): + offset = 0 + for i in range(1,len(page_list)): + i = i - offset + if ' ' in page_list[i]: + page_list[i] = page_list[i].replace(' ',' ') + if page_list[i].strip()[0] == "<": + del page_list[i] + offset += 1 + + offset = 0 + for i in range(1,len(page_list)): + i = i - offset + if page_list[i].strip(' ') == ',': + page_list[i - 1] = page_list[i - 1].strip(' ') + ", " + page_list[i + 1].strip(' ') + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i].strip(' ') == '/': + page_list[i - 1] = page_list[i - 1].strip(' ') + "/" + page_list[i + 1].strip(' ') + del page_list[i + 1] + del page_list[i] + offset += 2 + elif page_list[i - 1].strip(' ') == ')' and len(page_list[i - 2].strip(' ')) < 4 and page_list[i - 3].strip(' ') == '(': + page_list[i] = "(" + page_list[i - 2].strip(' ') + ")" + page_list[i].strip(' ') + for x in range(3): + del page_list[i - 3] + offset += 3 + elif page_list[i].strip()[0] == "<" or (len(page_list[i].strip()) == 1 and page_list[i].strip().isnumeric()): + del page_list[i] + offset += 1 + else: + previous = '' + return page_list + +def print_tables(tables_list): + out_file = 'OldEnglishflashcardtables.txt' + original_stdout = sys.stdout + change_path('flashcards') + sys.stdout = open(out_file,'w') + random_list = list(range(len(tables_list))) + random.shuffle(random_list) + for i in random_list: + + print('

            Forms: ' + tables_list[i]['title'] + '

            |',end='') + + if tables_list[i]['definition']: + body_string = '

            ' + tables_list[i]['definition'] + '

            ' + else: + body_string = '' + + body_string = html_x.set_styles(body_string) + + if tables_list[i]['type'] == 'noun': + body_string = html_x.create_table(body_string,tables_list[i]['parts'],'noun',2) + elif tables_list[i]['type'] == 'conj': + body_string += '

            infinitive:

            ' + body_string = html_x.create_box(body_string,tables_list[i]["parts"]["infinitive"][0],tables_list[i]["parts"]["infinitive"][1]) + body_string += '

            indicative:

            ' + indicative = tables_list[i]['parts']['indicative'] + body_string = html_x.create_table(body_string,indicative,'',2) + body_string += '

            subjunctive:

            ' + subjunctive = tables_list[i]['parts']['subjunctive'] + body_string = html_x.create_table(body_string,subjunctive,'',2) + body_string += '

            imperative:

            ' + imperative = tables_list[i]['parts']['imperative'] + body_string = html_x.create_style(body_string,1) + body_string = html_x.create_body(body_string,imperative,'') + body_string += '

            participle:

            ' + participle = tables_list[i]['parts']['participle'] + body_string = html_x.create_style(body_string,1) + body_string = html_x.create_body(body_string,participle,'') + + print(body_string + '|"table"') + + sys.stdout = original_stdout \ No newline at end of file diff --git a/word_methods.py b/word_methods.py new file mode 100644 index 0000000..b153386 --- /dev/null +++ b/word_methods.py @@ -0,0 +1,429 @@ + +''' +Description: + + copy word: + takes a fetch and new_word + returns a new_word + take a result from the wiktionary parser and + copies a simpler version into a new_word taken as an argument + + getTags: + takes a list of tags and allows user to add or delete tags +''' + +#from get_simple import get_simple +from edit_dictionary import chop_parens +import re +from unidecode import unidecode +import get_selection +import copy + + +# REMOVE OR +# # # # # # # # # # +def remove_or(text): + text = text.split() + offset = 0 + for i in range(len(text)): + i = i - offset + if text[i] == 'or': + text[i-1] += "," + del text[i] + offset += 1 + new_text = '' + for i in range(len(text)): + new_text += text[i] + " " + return new_text +# END REMOVE OR + +# VERB EDIT +# # # # # # # # # +def verb_edit(text): + # slip definitions into individual words + text = text.split(" ") + + offset = 0 + # delete 'I's replace 'am's with 'be' + for num in range(len(text)): + num = num - offset + if text[num] == "I": + del text[num] + offset += 1 + elif text[num].strip(";,") == "myself": + text[num] = text[num].replace("myself","oneself") + elif text[num] == "am": + text[num] = "be" + elif text[num] == "my": + text[num] = "one's" + + # recombine word + new_text = '' + for num in range(len(text)): + if num < len(text) - 1: + new_text += text[num] + " " + else: + new_text += text[num] + return new_text +# VERB EDIT + + +# PARTICIPLE EDIT +# # # # # # # # # # +def participle_edit(text,first): + text = list(text) + new_text = '' + for num in range(len(text)): + if text[num] == chr(160): + text[num] = chr(32) + new_text += text[num] + text = new_text + text = re.split(",|;|:",text) + + offset = 0 + string_bank = ['having been','having'] + for i in range(len(text)): + i = i - offset + for string in string_bank: + if string in text[i]: + cut = text[i][text[i].find(string + " ") + len(string) + 1:].strip('. ') + if cut in text or " " + cut in text: + del text[i] + offset += 1 + break + + for num in range(len(text)): + if 'which is to be' in text[num] and not first: + #print(text[num]) + cut = text[num].find('to be ') + 6 + text[num] = text[num][cut:] + #print(text[num]) + text[num] = remove_or(text[num]) + elif 'which is to be' in text[num]: + first = False + text[num] = remove_or(text[num]) + + + new_text = '' + for num in range(len(text)): + if num < len(text) - 1: + new_text += text[num].strip() + ", " + else: + new_text += text[num].strip() + return new_text, first +# END PARTICPLE EDIT + +# COPY WORD +# # # # # # # # # # # # # +def copy_word(fetch_word,new_word,language): + + # Declare components of new_word + new_word['heading'] = None + entries = [] + entry = {} + partOfSpeech = [] + principleParts = [] + simpleParts = [] + defs = [] + roots_list = [] + + # for Loop LEVEL ONE, may contain empty 'definitions' + for outer in range(len(fetch_word)): + + if fetch_word[outer]['etymology'] != '': + etymology = fetch_word[outer]['etymology'] + else: + etymology = '' + + # If not empty... 
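+ # the branch below covers an empty definitions list by storing a '*blank definition*' placeholder entry; the else branch copies the real definitions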
+ if fetch_word[outer]['definitions'] == []: + principleParts = simpleParts = partOfSpeech = '*blank definition*' + # Create new entry + entry['partOfSpeech'] = partOfSpeech + entry['principleParts'] = principleParts + entry['simpleParts'] = simpleParts + entry['defs'] = [] + entry['etymology'] = etymology + + # append entry to entries + entries.append(entry) + + # reset entry to blank dict + entry = {} + else: + # for Loop LEVEL TWO, copy info into new word components + for middle in range(len(fetch_word[outer]['definitions'])): + + # copy definitions text list into new list with shorter name + text = fetch_word[outer]['definitions'][middle]['text'] + + # capture heading (only one needed per word) + if new_word['heading'] == None: + # heading should have same number of characters as handle + # compare de-macroned version of text[0] with handle + # if text[0] starts with something else; heading will just be handle + if unidecode(text[0][:len(new_word['handle'])]) != new_word['handle']: + new_word['heading'] = new_word['handle'] + else: + # should be the same number of character as handle + new_word['heading'] = text[middle][:len(new_word['handle'])] + + # capture part of speech + partOfSpeech = fetch_word[outer]['definitions'][middle]['partOfSpeech'] + + # How many lines in the definition (including principal parts) + num_entries = len(text) + + # for Loop LEVEL THREE, loop through text[:] to copy definitions + first = True + for inner in range(num_entries): + + # Copy definition + text = fetch_word[outer]['definitions'][middle]['text'][inner] + + # Capture principal parts (first entry in ^ 'text') + if inner == 0: + principleParts = text + # call get_simple to simplify principle parts, remove extraneous words + simpleParts = principleParts #get_simple(partOfSpeech,principleParts,new_word['heading'],language) + + # Appends definitions to definitions componant + else: + if partOfSpeech == 'verb': + text = verb_edit(text) + + if partOfSpeech == 'participle': + text, first = participle_edit(text,first) + + + defs.append(text.strip(".").replace(":",";")) + + # check if entry is a form of another main entry + roots_list = find_root(roots_list,defs,etymology) + + # Create new entry + entry['partOfSpeech'] = partOfSpeech + entry['principleParts'] = principleParts + entry['simpleParts'] = simpleParts + entry['defs'] = defs + entry['etymology'] = etymology + + # reset defs to empty list + defs = [] + + # append entry to entries + entries.append(entry) + + # reset entry to blank dict + entry = {} + + # Back to Loop LEVEL ONE + + # When Loop LEVEL ONE has finished all iterations + + # assign entries to new_word + new_word['entries'] = entries + + if roots_list: + new_word['roots'] = roots_list + + return new_word +# END COPY WORD + + +# FIND ROOT +# # # # # # # # # # # # # # # # # +def find_root(roots_list,text,etymology): + + test = False + + + trial_list = [copy.deepcopy(etymology)] + copy.deepcopy(text) if etymology else copy.deepcopy(text) + trial_list = chop_parens(trial_list) + + if test: + print(trial_list) + + + word_bank = ['first-person','second-person','third-person','singular', + 'plural','indicative','imperative','infinitive','subjunctive','active','passive', + 'present','future','perfect','imperfect','pluperfect','participle', + 'masculine','feminine','neuter','common','nominative','comparative','degree', + 'genitive','dative','accusative','ablative','locative','vocative', + 'gerund','gerundive','inflection','supine','alternative','archaic','form','of','from' + ] + for index in 
range(len(trial_list)): + + if 'participle of' in trial_list[index]: + if test: + print(trial_list[index]) + trial_list[index] = trial_list[index][trial_list[index].find('participle of'):] + if test: + print(trial_list[index]) + trial_list[index] = trial_list[index].split() + trial_list[index].remove('participle') + trial_list[index].remove('of') + if test: + print(trial_list[index]) + roots_list.append(trial_list[index][0].strip(".,; ")) + if test: + print(trial_list[index][0]) + continue + + trial_list[index] = trial_list[index].strip(';, ') + if ',' in trial_list[index] or ';' in trial_list[index]: + if test: + print("Continue 1") + continue + + + + + + string = trial_list[index] + string = string.split(' ') + if len(string) == 1: + if test: + print("Continue 2") + continue + + offset = 0 + for index in range(len(string)): + index = index - offset + string[index] = string[index].strip("().") + string[index] = string[index].lower() + if '/' in string[index]: + string.extend(string[index].split('/')) + del string[index] + offset += 1 + + + for word in word_bank: + if test: + print(F"{word} {string}") + if word in string: + if test: + print(f"{word} removed") + string.remove(word) + + string[0] = string[0].strip(".,:;\n") + + if len(string) == 1 and string[0] not in roots_list: + if test: + print(f"{string[0]} being added to Roots") + roots_list.append(string[0]) + if test: + print("Roots test completed") + return roots_list +# END FIND ROOT + + +# GET TAGS +# # # # # # # # +def getTags(tags=set(),mode='',master_list=[]): + + # Whole function contained in loop + while True: + + # flag if tags already in place + if tags: + + # create list of tags + tag_string = ", ".join(f"'{tag}'" for tag in tags) + + # Print list with appropriate commas + if mode: + print(f"\n{mode.title()} tags: {tag_string}") + else: + print(f"\nCurrent tags: {tag_string}") + + if mode: + string = mode.title() + " " + else: + string = '' + + # Display options + options = { + '0':f"{string}Tag Options:\n>'0' to finish\n"} + if mode == '': + options.update({'1':">'1' to add a new tag\n"}) + options.update({'2':">'2' to choose from a list of all tags\n"}) + # only display if tags not empty + if tags: + options.update({'3':">'3' to Remove\n", + '4':">'4' to clear all\n"}) + if master_list: + options.update({}) + user_input = get_selection.get_selection(options) + + # Option to finish, return to calling function + if user_input == '0': + return tags + + # Option to add new tag + elif user_input == '1': + new_tag = input("Enter new tag: ") + tags.add(new_tag) + # Option to remove tag + + elif user_input == '3': + options = {'0':"Select the tag you want to remove ('0' to go back)\n"} + tags_list = list(tags) # convert set to list + for index in range(len(tags_list)): + options[f"{index+1}"] = f"{index + 1}. {tags_list[index]}\n" + user_input = get_selection.get_selection(options) + + if user_input != '0': + del tags_list[int(user_input) - 1] + tags = set(tags_list) # convert list back to set + + + # Option to clear all tags + elif user_input.lower() == '4': + tags = set() + + # option to choose from master list + elif user_input.lower() == '2': + exit_choose = False + # delete all redundant entries + offset = 0 + for index in range(len(master_list)): + if master_list[index - offset] in tags: + del master_list[index - offset] + offset += 1 + + # loop for user selections + while not exit_choose: + options = {} + for index in range(len(master_list)): + options[f"{index+1}"] = f"{index + 1}. 
{master_list[index]}\n" + options.update({'0':"Select the tag you want to add to your current tags\n"}) + options.update({'00':"('0' to go back, '00' to finish)\n"}) + user_input = get_selection.get_selection(options) + + if user_input == '0': + exit_choose = True + elif user_input == '00': + return tags + else: + tags.add(master_list[int(user_input) - 1]) + del master_list[int(user_input) - 1] + tag_string = ", ".join(f"'{tag}'" for tag in tags) + if mode == 'filter': + print(f"Filter by tags: {tag_string}") + else: + print(f"Current tags: {tag_string}") +# END GET TAGS + + +# GET MASTER LIST +# # # # # # # # # # +def get_master_list(current_dict): + master_list = [] + # get list of all unique tags in current dictionary + for word in current_dict['definitions']: + for tag in word['tags']: + if tag not in master_list: + master_list.append(tag) + return master_list +# END GET MASTER LIST diff --git a/word_print_edit.py b/word_print_edit.py new file mode 100644 index 0000000..d717ba8 --- /dev/null +++ b/word_print_edit.py @@ -0,0 +1,381 @@ + +''' +Description: + + edit entries: + options within word to create, delete, move and modify entries + + select entries: + allows user to choose an entry based on letter indices + returns selection as the number index of selected entry in entries list + + print entries: + print all entries of a word including part of speech + option to include capital letter next to part of speech begining each entry +''' +import pickle +from unidecode import unidecode +from copy import deepcopy +import word_methods +import edit_entry +import create_word +from get_selection import get_selection, clear_screen +import parser_shell +import load_dict +from pyfiglet import figlet_format + +# EDIT ENTRIES +# # # # # # # # # # # # +def edit_entries(new_word,current_dict,t): + language = current_dict['language'] + # save confirm choice string used in multiple places + confirm_str = "'1' to confirm, any other key to cancel: " + entry_string = '' + # rest of function contained in loop + while True: + + # flag if only one entry exists + if len(new_word['entries']) <= 1: + single_entry = True + else: + single_entry = False + + entry_string = get_entry_string(new_word['entries']) + + # get user selection + options = { + '1':f"\n===================================================\n'{new_word['heading']}' Options:\n>'1' Edit Entry\n", + '2':">'2' Create New Entry\n"} + if not single_entry: + options.update({ + '3':">'3' Move Entry\n", + '4':">'4' Delete Entry\n"}) + #'5':">'5' Merge Entries\n"}) + if 'template' in new_word: + options.update({'reset':">'reset' to reset template tag\n"}) + options.update({ + 'tags':">'tags' to modify tags\n", + 'add':">'add' to add another word\n", + 'head':"'head' to change heading\n", + #'split':"'split' to create sub-cards\n", + '0':">'0' to finish edit\n",'00':">'00' to finish and save\n",'verb':''}) + clear_screen() + user_input = get_selection(options,entry_string) + + + # Option to exit, only way to end loop + if user_input == '0': + return new_word, False + + if user_input == '00': + return new_word, True + + # edit tags option + if user_input.lower() == 'tags': + master_list = word_methods.get_master_list(current_dict) + new_word['tags'] = word_methods.getTags(new_word['tags'],'',master_list) + + if user_input.lower() == 'reset': + del new_word['template'] + + # Options to modify entry, then repeat loop + elif user_input == '1': + # selection needed only for multiple entries + if single_entry: + new_word['entries'][0], finish_and_save = 
edit_entry.edit_entry(new_word['entries'][0],new_word) + if finish_and_save: + return new_word, True + else: + # request user selection with appropriate message + message = '===================================================\nEnter the letter of the entry you want to modify.' + selection = select_entry(new_word['entries'],message) + + # unless modify is aborted, call edit_entry, repeat loop when done + if selection != None: + clear_screen() + new_word['entries'][selection], finish_and_save = edit_entry.edit_entry(new_word['entries'][selection],new_word) + if finish_and_save: + return new_word, True + + # Option to create new entry, then repeat loop + elif user_input == '2': + new_word, result = create_word.create_entry(new_word) + + # Option to move entry + elif user_input == '3' and not single_entry: + if len(new_word['entries']) == 2: + new_word['entries'] = edit_entry.move_entries(new_word['entries'],1,0) + else: + exit_inner_loop = False + while not exit_inner_loop: + # request user selection with appropriate message + message = "\n===================================================\nEnter the letter of the entry you want to move." + selection = select_entry(new_word['entries'],message) + + # unless move is aborted, proceed to STEP TWO + if selection != None: + # request user selection with appropriate message + message = f"\n===================================================\nMove {chr(selection + 65)} to what position?" + new_position = select_entry(new_word['entries'],message) + + # unless move is aborted, confirm requested change + if new_position != None: + new_word['entries'] = edit_entry.move_entries(new_word['entries'],selection,new_position) + else: + exit_inner_loop = True + + elif user_input.lower() == 'add': + print("'1' to look up word, '2' to create new entry") + user_input = input(": ") + if user_input == '1': + new_word = word_combo(new_word,t,current_dict['language']) + else: + new_entry, result = create_word.create_entry(new_word) + + # Option to delete + elif user_input == '4' and not single_entry: + exit_inner_loop = False + while not exit_inner_loop: + # request user selection with appropriate message + message = '===================================================\nEnter the letter of the entry you want to delete.' + selection = select_entry(new_word['entries'],message) + + # if delete not aborted, confirm requested deletion + if selection != None: + # Confirm deletion + print(f"\nDelete entry {chr(selection+65)}?") + user_input = input(confirm_str) + + # if confirmed, delete entry / either way, repeat loop when done + if user_input == '1': + del new_word['entries'][selection] + else: + exit_inner_loop = True + if len(new_word['entries']) == 1: + exit_inner_loop = single_entry = True + + elif user_input == '5' and not single_entry: + exit_inner_loop = False + while not exit_inner_loop: + # request user selection + # request user selection with appropriate message + message = '===================================================\nEnter the letter of the entry you want to merge to.' + s1 = select_entry(new_word['entries'],message) + + if s1 != None: + # request user selection with appropriate message + message = f'===================================================\nEnter the letter of the entry you want to merge with entry {chr(s1 + 65)}.' 
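+ # s2 is the entry to merge from: its definitions are appended to entry s1 and, after an optional principal-parts swap, s2 is deleted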
+ s2 = select_entry(new_word['entries'],message) + + if s2 != None: + new_word['entries'][s1]['defs'].extend(new_word['entries'][s2]['defs']) + + print(f"\nChange principle parts \nfrom: {new_word['entries'][s1]['simpleParts']}") + print(f"to: {new_word['entries'][s2]['simpleParts']}\n") + user_input = input("'1' to confirm, any other key for no: ") + + if user_input == '1': + new_word['entries'][s1]['simpleParts'] = new_word['entries'][s2]['simpleParts'] + + del new_word['entries'][s2] + exit_inner_loop = True + else: + exit_inner_loop = True + + # Test if heading should be changed + if new_word['heading'] != new_word['entries'][0]['simpleParts'][:len(new_word['heading'])]: + print(f"Change heading from {new_word['heading']} to {new_word['entries'][0]['simpleParts'][:len(new_word['heading'])]}?") + user_input = input("'1' to confirm, any other key for no: ") + + if user_input == '1': + new_word['heading'] = new_word['entries'][0]['simpleParts'][:len(new_word['heading'])] + new_word['handle'] = unidecode(new_word['heading']) + + elif user_input == 'head': + print("\nEnter your new heading ('0' to go back) (ā, ē, ī, ō, ū)") + new_definition = input(': ') + + if new_definition != '0': + new_word['heading'] = new_definition + new_word['handle'] = unidecode(new_word['heading']) + + elif user_input == 'split': + split_word(new_word,current_dict) + + elif user_input == 'verb': + for i in new_word['entries']: + i['partOfSpeech'] = 'verb' + + +# END EDIT ENTRIES + +def flatten(word): + for i in word['entries']: + for z in i['defs']: + z['gloss'] = z['gloss'].lower() + +def split_word(word,current_dict): + count = 1 + splits = [] + while True: + new_split = {} + print(word['heading']) + print(word['entries'][0]['simpleParts']) + print(f"Enter Split {count} heading ('0' to stop)") + user_input = input(": ") + if user_input == '0': + return + new_split['heading'] = new_split['handle'] = '' + user_input + new_split['tags'] = [] + new_split['entries'] = [{'simpleParts':user_input,'principleParts':user_input,'partOfSpeech':'split','defs':[]}] + for entry in word['entries']: + while True: + print(f"Split {count}:") + if new_split['entries'][0]['defs']: + edit_entry.print_entry(new_split['entries'][0]) + else: + print("\nENTRY:") + print("******\n") + print(f"{new_split['entries'][0]['partOfSpeech']}\n") + print(new_split['entries'][0]['simpleParts']) + selection = edit_entry.select_definition(entry,f"Choose definition to add to split {count} ('0' for next):") + if selection == None: + break + if type(selection) == list: + for i in range(selection[0],selection[1]+1): + new_split['entries'][0]['defs'].append(entry['defs'][i]) + else: + new_split['entries'][0]['defs'].append(entry['defs'][selection]) + edit_entry.print_entry(new_split['entries'][0]) + while True: + print(f"Save split {count}? ('1' save, '0' to discard)") + user_input = input(": ") + if user_input == '0': + break + elif user_input == '1': + splits.append(deepcopy(new_split)) + break + else: + print("Invalid entry") + while True: + print("Creat another split? 
('1' to continue, '0' to finish)") + user_input = input(": ") + if user_input == '0' or user_input == '1': + break + else: + print("Invalid entry") + if user_input == '1': + count += 1 + continue + elif user_input == '0': + break + if splits: + for split in splits: + parser_shell.save_word(split,current_dict) + return + + + + +# WORD COMBO +# # # # # # # # # # # # # # # +def word_combo(new_word,t,lang,search_word=None): + letters = [] + """ Retrieve use selection from dictionary """ + combo_word = parser_shell.choose_from_trie(t,lang) + + if combo_word == None: + return new_word + else: + if 'roots' in combo_word: + new_word['roots'] = combo_word['roots'] + for index in range(len(combo_word['entries'])): + new_word['entries'].append(combo_word['entries'][index]) + + if new_word['heading'] != combo_word['heading']: + print(f"Change heading from {new_word['heading']} to {combo_word['heading']}?") + user_input = input("'1' to confirm, any other key for no: ") + + if user_input == '1': + new_word['heading'] = combo_word['heading'] + new_word['handle'] = combo_word['handle'] + + print("NEW COMBINED WORD:\n") + print_entries(new_word['entries']) + return new_word + +# END WORD COMBO + + +# SELECT ENTRY +# # # # # # # # # # # # # # # +def select_entry(entries,message): + # whole function contained in loop + invalid = False + while True: + # Display entries in 'options mode' + print_entries(entries,mode='choice') + + # Print message supplied in function call + print(message) + print("'0' to go back") + if invalid: + print('\ninvalid selection\n\n') + invalid = False + user_input = input(': ') + + # Options to go back, None indicates no selection made + if user_input == '0': + return None + + # validate user input, should be a single letter + + # Avoid empty string error + if len(user_input) == 1: + + # convert to index 'A'=0,'B'=1... + user_input = ord(unidecode(user_input.upper())[0])-65 + # verify input in range + if user_input in range(len(entries)): + # return valid selection + return user_input + + # invalid, repeat loop + invalid = True +# END SELECT ENTRY + +def get_entry_string(entries, mode=''): + string = "" + if mode == 'choice': + string += figlet_format("ENTRY OPTIONS:",font='cybermedium',width=150) + string += "**************\n\n" + else: + string += figlet_format("CURRENT WORD:",font='cybermedium',width=150) + string += "*************************\n\n" + + # Loop to print entries + for index in range(len(entries)): + + # mode '1' print capital letters new to part of speech for each entry + if mode == 'choice': + c = chr(index + 65) + string += f"\n({c}). " + + # assign definitions to shorter name + if mode == 'choice': + string += edit_entry.get_entry(entries[index],'choice',trunc=True) + else: + string += edit_entry.get_entry(entries[index],'plain',trunc=True) + string += '\n' + # print new line when finished + string = string.strip("\n") + return string + + +# PRINT ENTRIES +# # # # # # # # # # # # # # # +def print_entries(entries,mode=''): + # mode 1 is options mode + clear_screen() + print(get_entry_string(entries,mode)) +# END PRINT ENTRIES
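
Note (not part of the patch): the module docstring and select_entry above describe choosing an entry by letter and returning its numeric index. A minimal standalone sketch of that letter-to-index mapping follows; the helper name letter_to_index and the sample entries list are illustrative assumptions, and only the unidecode/ord conversion mirrors the patch.

from unidecode import unidecode

def letter_to_index(user_input, entries):
    # 'A'/'a' -> 0, 'B'/'b' -> 1, ...; anything invalid or out of range returns None,
    # mirroring the validation inside word_print_edit.select_entry
    if len(user_input) != 1:
        return None
    index = ord(unidecode(user_input.upper())[0]) - 65
    return index if index in range(len(entries)) else None

# usage: with three entries, 'c' selects index 2 and 'z' is rejected
entries = ['noun entry', 'verb entry', 'participle entry']
assert letter_to_index('c', entries) == 2
assert letter_to_index('z', entries) is None

In the patch itself this conversion happens inline inside select_entry's input loop rather than in a separate helper.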