Quechua added to language options
itincknell committed Mar 10, 2024
1 parent 91ab4f6 commit fec6d12
Showing 12 changed files with 227 additions and 85 deletions.
43 changes: 43 additions & 0 deletions .gitignore
@@ -1 +1,44 @@
.DS_Store
Perseus_text_1999.04.0058.txt
grc.lsj.perseus-eng1.txt
grc.lsj.perseus-eng2.txt
grc.lsj.perseus-eng3.txt
grc.lsj.perseus-eng4.txt
grc.lsj.perseus-eng5.txt
grc.lsj.perseus-eng6.txt
grc.lsj.perseus-eng7.txt
grc.lsj.perseus-eng8.txt
grc.lsj.perseus-eng9.txt
grc.lsj.perseus-eng10.txt
grc.lsj.perseus-eng11.txt
grc.lsj.perseus-eng12.txt
grc.lsj.perseus-eng13.txt
grc.lsj.perseus-eng14.txt
grc.lsj.perseus-eng15.txt
grc.lsj.perseus-eng16.txt
grc.lsj.perseus-eng17.txt
grc.lsj.perseus-eng18.txt
grc.lsj.perseus-eng19.txt
grc.lsj.perseus-eng20.txt
grc.lsj.perseus-eng21.txt
grc.lsj.perseus-eng22.txt
grc.lsj.perseus-eng23.txt
grc.lsj.perseus-eng24.txt
grc.lsj.perseus-eng25.txt
grc.lsj.perseus-eng26.txt
grc.lsj.perseus-eng27.txt
OldEnglish-trie.txt
Latin-trie.txt
AncientGreek-trie.txt
Latin_new_tag_list.txt
GreekDictionary.txt
LatinDictionary.txt
OldEnglishDictionary.txt
Old English_new_tag_list.txt
Ancient Greek_new_tag_list.txt
formatted_flashcard_files/
kaikki_json_files/
sorted_language_files/
src/__pycache__/
supplementary_language_files/Quechua_new_tag_list.txt
user_created_dictionaries/
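
The 27 numbered LSJ files, the three tries, and the *_new_tag_list files all follow fixed naming patterns, so a handful of globs would give the same coverage. A sketch of an equivalent, tighter ignore list, assuming nothing matching these patterns should ever be tracked (patterns without a slash match at any depth, so the supplementary Quechua tag list is covered too):

.DS_Store
Perseus_text_1999.04.0058.txt
grc.lsj.perseus-eng*.txt
*-trie.txt
*_new_tag_list.txt
GreekDictionary.txt
LatinDictionary.txt
OldEnglishDictionary.txt
formatted_flashcard_files/
kaikki_json_files/
sorted_language_files/
src/__pycache__/
user_created_dictionaries/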
29 changes: 16 additions & 13 deletions src/convert_file_utilities.py
@@ -22,7 +22,7 @@

def debug_print(Test, *args):
"""Print messages if Test is True."""
if Test:
if False:
print(*args)

def similar_enough(item, tag):
@@ -133,10 +133,10 @@ def process_glosses(glosses, gloss_tags):
gloss_tags = process_glosses(sense['raw_glosses'], gloss_tags)

if Test:
print(f"dupe_list = {dupe_list}")
debug_print(f"dupe_list = {dupe_list}")

if Test:
print(f"senses: {senses}")
debug_print(f"senses: {senses}")
return senses

def get_file_selection(Test, test_file, test_language):
@@ -174,14 +174,14 @@ def get_file_selection(Test, test_file, test_language):
def print_debug_info(line, counter):
''' Function for viewing unprocessed json data
'''
print('\n')
print(f"\tline: {counter}, word: {line['word']}")
print("WORD ITEMS >>>>>>>>>>>>>>")
debug_print('\n')
debug_print(f"\tline: {counter}, word: {line['word']}")
debug_print("WORD ITEMS >>>>>>>>>>>>>>")
for item in line.items():
print(item)
print("SENSES ITEMS >>>>>>>>>>>>")
debug_print(item)
debug_print("SENSES ITEMS >>>>>>>>>>>>")
for item in line['senses'][0].items():
print(item)
debug_print(item)

def handle_pos(line):
''' Matches json pos abbreviations with those used in
@@ -362,13 +362,16 @@ def convert_files():
tag_list = []

# choose language option, create kaikki.org file string
language = pick_language()
if not Test:
language = pick_language()
else:
language = test_language
file = "kaikki.org-dictionary-" + language.replace(' ','') + ".json"

# this module loads a '-trie.txt' file which can take a few seconds, should be avoided if language != Latin
if language == "Latin":
from get_simple import get_simple
simple = get_simple
simple = lambda x, y, z : get_simple(x, y, z, language)
else:
simple = None

@@ -382,7 +385,7 @@ def convert_files():
change_path(KAIKKI_JSON_FILES)
try:
with open(file, 'r') as input_file:
new_dictionary['definitions'] = parse_lines(input_file, tag_list,language,simple)
new_dictionary['definitions'] = parse_lines(input_file, tag_list, language, simple)

# save a list of all tags that were encountered
change_path(SUPPLEMENTARY_LANGUAGE_FILES)
@@ -441,7 +444,7 @@ def convert_files():


if Test:
sort_dump()
convert_files()



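Two of the changes above bind behaviour once at setup so the parsing loop stays uniform: pick_language() is bypassed under Test in favour of test_language, and simple = lambda x, y, z : get_simple(x, y, z, language) bakes the chosen language into a three-argument callable, so parse_lines can call simple(...) without ever seeing the language. That lambda is partial application; a minimal sketch of the same binding via functools.partial, assuming only that get_simple takes the language as its final parameter (the real parameter names are not visible in this hunk):

from functools import partial

def get_simple(a, b, c, language):
    # stand-in for src/get_simple.get_simple; parameter names are hypothetical
    return f"{a}/{b}/{c} [{language}]"

language = "Latin"

# equivalent to: simple = lambda x, y, z : get_simple(x, y, z, language)
simple = partial(get_simple, language=language)

print(simple("amo", "verb", 1))  # prints: amo/verb/1 [Latin]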
38 changes: 33 additions & 5 deletions src/create_word.py
@@ -49,12 +49,40 @@ def create_word(current_dict,tags):
new_word['entries'][0], dummy = edit_entry.edit_entry(new_word['entries'][0],new_word)

# call word options, from here return
load_dict.change_path(SORTED_LANGUAGE_FILES)
if current_dict['language'] == 'Latin' or current_dict['language'] == "Ancient Greek":
wiki_dump = parser_shell.load_big_language(new_word['heading'][0],current_dict['language'])


# load trie for current language
load_dict.change_path(load_dict.SORTED_LANGUAGE_FILES)
file_name = current_dict['language'].replace(" ","") + '-trie.txt'
try:
with open(file_name,'rb') as openFile:
sorted_language_object = pickle.load(openFile)
except FileNotFoundError:
input(f'Error: "{file_name}" not found in sorted_language_files;\ngo to data files in main menu to extract files.\n')
return None

t = sorted_language_object['definitions']

key = unidecode(new_word['heading']).lower()

if key not in t:
t[key] = new_word

# if key is already used
else:
wiki_dump = parser_shell.load_sorted_language(current_dict['language'])
parser_shell.save_word(new_word,wiki_dump,2)
# if a list is already started
if isinstance(t[key],list):
t[key].append(new_word)

# otherwise start a list with [previous item, new item]
else:
t[key] = [t[key],new_word]

sorted_language_object['definitions'] = t

with open(sorted_language_object['file'],mode = 'wb') as openFile:
pickle.dump(sorted_language_object, openFile)

current_dict = parser_shell.save_word(new_word,current_dict)
return current_dict
# END CREATE WORD
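The new create_word flow replaces the old Latin/Ancient Greek special-casing with a single path: unpickle the language's trie, normalise the heading to an ASCII lowercase key with unidecode, insert, and pickle the object back. Duplicate headings are handled by promoting the slot from a single word dict to a list on the second insertion. A minimal sketch of just the insert step, assuming 'heading' is a plain string at this point:

from unidecode import unidecode

def insert_word(sorted_language_object, new_word):
    t = sorted_language_object['definitions']
    key = unidecode(new_word['heading']).lower()

    if key not in t:
        t[key] = new_word               # first word under this key
    elif isinstance(t[key], list):
        t[key].append(new_word)         # a list was already started
    else:
        t[key] = [t[key], new_word]     # promote to [previous item, new item]

    sorted_language_object['definitions'] = t
    return sorted_language_object

Re-pickling the whole object after every insertion is simple but costs a full file write per word; batching insertions before the dump would amortise that.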
20 changes: 16 additions & 4 deletions src/edit_all.py
@@ -12,7 +12,7 @@
from iteration_utilities import unique_everseen
from copy import deepcopy
from unidecode import unidecode
from load_dict import change_path, USER_CREATED_DICTIONARIES, FLASHCARD_TEMPLATE_FILES
from load_dict import change_path, USER_CREATED_DICTIONARIES, FLASHCARD_TEMPLATE_FILES, SORTED_LANGUAGE_FILES
import word_print_edit
import edit_entry
import edit_dictionary
@@ -30,7 +30,7 @@ def edit_all(current_dict):
'2':">'2' special option\n",
'a':">'a' change file name\n",
'b':">'b' special option II\n",
'c':">'c' convert to gloss/tags senses\n",
'c':">'c' replace_defs_with_sensess\n",
'd':">'d' match dictionaries\n",
'3':">'3' to edit a subset by tag\n",
'4':">'4' to remove punctuation\n",
@@ -72,7 +72,7 @@ def edit_all(current_dict):
elif user_input == '2':
current = special(current_dict)
elif user_input == 'c':
current_dict = replace_senses(current_dict)
current_dict = replace_defs_with_senses(current_dict)
elif user_input == 'd':
match_dictionaries(current_dict)
elif user_input == 'p':
@@ -233,6 +233,18 @@ def replace_senses(current_dict):
openFile.close()
return current_dict

def replace_defs_with_senses(current_dict):
for i in range(len(current_dict['definitions'])):
for j in range(len(current_dict['definitions'][i]['entries'])):
if 'defs' in current_dict['definitions'][i]['entries'][j]:
current_dict['definitions'][i]['entries'][j]['senses'] = current_dict['definitions'][i]['entries'][j]['defs']
del current_dict['definitions'][i]['entries'][j]['defs']

openFile = open(current_dict['file'],mode = 'wb')
pickle.dump(current_dict, openFile)
openFile.close()
return current_dict

def load_latin(index_letter):
if index_letter.lower() not in 'abcdefghijklmnopqrstuvwxyz':
index_letter = 'misc'
@@ -243,7 +255,7 @@ def load_latin(index_letter):


def match_dictionaries(current_dict):
change_path("dumps_sorted")
change_path(SORTED_LANGUAGE_FILES)
trie_file = current_dict['language'].replace(" ","") + '-trie.txt'

print(f"Loading {trie_file}")
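replace_defs_with_senses is a one-shot schema migration: every entry's legacy 'defs' key is renamed to 'senses', then the dictionary is re-pickled. The index-based loops above work; the same walk reads more directly over the objects themselves, with dict.pop collapsing the copy-then-delete pair into one step. An equivalent sketch, assuming the same definitions -> entries nesting:

def rename_defs_to_senses(current_dict):
    for word in current_dict['definitions']:
        for entry in word['entries']:
            if 'defs' in entry:
                entry['senses'] = entry.pop('defs')  # rename key in one step
    return current_dict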
63 changes: 45 additions & 18 deletions src/edit_dictionary.py
@@ -633,6 +633,7 @@ def filter_gloss(current_dict,tags,output_file=None,tag_mode='1'):
# assign user selected file to output
sys.stdout = open(output_file, 'w')

get_selection.clear_screen()
if tags:
print(f"\n\t{str(tags)}\n")

@@ -641,9 +642,12 @@
"article", "preposition", "conjunction","pronoun","letter", "character",
"phrase", "proverb", "idiom","symbol", "syllable", "numeral", "initialism",
"interjection","definitions"]

for part in parts_list:
count += print_gloss(current_dict,tags,part,tag_mode)

input("\n\nEnter to continue")

if output_file:
# re-assign original output
sys.stdout = original_stdout
@@ -658,6 +662,8 @@ def print_gloss(current_dict,tags,partOfSpeech=None,tag_mode='1'):
strings = {}
first_run = True

window_size = os.get_terminal_size().columns

counter = 0

# Loop to create sub-list to select from
@@ -690,16 +696,26 @@
counter += 1

# print with desired alignment
if current_dict['language'] == 'Latin':
if True: #current_dict['language'] == 'Latin':
simpleParts = word['entries'][x]['simpleParts']

difference = len(simpleParts) - get_selection.visible_len(simpleParts)

if partOfSpeech == 'verb':
if len(simpleParts) > 50:
simpleParts = simpleParts[:49] + "-"
entry_string = f"{simpleParts:.<50} | "

if get_selection.visible_len(simpleParts) > 50:
simpleParts = simpleParts[:49 + difference] + "-"

entry_string = f"{simpleParts:.<{50 + difference}} | "

else:
if len(simpleParts) > 30:
simpleParts = simpleParts[:29] + "-"
entry_string = f"{simpleParts:.<30} | "
if get_selection.visible_len(simpleParts) > 30:
while(get_selection.visible_len(simpleParts) != 30):
simpleParts = simpleParts[:-2] + "-"
difference = 0

entry_string = f"{simpleParts:.<{30 + difference}} | "

# check if definition exceeds desired length
text = [d['gloss'] for d in word['entries'][x]['senses']]
dtags = [d['tags'] for d in word['entries'][x]['senses']]
@@ -709,22 +725,28 @@
else:
for i in range(len(text)):
entry_string += f"{i+1}) " + text[i].strip('*^†∆') + "; "

entry_string = entry_string.strip("; ")

if len(entry_string) > 130:


if get_selection.visible_len(entry_string) > window_size - 40: # 130:
print(entry_string[:entry_string[:130].rfind(' ')])

if partOfSpeech == 'verb':
second_line = entry_string[entry_string[:130].rfind(' '):]
if len(second_line) > 80:
second_line = second_line[:77] + "..."
second_line = entry_string[entry_string[:window_size - 40].rfind(' '):]
if get_selection.visible_len(second_line) > window_size - 90:
second_line = second_line[:window_size - 93] + "..."
print(f"{'.':.<50} | {second_line}")

else:
second_line = entry_string[entry_string[:150].rfind(' '):]
if len(second_line) > 100:
second_line = second_line[:97] + "..."
second_line = entry_string[entry_string[:window_size - 20].rfind(' '):]
if get_selection.visible_len(second_line) > window_size - 70:
second_line = second_line[:window_size - 73] + "..."
print(f"{'.':.<30} | {second_line}")
else:
print(f"{entry_string}")

elif current_dict['language'] == "Ancient Greek":

entry_string = word['entries'][x]['simpleParts'][:word['entries'][x]['simpleParts'].find(')')+1].strip()
@@ -735,19 +757,22 @@
length_string = length_string.replace('φ','f')
length_string = length_string.replace('ψ','c')
entry_string += ' ' * (30 - len(unidecode(length_string))) + " | "

# check if definition exceeds desired length
text = [line['gloss'] for line in word['entries'][x]['senses']]
text = short_senses(text)

if len(text) == 1:
entry_string += text[0]
else:
for i in range(len(text)):
entry_string += f"{i+1}) " + text[i].strip('*^†∆') + "; "

entry_string = entry_string.strip("; ")

if len(entry_string) > 130:
print(entry_string[:entry_string[:130].rfind(' ')])
print(f"{' ':<30} | {entry_string[entry_string[:130].rfind(' '):entry_string[:225].rfind(',')]}")
if len(entry_string) > window_size - 30:
print(entry_string[:entry_string[:window_size - 30].rfind(' ')])
print(f"{' ':<30} | {entry_string[entry_string[:window_size - 30].rfind(' '):entry_string[:window_size + 68].rfind(',')]} ...")
else:
print(f"{entry_string}")

@@ -813,7 +838,7 @@ def short_line(line,limit):
# reconstruct string
new_text = ", ".join(word.strip() for word in line)

return line
return new_text

# SHORT senses
# # # # # # # # # #
Expand All @@ -829,6 +854,8 @@ def short_senses(text):
text.remove('')

for i in range(len(text)):
if isinstance(text[i],list):
print(text)
text[i] = text[i].strip(',;')
return text
# END SHORT senses
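The print_gloss rewrite swaps every measurement from len() to get_selection.visible_len() and widens each format field by difference, the gap between a string's raw length and its printed width; wrap points now come from os.get_terminal_size() instead of the old hard-coded 130/150. The extra width is needed because Python's format padding counts code points, not terminal columns. A minimal sketch of the pattern, under the assumption that visible_len simply ignores ANSI colour escapes (the real helper is not shown in this diff):

import re

ANSI_RE = re.compile(r'\x1b\[[0-9;]*m')

def visible_len(s):
    # printed width, ignoring ANSI colour escapes (assumed behaviour)
    return len(ANSI_RE.sub('', s))

def pad_column(s, width):
    # mirrors the verb branch above, generalised to any column width
    difference = len(s) - visible_len(s)      # invisible characters in s
    if visible_len(s) > width:
        s = s[:width - 1 + difference] + "-"  # same arithmetic as the diff
    return f"{s:.<{width + difference}} | "   # pads to `width` visible columns

As in the committed code, difference is computed once before truncation; the dot-fill keeps columns aligned because the field is widened by exactly the number of invisible characters.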