-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmachine_translation.py
executable file
·137 lines (115 loc) · 4.56 KB
/
machine_translation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#! /usr/bin/env python
#
# Uses the Hugging Face API for machine translation (MT)
#
# Based on:
# - https://stackoverflow.com/questions/71568142/how-can-i-extract-and-store-the-text-generated-from-an-automatic-speech-recognit
# - Hugging Face's NLP with Transformers text
#
"""Machine translation via Hugging Face
Example:
echo "How now Bourne cow?" | FROM=en TO=es {script} -
USE_INTERFACE=1 {script} -
"""
# Standard modules
# TODO: import re
# Installed module
## OLD: import gradio as gr
## TODO: import transformers
## OLD: from transformers import pipeline
# Local modules
from mezcla import debug
from mezcla.main import Main
from mezcla import misc_utils
from mezcla import system
from mezcla import glue_helpers as gh
# Constants
# Alias for the trace-level constants (used below as TL.USUAL when tracing)
TL = debug.TL
## TODO:
## # Environment options
## # Notes:
## # - These are just intended for internal options, not for end users.
## # - They also allow for enabling options in one place rather than four
## # when using main.Main (e.g., [Main member] initialization, run-time
## # value, and argument spec., along with string constant definition).
## #
## ENABLE_FUBAR = system.getenv_bool("ENABLE_FUBAR", False,
## description="Enable fouled up beyond all recognition processing")

# Language selection
# Note: FROM and TO only supply the defaults for SOURCE_LANG and TARGET_LANG,
# so the longer names win when both forms are set in the environment.
FROM = system.getenv_text("FROM", "es")
TO = system.getenv_text("TO", "en")
SOURCE_LANG = system.getenv_text("SOURCE_LANG", FROM,
                                 "Source language")
TARGET_LANG = system.getenv_text("TARGET_LANG", TO,
                                 "Target language")
# Sanity check: translating a language into itself is presumably a usage error
debug.assertion(SOURCE_LANG != TARGET_LANG)

# Pipeline task name and model, both derived from the effective language pair
# (e.g., translation_es_to_en and Helsinki-NLP/opus-mt-es-en with the defaults).
# MT_MODEL can be overridden independently of the languages.
MT_TASK = f"translation_{SOURCE_LANG}_to_{TARGET_LANG}"
DEFAULT_MODEL = f"Helsinki-NLP/opus-mt-{SOURCE_LANG}-{TARGET_LANG}"
MT_MODEL = system.getenv_text("MT_MODEL", DEFAULT_MODEL,
                              "Hugging Face model for MT")
# Whether the __main__ section prints the time taken by main()
SHOW_ELAPSED = system.getenv_bool("SHOW_ELAPSED", False,
                                  "Show elapsed time")
# Name of the text option registered with Main in main()
TEXT_ARG = "text"
## TODO: ELAPSED_ARG = "elapsed-time"

#-------------------------------------------------------------------------------

# Input source when no text option is given: main() treats "-" as a request
# to read the script's regular input rather than a named file.
TEXT_FILE = system.getenv_text("TEXT_FILE", "-",
                               "Text file to translate")
USE_INTERFACE = system.getenv_bool("USE_INTERFACE", False,
                                   "Use web-based interface via gradio")

# Optionally load UI support
# Note: gr stays None unless the interface is requested, so gradio is only
# needed (and only imported) when USE_INTERFACE is enabled.
gr = None
if USE_INTERFACE:
    import gradio as gr                 # pylint: disable=import-error
def main():
    """Entry point: translate text or serve a gradio translation interface

    The text to translate is taken from the TEXT_ARG command-line option when
    given. Otherwise it is read from TEXT_FILE, where "-" means the input
    handled by Main. When USE_INTERFACE is set, no text is read and a gradio
    interface wrapping the translation pipeline is launched instead.

    The translation is printed to standard output. Errors raised during
    translation are reported via system.print_exception_info and not
    re-raised, so the function returns None either way.
    """
    debug.trace(TL.USUAL, f"main(): script={system.real_path(__file__)}")

    # Show simple usage if --help given
    # Note: the module docstring has a {script} placeholder filled in here.
    ## OLD: dummy_app = Main(description=__doc__, skip_input=False, manual_input=False)
    dummy_app = Main(description=__doc__.format(script=__file__),
                     skip_input=False, manual_input=True,
                     ## TODO: bool_options=[(ELAPSED_ARG, "Show elapsed time")],
                     text_options=[(TEXT_ARG, "Text to translate")])
    debug.trace_object(5, dummy_app)
    debug.assertion(dummy_app.parsed_args)
    text = dummy_app.get_parsed_option(TEXT_ARG)

    # Get input file
    # Note: input is only consulted when there is no text option and the
    # web interface is not being used (the interface collects its own input).
    text_file = TEXT_FILE
    if ((text is not None) or USE_INTERFACE):
        pass
    elif (text_file == "-"):
        # Note: text_file is only referenced by the disabled examples option below
        text_file = dummy_app.temp_file
        text = dummy_app.read_entire_input()
    else:
        text = system.read_file(text_file)

    # Load the translation pipeline
    # Note: transformers is imported here rather than at module level, so
    # merely importing this script (or showing usage) does not require it.
    ## TEMP:
    ## pylint: disable=import-outside-toplevel
    from transformers import pipeline
    model = pipeline(task=MT_TASK, model=MT_MODEL)

    if USE_INTERFACE:
        # TODO2: add language controls
        pipeline_if = gr.Interface.from_pipeline(
            model,
            title="Machine translation (MT)",
            ## TODO2: subtitle=f"From: {FROM}; To: {TO}",
            ## OLD:
            ## description="Using pipeline with default",
            # Note: shows the effective languages, which are what MT_TASK and
            # the default model are built from (FROM/TO just supply defaults).
            description=f"From: {SOURCE_LANG}; To: {TARGET_LANG}",
            ## examples=[text_file])
            )
        pipeline_if.launch()
    else:
        TRANSLATION_TEXT = "translation_text"
        try:
            translation = model(text)
            # The result is expected to be a list of dicts with the output
            # stored under the translation_text key.
            debug.assertion(isinstance(translation, list)
                            and (TRANSLATION_TEXT in translation[0]))
            print(translation[0].get(TRANSLATION_TEXT) or "")
        except Exception:               # pylint: disable=broad-except
            # Note: Exception rather than a bare except, so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not reported as
            # translation errors and still terminate the script.
            system.print_exception_info("translation")
            # At higher debugging levels, show GPU status to help diagnose
            # failures such as memory problems.
            debug.code(4, lambda: debug.trace(1, gh.run("nvidia-smi")))

    return
#-------------------------------------------------------------------------------
if __name__ == '__main__':
    ## OLD: main()
    # Invoke main() through the timing helper so that the duration can be
    # reported on request (via the SHOW_ELAPSED environment option).
    # NOTE(review): the label below assumes time_function returns milliseconds -- confirm
    run_time = misc_utils.time_function(main)
    if SHOW_ELAPSED:
        rounded_time = system.round_as_str(run_time)
        print(f"Elapsed time: {rounded_time}ms")