-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
296 lines (236 loc) · 10.5 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import html
import logging
import os
import re
import typing as t

import gradio as gr
from dotenv import load_dotenv
from googletrans import Translator
from openai import OpenAI
# Parse a .env file and then load all the variables found as environment variables.
# This runs before the os.getenv() call below so that a key defined in .env is visible to it.
load_dotenv()
# Configure root logging once for the whole module: INFO level, timestamped records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# -----
# OpenAI API Key, get yours here: https://platform.openai.com/api-keys
# Read from the "API_KEY" environment variable; the value is None when the variable is not set.
# -----
openai_api_key: t.Optional[str] = os.getenv("API_KEY")
# -----
# Support Functions
# -----
def load_data() -> t.Dict[str, str]:
    """
    Provide the glossary of fixed translations used by the program.

    This is the customization point for the glossary: replace the body to load
    the entries from wherever suits your needs. It takes no parameters and must
    return a dictionary for the rest of the program to work.

    @return: A new dictionary on every call. Keys are Sinhala phrases and values
        are their corresponding English translations.
    @rtype: dict
    """
    # (Sinhala phrase, English translation) pairs, in lookup order.
    glossary_pairs = (
        ("දීර්ග බෙදීම", "long division"),
        ("මගින්", "using it"),
        ("බහුපද", "polynomials"),
        ("ප්රකාශනයන්", "expressions"),
        ("ශේෂය", "remainder"),
        ("ලබ්ධිය", "quotient"),
    )
    return dict(glossary_pairs)
def load_prompt_custom_algorithm() -> str:
    """
    Build the prompt template for the "Custom Algorithm" mode.

    The template asks the AI model to correct a math question that has already
    been machine-translated from Sinhala into English. It contains a single
    ``{prompt}`` placeholder that the caller fills in with ``str.format``.
    The functionality of this function may be replaced by the user if needed to customize the prompt.

    @return: A string containing the prompt template for the AI model.
    @rtype: str
    """
    heading = "**Math Question Correction:**\n\n "
    # Instruction sentences, concatenated in this exact order.
    instructions = (
        "Please correct the following math question. ",
        "The question is originally in Sinhala but has been translated into English. ",
        "Ensure the grammar, syntax, and clarity of the question. ",
        "Also, make sure the question has proper meaning. ",
        "If there are any mathematical errors, correct them as well. ",
        "Your response should be a properly formatted math question. ",
        "(This is for Sri Lankan GCE Andvanced Level High School Examination). ",
        "Dont add anything additional. ",
        "This prompt might not include the questions, and it may be a part of a question, so, just keep that in mind. ",
        "These are questions. Make sure the question makes sense. ",
        "You may swap its order or order of words if needed.",
    )
    question_section = "\n\n**Original Question (Translated from Sinhala):**\n\n" + "\"{prompt}\""
    answer_section = "\n\n**Corrected Question:**\n\n"
    return "".join((heading, *instructions, question_section, answer_section))
def load_prompt_full_ai(glossary: t.Optional[t.Dict[str, str]] = None) -> str:
    """
    Build the prompt template for the "Full AI Translation" mode.

    The template asks the AI model to translate a Sinhala math question into
    English, lists the glossary entries as special translations to honour, and
    ends with a single ``{prompt}`` placeholder that the caller fills in with
    ``str.format(prompt=...)``.
    The functionality of this function may be replaced by the user if needed to customize the prompt.

    @param glossary: Optional mapping of Sinhala phrases to English translations.
        When omitted, the module-level ``data`` glossary is used.
    @type glossary: dict | None
    @return: A format string whose only replacement field is ``{prompt}``.
    @rtype: str
    """
    if glossary is None:
        glossary = data

    def _literal(value: object) -> str:
        # The returned template is passed through str.format(), so braces inside
        # glossary text must be doubled or they would be read as replacement fields.
        return str(value).replace("{", "{{").replace("}", "}}")

    # One glossary entry per line so the model can tell the entries apart.
    glossary_lines = "\n".join(
        f"{_literal(term)} means: {_literal(meaning)}" for term, meaning in glossary.items()
    )

    parts = (
        "**Math Question Translation from Sinhala to English:**\n\n ",
        "Please translate the following math question. Dont refer to the chat history. Treat this as a completely new question. ",
        "These are the special translations to remember when translating:\n",
        glossary_lines,
        "\n\n",
        "Ensure the grammar, syntax, and clarity of the question. ",
        "Also, make sure the question has proper meaning. ",
        "If there are any mathematical errors, correct them as well. ",
        "Your response should be a properly formatted math question. ",
        "(This is for Sri Lankan GCE Andvanced Level High School Examination). ",
        "Dont add anything additional. ",
        "This prompt might not include the questions, and it may be a part of a question, so, just keep that in mind. ",
        "These are questions. Make sure the question makes sense. ",
        "You may swap its order or order of words if needed.",
        # In this mode the question reaches the model untranslated, so the section
        # labels describe a Sinhala input and a translated output.
        "\n\n**Original Question (in Sinhala):**\n\n",
        "\"{prompt}\"",
        "\n\n**Translated Question:**\n\n",
    )
    return "".join(parts)
# -----
# Program Code
# -----
# Shared OpenAI client used by ai(); created once at import time with the key read from the environment above.
client: OpenAI = OpenAI(api_key=openai_api_key)
# Module-level glossary (Sinhala phrase -> English translation); rebuilt by reload_data().
data: t.Dict[str, str] = load_data()
def reload_data():
    """
    Refresh the module-level glossary by calling load_data() again.

    The result replaces the global ``data`` dictionary.
    """
    global data
    refreshed_glossary = load_data()
    data = refreshed_glossary
def replace_words(text: str, replacements: t.Optional[t.Dict[str, str]] = None) -> t.Optional[str]:
    """
    Stage 1 of Translation: Replace Sinhala words in the given text with their corresponding English translations.
    Used only in "Custom Algorithm" approach.

    All glossary phrases are substituted in a single left-to-right pass, and at
    each position the longest matching phrase wins. This means a phrase that is
    contained in a longer phrase cannot break the longer one, and English text
    that was just inserted is never substituted again.

    @param text: The input text containing Sinhala words to be replaced.
    @type text: str
    @param replacements: Optional mapping of phrases to their translations.
        When omitted, the module-level ``data`` glossary is used.
    @type replacements: dict | None
    @return: The input text with Sinhala words replaced by their English translations, or None if an error occurs.
    @rtype: str | None
    """
    try:
        table = data if replacements is None else replacements
        # Skip empty phrases: an empty pattern matches at every position in the text.
        phrases = sorted((phrase for phrase in table if phrase), key=len, reverse=True)
        if not phrases:
            return text
        # Longest phrases come first in the alternation so they take priority
        # over shorter phrases that are their prefixes.
        pattern = re.compile("|".join(re.escape(phrase) for phrase in phrases))
        return pattern.sub(lambda match: table[match.group(0)], text)
    except Exception as e:
        # Callers treat None as "replacement failed" and report it to the user.
        logging.error(f"Word replacement failed: {e}")
        return None
def translate_words(text: str) -> t.Optional[str]:
    """
    Stage 2 of Translation: Translate Sinhala text to English using Google Translate.
    Used only in "Custom Algorithm" approach.

    A new Translator is created for every call. Any failure is logged and
    reported to the caller as None rather than raised.

    @param text: The input Sinhala text to be translated.
    @type text: str
    @return: The translated English text, or None if an error occurs.
    @rtype: str | None
    """
    try:
        return Translator().translate(text, src='si', dest='en').text
    except Exception as err:
        logging.error(f"Translation failed: {err}")
        return None
def ai(prompt: str, mode: t.Literal["full_ai", "custom_algorithm"] = "full_ai", model: str = "gpt-3.5-turbo-0125") -> t.Optional[str]:
    """
    Send a math question to the chat-completion model and return its answer.

    The question is inserted into the prompt template for the chosen mode and
    sent as a single user message. Any failure is logged and reported to the
    caller as None rather than raised.

    @param prompt: The math question to insert into the prompt template.
    @type prompt: str
    @param mode: The mode of operation. "full_ai" selects the translation template; any other value selects the custom-algorithm correction template. Defaults to "full_ai".
    @type mode: str
    @param model: The model to use for AI completion. Either "gpt-3.5-turbo-0125" or "gpt-4-0125-preview". Defaults to "gpt-3.5-turbo-0125".
    @type model: str
    @return: The content of the first completion choice, or None if an error occurs.
    @rtype: str | None
    """
    try:
        if mode == "full_ai":
            template = load_prompt_full_ai()
        else:
            template = load_prompt_custom_algorithm()
        user_message = {"role": "user", "content": template.format(prompt=prompt)}
        response = client.chat.completions.create(messages=[user_message], model=model)
        return response.choices[0].message.content
    except Exception as err:
        logging.error(f"AI completion failed: {err}")
        return None
def _model_kwargs(model: t.Optional[str]) -> t.Dict[str, str]:
    """Return the keyword arguments that forward an explicit model choice to ai()."""
    # An empty or missing choice yields no keyword, so ai() falls back to its own default model.
    return {"model": model} if model else {}


def custom_approach(prompt: str, model: t.Optional[str] = None) -> str:
    """
    Translate a math question from Sinhala to English using a custom algorithm.

    The pipeline is: reload the glossary, HTML-escape the question, replace
    glossary phrases, machine-translate the rest, then ask the AI model to
    correct the result. Each stage reports failure by returning None, which is
    turned into a user-facing error message here.

    @param prompt: The original math question in Sinhala.
    @type prompt: str
    @param model: Optional model name to use for the AI correction step. When omitted or empty, ai() uses its default model.
    @type model: str | None
    @return: The translated math question in English, or an error message.
    @rtype: str
    """
    try:
        reload_data()
        # NOTE(review): html.escape rewrites <, >, & and quotes as entities, so an
        # inequality such as "x < 5" reaches the later stages as "x &lt; 5" - confirm this is intended.
        sanitized_prompt: str = html.escape(prompt)
        replaced: t.Optional[str] = replace_words(text=sanitized_prompt)
        if replaced is None:
            return "Error occurred during word replacement."
        translated: t.Optional[str] = translate_words(replaced)
        if translated is None:
            return "Error occurred during translation."
        fixed: t.Optional[str] = ai(translated, mode="custom_algorithm", **_model_kwargs(model))
        if fixed is None:
            return "Error occurred during AI completion."
        return fixed
    except Exception as e:
        logging.error(f"Custom Approach failed: {e}")
        return "An error occurred."


def full_ai_approach(prompt, model: t.Optional[str] = None) -> str:
    """
    Translate a math question from Sinhala to English using full AI translation.

    The glossary is reloaded, the question is HTML-escaped, and the AI model is
    asked to translate it directly.

    @param prompt: The original math question in Sinhala.
    @type prompt: str
    @param model: Optional model name to use for the translation. When omitted or empty, ai() uses its default model.
    @type model: str | None
    @return: The translated math question in English, or an error message.
    @rtype: str
    """
    try:
        reload_data()
        # NOTE(review): html.escape rewrites <, >, & and quotes as entities before
        # the text is sent to the model - confirm this is intended for math input.
        sanitized_prompt: str = html.escape(prompt)
        fixed: t.Optional[str] = ai(sanitized_prompt, mode="full_ai", **_model_kwargs(model))
        if fixed is None:
            return "Error occurred during AI completion."
        return fixed
    except Exception as e:
        logging.error(f"Full AI Approach failed: {e}")
        return "An error occurred."


def main(prompt: str, algorithm: str, model: str) -> t.Optional[str]:
    """
    Translate a math question from Sinhala to English using the selected algorithm and model.

    @param prompt: The original math question in Sinhala.
    @type prompt: str
    @param algorithm: The selected algorithm ('Custom Algorithm' or 'Full AI Translation').
    @type algorithm: str
    @param model: The selected model ('gpt-3.5-turbo-0125' or 'gpt-4-0125-preview'). It is forwarded to the chosen approach; an empty selection lets ai() use its default model.
    @type model: str
    @return: The translated question or an error message from the chosen approach, or None if the algorithm is not recognised.
    @rtype: str | None
    """
    if algorithm == "Custom Algorithm":
        return custom_approach(prompt, model=model)
    if algorithm == "Full AI Translation":
        return full_ai_approach(prompt, model=model)
    return None
# Gradio web UI wired to main(): one free-text input for the question plus two
# radio selectors (algorithm and model), with a plain-text output.
# The three inputs are declared in the same order as main's parameters
# (prompt, algorithm, model).
iface: gr.Interface = gr.Interface(
    fn=main,
    inputs=["text", gr.Radio(["Custom Algorithm", "Full AI Translation"], label="Select Algorithm"), gr.Radio(["gpt-3.5-turbo-0125", "gpt-4-0125-preview"], label="Select Model")],
    outputs="text",
    title="Sinhala Math Question to English Translator",
    description="Translate Sinhala math questions from Sinhala to English for Sri Lankan GCE Advanced Level Examination.",
    # Each example row supplies only the question text.
    examples=[
        ["දීර්ග බෙදීම මගින් පහත බහුපද ප්රකාශනයන් බෙදූ විට ලැබෙන ශේෂය හා ලබ්ධිය සොයන්න"]
    ],
    # allow_flagging=True,
    flagging_dir="flagged",
    api_name="translate",
)
# Start the web app.
# NOTE(review): this call runs whenever the module is imported; there is no
# `if __name__ == "__main__":` guard - confirm that is intended.
iface.launch()