Skip to content

Commit

Permalink
Avanzamenti
Browse files Browse the repository at this point in the history
  • Loading branch information
gdacciaro committed Jul 20, 2022
1 parent 8a07001 commit 71823b0
Show file tree
Hide file tree
Showing 12 changed files with 218 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/models/
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 28 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,34 @@
-->
</div>

### Abstract
TODO: add abstract

Master Degree (Artificial Intelligence curriculum)<br>
**HLT** course, Academic Year: 2021/2022<br>
Date: May 2022<br>

## Description
- x
### Abstract
- c
## 🔧 Setup
The weight files of the models are not included in this repository because they are very large.
To run the program, first download them with the following command:

./download_weights.sh


## 🖥 GUI
<div align="center">
<p>
<img style="" src="./screenshot.png" > <br>
</p>
</div>

### Main Files

|File |Description |
|---|---|
| [main_monk.py](./main_monk.py) | Our best model for MONKs' problems |
| [main_cup.py](./main_cup.py) | Our best model for CUP's problem |
| [model_selection_monk.py](./model_selection_monk.py) | The starting point of model selection for MONKs' problems |
| [model_selection_cup.py](./model_selection_cup.py) | The starting point of model selection for CUP's problem |
| [model_selection_cup_distributed.py](./validation/distribuited_computing/model_selection_cup_distributed.py) | The starting point of __distributed__ model selection for CUP's problem (Note: it requires a database and its initialization) |
| [AIAIAI_ML-CUP21-TS.csv](./AIAIAI_ML-CUP21-TS.csv) | Our Results for the Blind TS |
| [report.pdf](./report.pdf) | Our report |
15 changes: 6 additions & 9 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from flask import Flask, request, jsonify
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS, cross_origin
import time
from controller.download_weights_if_necessary import download_weights_if_necessary
download_weights_if_necessary()

print("[SERVER] Server loading...")
import_start_time = time.time()
Expand All @@ -9,7 +11,7 @@
from controller.FS_LSTMController import fs_lstm_translate
from controller.DeepL_Controller import deepl_translate
from controller.Helsinki_Controller import helsinki_translate
from controller.IBMModel_Controller import ibm_translate
#from controller.IBMModel_Controller import ibm_translate
from controller.T5_Controller import t5_translate

app = Flask(__name__, static_url_path='/static')
Expand All @@ -18,19 +20,14 @@

print("[SERVER] Server loaded in ", time.time()-import_start_time, " seconds")

@app.route('/')
def hello_world(): # put application's code here
return "render_template('index.html')"


@cross_origin()
@app.route('/translate')
def query_example():
model = request.args.get('model')
sentence = request.args.get('sentence')

if model == 'IBM Model 1 (50k)' or model == 'IBM Model 1':
return jsonify({"response": ibm_translate(sentence)})
# if model == 'IBM Model 1 (50k)' or model == 'IBM Model 1':
# return jsonify({"response": ibm_translate(sentence)})
if model == 'LSTM (Custom)':
return jsonify({"response": fs_lstm_translate(sentence)})
if model == 'Transformer (Custom)':
Expand Down
1 change: 0 additions & 1 deletion controller/DeepL_Controller.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import time
import requests

def deepl_translate(sentence):
Expand Down
10 changes: 1 addition & 9 deletions controller/Helsinki_Controller.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
import warnings

warnings.filterwarnings("ignore")
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import warnings

from transformers import AutoModelForSeq2SeqLM
import torch # !! Required !!
warnings.filterwarnings("ignore")
from nltk.translate.bleu_score import sentence_bleu
import nltk
assert (nltk.__version__== '3.2.4')

from transformers import AutoTokenizer

it_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-it")
it_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-it")

Expand Down
9 changes: 9 additions & 0 deletions controller/download_weights_if_necessary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import os

def download_weights_if_necessary(models_dir=None, script=None):
    """Run the weights download script unless the models directory exists.

    The default paths are resolved against the parent of the directory that
    contains this file, not against the current working directory. The
    previous version checked ``'../models'`` but launched
    ``'./download_weights.sh'``, both relative to the working directory, so
    the check and the script could never refer to the same location.

    Args:
        models_dir: Directory whose existence means the weights are already
            available. Defaults to ``models`` next to the download script.
        script: Path of the shell script that fetches the weights. Defaults
            to ``download_weights.sh`` in the parent of this file's directory.

    Returns:
        ``False`` if ``models_dir`` already exists (nothing is executed),
        ``True`` if the download script was launched.
    """
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    if models_dir is None:
        models_dir = os.path.join(project_root, 'models')
    if script is None:
        script = os.path.join(project_root, 'download_weights.sh')

    if os.path.isdir(models_dir):
        return False

    import subprocess

    script = os.path.abspath(script)
    # Run inside the script's own directory: the script downloads and unzips
    # ``weights.zip`` relative to its working directory.
    # NOTE(review): assumes the archive unpacks into a ``models`` directory
    # there -- confirm against the archive contents.
    exit_code = subprocess.call(['sh', script], cwd=os.path.dirname(script))
    if exit_code != 0:
        # Best effort: report the failure but let the caller carry on.
        print("[WEIGHTS] download script failed with exit code", exit_code)
    return True
5 changes: 5 additions & 0 deletions download_weights.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh
# Download the model weights archive from Google Drive and unpack it in the
# current working directory.
#
# Google Drive asks for a confirmation token before serving large files: the
# first request stores the session cookies and extracts that token, the
# second request performs the actual download.

# Stop at the first failing step so a broken download is never unzipped.
set -e

FILE_ID='1fBe7syWG0kBrd1zsX6u7nJwtCcLj67QZ'
COOKIES=/tmp/cookies.txt

# Always remove the temporary cookie jar, even when a step fails.
trap 'rm -f "$COOKIES"' EXIT

CONFIRM=$(wget --quiet --save-cookies "$COOKIES" --keep-session-cookies --no-check-certificate "https://docs.google.com/uc?export=download&id=${FILE_ID}" -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')

wget --load-cookies "$COOKIES" "https://docs.google.com/uc?export=download&confirm=${CONFIRM}&id=${FILE_ID}" -O weights.zip

unzip weights.zip

rm weights.zip
2 changes: 1 addition & 1 deletion evaluation/DeepL_Eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from nltk.translate.bleu_score import sentence_bleu
import nltk
#assert (nltk.__version__== '3.2.4')
assert (nltk.__version__== '3.2.4')

def clean(word):
clean_word = word.lower()
Expand Down
14 changes: 0 additions & 14 deletions evaluation/FS_LSTM.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
# -*- coding: utf-8 -*-
"""deprecated - lstm-nmt-with-attention-trained-with-20-epochs.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1SFgGO6lzhA65sIF_3N5oLEzXOkMDgwqT
"""

# Commented out IPython magic to ensure Python compatibility.
from __future__ import absolute_import, division, print_function, unicode_literals

Expand Down Expand Up @@ -45,7 +37,6 @@
import_start_time = time.time()
print("[FS_LSTM] Loading models...")
def preprocess_sentence(sentence):
# sentence = unicode_to_ascii(sentence.lower().strip())
num_digits = str.maketrans('', '', digits)

sentence = sentence.lower()
Expand Down Expand Up @@ -82,11 +73,6 @@ def max_length(tensor):
target_tensor = target_sentence_tokenizer.texts_to_sequences(target)
target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor, padding='post')

"""### Limit the size of the dataset to experiment faster (optional)
To train faster, we can limit the size of the dataset using **sample_size** sentences (of course, translation quality degrades with less data):
"""

max_target_length = max(len(t) for t in target_tensor)
print(max_target_length)
max_source_length = max(len(t) for t in source_tensor)
Expand Down
161 changes: 161 additions & 0 deletions gui/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" />
<meta http-equiv="x-ua-compatible" content="ie=edge" />
<title>NMT</title>
<link rel="stylesheet" href="css/mdb.min.css" />

<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Lato&display=swap" rel="stylesheet">

<style type="text/css">
#eng_ita{
height: auto;
}
#eng_ita::placeholder {
font-weight: bold;
opacity: .5;
color: #BDBDBD;
text-align: left;
}
body {
background-image: url("img/bgg.png");
background-repeat: no-repeat, repeat;
font-family: 'Lato', sans-serif;
background-position: center; /* Center the image */
background-size: cover; /* Resize the background image to cover the entire container */
background-position: 0 20px !important;
background-color: #fbfbfb;
}
</style>

</head>
<body>

<nav class="navbar navbar-light bg-white w-100">
<span class="navbar-brand mb-0 h1" style="margin-left: 16px;">Comparison of Machine-Translations Models</span>
<span class="navbar-brand mb-0 h1">[HLT] Acciaro, Esposito, Galloppi
<img src="img/logounipi.jpg" style="height: 48px; margin-left: 16px;">
</span>
</nav>

<div class="container">

<br>
<button type="button" id="model_0" class="btn btn-primary btn-md">IBM Model 1 (50k)</button>
<button type="button" id="model_1" class="btn btn-white btn-md">LSTM (Custom)</button>
<button type="button" id="model_2" class="btn btn-white btn-md">Transformer (Custom)</button>
<button type="button" id="model_3" class="btn btn-white btn-md">T5</button>
<button type="button" id="model_4" class="btn btn-white btn-md">Bert2Bert</button>
<button type="button" id="model_5" class="btn btn-white btn-md">T52Bert</button>
<button type="button" id="model_6" class="btn btn-danger btn-md">Helsinki</button>
<button type="button" id="model_7" class="btn btn-danger btn-md">DeepL</button>
<p id="selected_model" ></p>
<br> <br>

<button type="button" id="example_0"
class="btn btn-tag btn-rounded btn-white"
data-mdb-toggle="tooltip"
data-mdb-html="true"
title="Does she have any friends in Germany?">Example 0</button>

<button type="button" id="example_1"
class="btn btn-tag btn-rounded btn-white"
data-mdb-toggle="tooltip"
data-mdb-html="true"
title="Yesterday I got a new phone">Example 1</button>

<button type="button" id="example_2"
class="btn btn-tag btn-rounded btn-white"
data-mdb-toggle="tooltip"
data-mdb-html="true"
title="Who wants to live forever?">Example 2</button>

<br> <br>

<div class="card">
<div class="card-body">
<input type="email"
class="form-control rounded shadow-none"
placeholder="Write here your sentence" id="eng_ita">
<br>
<button id="translate_visible" type="button" class="btn btn-primary">Translate</button>
<button id="translate_hidden" class="btn btn-primary" type="button" disabled>
<span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span>
Loading...
</button>
<br>
<br><p id="result" class="card-text"></p>


</div>
</div>
</div>


<script>
$(document).ready(function () {
    // Endpoint of the backend that performs the translation.
    const TRANSLATE_URL = "http://localhost:5000/translate";
    // Shown in the result area when the request does not succeed.
    const FAILURE_MESSAGE = "Translation failed. Please try again.";

    // Model selector buttons, in page order.
    const models = [$("#model_0"), $("#model_1"), $("#model_2"), $("#model_3"),
                    $("#model_4"), $("#model_5"), $("#model_6"), $("#model_7")];

    // Buttons 6 and 7 use "btn-danger" when not selected; all others use "btn-white".
    function idleClass(index) {
        return (index === 6 || index === 7) ? "btn-danger" : "btn-white";
    }

    // Hidden element holding the label of the selected model; its text is
    // sent to the backend as the "model" parameter.
    // NOTE(review): the initial selection is the first button's label --
    // confirm the backend still handles that model name.
    const selectedModel = $("#selected_model");
    selectedModel.hide();
    selectedModel.text(models[0].text());

    // Example buttons and the sentence each one puts into the input field.
    const examples = [$("#example_0"), $("#example_1"), $("#example_2")];
    const examples_sentence = ["Does she have any friends in Germany?",
                               "Yesterday I got a new phone",
                               "Who wants to live forever?"];

    // Selecting a model resets every button to its idle style, highlights
    // the clicked one and records its label.
    models.forEach(function (button, i) {
        button.click(function () {
            models.forEach(function (other, j) {
                other.removeClass("btn-primary").addClass(idleClass(j));
            });
            button.removeClass("btn-white").removeClass("btn-danger").addClass("btn-primary");
            selectedModel.text(button.text());
        });
    });

    examples.forEach(function (button, i) {
        button.click(function () {
            $("#eng_ita").val(examples_sentence[i]);
        });
    });

    $("#translate_hidden").hide();

    // Swap the "Translate" button for the loading spinner and clear the
    // previous result. "#result" is a <p>, so .text() is required; .val()
    // has no effect on it.
    function pre_execution() {
        $("#translate_visible").hide();
        $("#translate_hidden").show();
        $("#result").text("");
    }

    // Restore the "Translate" button and display the given text.
    function post_execution(result) {
        $("#translate_visible").show();
        $("#translate_hidden").hide();
        $("#result").text(result);
    }

    $("#translate_visible").click(function () {
        pre_execution();
        // Passing the parameters as an object lets jQuery URL-encode them,
        // so characters such as "&", "#" or "+" in the sentence do not
        // corrupt the query string.
        $.get(TRANSLATE_URL, {
            model: selectedModel.text(),
            sentence: $("#eng_ita").val()
        }).done(function (data) {
            post_execution(data.response);
        }).fail(function () {
            // Without this the spinner would stay visible forever on error.
            post_execution(FAILURE_MESSAGE);
        });
    });

});
</script>

<!-- MDB -->
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mdb-ui-kit/4.3.0/mdb.min.js"></script>

</body>
</html>
Binary file added screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 71823b0

Please sign in to comment.