Avanzamenti

robbespo00 · Jul 20, 2022 · 71823b0 · 71823b0
1 parent 8a07001
commit 71823b0
Show file tree

Hide file tree

Showing 12 changed files with 218 additions and 39 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/models/
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/README.md b/README.md
@@ -23,11 +23,34 @@
         -->
 </div>
 
+### Abstract
+TODO: add abstract
+
 Master Degree (Artificial Intelligence curriculum)<br>
 **HLT** course, Academic Year: 2021/2022<br>
-Date: May 2022<br>
 
-## Description
-- x 
-### Abstract
-- c
+## 🔧 Setup
+The weight files of the models are not included in this repository because they are very large.
+Therefore, before running the program, download the weight files with the following command:
+
+    ./download_weights.sh
+
+
+## 🖥 GUI
+<div align="center">
+ <p>
+    <img style="" src="./screenshot.png" >  <br>
+  </p>
+</div>
+
+### Main Files
+
+|File   |Description   |
+|---|---|
+| [main_monk.py](./main_monk.py)  | Our best model for MONKs' problems |
+| [main_cup.py](./main_cup.py) | Our best model for CUP's problem |
+| [model_selection_monk.py](./model_selection_monk.py) | The starting point of model selection for MONKs' problems  |
+| [model_selection_cup.py](./model_selection_cup.py) |  The starting point of model selection for CUP's problem |
+| [model_selection_cup_distributed.py](./validation/distribuited_computing/model_selection_cup_distributed.py) | The starting point of __distributed__ model selection for CUP's problem (Note: it requires a database and its initialization) |
+| [AIAIAI_ML-CUP21-TS.csv](./AIAIAI_ML-CUP21-TS.csv) | Our Results for the Blind TS |
+| [report.pdf](./report.pdf) | Our report |
diff --git a/app.py b/app.py
@@ -1,6 +1,8 @@
-from flask import Flask, request, jsonify
+from flask import Flask, request, jsonify, render_template
 from flask_cors import CORS, cross_origin
 import time
+from controller.download_weights_if_necessary import download_weights_if_necessary
+download_weights_if_necessary()
 
 print("[SERVER] Server loading...")
 import_start_time = time.time()
@@ -9,7 +11,7 @@
 from controller.FS_LSTMController import fs_lstm_translate
 from controller.DeepL_Controller import deepl_translate
 from controller.Helsinki_Controller import helsinki_translate
-from controller.IBMModel_Controller import ibm_translate
+#from controller.IBMModel_Controller import ibm_translate
 from controller.T5_Controller import t5_translate
 
 app = Flask(__name__, static_url_path='/static')
@@ -18,19 +20,14 @@
 
 print("[SERVER] Server loaded in ", time.time()-import_start_time, " seconds")
 
-@app.route('/')
-def hello_world():  # put application's code here
-    return "render_template('index.html')"
-
-
 @cross_origin()
 @app.route('/translate')
 def query_example():
     model = request.args.get('model')
     sentence = request.args.get('sentence')
 
-    if model == 'IBM Model 1 (50k)' or model == 'IBM Model 1':
-        return jsonify({"response": ibm_translate(sentence)})
+#    if model == 'IBM Model 1 (50k)' or model == 'IBM Model 1':
+    #    return jsonify({"response": ibm_translate(sentence)})
     if model == 'LSTM (Custom)':
         return jsonify({"response": fs_lstm_translate(sentence)})
     if model == 'Transformer (Custom)':

diff --git a/controller/DeepL_Controller.py b/controller/DeepL_Controller.py
@@ -1,4 +1,3 @@
-import time
 import requests
 
 def deepl_translate(sentence):

diff --git a/controller/Helsinki_Controller.py b/controller/Helsinki_Controller.py
@@ -1,18 +1,10 @@
 import warnings
-
 warnings.filterwarnings("ignore")
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
-import warnings
-
-from transformers import AutoModelForSeq2SeqLM
-import torch # !! Required !!
-warnings.filterwarnings("ignore")
-from nltk.translate.bleu_score import sentence_bleu
 import nltk
 assert (nltk.__version__== '3.2.4')
 
-from transformers import AutoTokenizer
-
 it_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-it")
 it_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-it")
 

diff --git a/controller/download_weights_if_necessary.py b/controller/download_weights_if_necessary.py
@@ -0,0 +1,9 @@
+import os
+
+def download_weights_if_necessary():
+    path = '../models'
+    weight_present = os.path.isdir(path)
+
+    if not weight_present:
+        import subprocess
+        subprocess.call(['sh', './download_weights.sh'])
diff --git a/download_weights.sh b/download_weights.sh
@@ -0,0 +1,5 @@
+wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1fBe7syWG0kBrd1zsX6u7nJwtCcLj67QZ' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1fBe7syWG0kBrd1zsX6u7nJwtCcLj67QZ" -O weights.zip && rm -rf /tmp/cookies.txt
+
+unzip weights.zip
+
+rm weights.zip
diff --git a/evaluation/DeepL_Eval.py b/evaluation/DeepL_Eval.py
@@ -6,7 +6,7 @@
 
 from nltk.translate.bleu_score import sentence_bleu
 import nltk
-#assert (nltk.__version__== '3.2.4')
+assert (nltk.__version__== '3.2.4')
 
 def clean(word):
     clean_word = word.lower()

diff --git a/evaluation/FS_LSTM.py b/evaluation/FS_LSTM.py
@@ -1,12 +1,4 @@
 # -*- coding: utf-8 -*-
-"""deprecated - lstm-nmt-with-attention-trained-with-20-epochs.ipynb
-
-Automatically generated by Colaboratory.
-
-Original file is located at
-    https://colab.research.google.com/drive/1SFgGO6lzhA65sIF_3N5oLEzXOkMDgwqT
-"""
-
 # Commented out IPython magic to ensure Python compatibility.
 from __future__ import absolute_import, division, print_function, unicode_literals
 
@@ -45,7 +37,6 @@
 import_start_time = time.time()
 print("[FS_LSTM] Loading models...")
 def preprocess_sentence(sentence):
-    # sentence = unicode_to_ascii(sentence.lower().strip())
     num_digits = str.maketrans('', '', digits)
 
     sentence = sentence.lower()
@@ -82,11 +73,6 @@ def max_length(tensor):
 target_tensor = target_sentence_tokenizer.texts_to_sequences(target)
 target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor, padding='post')
 
-"""### Limit the size of the dataset to experiment faster (optional)
-
- To train faster, we can limit the size of the dataset using **sample_size** sentences (of course, translation quality degrades with less data):
-"""
-
 max_target_length = max(len(t) for t in target_tensor)
 print(max_target_length)
 max_source_length = max(len(t) for t in source_tensor)

diff --git a/gui/index.html b/gui/index.html
@@ -0,0 +1,161 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <title>NMT</title>
+    <link rel="stylesheet" href="css/mdb.min.css" />
+
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Lato&display=swap" rel="stylesheet">
+
+    <style type="text/css">
+      #eng_ita{
+        height: auto;
+      }
+      #eng_ita::placeholder {
+        font-weight: bold;
+        opacity: .5;
+        color: #BDBDBD;
+        text-align: left;
+      }
+      body {
+        background-image: url("img/bgg.png");
+        background-repeat: no-repeat, repeat;
+        font-family: 'Lato', sans-serif;
+        background-position: center; /* Center the image */
+        background-size: cover; /* Resize the background image to cover the entire container */
+        background-position: 0 20px !important;
+        background-color: #fbfbfb;
+      }
+    </style>
+
+  </head>
+  <body>
+
+  <nav class="navbar navbar-light bg-white w-100">
+    <span class="navbar-brand mb-0 h1" style="margin-left: 16px;">Comparison of Machine-Translations Models</span>
+    <span class="navbar-brand mb-0 h1">[HLT] Acciaro, Esposito, Galloppi
+        <img src="img/logounipi.jpg" style="height: 48px; margin-left: 16px;">
+      </span>
+  </nav>
+
+  <div class="container">
+
+    <br>
+    <button type="button" id="model_0" class="btn btn-primary btn-md">IBM Model 1 (50k)</button>
+    <button type="button" id="model_1" class="btn btn-white btn-md">LSTM (Custom)</button>
+    <button type="button" id="model_2" class="btn btn-white btn-md">Transformer (Custom)</button>
+    <button type="button" id="model_3" class="btn btn-white btn-md">T5</button>
+    <button type="button" id="model_4" class="btn btn-white btn-md">Bert2Bert</button>
+    <button type="button" id="model_5" class="btn btn-white btn-md">T52Bert</button>
+    <button type="button" id="model_6" class="btn btn-danger btn-md">Helsinki</button>
+    <button type="button" id="model_7" class="btn btn-danger btn-md">DeepL</button>
+      <p id="selected_model" ></p>
+    <br>   <br>
+
+    <button type="button" id="example_0"
+            class="btn btn-tag btn-rounded btn-white"
+            data-mdb-toggle="tooltip"
+            data-mdb-html="true"
+            title="Does she have any friends in Germany?">Example 0</button>
+
+    <button type="button" id="example_1"
+            class="btn btn-tag btn-rounded btn-white"
+            data-mdb-toggle="tooltip"
+            data-mdb-html="true"
+            title="Yesterday I got a new phone">Example 1</button>
+
+    <button type="button" id="example_2"
+            class="btn btn-tag btn-rounded btn-white"
+            data-mdb-toggle="tooltip"
+            data-mdb-html="true"
+            title="Who wants to live forever?">Example 2</button>
+
+    <br>   <br>
+
+    <div class="card">
+      <div class="card-body">
+        <input type="email"
+               class="form-control rounded shadow-none"
+               placeholder="Write here your sentence" id="eng_ita">
+        <br>
+        <button id="translate_visible" type="button" class="btn btn-primary">Translate</button>
+        <button id="translate_hidden" class="btn btn-primary" type="button" disabled>
+          <span class="spinner-border spinner-border-sm" role="status" aria-hidden="true"></span>
+          Loading...
+        </button>
+        <br>
+        <br><p id="result" class="card-text"></p>
+
+
+      </div>
+    </div>
+  </div>
+
+
+  <script>
+    $(document).ready(function(){
+      const models = [$("#model_0"),$("#model_1"), $("#model_2"),  $("#model_3"),  $("#model_4"),  $("#model_5"), $("#model_6"), $("#model_7")];
+
+      $("#selected_model").hide();
+      $("#selected_model").text(models[0].text());
+
+      const examples = [$("#example_0"), $("#example_1"), $("#example_2")];
+      const examples_sentence = ["Does she have any friends in Germany?",
+                                    "Yesterday I got a new phone",
+                                      "Who wants to live forever?"
+                                  ];
+
+      for (let i = 0; i < models.length; i++) {
+        models[i].click(function() {
+          for (let j = 0; j < models.length; j++) {
+            if(j==6 || j==7){
+              models[j].removeClass("btn-primary").addClass("btn-danger");
+            }else{
+              models[j].removeClass("btn-primary").addClass("btn-white");
+            }
+          }
+          models[i].removeClass("btn-white").removeClass("btn-danger").addClass("btn-primary");
+          $("#selected_model").text(models[i].text());
+        });
+      }
+
+      for (let i = 0; i < examples.length; i++) {
+        examples[i].click(function() {
+          let text = examples_sentence[i];
+          $("#eng_ita").val(text);
+        });
+      }
+
+      $("#translate_hidden").hide();
+
+      function pre_execution(){
+        $("#translate_visible").hide();
+        $("#translate_hidden").show();
+        $("#result").val("");
+      }
+      function post_execution(result){
+        $("#translate_visible").show();
+        $("#translate_hidden").hide();
+        $("#result").text(result);
+      }
+
+      $("#translate_visible").click(function () {
+        pre_execution();
+        $.get( "http://localhost:5000/translate?model="+$("#selected_model").text()+"&sentence="+$("#eng_ita").val(), function( data ) {
+          post_execution(data.response);
+        });
+      });
+
+    });
+  </script>
+
+  <!-- MDB -->
+  <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mdb-ui-kit/4.3.0/mdb.min.js"></script>
+
+  </body>
+</html>
diff --git a/screenshot.png b/screenshot.png