Merge pull request #148 from calpoly-csai/resurrecting-audio-metadata

AudioMetadata lives again!!!!
calpoly-csai · May 17, 2020 · 35762df · 35762df
2 parents d329427 + 6531966
commit 35762df
Show file tree

Hide file tree

Showing 7 changed files with 88 additions and 55 deletions.
diff --git a/Entity/AudioSampleMetaData.py b/Entity/AudioSampleMetaData.py
@@ -26,14 +26,15 @@ class AudioSampleMetaData(Base):
     tone = Column(String(255))
     timestamp = Column(Integer)
     username = Column(String(255))
-    # Text chosen because filename is standarized concatenation of above fields
-    filename = Column(Text)
+    emphasis = Column(String(255))
+    script = Column(String(255))
+    audio_file_id = Column(String(1024))
     is_view = False
 
     def __repr__(self):
         string = "<AudioSampleMetaData ( id={}, is_wake_word={}, "
         string += "first_name={}, last_name={}, gender={}, noise_level={}, "
-        string += "location={}, tone={}, timestamp={}, username={} )>"
+        string += "location={}, tone={}, timestamp={}, username={}, emphasis={}, script={}, audio_file_id={} )>"
         return string.format(
             self.id,
             self.is_wake_word,
@@ -45,4 +46,7 @@ def __repr__(self):
             self.tone,
             self.timestamp,
             self.username,
+            self.emphasis,
+            self.script,
+            self.audio_file_id
         )
diff --git a/database_wrapper.py b/database_wrapper.py
@@ -67,7 +67,9 @@
         "tone",
         "timestamp",
         "username",
-        "filename",
+        "audio_file_id",
+        "script",
+        "emphasis"
     ],
     Clubs: [
         "club_name",
@@ -777,7 +779,9 @@ def format_audio_sample_meta_data_dict(self, data_dict: dict) -> dict:
             "tone": "serious-but-not-really",
             "timestamp": 1577077883,
             "username": "guest",
-            "filename": "ww_q_serious-but-not-really_here_m_doe_jj_1577077883_guest.wav"  # noqa because too hard.
+            "emphasis": "us",
+            "script": "Nimbus",
+            "audio_file_id": "<Id from Google Drive>"  # noqa because too hard.
         }
 
         Raises:

diff --git a/flask_api.py b/flask_api.py
@@ -122,6 +122,8 @@ def handle_question():
 def save_a_recording():
     """Given the audio metadata & audio file, resamples it, saves to storage.
     """
+    if("wav_file" not in request.files):
+         return "Please provide an audio file under the key 'wav_file' in your FormData", BAD_REQUEST
     validator = WakeWordValidator()
     formatter = WakeWordFormatter()
     data = request.form
@@ -133,19 +135,17 @@ def save_a_recording():
             return str(err), BAD_REQUEST
     formatted_data = formatter.format(data)
     filename = create_filename(formatted_data)
+    try:
+         file_id = save_audiofile(filename, request.files["wav_file"])
+    except Exception as err:
+         return f"Failed to save audio file because... {err}", BAD_REQUEST
 
-    # Save the audiofile first because if error then we stop here
-    # We do not want to save any metadata to the NimbusDatabase
-    #   if the audio fails to save.
-    save_audiofile(filename, request.files["wav_file"])
-
-    # Let's also save the filename to the database for quick reference
-    formatted_data["filename"] = filename
+    formatted_data["audio_file_id"] = file_id
 
     initializeDB()
 
     try:
-        db.save_audio_sample_meta_data(formatted_data)
+        db.insert_entity(AudioSampleMetaData, formatted_data)
     except BadDictionaryKeyError as e:
         return str(e), BAD_REQUEST
     except BadDictionaryValueError as e:
@@ -158,7 +158,7 @@ def save_a_recording():
         # HINT: security always wins
         raise e
 
-    return filename
+    return f"Successfully stored audiofile as '{filename}'", SUCCESS
 
 
 @app.route("/new_data/office_hours", methods=["POST"])
@@ -479,7 +479,18 @@ def resample_audio():
 
 
 def save_audiofile(filename, content):
-    """Actually save the file into Google Drive."""
+    """
+     Saves audio to the club Google Drive folder.
+
+     Parameters
+     ----------
+     - `filename:str` the name of the file, formatted by `create_filename()`
+     - `content: file` audio file to store
+
+     Returns
+     -------
+     The Google Drive file id that can be used to retrieve the file
+     """
     # Initialize our google drive authentication object using saved credentials,
     # or through the command line
     gauth = GoogleAuth()
@@ -499,6 +510,7 @@ def save_audiofile(filename, content):
     # Set the content of the file to the POST request's wav_file parameter.
     file.content = content
     file.Upload()  # Upload file.
+    return file["id"]
 
 
 def get_folder_id():

diff --git a/modules/validators.py b/modules/validators.py
@@ -84,6 +84,8 @@ def __init__(self, validators=None):
             "firstName": lambda firstName: type(firstName) == str,
             "timestamp": lambda timestamp: str.isdigit(timestamp),
             "username": lambda username: type(username) == str,
+            "emphasis": lambda emphasis: type(emphasis) == str,
+            "script": lambda script: type(script) == str,
         }
 
     def validate(self, data):
@@ -98,8 +100,8 @@ def validate(self, data):
                 value = data[key]
                 if not validator(value):
                     issues[key] = WakeWordValidatorIssue.INVALID
-            except BadRequestKeyError as e:
-                print("caught BadRequestKeyError: ", e.args)
+            except (KeyError, BadRequestKeyError) as e:
+                print("Couldn't find", e.args, "when validating data")
                 issues[key] = WakeWordValidatorIssue.DOES_NOT_EXIST
         return issues
 
@@ -118,6 +120,9 @@ def fix(self, data, issues):
                 elif key == "timestamp":
                     form[key] = int(time.time())
                     print("fixed timestamp", form[key])
+                elif key == "script" and form["isWakeWord"] == "ww":
+                     form[key] = "nimbus"
+                     print("Added 'script' value of 'nimbus'")
                 else:
                     raise WakeWordValidatorError(
                         f"Required audio metadata '{key}' was not provided"

diff --git a/tests/test_database_wrapper.py b/tests/test_database_wrapper.py
@@ -43,7 +43,9 @@
     "tone": "serious-but-not-really",
     "timestamp": 1577077883,
     "username": "guest",
-    "filename": "ww_q_serious-fake_m_doe_jj_1577077883_guest.wav",
+    "audio_file_id": "OZ234FSDWER5GDF234F4G5",
+    "script": "Nimbus",
+    "emphasis": "us"
 }
 
 TEST_CONFIG_FILENAME = "testConfig.json"

diff --git a/tests/test_flask_api.py b/tests/test_flask_api.py
@@ -85,8 +85,8 @@ def test_new_data_wakeword(mock_db, mock_formatter, mock_validator, mock_create_
         data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
 
     # Verify that db client was told to save data, and that the newly generated filename was returned
-    mock_db.save_audio_sample_meta_data.assert_called_once()
-    assert resp.data == b"test_filename"
+    mock_db.insert_entity.assert_called_once()
+    assert resp.data == b"Successfully stored audiofile as 'test_filename'"
 
 
 @patch("flask_api.WakeWordValidator")
@@ -96,40 +96,6 @@ def test_new_data_wakeword_validator_issues(mock_validator, client):
     mock_validator.return_value = mock_validator_instance
 
     # Verify that the client will catch and throw an error if the validator fails
-    resp = client.post('/new_data/wakeword', data={"dummy1": "dummy2"})
-    assert resp.status_code == BAD_REQUEST
-    assert resp.data == TEST_ERROR.encode()
-
-
-@patch("flask_api.save_audiofile")
-@patch("flask_api.create_filename", return_value="test_filename")
-@patch("flask_api.WakeWordValidator")
-@patch("flask_api.WakeWordFormatter")
-@patch("flask_api.db")
-def test_new_data_wakeword_db_error(mock_db, mock_formatter, mock_validator, mock_create_filename,
-                                    mock_save_audiofile, client):
-    mock_formatter_instance = Mock()
-    mock_formatter_instance.format.return_value = {"filename": "dummy"}
-    mock_formatter.return_value = mock_formatter_instance
-
-    # Verify that the client will catch and throw an error for specific exceptions
-    mock_db.save_audio_sample_meta_data.side_effect = BadDictionaryKeyError(TEST_ERROR)
-    resp = client.post(
-        '/new_data/wakeword',
-        data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
-    assert resp.status_code == BAD_REQUEST
-    assert resp.data == TEST_ERROR.encode()
-
-    mock_db.save_audio_sample_meta_data.side_effect = BadDictionaryValueError(TEST_ERROR)
-    resp = client.post(
-        '/new_data/wakeword',
-        data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
-    assert resp.status_code == BAD_REQUEST
-    assert resp.data == TEST_ERROR.encode()
-
-    mock_db.save_audio_sample_meta_data.side_effect = NimbusDatabaseError(TEST_ERROR)
-    resp = client.post(
-        '/new_data/wakeword',
-        data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
+    resp = client.post('/new_data/wakeword', data={"dummy1": "dummy2", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
     assert resp.status_code == BAD_REQUEST
     assert resp.data == TEST_ERROR.encode()
diff --git a/tests/test_validators.py b/tests/test_validators.py
@@ -0,0 +1,40 @@
+import json
+import pytest
+
+from modules.validators import WakeWordValidator, WakeWordValidatorError
+wake_word_data = {
+"isWakeWord":"true",
+"noiseLevel":"l",
+"tone":"serious",
+"location":"Cal Poly San Luis Obispo",
+"gender":"m",
+"lastName":"Waidhofer",
+"firstName":"John",
+"timestamp": "1589744893",
+"username":"waidhofer",
+"emphasis":"Emphasized",
+"script":"testing 123",
+"test":"foo"
+}
+
+important_fields = ["isWakeWord",
+"noiseLevel",
+"tone",
+"location",
+"gender",
+"lastName",
+"firstName",
+"emphasis",
+"script"]
+def test_wake_word_missing_values():
+    validator = WakeWordValidator()
+    for field in important_fields:
+        data = wake_word_data.copy()
+        data.pop(field)
+        issues = validator.validate(data)
+        print(issues)
+
+        assert len(issues) == 1
+
+        with pytest.raises(WakeWordValidatorError):
+            data = validator.fix(data,issues)