Skip to content

Commit

Permalink
Merge pull request #148 from calpoly-csai/resurrecting-audio-metadata
Browse files Browse the repository at this point in the history
AudioMetadata lives again!!!!
  • Loading branch information
Waidhoferj authored May 17, 2020
2 parents d329427 + 6531966 commit 35762df
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 55 deletions.
10 changes: 7 additions & 3 deletions Entity/AudioSampleMetaData.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ class AudioSampleMetaData(Base):
tone = Column(String(255))
timestamp = Column(Integer)
username = Column(String(255))
# Text chosen because filename is a standardized concatenation of the above fields
filename = Column(Text)
emphasis = Column(String(255))
script = Column(String(255))
audio_file_id = Column(String(1024))
is_view = False

def __repr__(self):
string = "<AudioSampleMetaData ( id={}, is_wake_word={}, "
string += "first_name={}, last_name={}, gender={}, noise_level={}, "
string += "location={}, tone={}, timestamp={}, username={} )>"
string += "location={}, tone={}, timestamp={}, username={}, emphasis={}, script={}, audio_file_id={} )>"
return string.format(
self.id,
self.is_wake_word,
Expand All @@ -45,4 +46,7 @@ def __repr__(self):
self.tone,
self.timestamp,
self.username,
self.emphasis,
self.script,
self.audio_file_id
)
8 changes: 6 additions & 2 deletions database_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@
"tone",
"timestamp",
"username",
"filename",
"audio_file_id",
"script",
"emphasis"
],
Clubs: [
"club_name",
Expand Down Expand Up @@ -777,7 +779,9 @@ def format_audio_sample_meta_data_dict(self, data_dict: dict) -> dict:
"tone": "serious-but-not-really",
"timestamp": 1577077883,
"username": "guest",
"filename": "ww_q_serious-but-not-really_here_m_doe_jj_1577077883_guest.wav" # noqa because too hard.
"emphasis": "us",
"script": "Nimbus",
"audio_file_id": Id from Google Drive # noqa because too hard.
}
Raises:
Expand Down
32 changes: 22 additions & 10 deletions flask_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ def handle_question():
def save_a_recording():
"""Given the audio metadata & audio file, resamples it, saves to storage.
"""
if("wav_file" not in request.files):
return "Please provide an audio file under the key 'wav_file' in your FormData", BAD_REQUEST
validator = WakeWordValidator()
formatter = WakeWordFormatter()
data = request.form
Expand All @@ -133,19 +135,17 @@ def save_a_recording():
return str(err), BAD_REQUEST
formatted_data = formatter.format(data)
filename = create_filename(formatted_data)
try:
file_id = save_audiofile(filename, request.files["wav_file"])
except Exception as err:
return f"Failed to save audio file because... {err}", BAD_REQUEST

# Save the audiofile first because if error then we stop here
# We do not want to save any metadata to the NimbusDatabase
# if the audio fails to save.
save_audiofile(filename, request.files["wav_file"])

# Let's also save the filename to the database for quick reference
formatted_data["filename"] = filename
formatted_data["audio_file_id"] = file_id

initializeDB()

try:
db.save_audio_sample_meta_data(formatted_data)
db.insert_entity(AudioSampleMetaData, formatted_data)
except BadDictionaryKeyError as e:
return str(e), BAD_REQUEST
except BadDictionaryValueError as e:
Expand All @@ -158,7 +158,7 @@ def save_a_recording():
# HINT: security always wins
raise e

return filename
return f"Successfully stored audiofile as '{filename}'", SUCCESS


@app.route("/new_data/office_hours", methods=["POST"])
Expand Down Expand Up @@ -479,7 +479,18 @@ def resample_audio():


def save_audiofile(filename, content):
"""Actually save the file into Google Drive."""
"""
Saves audio to the club Google Drive folder.
Parameters
----------
- `filename:str` the name of the file, formatted by `create_filename()`
- `content: file` audio file to store
Returns
-------
The Google Drive file id that can be used to retrieve the file
"""
# Initialize our google drive authentication object using saved credentials,
# or through the command line
gauth = GoogleAuth()
Expand All @@ -499,6 +510,7 @@ def save_audiofile(filename, content):
# Set the content of the file to the POST request's wav_file parameter.
file.content = content
file.Upload() # Upload file.
return file["id"]


def get_folder_id():
Expand Down
9 changes: 7 additions & 2 deletions modules/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def __init__(self, validators=None):
"firstName": lambda firstName: type(firstName) == str,
"timestamp": lambda timestamp: str.isdigit(timestamp),
"username": lambda username: type(username) == str,
"emphasis": lambda emphasis: type(emphasis) == str,
"script": lambda script: type(script) == str,
}

def validate(self, data):
Expand All @@ -98,8 +100,8 @@ def validate(self, data):
value = data[key]
if not validator(value):
issues[key] = WakeWordValidatorIssue.INVALID
except BadRequestKeyError as e:
print("caught BadRequestKeyError: ", e.args)
except (KeyError, BadRequestKeyError) as e:
print("Couldn't find", e.args, "when validating data")
issues[key] = WakeWordValidatorIssue.DOES_NOT_EXIST
return issues

Expand All @@ -118,6 +120,9 @@ def fix(self, data, issues):
elif key == "timestamp":
form[key] = int(time.time())
print("fixed timestamp", form[key])
elif key == "script" and form["isWakeWord"] == "ww":
form[key] = "nimbus"
print("Added 'script' value of 'nimbus'")
else:
raise WakeWordValidatorError(
f"Required audio metadata '{key}' was not provided"
Expand Down
4 changes: 3 additions & 1 deletion tests/test_database_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@
"tone": "serious-but-not-really",
"timestamp": 1577077883,
"username": "guest",
"filename": "ww_q_serious-fake_m_doe_jj_1577077883_guest.wav",
"audio_file_id": "OZ234FSDWER5GDF234F4G5",
"script": "Nimbus",
"emphasis": "us"
}

TEST_CONFIG_FILENAME = "testConfig.json"
Expand Down
40 changes: 3 additions & 37 deletions tests/test_flask_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def test_new_data_wakeword(mock_db, mock_formatter, mock_validator, mock_create_
data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})

# Verify that db client was told to save data, and that a success message containing the newly generated filename was returned
mock_db.save_audio_sample_meta_data.assert_called_once()
assert resp.data == b"test_filename"
mock_db.insert_entity.assert_called_once()
assert resp.data == b"Successfully stored audiofile as 'test_filename'"


@patch("flask_api.WakeWordValidator")
Expand All @@ -96,40 +96,6 @@ def test_new_data_wakeword_validator_issues(mock_validator, client):
mock_validator.return_value = mock_validator_instance

# Verify that the client will catch and throw an error if the validator fails
resp = client.post('/new_data/wakeword', data={"dummy1": "dummy2"})
assert resp.status_code == BAD_REQUEST
assert resp.data == TEST_ERROR.encode()


@patch("flask_api.save_audiofile")
@patch("flask_api.create_filename", return_value="test_filename")
@patch("flask_api.WakeWordValidator")
@patch("flask_api.WakeWordFormatter")
@patch("flask_api.db")
def test_new_data_wakeword_db_error(mock_db, mock_formatter, mock_validator, mock_create_filename,
mock_save_audiofile, client):
mock_formatter_instance = Mock()
mock_formatter_instance.format.return_value = {"filename": "dummy"}
mock_formatter.return_value = mock_formatter_instance

# Verify that the client will catch and throw an error for specific exceptions
mock_db.save_audio_sample_meta_data.side_effect = BadDictionaryKeyError(TEST_ERROR)
resp = client.post(
'/new_data/wakeword',
data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
assert resp.status_code == BAD_REQUEST
assert resp.data == TEST_ERROR.encode()

mock_db.save_audio_sample_meta_data.side_effect = BadDictionaryValueError(TEST_ERROR)
resp = client.post(
'/new_data/wakeword',
data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
assert resp.status_code == BAD_REQUEST
assert resp.data == TEST_ERROR.encode()

mock_db.save_audio_sample_meta_data.side_effect = NimbusDatabaseError(TEST_ERROR)
resp = client.post(
'/new_data/wakeword',
data={"test": "foo", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
resp = client.post('/new_data/wakeword', data={"dummy1": "dummy2", 'wav_file': (BytesIO(b'dummyText'), 'dummyfile.txt')})
assert resp.status_code == BAD_REQUEST
assert resp.data == TEST_ERROR.encode()
40 changes: 40 additions & 0 deletions tests/test_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import json
import pytest

from modules.validators import WakeWordValidator, WakeWordValidatorError
# Baseline wake-word metadata payload used by the tests in this module.
# Every value is a string; "test" is an extra key that does not appear in
# `important_fields` below.
wake_word_data = {
    "isWakeWord": "true",
    "noiseLevel": "l",
    "tone": "serious",
    "location": "Cal Poly San Luis Obispo",
    "gender": "m",
    "lastName": "Waidhofer",
    "firstName": "John",
    "timestamp": "1589744893",
    "username": "waidhofer",
    "emphasis": "Emphasized",
    "script": "testing 123",
    "test": "foo",
}

# Keys that the tests remove from the payload one at a time.
# "timestamp" and "username" are not listed here -- presumably because the
# validator can fill those in itself; confirm against WakeWordValidator.fix.
important_fields = [
    "isWakeWord",
    "noiseLevel",
    "tone",
    "location",
    "gender",
    "lastName",
    "firstName",
    "emphasis",
    "script",
]
def test_wake_word_missing_values():
    """Check that dropping any single important field is reported and not fixable.

    For each key in ``important_fields`` a copy of ``wake_word_data`` without
    that key is validated. The validator is expected to report exactly one
    issue, and ``fix`` is expected to raise ``WakeWordValidatorError`` for it.
    """
    validator = WakeWordValidator()
    for field in important_fields:
        # Work on a copy so each iteration starts from the full payload.
        data = wake_word_data.copy()
        data.pop(field)

        issues = validator.validate(data)

        # The message names the removed field so a failure is diagnosable
        # without relying on captured stdout.
        assert len(issues) == 1, (
            f"expected exactly one issue after removing '{field}', "
            f"got: {issues}"
        )

        # None of these fields can be repaired automatically with this
        # payload, so fix() must refuse.
        with pytest.raises(WakeWordValidatorError):
            validator.fix(data, issues)

0 comments on commit 35762df

Please sign in to comment.