Skip to content

Commit

Permalink
Merge pull request #42 from maxhawkins/restful
Browse files Browse the repository at this point in the history
HTTP API Refactor
  • Loading branch information
strob committed Nov 30, 2015
2 parents fcc7877 + 80f3edb commit 496bff3
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 84 deletions.
5 changes: 4 additions & 1 deletion gentle/language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,10 @@ def make_bigram_language_model(kaldi_seq, proto_langdir='PROTO_LANGDIR'):
txt_fst_file.name,
hclg_filename])
except Exception, e:
os.unlink(hclg_filename)
try:
os.unlink(hclg_filename)
except:
pass
raise e
finally:
os.unlink(txt_fst_file.name)
Expand Down
172 changes: 92 additions & 80 deletions serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@
from gentle.language_model_transcribe import lm_transcribe, write_csv
import gentle

DATADIR = get_datadir('webdata')

def _next_id():
uid = None
while uid is None or os.path.exists(os.path.join(DATADIR, uid)):
uid = uuid.uuid4().get_hex()[:8]
return uid

# The `ffmpeg.to_wav` function doesn't set headers properly for web
# browser playback.
def to_wav(infile, outfile):
Expand All @@ -33,83 +25,72 @@ def to_wav(infile, outfile):
'-acodec', 'pcm_s16le',
outfile])

class Status(Resource):
def __init__(self):
Resource.__init__(self)
self.uploads = {}

def new_upload(self, uid):
self.uploads[uid] = TranscriptionStatus()
self.putChild(uid, self.uploads[uid])

def set_status(self, uid, status, text):
self.uploads[uid].cur_status = status
self.uploads[uid].status_text = text
class Status():
def __init__(self, status='Started', text=''):
self.status = status
self.text = text

class TranscriptionStatus(Resource):
def __init__(self):
class StatusController(Resource):
def __init__(self, uid, status_store):
Resource.__init__(self)
self.cur_status = "Started"
self.status_text = ""

self.uid = uid
self.status_store = status_store
def render_GET(self, req):
status = self.status_store.get(self.uid, Status())
return json.dumps({
"status": self.cur_status,
"text": self.status_text})

class Uploader(Resource):
def __init__(self, status):
Resource.__init__(self)
self.status = status

def render_POST(self, req):
uid = _next_id()

self.status.new_upload(uid)

outdir = os.path.join(DATADIR, uid)
"status": status.status,
"text": status.text
})

class Transcriber():
def __init__(self, data_dir, status_store):
self.data_dir = data_dir
self.status_store = status_store

def out_dir(self, uid):
return os.path.join(self.data_dir, 'transcriptions', uid)

# TODO(maxhawkins): refactor so this is returned by transcribe()
def next_id(self):
uid = None
while uid is None or os.path.exists(os.path.join(self.data_dir, uid)):
uid = uuid.uuid4().get_hex()[:8]
return uid

def transcribe(self, uid, tran, audio):
outdir = os.path.join(self.data_dir, 'transcriptions', uid)
os.makedirs(outdir)

open(os.path.join(outdir, 'transcript.txt'), 'w').write(
req.args['transcript'][0])

data = req.args['audio'][0]
open(os.path.join(outdir, 'upload'), 'w').write(
data)

reactor.callInThread(self.transcribe, uid, req=req)

req.redirect("/status.html#%s" % (uid))
req.finish()

return NOT_DONE_YET

def onpartial(self, res, uid):
logging.info("partial results for %s, %s", uid, str(res))

self.status.set_status(uid, "Transcribing", json.dumps(res))

def transcribe(self, uid, req=None):
outdir = os.path.join(DATADIR, uid)
tran_path = os.path.join(outdir, 'transcript.txt')
with open(tran_path, 'w') as f:
f.write(tran)
audio_path = os.path.join(outdir, 'upload')
with open(audio_path, 'w') as f:
f.write(audio)

wavfile = os.path.join(outdir, 'a.wav')
self.status.set_status(uid, "Encoding", "")
self.status_store[uid] = Status("Encoding", "")

if to_wav(os.path.join(outdir, 'upload'), wavfile) != 0:
self.status.set_status(uid, "Error", "Encoding failed. Make sure that you've uploaded a valid media file.")
return
self.status_store[uid] = Status("Error", "Encoding failed. Make sure that you've uploaded a valid media file.")
return

transcript = open(os.path.join(outdir, 'transcript.txt')).read()

self.status.set_status(uid, "Starting transcription", "")
self.status_store[uid] = Status("Starting transcription", "")

def onpartial(res):
logging.info("partial results for %s, %s", uid, str(res))

self.status_store[uid] = Status("Transcribing", json.dumps(res))

# Run transcription
ret = lm_transcribe(wavfile,
transcript,
# XXX: should be configurable
get_resource('PROTO_LANGDIR'),
get_resource('data/nnet_a_gpu_online'),
partial_cb=self.onpartial,
partial_kwargs={"uid": uid})
transcript,
# XXX: should be configurable
get_resource('PROTO_LANGDIR'),
get_resource('data/nnet_a_gpu_online'),
partial_cb=onpartial)

# Save output to JSON and CSV
json.dump(ret,
Expand All @@ -122,27 +103,58 @@ def transcribe(self, uid, req=None):
# ...and remove the original upload
os.unlink(os.path.join(outdir, 'upload'))

self.status.set_status(uid, "Done", "")
self.status_store[uid] = Status("Done", "")

logging.info('done with transcription.')

def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0):
return ret

class TranscriptionsController(Resource):
def __init__(self, status_store, transcriber):
Resource.__init__(self)
self.status_store = status_store
self.transcriber = transcriber

def getChild(self, uid, req):
out_dir = self.transcriber.out_dir(uid)
trans_ctrl = File(out_dir)

stats_ctrl = StatusController(uid, self.status_store)
trans_ctrl.putChild('status', stats_ctrl)
return trans_ctrl

def render_POST(self, req):
uid = self.transcriber.next_id()

tran = req.args['transcript'][0]
audio = req.args['audio'][0]
if 'async' in req.args and req.args['async'][0] == 'false':
result = self.transcriber.transcribe(uid, tran, audio)
return json.dumps(result)

reactor.callInThread(self.transcriber.transcribe, uid, tran, audio)

req.redirect("/status.html#%s" % (uid))
req.finish()

return NOT_DONE_YET

def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0, data_dir=get_datadir('webdata')):
logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

if not os.path.exists(DATADIR):
os.makedirs(DATADIR)
if not os.path.exists(data_dir):
os.makedirs(data_dir)

f = File(DATADIR)
f = File(data_dir)

f.putChild('', File(get_resource('www/index.html')))
f.putChild('status.html', File(get_resource('www/status.html')))

stats = Status()
f.putChild("status", stats)

up = Uploader(stats)
status_store = {}

f.putChild('transcribe', up)
trans = Transcriber(data_dir, status_store)
trans_ctrl = TranscriptionsController(status_store, trans)
f.putChild('transcriptions', trans_ctrl)

s = Site(f)
logging.info("about to listen")
Expand Down
2 changes: 1 addition & 1 deletion www/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
<div id="header">
<h1 class="home"><a href="/">Gentle</a></h1>
</div>
<form action="/transcribe" method="POST" enctype="multipart/form-data">
<form action="/transcriptions" method="POST" enctype="multipart/form-data">
Audio:<br>
<input type=file name=audio><br>
<br>
Expand Down
4 changes: 2 additions & 2 deletions www/status.html
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ <h1 class="home"><a href="/">Gentle</a></h1>

function get_status(cb) {
var xhr = new XMLHttpRequest();
xhr.open("GET", "/status/" + UID, true);
xhr.open("GET", "/transcriptions/" + UID + "/status", true);
xhr.onload = function() {
cb(JSON.parse(this.responseText));
}
Expand All @@ -62,7 +62,7 @@ <h1 class="home"><a href="/">Gentle</a></h1>
$status.innerHTML = "Checking status..."
get_status(function(ret) {
if(ret["status"] == "Done") {
window.location = "http://" + window.location.host + "/" + UID + "/";
window.location = "http://" + window.location.host + "/transcriptions/" + UID + "/";
return;
}
else if(ret["status"] == "Error") {
Expand Down

0 comments on commit 496bff3

Please sign in to comment.