-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspeech_rest.py
92 lines (80 loc) · 2.94 KB
/
speech_rest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Speech API sample application using the REST API for batch
processing."""
# [START import_libraries]
import argparse
import base64
import json
from googleapiclient import discovery
import httplib2
from oauth2client.client import GoogleCredentials
# [END import_libraries]
# [START authenticating]
DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?'
'version={apiVersion}')
# CONSTANTS
#ENCODING = 'LINEAR16'
ENCODING = 'FLAC'
#RATE = 16000
RATE = 44100
# Application default credentials provided by env variable
# GOOGLE_APPLICATION_CREDENTIALS
def get_speech_service():
credentials = GoogleCredentials.get_application_default().create_scoped(
['https://www.googleapis.com/auth/cloud-platform'])
http = httplib2.Http()
credentials.authorize(http)
return discovery.build(
'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
# [END authenticating]
def main(speech_file):
"""Transcribe the given audio file.
Args:
speech_file: the name of the audio file.
"""
# [START construct_request]
with open(speech_file, 'rb') as speech:
# Base64 encode the binary audio file for inclusion in the JSON
# request.
speech_content = base64.b64encode(speech.read())
service = get_speech_service()
service_request = service.speech().syncrecognize(
body={
'config': {
# There are a bunch of config options you can specify. See
# https://goo.gl/EPjAup for the full list.
'encoding': ENCODING, # raw 16-bit signed LE samples
'sampleRate': RATE, # 16 khz
# See https://goo.gl/DPeVFW for a list of supported languages.
'languageCode': 'en-US', # a BCP-47 language tag
},
'audio': {
'content': speech_content.decode('UTF-8')
}
})
# [END construct_request]
# [START send_request]
response = service_request.execute()
print(json.dumps(response))
# [END send_request]
# [START run_application]
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'speech_file', help='Full path of audio file to be recognized')
args = parser.parse_args()
main(args.speech_file)
# [END run_application]