Ich verwende den hier bereitgestellten Beispielcode und habe folgende Umsetzung: Google-Sprachdienst – es wird keine Transkription zurückgegeben
{"name": "3202776140236290963"}
Waiting for server processing...
Waiting for server processing...
{
"@type": "type.googleapis.com/google.cloud.speech.v1beta1.AsyncRecognizeResponse"
}
Und mein zurückgegebenes Objekt ist:
# [START import_libraries]
import argparse
import base64
import json
import time
from oauth2client.service_account import ServiceAccountCredentials
import googleapiclient.discovery
import googleapiclient as gac
# [END import_libraries]
# [START authenticating]
# Application default credentials provided by env variable
# GOOGLE_APPLICATION_CREDENTIALS
def get_speech_service(credentials):
    """Build a discovery-based client for the 'speech' API, version 'v1beta1'.

    Args:
        credentials: credentials object forwarded unchanged to
            ``googleapiclient.discovery.build``.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``.
    """
    speech_client = googleapiclient.discovery.build(
        'speech', 'v1beta1', credentials=credentials)
    return speech_client
def main(speech_file):
    """Transcribe the given audio file asynchronously.

    Reads and base64-encodes the file, submits it through the speech
    service's ``asyncrecognize`` method, polls the resulting long-running
    operation once per second until it reports ``done``, then prints the raw
    response and every transcript alternative it contains.

    Args:
        speech_file: the name of the audio file. The request declares the
            audio as LINEAR16 sampled at 16000 Hz with language en-US.

    Returns:
        The final operation dict returned by ``operations().get``. If the
        operation finished with an ``error`` entry instead of a ``response``
        entry, the error is printed and the operation is still returned.
    """
    # [START construct_request]
    with open(speech_file, 'rb') as speech:
        # Base64 encode the binary audio file for inclusion in the request.
        speech_content = base64.b64encode(speech.read())

    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        '/Users/user/Documents/google_cloud/myjson.json', scopes)
    service = get_speech_service(credentials)
    service_request = service.speech().asyncrecognize(
        body={
            'config': {
                # There are a bunch of config options you can specify. See
                # https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig for the full list.
                # NOTE(review): the declared encoding presumably has to match
                # the actual file contents (e.g. a FLAC file would need
                # 'FLAC'); a mismatch may yield a response with no results.
                # Confirm against the API reference.
                'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
                'sampleRate': 16000,  # 16 khz
                # See http://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                'languageCode': 'en-US',  # a BCP-47 language tag
            },
            'audio': {
                # The JSON body needs text, so turn the base64 bytes into str.
                'content': speech_content.decode('UTF-8')
            }
        })
    # [END construct_request]
    # [START send_request]
    response = service_request.execute()
    print(json.dumps(response))
    # [END send_request]

    name = response['name']
    # Construct a GetOperation request.
    service_request = service.operations().get(name=name)

    while True:
        # Give the server a second to process before polling again.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = service_request.execute()

        if response.get('done'):
            break

    # A finished operation may carry 'error' instead of 'response'; report it
    # rather than failing with a KeyError on the missing 'response' entry.
    if 'response' not in response:
        print('Operation finished without a response:')
        print(json.dumps(response.get('error', {}), indent=2))
        return response

    recognition = response['response']

    # First print the raw json response
    print(json.dumps(recognition, indent=2))

    # Now print the actual transcriptions
    for result in recognition.get('results', []):
        print('Result:')
        for alternative in result['alternatives']:
            print(u' Alternative: {}'.format(alternative['transcript']))

    return response
# Run the transcription on the sample recording and keep what main() returns
# in `r` for inspection.
r = main(speech_file="/Users/user/Downloads/brooklyn.flac")
Meine Ausgabe ist jedoch folgende:
{u'done': True,
u'metadata': {u'@type': u'type.googleapis.com/google.cloud.speech.v1beta1.AsyncRecognizeMetadata',
u'lastUpdateTime': u'2017-03-25T15:54:46.136925Z',
u'progressPercent': 100,
u'startTime': u'2017-03-25T15:54:44.514614Z'},
u'name': u'2024312474309214820',
u'response': {u'@type': u'type.googleapis.com/google.cloud.speech.v1beta1.AsyncRecognizeResponse'}}
In meiner Konsole sehe ich, dass die Anfragen ankommen:
Ich bin nicht sicher, warum ich nicht die richtige Transkription aus der Beispieldatei zurückbekomme.
Ich bin für jeden Hinweis dankbar!
Danke, das hat sehr geholfen. Ich habe die falsche Audiodatei verwendet (beim Herunterladen der Beispieldatei war ein Fehler aufgetreten), was bei allen nachfolgenden Schritten Probleme verursachte. –
Nur als Hinweis: Unter diesem Link findet sich eine etwas ausführlichere Erläuterung: https://groups.google.com/forum/#!topic/cloud-speech-discuss/L9o2OU3WsqI –