I use API, Cloud Speech-to-Text V2 from Google Cloud Platform. I set the environment using JSON file. I want to transcribe an audio file from local to text. When I execute the code, it doesn’t show error but shows a lot of encodes. How to revise it?
import os
import asyncio
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech as speech
from google.protobuf.json_format import MessageToDict
# My credential file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../config/google_credential.json'
async def transcribe_audio(local_file_path):
client = SpeechClient()
with open(local_file_path, "rb") as audio_file:
content = audio_file.read()
# recognize how many speakers
diarization_config = speech.SpeakerDiarizationConfig(
min_speaker_count=2,
max_speaker_count=6
)
features = speech.RecognitionFeatures(
diarization_config=diarization_config
)
# Recognition set.
config = speech.RecognitionConfig(
auto_decoding_config=speech.AutoDetectDecodingConfig(),
features=features,
language_codes=["en-US"],
model="long",
)
request = speech.RecognizeRequest(
config=config,
content=content
)
operation = await client.batch_recognize(request=request)
response = await operation.result()
# File result.
for result in response.results:
for alternative in result.alternatives:
print(f"Transcript: {alternative.transcript}")
print(f"Confidence: {alternative.confidence}")
for word in alternative.words:
print(f"Word: {word.word}, Speaker: {word.speaker_tag}")
response_dict = MessageToDict(response)
with open("transcript.txt", "w") as transcript_file:
for result in response_dict['results']:
for alternative in result['alternatives']:
transcript_file.write(f"Transcript: {alternative['transcript']}\n")
transcript_file.write(f"Confidence: {alternative['confidence']}\n")
for word in alternative['words']:
transcript_file.write(f"Word: {word['word']}, Speaker: {word['speakerTag']}\n")
# My file.
local_file_path = "../voice/Chicago.mp3"
asyncio.run(transcribe_audio(local_file_path))
Thank you.