Source code for dronebuddylib.atoms.speechrecognition.google_speech_2_text_conversion_impl
from google.cloud import speech
from dronebuddylib.atoms.speechrecognition.i_speech_to_text_conversion import ISpeechToTextConversion
from dronebuddylib.models.engine_configurations import EngineConfigurations
from dronebuddylib.models.enums import Configurations
from dronebuddylib.models.recognized_speech import RecognizedSpeechResult
from dronebuddylib.utils.utils import config_validity_check
[docs]
class GoogleSpeechToTextConversionImpl(ISpeechToTextConversion):
    """
    Implementation of the ISpeechToTextConversion interface backed by the
    Google Cloud Speech-to-Text API.

    Attributes:
        sample_rate (int): The sample rate of the audio stream in hertz
            (defaults to 16000 when not configured).
        language (str): The language code of the speech in the audio stream
            (defaults to 'en-US' when not configured).
        encoding (speech.RecognitionConfig.AudioEncoding): The encoding type of
            the audio stream (defaults to LINEAR16 when not configured).
        speech_conversion_engine (speech.SpeechClient): The Google Cloud
            Speech-to-Text client used to issue recognition requests.
    """

    def get_class_name(self) -> str:
        """
        Gets the class name.

        Returns:
            str: The class name, also used as the key when looking up this
            engine's configurations.
        """
        return 'SPEECH_TO_TEXT_GOOGLE'

    def get_algorithm_name(self) -> str:
        """
        Gets the algorithm name.

        Returns:
            str: The human-readable algorithm name.
        """
        return 'Google Speech to Text'

    def get_optional_params(self) -> list:
        """
        Gets the list of optional parameters.

        Returns:
            list: The configuration keys that may be supplied to override the
            default sample rate, language code and audio encoding.
        """
        return [Configurations.SPEECH_RECOGNITION_GOOGLE_SAMPLE_RATE_HERTZ,
                Configurations.SPEECH_RECOGNITION_GOOGLE_LANGUAGE_CODE,
                Configurations.SPEECH_RECOGNITION_GOOGLE_ENCODING]

    def get_required_params(self) -> list:
        """
        Gets the list of required parameters.

        Returns:
            list: The list of required parameters (empty: every setting has a
            default).
        """
        return []

    def __init__(self, engine_configurations: EngineConfigurations):
        """
        Initializes the GoogleSpeechToTextConversionImpl class with the provided
        engine configurations.

        Args:
            engine_configurations (EngineConfigurations): The engine
                configurations containing the parameters for this engine.
        """
        # Look the configuration up once and reuse it for both validation and
        # for reading the individual settings.
        configs = engine_configurations.get_configurations_for_engine(self.get_class_name())
        config_validity_check(self.get_required_params(),
                              configs,
                              self.get_algorithm_name())

        self.sample_rate = configs.get(Configurations.SPEECH_RECOGNITION_GOOGLE_SAMPLE_RATE_HERTZ, 16000)
        self.language = configs.get(Configurations.SPEECH_RECOGNITION_GOOGLE_LANGUAGE_CODE, 'en-US')
        self.encoding = configs.get(Configurations.SPEECH_RECOGNITION_GOOGLE_ENCODING,
                                    speech.RecognitionConfig.AudioEncoding.LINEAR16)
        self.speech_conversion_engine = speech.SpeechClient()

    def recognize_speech(self, audio_steam) -> RecognizedSpeechResult:
        """
        Recognizes speech from an audio stream using the Google Cloud
        Speech-to-Text API.

        Each recognized result's top transcript is printed to standard output.

        Args:
            audio_steam (bytes): The audio stream content to be recognized.

        Returns:
            RecognizedSpeechResult: Built from the response's results and its
            total billed time.
        """
        # RecognitionAudio is a message type exported by the `speech` module
        # (like RecognitionConfig below); it is not an attribute of the
        # SpeechClient instance.
        audio = speech.RecognitionAudio(content=audio_steam)
        config = speech.RecognitionConfig(
            encoding=self.encoding,
            sample_rate_hertz=self.sample_rate,
            language_code=self.language,
        )

        # Detects speech in the audio content.
        response = self.speech_conversion_engine.recognize(config=config, audio=audio)

        for result in response.results:
            # Skip results that carry no alternatives instead of failing on
            # the [0] index.
            if result.alternatives:
                print(f"Transcript: {result.alternatives[0].transcript}")

        return RecognizedSpeechResult(response.results, response.total_billed_time)