Source code for dronebuddylib.atoms.speechrecognition.speech_to_text_engine
from dronebuddylib.atoms.speechrecognition.google_speech_2_text_conversion_impl import GoogleSpeechToTextConversionImpl
from dronebuddylib.atoms.speechrecognition.vosk_speech_2_text_conversion_impl import VoskSpeechToTextConversionImpl
from dronebuddylib.models.engine_configurations import EngineConfigurations
from dronebuddylib.utils.enums import SpeechRecognitionAlgorithm
[docs]
class SpeechToTextEngine:
    """
    High-level interface for speech-to-text conversion using a selectable algorithm.

    The constructor picks a concrete conversion backend based on ``algorithm``;
    ``recognize_speech`` then forwards the audio to that backend.

    Attributes:
        algorithm (SpeechRecognitionAlgorithm): The algorithm used for speech recognition.
        speech_config (EngineConfigurations): The configurations handed to the backend.
        speech_conversion_engine: The backend instance created in ``__init__``
            (``GoogleSpeechToTextConversionImpl`` or ``VoskSpeechToTextConversionImpl``).
    """

    def __init__(self, algorithm: SpeechRecognitionAlgorithm, speech_config: EngineConfigurations):
        """
        Initializes the engine with the provided algorithm and speech configurations.

        Args:
            algorithm (SpeechRecognitionAlgorithm): The algorithm to be used for speech recognition.
            speech_config (EngineConfigurations): The configurations for the speech recognition engine;
                passed unchanged to the selected backend's constructor.

        Raises:
            ValueError: If ``algorithm`` is not one of the algorithms handled here.
        """
        self.algorithm = algorithm
        self.speech_config = speech_config

        if algorithm == SpeechRecognitionAlgorithm.GOOGLE_SPEECH_RECOGNITION:
            self.speech_conversion_engine = GoogleSpeechToTextConversionImpl(speech_config)
        elif algorithm == SpeechRecognitionAlgorithm.VOSK_SPEECH_RECOGNITION:
            self.speech_conversion_engine = VoskSpeechToTextConversionImpl(speech_config)
        else:
            # Fail fast: without this branch the attribute would stay unset and the
            # problem would only surface later as an AttributeError in recognize_speech.
            raise ValueError(
                "Unsupported speech recognition algorithm: {!r}".format(algorithm)
            )

    def recognize_speech(self, audio_steam):
        """
        Recognizes speech from an audio stream using the selected speech recognition algorithm.

        Args:
            audio_steam (bytes): The audio stream content to be recognized. (The parameter
                name is kept as-is for backward compatibility with keyword callers.)

        Returns:
            Whatever the selected backend's ``recognize_speech`` returns for this audio.
        """
        return self.speech_conversion_engine.recognize_speech(audio_steam)