# Source code for sksurgeryspeech.algorithms.voice_recognition_service

"""
Speech API algorithm
"""
# pylint: disable=no-name-in-module,import-error

import logging
import json
import struct
from datetime import datetime
import pyaudio
import pvporcupine
import speech_recognition as sr
from PySide2.QtCore import QObject, Signal, Slot, QThread, QTimer


LOGGER = logging.getLogger("voice_recognition_logger")


class VoiceRecognitionService(QObject):
    """
    Voice Recognition service which takes a microphone input and converts
    it to text by using the Google Cloud Speech-to-Text API.

    Configuration dictionary must contain the following keys:

    porcupine dynamic library path: \
        Porcupine/lib/<operating_system>/<processor_type>/<library_file>
    porcupine model file path: \
        Porcupine/lib/common/porcupine_params.pv
    porcupine keyword file: \
        Porcupine/resources/keyword_files/<operating_system>/<keyword>

    optional keys:

    google credentials file: json file with google cloud api credentials
    recogniser: api to use, options are sphinx, google, google_cloud, \
        bing, houdify, ibm, wit
    sphinx keywords: a list of keywords and sensitivities for sphinx
    timeout for command: default None
    """

    # Qt signals emitted at the various stages of the listen/recognise cycle.
    start_listen = Signal()
    stop_timer = Signal()
    google_api_not_understand = Signal()
    google_api_request_failure = Signal(str)
    voice_command = Signal(str)
    start_processing_request = Signal()

    def __init__(self, config: dict):
        """
        Construct the service.

        :param config: configuration dictionary, see class docstring.
        :raises KeyError: if a mandatory porcupine key is missing.
        :raises ValueError: if the google credentials file is not valid json.
        """
        LOGGER.info("Creating Voice Recognition Service")

        # Needed for SignalInstance support.
        super().__init__()

        self.timeout_for_command = config.get("timeout for command", None)

        library_path = config.get("porcupine dynamic library path", None)
        if library_path is None:
            # BUG FIX: was KeyError("...dynamic", " library path"), which
            # produced a two-element args tuple instead of one message.
            raise KeyError("Config must contain porcupine dynamic"
                           " library path")

        model_file_path = config.get("porcupine model file path", None)
        if model_file_path is None:
            raise KeyError("Config must contain porcupine model file path")

        keyword_file_paths = config.get("porcupine keyword file", None)
        if keyword_file_paths is None:
            raise KeyError("Config must contain porcupine keyword file")

        self.recogniser = config.get("recogniser", "sphinx")
        self.sphinx_keywords = config.get("sphinx keywords", None)
        self.recognizer = sr.Recognizer()

        sensitivities = config.get("sensitivities", [1.0])
        # Polling period, in milliseconds, for the keyword-listening timer.
        self.interval = config.get("interval", 10)

        self.handle = pvporcupine.create(
            library_path=library_path,
            model_path=model_file_path,
            keyword_paths=keyword_file_paths,
            sensitivities=sensitivities)

        audio = pyaudio.PyAudio()
        self.audio_stream = \
            audio.open(rate=self.handle.sample_rate,
                       channels=1,
                       format=pyaudio.paInt16,
                       input=True,
                       frames_per_buffer=self.handle.frame_length)

        # This is to add the credentials for the google cloud api.
        # Alternatively, set the environment variable
        # GOOGLE_APPLICATION_CREDENTIALS to the path of your json file
        # with credentials.
        key_file_path = config.get('google credentials file', None)
        self.credentials = None
        if key_file_path is not None:
            with open(key_file_path, 'r', encoding='utf-8') as file:
                self.credentials = file.read()
            # raises a ValueError if the credential file isn't valid json
            json.loads(self.credentials)

        # Creating timer later, in the context of the running thread,
        # see run().
        self.timer = None

        LOGGER.info("Created Voice Recognition Service")
[docs] def run(self): """ Entry point for the QThread which starts the timer to listen in the background """ LOGGER.info("run method executed") # Creating the timer in the context of the running thread. self.timer = QTimer() self.timer.setInterval(self.interval) self.timer.timeout.connect(self.listen_for_keyword) self.stop_timer.connect(self.__stop) # start the timer to start the background listening self.timer.start()
[docs] def request_stop(self): """ Called by external client to stop timer. """ LOGGER.info("Requesting VoiceRecognitionService to stop timer.") self.stop_timer.emit() QThread.msleep(self.interval * 3) while self.timer.isActive(): QThread.msleep(self.interval * 3) LOGGER.info("Requested VoiceRecognitionService to stop timer.")
@Slot() def __stop(self): LOGGER.info("Stopping VoiceRecognitionService timer.") self.timer.stop() QThread.msleep(self.interval * 3) LOGGER.info("Stopped VoiceRecognitionService timer.")
[docs] def listen_for_keyword(self): """ This method is called every 100 milliseconds by the QThread running and listens for the keyword """ pcm = self.audio_stream.read(self.handle.frame_length) pcm = struct.unpack_from("h" * self.handle.frame_length, pcm) result = self.handle.process(pcm) if result >= 0: # when the keyword gets detected, the user can input a command LOGGER.info('[%s] detected keyword', str(datetime.now())) self.start_listen.emit() self.listen_to_command()
[docs] def listen_to_command(self): """ This method gets called when a specific command is said. It then listens for specific commands and converts them to QT Signals """ # listen to a single command with sr.Microphone() as source: audio = self.recognizer \ .listen(source, phrase_time_limit=self.timeout_for_command) try: # convert command to string, # this string should later be used to fire a certain GUI command self.start_processing_request.emit() words = self._recognise(audio) self.voice_command.emit(words) except sr.UnknownValueError: self.google_api_not_understand.emit() except sr.RequestError as exception: self.google_api_request_failure.emit(str(exception))
def _recognise(self, audio): words = "" if self.recogniser == "sphinx": words = self.recognizer.recognize_sphinx( audio, keyword_entries=self.sphinx_keywords) elif self.recogniser == "google_cloud": words = self.recognizer.recognize_google_cloud( audio, credentials_json=self.credentials) elif self.recogniser == "google": words = self.recognizer.recognize_google(audio) elif self.recogniser == "bing": raise NotImplementedError( "Key credentials for bing not set up") #something like this, but might need to change credentials #words = self.recognizer.recognize_bing(audio, key=self.credentials) elif self.recogniser == "houndify": raise NotImplementedError( "Key credentials for houndify not set up") #something like this, but might need to change credentials #words = self.recognizer.recognize_houndify( # audio, client_id=self.credentials, # client_key = self.credentials) elif self.recogniser == "ibm": raise NotImplementedError( "Key credentials for ibm not set up") #something like this, but might need to change credentials #words = recognizer.recognize_ibm( # audio, username=notset, password=notset) elif self.recogniser == "wit": raise NotImplementedError( "Key credentials for wit not set up") #something like this, but might need to change credentials #words = self.recognizer.recognize_wit(audio, key=self.credentials) else: raise ValueError("Unrecognised recogniser", self.recogniser) return words