几分钟后SpeechRecognizer失败

时间:2018-04-17 12:32:21

标签: ios swift speech-recognition speech-to-text sfspeechrecognizer

我正在开发一个使用SFSpeechRecognizer的iOS项目,它在开始时工作正常。我说一些话就会回应。但是一两分钟后,它就失败了。它没有给出任何公认结果的反馈。 我想知道这是否与缓冲区有关,但我不知道如何修复它。

我基本上使用SpeechRecognizer的演示来构建项目。不同之处在于我将识别结果逐字存储在数组中。程序分析数组并响应某些单词,如“play”或之前设置的其他命令。程序响应命令后,它会删除数组的这个元素。

谈话很便宜,这是代码:

  1. 识别器,您可以看到supportedCommands数组,用于过滤某些特定单词以供程序响应。其他部分类似于https://developer.apple.com/library/content/samplecode/SpeakToMe/Listings/SpeakToMe_ViewController_swift.html#//apple_ref/doc/uid/TP40017110-SpeakToMe_ViewController_swift-DontLinkElementID_6

    的演示
    class SpeechRecognizer: NSObject, SFSpeechRecognizerDelegate {
    
        private var speechRecognizer: SFSpeechRecognizer!
        private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest!
        private var recognitionTask: SFSpeechRecognitionTask!
        private let audioEngine = AVAudioEngine()
        private let locale = Locale(identifier: "en-US")
    
        private var lastSavedString: String = ""
        private let supportedCommands = ["more", "play"]
    
        var speechInputQueue: [String] = [String]()
    
        func load() {
            print("load")
            prepareRecognizer(locale: locale)
    
            authorize()
        }
    
        func start() {
            print("start")
            if !audioEngine.isRunning {
                try! startRecording()
            }
        }
    
        func stop() {
            if audioEngine.isRunning {
                audioEngine.stop()
                recognitionRequest?.endAudio()
    
            }
        }
    
        private func authorize() {
            SFSpeechRecognizer.requestAuthorization { authStatus in
                OperationQueue.main.addOperation {
                    switch authStatus {
                    case .authorized:
                        print("Authorized!")
                    case .denied:
                        print("Unauthorized!")
                    case .restricted:
                        print("Unauthorized!")
                    case .notDetermined:
                        print("Unauthorized!")
                    }
                }
            }
        }
    
        private func prepareRecognizer(locale: Locale) {
            speechRecognizer = SFSpeechRecognizer(locale: locale)!
            speechRecognizer.delegate = self
        }
    
        private func startRecording() throws {
    
            // Cancel the previous task if it's running.
            if let recognitionTask = recognitionTask {
                recognitionTask.cancel()
                self.recognitionTask = nil
            }
    
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, with: .defaultToSpeaker)
            try audioSession.setMode(AVAudioSessionModeDefault)
            try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    
            recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    
            let inputNode = audioEngine.inputNode
            guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
    
            // Configure request so that results are returned before audio recording is finished
            recognitionRequest.shouldReportPartialResults = true
    
            // A recognition task represents a speech recognition session.
            // We keep a reference to the task so that it can be cancelled.
            recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
                var isFinal = false
    
                if let result = result {
    
                    let temp = result.bestTranscription.formattedString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).lowercased()
                    //print("temp", temp)
                    if temp != self.lastSavedString && temp.count > self.lastSavedString.count {
    
                        var tempSplit = temp.split(separator: " ")
                        var lastSplit = self.lastSavedString.split(separator: " ")
                        while lastSplit.count > 0 {
                            if String(tempSplit[0]) == String(lastSplit[0]) {
                                tempSplit.remove(at: 0)
                                lastSplit.remove(at: 0)
                            }
                            else {
                                break
                            }
                        }
    
                        for command in tempSplit {
                            if self.supportedCommands.contains(String(command)) {
                                self.speechInputQueue.append(String(command))
                            }
                        }
                        self.lastSavedString = temp
    
                    }
                    isFinal = result.isFinal
                }
    
                if error != nil || isFinal {
                    self.audioEngine.stop()
                    inputNode.removeTap(onBus: 0)
                    self.recognitionRequest = nil
                    self.recognitionTask = nil
                }
            }
    
            let recordingFormat = inputNode.outputFormat(forBus: 0)
            inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
                self.recognitionRequest?.append(buffer)
            }
    
            audioEngine.prepare()
    
            try audioEngine.start()
    
        }
    }
    
  2. 我们如何使用它:

        if self.speechRecognizer.speechInputQueue.count > 0 {
        if self.speechRecognizer.speechInputQueue[0] == "more" {
            print("temp", temp)
            print("content", content)
           // isSpeakingContent = true
            self.textToSpeech(text: content)
        }
        else if self.speechRecognizer.speechInputQueue[0] == "play" {
            print("try to play")
            let soundURL = URL(fileURLWithPath: Bundle.main.path(forResource: "cascade", ofType: "wav")!)
    
            do {
                audioPlayer = try AVAudioPlayer(contentsOf: soundURL)
            }
            catch {
                print(error)
            }
            audioPlayer.prepareToPlay()
            audioPlayer.play()
        }
        else {
            self.textToSpeech(text: "unrecognized command")
        }
        self.speechRecognizer.speechInputQueue.remove(at: 0)
        print("after :", self.speechRecognizer.speechInputQueue)
    }
    
  3. 它响应某些命令并播放一些音频。

    缓冲区有问题吗?也许经过一两分钟的识别,缓冲区已满?识别器随着时间的推移而失败。

1 个答案:

答案 0 :(得分:0)

来自WWDC 2016 Session 509: Speech Recognition API

  

对于iOS 10,我们的严格音频持续时间限制约为1分钟,类似于键盘听写。