I'm developing an iOS project that uses SFSpeechRecognizer, and it works fine at first: I say something and it responds. But after a minute or two it fails and stops giving any feedback for recognized results.
I suspect this has something to do with a buffer, but I don't know how to fix it.
I basically built the project from the SpeechRecognizer demo. The difference is that I store the recognition results word by word in an array. The program scans the array and responds to certain words, such as "play" or other commands set up beforehand. After the program responds to a command, it removes that element from the array.
Talk is cheap, here is the code:
The recognizer. Note the supportedCommands array, which filters for the specific words the program should respond to; the other parts closely follow https://developer.apple.com/library/content/samplecode/SpeakToMe/Listings/SpeakToMe_ViewController_swift.html#//apple_ref/doc/uid/TP40017110-SpeakToMe_ViewController_swift-DontLinkElementID_6
import Speech
import AVFoundation

class SpeechRecognizer: NSObject, SFSpeechRecognizerDelegate {
    private var speechRecognizer: SFSpeechRecognizer!
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest!
    private var recognitionTask: SFSpeechRecognitionTask!
    private let audioEngine = AVAudioEngine()
    private let locale = Locale(identifier: "en-US")
    private var lastSavedString: String = ""
    private let supportedCommands = ["more", "play"]
    var speechInputQueue: [String] = [String]()

    func load() {
        print("load")
        prepareRecognizer(locale: locale)
        authorize()
    }

    func start() {
        print("start")
        if !audioEngine.isRunning {
            try! startRecording()
        }
    }

    func stop() {
        if audioEngine.isRunning {
            audioEngine.stop()
            recognitionRequest?.endAudio()
        }
    }

    private func authorize() {
        SFSpeechRecognizer.requestAuthorization { authStatus in
            OperationQueue.main.addOperation {
                switch authStatus {
                case .authorized:
                    print("Authorized!")
                case .denied:
                    print("Unauthorized!")
                case .restricted:
                    print("Unauthorized!")
                case .notDetermined:
                    print("Unauthorized!")
                }
            }
        }
    }

    private func prepareRecognizer(locale: Locale) {
        speechRecognizer = SFSpeechRecognizer(locale: locale)!
        speechRecognizer.delegate = self
    }

    private func startRecording() throws {
        // Cancel the previous task if it's running.
        if let recognitionTask = recognitionTask {
            recognitionTask.cancel()
            self.recognitionTask = nil
        }

        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, with: .defaultToSpeaker)
        try audioSession.setMode(AVAudioSessionModeDefault)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        let inputNode = audioEngine.inputNode
        guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }

        // Configure request so that results are returned before audio recording is finished
        recognitionRequest.shouldReportPartialResults = true

        // A recognition task represents a speech recognition session.
        // We keep a reference to the task so that it can be cancelled.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            if let result = result {
                let temp = result.bestTranscription.formattedString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).lowercased()
                //print("temp", temp)
                // Partial results are cumulative, so only look at what was
                // appended since the last callback.
                if temp != self.lastSavedString && temp.count > self.lastSavedString.count {
                    var tempSplit = temp.split(separator: " ")
                    var lastSplit = self.lastSavedString.split(separator: " ")
                    // Drop the prefix shared with the previous transcription.
                    // Also guard tempSplit, to avoid an index-out-of-range crash.
                    while lastSplit.count > 0 && tempSplit.count > 0 {
                        if String(tempSplit[0]) == String(lastSplit[0]) {
                            tempSplit.remove(at: 0)
                            lastSplit.remove(at: 0)
                        }
                        else {
                            break
                        }
                    }
                    // Queue any newly heard words that match a supported command.
                    for command in tempSplit {
                        if self.supportedCommands.contains(String(command)) {
                            self.speechInputQueue.append(String(command))
                        }
                    }
                    self.lastSavedString = temp
                }
                isFinal = result.isFinal
            }

            if error != nil || isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
            }
        }

        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
    }
}
Here is how we use it:
// `temp`, `content`, and `audioPlayer` are defined in the surrounding
// view-controller code (elided here).
if self.speechRecognizer.speechInputQueue.count > 0 {
    if self.speechRecognizer.speechInputQueue[0] == "more" {
        print("temp", temp)
        print("content", content)
        // isSpeakingContent = true
        self.textToSpeech(text: content)
    }
    else if self.speechRecognizer.speechInputQueue[0] == "play" {
        print("try to play")
        let soundURL = URL(fileURLWithPath: Bundle.main.path(forResource: "cascade", ofType: "wav")!)
        do {
            audioPlayer = try AVAudioPlayer(contentsOf: soundURL)
            // Only prepare and play if the player was actually created.
            audioPlayer.prepareToPlay()
            audioPlayer.play()
        }
        catch {
            print(error)
        }
    }
    else {
        self.textToSpeech(text: "unrecognized command")
    }
    // Consume the command we just handled.
    self.speechRecognizer.speechInputQueue.remove(at: 0)
    print("after :", self.speechRecognizer.speechInputQueue)
}
It responds to certain commands and plays some audio.
Is there a problem with the buffer? Maybe after a minute or two of recognition the buffer is full? The recognizer does fail over time.
Answer 0 (score: 0)
From WWDC 2016 Session 509: Speech Recognition API:
"For iOS 10, we have a strict audio duration limit of about one minute, similar to that of keyboard dictation."
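So this is not a buffer filling up: the service deliberately ends the session after roughly a minute, which matches the failure you are seeing. The usual workaround is to tear the session down and start a new one whenever the current task finishes or errors out. Below is a minimal sketch against the SpeechRecognizer class above; the restart() helper, the 0.1 s delay, and the reset of lastSavedString are my additions, not part of your code or Apple's sample:

    // Call this from the recognitionTask callback in place of the plain
    // teardown in the `error != nil || isFinal` branch.
    private func restart() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        recognitionRequest = nil
        recognitionTask = nil
        // A new task starts a fresh transcription, so reset the bookkeeping;
        // otherwise the `temp.count > lastSavedString.count` check would
        // swallow the first words of the new session.
        lastSavedString = ""
        // Give the audio session a moment to settle, then start a new task.
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
            do {
                try self.startRecording()
            } catch {
                print("Failed to restart recognition:", error)
            }
        }
    }

If you would rather not wait for the service to cut you off, you can also restart proactively, for example from a Timer that calls stop() followed by the helper above every 50 seconds or so, accepting that a word spoken exactly across the restart may be lost.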