Swift speech recognition

Time: 2018-08-03 05:08:52

Tags: ios swift speech-recognition

I am using speech recognition in my code to look up a contact based on the recognized speech. However, it keeps appending words to the buffer, and what I want is to discard the previously recorded words or lines. For example, when I search for "Steve Jobs" and then for "Bill Gates", the transcription comes out combined as "Steve Jobs Bill Gates". Below is my code.

import UIKit
import Speech

class SpeechRecognizer:NSObject,SFSpeechRecognizerDelegate{

    static let shared:SpeechRecognizer? = {
        return SpeechRecognizer()
    }()

    var isRunning = false

    private var speechRecognizer:SFSpeechRecognizer!
    private var audioEngine:AVAudioEngine!
    private var recognitionRequest:SFSpeechAudioBufferRecognitionRequest!
    private var recognitionTask:SFSpeechRecognitionTask!

    public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        if available {
            print("recordign available")
        } else {
            print("Recording not available")
        }

    }

    func stopRecording() {
        if audioEngine == nil {
            return
        }
        if audioEngine.isRunning {
            audioEngine.inputNode.removeTap(onBus: 0)
            audioEngine.stop()
            recognitionTask?.cancel()
            recognitionRequest?.endAudio()
            print("Stopped")
            isRunning = false
        }
    }


    func start(result:@escaping(String)->()) {
        getSpeechRegnizationPermission { (per) in
            if per && !self.isRunning{
                do {
                    try self.startRecording(rcResult: { (rcRes) in
                    result(rcRes)
                })
                } catch {
                    print(error)
                }
            } else if self.isRunning {
                print("alrdy running")
            } else {
                print("no permission")
            }
        }
    }


    private func startRecording(rcResult:@escaping (String)->()) throws {
        self.stopRecording()
        isRunning = true
        speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!
        audioEngine = AVAudioEngine()
        recognitionTask = nil
        recognitionRequest = nil

        if let recognitionTask = recognitionTask {
            recognitionTask.cancel()
            self.recognitionTask = nil
        }

        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(AVAudioSessionCategoryRecord)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()

        let inputNode = audioEngine.inputNode
        guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object")}

        recognitionRequest.shouldReportPartialResults = true

        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            if let result = result {
                rcResult(result.bestTranscription.formattedString)
                isFinal = result.isFinal
                print("Final word is :",isFinal)

            }
            if error != nil || isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
            }
        }

        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
       print("Im listening ")
    }



    private func getSpeechRegnizationPermission(status:@escaping (Bool)->()){
        if SFSpeechRecognizer.authorizationStatus() == .authorized {
         status(true)
            return
        }
        SFSpeechRecognizer.requestAuthorization { (sta) in
            switch sta {
            case .authorized:
                status(true)
            case .denied:
                status(false)
            case .notDetermined:
                SFSpeechRecognizer.requestAuthorization({ (st) in
                    if st  == .authorized {
                        status(true)
                    } else {
                        status(false)
                    }
                })
            case .restricted:
                status(false)
            }
        }
    }

}

1 Answer:

Answer 0: (score: 0)

I would suggest merging the stopRecording function with this code block, and then only using stopRecording:

    if error != nil || isFinal {
        self.audioEngine.stop()
        inputNode.removeTap(onBus: 0)
        self.recognitionRequest = nil
        self.recognitionTask = nil
    }

This way you have a single method that disposes of the resources in use, and you no longer need to call stopRecording on the first line of startRecording. Once you get a result, pass it to the block with rcResult(result.bestTranscription.formattedString) and call stopRecording at that point.
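
Here is a rough sketch of what that could look like, reusing the property names from the question (audioEngine, recognitionRequest, recognitionTask, isRunning) and the rcResult callback; treat it as a starting point rather than a drop-in replacement:

    func stopRecording() {
        guard let audioEngine = audioEngine, audioEngine.isRunning else { return }

        // Stop capturing audio and remove the tap that feeds the request.
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)

        // Signal the end of audio, let the task deliver its final result,
        // then drop the references so the next start begins from scratch.
        recognitionRequest?.endAudio()
        recognitionTask?.finish()
        recognitionRequest = nil
        recognitionTask = nil

        isRunning = false
    }

    // Inside startRecording, the recognition callback then only forwards
    // the transcription and calls stopRecording when the result is final
    // or an error occurs.
    recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
        if let result = result {
            rcResult(result.bestTranscription.formattedString)
        }
        if error != nil || result?.isFinal == true {
            self.stopRecording()
        }
    }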

Also, call recognitionTask?.finish() instead of recognitionTask?.cancel().
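
The reason, as far as I understand the API, is that cancel() discards the recognition in progress, while finish() stops accepting new audio but still lets the recognizer deliver the final result for the audio that was already captured. In the sketch above that is this single line:

    // finish() ends the task gracefully so the last result is still delivered;
    // cancel() would throw it away.
    recognitionTask?.finish()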

Hope it helps!