我正在尝试使用iOS语音将api和文本转换为语音api。基本上,用户会说话,他们说的内容将在文本视图中转录。然后,用户将按下一个按钮,该按钮将从文本视图中的转录文本提供文本到语音。我的错误是,当用户按下语音转文本按钮时,文本到语音按钮停止工作。但是,如果我没有按下语音到文本按钮并通过键盘在文本视图中输入一些文本,则文本到语音按钮可以正常工作。我猜它是语音按钮的问题。我的代码没有任何错误,我对发生的事情感到困惑。
import UIKit
import Speech
import AVFoundation
class SpeechRecognitionViewController: UIViewController, SFSpeechRecognizerDelegate, UIPickerViewDataSource, UIPickerViewDelegate {
private var speechRecognizer: SFSpeechRecognizer!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest!
private var recognitionTask: SFSpeechRecognitionTask!
private let audioEngine = AVAudioEngine()
private var locales: [Locale]!
private let defaultLocale = Locale(identifier: "en-US")
@IBOutlet weak var recordBtn: UIButton!
@IBOutlet weak var speaker: UIButton!
@IBOutlet weak var textView: UITextField!
//@IBOutlet weak var textView: UITextView!
//@IBOutlet private weak var recordBtn : UIButton!
//@IBOutlet private weak var picker: UIPickerView!
@IBOutlet weak var picker: UIPickerView!
override func viewDidLoad() {
super.viewDidLoad()
recordBtn.isEnabled = false
locales = SFSpeechRecognizer.supportedLocales().map({$0})
let index = NSArray(array: locales).index(of: defaultLocale)
picker.selectRow(index, inComponent: 0, animated: false)
prepareRecognizer(locale: defaultLocale)
}
override func viewDidAppear(_ animated: Bool) {
super.viewDidAppear(animated)
SFSpeechRecognizer.requestAuthorization { authStatus in
/*
The callback may not be called on the main thread. Add an
operation to the main queue to update the record button's state.
*/
OperationQueue.main.addOperation {
switch authStatus {
case .authorized:
self.recordBtn.isEnabled = true
case .denied:
self.recordBtn.isEnabled = false
self.recordBtn.setTitle("User denied access to speech recognition", for: .disabled)
case .restricted:
self.recordBtn.isEnabled = false
self.recordBtn.setTitle("Speech recognition restricted on this device", for: .disabled)
case .notDetermined:
self.recordBtn.isEnabled = false
self.recordBtn.setTitle("Speech recognition not yet authorized", for: .disabled)
}
}
}
}
override func didReceiveMemoryWarning() {
super.didReceiveMemoryWarning()
}
private func prepareRecognizer(locale: Locale) {
speechRecognizer = SFSpeechRecognizer(locale: locale)!
speechRecognizer.delegate = self
}
private func startRecording() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else { fatalError("Audio engine has no input node") }
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
self.textView.text = result.bestTranscription.formattedString
isFinal = result.isFinal
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
self.recordBtn.isEnabled = true
self.recordBtn.setTitle("Start Recording", for: [])
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
textView.text = "(listening...)"
}
// =========================================================================
// MARK: - UIPickerViewDataSource
func numberOfComponents(in pickerView: UIPickerView) -> Int {
return 1
}
func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
return locales.count
}
// =========================================================================
// MARK: - UIPickerViewDelegate
func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
return locales[row].identifier
}
func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
let locale = locales[row]
prepareRecognizer(locale: locale)
}
// =========================================================================
// MARK: - SFSpeechRecognizerDelegate
public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
if available {
recordBtn.isEnabled = true
recordBtn.setTitle("Start Recording", for: [])
} else {
recordBtn.isEnabled = false
recordBtn.setTitle("Recognition not available", for: .disabled)
}
}
// =========================================================================
// MARK: - Actions
@IBAction func recordbuttontapped(_ sender: Any) {
if audioEngine.isRunning {
audioEngine.stop()
recognitionRequest?.endAudio()
recordBtn.isEnabled = false
recordBtn.setTitle("Stopping", for: .disabled)
} else {
try! startRecording()
recordBtn.setTitle("Stop recording", for: [])
}
}
@IBAction func speaktome(_ sender: Any) {
let something = textView.text!
let utterance = AVSpeechUtterance(string: something)
utterance.voice = AVSpeechSynthesisVoice(language: "en-US" )
let synthesizer = AVSpeechSynthesizer()
synthesizer.speak(utterance)
}
}
答案 0 :(得分:0)
当你按语音到文本按钮时,使用另一个线程获取语音,你的程序专注于从用户获取语音及其无限条件或持续你的程序获得语音而不是任何停止条件,使用另一个线程获取用户在语音到文本按钮上的语音,并在获得语音后终止
答案 1 :(得分:0)
将try audioSession.setCategory(AVAudioSessionCategoryRecord)
更改为try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord)
它应按预期工作。在语音到文本期间,您正在将音频会话更改为记录模式,并且在TTS期间不会更改为播放模式。另一种方法是在TTS期间将音频会话设置为AVAudioSessionCategoryPlayback
。