RT:
一直以來,對Siri的語音識別功能很感興趣,但一直沒有時間去研究,今天心血來潮,便找了點資料,自己動手試著做了一個簡單的Demo,效果如下——
最終效果:
制作思路:
- 在Info.plist文件添加麥克風和語言識別權限描述
- 引入Speech語言識別包
- 真機測試(必須真機)
Microphone Usage Description和Speech Recognition Usage Description使用意圖描述,內容隨便寫!
requiresOnDeviceRecognition屬性可以設置為true,這樣就不需要訪問服務器;通過服務器識別貌似有使用數量限制,僅限於設備上識別就沒有!
if #available(iOS 13, *) {
// Set this property to true to prevent the SFSpeechRecognitionRequest from sending audio over the network.
// On-device requests will be less accurate.
recognitionRequest.requiresOnDeviceRecognition = true
}
并且可以識別本地音頻,不過最好是將音頻的時間限制在1分鐘以內!
本地音頻文件識別部分,我注釋掉了,只留實時錄制音頻識別部分的代碼,感興趣可以取消注釋測試看看效果!
/////////// 識別音頻文件
/*============================================================================*/
// @objc private func recognizeBtnDidClick(_ sender: UIButton) {
// var info = ""
// sender.isSelected = !sender.isSelected
//
// if sender.isSelected {
// info = "正在識別···"
// print(info)
// sender.setTitle(info, for: .normal)
// sender.backgroundColor = .orange
// let path = Bundle.main.path(forResource: "Track 1_004", ofType: "wav")
// let url: NSURL = NSURL.init(fileURLWithPath: path!)
// recognizeFile(url: url)
//
// } else {
// info = "停止識別!"
// print(info)
// sender.setTitle(info, for: .normal)
// sender.backgroundColor = .blue
// }
// }
// // 音頻文件識別
// func recognizeFile(url:NSURL) {
//
// guard let myRecognizer = SFSpeechRecognizer.init(locale: Locale.init(identifier: "zh-CN")) else { return }
//
// if !myRecognizer.isAvailable { return }
//
// let request = SFSpeechURLRecognitionRequest(url: url as URL)
// myRecognizer.recognitionTask(with: request) { (result, error) in
// guard let result = result else { return }
//
// self.textView.text = result.bestTranscription.formattedString
// if result.isFinal {
// print("Speech in the file is \(result.bestTranscription.formattedString)")
// self.textView.text = result.bestTranscription.formattedString
// }
// }
// }
/*============================================================================*/
官方也有相關的案例,感興趣可以下載過來學習研究……(-)
完整代碼:
//
// SpeechVC.swift
// UIKit-basic
//
// Created by Qire_er on 2022/1/16.
//
import UIKit
import Speech
/// A view controller that transcribes live microphone input with the Speech framework
/// and shows the running transcription in a text view.
///
/// The single button toggles between starting and stopping a recording session.
/// Requires the microphone and speech-recognition usage descriptions in Info.plist.
class SpeechVC: UIViewController {
    var textView: UITextView!      // Shows the recognized text
    var recognizeBtn: UIButton!    // Start / stop recording button

    // MARK: - Speech recognition state

    // Recognizer bound to the zh-CN locale. The initializer is failable; the force-unwrap
    // treats an unsupported locale as a programmer error.
    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN"))!
    // The in-flight request that microphone buffers are appended to (nil when idle)
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    // The in-flight task, kept so that it can be cancelled
    private var recognitionTask: SFSpeechRecognitionTask?
    // Audio engine that provides the microphone input
    private let audioEngine = AVAudioEngine()

    // MARK: - Button colors

    private let recodingBG: UIColor = .red          // Background while recording
    private let enableBG: UIColor = .blue           // Background when recording can be started
    private let disableBG: UIColor = .systemGray3   // Background when recording is unavailable

    // MARK: - View lifecycle

    /// Builds the UI: a text view stacked above the record button.
    override func viewDidLoad() {
        super.viewDidLoad()

        let vStack = UIStackView()
        vStack.translatesAutoresizingMaskIntoConstraints = false
        vStack.axis = .vertical

        textView = UITextView()
        textView.font = .boldSystemFont(ofSize: 46)
        textView.backgroundColor = .systemGray5

        recognizeBtn = UIButton()
        recognizeBtn.setTitle("開始錄制", for: .normal)
        recognizeBtn.setTitleColor(UIColor.gray, for: .disabled)
        recognizeBtn.addTarget(self, action: #selector(recordButtonTapped), for: .touchUpInside)
        recognizeBtn.isEnabled = false // Disabled until speech recognition is authorized

        vStack.addArrangedSubview(textView)
        vStack.addArrangedSubview(recognizeBtn)
        view.addSubview(vStack)
        view.backgroundColor = .white

        NSLayoutConstraint.activate([
            vStack.leftAnchor.constraint(equalTo: view.leftAnchor, constant: 15),
            vStack.rightAnchor.constraint(equalTo: view.rightAnchor, constant: -15),
            vStack.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 15),
            vStack.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor, constant: -15),
            recognizeBtn.heightAnchor.constraint(equalToConstant: 80)
        ])
    }

    /// Registers as the recognizer's delegate and requests speech-recognition authorization.
    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        speechRecognizer.delegate = self

        // MARK: Request speech-recognition authorization
        SFSpeechRecognizer.requestAuthorization { [weak self] status in
            print("status = \(status.rawValue)")
            // The callback may arrive off the main thread; hop to the main queue before touching UI.
            OperationQueue.main.addOperation {
                self?.applyAuthorizationStatus(status)
            }
        }
    }

    /// Updates the record button to reflect the given authorization status.
    private func applyAuthorizationStatus(_ status: SFSpeechRecognizerAuthorizationStatus) {
        switch status {
        case .authorized:       // The user granted access
            recognizeBtn.isEnabled = true
            recognizeBtn.backgroundColor = enableBG
        case .notDetermined:    // The user has not decided yet
            disableRecognizeButton(title: "語音識別未經授權!")
        case .denied:           // The user denied access
            disableRecognizeButton(title: "用戶拒絕訪問語音識別!")
        case .restricted:       // Speech recognition is restricted on this device
            disableRecognizeButton(title: "語音識別不支持此設備!")
        @unknown default:       // Statuses added by future SDKs: disable, keep the current title
            disableRecognizeButton(title: nil)
        }
    }

    /// Disables the record button, optionally replacing its disabled-state title.
    private func disableRecognizeButton(title: String?) {
        recognizeBtn.isEnabled = false
        if let title = title {
            recognizeBtn.setTitle(title, for: .disabled)
        }
        recognizeBtn.backgroundColor = disableBG
    }

    // MARK: - Recording

    /// Starts a live recognition session fed from the microphone.
    ///
    /// - Throws: Any error raised while configuring the audio session or starting the
    ///   audio engine. On an engine start failure the partially built session is torn down
    ///   before the error is rethrown.
    private func startRecording() throws {
        // Cancel the previous recognition task, if any
        recognitionTask?.cancel()
        recognitionTask = nil

        // Configure and activate the app's audio session
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Create and configure the request; partial results are delivered as they are produced
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true
        if #available(iOS 13, *) {
            // Keep audio on the device whenever the recognizer supports it. Forcing `true` on a
            // recognizer without on-device support makes the request fail, so fall back to
            // server-based recognition in that case. On-device results are less accurate.
            request.requiresOnDeviceRecognition = speechRecognizer.supportsOnDeviceRecognition
        }
        recognitionRequest = request

        // Keep a reference to the task so that it can be cancelled.
        // `self` is captured weakly: the task is stored on `self`, so a strong capture would
        // create a retain cycle for as long as the task is alive.
        recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in
            // Ignore callbacks for a request that is no longer the current one (e.g. a cancelled
            // predecessor), so they cannot stop the engine or reset the UI of a newer session.
            guard let self = self, self.recognitionRequest === request else { return }

            var isFinal = false
            if let result = result {
                // Show the best transcription so far
                let transcript = result.bestTranscription.formattedString
                self.textView.text = transcript
                isFinal = result.isFinal
                print("【識別內容】\(transcript)")
            }

            if error != nil || isFinal {
                // Finished or failed: stop capturing audio and return the button to its idle state
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                self.recognizeBtn.isEnabled = true
                self.recognizeBtn.setTitle("開始錄制", for: .normal)
                self.recognizeBtn.backgroundColor = self.enableBG
            }
        }

        // Configure the microphone input.
        // Only one tap may be installed per bus, so drop any tap left over from an earlier session.
        inputNode.removeTap(onBus: 0)
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        // The tap captures the request directly instead of reading `self.recognitionRequest`,
        // so the audio callback neither retains `self` nor touches its properties.
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, _: AVAudioTime) in
            request.append(buffer)
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            // Undo the partial setup so that a later attempt starts from a clean state
            inputNode.removeTap(onBus: 0)
            recognitionRequest = nil
            recognitionTask?.cancel()
            recognitionTask = nil
            throw error
        }

        // Tell the user that recording has started
        textView.text = "點擊【開始錄制】···"
    }

    /// Button handler: stops the running session, or starts a new one when idle.
    @objc private func recordButtonTapped() {
        if audioEngine.isRunning {
            // Stop capturing and let the recognizer finish; the task handler re-enables the button
            audioEngine.stop()
            recognitionRequest?.endAudio()
            recognizeBtn.isEnabled = false
            recognizeBtn.setTitle("停止錄制", for: .disabled)
            return
        }

        do {
            try startRecording()
            recognizeBtn.setTitle("停止錄制", for: .normal)
            recognizeBtn.backgroundColor = recodingBG
        } catch {
            print("startRecording failed: \(error)")
            recognizeBtn.setTitle("錄音不可用!", for: .normal)
            recognizeBtn.backgroundColor = disableBG
        }
    }

    // MARK: - Audio file recognition (disabled sample code)
    /*============================================================================*/
//    @objc private func recognizeBtnDidClick(_ sender: UIButton) {
//        var info = ""
//        sender.isSelected = !sender.isSelected
//
//        if sender.isSelected {
//            info = "正在識別···"
//            print(info)
//            sender.setTitle(info, for: .normal)
//            sender.backgroundColor = .orange
//            let path = Bundle.main.path(forResource: "Track 1_004", ofType: "wav")
//            let url: NSURL = NSURL.init(fileURLWithPath: path!)
//            recognizeFile(url: url)
//
//        } else {
//            info = "停止識別!"
//            print(info)
//            sender.setTitle(info, for: .normal)
//            sender.backgroundColor = .blue
//        }
//    }
//    // Recognize speech in an audio file
//    func recognizeFile(url:NSURL) {
//
//        guard let myRecognizer = SFSpeechRecognizer.init(locale: Locale.init(identifier: "zh-CN")) else { return }
//
//        if !myRecognizer.isAvailable { return }
//
//        let request = SFSpeechURLRecognitionRequest(url: url as URL)
//        myRecognizer.recognitionTask(with: request) { (result, error) in
//            guard let result = result else { return }
//
//            self.textView.text = result.bestTranscription.formattedString
//            if result.isFinal {
//                print("Speech in the file is \(result.bestTranscription.formattedString)")
//                self.textView.text = result.bestTranscription.formattedString
//            }
//        }
//    }
    /*============================================================================*/
}
// MARK: - SFSpeechRecognizerDelegate
extension SpeechVC: SFSpeechRecognizerDelegate {
    /// Enables or disables the record button when the recognizer's availability changes.
    ///
    /// - Parameters:
    ///   - speechRecognizer: The recognizer whose availability changed.
    ///   - available: `true` when recognition can be performed, `false` otherwise.
    public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        recognizeBtn.isEnabled = available
        if available {
            recognizeBtn.setTitle("開始錄制···", for: [])
        } else {
            // The original literal contained stray garbage characters before the "!"
            recognizeBtn.setTitle("語言識別不可用!", for: .disabled)
        }
    }
}
控制臺也有相關信息的輸出!感覺還是挺強大,值得好好研究……
(==完==)
ps: 以上僅代表個人淺見,如果你有什么高見,也歡迎討論交流!