This post records the notes and sample code I put together while learning Apple's native speech recognition library, the Speech framework. After reading through the official documentation and going over the framework's classes and methods, I summarized them in an XMind diagram.
Prerequisites: Xcode 8 or later and an iOS device running iOS 10 or later.
Classes and methods in the Speech framework
Note: 因?yàn)樯婕暗綑?quán)限問題情臭,需要在info.plist文件中添加兩個(gè)key。分別是
Privacy - Microphone Usage Description
(麥克風(fēng)權(quán)限)和Privacy - Speech Recognition Usage Description
(語音識(shí)別權(quán)限)
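Viewed as source code (Open As ▸ Source Code in Xcode), those two entries correspond to the raw keys NSMicrophoneUsageDescription and NSSpeechRecognitionUsageDescription. A minimal sketch of the Info.plist fragment; the description strings are placeholders you should replace with your own wording:

<key>NSMicrophoneUsageDescription</key>
<string>The microphone is used to capture your speech.</string>
<key>NSSpeechRecognitionUsageDescription</key>
<string>Your speech is sent to Apple's servers for transcription.</string>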
Swift code
import UIKit
import Speech

class ViewController: UIViewController {

    @IBOutlet weak var textView: UITextView!
    @IBOutlet weak var microphoneButton: UIButton!

    /// The speech recognizer (nil if the user's default locale is not supported)
    private let speechRecognizer = SFSpeechRecognizer()
    /// Handles the recognition request and feeds audio input to the recognizer
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    /// Reports the recognition results; keeping a reference lets you cancel or stop the task
    private var recognitionTask: SFSpeechRecognitionTask?
    /// The audio engine, which supplies the audio input
    private let audioEngine = AVAudioEngine()

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view, typically from a nib.
        microphoneButton.isEnabled = false
        speechRecognizer?.delegate = self
        // Request the user's authorization for speech recognition
        SFSpeechRecognizer.requestAuthorization { (authStatus) in
            var isButtonEnabled = false
            switch authStatus {
            case .authorized: // The user granted speech recognition
                isButtonEnabled = true
            case .denied: // The user denied speech recognition
                isButtonEnabled = false
                print("User denied access to speech recognition")
            case .restricted: // Speech recognition is restricted on this device
                isButtonEnabled = false
                print("Speech recognition restricted on this device")
            case .notDetermined: // The user has not made a choice yet
                isButtonEnabled = false
                print("Speech recognition not yet authorized")
            }
            OperationQueue.main.addOperation {
                self.microphoneButton.isEnabled = isButtonEnabled
            }
        }
    }
    @IBAction func microphoneButtonClick(_ sender: UIButton) {
        if audioEngine.isRunning {
            audioEngine.stop()
            recognitionRequest?.endAudio()
            microphoneButton.isEnabled = false
            microphoneButton.setTitle("Start Recording", for: .normal)
        } else {
            startRecording()
            microphoneButton.setTitle("Stop Recording", for: .normal)
        }
    }
    func startRecording() {
        // If a recognition task is already running, cancel it before starting a new one
        if recognitionTask != nil {
            recognitionTask?.cancel()
            recognitionTask = nil
        }
        // Prepare the audio session for recording
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(AVAudioSessionCategoryRecord)
            try audioSession.setMode(AVAudioSessionModeMeasurement)
            try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
        } catch {
            print("audioSession properties weren't set because of an error.")
        }
        // Instantiate recognitionRequest, which streams the audio data to Apple's servers
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        // Check that the audio engine (your device) has an input node for recording
        guard let inputNode = audioEngine.inputNode else {
            fatalError("Audio engine has no input node")
        }
        // Check that recognitionRequest was instantiated and is not nil
        guard let recognitionRequest = recognitionRequest else {
            fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
        }
        // Report partial recognition results while the user is still speaking
        recognitionRequest.shouldReportPartialResults = true
        // Start recognition. The handler is called each time the recognizer receives input,
        // refines the current transcription, or is canceled or stopped, and it ultimately
        // delivers a final transcription.
        recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
            // Tracks whether recognition has finished
            var isFinal = false
            // If there is a result, show the best transcription in the text view;
            // if it is the final result, set isFinal to true
            if result != nil {
                self.textView.text = result?.bestTranscription.formattedString
                isFinal = (result?.isFinal)!
            }
            // On error, or once the final result arrives, stop the audioEngine (the audio
            // input), remove the tap, and release recognitionRequest and recognitionTask
            if error != nil || isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
                self.microphoneButton.isEnabled = true
            }
        })
        // Feed audio input into recognitionRequest. It is fine to install the tap after the
        // recognition task has started; the Speech framework begins recognizing as soon as
        // audio is appended.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
            self.recognitionRequest?.append(buffer)
        }
        // Prepare and start the audio engine
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            print("audioEngine couldn't start because of an error")
        }
        textView.text = "Say something, I'm listening!"
    }
}
extension ViewController: SFSpeechRecognizerDelegate {
    /// Called when the recognizer's availability changes
    func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        microphoneButton.isEnabled = available
    }
}
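The code above creates SFSpeechRecognizer() for the user's default language settings, and the delegate callback re-enables the button whenever availability changes. As a side note (not from the original post), a recognizer can also be created for an explicit locale; a minimal sketch, where zh-CN is just an example locale:

import Speech

// SFSpeechRecognizer(locale:) is failable and returns nil if the locale is unsupported.
let mandarinRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN"))

// supportedLocales() lists every locale the framework can recognize.
print(SFSpeechRecognizer.supportedLocales())

// isAvailable may still be false (for example, with no network connection);
// changes are reported through speechRecognizer(_:availabilityDidChange:).
print(mandarinRecognizer?.isAvailable ?? false)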
Objective-C code
#import <Speech/Speech.h>

@interface ViewController () <SFSpeechRecognizerDelegate>

@property (nonatomic, strong) SFSpeechRecognizer *speechRecognizer;
@property (nonatomic, strong) SFSpeechRecognitionTask *recognitionTask;
@property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *recognitionRequest;
/// The audio engine
@property (nonatomic, strong) AVAudioEngine *audioEngine;
@property (weak, nonatomic) IBOutlet UITextView *textView;
@property (weak, nonatomic) IBOutlet UIButton *microphoneBtn;

@end
@implementation ViewController
- (void)dealloc {
    // Cancel any in-flight recognition task when the controller goes away
    [self.recognitionTask cancel];
    self.recognitionTask = nil;
}
- (void)viewDidLoad {
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib.
    self.view.backgroundColor = [UIColor whiteColor];
    NSLog(@"supportedLocales: %@", [SFSpeechRecognizer supportedLocales]);
    self.microphoneBtn.enabled = NO;
    // Create the speech recognizer and set its delegate
    self.speechRecognizer = [[SFSpeechRecognizer alloc] init];
    self.speechRecognizer.delegate = self;
    // Request the user's authorization for speech recognition
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        BOOL isButtonEnabled = NO;
        switch (status) {
            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                isButtonEnabled = NO;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusNotDetermined");
                break;
            case SFSpeechRecognizerAuthorizationStatusDenied:
                isButtonEnabled = NO;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusDenied");
                break;
            case SFSpeechRecognizerAuthorizationStatusRestricted:
                isButtonEnabled = NO;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusRestricted");
                break;
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                isButtonEnabled = YES;
                NSLog(@"SFSpeechRecognizerAuthorizationStatusAuthorized");
                break;
            default:
                break;
        }
        dispatch_async(dispatch_get_main_queue(), ^{
            self.microphoneBtn.enabled = isButtonEnabled;
        });
    }];
    // Create the audio engine
    self.audioEngine = [[AVAudioEngine alloc] init];
}
- (IBAction)microphoneBtnClick:(UIButton *)sender {
    if (self.audioEngine.isRunning) {
        [self.audioEngine stop];
        [self.recognitionRequest endAudio];
        self.microphoneBtn.enabled = NO;
        [self.microphoneBtn setTitle:@"Start Recording" forState:UIControlStateNormal];
    } else {
        [self startRecording];
        [self.microphoneBtn setTitle:@"Stop Recording" forState:UIControlStateNormal];
    }
}
#pragma mark - private method

- (void)startRecording {
    if (self.recognitionTask != nil) {
        // Cancel the current recognition task, if any
        [self.recognitionTask cancel];
        self.recognitionTask = nil;
    }
    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    NSError *categoryError = nil;
    if (![audioSession setCategory:AVAudioSessionCategoryRecord error:&categoryError]) {
        NSLog(@"categoryError: %@", categoryError.localizedDescription);
    }
    NSError *modeError = nil;
    if (![audioSession setMode:AVAudioSessionModeMeasurement error:&modeError]) {
        NSLog(@"modeError: %@", modeError.localizedDescription);
    }
    NSError *activeError = nil;
    if (![audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&activeError]) {
        NSLog(@"activeError: %@", activeError.localizedDescription);
    }
    // Create a request object that recognizes live speech from the device's microphone
    self.recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    if (!self.audioEngine.inputNode) { // The system's audio input node
        NSLog(@"Audio engine has no input node");
        return;
    }
    if (!self.recognitionRequest) {
        NSLog(@"Unable to create an SFSpeechAudioBufferRecognitionRequest object");
        return;
    }
    // Report partial (non-final) results for each utterance as they arrive
    self.recognitionRequest.shouldReportPartialResults = YES;
    // Start the recognition task with a result handler
    self.recognitionTask = [self.speechRecognizer recognitionTaskWithRequest:self.recognitionRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {
        BOOL isFinal = NO;
        if (result) {
            self.textView.text = result.bestTranscription.formattedString;
            isFinal = result.isFinal;
        }
        if (error || isFinal) {
            [self.audioEngine stop];
            [self.audioEngine.inputNode removeTapOnBus:0];
            self.recognitionRequest = nil;
            self.recognitionTask = nil;
            self.microphoneBtn.enabled = YES;
        }
    }];
    AVAudioFormat *recordingFormat = [self.audioEngine.inputNode outputFormatForBus:0];
    [self.audioEngine.inputNode installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
        // Append the PCM audio buffer to the end of the recognition request
        [self.recognitionRequest appendAudioPCMBuffer:buffer];
    }];
    [self.audioEngine prepare];
    NSError *startError = nil;
    if (![self.audioEngine startAndReturnError:&startError]) {
        NSLog(@"startError: %@", startError.localizedDescription);
    }
    self.textView.text = @"Say something, I'm listening";
}
#pragma mark - SFSpeechRecognizerDelegate

/// Called when the recognizer's availability changes
- (void)speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidChange:(BOOL)available {
    self.microphoneBtn.enabled = available;
}

@end
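Both versions stream live microphone audio through SFSpeechAudioBufferRecognitionRequest. The framework also provides SFSpeechURLRecognitionRequest for transcribing prerecorded audio files. A minimal Swift sketch of that path, which is my own addition (the function name and file URL parameter are placeholders):

import Speech

/// Hypothetical helper: transcribe a prerecorded audio file at the given URL.
func transcribeFile(at url: URL) {
    guard let recognizer = SFSpeechRecognizer(), recognizer.isAvailable else { return }
    let request = SFSpeechURLRecognitionRequest(url: url)
    // The returned task could be stored in a property to allow cancellation.
    _ = recognizer.recognitionTask(with: request) { result, error in
        if let result = result, result.isFinal {
            print(result.bestTranscription.formattedString)
        } else if let error = error {
            print("Recognition failed: \(error)")
        }
    }
}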
References:
Building a Speech-to-Text App Using Speech Framework in iOS 10
SpeakToMe: Using Speech Recognition with AVAudioEngine