使用Speech Framework實現語音轉文字

這篇日志記錄自己在學習使用蘋果原生框架語音識別庫Speech Framework時的總結和示例代碼。看了一遍官方文檔,把該框架中的相關類和方法了解了一遍,然后總結了一張XMind結構圖。

前提:需要Xcode 8 以上和一個運行iOS10以上系統的iOS設備。

Speech Framework中的類和方法概念

Paste_Image.png

Note: 因為涉及到權限問題,需要在info.plist文件中添加兩個key。分別是Privacy - Microphone Usage Description(麥克風權限)和 Privacy - Speech Recognition Usage Description(語音識別權限)。

Swift代碼

import UIKit
import Speech

class ViewController: UIViewController {

  @IBOutlet weak var textView: UITextView!
  @IBOutlet weak var microphoneButton: UIButton!

  /// Speech recognizer that performs the recognition (optional because the initializer is failable).
  private let speechRecognizer = SFSpeechRecognizer()

  /// Request through which captured audio buffers are handed to the recognizer.
  private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?

  /// The in-flight recognition task; kept so it can be cancelled.
  private var recognitionTask: SFSpeechRecognitionTask?

  /// Audio engine that supplies the microphone input.
  private let audioEngine = AVAudioEngine()

  deinit {
    // Stop capturing and cancel recognition if the controller goes away mid-recording.
    // A tap is only left installed while the engine is running.
    if audioEngine.isRunning {
      audioEngine.stop()
      audioEngine.inputNode?.removeTap(onBus: 0)
    }
    recognitionTask?.cancel()
  }

  override func viewDidLoad() {
    super.viewDidLoad()

    // The button stays disabled until the user has authorized speech recognition.
    microphoneButton.isEnabled = false

    speechRecognizer?.delegate = self

    // Ask the user for permission to use speech recognition.
    // `self` is captured weakly so a pending prompt does not keep the controller alive.
    SFSpeechRecognizer.requestAuthorization { [weak self] authStatus in
      let isButtonEnabled: Bool

      switch authStatus {
      case .authorized: // The user granted access.
        isButtonEnabled = true

      case .denied: // The user refused access.
        isButtonEnabled = false
        print("User denied access to speech recognition")

      case .restricted: // Speech recognition is restricted on this device.
        isButtonEnabled = false
        print("Speech recognition restricted on this device")

      case .notDetermined: // The user has not made a choice yet.
        isButtonEnabled = false
        print("Speech recognition not yet authorized")
      }

      // The callback is hopped to the main queue before touching UIKit.
      OperationQueue.main.addOperation {
        self?.microphoneButton.isEnabled = isButtonEnabled
      }
    }
  }

  /// Toggles recording: stops the running session, or starts a new one.
  @IBAction func microphoneButtonClick(_ sender: UIButton) {
    if audioEngine.isRunning {
      audioEngine.stop()
      recognitionRequest?.endAudio()
      // Disabled until the result handler delivers the final result and re-enables it.
      microphoneButton.isEnabled = false
      microphoneButton.setTitle("Start Recording", for: .normal)
    } else {
      startRecording()
      // Only advertise "Stop" when recording really started.
      if audioEngine.isRunning {
        microphoneButton.setTitle("Stop Recording", for: .normal)
      }
    }
  }

  /// Configures the audio session, creates a recognition request/task and starts
  /// feeding microphone audio into it. Returns early (after logging and undoing
  /// any partial setup) when a step fails, leaving the engine stopped.
  func startRecording() {
    // Cancel a task left over from a previous session.
    recognitionTask?.cancel()
    recognitionTask = nil

    guard let recognizer = speechRecognizer, recognizer.isAvailable else {
      print("Speech recognizer is not available")
      return
    }

    // Prepare the shared audio session for recording.
    let audioSession = AVAudioSession.sharedInstance()
    do {
      try audioSession.setCategory(AVAudioSessionCategoryRecord)
      try audioSession.setMode(AVAudioSessionModeMeasurement)
      try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    } catch {
      print("audioSession properties weren't set because of an error: \(error)")
      return
    }

    // Without an input node there is no audio to recognize.
    guard let inputNode = audioEngine.inputNode else {
      print("Audio engine has no input node")
      return
    }

    // Report partial results while the user is still speaking.
    let request = SFSpeechAudioBufferRecognitionRequest()
    request.shouldReportPartialResults = true
    recognitionRequest = request

    // The handler is invoked for partial results, for the final result, and on
    // error or cancellation. `self` is weak to avoid a cycle through `recognitionTask`.
    recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
      guard let strongSelf = self else { return }

      var isFinal = false
      if let result = result {
        strongSelf.textView.text = result.bestTranscription.formattedString
        isFinal = result.isFinal
      }

      // On error or final result: stop the audio input and release request and task.
      if error != nil || isFinal {
        strongSelf.audioEngine.stop()
        inputNode.removeTap(onBus: 0)

        strongSelf.recognitionRequest = nil
        strongSelf.recognitionTask = nil

        strongSelf.microphoneButton.isEnabled = true
        // Recognition may end on its own, so the title is reset here as well.
        strongSelf.microphoneButton.setTitle("Start Recording", for: .normal)
      }
    }

    // Feed microphone buffers into the request. The block captures the request
    // itself rather than `self`: the tap may run off the main thread, and this
    // avoids both a retain cycle and reading a mutable property from there.
    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
      request.append(buffer)
    }

    audioEngine.prepare()

    do {
      try audioEngine.start()
    } catch {
      print("audioEngine couldn't start because of an error: \(error)")
      // Undo the partial setup so a later attempt starts from a clean state.
      inputNode.removeTap(onBus: 0)
      recognitionTask?.cancel()
      recognitionTask = nil
      recognitionRequest = nil
      return
    }

    textView.text = "Say something, I'm listening!"
  }
}

extension ViewController: SFSpeechRecognizerDelegate {
  /// Called when the recognizer's availability changes; the microphone button
  /// is enabled exactly when `available` is true.
  func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
    microphoneButton.isEnabled = available
  }
}

Objective-C 代碼

#import <Speech/Speech.h>

/// Private class extension: recognition state and Interface Builder outlets.
/// The controller adopts SFSpeechRecognizerDelegate.
@interface ViewController ()<SFSpeechRecognizerDelegate>

/// Speech recognizer; created in -viewDidLoad.
@property (nonatomic, strong) SFSpeechRecognizer *speechRecognizer;

/// Current recognition task; cancelled before a new recording and in -dealloc.
@property (nonatomic, strong) SFSpeechRecognitionTask *recognitionTask;

/// Request that microphone buffers are appended to while recording.
@property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *recognitionRequest;

/// Audio engine
@property (nonatomic, strong) AVAudioEngine *audioEngine;

/// Text view that shows the transcription.
@property (weak, nonatomic) IBOutlet UITextView *textView;
/// Button that starts and stops recording.
@property (weak, nonatomic) IBOutlet UIButton *microphoneBtn;

@end

@implementation ViewController

- (void)dealloc {
    // Stop capturing if the controller goes away mid-recording.
    // A tap is only left installed while the engine is running.
    if (_audioEngine.isRunning) {
        [_audioEngine stop];
        [_audioEngine.inputNode removeTapOnBus:0];
    }
    [_recognitionTask cancel];
}

- (void)viewDidLoad {
    [super viewDidLoad];
    self.view.backgroundColor = [UIColor whiteColor];

    NSLog(@"supportedLocales: %@", [SFSpeechRecognizer supportedLocales]);

    // The button stays disabled until the user has authorized speech recognition.
    self.microphoneBtn.enabled = NO;

    // Create the speech recognizer and become its delegate.
    self.speechRecognizer = [[SFSpeechRecognizer alloc] init];
    self.speechRecognizer.delegate = self;

    // Create the audio engine.
    self.audioEngine = [[AVAudioEngine alloc] init];

    // Ask the user for authorization. self is captured weakly so a pending
    // prompt does not keep the controller alive.
    __weak typeof(self) weakSelf = self;
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        BOOL isButtonEnabled = NO;

        switch (status) {
            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                NSLog(@"SFSpeechRecognizerAuthorizationStatusNotDetermined");
                break;
            case SFSpeechRecognizerAuthorizationStatusDenied:
                NSLog(@"SFSpeechRecognizerAuthorizationStatusDenied");
                break;
            case SFSpeechRecognizerAuthorizationStatusRestricted:
                NSLog(@"SFSpeechRecognizerAuthorizationStatusRestricted");
                break;
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                NSLog(@"SFSpeechRecognizerAuthorizationStatusAuthorized");
                isButtonEnabled = YES;
                break;
            default:
                // Unknown status values leave the button disabled.
                break;
        }

        // The callback is hopped to the main queue before touching UIKit.
        dispatch_async(dispatch_get_main_queue(), ^{
            weakSelf.microphoneBtn.enabled = isButtonEnabled;
        });
    }];
}

/// Toggles recording: stops the running session, or starts a new one.
- (IBAction)microphoneBtnClick:(UIButton *)sender {
    if (self.audioEngine.isRunning) {
        [self.audioEngine stop];
        [self.recognitionRequest endAudio];
        // Disabled until the result handler delivers the final result and re-enables it.
        self.microphoneBtn.enabled = NO;
        [self.microphoneBtn setTitle:@"Start Recording" forState:UIControlStateNormal];
    } else {
        [self startRecording];
        // Only advertise "Stop" when recording really started.
        if (self.audioEngine.isRunning) {
            [self.microphoneBtn setTitle:@"Stop Recording" forState:UIControlStateNormal];
        }
    }
}

#pragma mark - private method
/// Configures the audio session, creates a recognition request/task and starts
/// feeding microphone audio into it. Returns early (after logging and undoing
/// any partial setup) when a step fails, leaving the engine stopped.
- (void)startRecording {
    if (self.recognitionTask != nil) {
        [self.recognitionTask cancel]; // Cancel the current recognition task.
        self.recognitionTask = nil;
    }

    // Messaging a nil recognizer yields NO, so this also covers a failed init.
    if (!self.speechRecognizer.available) {
        NSLog(@"Speech recognizer is not available");
        return;
    }

    // Prepare the shared audio session for recording; stop at the first failure.
    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    NSError *categoryError = nil;
    if (![audioSession setCategory:AVAudioSessionCategoryRecord error:&categoryError]) {
        NSLog(@"categoryError: %@", categoryError.localizedDescription);
        return;
    }

    NSError *modeError = nil;
    if (![audioSession setMode:AVAudioSessionModeMeasurement error:&modeError]) {
        NSLog(@"modeError: %@", modeError.localizedDescription);
        return;
    }

    NSError *activeError = nil;
    if (![audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&activeError]) {
        NSLog(@"activeError: %@", activeError.localizedDescription);
        return;
    }

    // System input node (microphone).
    AVAudioInputNode *inputNode = self.audioEngine.inputNode;
    if (!inputNode) {
        NSLog(@"Audio engine has no input node");
        return;
    }

    // Request used to recognize live audio captured from the microphone.
    SFSpeechAudioBufferRecognitionRequest *request = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    if (!request) {
        NSLog(@"Unable to create an SFSpeechAudioBufferRecongitionRequest object");
        return;
    }

    // Report partial (non-final) results while the user is still speaking.
    request.shouldReportPartialResults = YES;
    self.recognitionRequest = request;

    // Start the recognition task. self is weak inside the handler to avoid a
    // retain cycle through self.recognitionTask.
    __weak typeof(self) weakSelf = self;
    self.recognitionTask = [self.speechRecognizer recognitionTaskWithRequest:request resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {
        __strong typeof(weakSelf) strongSelf = weakSelf;
        if (!strongSelf) {
            return;
        }

        BOOL isFinal = NO;

        if (result) {
            strongSelf.textView.text = result.bestTranscription.formattedString;
            isFinal = result.isFinal;
        }

        // On error or final result: stop the audio input and release request and task.
        if (error || isFinal) {
            [strongSelf.audioEngine stop];
            [inputNode removeTapOnBus:0];

            strongSelf.recognitionRequest = nil;
            strongSelf.recognitionTask = nil;

            strongSelf.microphoneBtn.enabled = YES;
            // Recognition may end on its own, so the title is reset here as well.
            [strongSelf.microphoneBtn setTitle:@"Start Recording" forState:UIControlStateNormal];
        }
    }];

    AVAudioFormat *recordingFormat = [inputNode outputFormatForBus:0];

    // Append captured PCM audio to the recognition request. The block captures
    // the request itself rather than self: the tap may run off the main thread,
    // and this avoids both a retain cycle and reading a mutable property there.
    [inputNode installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
        [request appendAudioPCMBuffer:buffer];
    }];

    [self.audioEngine prepare];

    NSError *startError = nil;
    if (![self.audioEngine startAndReturnError:&startError]) {
        NSLog(@"startError: %@", startError.localizedDescription);
        // Undo the partial setup so a later attempt starts from a clean state.
        [inputNode removeTapOnBus:0];
        [self.recognitionTask cancel];
        self.recognitionTask = nil;
        self.recognitionRequest = nil;
        return;
    }

    self.textView.text = @"Say something, I'm listening";
}

#pragma mark - SFSpeechRecognizerDelegate
/// Called when the recognizer's availability changes; the microphone button
/// is enabled exactly when `available` is YES.
- (void)speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidChange:(BOOL)available {
    self.microphoneBtn.enabled = available;
}

@end

參考鏈接:
Building a Speech-to-Text App Using Speech Framework in iOS 10
SpeakToMe: Using Speech Recognition with AVAudioEngine

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
  • 序言:七十年代末如输,一起剝皮案震驚了整個(gè)濱河市,隨后出現(xiàn)的幾起案子央勒,更是在濱河造成了極大的恐慌不见,老刑警劉巖,帶你破解...
    沈念sama閱讀 210,914評(píng)論 6 490
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件订歪,死亡現(xiàn)場(chǎng)離奇詭異脖祈,居然都是意外死亡,警方通過查閱死者的電腦和手機(jī)刷晋,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 89,935評(píng)論 2 383
  • 文/潘曉璐 我一進(jìn)店門盖高,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人眼虱,你說我怎么就攤上這事喻奥。” “怎么了捏悬?”我有些...
    開封第一講書人閱讀 156,531評(píng)論 0 345
  • 文/不壞的土叔 我叫張陵撞蚕,是天一觀的道長。 經(jīng)常有香客問我过牙,道長甥厦,這世上最難降的妖魔是什么纺铭? 我笑而不...
    開封第一講書人閱讀 56,309評(píng)論 1 282
  • 正文 為了忘掉前任,我火速辦了婚禮刀疙,結(jié)果婚禮上舶赔,老公的妹妹穿的比我還像新娘。我一直安慰自己谦秧,他們只是感情好竟纳,可當(dāng)我...
    茶點(diǎn)故事閱讀 65,381評(píng)論 5 384
  • 文/花漫 我一把揭開白布。 她就那樣靜靜地躺著疚鲤,像睡著了一般锥累。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上集歇,一...
    開封第一講書人閱讀 49,730評(píng)論 1 289
  • 那天桶略,我揣著相機(jī)與錄音,去河邊找鬼鬼悠。 笑死删性,一個(gè)胖子當(dāng)著我的面吹牛,可吹牛的內(nèi)容都是我干的焕窝。 我是一名探鬼主播蹬挺,決...
    沈念sama閱讀 38,882評(píng)論 3 404
  • 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場(chǎng)噩夢(mèng)啊……” “哼它掂!你這毒婦竟也來了巴帮?” 一聲冷哼從身側(cè)響起,我...
    開封第一講書人閱讀 37,643評(píng)論 0 266
  • 序言:老撾萬榮一對(duì)情侶失蹤虐秋,失蹤者是張志新(化名)和其女友劉穎榕茧,沒想到半個(gè)月后,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體客给,經(jīng)...
    沈念sama閱讀 44,095評(píng)論 1 303
  • 正文 獨(dú)居荒郊野嶺守林人離奇死亡用押,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點(diǎn)故事閱讀 36,448評(píng)論 2 325
  • 正文 我和宋清朗相戀三年,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了靶剑。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片蜻拨。...
    茶點(diǎn)故事閱讀 38,566評(píng)論 1 339
  • 序言:一個(gè)原本活蹦亂跳的男人離奇死亡,死狀恐怖桩引,靈堂內(nèi)的尸體忽然破棺而出缎讼,到底是詐尸還是另有隱情,我是刑警寧澤坑匠,帶...
    沈念sama閱讀 34,253評(píng)論 4 328
  • 正文 年R本政府宣布血崭,位于F島的核電站,受9級(jí)特大地震影響,放射性物質(zhì)發(fā)生泄漏夹纫。R本人自食惡果不足惜咽瓷,卻給世界環(huán)境...
    茶點(diǎn)故事閱讀 39,829評(píng)論 3 312
  • 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望捷凄。 院中可真熱鬧忱详,春花似錦、人聲如沸跺涤。這莊子的主人今日做“春日...
    開封第一講書人閱讀 30,715評(píng)論 0 21
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽桶错。三九已至,卻和暖如春胀蛮,著一層夾襖步出監(jiān)牢的瞬間院刁,已是汗流浹背。 一陣腳步聲響...
    開封第一講書人閱讀 31,945評(píng)論 1 264
  • 我被黑心中介騙來泰國打工粪狼, 沒想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留退腥,地道東北人。 一個(gè)月前我還...
    沈念sama閱讀 46,248評(píng)論 2 360
  • 正文 我出身青樓再榄,卻偏偏與公主長得像狡刘,于是被迫代替她去往敵國和親。 傳聞我的和親對(duì)象是個(gè)殘疾皇子困鸥,可洞房花燭夜當(dāng)晚...
    茶點(diǎn)故事閱讀 43,440評(píng)論 2 348

推薦閱讀更多精彩內(nèi)容

  • 因?yàn)橐Y(jié)局swift3.0中引用snapKit的問題,看到一篇介紹Xcode8,swift3變化的文章,覺得很詳細(xì)...
    uniapp閱讀 4,403評(píng)論 0 12
  • iOS 10新特性以及適配點(diǎn) SiriKit 所有第三方應(yīng)用都可以用Siri嗅蔬,支持音頻、視頻疾就、消息發(fā)送接收澜术、搜索照...
    越過三閱讀 6,171評(píng)論 11 67
  • APP開發(fā)避免不開系統(tǒng)權(quán)限的問題,今天做定位時(shí)需要在不允許定位的時(shí)候做一些操作猬腰,所以鸟废,今天就大概的了解了一些。 權(quán)...
    SunshineBrother閱讀 16,363評(píng)論 4 62
  • 已經(jīng)好久沒有回去 故鄉(xiāng)卻更清晰了起來 父親的聲音也不再有力 母親走路都成了問題 河里的水好像很久沒有流動(dòng)了 門口還...
    西蘭河閱讀 149評(píng)論 0 0
  • 17歲姑荷,我與他相遇盒延,一起學(xué)習(xí) 18歲,我們相熟厢拭,相約打籃球 高三兰英,文理分班,他成為她的 大一供鸠,他們異地分手 而我畦贸,...
    李易峰女友閱讀 307評(píng)論 5 6