iOS使用攝像頭實時掃描，并實時提取文字

iOS使用攝像頭實時掃描，并實時提取文字

實現通過攝像頭實時掃描并將識別的文字實時疊加在屏幕上的效果，可以結合 Apple 的 AVFoundation 和 Vision 框架完成。以下是完整實現步驟：

功能描述

使用 AVCaptureSession 打開攝像頭捕獲實時視頻流。
使用 Vision 框架 的 VNRecognizeTextRequest 對每一幀視頻進行文字識別。
將識別到的文字疊加顯示在屏幕上的 UILabel 中。

實現代碼

import UIKit
import AVFoundation
import Vision

/// View controller that shows a live camera preview and overlays the text
/// recognized in the video frames on a label near the bottom of the screen.
///
/// Frames are captured with `AVCaptureSession` / `AVCaptureVideoDataOutput` and
/// handed to a reusable `VNRecognizeTextRequest`; the recognized strings are
/// joined with newlines and written to `detectedTextLabel` on the main queue.
class LiveTextRecognitionViewController: UIViewController {

    private var captureSession: AVCaptureSession!
    private var previewLayer: AVCaptureVideoPreviewLayer!
    private var detectedTextLabel: UILabel!
    private var textRequest: VNRecognizeTextRequest!

    /// Serial queue on which the session is started and stopped, so those
    /// blocking calls never run on the main thread and never overlap.
    private let sessionQueue = DispatchQueue(label: "captureSessionQueue")

    /// `true` once `setupCamera()` has attached a camera input; the session is
    /// only started when this is set.
    private var isSessionConfigured = false

    // MARK: - View lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        view.backgroundColor = .black

        // Build the Vision request first: the sample-buffer delegate reads
        // `textRequest` on the video queue, so it must exist before any frame
        // can be delivered.
        setupTextRecognitionRequest()

        // Configure the camera (the session itself is started in viewWillAppear).
        setupCamera()

        // Add the label after the preview layer so it is drawn on top of the video.
        setupDetectedTextLabel()
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        // `view.bounds` is not final in viewDidLoad and changes on rotation;
        // keep the preview layer matched to the current bounds.
        previewLayer?.frame = view.bounds
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        startSessionIfNeeded()
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        // Stop capturing while the view is off screen.
        stopSession()
    }

    // MARK: - Setup

    /// Creates the capture session, attaches the default video device as input
    /// and a video-data output whose frames are delivered to `self`, and
    /// installs the preview layer.
    ///
    /// If the camera cannot be used, a message is printed and the session is
    /// left unconfigured (no preview layer is added and it is never started).
    private func setupCamera() {
        let session = AVCaptureSession()
        session.sessionPreset = .high
        captureSession = session

        guard let videoDevice = AVCaptureDevice.default(for: .video),
              let videoInput = try? AVCaptureDeviceInput(device: videoDevice),
              session.canAddInput(videoInput) else {
            print("無法訪問攝像頭")
            return
        }
        session.addInput(videoInput)

        let videoOutput = AVCaptureVideoDataOutput()
        // Recognition runs synchronously inside the delegate callback, so frames
        // that arrive while a previous one is still being processed are dropped.
        videoOutput.alwaysDiscardsLateVideoFrames = true
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "videoFrameProcessingQueue"))

        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
        }

        // Set up the preview layer.
        let layer = AVCaptureVideoPreviewLayer(session: session)
        layer.videoGravity = .resizeAspectFill
        layer.frame = view.bounds
        view.layer.addSublayer(layer)
        previewLayer = layer

        isSessionConfigured = true
    }

    /// Creates the overlay label and pins it to the bottom of the view with a
    /// fixed height of 120 points and 16-point horizontal margins.
    private func setupDetectedTextLabel() {
        let label = UILabel()
        label.numberOfLines = 0
        label.textColor = .yellow
        label.backgroundColor = UIColor.black.withAlphaComponent(0.5)
        label.textAlignment = .center
        label.translatesAutoresizingMaskIntoConstraints = false
        view.addSubview(label)
        detectedTextLabel = label

        NSLayoutConstraint.activate([
            label.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 16),
            label.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -16),
            label.bottomAnchor.constraint(equalTo: view.bottomAnchor, constant: -40),
            label.heightAnchor.constraint(equalToConstant: 120)
        ])
    }

    /// Builds the reusable text-recognition request (accurate level, English and
    /// Simplified Chinese, language correction enabled) whose completion handler
    /// forwards results to `processTextRecognitionResults(_:)`.
    private func setupTextRecognitionRequest() {
        let request = VNRecognizeTextRequest { [weak self] (request, error) in
            guard let self = self else { return }
            if let error = error {
                print("文本識別出錯: \(error)")
                return
            }
            self.processTextRecognitionResults(request.results)
        }
        request.recognitionLevel = .accurate
        request.recognitionLanguages = ["en-US", "zh-Hans"]
        request.usesLanguageCorrection = true
        textRequest = request
    }

    // MARK: - Session control

    /// Starts the capture session on `sessionQueue` if the camera was configured
    /// and the session is not already running.
    private func startSessionIfNeeded() {
        guard isSessionConfigured, let session = captureSession else { return }
        // startRunning() blocks until the session has started, so keep it off the main thread.
        sessionQueue.async {
            if !session.isRunning {
                session.startRunning()
            }
        }
    }

    /// Stops the capture session on `sessionQueue` if it is running.
    private func stopSession() {
        guard let session = captureSession else { return }
        sessionQueue.async {
            if session.isRunning {
                session.stopRunning()
            }
        }
    }

    // MARK: - Results

    /// Extracts the top candidate string of every text observation, joins them
    /// with newlines and shows the result in the overlay label.
    ///
    /// - Parameter results: The `results` of the Vision request; anything that
    ///   is not `[VNRecognizedTextObservation]` is ignored.
    private func processTextRecognitionResults(_ results: [Any]?) {
        guard let observations = results as? [VNRecognizedTextObservation] else { return }

        let detectedText = observations
            .compactMap { $0.topCandidates(1).first?.string }
            .joined(separator: "\n")

        // UIKit must be touched on the main queue; this method is called from
        // the Vision completion handler.
        DispatchQueue.main.async { [weak self] in
            self?.detectedTextLabel.text = detectedText.isEmpty ? "未檢測到文字" : detectedText
        }
    }
}

// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate

extension LiveTextRecognitionViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    /// Runs the text-recognition request on each captured video frame.
    ///
    /// The request is performed synchronously on the queue this callback is
    /// delivered on; failures are printed and the frame is skipped.
    ///
    /// - Parameters:
    ///   - output: The capture output that produced the frame (unused).
    ///   - sampleBuffer: The sample buffer whose image buffer is analysed.
    ///   - connection: The connection the frame arrived on (unused).
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // `textRequest` is implicitly unwrapped: bind it so a frame delivered
        // before the request has been created is skipped instead of crashing.
        guard let request = textRequest,
              let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        // NOTE(review): `.right` assumes the buffers come from the back camera in
        // its default landscape sensor orientation while the UI is in portrait,
        // so upright text appears rotated in the buffer. Confirm, and derive the
        // value from the interface orientation if other orientations are supported.
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: .right, options: [:])
        do {
            try handler.perform([request])
        } catch {
            print("文本識別請求處理失敗: \(error)")
        }
    }
}

代碼解析

AVCaptureSession:
AVCaptureSession 用于實時捕獲攝像頭視頻流。
使用 AVCaptureVideoDataOutput 捕獲每一幀視頻并進行處理。

Vision Text Recognition:
配置 VNRecognizeTextRequest 以進行文字識別。
支持多語言識別：配置 recognitionLanguages 為 ["en-US", "zh-Hans"]，同時支持中文和英文。

實時顯示文本:
將識別到的文字拼接成字符串，通過 UILabel 動態更新到屏幕上。

線程處理:
使用 DispatchQueue 在后臺處理視頻幀。
通過 DispatchQueue.main.async 更新 UI，確保線程安全。

最后編輯于：2024.11.25 21:44:38

©著作權歸作者所有,轉載或內容合作請聯繫作者