實現通過攝像頭實時掃描并將識別的文字實時疊加在屏幕上的效果,可以結合 Apple 的 AVFoundation 和 Vision 框架完成。以下是完整實現步驟:
功能描述
使用 AVCaptureSession 打開攝像頭捕獲實時視頻流。
使用 Vision 框架的 VNRecognizeTextRequest 對每一幀視頻進行文字識別。
將識別到的文字疊加顯示在屏幕上的 UILabel 中。
實現(xiàn)代碼
import UIKit
import AVFoundation
import Vision
/// Shows a live camera preview and overlays the text recognized in each video frame.
///
/// Frames are delivered to the `AVCaptureVideoDataOutputSampleBufferDelegate`
/// conformance on `videoFrameQueue`; recognized strings are joined with newlines
/// and shown in `detectedTextLabel` on the main queue.
class LiveTextRecognitionViewController: UIViewController {
    private let captureSession = AVCaptureSession()
    private var previewLayer: AVCaptureVideoPreviewLayer?
    private let detectedTextLabel = UILabel()
    // Kept implicitly unwrapped because the sample-buffer delegate reads it directly.
    private var textRequest: VNRecognizeTextRequest!

    /// Serial queue for the blocking `startRunning()` / `stopRunning()` calls.
    private let sessionQueue = DispatchQueue(label: "captureSessionQueue")
    /// Serial queue on which video frames are delivered and recognized.
    private let videoFrameQueue = DispatchQueue(label: "videoFrameProcessingQueue")
    /// True once input and output were added successfully; guards session start.
    private var isSessionConfigured = false

    // MARK: - View lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        view.backgroundColor = .black
        // The request must exist before any frame can be delivered, so it is
        // created before the camera is configured.
        setupTextRecognitionRequest()
        setupCamera()
        // Added after the preview layer so the label is drawn on top of it.
        setupDetectedTextLabel()
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        // Keep the preview in sync with the view's size (rotation, split view, ...).
        previewLayer?.frame = view.bounds
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        startSession()
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        stopSession()
    }

    // MARK: - Camera

    /// Configures the capture session and attaches the preview layer.
    /// Leaves the screen black (no preview layer) when the camera cannot be set up.
    private func setupCamera() {
        guard configureSession() else { return }
        isSessionConfigured = true

        let layer = AVCaptureVideoPreviewLayer(session: captureSession)
        layer.videoGravity = .resizeAspectFill
        layer.frame = view.bounds
        view.layer.addSublayer(layer)
        previewLayer = layer
    }

    /// Adds the default video input and a video data output to the session.
    /// - Returns: `true` when both the input and the output were added.
    private func configureSession() -> Bool {
        captureSession.beginConfiguration()
        defer { captureSession.commitConfiguration() }

        captureSession.sessionPreset = .high

        guard let videoDevice = AVCaptureDevice.default(for: .video) else {
            print("無法訪問攝像頭")
            return false
        }

        let videoInput: AVCaptureDeviceInput
        do {
            videoInput = try AVCaptureDeviceInput(device: videoDevice)
        } catch {
            print("無法訪問攝像頭: \(error)")
            return false
        }

        guard captureSession.canAddInput(videoInput) else {
            print("無法添加攝像頭輸入")
            return false
        }
        captureSession.addInput(videoInput)

        let videoOutput = AVCaptureVideoDataOutput()
        // Recognition is slower than the frame rate; drop frames instead of queueing them.
        videoOutput.alwaysDiscardsLateVideoFrames = true
        videoOutput.setSampleBufferDelegate(self, queue: videoFrameQueue)

        guard captureSession.canAddOutput(videoOutput) else {
            print("無法添加視頻輸出")
            return false
        }
        captureSession.addOutput(videoOutput)
        return true
    }

    /// Starts the session off the main thread; `startRunning()` blocks until capture begins.
    private func startSession() {
        guard isSessionConfigured else { return }
        let session = captureSession
        sessionQueue.async {
            if !session.isRunning {
                session.startRunning()
            }
        }
    }

    /// Stops the session off the main thread so the camera is released while off screen.
    private func stopSession() {
        let session = captureSession
        sessionQueue.async {
            if session.isRunning {
                session.stopRunning()
            }
        }
    }

    // MARK: - Overlay label

    /// Pins a translucent, multi-line label near the bottom edge of the view.
    private func setupDetectedTextLabel() {
        detectedTextLabel.numberOfLines = 0
        detectedTextLabel.textColor = .yellow
        detectedTextLabel.backgroundColor = UIColor.black.withAlphaComponent(0.5)
        detectedTextLabel.textAlignment = .center
        detectedTextLabel.translatesAutoresizingMaskIntoConstraints = false
        view.addSubview(detectedTextLabel)

        NSLayoutConstraint.activate([
            detectedTextLabel.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 16),
            detectedTextLabel.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -16),
            detectedTextLabel.bottomAnchor.constraint(equalTo: view.bottomAnchor, constant: -40),
            detectedTextLabel.heightAnchor.constraint(equalToConstant: 120)
        ])
    }

    // MARK: - Text recognition

    /// Creates the reusable Vision request whose completion handler forwards
    /// results to `processTextRecognitionResults(_:)`.
    private func setupTextRecognitionRequest() {
        let request = VNRecognizeTextRequest { [weak self] (request, error) in
            guard let self = self else { return }
            if let error = error {
                print("文本識別出錯: \(error)")
                return
            }
            self.processTextRecognitionResults(request.results)
        }
        request.recognitionLevel = .accurate
        request.recognitionLanguages = ["en-US", "zh-Hans"]
        request.usesLanguageCorrection = true
        textRequest = request
    }

    /// Joins the top candidate of every text observation and shows it in the label.
    /// - Parameter results: The request's results; anything that is not an array of
    ///   `VNRecognizedTextObservation` is ignored.
    private func processTextRecognitionResults(_ results: [Any]?) {
        guard let observations = results as? [VNRecognizedTextObservation] else { return }

        let detectedText = observations
            .compactMap { $0.topCandidates(1).first?.string }
            .joined(separator: "\n")

        // UIKit must be touched on the main queue; `weak` avoids keeping a
        // dismissed controller alive for a pending update.
        DispatchQueue.main.async { [weak self] in
            self?.detectedTextLabel.text = detectedText.isEmpty ? "未檢測到文字" : detectedText
        }
    }
}
// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate

extension LiveTextRecognitionViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    /// Runs the shared text-recognition request on one captured video frame.
    ///
    /// - Parameters:
    ///   - output: The capture output that produced the frame (unused).
    ///   - sampleBuffer: The frame; skipped when it carries no image buffer.
    ///   - connection: The connection the frame arrived on (unused).
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // `textRequest` is implicitly unwrapped and only assigned while the view
        // loads; a frame delivered on the capture queue before that would crash
        // here, so skip frames until the request exists.
        guard let request = textRequest,
              let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        // NOTE(review): `.up` assumes the pixel buffer is already upright relative to
        // the text. Camera buffers are commonly delivered in the sensor's landscape
        // orientation, so a portrait UI may need `.right` here — confirm on device.
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: .up, options: [:])
        do {
            try handler.perform([request])
        } catch {
            print("文本識別請求處理失敗: \(error)")
        }
    }
}
代碼解析
AVCaptureSession:
AVCaptureSession 用于實時捕獲攝像頭視頻流。
使用 AVCaptureVideoDataOutput 捕獲每一幀視頻并進行處理。
Vision Text Recognition:
配置 VNRecognizeTextRequest 以進行文字識別。
支持多語言識別:配置 recognitionLanguages 為 ["en-US", "zh-Hans"],同時支持中文和英文。
實時顯示文本:
將識別到的文字拼接成字符串,通過 UILabel 動態更新到屏幕上。
線程處理:
使用 DispatchQueue 在后臺處理視頻幀。
通過 DispatchQueue.main.async 更新 UI,確保線程安全。