Flow diagram (figure)
1. Video Encoding
1.1 Initialize the video encoder
The initialization call:
VTCompressionSessionCreate(
kCFAllocatorDefault,
width,
height,
kCMVideoCodecType_H264,
nil,
attributes as CFDictionary?,
nil,
callback,
Unmanaged.passUnretained(self).toOpaque(),
&_session)
Set the usual parameters here: frame size, bitrate, frame rate, the output callback, and so on.
width and height are the dimensions of the encoded frame.
kCMVideoCodecType_H264 selects the codec.
attributes holds the session properties; the keys involved (a sketch for applying them follows the list):
[kVTCompressionPropertyKey_RealTime: kCFBooleanTrue, // real-time encoding
kVTCompressionPropertyKey_ProfileLevel: kVTProfileLevel_H264_Baseline_3_1 as NSObject, // encoding profile/level: low-definition Baseline Level 1.3, SD Baseline Level 3, 720p Baseline Level 3.1, full HD Baseline Level 4.1 (Baseline suits live streaming, Main suits stored media, High suits HD storage [3.1 & 4.1 only])
kVTCompressionPropertyKey_AverageBitRate: Int(bitrate) as NSObject, // bitrate
kVTCompressionPropertyKey_ExpectedFrameRate: NSNumber(value: expectedFPS), // frame rate
kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration: NSNumber(value: 2.0), // keyframe interval, in seconds
kVTCompressionPropertyKey_AllowFrameReordering: !isBaseline as NSObject, // whether to emit B-frames; set false for live streaming (a B-frame is a bidirectional difference frame: it records only the deltas against the frames before and after it, which saves a lot of space but costs more computation)
kVTCompressionPropertyKey_PixelTransferProperties: [
"ScalingMode": "Trim"
] as NSObject] // pixel transfer rules
kVTCompressionPropertyKey_H264EntropyMode: kVTH264EntropyMode_CABAC // for H.264, selects the entropy-coding algorithm
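As a minimal sketch of applying these properties one by one (assuming the `_session`, `bitrate`, and `expectedFPS` values from above; passing them as a dictionary at creation time works equally well), each key can also be set after creation with VTSessionSetProperty:

// A sketch, not the only way to configure the session.
if let session: VTCompressionSession = _session {
    VTSessionSetProperty(session, kVTCompressionPropertyKey_RealTime, kCFBooleanTrue)
    VTSessionSetProperty(session, kVTCompressionPropertyKey_ProfileLevel, kVTProfileLevel_H264_Baseline_3_1)
    VTSessionSetProperty(session, kVTCompressionPropertyKey_AverageBitRate, Int(bitrate) as CFTypeRef)
    VTSessionSetProperty(session, kVTCompressionPropertyKey_ExpectedFrameRate, NSNumber(value: expectedFPS))
    VTSessionSetProperty(session, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, NSNumber(value: 2.0))
    VTSessionSetProperty(session, kVTCompressionPropertyKey_AllowFrameReordering, kCFBooleanFalse) // no B-frames for live streaming
    VTCompressionSessionPrepareToEncodeFrames(session) // spin up the encoder before the first frame arrives
}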
1.2 Set the output callback
private var callback: VTCompressionOutputCallback = {(
outputCallbackRef: UnsafeMutableRawPointer?,
sourceFrameRef: UnsafeMutableRawPointer?,
status: OSStatus,
infoFlags: VTEncodeInfoFlags,
sampleBuffer: CMSampleBuffer?) in
guard let ref: UnsafeMutableRawPointer = outputCallbackRef,
let sampleBuffer: CMSampleBuffer = sampleBuffer, status == noErr else {
return
}
let encoder: H264Encoder = Unmanaged<H264Encoder>.fromOpaque(ref).takeUnretainedValue() // recover the instance we passed in at initialization
encoder.formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) // keep the stream's format description (the SPS/PPS live here)
encoder.delegate?.sampleOutput(video: sampleBuffer) // hand off to the caller, which parses the CMSampleBufferRef into SPS, PPS, I-frames and non-I-frames, then pushes them out over RTMP
}
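The SPS/PPS parsing mentioned in the comment can be sketched as follows (a hypothetical helper, not part of the original code; it reads the parameter sets out of the formatDescription captured above, which an RTMP muxer sends in the AVC sequence header):

func parameterSets(from formatDescription: CMFormatDescription) -> (sps: Data, pps: Data)? {
    var spsPointer: UnsafePointer<UInt8>?
    var spsSize: Int = 0
    var ppsPointer: UnsafePointer<UInt8>?
    var ppsSize: Int = 0
    // Parameter set index 0 is the SPS, index 1 the PPS.
    guard CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDescription, 0, &spsPointer, &spsSize, nil, nil) == noErr,
          CMVideoFormatDescriptionGetH264ParameterSetAtIndex(formatDescription, 1, &ppsPointer, &ppsSize, nil, nil) == noErr,
          let sps: UnsafePointer<UInt8> = spsPointer, let pps: UnsafePointer<UInt8> = ppsPointer else {
        return nil
    }
    return (Data(bytes: sps, count: spsSize), Data(bytes: pps, count: ppsSize))
}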
1.3 Encoding
After each frame is encoded, the callback from 1.2 fires automatically.
Note: encoding is driven from the video capture output:
func captureOutput(_ captureOutput: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
guard let imageBuffer: CVImageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
return
}
VTCompressionSessionEncodeFrame(
session,
imageBuffer, // the encoder consumes the raw CVImageBuffer, not the CMSampleBuffer itself
CMSampleBufferGetPresentationTimeStamp(sampleBuffer),
CMSampleBufferGetDuration(sampleBuffer),
nil,
nil,
&flags
)
}
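If a keyframe is needed on demand (say, a new viewer joins the stream), per-frame options can override the session settings. A sketch, assuming it runs inside the capture callback above and reuses its `session`, `imageBuffer`, and `flags`:

let properties: CFDictionary = [
    kVTEncodeFrameOptionKey_ForceKeyFrame as String: true // force this frame to be an I-frame
] as CFDictionary
VTCompressionSessionEncodeFrame(
    session,
    imageBuffer,
    CMSampleBufferGetPresentationTimeStamp(sampleBuffer),
    CMSampleBufferGetDuration(sampleBuffer),
    properties, // per-frame options take precedence over the session defaults
    nil,
    &flags
)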
The figure here shows the internal structure of a CMSampleBuffer, before and after encoding/decoding.
Encoding turns a CVPixelBuffer into a CMSampleBufferRef; decoding goes the other way.
2. Audio Encoding
2.1 Create the encoder
AudioConverterNewSpecific(
&inSourceFormat!, // input format
&inDestinationFormat, // output format
UInt32(inClassDescriptions.count), // number of class descriptions
&inClassDescriptions, // class description array
&converter // the converter, out
)
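For reference, inClassDescriptions lists the AAC encoder implementations the converter may use. A sketch (hardware availability varies by device):

var inClassDescriptions: [AudioClassDescription] = [
    AudioClassDescription(mType: kAudioEncoderComponentType,
                          mSubType: kAudioFormatMPEG4AAC,
                          mManufacturer: kAppleSoftwareAudioCodecManufacturer), // software AAC encoder
    AudioClassDescription(mType: kAudioEncoderComponentType,
                          mSubType: kAudioFormatMPEG4AAC,
                          mManufacturer: kAppleHardwareAudioCodecManufacturer)  // hardware AAC encoder
]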
After creating the converter, adjust its bitrate:
var outputBitrate: UInt32 = 64000 * channelsCount // multiplied by the channel count. Note that AAC does not accept arbitrary bitrates: for 44.1 kHz PCM input, 64000 bps works; for 16 kHz input, use 32000 bps.
let propSize = UInt32(MemoryLayout<UInt32>.size)
AudioConverterSetProperty(converter,
kAudioConverterEncodeBitRate,
propSize,
&outputBitrate)
2塑荒、2音頻描述文件
inDestinationFormat = AudioStreamBasicDescription()
inDestinationFormat!.mSampleRate = sampleRate == 0 ? inSourceFormat!.mSampleRate : sampleRate // sample rate: 32 kHz, 44.1 kHz, or 48 kHz
inDestinationFormat!.mFormatID = kAudioFormatMPEG4AAC // AAC encoding
inDestinationFormat!.mFormatFlags = profile // format details; 0 means no sub-format
inDestinationFormat!.mBytesPerPacket = 0 // bytes per packet; 0 means the packet size varies
inDestinationFormat!.mFramesPerPacket = 1024 // frames per packet: 1 for uncompressed data; a larger fixed number for constant-frame formats (1024 for AAC); 0 for variable-frame formats such as Ogg
inDestinationFormat!.mBytesPerFrame = 0 // bytes per frame; 0 for compressed data
inDestinationFormat!.mChannelsPerFrame = 1 // channel count
inDestinationFormat!.mBitsPerChannel = 0 // 0 for compressed data
inDestinationFormat!.mReserved = 0 // padding for alignment; must be 0
CMAudioFormatDescriptionCreate(
kCFAllocatorDefault, &inDestinationFormat!, 0, nil, 0, nil, nil, &formatDescription
)
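On the input side, inSourceFormat is usually captured lazily from the first sample buffer the microphone delivers. A minimal sketch, assuming it runs inside the capture callback shown next:

if inSourceFormat == nil,
   let description: CMFormatDescription = CMSampleBufferGetFormatDescription(sampleBuffer),
   let asbd: UnsafePointer<AudioStreamBasicDescription> = CMAudioFormatDescriptionGetStreamBasicDescription(description) {
    inSourceFormat = asbd.pointee // the PCM format the capture device actually delivers
}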
2.3 Transcoding
Audio frames arrive from the capture output:
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Encoding flow:
// 1. Create an AudioBufferList and copy the input data into it.
// 2. Set up the output buffers.
// 3. Call AudioConverterFillComplexBuffer, which invokes the inInputDataProc callback to copy the input data into the encoder.
// 4. Transcode: the converted data lands in the designated output variable.
// Set up the input:
var blockBuffer: CMBlockBuffer?
currentBufferList = AudioBufferList.allocate(maximumBuffers: 1)
CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
sampleBuffer,
nil,
currentBufferList!.unsafeMutablePointer,
AudioBufferList.sizeInBytes(maximumBuffers: 1),
kCFAllocatorDefault,
kCFAllocatorDefault,
0,
&blockBuffer
)
// Set up the output
var finished: Bool = false
while !finished {
var ioOutputDataPacketSize: UInt32 = 1
let dataLength: Int = blockBuffer!.dataLength
let outOutputData: UnsafeMutableAudioBufferListPointer = AudioBufferList.allocate(maximumBuffers: 1)
outOutputData[0].mNumberChannels = inDestinationFormat.mChannelsPerFrame
outOutputData[0].mDataByteSize = UInt32(dataLength)
outOutputData[0].mData = UnsafeMutableRawPointer.allocate(byteCount: dataLength, alignment: MemoryLayout<UInt8>.alignment) // raw buffer for the encoded output; alignment must be a power of two
let status: OSStatus = AudioConverterFillComplexBuffer(
converter,
inputDataProc,
Unmanaged.passUnretained(self).toOpaque(),
&ioOutputDataPacketSize,
outOutputData.unsafeMutablePointer,
nil
)
if 0 <= status && ioOutputDataPacketSize == 1 {
var result: CMSampleBuffer?
var timing: CMSampleTimingInfo = CMSampleTimingInfo(sampleBuffer: sampleBuffer)
let numSamples: CMItemCount = sampleBuffer.numSamples
CMSampleBufferCreate(kCFAllocatorDefault, nil, false, nil, nil, formatDescription, numSamples, 1, &timing, 0, nil, &result)
CMSampleBufferSetDataBufferFromAudioBufferList(result!, kCFAllocatorDefault, kCFAllocatorDefault, 0, outOutputData.unsafePointer) // AudioConverterFillComplexBuffer targets outOutputData: it fires inputDataProc, which forwards to our onInputDataForAudioConverter, where memcpy feeds the captured PCM to the converter; the encoded bytes then land in outOutputData. This line is what finally wraps that buffer into a CMSampleBuffer.
delegate?.sampleOutput(audio: result!)
} else {
finished = true
}
for i in 0..<outOutputData.count {
free(outOutputData[i].mData)
}
free(outOutputData.unsafeMutablePointer)
}
}
// The signature of AudioConverterFillComplexBuffer, for reference:
AudioConverterFillComplexBuffer(
inAudioConverter: AudioConverterRef,
inInputDataProc: AudioConverterComplexInputDataProc,
inInputDataProcUserData: UnsafeMutableRawPointer?,
ioOutputDataPacketSize: UnsafeMutablePointer<UInt32>,
outOutputData: UnsafeMutablePointer<AudioBufferList>,
outPacketDescription: UnsafeMutablePointer<AudioStreamPacketDescription>?
) -> OSStatus
inAudioConverter: the converter.
inInputDataProc: the callback that feeds PCM data to the encoder.
inInputDataProcUserData: a pointer to user-defined data, passed through to the callback.
ioOutputDataPacketSize: on input, the capacity of outOutputData in packets; on output, the number of packets actually converted.
outOutputData: pointer to the output AudioBufferList.
outPacketDescription: the output packet descriptions (may be nil).
The callback handler:
private var inputDataProc: AudioConverterComplexInputDataProc = {(
converter: AudioConverterRef,
ioNumberDataPackets: UnsafeMutablePointer<UInt32>,
ioData: UnsafeMutablePointer<AudioBufferList>,
outDataPacketDescription: UnsafeMutablePointer<UnsafeMutablePointer<AudioStreamPacketDescription>?>?,
inUserData: UnsafeMutableRawPointer?) in
return Unmanaged<AACEncoder>.fromOpaque(inUserData!).takeUnretainedValue().onInputDataForAudioConverter(
ioNumberDataPackets,
ioData: ioData,
outDataPacketDescription: outDataPacketDescription
)
}
That callback forwards to:
func onInputDataForAudioConverter(
_ ioNumberDataPackets: UnsafeMutablePointer<UInt32>,
ioData: UnsafeMutablePointer<AudioBufferList>,
outDataPacketDescription: UnsafeMutablePointer<UnsafeMutablePointer<AudioStreamPacketDescription>?>?) -> OSStatus {
guard let bufferList: UnsafeMutableAudioBufferListPointer = currentBufferList else {
ioNumberDataPackets.pointee = 0
return -1
}
memcpy(ioData, bufferList.unsafePointer, bufferListSize) // copy the PCM saved in the capture callback into ioData; the converter consumes it and writes the encoded result into outOutputData
ioNumberDataPackets.pointee = 1
free(bufferList.unsafeMutablePointer)
currentBufferList = nil
return noErr
}
3. Muxing the Streams
With the audio and video encoders from sections 1 and 2 in place, we can now compose the stream data for the socket to send.
3.1 Video muxing
func sampleOutput(video sampleBuffer: CMSampleBuffer) {
let keyframe: Bool = !sampleBuffer.dependsOnOthers
var compositionTime: Int32 = 0
let presentationTimeStamp: CMTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
var decodeTimeStamp: CMTime = CMSampleBufferGetDecodeTimeStamp(sampleBuffer)
if decodeTimeStamp == kCMTimeInvalid {
decodeTimeStamp = presentationTimeStamp
} else {
compositionTime = Int32((presentationTimeStamp.seconds - decodeTimeStamp.seconds) * 1000) // composition-time offset = PTS - DTS, in milliseconds
}
let delta: Double = (videoTimestamp == kCMTimeZero ? 0 : decodeTimeStamp.seconds - videoTimestamp.seconds) * 1000
guard let data: Data = sampleBuffer.dataBuffer?.data, 0 <= delta else {
return
}
var buffer: Data = Data([((keyframe ? FLVFrameType.key.rawValue : FLVFrameType.inter.rawValue) << 4) | FLVVideoCodec.avc.rawValue, FLVAVCPacketType.nal.rawValue]) // FLV video tag header: frame type in the high nibble, codec id in the low nibble
buffer.append(contentsOf: compositionTime.bigEndian.data[1..<4]) // append the composition time as a 24-bit big-endian integer
buffer.append(data) // append the stream payload
delegate?.sampleOutput(video: buffer, withTimestamp: delta, muxer: self) // hand off to the delegate
videoTimestamp = decodeTimeStamp
}
public enum FLVFrameType: UInt8 {
case key = 1
case inter = 2
case disposable = 3
case generated = 4
case command = 5
}
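The other FLV constants referenced above look like this (values as defined by the FLV specification; a sketch matching the code, since the original omits them):

public enum FLVVideoCodec: UInt8 {
    case avc = 7 // codec id 7 = AVC (H.264)
}
public enum FLVAVCPacketType: UInt8 {
    case seq = 0 // AVC sequence header (carries the SPS/PPS)
    case nal = 1 // AVC NALU
    case eos = 2 // end of sequence
}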
3.2 Audio muxing
func sampleOutput(audio sampleBuffer: CMSampleBuffer) {
let presentationTimeStamp: CMTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let delta: Double = (audioTimestamp == kCMTimeZero ? 0 : presentationTimeStamp.seconds - audioTimestamp.seconds) * 1000
guard let data: Data = sampleBuffer.dataBuffer?.data, 0 <= delta else {
return
}
var buffer: Data = Data([RTMPMuxer.aac, FLVAACPacketType.raw.rawValue]) // FLV audio tag header
buffer.append(data) // append the stream payload
delegate?.sampleOutput(audio: buffer, withTimestamp: delta, muxer: self) // hand off to the delegate
audioTimestamp = presentationTimeStamp
}
public enum FLVAACPacketType: UInt8 {
case seq = 0
case raw = 1
}
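RTMPMuxer.aac above is the one-byte FLV audio-tag header. A sketch of how such a constant can be composed (bit layout per the FLV specification; the library's exact definition may differ):

extension RTMPMuxer {
    // soundFormat(4 bits) | soundRate(2 bits) | soundSize(1 bit) | soundType(1 bit)
    // 10 = AAC, 3 = 44.1 kHz, 1 = 16-bit samples, 1 = stereo  =>  0xAF
    static let aac: UInt8 = (10 << 4) | (3 << 2) | (1 << 1) | 1
}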
3筐带、3組RTMP協(xié)議數(shù)據(jù)今穿,僅供參考
func sampleOutput(audio buffer: Data, withTimestamp: Double, muxer: RTMPMuxer) {
guard readyState == .publishing else {
return
}
let type: FLVTagType = .audio
let length: Int = rtmpConnection.socket.doOutput(chunk: // write the chunk to the socket's output stream
RTMPChunk( // assemble the chunk
type: audioWasSent ? .one : .zero, // the first send uses a full type-0 chunk header; later sends use the compressed type-1 header
streamId: type.streamId,
message: RTMPAudioMessage(streamId: id, timestamp: UInt32(audioTimestamp), payload: buffer)), locked: nil)
audioWasSent = true
OSAtomicAdd64(Int64(length), &info.byteCount) // atomically accumulate the number of bytes sent, so the statistics are not double-counted
audioTimestamp = withTimestamp + (audioTimestamp - floor(audioTimestamp))
}
The video path is nearly identical, except that a lock is taken around the encoder:
func sampleOutput(video buffer: Data, withTimestamp: Double, muxer: RTMPMuxer) {
guard readyState == .publishing else {
return
}
let type: FLVTagType = .video
OSAtomicOr32Barrier(1, &mixer.videoIO.encoder.locked)
let length: Int = rtmpConnection.socket.doOutput(chunk: RTMPChunk(
type: videoWasSent ? .one : .zero,
streamId: type.streamId,
message: RTMPVideoMessage(streamId: id, timestamp: UInt32(videoTimestamp), payload: buffer)
), locked: &mixer.videoIO.encoder.locked)
videoWasSent = true
OSAtomicAdd64(Int64(length), &info.byteCount)
videoTimestamp = withTimestamp + (videoTimestamp - floor(videoTimestamp))
frameCount += 1
}