轉碼一般流程
- 獲取音頻數據(AAC,MP3等)
- 解碼(獲取音頻原始采樣數據pcm)
- 編碼(對pcm進行編碼)
使用ffmpeg的函數表示的大概流程
//初始化輸入
avformat_open_input() -- 打開對應音頻文件
avformat_find_stream_info() -- 從輸入文件中獲取到流的相關信息，例如：文件中流的數量
//初始化解碼器
avcodec_find_decoder() -- 根據ffmpeg提供的解碼器id，找到對應的解碼器
avcodec_open2() -- 打開解碼器
//初始化輸出
avformat_alloc_context() -- 創建輸出信息上下文
avcodec_find_encoder() -- 找到編碼器
avcodec_alloc_context3() -- 根據編碼器，初始化編碼上下文
avcodec_open2() -- 打開編碼器
//初始化一個FIFO(先進先出)
av_audio_fifo_alloc() -- 初始化一個先進先出的緩存，用來存儲解碼後的pcm數據
//初始化音頻重采樣器
swr_alloc_set_opts() -- 設置轉換器參數
swr_init() -- 初始化轉換器
//開始音頻轉換
while(!finished) {
// 解碼
av_read_frame() -- 讀取要進行轉碼的數據
avcodec_decode_audio4() -- 進行解碼
av_samples_alloc() -- 創建樣本空間
swr_convert() -- 數據重采樣
av_audio_fifo_write() -- 將數據存儲到fifo緩存中
// 編碼
av_audio_fifo_read() -- 從fifo緩存中讀取pcm數據
avcodec_encode_audio2() -- 將數據進行編碼
}
知識點
AVSampleFormat(樣本數據格式)
在說明該格式之前，先說一下樣本數據存儲的2種方式：packed和planar。
packed - 將所有聲道的數據，交替地存儲成一維數組
planar - 每一個聲道單獨存放，用一個二維數組表示，每一行代表一個聲道。
例如:
現在有一段音頻采樣，左聲道用L表示，右聲道用R表示
LRLRLR......LR
表示使用了packed方式存儲(chǔ)
LLLLL..L
和 RRRRR..R
表示使用了planar方式存儲(chǔ)
在ffmpeg中，采樣樣本的數據格式有如下幾種類型
/**
 * Audio sample formats.
 *
 * Each entry fixes the data type of a single sample and how the channels are
 * laid out: entries without the "P" suffix store all channels interleaved in
 * one buffer (packed), entries with the "P" suffix store every channel in its
 * own buffer (planar).
 */
enum AVSampleFormat {
AV_SAMPLE_FMT_NONE = -1,
AV_SAMPLE_FMT_U8, ///< unsigned 8 bits, packed
AV_SAMPLE_FMT_S16, ///< signed 16 bits, packed
AV_SAMPLE_FMT_S32, ///< signed 32 bits, packed
AV_SAMPLE_FMT_FLT, ///< float, packed
AV_SAMPLE_FMT_DBL, ///< double, packed
AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP, ///< float, planar
AV_SAMPLE_FMT_DBLP, ///< double, planar
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};
AVCodecContext的屬性
channel_layout
channel_layout
表示聲道布局，例如是由左右聲道形成的立體聲，還是單聲道。
在ffmpeg中，支持的聲道布局有
/**
 * @defgroup channel_masks Audio channel masks
 *
 * A channel layout is a 64-bits integer with a bit set for every channel.
 * The number of bits set must be equal to the number of channels.
 * The value 0 means that the channel layout is not known.
 * @note this data structure is not powerful enough to handle channels
 * combinations that have the same channel multiple times, such as
 * dual-mono.
 *
 * Every mask below has exactly one bit set, so masks can be combined with
 * bitwise OR to describe a complete layout.
 *
 * @{
 */
#define AV_CH_FRONT_LEFT 0x00000001
#define AV_CH_FRONT_RIGHT 0x00000002
#define AV_CH_FRONT_CENTER 0x00000004
#define AV_CH_LOW_FREQUENCY 0x00000008
#define AV_CH_BACK_LEFT 0x00000010
#define AV_CH_BACK_RIGHT 0x00000020
#define AV_CH_FRONT_LEFT_OF_CENTER 0x00000040
#define AV_CH_FRONT_RIGHT_OF_CENTER 0x00000080
#define AV_CH_BACK_CENTER 0x00000100
#define AV_CH_SIDE_LEFT 0x00000200
#define AV_CH_SIDE_RIGHT 0x00000400
#define AV_CH_TOP_CENTER 0x00000800
#define AV_CH_TOP_FRONT_LEFT 0x00001000
#define AV_CH_TOP_FRONT_CENTER 0x00002000
#define AV_CH_TOP_FRONT_RIGHT 0x00004000
#define AV_CH_TOP_BACK_LEFT 0x00008000
#define AV_CH_TOP_BACK_CENTER 0x00010000
#define AV_CH_TOP_BACK_RIGHT 0x00020000
#define AV_CH_STEREO_LEFT 0x20000000 ///< Stereo downmix.
#define AV_CH_STEREO_RIGHT 0x40000000 ///< See AV_CH_STEREO_LEFT.
/* From here on the masks carry the ULL suffix so they are 64-bit constants. */
#define AV_CH_WIDE_LEFT 0x0000000080000000ULL
#define AV_CH_WIDE_RIGHT 0x0000000100000000ULL
#define AV_CH_SURROUND_DIRECT_LEFT 0x0000000200000000ULL
#define AV_CH_SURROUND_DIRECT_RIGHT 0x0000000400000000ULL
#define AV_CH_LOW_FREQUENCY_2 0x0000000800000000ULL
/** Channel mask value used for AVCodecContext.request_channel_layout
to indicate that the user requests the channel order of the decoder output
to be the native codec channel order. */
#define AV_CH_LAYOUT_NATIVE 0x8000000000000000ULL
/**
 * @}
 * @defgroup channel_mask_c Audio channel layouts
 *
 * Each layout is the bitwise OR of the individual AV_CH_* channel masks
 * (or of a smaller layout plus additional channel masks).
 * @{
 * */
#define AV_CH_LAYOUT_MONO (AV_CH_FRONT_CENTER) // mono: a single front-center channel
#define AV_CH_LAYOUT_STEREO (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT) // stereo: front-left + front-right
#define AV_CH_LAYOUT_2POINT1 (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1 (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_SURROUND (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_3POINT1 (AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_4POINT0 (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_4POINT1 (AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_2 (AV_CH_LAYOUT_STEREO|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_QUAD (AV_CH_LAYOUT_STEREO|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT0 (AV_CH_LAYOUT_SURROUND|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_5POINT1 (AV_CH_LAYOUT_5POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_5POINT0_BACK (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT1_BACK (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_6POINT0 (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT0_FRONT (AV_CH_LAYOUT_2_2|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_HEXAGONAL (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1 (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_FRONT (AV_CH_LAYOUT_6POINT0_FRONT|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_7POINT0 (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT0_FRONT (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1 (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT1_WIDE (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_OCTAGONAL (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_HEXADECAGONAL (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)
#define AV_CH_LAYOUT_STEREO_DOWNMIX (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
通過channels、channel_layout和sample_fmt，可以很便捷地設置聲道的相關信息
例如:
channels = 2;
channel_layout = AV_CH_LAYOUT_STEREO
sample_fmt = AV_SAMPLE_FMT_FLTP
表示這個音頻數據有2個聲道，分別是左右聲道，聲音數據采用浮點型的planar格式進行存儲，即左右聲道分開存儲
sample_rate
sample_rate表示音頻的采樣率，即1s內采集的聲音樣本個數。
例如:44100表示1s內采集了44100個聲音的樣本數據
AVFrame
AVFrame 存儲的數據，是解碼後的數據，即音頻中的PCM數據
data
存儲了音頻聲道或圖片信息
linesize
對于視頻，存儲了每一個圖片平面中每一行的字節數
對于音頻，存儲了每一個聲道(plane)中數據的字節數
對于音頻，只有linesize[0]被使用，因為音頻中每一個聲道的大小應該相等
extended_data
對于視頻，只是簡單地指向data[]
對于planar格式的音頻，每一個聲道有一個獨立的數據指針，并且linesize[0]包含了每一個聲道存儲數據的大小
nb_samples
表示這一幀中，每個聲道中有多少個采樣點
詳細代碼講解
/**
 * Transcodes the first audio stream of Documents/video.mp4 to AAC.
 *
 * Pipeline: demux -> decode -> resample to the encoder's sample format ->
 * buffer in an AVAudioFifo -> encode one encoder frame at a time -> write each
 * packet, prefixed by the header returned from -adtsDataForPacketLength:, to
 * Documents/testMp4ToAAC.aac. Every decoded frame is additionally dumped,
 * unconverted, to Documents/testMp4ToPcm.pcm.
 *
 * Both output files are recreated on every call. All FFmpeg objects created
 * here and both file handles are released on every exit path.
 *
 * Relies on helpers declared elsewhere in this file (init_resampler,
 * init_converted_samples, convert_samples, add_samples_to_fifo). They are
 * assumed to return non-zero on failure, which is how the call sites have
 * always treated them — confirm against their definitions.
 */
- (void)tranformateToAAC {
    // Every function-scope local is declared before the first `goto cleanup`
    // so that no error path jumps over an initialisation.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *docDir = [paths firstObject];
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSString *aacPath = [docDir stringByAppendingPathComponent:@"testMp4ToAAC.aac"];
    NSString *pcmPath = [docDir stringByAppendingPathComponent:@"testMp4ToPcm.pcm"];
    NSString *fileName = [docDir stringByAppendingPathComponent:@"video.mp4"];

    AVFormatContext *inputFormatCtx = NULL;
    AVCodecContext *inputCodecCtx = NULL;   // owned by inputFormatCtx
    AVCodecContext *aacCodeContex = NULL;   // owned by this method
    AVCodec *pCodec = NULL;
    AVCodec *aacCodec = NULL;
    AVAudioFifo *fifo = NULL;
    SwrContext *resample_context = NULL;
    unsigned int index = 0;
    int audioStream = -1;
    int output_frame_size = 0;
    int ret = 0;
    BOOL finished = NO;         // input exhausted and decoder drained
    BOOL encoderDrained = NO;   // encoder has emitted its last delayed packet
    BOOL succeeded = NO;

    if (!docDir) {
        NSLog(@"Documents directory not found");
        return;
    }

    // Start from empty output files.
    [fileManager removeItemAtPath:aacPath error:nil];
    [fileManager removeItemAtPath:pcmPath error:nil];
    [fileManager createFileAtPath:aacPath contents:nil attributes:nil];
    [fileManager createFileAtPath:pcmPath contents:nil attributes:nil];
    fileHandle = [NSFileHandle fileHandleForWritingAtPath:aacPath];
    pcmfileHandle = [NSFileHandle fileHandleForWritingAtPath:pcmPath];
    NSLog(@"dic = %@",docDir);
    if (!fileHandle || !pcmfileHandle) {
        NSLog(@"Could not open the output files for writing");
        goto cleanup;
    }

    av_register_all();
    avcodec_register_all();

    // Open the input container.
    ret = avformat_open_input(&inputFormatCtx, [fileName UTF8String], NULL, NULL);
    if (ret < 0) {
        NSLog(@"打開文件失敗");
        goto cleanup;
    }

    // Probe the streams. Success is any value >= 0, not only 0.
    ret = avformat_find_stream_info(inputFormatCtx, NULL);
    if (ret < 0) {
        NSLog(@"不能獲取流信息");
        goto cleanup;
    }

    // Locate the first audio stream.
    for (index = 0; index < inputFormatCtx->nb_streams; index++) {
        if (inputFormatCtx->streams[index]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            audioStream = (int)index;
            break;
        }
    }
    if (audioStream < 0) {
        NSLog(@"No audio stream found in the input file");
        goto cleanup;
    }

    // Open the decoder for that stream.
    inputCodecCtx = inputFormatCtx->streams[audioStream]->codec;
    pCodec = avcodec_find_decoder(inputCodecCtx->codec_id);
    if (!pCodec) {
        NSLog(@"No decoder available for the input audio codec");
        goto cleanup;
    }
    ret = avcodec_open2(inputCodecCtx, pCodec, NULL);
    if (ret < 0) {
        NSLog(@"打開音頻解碼器失敗");
        goto cleanup;
    }

    // Create and open the AAC encoder.
    aacCodec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!aacCodec) {
        printf("Can not find encoder!\n");
        goto cleanup;
    }
    aacCodeContex = avcodec_alloc_context3(aacCodec);
    if (!aacCodeContex) {
        NSLog(@"Could not allocate the AAC encoder context");
        goto cleanup;
    }
    aacCodeContex->sample_fmt = aacCodec->sample_fmts[0];
    aacCodeContex->sample_rate = inputCodecCtx->sample_rate;
    aacCodeContex->channels = inputCodecCtx->channels;
    // Derive the layout from the real channel count so that channels and
    // channel_layout can never disagree (e.g. for mono input).
    aacCodeContex->channel_layout = av_get_default_channel_layout(aacCodeContex->channels);
    aacCodeContex->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
    ret = avcodec_open2(aacCodeContex, aacCodec, NULL);
    if (ret < 0) {
        NSLog(@"failure open code");
        goto cleanup;
    }

    // Number of samples per channel the encoder consumes per frame.
    output_frame_size = aacCodeContex->frame_size;
    if (output_frame_size <= 0) {
        NSLog(@"Encoder reported an unusable frame size (%d)", output_frame_size);
        goto cleanup;
    }

    // The FIFO holds resampled audio, so it uses the encoder's sample format.
    fifo = av_audio_fifo_alloc(aacCodeContex->sample_fmt, aacCodeContex->channels, output_frame_size);
    if (!fifo) {
        NSLog(@"Could not allocate the audio FIFO");
        goto cleanup;
    }

    if (init_resampler(inputCodecCtx, aacCodeContex, &resample_context)) {
        NSLog(@"Could not initialise the resampler");
        goto cleanup;
    }

    while (!encoderDrained) {
        // ---- Decode until the FIFO holds one encoder frame or input ends ----
        while (!finished && av_audio_fifo_size(fifo) < output_frame_size) {
            AVPacket packet;
            AVFrame *audioFrame = NULL;
            uint8_t **converted_input_samples = NULL;
            int data_present = 0;
            int readResult = 0;
            int decodeResult = 0;
            BOOL convertFailed = NO;

            av_init_packet(&packet);
            packet.data = NULL;
            packet.size = 0;

            readResult = av_read_frame(inputFormatCtx, &packet);
            if (readResult == AVERROR_EOF) {
                // Feed an empty packet below so the decoder can hand out any
                // frames it is still holding.
                finished = YES;
                packet.data = NULL;
                packet.size = 0;
            } else if (readResult < 0) {
                NSLog(@"Failed to read a packet from the input (error %d)", readResult);
                goto cleanup;
            } else if (packet.stream_index != audioStream) {
                // Not audio: drop it.
                av_free_packet(&packet);
                continue;
            }

            audioFrame = av_frame_alloc();
            if (!audioFrame) {
                NSLog(@"Could not allocate a frame for decoding");
                av_free_packet(&packet);
                goto cleanup;
            }

            decodeResult = avcodec_decode_audio4(inputCodecCtx, audioFrame, &data_present, &packet);
            av_free_packet(&packet);
            if (decodeResult < 0) {
                NSLog(@"音頻解碼失敗");
                av_frame_free(&audioFrame);
                goto cleanup;
            }

            // The decoder still had buffered output at EOF: keep draining it.
            if (finished && data_present) {
                finished = NO;
            }

            if (data_present) {
                // Dump the raw decoded samples. Planar formats have one plane
                // per channel; packed formats have a single interleaved plane.
                enum AVSampleFormat decodedFormat = (enum AVSampleFormat)audioFrame->format;
                const int isPlanar = av_sample_fmt_is_planar(decodedFormat);
                const int planeCount = isPlanar ? audioFrame->channels : 1;
                const int planeBytes = audioFrame->nb_samples
                                       * av_get_bytes_per_sample(decodedFormat)
                                       * (isPlanar ? 1 : audioFrame->channels);
                @autoreleasepool {
                    for (int plane = 0; plane < planeCount; plane++) {
                        NSData *pcm = [NSData dataWithBytes:audioFrame->extended_data[plane]
                                                     length:(NSUInteger)planeBytes];
                        [pcmfileHandle writeData:pcm];
                    }
                }

                // Resample into temporary buffers and queue the result.
                if (init_converted_samples(&converted_input_samples, aacCodeContex,
                                           audioFrame->nb_samples)) {
                    av_frame_free(&audioFrame);
                    goto cleanup;
                }
                convertFailed = (convert_samples((const uint8_t **)audioFrame->extended_data,
                                                 converted_input_samples,
                                                 audioFrame->nb_samples,
                                                 resample_context) != 0);
                if (!convertFailed) {
                    add_samples_to_fifo(fifo, converted_input_samples, audioFrame->nb_samples);
                }
                if (converted_input_samples) {
                    av_freep(&converted_input_samples[0]);
                    free(converted_input_samples);
                }
                if (convertFailed) {
                    av_frame_free(&audioFrame);
                    goto cleanup;
                }
            }

            av_frame_free(&audioFrame);
        }

        // ---- Encode everything that is ready ----
        for (;;) {
            const int buffered = av_audio_fifo_size(fifo);
            AVFrame *frame = NULL;
            AVPacket pkt;
            int data_present = 0;
            int error = 0;
            BOOL draining = NO;

            if (buffered >= output_frame_size || (finished && buffered > 0)) {
                // A full frame, or the final short frame once input has ended.
                const int frame_size = FFMIN(buffered, output_frame_size);

                frame = av_frame_alloc();
                if (!frame) {
                    NSLog(@"Could not allocate a frame for encoding");
                    goto cleanup;
                }
                frame->nb_samples = frame_size;
                frame->channel_layout = aacCodeContex->channel_layout;
                frame->format = aacCodeContex->sample_fmt;
                frame->sample_rate = aacCodeContex->sample_rate;

                if ((error = av_frame_get_buffer(frame, 0)) < 0) {
                    av_frame_free(&frame);
                    goto cleanup;
                }
                if (av_audio_fifo_read(fifo, (void **)frame->data, frame_size) < frame_size) {
                    av_frame_free(&frame);
                    goto cleanup;
                }
                frame->pts = av_frame_get_best_effort_timestamp(frame);
                frame->pict_type = AV_PICTURE_TYPE_NONE;
            } else if (!finished) {
                // Not enough samples yet: go back and decode more input.
                break;
            } else {
                // Input exhausted and FIFO empty: a NULL frame asks the
                // encoder for its delayed packets.
                draining = YES;
            }

            av_init_packet(&pkt);
            pkt.data = NULL;
            pkt.size = 0;

            error = avcodec_encode_audio2(aacCodeContex, &pkt, frame, &data_present);
            av_frame_free(&frame);
            if (error < 0) {
                av_free_packet(&pkt);
                goto cleanup;
            }

            if (data_present) {
                @autoreleasepool {
                    NSData *data = [NSData dataWithBytes:pkt.data length:pkt.size];
                    NSLog(@"pkt length = %d",pkt.size);
                    [fileHandle writeData:[self adtsDataForPacketLength:pkt.size]];
                    [fileHandle writeData:data];
                }
            }
            av_free_packet(&pkt);

            if (draining && !data_present) {
                encoderDrained = YES;
                break;
            }
        }
    }

    succeeded = YES;

cleanup:
    // Release in reverse order of creation; every call tolerates a resource
    // that was never created.
    swr_free(&resample_context);
    if (fifo) {
        av_audio_fifo_free(fifo);
    }
    if (aacCodeContex) {
        avcodec_close(aacCodeContex);
        av_free(aacCodeContex);
    }
    if (inputCodecCtx) {
        avcodec_close(inputCodecCtx);
    }
    avformat_close_input(&inputFormatCtx);

    [fileHandle closeFile];
    fileHandle = nil;
    [pcmfileHandle closeFile];
    pcmfileHandle = nil;

    if (succeeded) {
        NSLog(@"***************************************end");
    }
}