FFmpeg版本:3.4.2
FFmpeg的官方例子中的resampling_audio,從名字上看它是一個對音頻重采樣的例子。但實際上除了重采樣,它還能轉換采樣格式和聲道數。
重采樣:舉個例子,將一個采樣率為44100的音頻轉換成采樣率為8000的音頻,這個過程就叫做音頻的重采樣。由于源音頻和目標音頻的采樣率之比不一定是整數,為了重采樣后的目標音頻盡可能地不失真,其中涉及到的算法是很復雜的。
聲道:常見的音頻有立體聲(stereo)和單聲道(mono)兩種類型,另外還有環繞立體聲等其它不太常用的類型。立體聲包含左聲道和右聲道。
采樣格式:數字音頻本質上是由很多個“采樣”組成的。以不同聲道的采樣排列方式來區分,采樣格式可以分為平坦(planar)和非平坦(packed)兩種類型;除此之外還會以采樣的數值精度和數值類型來區分采樣格式。
平坦形式是指各個聲道的采樣都由各自的數組儲存,有多少個聲道就有多少個數組;非平坦形式則只有一條數組,所有聲道的采樣都交錯儲存在同一個數組中。由于單聲道只有一個聲道,所以平坦和非平坦儲存都是一樣的。以立體聲為例:平坦格式儲存為兩個數組 LLLL… 和 RRRR…,非平坦格式儲存為一個數組 LRLRLR…(L、R 分別代表左、右聲道的一個采樣)。
另外,根據采樣的取值范圍和類型來區分不同的格式:
AV_SAMPLE_FMT_U8:無符號8位整型
AV_SAMPLE_FMT_S16: 帶符號16位整型
AV_SAMPLE_FMT_S32: 帶符號32位整型
AV_SAMPLE_FMT_S64: 帶符號64位整型
AV_SAMPLE_FMT_FLT: float
AV_SAMPLE_FMT_DBL: double
知道上面這些概念,代碼就很好理解了。
- 初始化
int64_t src_ch_layout = AV_CH_LAYOUT_STEREO; // input: stereo
int64_t dst_ch_layout = AV_CH_LAYOUT_SURROUND; // output: surround
int src_rate = 48000;
int dst_rate = 11025; // The official example outputs at 44100; a lower rate is used here so the effect of the conversion is easier to see.
/* Create the resampling context */
struct SwrContext *swr_ctx = swr_alloc();
/* Set the conversion parameters (src_sample_fmt / dst_sample_fmt are declared outside this excerpt) */
av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
- 分配緩沖數組空間
/* Allocate the input sample buffers */
uint8_t **src_data = NULL;
src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
src_nb_samples, src_sample_fmt, 0);
/* Allocate the output sample buffers */
uint8_t **dst_data = NULL;
dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
dst_nb_samples, dst_sample_fmt, 0);
- 填充輸入數據
/**
* 將一段正弦波作為測試音頻填充到輸入數(shù)組中
*/
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
int i, j;
double tincr = 1.0 / sample_rate, *dstp = dst;
const double c = 2 * M_PI * 440.0;
/* generate sin tone with 440Hz frequency and duplicated channels */
for (i = 0; i < nb_samples; i++) {
*dstp = sin(c * *t);
for (j = 1; j < nb_channels; j++)
dstp[j] = dstp[0];
dstp += nb_channels;
*t += tincr;
}
}
/* Generate synthetic audio: fill src_data[0], viewed as an array of double,
 * with src_nb_samples frames of the test tone. t is passed by address so that
 * fill_samples() can advance it. */
fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);
例子中的輸入采樣格式是AV_SAMPLE_FMT_DBL,采樣數值類型為double,為非平坦格式,所有數據都存在src_data[0]這個數組中。
- 轉換并輸出數據
/* Convert: swr_convert() writes into dst_data; a negative return value is
 * treated as failure, otherwise it is reported below as the output count. */
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
if (ret < 0) {
fprintf(stderr, "Error while converting\n");
goto end;
}
// Get the length of the converted data
dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
ret, dst_sample_fmt, 1);
if (dst_bufsize < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
goto end;
}
printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
// Write the converted data to the output file
fwrite(dst_data[0], 1, dst_bufsize, dst_file);
由于例子中目標采樣格式是AV_SAMPLE_FMT_S16,為非平坦格式,所有輸出數據都存在dst_data[0]這個數組中。
- 結束
/* Free the sample buffers: first the sample data that element 0 points to,
 * then the pointer array itself */
if (src_data)
av_freep(&src_data[0]);
av_freep(&src_data);
if (dst_data)
av_freep(&dst_data[0]);
av_freep(&dst_data);
/* Free the resampling context */
swr_free(&swr_ctx);
- 將輸入和輸出的音頻用Audacity打開,可以直觀地看到轉換前后的變化。這里有一個問題:為什么輸出音頻的第三聲道的波形是一條直線?
audacity.png - 總結:這個例子只展示非平坦采樣格式的音頻轉換,沒有包含平坦與非平坦采樣格式的音頻轉換功能,剛開始我并不熟悉這兩者的區別,導致音頻轉換失敗。下面我對這個例子進行重構,讓它能適用于更多種音頻格式的轉換。
convertor.h
//
// convertor.h
// SoundEditor
//
// Created by Kidon Liang on 2018/4/1.
// Copyright © 2018年 Kidon Liang. All rights reserved.
//
#ifndef convertor_h
#define convertor_h
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
/**
 * Initialise the convertor: create and configure the resampling context and
 * allocate the internal source and destination sample buffers.
 *
 * @param src_ch_layout   source channel layout
 * @param src_sample_fmt  source sample format
 * @param src_sample_rate source sample rate
 * @param dst_ch_layout   destination channel layout
 * @param dst_sample_fmt  destination sample format
 * @param dst_sample_rate destination sample rate
 * @return 0 on success, a negative value on failure
 **/
int convertor_init(int64_t src_ch_layout, enum AVSampleFormat src_sample_fmt, int src_sample_rate,
int64_t dst_ch_layout, enum AVSampleFormat dst_sample_fmt, int dst_sample_rate);
/**
 * Feed source audio and convert it.
 *
 * @param data array of source buffers: one per channel for planar formats,
 *             a single buffer for packed formats
 * @param len  number of bytes to copy from each buffer in data
 * @return 0 on success, a negative value on failure
 **/
int convertor_feed_data(uint8_t **data, int len);
/**
 * Get the size of the converted data that is waiting to be received.
 *
 * @return the size recorded by the most recent feed or flush
 **/
int convertor_get_converted_size(void);
/**
 * Receive the converted data.
 *
 * @param data array of caller-provided buffers that receive the data: one per
 *             channel for planar formats, a single buffer for packed formats
 * @return the converted size that was pending before the call
 **/
int convertor_receive_converted_data(uint8_t **data);
/**
 * Drain the data still held inside the resampler. The result is picked up
 * with convertor_get_converted_size() / convertor_receive_converted_data().
 **/
void convertor_flush(void);
/**
 * Close the convertor, releasing its buffers and the resampling context.
 **/
void convertor_close(void);
#endif /* convertor_h */
convertor.c
//
// convertor.c
// SoundEditor
//
// Created by Kidon Liang on 2018/4/1.
// Copyright © 2018年 Kidon Liang. All rights reserved.
//
#include "convertor.h"
// Scratch storage for intermediate return values.
static int tmp_ret = 0;
// Resampling context; NULL while the convertor is closed.
static struct SwrContext *swr_ctx = NULL;
// Internal sample buffers handed to swr_convert() as input and output.
static uint8_t **src_buffers = NULL;
static uint8_t **dst_buffers = NULL;
// Formats and rates recorded by convertor_init().
static enum AVSampleFormat src_sample_fmt;
static enum AVSampleFormat dst_sample_fmt;
static int src_sample_rate;
static int dst_sample_rate;
static int src_nb_samples = 1024; // samples per call; the author describes 1024 as FFmpeg's default
static int dst_nb_samples;
static int max_dst_nb_samples; // largest output sample count allocated so far; guards against overrunning dst_buffers
// Line sizes reported by the av_samples_* allocation helpers.
static int src_linesize;
static int dst_linesize;
// Channel counts, and buffer counts (one per channel for planar formats, otherwise 1).
static int src_nb_channels;
static int src_nb_buffers;
static int dst_nb_channels;
static int dst_nb_buffers;
/**
 * Configure the resampler and allocate the internal sample buffers.
 *
 * State left over from an earlier initialisation is released first, so the
 * function may be called again without leaking the previous context/buffers.
 *
 * @param src_ch_layout source channel layout
 * @param src_sp_fmt    source sample format
 * @param src_sp_rate   source sample rate
 * @param dst_ch_layout destination channel layout
 * @param dst_sp_fmt    destination sample format
 * @param dst_sp_rate   destination sample rate
 * @return 0 on success; -1 if the SwrContext cannot be allocated, -2 if an
 *         option is rejected or swr_init() fails, -3 / -4 if the source /
 *         destination buffers cannot be allocated. On failure everything
 *         allocated so far is released through convertor_close().
 */
int convertor_init(int64_t src_ch_layout, enum AVSampleFormat src_sp_fmt, int src_sp_rate,
                   int64_t dst_ch_layout, enum AVSampleFormat dst_sp_fmt, int dst_sp_rate) {
    int opt_failed = 0;

    // Release whatever a previous init left behind before overwriting the pointers.
    convertor_close();

    src_sample_fmt = src_sp_fmt;
    dst_sample_fmt = dst_sp_fmt;
    src_sample_rate = src_sp_rate;
    dst_sample_rate = dst_sp_rate;
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    src_nb_buffers = av_sample_fmt_is_planar(src_sample_fmt) ? src_nb_channels : 1;
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    dst_nb_buffers = av_sample_fmt_is_planar(dst_sample_fmt) ? dst_nb_channels : 1;

    // init
    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        printf("can not alloc SwrContext.");
        convertor_close();
        return -1;
    }

    /* set options; remember whether any of them was rejected */
    opt_failed |= av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0) < 0;
    opt_failed |= av_opt_set_int(swr_ctx, "in_sample_rate", src_sample_rate, 0) < 0;
    opt_failed |= av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0) < 0;
    opt_failed |= av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0) < 0;
    opt_failed |= av_opt_set_int(swr_ctx, "out_sample_rate", dst_sample_rate, 0) < 0;
    opt_failed |= av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0) < 0;

    /* initialize the resampling context */
    if (opt_failed || swr_init(swr_ctx) < 0) {
        printf("Failed to initialize the resampling context\n");
        convertor_close();
        return -2;
    }

    // allocate samples buffers
    tmp_ret = av_samples_alloc_array_and_samples(&src_buffers, &src_linesize, src_nb_channels,
                                                 src_nb_samples, src_sample_fmt, 0);
    if (tmp_ret < 0) {
        printf("Could not allocate source samples\n");
        convertor_close();
        return -3;
    }
    // Only report the line size once the allocation is known to have succeeded.
    printf("src_linesize=%d\n", src_linesize);

    // Initial output capacity: the input chunk rescaled to the output rate, rounded up.
    max_dst_nb_samples = dst_nb_samples =
        (int)av_rescale_rnd(src_nb_samples, dst_sample_rate, src_sample_rate, AV_ROUND_UP);
    tmp_ret = av_samples_alloc_array_and_samples(&dst_buffers, &dst_linesize, dst_nb_channels,
                                                 dst_nb_samples, dst_sample_fmt, 0);
    if (tmp_ret < 0) {
        printf("Could not allocate destination samples\n");
        convertor_close();
        return -4;
    }
    printf("dst_linesize=%d\n", dst_linesize);
    return 0;
}
static int converted_size = 0;
/**
* 輸入數(shù)據(jù)匀钧,需要確保每次輸入1024個樣本數(shù)據(jù)翎碑。
**/
int convertor_feed_data(uint8_t **data, int len) {
// 填充數(shù)據(jù)
for (int i=0;i<src_nb_buffers;i++) {
memcpy(src_buffers[i], data[i], len);
}
/* compute destination number of samples */
dst_nb_samples = (int) av_rescale_rnd(swr_get_delay(swr_ctx, src_sample_rate) +
src_nb_samples, dst_sample_rate, src_sample_rate, AV_ROUND_UP);
if (dst_nb_samples > max_dst_nb_samples) {
// 重新分配輸出buffer
if (dst_buffers) {
av_freep(&dst_buffers[0]);
}
tmp_ret = av_samples_alloc(dst_buffers, &dst_linesize, dst_nb_channels,
dst_nb_samples, dst_sample_fmt, 1);
printf("realloc dst_linesize=%d\n", dst_linesize);
if (tmp_ret < 0) {
printf("重新分配輸出buffer失敗\n");
convertor_close();
return -1;
}
max_dst_nb_samples = dst_nb_samples;
}
/* convert to destination format */
tmp_ret = swr_convert(swr_ctx, dst_buffers, dst_nb_samples, (const uint8_t **)src_buffers, src_nb_samples);
if (tmp_ret < 0) {
printf("Error while converting\n");
convertor_close();
return -2;
}
converted_size = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
tmp_ret, dst_sample_fmt, 1);
if (converted_size < 0) {
printf("Could not get sample buffer size\n");
convertor_close();
return -3;
}
return 0;
}
/** Report the size, in bytes, of the converted data that has not been received yet. */
int convertor_get_converted_size(void)
{
    const int pending_bytes = converted_size;

    return pending_bytes;
}
/**
 * Copy the pending converted data into caller-provided buffers.
 *
 * converted_size covers all channels together, so for planar output each
 * plane only holds converted_size / dst_nb_buffers bytes; that per-plane
 * amount is what gets copied into each data[i]. For packed output there is a
 * single buffer and the whole size is copied.
 *
 * @param data destination buffers: one per channel for planar formats, a
 *             single buffer for packed formats
 * @return the total converted size in bytes (all channels), 0 if nothing is
 *         pending, or -1 if data is NULL or the convertor is not initialised
 **/
int convertor_receive_converted_data(uint8_t **data) {
    int total_bytes;
    int plane_bytes;

    if (!data || !dst_buffers || dst_nb_buffers <= 0) {
        return -1;
    }
    if (converted_size <= 0) {
        // Nothing to hand out; also discards a negative size left by a failed flush.
        converted_size = 0;
        return 0;
    }

    total_bytes = converted_size;
    plane_bytes = total_bytes / dst_nb_buffers;
    for (int i = 0; i < dst_nb_buffers; i++) {
        memcpy(data[i], dst_buffers[i], plane_bytes);
    }
    converted_size = 0;
    return total_bytes;
}
/**
 * Drain the samples still buffered inside the resampler by calling
 * swr_convert() with no input. At most dst_nb_samples samples are retrieved
 * per call. The resulting size is available through
 * convertor_get_converted_size(); it is 0 when the convertor is not
 * initialised or the flush fails.
 **/
void convertor_flush(void) {
    converted_size = 0;
    if (!swr_ctx || !dst_buffers) {
        return;
    }

    tmp_ret = swr_convert(swr_ctx, dst_buffers, dst_nb_samples, NULL, 0);
    if (tmp_ret < 0) {
        printf("Error while converting\n");
        convertor_close();
        // Stop here: a negative sample count must not be turned into a size.
        return;
    }

    converted_size = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                tmp_ret, dst_sample_fmt, 1);
    if (converted_size < 0) {
        printf("Could not get sample buffer size\n");
        converted_size = 0;
        convertor_close();
    }
}
void convertor_close(void) {
if (src_buffers) {
av_freep(&src_buffers[0]);
}
av_freep(&src_buffers);
if (dst_buffers) {
av_freep(&dst_buffers[0]);
}
av_freep(&dst_buffers);
swr_free(&swr_ctx);
}
main.c
//
// main.c
// SoundEditor
//
// Created by Kidon Liang on 2018/3/31.
// Copyright © 2018年 Kidon Liang. All rights reserved.
//
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "convertor.h"
static void fill_samples_float(float **data, enum AVSampleFormat sample_fmt,
int nb_samples, int nb_channels, int sample_rate, double *time) {
int i, j;
double tincr = 1.0 / sample_rate;
const double c = 2 * M_PI * 440.0;
double val;
if (av_sample_fmt_is_planar(sample_fmt)) {
for (i = 0; i < nb_samples; i++) {
val = sin(c * *time);
for (j = 0; j < nb_channels; j++) {
data[j][i] = val;
}
*time += tincr;
}
} else {
float *data_p = data[0];
for (i = 0; i < nb_samples; i++) {
val = sin(c * *time);
for (j = 0; j < nb_channels; j++) {
data_p[j] = val;
}
data_p += nb_channels;
*time += tincr;
}
}
}
static void fill_samples_double(double **data, enum AVSampleFormat sample_fmt,
int nb_samples, int nb_channels, int sample_rate, double *time) {
int i, j;
double tincr = 1.0 / sample_rate;
const double c = 2 * M_PI * 440.0;
double val;
if (av_sample_fmt_is_planar(sample_fmt)) {
for (i = 0; i < nb_samples; i++) {
val = sin(c * *time);
for (j = 0; j < nb_channels; j++) {
data[j][i] = val;
}
*time += tincr;
}
} else {
double *data_p = data[0];
for (i = 0; i < nb_samples; i++) {
val = sin(c * *time);
for (j = 0; j < nb_channels; j++) {
data_p[j] = val;
}
data_p += nb_channels;
*time += tincr;
}
}
}
static void fill_samples_16(int16_t **data, enum AVSampleFormat sample_fmt,
int nb_samples, int nb_channels, int sample_rate, double *time) {
int i, j;
double tincr = 1.0 / sample_rate;
const double c = 2 * M_PI * 440.0;
double val;
if (av_sample_fmt_is_planar(sample_fmt)) {
for (i = 0; i < nb_samples; i++) {
val = (32768 - 1) * sin(c * *time);
for (j = 0; j < nb_channels; j++) {
data[j][i] = val;
}
*time += tincr;
}
} else {
int16_t *data_p = data[0];
for (i = 0; i < nb_samples; i++) {
val = (32768 - 1) * sin(c * *time);
// val = (32768 - 1) * 0.25;
for (j = 0; j < nb_channels; j++) {
// data_p[j] = (-1 + j*2) * val;
data_p[j] = val;
}
data_p += nb_channels;
*time += tincr;
}
}
}
/*
 * Dispatch to the tone generator matching the sample format. 16-bit, float
 * and double formats (packed and planar) are supported; for any other format
 * a message is printed and the buffers are left untouched.
 */
static void fill_samples(uint8_t **data, enum AVSampleFormat sample_fmt,
                         int nb_samples, int nb_channels, int sample_rate, double *time) {
    if (sample_fmt == AV_SAMPLE_FMT_S16 || sample_fmt == AV_SAMPLE_FMT_S16P) {
        fill_samples_16((int16_t **) data, sample_fmt, nb_samples, nb_channels, sample_rate, time);
    } else if (sample_fmt == AV_SAMPLE_FMT_FLT || sample_fmt == AV_SAMPLE_FMT_FLTP) {
        fill_samples_float((float **) data, sample_fmt, nb_samples, nb_channels, sample_rate, time);
    } else if (sample_fmt == AV_SAMPLE_FMT_DBL || sample_fmt == AV_SAMPLE_FMT_DBLP) {
        fill_samples_double((double **) data, sample_fmt, nb_samples, nb_channels, sample_rate, time);
    } else {
        printf("其它格式的采樣填充方法就不寫了。");
    }
}
/* Number of samples generated and handed to the convertor per iteration. */
enum { CHUNK_SAMPLES = 1024 };

/* Free a plane array created by alloc_planes(); accepts NULL. */
static void free_planes(uint8_t **planes, int nb_planes) {
    if (!planes) {
        return;
    }
    for (int i = 0; i < nb_planes; i++) {
        free(planes[i]);
    }
    free(planes);
}

/* Allocate nb_planes buffers of plane_bytes bytes each; returns NULL on failure. */
static uint8_t **alloc_planes(int nb_planes, size_t plane_bytes) {
    if (nb_planes <= 0 || plane_bytes == 0) {
        return NULL;
    }
    /* calloc so that a partially filled array can be passed to free_planes() */
    uint8_t **planes = calloc((size_t)nb_planes, sizeof *planes);
    if (!planes) {
        return NULL;
    }
    for (int i = 0; i < nb_planes; i++) {
        planes[i] = malloc(plane_bytes);
        if (!planes[i]) {
            free_planes(planes, nb_planes);
            return NULL;
        }
    }
    return planes;
}

/*
 * Receive converted_size bytes of pending output from the convertor and
 * append the first destination plane to dst_file, growing *planes on demand.
 * Returns 0 on success (including when nothing is pending), -1 on failure.
 */
static int write_converted(uint8_t ***planes, int *capacity, int nb_planes,
                           int converted_size, FILE *dst_file) {
    if (converted_size <= 0) {
        return 0;
    }
    if (*capacity < converted_size) {
        free_planes(*planes, nb_planes);
        *capacity = 0;
        /* Every plane is given room for the full converted size, which is the
         * most convertor_receive_converted_data() can copy into one plane. */
        *planes = alloc_planes(nb_planes, (size_t)converted_size);
        if (!*planes) {
            printf("Could not allocate destination buffers\n");
            return -1;
        }
        *capacity = converted_size;
        printf("realloc dst_buffers dst_buffer_size=%d\n", *capacity);
    }
    if (convertor_receive_converted_data(*planes) < 0) {
        printf("Could not receive converted data\n");
        return -1;
    }
    /* converted_size covers all channels; for planar output only the first
     * channel is written, for packed output the single buffer holds it all */
    size_t out_bytes = (size_t)(converted_size / nb_planes);
    if (fwrite((*planes)[0], 1, out_bytes, dst_file) != out_bytes) {
        printf("Could not write destination file\n");
        return -1;
    }
    return 0;
}

/*
 * Generate ten seconds of a 440 Hz test tone, push it through the convertor
 * in CHUNK_SAMPLES-sized chunks and dump both the source and the converted
 * audio as raw PCM files. Returns 0 on success and 1 on any failure.
 */
int main(void) {
    const char *src_path = "/Users/kidonliang/Desktop/src-0.pcm";
    const char *dst_path = "/Users/kidonliang/Desktop/dst-0.pcm";
    const enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16;
    const enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_FLTP;
    const int64_t src_ch_layout = AV_CH_LAYOUT_MONO;
    const int64_t dst_ch_layout = AV_CH_LAYOUT_MONO;
    const int src_sample_rate = 44100;
    const int dst_sample_rate = 16000;
    const int src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    const int dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    const int src_bytes_ps = av_get_bytes_per_sample(src_sample_fmt);
    const int src_planar = av_sample_fmt_is_planar(src_sample_fmt);
    /* one buffer per channel for planar formats, a single buffer otherwise */
    const int src_nb_planes = src_planar ? src_nb_channels : 1;
    const int dst_nb_planes = av_sample_fmt_is_planar(dst_sample_fmt) ? dst_nb_channels : 1;
    /* bytes held by ONE source buffer for one chunk */
    const int src_plane_bytes = src_bytes_ps * CHUNK_SAMPLES * (src_planar ? 1 : src_nb_channels);

    FILE *src_file = NULL;
    FILE *dst_file = NULL;
    uint8_t **src_buffers = NULL;
    uint8_t **dst_buffers = NULL;
    int dst_buffer_size = 0;
    int converted_size = 0;
    double tone_time = 0;
    int exit_code = 1;

    src_file = fopen(src_path, "wb");
    dst_file = fopen(dst_path, "wb");
    if (!src_file || !dst_file) {
        printf("Could not open output files\n");
        goto end;
    }

    if (convertor_init(src_ch_layout, src_sample_fmt, src_sample_rate,
                       dst_ch_layout, dst_sample_fmt, dst_sample_rate) < 0) {
        goto end;
    }

    // alloc source buffers
    src_buffers = alloc_planes(src_nb_planes, (size_t)src_plane_bytes);
    if (!src_buffers) {
        printf("Could not allocate source buffers\n");
        goto end;
    }

    do {
        fill_samples(src_buffers, src_sample_fmt, CHUNK_SAMPLES, src_nb_channels,
                     src_sample_rate, &tone_time);
        // dump the source audio (for planar input only the first channel)
        if (fwrite(src_buffers[0], 1, (size_t)src_plane_bytes, src_file) != (size_t)src_plane_bytes) {
            printf("Could not write source file\n");
            goto end;
        }
        // len is the byte count of ONE buffer, which is what the convertor copies per buffer
        if (convertor_feed_data(src_buffers, src_plane_bytes) < 0) {
            goto end;
        }
        converted_size = convertor_get_converted_size();
        if (write_converted(&dst_buffers, &dst_buffer_size, dst_nb_planes,
                            converted_size, dst_file) < 0) {
            goto end;
        }
        printf("time=%f, converted_size=%d, dst_nb_channels=%d\n", tone_time, converted_size, dst_nb_channels);
    } while (tone_time < 10);

    // drain what is still buffered inside the resampler
    convertor_flush();
    converted_size = convertor_get_converted_size();
    printf("flushed converted_size=%d\n", converted_size);
    if (write_converted(&dst_buffers, &dst_buffer_size, dst_nb_planes,
                        converted_size, dst_file) < 0) {
        goto end;
    }
    exit_code = 0;

end:
    /* fclose flushes, so a failure here means the output is incomplete */
    if (src_file && fclose(src_file) != 0) {
        exit_code = 1;
    }
    if (dst_file && fclose(dst_file) != 0) {
        exit_code = 1;
    }
    free_planes(src_buffers, src_nb_planes);
    free_planes(dst_buffers, dst_nb_planes);
    convertor_close();
    return exit_code;
}