一、什么是音頻重采樣
音頻重采樣就是改變音頻的采樣率、采樣格式、聲道數等參數,使之按照我們期望的參數輸出。比如我們將采樣率 48kHz、采樣格式 f32le、聲道數 1 的音頻 A 轉換成采樣率 44.1kHz、采樣格式 s16le、聲道數 2 的音頻 B。
那么為什么需要對音頻重采樣?列舉一個經典用途,有些音頻編碼器對輸入的原始PCM數據是有特定參數要求的,比如要求必須是44100_s16le_2。但是你提供的PCM參數可能是48000_f32le_1。這個時候就需要先將48000_f32le_1轉換成44100_s16le_2,然后再使用音頻編碼器對轉換后的PCM進行編碼。
二、使用 FFmpeg 命令行實現音頻重采樣
將采樣率 48000 采樣格式 f32le 聲道數 1 的 PCM 音頻數據重采樣成采樣率 44100 采樣格式 s16le 聲道數 2 的 PCM 音頻數據:
$ ffmpeg -ar 48000 -ac 1 -f f32le -i ar48000ac1f32le.pcm -ar 44100 -ac 2 -f s16le ar44100ac2s16le.pcm
三、使用 FFmpeg API 編程實現音頻重采樣
使用 libswresample 音頻重采樣的核心步驟:
1、定義變量(為了簡化釋放資源的代碼用到了 goto 語句,需要把用到的變量定義到前面):
// Input and output files, opened later once the resampler and buffers are ready.
QFile inFile(inFilename);
QFile outFile(outFilename);
// ---- Input buffer ----
// Pointer to the input buffer (array of data pointers)
uint8_t **inData = nullptr;
// Buffer size in bytes, filled in when the buffer is allocated
int inLineSize = 0;
// Number of channels in the input layout
int inChs = av_get_channel_layout_nb_channels(inChLayout);
// Size in bytes of one sample across all input channels
int inBytesPerSample = inChs * av_get_bytes_per_sample(inSampleFormat);
// Number of samples the input buffer holds
int inSamples = 1024;
// ---- Output buffer ----
// Pointer to the output buffer (array of data pointers)
uint8_t **outData = nullptr;
// Buffer size in bytes, filled in when the buffer is allocated
int outLineSize = 0;
// Number of channels in the output layout
int outChs = av_get_channel_layout_nb_channels(outChLayout);
// Size in bytes of one sample across all output channels
int outBytesPerSample = outChs * av_get_bytes_per_sample(outSampleFormat);
// Number of samples the output buffer holds: inSamples scaled by
// outSampleRate / inSampleRate, rounded up
int outSamples = av_rescale_rnd(outSampleRate, inSamples, inSampleRate, AV_ROUND_UP);
// Number of bytes returned by the most recent read of the input file
int len = 0;
// Return value of the most recent FFmpeg call
int ret = 0;
我們設置了輸入緩沖區樣本數量為 1024,然后根據輸入輸出采樣率的比例計算出輸出緩沖區樣本數量,計算公式如下:
$$\frac{\mathrm{inSamples}}{\mathrm{outSamples}} = \frac{\mathrm{inSampleRate}}{\mathrm{outSampleRate}}$$

$$\mathrm{outSamples} = \frac{\mathrm{inSamples} \times \mathrm{outSampleRate}}{\mathrm{inSampleRate}}$$
FFmpeg 提供了現成的 API 計算輸出緩沖區樣本數量:
/**
* Rescale a 64-bit integer with specified rounding.
*
* The operation is mathematically equivalent to `a * b / c`, but writing that
* directly can overflow, and does not support different rounding methods.
*
* @param a   first factor of the `a * b / c` expression
* @param b   second factor of the `a * b / c` expression
* @param c   divisor of the `a * b / c` expression
* @param rnd rounding method applied to the result
*
* @see av_rescale(), av_rescale_q(), av_rescale_q_rnd()
*/
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd) av_const;
此函數的操作等價于我們上邊的計算公式,并且做了防止溢出處理。rnd:取整模式選擇向上取整 AV_ROUND_UP。實際上輸入輸出緩沖區樣本大小全都設置為 1024,重采樣后的音頻有時也是可以播放的,聽起來并沒有什么不同,但是通過觀察轉碼后的音頻文件大小你可能會發現丟失了部分音頻數據。
2、創建重采樣上下文:
SwrContext *ctx = swr_alloc_set_opts(nullptr,
outChLayout, outSampleFormat, outSampleRate,
inChLayout, inSampleFormat, inSampleRate,
0, nullptr);
3、初始化重采樣上下文:
// Initialize the context; a negative return value is an FFmpeg error code.
ret = swr_init(ctx);
if (ret < 0) {
    ERRBUF(ret);
    qDebug() << "初始化上下文失敗:" << errbuf;
    goto end;
}
4、創建輸入緩沖區:
// Allocate the input buffer for inSamples samples; inLineSize receives its size.
ret = av_samples_alloc_array_and_samples(&inData, &inLineSize, inChs, inSamples, inSampleFormat, 0);
if (ret < 0) {
    ERRBUF(ret);
    qDebug() << "創建輸入緩沖區失敗:" << errbuf;
    goto end;
}
5、創建輸出緩沖區:
// Allocate the output buffer for outSamples samples; outLineSize receives its size.
ret = av_samples_alloc_array_and_samples(&outData, &outLineSize, outChs, outSamples, outSampleFormat, 0);
if (ret < 0) {
    ERRBUF(ret);
    qDebug() << "創建輸出緩沖區失敗:" << errbuf;
    goto end;
}
6、打開文件:
// Open the source PCM file for reading; jump to cleanup if it cannot be opened.
if (!inFile.open(QFile::ReadOnly)) {
qDebug() << "打開輸入文件失敗";
goto end;
}
// Open the destination PCM file for writing; jump to cleanup if it cannot be opened.
if (!outFile.open(QFile::WriteOnly)) {
qDebug() << "打開輸出文件失敗";
goto end;
}
7、重采樣:
// Read one chunk of input, convert it, and append the converted samples to the output.
while ((len = inFile.read((char *)inData[0], inLineSize)) > 0) {
    // The last chunk may be short: derive the sample count from the bytes actually read.
    inSamples = len / inBytesPerSample;
    ret = swr_convert(ctx, outData, outSamples, (const uint8_t **)inData, inSamples);
    qDebug() << "轉換:" << ret;
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "重采樣失敗:" << errbuf;
        goto end;
    }
    // ret counts samples, so scale it to bytes before writing.
    outFile.write((const char *)outData[0], ret * outBytesPerSample);
}
8、檢查輸出緩沖區是否還有殘留樣本:
// Drain whatever the resampler still holds: call swr_convert() with no input
// until it stops returning samples.
while ((ret = swr_convert(ctx, outData, outSamples, nullptr, 0)) > 0) {
    // ret is a sample count, not a byte count; scale it exactly as the main
    // loop does, otherwise only part of the residual audio is written.
    outFile.write((const char *)outData[0], ret * outBytesPerSample);
    qDebug() << "殘留:" << ret;
}
9、回收釋放資源:
// Single exit point: every failure path jumps here, so each resource is
// released in exactly one place.
end:
// Close both files.
inFile.close();
outFile.close();
// Release the memory referenced by inData[0] first, then the inData pointer
// array itself. The null check guards the inData[0] dereference when the
// buffer was never allocated.
if (inData) {
av_freep(&inData[0]);
}
av_freep(&inData);
// Same two-step release for the output buffer.
if (outData) {
av_freep(&outData[0]);
}
av_freep(&outData);
// Release the resampling context.
swr_free(&ctx);
四、代碼
#include "ffmpegutils.h"
#include <QDebug>
#include <QFile>
// Declares a local 1024-byte buffer named `errbuf` in the enclosing scope and
// passes it to av_strerror() together with the error code `ret`.
// The expansion is a declaration followed by a call, so it can be used at most
// once per scope; the trailing ';' is supplied at the use site.
#define ERRBUF(ret)        \
    char errbuf[1024];     \
    av_strerror((ret), errbuf, sizeof errbuf)
/// Constructs the utility object and forwards `parent` to the QObject base class.
FFmpegUtils::FFmpegUtils(QObject *parent)
    : QObject(parent)
{
}
/**
 * Resample a raw PCM file into another raw PCM file.
 *
 * Reads inFilename in chunks of up to 1024 samples, converts each chunk with
 * swr_convert() and appends the converted samples to outFilename. When the
 * input is exhausted, the samples still held by the resampler are drained and
 * written as well. Failures are reported through qDebug(); nothing is returned.
 *
 * Only the first data pointer (inData[0] / outData[0]) is read and written, so
 * this presumably expects packed (interleaved) sample formats -- TODO confirm
 * before using it with planar formats.
 *
 * @param inFilename      path of the PCM file to read
 * @param inSampleRate    sample rate of the input audio
 * @param inSampleFormat  sample format of the input audio
 * @param inChLayout      channel layout of the input audio
 * @param outFilename     path of the PCM file to write
 * @param outSampleRate   sample rate of the output audio
 * @param outSampleFormat sample format of the output audio
 * @param outChLayout     channel layout of the output audio
 */
void FFmpegUtils::resampleAudio(const char *inFilename, int inSampleRate, AVSampleFormat inSampleFormat, int inChLayout,
                                const char *outFilename, int outSampleRate, AVSampleFormat outSampleFormat, int outChLayout)
{
    // Every local is declared before the first `goto end`, so no jump crosses
    // an initialization.
    QFile inFile(inFilename);
    QFile outFile(outFilename);

    // ---- Input side ----
    // Array of data pointers for the input buffer
    uint8_t **inData = nullptr;
    // Input buffer size in bytes, filled in by the allocation call
    int inLineSize = 0;
    // Number of input channels
    const int inChs = av_get_channel_layout_nb_channels(inChLayout);
    // Bytes occupied by one sample across all input channels
    const int inBytesPerSample = inChs * av_get_bytes_per_sample(inSampleFormat);
    // Samples per input chunk; lowered for a short final chunk
    int inSamples = 1024;

    // ---- Output side ----
    // Array of data pointers for the output buffer
    uint8_t **outData = nullptr;
    // Output buffer size in bytes, filled in by the allocation call
    int outLineSize = 0;
    // Number of output channels
    const int outChs = av_get_channel_layout_nb_channels(outChLayout);
    // Bytes occupied by one sample across all output channels
    const int outBytesPerSample = outChs * av_get_bytes_per_sample(outSampleFormat);
    // Output buffer capacity in samples: inSamples * outSampleRate / inSampleRate, rounded up
    const int outSamples = static_cast<int>(av_rescale_rnd(outSampleRate, inSamples, inSampleRate, AV_ROUND_UP));

    // Bytes returned by the most recent read
    int len = 0;
    // Return value of the most recent FFmpeg call
    int ret = 0;

    // Create the resampling context: output parameters first, then input parameters.
    SwrContext *ctx = swr_alloc_set_opts(nullptr,
                                         outChLayout, outSampleFormat, outSampleRate,
                                         inChLayout, inSampleFormat, inSampleRate,
                                         0, nullptr);
    if (!ctx) {
        qDebug() << "創建重采樣上下文失敗!";
        goto end;
    }

    // Initialize the resampling context.
    ret = swr_init(ctx);
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "初始化上下文失敗:" << errbuf;
        goto end;
    }

    // Allocate the input buffer.
    ret = av_samples_alloc_array_and_samples(&inData, &inLineSize, inChs, inSamples, inSampleFormat, 0);
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "創建輸入緩沖區失敗:" << errbuf;
        goto end;
    }

    // Allocate the output buffer.
    ret = av_samples_alloc_array_and_samples(&outData, &outLineSize, outChs, outSamples, outSampleFormat, 0);
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "創建輸出緩沖區失敗:" << errbuf;
        goto end;
    }

    // Open the files.
    if (!inFile.open(QFile::ReadOnly)) {
        qDebug() << "打開輸入文件失敗";
        goto end;
    }
    if (!outFile.open(QFile::WriteOnly)) {
        qDebug() << "打開輸出文件失敗";
        goto end;
    }

    // Convert the input chunk by chunk.
    while ((len = inFile.read(reinterpret_cast<char *>(inData[0]), inLineSize)) > 0) {
        // The final chunk may be short: derive the sample count from the bytes read.
        inSamples = len / inBytesPerSample;
        ret = swr_convert(ctx, outData, outSamples, const_cast<const uint8_t **>(inData), inSamples);
        qDebug() << "轉換:" << ret;
        if (ret < 0) {
            ERRBUF(ret);
            qDebug() << "重采樣失敗:" << errbuf;
            goto end;
        }
        // ret counts samples, so scale it to bytes before writing.
        outFile.write(reinterpret_cast<const char *>(outData[0]), ret * outBytesPerSample);
    }

    // Drain the samples still held by the resampler (no new input is supplied).
    while ((ret = swr_convert(ctx, outData, outSamples, nullptr, 0)) > 0) {
        // Scale samples to bytes here too; writing only `ret` bytes would drop
        // most of the residual audio.
        outFile.write(reinterpret_cast<const char *>(outData[0]), ret * outBytesPerSample);
        qDebug() << "殘留:" << ret;
    }
    // A negative value ends the drain loop as well; report it instead of ignoring it.
    if (ret < 0) {
        ERRBUF(ret);
        qDebug() << "重采樣失敗:" << errbuf;
    }

end:
    // Single cleanup path shared by success and every failure.
    inFile.close();
    outFile.close();
    // Release the memory referenced by the first data pointer, then the pointer array.
    if (inData) {
        av_freep(&inData[0]);
    }
    av_freep(&inData);
    if (outData) {
        av_freep(&outData[0]);
    }
    av_freep(&outData);
    swr_free(&ctx);
}
調用函數:
// Source and destination PCM files. Both literals must end with a plain ASCII
// double quote; a typographic quote leaves the string unterminated.
#define IN_FILE_NAME "/Users/mac/Downloads/music/ar48000ac1f32le.pcm"
#define OUT_FILE_NAME "/Users/mac/Downloads/music/ar44100ac2s16le.pcm"
// Input: 48000 Hz, float samples, mono.
int inSampleRate = 48000;
AVSampleFormat inSampleFormat = AV_SAMPLE_FMT_FLT;
int inChLayout = AV_CH_LAYOUT_MONO;
// Output: 44100 Hz, signed 16-bit samples, stereo.
int outSampleRate = 44100;
AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_S16;
int outChLayout = AV_CH_LAYOUT_STEREO;
FFmpegUtils::resampleAudio(IN_FILE_NAME, inSampleRate, inSampleFormat, inChLayout,
                           OUT_FILE_NAME, outSampleRate, outSampleFormat, outChLayout);