最近的工作涉及到語音識別相關的研究,因此先簡單地梳理一下WAV格式和PCM。以前用c++實現了mp3 player,這個時候再來回顧下代碼實現,將WAV的播放看了下。
什么是PCM
直接上百度百科的定義:PCM(Pulse Code Modulation)脈沖編碼調制是數字通信的編碼方式之一。主要過程是將話音、圖像等模擬信號每隔一定時間進行取樣,使其離散化,同時將抽樣值按分層單位四舍五入取整量化,同時將抽樣值按一組二進制碼來表示抽樣脈沖的幅值。
簡單的說,PCM就是直接對原始的模擬聲波信號進行采樣后得到的數據。
什么是 采樣率 sample rate
采樣率是指一秒鐘從脈沖信號中采樣了多少個 sample。
舉例:sample rate 為 16000 Hz,意思是一秒鐘從脈沖信號中采樣 16000 個點(sample)。
什么是比特率 bits per second
從字面定義,即:一秒鐘的聲音信號占有的比特數。
簡言之:比特率表征采樣數據的信息逼真程度,比特率越高,數據的逼真程度越高。
使用 alsa sound 進行 pcm 數據的播放
關于alsa sound
從網上找了alsa sound的解釋如下:ALSA(Advanced Linux Sound Architecture)是linux上主流的音頻結構,在沒有出現ALSA架構之前,一直使用的是OSS(Open Sound System)音頻架構。
簡單的說,alsa sound是linux下的音頻框架,使用alsa sound的接口,即可以播放pcm數據。這里,正好可以用這個接口,來體驗pcm的聲音效果,以便對pcm數據傳到電腦喇叭后的聲音有個真切的體驗。
使用 alsa sound 播放pcm
- 計算320ms的pcm數據長度,因為我的代碼是每次播放320ms長度的sample數據,所以先計算出320ms的采樣數據長度。
直接給出公式如下:
簡單的說下:
sample_rate 標識1秒的采樣的sample數,單聲道的采樣數=采樣率,雙聲道的采樣數=采樣率*channel數。 320ms的采樣數 = 1秒的采樣數*320/1000 = 采樣率*聲道數*32/100
samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
wav格式
以.wav為后綴的文件就是wav格式的文件,先上百度百科的定義:是微軟公司專門為Windows開發的一種標準數字音頻文件,該文件能記錄各種單聲道或立體聲的聲音信息,并能保證聲音不失真。
簡單的說:WAV文件就是在PCM數據前加了個PCM的信息說明頭,僅此而已。
這個頭部數據占有字節內容如下:
RIFF 4個字節
UNKNOWN 4個字節,
WAVE 4 bytes
fmt 4bytes
UNKNOWN 4bytes
采樣率、聲道數、比特率、一個sample占的bit數 16bytes
extra 2bytes //根據chunk_size == 18才有
fact 4bytes
data 4bytes
UNKNOWN 4bytes
---------------
到這一步,就計算出接下來PCM數據的長度,接下來的位置就是PCM數據了
最后附播放alsa sound 播放pcm的源碼
/**
 * Parser for a RIFF/WAVE image that is held entirely in memory.
 *
 * The constructor locates the "RIFF" signature and decodes the header;
 * Get320msSample() then hands out the PCM payload in 320 ms slices.
 * The object stores the caller's pointer as-is (it never copies or frees the
 * buffer), so the buffer must stay alive for as long as the parser is used.
 */
class WAVParser {
public:
    /**
     * Values decoded from the WAV header by ParseFrameHeader().
     *
     * Declared as a named struct (rather than `typedef struct {...}`) so the
     * two nested types are declared the same way; see FrameInfo below.
     */
    struct FrameHeader {
        int format_tag;            // first 16-bit field of the "fmt " chunk body
        int channels;              // second 16-bit field: channel count
        unsigned int sample_rate;  // 32-bit field: samples per second
        // 32-bit field following sample_rate.
        // NOTE(review): the WAVE format names this field nAvgBytesPerSec
        // (bytes, not bits) -- confirm the unit callers expect.
        unsigned int bit_rate;
        int block_align;           // 16-bit field following bit_rate
        int bits_per_sample;       // 16-bit field: bits in one sample
        int data_pos;              // offset of the first PCM byte
        int data_size;             // size in bytes of the "data" chunk payload
    };

    /**
     * Per-frame information filled from a FrameHeader.
     *
     * A class with member functions must not be an unnamed struct that only
     * gets its name through a typedef, hence the named declaration.
     */
    struct FrameInfo {
        int Init(const FrameHeader& header);
    };

    /// Looks for a WAV header in data[0, len) and parses it; returns 0 on success, -1 otherwise.
    static int GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info);

    /// Binds the parser to the buffer data[0, len) and decodes its header.
    WAVParser(const unsigned char* data, int len);
    ~WAVParser();

    /// Copies the next 320 ms of PCM into `data` (capacity `len` bytes); returns 0 on success, -1 otherwise.
    int Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate);

protected:
    /// Returns the offset of the "RIFF" signature inside data[0, len), or -1.
    static int FindFrameHeader(const unsigned char* data, int len);
    /// Decodes the header that starts at `data`; returns 0 on success, -1 otherwise.
    static int ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header);

    /// Dumps the format fields of `header` through PRINT (debug aid).
    static void TypeFrameHeader(const FrameHeader& header) {
        PRINT("=============================================");
        // The int fields are signed, so they are printed with %d; only the
        // two unsigned members keep %u.
        PRINT("header.format_tag = %d", header.format_tag);
        PRINT("header.channels = %d", header.channels);
        PRINT("header.sample_rate = %u", header.sample_rate);
        PRINT("header.bit_rate = %u", header.bit_rate);
        PRINT("header.block_align = %d", header.block_align);
        PRINT("header.bits_per_sample = %d", header.bits_per_sample);
    }

private:
    int index_;                  // bytes of PCM payload already handed out
    FrameHeader header_;         // header decoded by the constructor
    const unsigned char* data_;  // caller-owned WAV image (not copied)
    const int data_len_;         // size of data_ in bytes
    int samples_per_320ms_;      // sample_rate * channels * 32 / 100
};
///////////////////////////////////
//////////////WAV//////////////////
///////////////////////////////////
// Placeholder initialiser: FrameInfo carries no state yet, so the header is
// not inspected and the call always reports success (0).
int WAVParser::FrameInfo::Init(const FrameHeader& header) {
    (void)header;
    const int kSuccess = 0;
    return kSuccess;
}
/**
 * Checks whether data[0, len) contains a parsable WAV header.
 *
 * @param data       buffer to search
 * @param len        number of bytes available in `data`
 * @param frame_len  always reset to 0; no frame length is computed yet
 * @param info       currently left untouched
 * @return 0 when a "RIFF" signature is found and the header behind it parses,
 *         -1 otherwise
 */
int WAVParser::GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info) {
    (void)info;  // not populated yet; kept for interface compatibility
    frame_len = 0;
    if (data == 0 || len <= 0)
        return -1;

    const int pos = FindFrameHeader(data, len);
    if (pos < 0)
        return -1;

    // Only the bytes from the signature onwards belong to the header, so the
    // parser is given the remaining length rather than the full buffer length.
    FrameHeader header;
    if (ParseFrameHeader(&data[pos], len - pos, header) != 0)
        return -1;
    //TypeFrameHeader(header);
    return 0;
}
/**
 * Searches data[0, len) for the 4-byte "RIFF" signature.
 *
 * @return offset of the first occurrence, or -1 when there is none (including
 *         when `data` is null or fewer than 4 bytes are available)
 */
int WAVParser::FindFrameHeader(const unsigned char* data, int len) {
    const char* RIFF = "RIFF";
    if (data == 0 || len < 4)
        return -1;

    // The last offset at which 4 bytes still fit is len - 4 (inclusive); the
    // previous `pos < len - 4` bound skipped it, so a signature sitting at the
    // very end of the buffer (or a buffer of exactly 4 bytes) was never found.
    const int last = len - 4;
    for (int pos = 0; pos <= last; ++pos) {
        if (memcmp(&data[pos], RIFF, 4) == 0) {
            PRINT("found header");
            return pos;
        }
    }
    return -1;
}
// Decodes an unsigned 16-bit little-endian value stored at p[0..1].
static unsigned int WavReadLE16(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8);
}

// Decodes an unsigned 32-bit little-endian value stored at p[0..3].
static unsigned int WavReadLE32(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8) |
           (static_cast<unsigned int>(p[2]) << 16) |
           (static_cast<unsigned int>(p[3]) << 24);
}

/**
 * Decodes the WAV header that starts at `data`.
 *
 * Expected layout: "RIFF" <size> "WAVE" "fmt " <fmt size> <16 bytes of format
 * fields> [fmt extension] [other chunks such as "fact"] "data" <data size>.
 *
 * @param data    points at the "RIFF" signature
 * @param len     number of readable bytes starting at `data`; every access is
 *                checked against it
 * @param header  receives the format fields plus the position and size of the
 *                PCM payload; data_pos is relative to `data`. The fields may
 *                be partially written when the function fails.
 * @return 0 on success, -1 when the buffer is not a WAV header or is truncated
 */
int WAVParser::ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header) {
    // RIFF id + size + WAVE id + fmt id + fmt size + the 16 fixed format bytes.
    const int kFixedHeaderBytes = 36;
    if (data == 0 || len < kFixedHeaderBytes) {
        PRINT("wav header truncated");
        return -1;
    }

    int pos = 0;
    if (memcmp(&data[pos], "RIFF", 4) != 0) {
        PRINT("not RIFF");
        return -1;
    }
    pos += 8;  // signature plus the RIFF chunk size, which is not needed here
    if (memcmp(&data[pos], "WAVE", 4) != 0) {
        PRINT("not WAVE");
        return -1;
    }
    pos += 4;
    if (memcmp(&data[pos], "fmt ", 4) != 0) {
        PRINT("not fmt");
        return -1;
    }
    pos += 4;

    const unsigned int fmt_size = WavReadLE32(&data[pos]);
    pos += 4;
    if (fmt_size < 16) {
        // The six fields decoded below occupy 16 bytes.
        PRINT("fmt chunk too small");
        return -1;
    }

    header.format_tag      = static_cast<int>(WavReadLE16(&data[pos]));
    header.channels        = static_cast<int>(WavReadLE16(&data[pos + 2]));
    header.sample_rate     = WavReadLE32(&data[pos + 4]);
    header.bit_rate        = WavReadLE32(&data[pos + 8]);
    header.block_align     = static_cast<int>(WavReadLE16(&data[pos + 12]));
    header.bits_per_sample = static_cast<int>(WavReadLE16(&data[pos + 14]));
    pos += 16;

    // Skip whatever extension follows the fixed fields (2 bytes when the fmt
    // size is 18, more for extensible formats). RIFF chunks are padded to an
    // even size, hence the extra byte for odd sizes.
    const unsigned int fmt_tail = (fmt_size - 16u) + (fmt_size & 1u);
    if (fmt_tail > static_cast<unsigned int>(len - pos)) {
        PRINT("fmt chunk truncated");
        return -1;
    }
    pos += static_cast<int>(fmt_tail);

    // Walk chunk by chunk until "data" shows up. Stepping by the declared
    // chunk sizes (instead of scanning byte-wise) cannot mistake payload bytes
    // of another chunk for the "data" tag and cannot run past the buffer.
    for (;;) {
        if (len - pos < 8) {
            PRINT("data chunk not found");
            return -1;
        }
        const unsigned char* chunk = &data[pos];
        const unsigned int chunk_size = WavReadLE32(chunk + 4);
        pos += 8;
        const unsigned int available = static_cast<unsigned int>(len - pos);

        if (memcmp(chunk, "data", 4) == 0) {
            PRINT("data chunk_size = %u", chunk_size);
            // Never report more payload than the buffer really holds, so a
            // truncated file cannot make later reads overrun it.
            header.data_size = static_cast<int>(chunk_size < available ? chunk_size : available);
            header.data_pos = pos;
            return 0;
        }
        if (memcmp(chunk, "fact", 4) == 0) {
            PRINT("fact");
        }
        if (chunk_size > available) {
            PRINT("data chunk not found");
            return -1;
        }
        unsigned int advance = chunk_size + (chunk_size & 1u);  // pad to even
        if (advance > available)
            advance = available;  // pad byte missing at the end of the buffer
        pos += static_cast<int>(advance);
    }
}
/**
 * Binds the parser to the WAV image data[0, len) and decodes its header.
 *
 * The pointer is stored, not copied. In builds with assertions enabled an
 * unparsable buffer still aborts via assert(); with assertions compiled out
 * the parser ends up in an empty state in which Get320msSample() fails.
 */
WAVParser::WAVParser(const unsigned char* data, int len)
    // Listed in member declaration order, which is the order C++ initialises
    // them in regardless of how the list is written.
    : index_(0), header_(), data_(data), data_len_(len), samples_per_320ms_(0) {
    const int pos = (data_ != 0 && data_len_ > 0) ? FindFrameHeader(data_, data_len_) : -1;
    assert(pos >= 0);

    // Do not form &data_[-1] when the signature is missing and asserts are off.
    const int ret = (pos >= 0) ? ParseFrameHeader(&data_[pos], data_len_ - pos, header_) : -1;
    assert(ret == 0);
    if (ret != 0) {
        // Discard anything a failed parse may have written. A negative payload
        // size makes the size check in Get320msSample() reject every request.
        header_ = FrameHeader();
        header_.data_size = -1;
        return;
    }

    // ParseFrameHeader() reports data_pos relative to the "RIFF" signature,
    // while Get320msSample() indexes from data_, so shift it by the offset at
    // which the signature was found.
    header_.data_pos += pos;
    samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
}
// The parser allocates nothing and only keeps the caller's pointer, so there
// is nothing to release here.
WAVParser::~WAVParser() {}
/**
 * Copies the next 320 ms of PCM data into the caller's buffer.
 *
 * @param data        destination buffer (written to, despite the const in the
 *                    signature)
 * @param len         capacity of `data` in bytes
 * @param samples     receives the number of samples copied, counted across all
 *                    channels; 0 on failure
 * @param channels    receives the channel count; 0 on failure
 * @param samplerate  receives the sample rate; 0 on failure
 * @return 0 on success; -1 when the buffer is null or too small, the header
 *         yielded an empty slice size, or fewer than 320 ms of payload remain
 *         (a shorter tail at the end of the file is not returned)
 */
int WAVParser::Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate) {
    samples = 0;
    channels = 0;
    samplerate = 0;

    // Widened so that implausible header values cannot overflow the product.
    const long long bytes =
        static_cast<long long>(samples_per_320ms_) * header_.bits_per_sample / 8;
    // A non-positive size would "succeed" without consuming anything and make
    // a caller that loops until failure spin forever.
    if (data == 0 || bytes <= 0 || bytes > len) {
        return -1;
    }
    const int size = static_cast<int>(bytes);
    if (header_.data_size < size || index_ > header_.data_size - size) {
        return -1;
    }

    // NOTE(review): `data` is an output buffer, but the declared parameter
    // type is pointer-to-const and memcpy() cannot take that as a destination.
    // The cast keeps the public signature intact; callers must pass writable
    // memory. Dropping the const from the declaration would be the cleaner fix.
    memcpy(const_cast<unsigned char*>(data), &data_[header_.data_pos + index_], size);
    index_ += size;

    samples = samples_per_320ms_;
    channels = header_.channels;
    samplerate = header_.sample_rate;
    return 0;
}
播放PCM的相關代碼
// Plays the WAV image data[0, len) through a PCMPlayer: the payload is pulled
// from a WAVParser in 320 ms slices and each slice is handed to the player
// until the parser reports that no further slice is available.
void wav_play(const unsigned char* data, int len) {
    const int kBufferBytes = 1024*1024;  // scratch space for one slice
    int samples;
    int channels;
    int samplerate;

    // The player starts out as 16 kHz mono; play() receives the actual format
    // of every slice.
    PCMPlayer player(16000, 1);
    WAVParser parser(data, len);

    unsigned char* buffer = new unsigned char[kBufferBytes];
    while (parser.Get320msSample(buffer, kBufferBytes, samples, channels, samplerate) == 0) {
        player.play(reinterpret_cast<short*>(buffer), samples, samplerate, channels);
    }
    delete []buffer;
}
#ifndef _PLAY_PCM_H
#define _PLAY_PCM_H
#include <alsa/asoundlib.h>
#include "util.h"
class PCMPlayer {
public:
PCMPlayer(int sample_rate, int channels):
channels_(channels),
sample_rate_(sample_rate),
init_(false) {
snd_pcm_hw_params_t *params;
int rc = snd_pcm_open(&handle_, "default",SND_PCM_STREAM_PLAYBACK, 0);
if (rc < 0) {
fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(¶ms);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle_, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle_, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle_, params, channels_);
/* 44100 bits/second sampling rate (CD quality) */
unsigned int val = sample_rate_;
int dir;
snd_pcm_hw_params_set_rate_near(handle_, params, &val, &dir);
/* Set period size to 32 frames. */
snd_pcm_uframes_t frames = 0;
snd_pcm_hw_params_set_period_size_near(handle_, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle_, params);
if (rc < 0) {
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
frame_size_ = frames;
}
void play(short* sample, int samples, int sample_rate, int channels) {
SetSamplerateAndChannel(sample_rate, channels);
int pos = 0;
int size = frame_size_*channels_;
while (pos < samples) {
int rc = snd_pcm_writei(handle_, &sample[pos], frame_size_);
if (rc == -EPIPE) {
/* EPIPE means underrun */
fprintf(stderr, "underrun occurred\n");
snd_pcm_prepare(handle_);
} else if (rc < 0) {
fprintf(stderr, "error from writei: %s\n", snd_strerror(rc));
} else if (rc != (int)frame_size_) {
fprintf(stderr, "short write, write %d frames\n", rc);
}
pos += size;
}
}
~PCMPlayer() {
snd_pcm_drain(handle_);
snd_pcm_close(handle_);
}
protected:
void SetSamplerateAndChannel(int sample_rate, int channels) {
if (sample_rate_ != sample_rate || channels_ != channels) {
PRINT("SetSamplerateAndChannel: sample rate = %d chs = %d", sample_rate, channels);
sample_rate_ = sample_rate;
channels_ = channels;
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(¶ms_);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle_, params_);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle_, params_, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle_, params_, SND_PCM_FORMAT_S16_LE);
int dir;
unsigned int val = sample_rate_;
snd_pcm_hw_params_set_rate_near(handle_, params_, &val, &dir);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle_, params_, channels_);
/* Set period size to 32 frames. */
snd_pcm_uframes_t frames = 0;
snd_pcm_hw_params_set_period_size_near(handle_, params_, &frames, &dir);
/* Write the parameters to the driver */
int rc = snd_pcm_hw_params(handle_, params_);
if (rc < 0) {
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params_, &frames, &dir);
frame_size_ = frames;
PRINT("SetSamplerateAndChannel--<");
sleep(1);
}
}
private:
int channels_;
int sample_rate_;
int frame_size_;
bool init_;
snd_pcm_t *handle_;
snd_pcm_hw_params_t *params_;
};
#endif