
最近的工作涉及到語音識別相關的研究,因此先簡單的梳理一下WAV格式和PCM。以前用c++實現了mp3 player,這個時候再來回顧下代碼實現,將WAV的播放 看了下。


直接上百度百科的定義:PCM(Pulse Code Modulation)脈沖編碼調制是數字通信的編碼方式之一。主要過程是將話音、圖像等模擬信號每隔一定時間進行取樣,使其離散化,同時將抽樣值按分層單位四舍五入取整量化,同時將抽樣值按一組二進制碼來表示抽樣脈沖的幅值。

簡單的說,PCM就是直接對原始的模擬聲波信號 進行 采樣 后得到的 數據

什么是 采樣率 sample rate

采樣率 是 指 一秒鐘 從脈沖 信號 中 采樣了 多少個 sample
舉例: sample rate: 16000 HZ ,意思是一秒鐘 從 脈沖信號中 采樣16000個點(sample)

什么是比特率 bits per second

從字面定義,即:一秒鐘的聲音信號 占有的比特數。
簡言之:比特率 表征 采樣的 數據 的 信息逼真程度,比特率越高,數據的逼真程度越高。

使用 alsa sound 進行 pcm 數據的播放

關于alsa sound

從網上找了alsa sound的解釋如下:ALSA(Advanced Linux Sound Architecture)是linux上主流的音頻結構,在沒有出現ALSA架構之前,一直使用的是OSS(Open Sound System)音頻架構。
簡單的說,alsa sound是linux下的音頻框架,使用alsa sound的 接口,即可以播放pcm數據。這里,正好可以用這個接口,來體驗pcm的聲音效果,以便有個 真切的 將pcm傳到 電腦喇叭的 聲音 體驗

使用 alsa sound 播放pcm
  • 計算320ms的pcm數據長度,因為我的代碼是每次播放320ms長度的sample數據,所以先計算出320ms的采樣數據長度。
sample_rate 標識1秒的采樣的sample數,單聲道的采樣數=采樣率,雙聲道的采樣數=采樣率*channel數。 320ms的采樣數 = 1秒的采樣數*320/1000 = 采樣率*聲道數*32/100
samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;



RIFF 4個字節
UNKNOWN 4個字節
WAVE 4 bytes
fmt 4bytes
UNKNOWN 4bytes
采樣率、聲道數、比特率、一個sample占的bit數 16bytes
extra 2bytes //根據chunk_size == 18才有
fact 4bytes
data 4bytes
UNKNOWN 4bytes

最后附 alsa sound 播放pcm的源碼

/**
 * Parses a RIFF/WAVE image held in memory and hands out its PCM payload in
 * fixed 320 ms slices.
 *
 * The parser keeps a pointer to the caller's buffer (it neither copies nor
 * frees it), so the buffer must stay valid for the lifetime of the parser.
 */
class WAVParser {
public:
    /** Values decoded from the "fmt " chunk plus the location of the "data" payload. */
    struct FrameHeader {
        int format_tag;            // 2-byte format code
        int channels;              // 2-byte channel count
        unsigned int sample_rate;  // 4-byte sample rate
        unsigned int bit_rate;     // 4-byte rate field that follows sample_rate
                                   // (the WAVE format stores average bytes/second here)
        int block_align;           // 2-byte block alignment
        int bits_per_sample;       // 2-byte sample width in bits
        int data_pos;              // offset of the first payload byte of the "data" chunk
        int data_size;             // payload size of the "data" chunk in bytes
    };

    /** Per-frame information; currently carries no data. */
    struct FrameInfo {
        /** Initializes the info from a parsed header. Returns 0. */
        int Init(const FrameHeader& header);
    };

    /**
     * Locates and parses a WAV header inside data[0..len).
     * frame_len is reset to 0. Returns 0 on success, -1 otherwise.
     */
    static int GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info);

    /** Binds the parser to data[0..len) and parses the WAV header. */
    WAVParser(const unsigned char* data, int len);
    ~WAVParser();

    /**
     * Copies the next 320 ms of PCM into data (capacity len bytes) and reports
     * the sample count, channel count and sample rate of the slice.
     * Returns 0 on success, -1 when no complete slice is available.
     */
    int Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate);

    /** Returns the offset of the first "RIFF" tag in data[0..len), or -1. */
    static int FindFrameHeader(const unsigned char* data, int len);

    /** Decodes the header that starts at data[0] into header. Returns 0 on success, -1 otherwise. */
    static int ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header);

    /** Logs the format fields of header through PRINT. */
    static void TypeFrameHeader(const FrameHeader& header) {
        PRINT("header.format_tag = %u", header.format_tag);
        PRINT("header.channels = %u", header.channels);
        PRINT("header.sample_rate = %u", header.sample_rate);
        PRINT("header.bit_rate = %u", header.bit_rate);
        PRINT("header.block_align = %u", header.block_align);
        PRINT("header.bits_per_sample = %u", header.bits_per_sample);
    }

private:
    int index_;                  // bytes of payload already handed out
    FrameHeader header_;         // header parsed by the constructor
    const unsigned char* data_;  // caller-owned WAV image
    const int data_len_;         // size of data_ in bytes
    int samples_per_320ms_;      // interleaved samples (all channels) per 320 ms slice
};

/**
 * Initializes the frame info from a parsed header.
 *
 * FrameInfo currently has no fields to fill in, so this is a no-op.
 *
 * @param header parsed WAV header (unused)
 * @return always 0
 */
int WAVParser::FrameInfo::Init(const FrameHeader& header) {
    (void)header;  // nothing to derive from the header yet
    return 0;
}

/**
 * Checks that data[0..len) contains a parsable WAV header.
 *
 * @param data      buffer to inspect
 * @param len       number of valid bytes in data
 * @param frame_len always reset to 0
 * @param info      not modified
 * @return 0 when a header was found and parsed, -1 otherwise
 */
int WAVParser::GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info) {
    (void)info;
    frame_len = 0;

    const int pos = FindFrameHeader(data, len);
    if (pos < 0) {
        return -1;
    }

    // Only the bytes from the header onwards are available to the parser,
    // so shrink the length by the offset at which the header was found.
    FrameHeader header;
    if (ParseFrameHeader(&data[pos], len - pos, header) != 0) {
        return -1;
    }

    return 0;
}

/**
 * Searches data[0..len) for the 4-byte "RIFF" tag.
 *
 * @param data buffer to scan
 * @param len  number of valid bytes in data
 * @return offset of the first "RIFF" tag, or -1 when data is null, shorter
 *         than four bytes, or contains no tag
 */
int WAVParser::FindFrameHeader(const unsigned char* data, int len) {
    if (!data) {
        return -1;
    }

    // pos + 4 <= len keeps the 4-byte compare inside the buffer while still
    // testing the last possible position (pos == len - 4).
    for (int pos = 0; pos + 4 <= len; ++pos) {
        if (memcmp(&data[pos], "RIFF", 4) == 0) {
            PRINT("found header");
            return pos;
        }
    }
    return -1;
}

// Decodes the 2-byte little-endian unsigned value stored at p[0..1].
static unsigned int WavReadLE16(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8);
}

// Decodes the 4-byte little-endian unsigned value stored at p[0..3].
static unsigned int WavReadLE32(const unsigned char* p) {
    return WavReadLE16(p) | (WavReadLE16(p + 2) << 16);
}

/**
 * Decodes a RIFF/WAVE header that starts at data[0].
 *
 * Expected layout: "RIFF" <size> "WAVE" "fmt " <size> <16 bytes of format
 * fields> [extra fmt bytes] [other chunks...] "data" <size> <payload>.
 * Chunks between "fmt " and "data" (for example "fact") are skipped using
 * their declared size.
 *
 * @param data   start of the RIFF header
 * @param len    number of valid bytes at data
 * @param header receives the format fields, and data_pos / data_size
 *               (data_pos is relative to data); may be partially written
 *               when the function fails
 * @return 0 on success, -1 when the header is malformed or truncated
 */
int WAVParser::ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header) {
    const int kTagSize = 4;          // four-character chunk id
    const int kChunkHeaderSize = 8;  // chunk id + 32-bit chunk size
    const int kFmtCoreSize = 16;     // mandatory part of the "fmt " payload
    // "RIFF" + size + "WAVE", then the "fmt " chunk header and core payload.
    const int kMinHeaderSize = 3 * kTagSize + kChunkHeaderSize + kFmtCoreSize;

    if (!data || len < kMinHeaderSize) {
        PRINT("wav header truncated");
        return -1;
    }

    int pos = 0;
    if (memcmp(&data[pos], "RIFF", 4) != 0) {
        PRINT("not RIFF");
        return -1;
    }
    pos += kTagSize;

    // The RIFF chunk size is not needed for parsing; skip it.
    pos += 4;

    if (memcmp(&data[pos], "WAVE", 4) != 0) {
        PRINT("not WAVE");
        return -1;
    }
    pos += kTagSize;

    if (memcmp(&data[pos], "fmt ", 4) != 0) {
        PRINT("not fmt");
        return -1;
    }
    pos += kTagSize;

    unsigned int chunk_size = WavReadLE32(&data[pos]);
    pos += 4;
    if (chunk_size < static_cast<unsigned int>(kFmtCoreSize) ||
        chunk_size > static_cast<unsigned int>(len - pos)) {
        PRINT("wav header truncated");
        return -1;
    }

    header.format_tag = WavReadLE16(&data[pos]);
    header.channels = WavReadLE16(&data[pos + 2]);
    header.sample_rate = WavReadLE32(&data[pos + 4]);
    header.bit_rate = WavReadLE32(&data[pos + 8]);
    header.block_align = WavReadLE16(&data[pos + 12]);
    header.bits_per_sample = WavReadLE16(&data[pos + 14]);

    // Skip the whole fmt payload, including any extension bytes (e.g. the
    // 2 extra bytes of an 18-byte fmt chunk). RIFF pads odd-sized chunks
    // to an even length.
    pos += static_cast<int>(chunk_size + (chunk_size & 1u));

    // Walk the remaining chunks until the "data" chunk is reached.
    for (;;) {
        if (len - pos < kChunkHeaderSize) {
            PRINT("no data chunk");
            return -1;
        }
        const bool is_data = memcmp(&data[pos], "data", 4) == 0;
        chunk_size = WavReadLE32(&data[pos + kTagSize]);
        pos += kChunkHeaderSize;
        if (is_data) {
            break;
        }
        if (chunk_size >= static_cast<unsigned int>(len - pos)) {
            PRINT("no data chunk");
            return -1;
        }
        pos += static_cast<int>(chunk_size + (chunk_size & 1u));
    }

    PRINT("data chunk_size = %u", chunk_size);

    // Never report more payload than the buffer actually holds, so callers
    // that trust data_size cannot read past the end of the buffer.
    const unsigned int available = static_cast<unsigned int>(len - pos);
    header.data_size = static_cast<int>(chunk_size > available ? available : chunk_size);
    header.data_pos = pos;

#if 0
    // calculate duration time
    {
        int sr = header.sample_rate;
        int ch = header.channels;
        assert(header.bits_per_sample == sizeof(short)*8);
        int data_size = header.data_size;
        int samples = data_size*8/header.bits_per_sample;
        assert(data_size%header.bits_per_sample == 0);
        int samples_per_channel = samples/ch;
        int seconds = samples_per_channel/sr;
        PRINT("play %d seconds", seconds);
    }
#endif

    return 0;
}

/**
 * Binds the parser to a WAV image and parses its header.
 *
 * The buffer is not copied; it must outlive the parser. A missing or
 * malformed header trips an assert in debug builds; when asserts are
 * compiled out the header stays zeroed and samples_per_320ms_ stays 0.
 *
 * @param data start of the WAV image
 * @param len  size of the image in bytes
 */
WAVParser::WAVParser(const unsigned char* data, int len)
        : index_(0), header_(), data_(data), data_len_(len), samples_per_320ms_(0) {
    const int pos = FindFrameHeader(data_, data_len_);
    assert(pos >= 0);
    if (pos < 0) {
        return;
    }

    const int ret = ParseFrameHeader(&data_[pos], data_len_ - pos, header_);
    assert(ret == 0);
    if (ret != 0) {
        return;
    }

    // ParseFrameHeader reports data_pos relative to the RIFF tag; rebase it
    // so it indexes data_ directly even when the tag is not at offset 0.
    header_.data_pos += pos;

    // 320 ms worth of interleaved samples: rate * channels * 320 / 1000.
    samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
}

/** Nothing to release: the parser only borrows the caller's buffer. */
WAVParser::~WAVParser() {
}

/**
 * Copies the next 320 ms slice of PCM payload into the caller's buffer.
 *
 * Only whole slices are returned; a final remainder shorter than 320 ms is
 * not handed out.
 *
 * @param data       destination buffer; it is written to despite the const
 *                   qualifier in the signature
 * @param len        capacity of data in bytes
 * @param samples    receives the number of interleaved samples copied (0 on failure)
 * @param channels   receives the channel count (0 on failure)
 * @param samplerate receives the sample rate (0 on failure)
 * @return 0 when a slice was copied, -1 when no complete slice is left or
 *         the destination is missing or too small
 */
int WAVParser::Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate) {
    samples = 0;
    channels = 0;
    samplerate = 0;

    const int size = samples_per_320ms_*header_.bits_per_sample/8;
    // size <= 0 means the header was never parsed (or describes no audio);
    // reporting success there would make callers loop forever on empty slices.
    if (!data || size <= 0 || size > len || size > header_.data_size - index_) {
        return -1;
    }

    // NOTE(review): the parameter is declared const but is the output buffer;
    // the cast keeps the published signature intact. Callers must pass
    // writable memory.
    memcpy(const_cast<unsigned char*>(data), &data_[header_.data_pos + index_], size);
    index_ += size;

    samples = samples_per_320ms_;
    channels = header_.channels;
    samplerate = header_.sample_rate;

    return 0;
}


/**
 * Plays a complete in-memory WAV image through the sound card.
 *
 * The audio is pulled from the parser in 320 ms slices and written to the
 * player until the parser reports that no complete slice is left.
 *
 * @param data start of the WAV image
 * @param len  size of the image in bytes
 */
void wav_play(const unsigned char* data, int len) {
    const int kBufferSize = 1024*1024;
    int samples = 0;
    int channels = 0;
    int samplerate = 0;

    // The initial rate/channel values are placeholders: play() is given the
    // real values with every slice.
    PCMPlayer player(16000, 1);
    WAVParser parser(data, len);
    unsigned char* buffer = new unsigned char[kBufferSize];

    while (parser.Get320msSample(buffer, kBufferSize, samples, channels, samplerate) == 0) {
        if (samples <= 0) {
            break;  // an empty slice would never make progress
        }
        player.play(reinterpret_cast<short*>(buffer), samples, samplerate, channels);
    }

    delete []buffer;
}

#ifndef _PLAY_PCM_H
#define _PLAY_PCM_H
#include <cerrno>
#include <cstdio>
#include <alsa/asoundlib.h>
#include "util.h"

/**
 * Minimal blocking playback wrapper around the ALSA "default" PCM device.
 *
 * Audio is expected as interleaved signed 16-bit little-endian samples.
 * The object owns the device handle and is therefore not copyable.
 */
class PCMPlayer {
public:
    /**
     * Opens the default playback device and configures it.
     *
     * On failure a message is written to stderr and the player stays
     * unusable: play() then does nothing.
     *
     * @param sample_rate requested sample rate in Hz
     * @param channels    requested channel count
     */
    PCMPlayer(int sample_rate, int channels):
            channels_(channels),
            sample_rate_(sample_rate),
            frame_size_(0),
            init_(false),
            handle_(NULL) {
        int rc = snd_pcm_open(&handle_, "default", SND_PCM_STREAM_PLAYBACK, 0);
        if (rc < 0) {
            fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
            handle_ = NULL;
            return;
        }
        init_ = Configure();
    }

    /** Lets queued audio finish playing, then closes the device. */
    ~PCMPlayer() {
        if (handle_ != NULL) {
            snd_pcm_drain(handle_);
            snd_pcm_close(handle_);
        }
    }

    /**
     * Plays a buffer of interleaved 16-bit samples, blocking until all of it
     * has been handed to the device.
     *
     * @param sample      first sample of the buffer
     * @param samples     total number of samples in the buffer (all channels)
     * @param sample_rate sample rate of the buffer in Hz
     * @param channels    channel count of the buffer
     */
    void play(short* sample, int samples, int sample_rate, int channels) {
        SetSamplerateAndChannel(sample_rate, channels);
        if (!init_ || sample == NULL || channels_ <= 0 || frame_size_ <= 0) {
            return;
        }

        int frames_left = samples / channels_;
        int pos = 0;  // index into sample[], counted in samples
        while (frames_left > 0) {
            // Never ask for more frames than remain, so the final write
            // cannot read past the end of the caller's buffer.
            const int chunk = frames_left < frame_size_ ? frames_left : frame_size_;
            const snd_pcm_sframes_t rc = snd_pcm_writei(handle_, &sample[pos], chunk);
            if (rc == -EPIPE) {
                /* EPIPE means underrun */
                fprintf(stderr, "underrun occurred\n");
                // The stream must be prepared again before it accepts data.
                if (snd_pcm_prepare(handle_) < 0) {
                    return;
                }
                continue;
            }
            if (rc < 0) {
                fprintf(stderr, "error from writei: %s\n", snd_strerror(static_cast<int>(rc)));
                return;
            }
            if (rc != chunk) {
                fprintf(stderr, "short write, write %d frames\n", static_cast<int>(rc));
            }
            if (rc == 0) {
                return;  // no progress; avoid spinning
            }
            // Advance by what was actually written, not by what was requested.
            pos += static_cast<int>(rc) * channels_;
            frames_left -= static_cast<int>(rc);
        }
    }

    /**
     * Reconfigures the device when the requested format differs from the
     * current one; does nothing otherwise.
     */
    void SetSamplerateAndChannel(int sample_rate, int channels) {
        if (sample_rate_ == sample_rate && channels_ == channels) {
            return;
        }
        PRINT("SetSamplerateAndChannel: sample rate = %d  chs = %d", sample_rate, channels);
        sample_rate_ = sample_rate;
        channels_ = channels;

        if (handle_ == NULL) {
            return;
        }
        if (init_) {
            // Let audio queued in the old format finish before the hardware
            // parameters are replaced.
            snd_pcm_drain(handle_);
        }
        init_ = Configure();
    }

private:
    // The class owns the device handle; copying would close it twice.
    PCMPlayer(const PCMPlayer&);
    PCMPlayer& operator=(const PCMPlayer&);

    /**
     * Applies sample_rate_ / channels_ to the device and records the period
     * size it granted in frame_size_. Returns true on success.
     */
    bool Configure() {
        frame_size_ = 0;
        if (handle_ == NULL || sample_rate_ <= 0 || channels_ <= 0) {
            return false;
        }

        /* Allocate a hardware parameters object (lives on this stack frame). */
        snd_pcm_hw_params_t *params = NULL;
        snd_pcm_hw_params_alloca(&params);

        int dir = 0;
        unsigned int rate = static_cast<unsigned int>(sample_rate_);
        // The original comment asked for a 32-frame period while the code
        // requested 0; request the documented 32 frames.
        snd_pcm_uframes_t frames = 32;

        /* Fill it in with default values. */
        int rc = snd_pcm_hw_params_any(handle_, params);
        /* Interleaved mode */
        if (rc >= 0) {
            rc = snd_pcm_hw_params_set_access(handle_, params, SND_PCM_ACCESS_RW_INTERLEAVED);
        }
        /* Signed 16-bit little-endian format */
        if (rc >= 0) {
            rc = snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);
        }
        /* Channel count */
        if (rc >= 0) {
            rc = snd_pcm_hw_params_set_channels(handle_, params, channels_);
        }
        /* Sample rate; the device may pick the nearest supported rate */
        if (rc >= 0) {
            rc = snd_pcm_hw_params_set_rate_near(handle_, params, &rate, &dir);
        }
        /* Period size; the device may pick the nearest supported size */
        if (rc >= 0) {
            rc = snd_pcm_hw_params_set_period_size_near(handle_, params, &frames, &dir);
        }
        /* Write the parameters to the driver */
        if (rc >= 0) {
            rc = snd_pcm_hw_params(handle_, params);
        }
        if (rc < 0) {
            fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
            return false;
        }

        /* Write to the device one period at a time */
        rc = snd_pcm_hw_params_get_period_size(params, &frames, &dir);
        if (rc < 0) {
            fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
            return false;
        }
        frame_size_ = static_cast<int>(frames);
        return frame_size_ > 0;
    }

    int channels_;       // channel count currently requested from the device
    int sample_rate_;    // sample rate currently requested from the device
    int frame_size_;     // period size in frames granted by the device
    bool init_;          // true once the device is open and configured
    snd_pcm_t *handle_;  // owned device handle, NULL when opening failed
};

#endif  // _PLAY_PCM_H

