需求說明:錄音后,需要有變聲選項,選擇變聲模式試聽,滿意后可保存。保存的變聲文件格式為AMR。
解決方案:錄音問題很好解決,每次錄音后保存一個文件,格式為WAV。變聲功能采用網上比較常見的fmod框架。可試聽可保存文件,文件格式為WAV。保存后進行文件轉格式,由WAV轉為AMR。方案是采用Android自帶的AmrInputStream完成。
遇到的問題有:
- 變聲后文件如何保存。
- 變聲后文件轉格式后,聲音被拉長兩倍。
下面是具體的解決方法。
變聲采用fmod框架來實現,具體方案可以在網上搜索,這里貼上cpp代碼:
#include "inc/fmod.hpp"
#include <stdlib.h>
#include <unistd.h>
#include "com_kidosc_voicechange_jni_VoiceFixer.h"
#include <android/log.h>
// Logcat helpers: forward a printf-style message to __android_log_print with
// the tag "zph". Note that each expansion already ends in ';', so a call site
// written as `LOGI(...);` produces an extra empty statement.
#define LOGI(FORMAT, ...) __android_log_print(ANDROID_LOG_INFO,"zph",FORMAT,##__VA_ARGS__);
#define LOGE(FORMAT, ...) __android_log_print(ANDROID_LOG_ERROR,"zph",FORMAT,##__VA_ARGS__);
// Voice-effect modes matched against the `type` argument of the fix() entry point.
#define MODE_NORMAL 0
#define MODE_FUNNY 1
#define MODE_UNCLE 2
#define MODE_LOLITA 3
// The identifier is spelled with the digit zero ("ROB0T"), not the letter 'O'.
#define MODE_ROB0T 4
#define MODE_ETHEREAL 5
#define MODE_CHORUS 6
#define MODE_HORROR 7
using namespace FMOD;
// File-scope state shared by the JNI entry points below.
Sound *sound;      // sound created from the input path in fix()
DSP *dsp;          // effect unit created in fix() for the selected mode
Channel *channel;  // channel returned by playSound in fix(); stopped by stopPlaying()
bool playing;      // loop flag in fix(), refreshed from Channel::isPlaying
float frequency;   // scratch value for the MODE_FUNNY playback-frequency change
System *mSystem;   // FMOD system object created in fix()
JNIEnv *mEnv;      // JNIEnv of the most recent fix() call
// Stops the current channel; defined after the JNI entry points.
void stopPlaying();
/**
 * JNI entry point for VoiceFixer.init().
 *
 * Intentionally a no-op: nothing is set up here, the FMOD system is created
 * inside the fix() entry point instead.
 */
JNIEXPORT void JNICALL
Java_com_kidosc_voicechange_jni_VoiceFixer_init(JNIEnv *env, jclass jcls)
{
    (void) env;   // unused
    (void) jcls;  // unused
}
JNIEXPORT void JNICALL Java_com_kidosc_voicechange_jni_VoiceFixer_fix(JNIEnv *env,
jclass jcls, jstring path_jstr,
jint type,jint save) {
playing = true;
frequency = 0;
System_Create(&mSystem);
mSystem->setSoftwareFormat(8000,FMOD_SPEAKERMODE_MONO,0); //設置采樣率為8000,channel為1
mEnv=env;
const char *path_cstr = mEnv->GetStringUTFChars(path_jstr, NULL);
if(save==1){
char cDest[200] = "sdcard/xxx.wav";
mSystem->setOutput(FMOD_OUTPUTTYPE_WAVWRITER); //保存文件格式為WAV
mSystem->init(32, FMOD_INIT_NORMAL | FMOD_INIT_PROFILE_ENABLE,cDest);
} else{
mSystem->init(32, FMOD_INIT_NORMAL,NULL);
}
try {
//創(chuàng)建聲音
mSystem->createSound(path_cstr, FMOD_DEFAULT, NULL, &sound);
mSystem->playSound(sound, 0, false, &channel);
switch (type) {
case MODE_NORMAL:
LOGI("%s", path_cstr);
LOGI("%s", "fix normal");
break;
case MODE_FUNNY:
mSystem->createDSPByType(FMOD_DSP_TYPE_NORMALIZE, &dsp);
channel->getFrequency(&frequency);
frequency = frequency * 1.6;
channel->setFrequency(frequency);
break;
case MODE_UNCLE:
mSystem->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &dsp);
dsp->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, 0.8);
channel->addDSP(0, dsp);
break;
case MODE_LOLITA:
mSystem->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &dsp);
dsp->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH,
1.8);
channel->addDSP(0, dsp);
break;
case MODE_ROB0T:
mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 50);
dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 60);
channel->addDSP(0, dsp);
break;
case MODE_ETHEREAL:
mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 300);
dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 20);
channel->addDSP(0, dsp);
break;
case MODE_CHORUS:
mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 100);
dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 50);
channel->addDSP(0, dsp);
break;
case MODE_HORROR:
mSystem->createDSPByType(FMOD_DSP_TYPE_TREMOLO, &dsp);
dsp->setParameterFloat(FMOD_DSP_TREMOLO_SKEW, 0.8);
channel->addDSP(0, dsp);
break;
default:
break;
}
} catch (...) {
LOGE("%s", "發(fā)生異常");
goto end;
}
mSystem->update();
while (playing) {
usleep(1000);
channel->isPlaying(&playing);
}
goto end;
end:
mEnv->ReleaseStringUTFChars(path_jstr, path_cstr);
sound->release();
mSystem->close();
mSystem->release();
jclass clazz = mEnv -> FindClass("com/kidosc/voicechange/jni/VoiceFixer");
if(clazz == NULL){
printf("not found com/kidosc/voicechange/jni/VoiceFixer class");
return;
}
jmethodID id = mEnv->GetStaticMethodID(clazz,"setPlayState","()V");
if (id==NULL){
printf("method not found");
return;
}
mEnv->CallStaticVoidMethod(clazz,id);
printf("env->CallStaticVoidMethod(clazz,id);");
}
/**
 * JNI entry point for VoiceFixer.stopPlay(): delegates to stopPlaying().
 * Neither JNI argument is used.
 */
JNIEXPORT void JNICALL
Java_com_kidosc_voicechange_jni_VoiceFixer_stopPlay(JNIEnv *env, jclass jcls)
{
    (void) env;
    (void) jcls;
    stopPlaying();
}
void stopPlaying(){
channel->stop();
printf("stopplaying");
}
上面代碼與網上的區別是,增加了變聲文件保存的功能,增加了停止變聲播放的功能,改變了默認輸出文件的采樣率和channel。其中最后一條尤為重要(參見setSoftwareFormat方法文檔)。
變聲臨時文件保存在sdcard/xxx.wav,可在保存完成后,再進行重新轉格式并保存在自己想要的位置。
轉格式使用的是Android自帶的AmrInputStream.java處理。這個文件被隱藏了,想要直接使用的話,需要將文件拷貝到自己項目下。具體如何使用可自行搜索。AmrInputStream轉格式時,默認的輸入是8000Hz、16bit、單channel。這些參數很重要,如果WAV格式與這個不匹配,轉格式后會出現聲音被拉長等一系列問題。所以需要將fmod變聲框架的輸出文件格式改成8000Hz、16bit、單channel。這才是最頭痛的地方!網上根本找不到相關的內容,沒辦法只能自己慢慢摸索了。最開始的思路是將fmod輸出的采樣率改為8000,成功了之后發現聲音還是被拉長了兩倍。懷疑是16bit的問題,又不知道fmod輸出的是多少bit。所以嘗試更改AmrInputStream,將其默認輸入的音頻數據格式更改一下。更改完之后,聲音都變質了。所以最終方案還是考慮更改fmod輸出音頻。找到一個setSoftwareFormat方法,發現可以更改輸出采樣率。那么現在的問題就是,為什么還是會被拉長兩倍。
/**
 * Encodes the audio file at {@code inPath} to AMR and writes it to
 * {@code outPath}: the {@code header} bytes first, then everything
 * {@link AmrInputStream} produces from the input.
 *
 * All streams are closed on every path, including when reading or writing
 * fails. Failures are not propagated; the stack trace is printed instead.
 *
 * @param inPath  source file
 * @param outPath destination file (created or overwritten)
 */
public void systemWav2Amr(String inPath,String outPath){
    // NOTE(review): the input is passed to the encoder from its first byte, so
    // a WAV (RIFF) header would be encoded as audio - confirm whether it
    // should be skipped.
    // `header` is declared outside this method; presumably the AMR file magic - confirm.
    try (FileOutputStream fileOutputStream = new FileOutputStream(outPath);
         InputStream inputStream = new FileInputStream(inPath);
         AmrInputStream amrInputStream = new AmrInputStream(inputStream)) {
        fileOutputStream.write(header);
        byte[] buf = new byte[1024];
        int len;
        while ((len = amrInputStream.read(buf)) > 0) {
            fileOutputStream.write(buf, 0, len);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
查找資料發現,采樣速率(每秒的數據量)跟三個值有關,一個是channel,一個是采樣位數,一個是采樣率。既然被拉長兩倍,說明采樣速率變為之前的一半。說明fmod輸出的音頻數據中,channel或者采樣位數中的一個是AmrInputStream的兩倍。最開始猜測是采樣位數為32bit導致的。更改了AmrInputStream的輸入格式為32bit,發現并沒有用(聲音變質),但時長的確已經保持一致了。說明思路是對的,那么問題就是如何更改fmod輸出的音頻channel了。AmrInputStream中的channel為1,說明fmod中輸出的為2channel。在fmod的fmod_codec.h中找到channel相關屬性如下:
/* Wave-format description from fmod_codec.h, quoted for its `channels` field (mono = 1, stereo = 2). */
struct FMOD_CODEC_WAVEFORMAT
{
char name[256]; /* [w] Name of sound. Optional. */
FMOD_SOUND_FORMAT format; /* [w] Format for (decompressed) codec output, ie FMOD_SOUND_FORMAT_PCM8, FMOD_SOUND_FORMAT_PCM16. Mandatory - Must be supplied. */
int channels; /* [w] Number of channels used by codec, ie mono = 1, stereo = 2. Mandatory - Must be supplied. */
int frequency; /* [w] Default frequency in hz of the codec, ie 44100. Mandatory - Must be supplied. */
unsigned int lengthbytes; /* [w] Length in bytes of the source data. Used for FMOD_TIMEUNIT_RAWBYTES. Optional. Default = 0. */
unsigned int lengthpcm; /* [w] Length in decompressed, PCM samples of the file, ie length in seconds * frequency. Used for Sound::getLength and for memory allocation of static decompressed sample data. Mandatory - Must be supplied. */
unsigned int pcmblocksize; /* [w] Minimum, optimal number of decompressed PCM samples codec can handle. 0 or 1 = no buffering. Anything higher means FMOD will allocate a PCM buffer of this size to read in chunks. The codec read callback will be called in multiples of this value. Optional. */
int loopstart; /* [w] Loopstart in decompressed, PCM samples of file. Optional. Default = 0. */
int loopend; /* [w] Loopend in decompressed, PCM samples of file. Optional. Default = 0. */
FMOD_MODE mode; /* [w] Mode to determine whether the sound should by default load as looping, non looping, 2d or 3d. Optional. Default = FMOD_DEFAULT. */
FMOD_CHANNELMASK channelmask; /* [w] Defined channel bitmask to describe which speakers the channels in the codec map to, in order of channel count. See fmod_common.h. Optional. Leave at 0 to map to the speaker layout defined in FMOD_SPEAKER. */
FMOD_CHANNELORDER channelorder; /* [w] Defined channel order type, to describe where each sound channel should pan for the number of channels specified. See fmod_common.h. Optional. Leave at 0 to play in default speaker order. */
float peakvolume; /* [w] Peak volume of sound. Optional. Default = 0 if not used. */
};
在setSoftwareFormat方法中發現第二個參數可以設置為MONO,當其為FMOD_SPEAKERMODE_MONO時,channels為1。
/* Speaker-mode values; the second argument of System::setSoftwareFormat takes one of these (the code above passes FMOD_SPEAKERMODE_MONO). */
typedef enum
{
FMOD_SPEAKERMODE_DEFAULT, /* Default speaker mode based on operating system/output mode. Windows = control panel setting, Xbox = 5.1, PS3 = 7.1 etc. */
FMOD_SPEAKERMODE_RAW, /* There is no specific speakermode. Sound channels are mapped in order of input to output. Use System::setSoftwareFormat to specify speaker count. See remarks for more information. */
FMOD_SPEAKERMODE_MONO, /* The speakers are monaural. */
FMOD_SPEAKERMODE_STEREO, /* The speakers are stereo. */
FMOD_SPEAKERMODE_QUAD, /* 4 speaker setup. This includes front left, front right, surround left, surround right. */
FMOD_SPEAKERMODE_SURROUND, /* 5 speaker setup. This includes front left, front right, center, surround left, surround right. */
FMOD_SPEAKERMODE_5POINT1, /* 5.1 speaker setup. This includes front left, front right, center, surround left, surround right and an LFE speaker. */
FMOD_SPEAKERMODE_7POINT1, /* 7.1 speaker setup. This includes front left, front right, center, surround left, surround right, back left, back right and an LFE speaker. */
FMOD_SPEAKERMODE_MAX, /* Maximum number of speaker modes supported. */
FMOD_SPEAKERMODE_FORCEINT = 65536 /* Makes sure this enum is signed 32bit. */
} FMOD_SPEAKERMODE;
如此設置之后,就可以完美地進行轉格式分享了。