可以實現的聲音種類:蘿莉、大叔、肥仔、搞怪、熊孩子、慢吞吞、網紅女、困獸、重機械、感冒、空靈等。
本方法通過 GitHub 上的開源項目 TarsosDSP 實現。
廢話不多說,先上代碼。
這裡從 Maven 上找了一個由 TarsosDSP fork 打包的 jar 包,源碼一樣。也可以直接從 TarsosDSP 項目把 jar 包下載到本地。
<!-- Maven dependency the article adds before showing the TarsosDSP-based code. -->
<!-- NOTE(review): the surrounding text describes a repackaged TarsosDSP fork; confirm that this
     artifact actually provides the be.tarsos.dsp packages imported by the code below. -->
<dependency>
<groupId>ws.schild</groupId>
<artifactId>jave-core</artifactId>
<version>2.4.6</version>
</dependency>
/**
 * Demo entry point: voice-changes a sample MP3 and stores the result as a WAV file.
 *
 * <p>{@code speechPitchShiftMp3} returns raw PCM. A WAV file is produced by writing the header
 * built by {@code pcm2wav} followed by those PCM bytes.
 *
 * @param args unused
 * @throws Exception if processing the input or writing the output fails
 */
public static void main(String[] args) throws Exception {
    // The processor chain returns PCM audio, not a playable container format.
    byte[] pcmBytes = speechPitchShiftMp3("/home/li/Music/silk2pcm/file.mp3", 0.6, 0.6);

    // To get a WAV file a header has to be put in front of the PCM bytes.
    // (TarsosDSP also ships a processor that writes WAV output; it is not used here.)
    byte[] wavHeader = pcm2wav(pcmBytes);

    // The original snippet wrote to an undeclared "tempFile"; create one explicitly.
    java.io.File tempFile = java.io.File.createTempFile("voice-change-", ".wav");

    // try-with-resources closes the stream even when one of the writes fails.
    try (OutputStream wavOutPut = new FileOutputStream(tempFile)) {
        wavOutPut.write(wavHeader);
        wavOutPut.write(pcmBytes);
        wavOutPut.flush();
    }

    // The presets for each voice type (extra processors and their parameters) are listed
    // at the end of the article.
    // Author's note: leave a comment if MP3 output is needed and it will be added.
}
/**
 * Runs an audio file through a voice-changing processor chain and returns the processed bytes.
 *
 * <p>The input is opened with {@code AudioDispatcherFactory.fromPipe} at 16000 Hz and passed,
 * in this order, through a {@code WaveformSimilarityBasedOverlapAdd} time stretcher, a
 * {@code RateTransposer}, a fixed {@code DelayEffect(0.2, 0.24, 12000)} and a byte-array sink.
 *
 * @param fileUrl     location of the input audio, handed to {@code fromPipe}
 * @param rateFactor  factor passed to the WSOLA speech defaults. Per the original author's
 *                    notes the usable range is (0,2): above 1 gives a deeper voice, below 1 a
 *                    sharper one
 * @param speedFactor factor passed to the {@code RateTransposer}. Per the original author's
 *                    notes the usable range is (0,2): above 1 speeds speech up, below 1 slows
 *                    it down
 * @return the bytes collected by the sink (the caller treats them as raw PCM, not MP3)
 */
public static byte[] speechPitchShiftMp3(String fileUrl, double rateFactor, double speedFactor) throws IOException, UnsupportedAudioFileException {
    final int sampleRate = 16000;

    // Time stretcher; its buffer size and overlap also determine how the dispatcher reads.
    WaveformSimilarityBasedOverlapAdd stretcher = new WaveformSimilarityBasedOverlapAdd(
            WaveformSimilarityBasedOverlapAdd.Parameters.speechDefaults(rateFactor, sampleRate));
    int bufferSize = stretcher.getInputBufferSize();
    int bufferOverlap = stretcher.getOverlap();

    AudioDispatcher dispatcher =
            AudioDispatcherFactory.fromPipe(fileUrl, sampleRate, bufferSize, bufferOverlap);
    stretcher.setDispatcher(dispatcher);
    dispatcher.addAudioProcessor(stretcher);

    // Sample-rate converter: resamples by interpolation; combined with the time stretcher
    // it can be used for pitch shifting.
    dispatcher.addAudioProcessor(new RateTransposer(speedFactor));

    // Author's experiment log for other processors (all left disabled):
    //   SoundTouchRateTransposer(2) with setDispatcher(dispatcher)  -- failed
    //   SineGenerator(0.5, 0.5)                                     -- no reaction
    //   PitchShifter(0.1, 16000, 448, overlap)                      -- no effect
    //   OptimizedGranulator(16000, 448)  (granular synthesis)       -- no effect
    //   NoiseGenerator(0.2)                                         -- works
    //   GainProcessor(10)  (gain 1 = unchanged, >1 = louder)        -- works
    //   FlangerEffect(64, 0.3, 16000, 16000)                        -- works, echo-like
    //   FlangerEffect(1 << 4, 0.8, 8000, 2000)                      -- "having a cold" voice
    //   ZeroCrossingRateProcessor()                                 -- "having a cold" voice
    //   FadeOut(5)                                                  -- volume slowly decreases
    //   FadeIn(5)                                                   -- volume slowly increases
    //   AmplitudeModulatedNoise()                                   -- turns the sound into noise
    //   AmplitudeLFO()                                              -- makes the volume fluctuate

    // Echo. Author's note: echo length is in seconds; decay is between 0 and 1, where 1 means
    // no decay and 0 means immediate decay.
    dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000));

    // The sink must be last so that it captures the fully processed audio.
    AudioOutputToByteArray sink = new AudioOutputToByteArray();
    dispatcher.addAudioProcessor(sink);

    dispatcher.run();
    return sink.getData();
}
/**
 * Builds the WAV header that belongs in front of the given PCM data.
 *
 * <p>Only the header is returned; the PCM bytes are not copied into the result. The header
 * describes 16-bit, single-channel audio at 16000 samples per second with format tag 0x0001.
 *
 * @param bytes the PCM payload whose length is recorded in the header
 * @return the serialized header
 * @throws IOException if {@code WaveHeader.getHeader()} fails
 */
public static byte[] pcm2wav(byte[] bytes) throws IOException {
    WaveHeader header = new WaveHeader();
    final int pcmLength = bytes.length;

    // Format description.
    header.FormatTag = 0x0001;
    header.Channels = 1;
    header.BitsPerSample = 16;
    header.SamplesPerSec = 16000;
    header.FmtHdrLeth = 16;

    // Derived values: bytes per sample frame and bytes per second.
    header.BlockAlign = (short) (header.Channels * header.BitsPerSample / 8);
    header.AvgBytesPerSec = header.BlockAlign * header.SamplesPerSec;

    // Length fields. fileLength = payload size + header size, not counting the 4-byte "RIFF"
    // identifier and the 4 bytes of the fileLength field itself.
    header.DataHdrLeth = pcmLength;
    header.fileLength = pcmLength + (44 - 8);

    byte[] headerBytes = header.getHeader();
    assert headerBytes.length == 44; // the header is expected to be 44 bytes long
    return headerBytes;
}
AudioOutputToByteArray代碼
import be.tarsos.dsp.AudioEvent;
import be.tarsos.dsp.AudioProcessor;
import org.tritonus.share.sampled.file.AudioOutputStream;
import java.io.ByteArrayOutputStream;
import java.util.concurrent.CountDownLatch;
/**
 * {@link AudioProcessor} that collects the byte representation of every processed audio buffer
 * in memory and hands the complete result out once processing has finished.
 *
 * <p>{@link #getData()} may be called from a different thread than the one driving the
 * processor chain; it blocks until {@link #processingFinished()} has been invoked.
 */
public class AudioOutputToByteArray implements AudioProcessor {
    /** Opened by {@link #processingFinished()}; also publishes {@code out} to waiting threads. */
    private final CountDownLatch finished = new CountDownLatch(1);
    /** Final result; written exactly once, before the latch is opened. */
    private byte[] out = null;
    /** Accumulates incoming buffers; set to {@code null} once the result has been extracted. */
    private ByteArrayOutputStream bos;

    public AudioOutputToByteArray() {
        bos = new ByteArrayOutputStream();
    }

    /**
     * Returns the stream that is currently accumulating audio bytes.
     *
     * @return the backing stream, or {@code null} after {@link #processingFinished()} has run
     */
    public ByteArrayOutputStream getBos() {
        return bos;
    }

    /**
     * Blocks until processing has finished and returns all collected bytes.
     *
     * <p>An interrupt does not abort the wait (the method keeps its original "always returns
     * the data" behavior), but the thread's interrupt status is restored before returning so
     * that callers can still observe it.
     *
     * @return the collected audio bytes
     */
    public byte[] getData() {
        boolean interrupted = false;
        try {
            while (true) {
                try {
                    finished.await();
                    return out;
                } catch (InterruptedException e) {
                    // Remember the interrupt and keep waiting; the flag is restored below.
                    interrupted = true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Appends the byte form of the current buffer to the in-memory stream.
     *
     * @param audioEvent the buffer being processed
     * @return always {@code true}
     */
    @Override
    public boolean process(AudioEvent audioEvent) {
        // Fetch the byte buffer once instead of once per use.
        byte[] chunk = audioEvent.getByteBuffer();
        bos.write(chunk, 0, chunk.length);
        return true;
    }

    /**
     * Freezes the collected bytes, releases the accumulation buffer and wakes up any thread
     * blocked in {@link #getData()}. Calling it again has no effect.
     */
    @Override
    public void processingFinished() {
        if (bos == null) {
            return; // already finished
        }
        // toByteArray() already returns a fresh copy, so no additional clone is required.
        out = bos.toByteArray();
        bos = null;
        finished.countDown();
    }
}
WaveHeader代碼
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
 * Mutable holder for the fields of a 44-byte RIFF/WAVE header and its serializer.
 *
 * <p>Callers fill in the public fields and then call {@link #getHeader()}. Multi-byte numeric
 * fields are written least-significant byte first.
 */
public class WaveHeader {
    /** Chunk identifier written at the very start of the header. */
    public final char fileID[] = {'R', 'I', 'F', 'F'};
    /** Length field that follows the "RIFF" identifier. */
    public int fileLength;
    /** Format identifier. */
    public char wavTag[] = {'W', 'A', 'V', 'E'};
    /** Identifier of the format sub-chunk (note the trailing space). */
    public char FmtHdrID[] = {'f', 'm', 't', ' '};
    /** Length of the format sub-chunk. */
    public int FmtHdrLeth;
    /** Audio format tag. */
    public short FormatTag;
    /** Number of channels. */
    public short Channels;
    /** Samples per second. */
    public int SamplesPerSec;
    /** Average bytes per second. */
    public int AvgBytesPerSec;
    /** Bytes per sample frame. */
    public short BlockAlign;
    /** Bits per sample. */
    public short BitsPerSample;
    /** Identifier of the data sub-chunk. */
    public char DataHdrID[] = {'d', 'a', 't', 'a'};
    /** Length of the audio data that follows the header. */
    public int DataHdrLeth;

    /**
     * Serializes the current field values in header order.
     *
     * @return the header bytes (44 bytes when the identifier arrays keep their default length)
     * @throws IOException never thrown by this implementation; the declaration is kept so that
     *                     existing callers that catch it continue to compile
     */
    public byte[] getHeader() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(44);
        writeTag(bos, fileID);
        writeIntLe(bos, fileLength);
        writeTag(bos, wavTag);
        writeTag(bos, FmtHdrID);
        writeIntLe(bos, FmtHdrLeth);
        writeShortLe(bos, FormatTag);
        writeShortLe(bos, Channels);
        writeIntLe(bos, SamplesPerSec);
        writeIntLe(bos, AvgBytesPerSec);
        writeShortLe(bos, BlockAlign);
        writeShortLe(bos, BitsPerSample);
        writeTag(bos, DataHdrID);
        writeIntLe(bos, DataHdrLeth);
        return bos.toByteArray();
    }

    /** Writes the low 16 bits of {@code s}, least-significant byte first. */
    private static void writeShortLe(ByteArrayOutputStream bos, int s) {
        bos.write(s & 0xFF);
        bos.write((s >>> 8) & 0xFF);
    }

    /** Writes all 32 bits of {@code n}, least-significant byte first. */
    private static void writeIntLe(ByteArrayOutputStream bos, int n) {
        bos.write(n & 0xFF);
        bos.write((n >>> 8) & 0xFF);
        bos.write((n >>> 16) & 0xFF);
        bos.write((n >>> 24) & 0xFF);
    }

    /** Writes each character of {@code id} as a single byte (its low 8 bits). */
    private static void writeTag(ByteArrayOutputStream bos, char[] id) {
        for (char c : id) {
            bos.write(c);
        }
    }
}
各種變聲器參數
import be.tarsos.dsp.AudioDispatcher;
import be.tarsos.dsp.WaveformSimilarityBasedOverlapAdd;
import be.tarsos.dsp.ZeroCrossingRateProcessor;
import be.tarsos.dsp.effects.DelayEffect;
import be.tarsos.dsp.io.jvm.AudioDispatcherFactory;
import be.tarsos.dsp.resample.RateTransposer;
import com.bleege.recordingsound.utils.AudioOutputToByteArray;
import com.bleege.recordingsound.utils.WaveHeader;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.function.Consumer;
/**
 * Voice-changing presets.
 *
 * <p>Each preset carries the two factors fed into the shared processor chain built by
 * {@link #run(String)}, a display name, a numeric type id used by {@link #getInstance(int)},
 * and a hook that may append extra processors to the dispatcher.
 */
@Slf4j
public enum SoundEnum {
    LUOLI(0.6, 0.6, "蘿莉", 1, dispatcher -> {}),
    DASHU(1.2, 1.2, "大叔", 2, dispatcher -> {}),
    FEIZAI(1.5, 1.5, "肥仔", 3, dispatcher -> {}),
    GAOGUAI(1.5, 0.8, "搞怪", 4, dispatcher -> {}),
    XIONGHAIZI(0.73, 0.73, "熊孩子", 5, dispatcher -> {}),
    MANTUNTUN(0.35, 1, "慢吞吞", 6, dispatcher -> {}),
    WANGHONGNV(1.2, 0.7, "網紅女", 7, dispatcher -> {}),
    /** Adds {@code DelayEffect(0.2, 0.24, 12000)}. */
    KUNSHOU(1.55, 1.55, "困獸", 8,
            dispatcher -> dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000))),
    /** Adds {@code DelayEffect(0.2, 0.24, 12000)}. */
    ZHONGJIXIE(1.50, 1.50, "重機械", 9,
            dispatcher -> dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000))),
    /**
     * Adds {@code DelayEffect(0.2, 0.24, 12000)} followed by a {@code ZeroCrossingRateProcessor}.
     * (The earlier note on this preset mentioned {@code FlangerEffect(1 << 4, 0.8, 8000, 2000)};
     * the code has always used the delay effect.)
     */
    GANMAO(1.05, 1.05, "感冒", 10, dispatcher -> {
        dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000));
        dispatcher.addAudioProcessor(new ZeroCrossingRateProcessor());
    }),
    /** Adds {@code DelayEffect(0.8, 0.5, 12000)} followed by {@code DelayEffect(0.5, 0.3, 8000)}. */
    KONGLING(1, 1, "空靈", 11, dispatcher -> {
        dispatcher.addAudioProcessor(new DelayEffect(0.8, 0.5, 12000));
        dispatcher.addAudioProcessor(new DelayEffect(0.5, 0.3, 8000));
    });

    /** Sample rate used for decoding, for the time stretcher and in the generated WAV header. */
    private static final int SAMPLE_RATE = 16000;

    private final double rateFactor;
    private final double speedFactor;
    private final String name;
    private final int type;
    private final Consumer<AudioDispatcher> consumer;

    /**
     * @param rateFactor  factor passed to the WSOLA speech defaults. Per the original author's
     *                    notes the usable range is (0,2): above 1 gives a deeper voice, below 1
     *                    a sharper one
     * @param speedFactor factor passed to the {@code RateTransposer}. Per the original author's
     *                    notes the usable range is (0,2): above 1 speeds speech up, below 1
     *                    slows it down
     * @param name        display name of the preset
     * @param type        numeric id looked up by {@link #getInstance(int)}
     * @param consumer    hook that appends preset-specific processors to the dispatcher
     */
    SoundEnum(double rateFactor, double speedFactor, String name, int type, Consumer<AudioDispatcher> consumer) {
        this.rateFactor = rateFactor;
        this.speedFactor = speedFactor;
        this.name = name;
        this.type = type;
        this.consumer = consumer;
    }

    /**
     * Applies this preset to an audio file.
     *
     * <p>The chain is, in order: WSOLA time stretcher, {@code RateTransposer}, the preset's
     * extra processors, and finally a byte-array sink.
     *
     * @param fileUrl location of the input audio, handed to {@code AudioDispatcherFactory.fromPipe}
     * @return the bytes collected by the sink
     */
    public byte[] run(String fileUrl) {
        WaveformSimilarityBasedOverlapAdd w = new WaveformSimilarityBasedOverlapAdd(
                WaveformSimilarityBasedOverlapAdd.Parameters.speechDefaults(rateFactor, SAMPLE_RATE));
        int inputBufferSize = w.getInputBufferSize();
        int overlap = w.getOverlap();
        AudioDispatcher dispatcher =
                AudioDispatcherFactory.fromPipe(fileUrl, SAMPLE_RATE, inputBufferSize, overlap);
        w.setDispatcher(dispatcher);
        dispatcher.addAudioProcessor(w);
        // Sample-rate converter: resamples by interpolation; combined with the time stretcher
        // it can be used for pitch shifting.
        dispatcher.addAudioProcessor(new RateTransposer(speedFactor));
        // Preset-specific effects go after the core chain and before the sink.
        consumer.accept(dispatcher);
        AudioOutputToByteArray out = new AudioOutputToByteArray();
        dispatcher.addAudioProcessor(out);
        dispatcher.run();
        return out.getData();
    }

    /**
     * Builds the WAV header that belongs in front of the given PCM data.
     *
     * <p>Only the header is returned; the PCM bytes are not included. The header describes
     * 16-bit, single-channel audio at 16000 samples per second.
     *
     * @param bytes the PCM payload whose length is recorded in the header
     * @return the serialized header, or {@code null} if building it failed with an
     *         {@link IOException} (the failure is logged)
     */
    public static byte[] pcm2wav(byte[] bytes) {
        try {
            WaveHeader header = new WaveHeader();
            // Length field = payload size + header size, not counting the 4-byte "RIFF"
            // identifier and the 4 bytes of the fileLength field itself.
            header.fileLength = bytes.length + (44 - 8);
            header.FmtHdrLeth = 16;
            header.BitsPerSample = 16;
            header.Channels = 1;
            header.FormatTag = 0x0001;
            header.SamplesPerSec = SAMPLE_RATE;
            header.BlockAlign = (short) (header.Channels * header.BitsPerSample / 8);
            header.AvgBytesPerSec = header.BlockAlign * header.SamplesPerSec;
            header.DataHdrLeth = bytes.length;
            byte[] h = header.getHeader();
            assert h.length == 44; // the header is expected to be 44 bytes long
            return h;
        } catch (IOException e) {
            log.error("pcm2wav-error", e);
        }
        return null;
    }

    /**
     * Looks up a preset by its numeric type id.
     *
     * @param type the id to look for
     * @return the matching preset, or an empty {@link Optional} when no preset has that id
     */
    public static Optional<SoundEnum> getInstance(int type) {
        // values() clones the constant array on every call, so call it once.
        for (SoundEnum sound : values()) {
            if (sound.type == type) {
                return Optional.of(sound);
            }
        }
        return Optional.empty();
    }
}