鑑於有人要求STFT和ISTFT的代碼,我就把它貼出來。之前不貼是因爲很多人的STFT和ISTFT實現各不相同,而且實時算法與離線算法的STFT和ISTFT在實現上也有些不同。你可以用自己的,也可以用我的,思路和結果都是一樣的。
我不發上github的原因是我的repo很亂,我後面太多個版本了,後面整理一下,把這個baseline版本發github地址出來大家用的比較方便
---------------------------------------------------
去年底跳槽之後,轉做前端了,主要在去混響部分,
目前我自己實現的算法有譜減法(SS)和卡爾曼濾波(Kalman filter),都是根據論文復現的,不過業界比較常用的是WPE,
接觸時間不長,還在研究學習階段,慢慢分享心得
答應過的貼出來譜減法去混響的論文和代碼
A New Method Based on Spectral Subtraction for Speech Dereverberation
這份代碼是根據上面這篇論文實現的,是半年前完成的工作。後期有幾篇論文是在這個基礎上改進的,比如多通道去混響、分頻帶對混響進行估計等,都能提升效果。我把這個baseline提供給大家參考,有問題請指出,謝謝
細節等我有空再來一行一行或者一個函數一個函數來解釋說明代表啥,爲啥我要這麼寫
半年前的matlab代碼寫得不好,請多包涵。後期會轉寫成C代碼,或者直接用matlab生成C代碼,所以不用太在意代碼規範問題,主要請看內容和算法計算
PS: 分幀函數split_frame和疊加函數overlap_add或者說整個STFT過程和ISTFT過程你們可以用你們自己的, 若需要我貼我自己的, 可以留言,謝謝
這個代碼只要改改一開始你們的wav_file, out_file和out_yrr_file(我用來看混響的)的路徑,就能跑了
#主函數
% Driver script: spectral-subtraction dereverberation of one wav file.
% Edit wav_file, out_file and out_yrr_file below, then run.
clear all; clc; close all
addpath('./codes');
disp('Start Dereverberation');

% ---- input / output locations ------------------------------------------
%wav_file = 'Data/BAC009S0764W0123-reverb.wav';
%out_file = 'Data/BAC009S0764W0123-dereverb.wav';
wav_file = '/home/yongyug/workspace/speech_enhancement/generate_noisy_data/n1_real.wav';
out_file = '/home/yongyug/workspace/speech_enhancement/generate_noisy_data/ss_out2.wav';
out_yrr_file = 'Data/yrr.wav';      % resynthesised reverberation estimate, for inspection

% ---- load the signal (first channel only) ------------------------------
[signal, fs] = audioread(wav_file);
xx = signal(:, 1);
signalLength = length(xx);
time = (0:signalLength-1) / fs;     % time axis in seconds (not used below)
xx = xx - mean(xx);                 % remove the DC component
%x=xx/max(abs(xx)); % normalisation

tic

% ---- algorithm parameters ----------------------------------------------
beta = 0.8;      % recursive smoothing factor (PSD and a-priori SNR)
lambda = 0.1;    % spectral floor factor applied to sqrt(Yrr)
Tr = 0.4;        % presumably the reverberation time in seconds -- confirm against the paper
nwind = 128;                 % frame length in samples
noverlap = nwind / 2;        % overlap between consecutive frames
inc = nwind - noverlap;      % hop size
freq_bin = nwind / 2 + 1;    % number of non-redundant bins (not used below)

% ---- smoothed power spectrum of the reverberant input -------------------
[Yxx, X_amp, X_phase] = get_yxx(xx, nwind, noverlap, beta);
disp('Done Estimate Yxx');

% ---- reverberation power spectrum: delayed, attenuated copy of Yxx ------
delta = (3 * log(10)) / Tr;
lag = 0.05;                  % delay in seconds
[Yrr] = get_yrr(Yxx, delta, lag, inc);
disp('Done Estimate Yrr');

% ---- a-priori SNR, spectral-subtraction gain and resynthesis ------------
SNRpri = get_SNRpri(X_amp, Yrr, beta);
[dereverberated_signal, clean_signal, yrr_signal] = ss_baseline(X_amp, Yrr, SNRpri, X_phase, lambda, signalLength, nwind, inc);
disp('Done Dereverberation using SS filtering');

% ---- write results -------------------------------------------------------
audiowrite(out_file, dereverberated_signal, fs);
audiowrite(out_yrr_file, yrr_signal, fs);
disp('Finish Dereverberation');
toc
#########################函數#################################
function [Yxx, X_amp,X_phase] = get_yxx(x, nwind, noverlap,beta)
% GET_YXX  Short-time spectrum of x and its recursively smoothed power.
%
%   x        : input signal (flattened to a column vector)
%   nwind    : frame length in samples; a Hamming window of that length
%              is applied to every frame
%   noverlap : number of samples shared by consecutive frames
%   beta     : smoothing factor of the first-order recursion
%
%   Yxx      : frameNum-by-(nwind/2+1) smoothed power spectrum with
%              Yxx(1,:) = |X(1,:)|.^2 and
%              Yxx(m,:) = beta*Yxx(m-1,:) + (1-beta)*|X(m,:)|.^2
%   X_amp    : magnitude of the full FFT, one frame per column
%   X_phase  : phase of the full FFT, one frame per column
hop = nwind - noverlap;

% split_frame returns one frame per row; transpose to one frame per column
% so that fft operates along each frame.
frames = split_frame(x(:), hamming(nwind), hop)';
numFrames = size(frames, 2);

spec = fft(frames);
X_amp = abs(spec);
X_phase = angle(spec);

% Keep only the non-redundant half of the power spectrum, frames along rows.
numBins = nwind/2 + 1;
Yxx = zeros(numFrames, numBins);
framePower = X_amp.^2;
framePower = framePower(1:numBins, :)';

% The first frame seeds the recursion; later frames are smoothed.
if numFrames >= 1
    Yxx(1,:) = framePower(1,:);
end
for m = 2:numFrames
    Yxx(m,:) = beta.*Yxx(m-1,:) + (1-beta).*framePower(m,:);
end
end
#########################函數#################################
function [Yrr] = get_yrr(Yxx, delta, lag, inc, fs)
% GET_YRR  Reverberation power estimate: a delayed, attenuated copy of Yxx.
%
%   Yxx   : frameNum-by-nBins smoothed power spectrum (frames along rows)
%   delta : decay constant; the attenuation applied is exp(-2*delta*lag)
%   lag   : delay in seconds
%   inc   : hop size in samples between consecutive frames
%   fs    : (optional) sample rate in Hz used to convert lag to frames.
%           Defaults to 16000, the value that used to be hard-coded, so
%           existing four-argument calls behave exactly as before.
%
%   Yrr   : same size as Yxx. For the first lagframe frames there is no
%           delayed frame available, so Yrr(m,:) = Yxx(m,:); afterwards
%           Yrr(m,:) = exp(-2*delta*lag) * Yxx(m-lagframe,:).
if nargin < 5 || isempty(fs)
    fs = 16000;
end

[frameNum, nfft] = size(Yxx);
lagpoints = fs*lag;                 % delay in samples
lagframe = fix(lagpoints/inc);      % delay in whole frames
decay_para = exp(-2*delta*lag);     % loop-invariant attenuation factor

Yrr = zeros(frameNum, nfft);

% Leading frames are copied unchanged (all frames if the lag exceeds the signal).
nHead = min(lagframe, frameNum);
Yrr(1:nHead,:) = Yxx(1:nHead,:);

% Remaining frames: shift by lagframe and attenuate.
Yrr(nHead+1:end,:) = decay_para.*Yxx(1:frameNum-nHead,:);
end
#########################函數#################################
function [SNRpri] = get_SNRpri(x_amp, yrr,beta)
% GET_SNRPRI  Recursively smoothed a-priori SNR estimate.
%
%   x_amp : nfft-by-frameNum magnitude of the full FFT (one frame per column)
%   yrr   : frameNum-by-(floor(nfft/2)+1) reverberation power estimate
%   beta  : smoothing factor of the first-order recursion
%
%   SNRpri : frameNum-by-(floor(nfft/2)+1), with
%            SNRpos(m,:) = max(0, |X(m,:)|.^2 ./ yrr(m,:) - 1)
%            SNRpri(1,:) = SNRpos(1,:)
%            SNRpri(m,:) = beta*SNRpri(m-1,:) + (1-beta)*SNRpos(m,:)
[nfft, frameNum] = size(x_amp);

% floor() keeps the bin count an integer for odd FFT lengths; for even
% lengths it equals nfft/2+1.
freq_bin = floor(nfft/2) + 1;

x_energy = x_amp.^2;
x_energy = x_energy(1:freq_bin,:)';

% Floor the denominator. A bin that is exactly zero would give an infinite
% ratio, and Inf would then persist in the recursion for all later frames.
yrr = max(yrr, eps);

SNRpos = max(0, (x_energy./yrr) - 1);

SNRpri = zeros(frameNum, freq_bin);
if frameNum >= 1
    SNRpri(1,:) = SNRpos(1,:);
end
for m = 2:frameNum
    SNRpri(m,:) = beta.*SNRpri(m-1,:) + (1-beta).*SNRpos(m,:);
end
end
#########################函數#################################
function [output, clean_Signal, yrr_output] = ss_baseline(X_amp, yrr, SNRpri, X_phase, lambda, signallength, nwind, inc)
% SS_BASELINE  Apply the spectral-subtraction gain with a spectral floor
% and resynthesise time-domain signals by overlap-add.
%
%   X_amp        : magnitude of the full FFT, one frame per column
%   yrr          : frameNum-by-nBins reverberation power estimate
%   SNRpri       : frameNum-by-nBins a-priori SNR
%   X_phase      : phase of the full FFT, one frame per column
%   lambda       : floor factor; no bin is set below lambda*sqrt(yrr)
%   signallength : number of samples the first output is cut or padded to
%   nwind, inc   : frame length and hop size passed to OverlapAdd2
%
%   output       : resynthesised signal, exactly signallength samples long
%   clean_Signal : frameNum-by-nBins magnitude after gain and flooring
%   yrr_output   : resynthesis of sqrt(yrr) using the input phase
%                  (not length-adjusted)
[numFrames, numBins] = size(yrr);

% Non-redundant half of the magnitude spectrum, frames along rows.
halfAmp = X_amp(1:numBins,:)';

gain = 1 - (1./sqrt(SNRpri+1));
scaled = gain .* halfAmp;            % gain applied to the magnitude
floorAmp = lambda .* sqrt(yrr);      % per-bin lower bound

% Keep the scaled magnitude where it reaches the floor, else use the floor.
clean_Signal = zeros(numFrames, numBins);
useScaled = scaled >= floorAmp;
clean_Signal(useScaled) = scaled(useScaled);
clean_Signal(~useScaled) = floorAmp(~useScaled);

% Back to one frame per column for overlap-add.
halfPhase = X_phase(1:numBins,:);
output = OverlapAdd2(clean_Signal', halfPhase, nwind, inc);
yrr_output = OverlapAdd2(sqrt(yrr)', halfPhase, nwind, inc);

% Match the length of the input signal.
Nout = length(output);
if Nout < signallength
    output = [output; zeros(signallength-Nout,1)];
elseif Nout > signallength
    output = output(1:signallength);
end
%output=output/max(abs(output));
end
#########################函數#################################
function [f,t]=split_frame(x,win,inc)
% SPLIT_FRAME  Split a signal into (overlapping) frames, one per row.
%
%   [F,T] = SPLIT_FRAME(X,WIN,INC)
%
%   x   : input signal (any vector; indexed linearly)
%   win : either a scalar frame length (no window is applied) or a window
%         vector whose length is the frame length and which multiplies
%         every frame
%   inc : hop between the starts of consecutive frames in samples
%         (defaults to the frame length, i.e. no overlap)
%
%   f   : nf-by-len matrix of frames, nf = fix((length(x)-len+inc)/inc);
%         trailing samples that do not fill a whole frame are dropped
%   t   : (optional) nf-by-1 centre position of each frame in samples,
%         1-based
nx = length(x(:));
nwin = length(win);
if (nwin == 1)
    len = win;      % scalar: win is the frame length
else
    len = nwin;     % vector: win is the window itself
end
if (nargin < 3)
    inc = len;
end

nf = fix((nx-len+inc)/inc);
f = zeros(nf,len);

% Sample index of every element: frame start offset plus within-frame offset.
indf = inc*(0:(nf-1)).';
inds = (1:len);
f(:) = x(bsxfun(@plus, indf, inds));

if (nwin > 1)
    f = bsxfun(@times, f, win(:)');     % apply the window to every row
end
if nargout > 1
    t = (1+len)/2 + indf;
end
end
#########################函數#################################
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen)
% OVERLAPADD2  Rebuild a time signal from half-spectrum magnitude and phase
% by inverse FFT and overlap-add.
%
%   XNEW      : nBins-by-nFrames magnitude of the non-redundant half
%               spectrum (one frame per column)
%   yphase    : nBins-by-nFrames phase in radians (default: angle(XNEW))
%   windowLen : frame length in samples (default: 2*(nBins-1), i.e. an even
%               frame length whose half spectrum has nBins = windowLen/2+1
%               rows; pass windowLen explicitly for odd frame lengths)
%   ShiftLen  : hop size in samples (default: windowLen/2); a fractional
%               value is truncated with a notice
%
%   ReconstructedSignal : column vector of (nFrames-1)*ShiftLen+windowLen
%                         samples. No synthesis window or overlap
%                         normalisation is applied.
if nargin<2
    yphase=angle(XNEW);
end
if nargin<3
    % The even-length branch below mirrors rows 2..end-1, which yields
    % 2*(nBins-1) points. The default must equal that count, otherwise
    % ifft would zero-pad the spectrum.
    windowLen=2*(size(XNEW,1)-1);
end
if nargin<4
    ShiftLen=windowLen/2;
end
if fix(ShiftLen)~=ShiftLen
    ShiftLen=fix(ShiftLen);
    disp('The shift length have to be an integer as it is the number of samples.')
    disp(['shift length is fixed to ' num2str(ShiftLen)])
end

[~,FrameNum]=size(XNEW);
Spec=XNEW.*exp(1i*yphase);

% Rebuild the full conjugate-symmetric spectrum. For an odd frame length
% there is no Nyquist bin, so every row except DC is mirrored; for an even
% frame length the last row (Nyquist) is not mirrored.
if mod(windowLen,2)
    Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else
    Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end

% Inverse-transform every frame (column) at once.
frames=real(ifft(Spec,windowLen,1));

sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
for i=1:FrameNum
    start=(i-1)*ShiftLen+1;
    idx=start:start+windowLen-1;
    sig(idx)=sig(idx)+frames(:,i);
end
ReconstructedSignal=sig;
end