合成混合語音(純淨語音+噪聲)和IBM

有純淨語音,有噪聲,怎樣合成不同信噪比下的混合語音呢?!

function mixwav_IBM(S,db,nois)

    numChan = 64; %通道數
    FS = 8000;    %數據的採樣頻率
    gamma = gen_gammaton(FS,numChan);
    ft_env = fir1(1024,[70,400]/FS*2,'bandpass');
    [v,fs] = wavread(['babble.wav']);
    v = resample(v,FS,fs);                
    noi.wav = repmat(v,fix(FS*10/length(v))+1,1);
    noi.wav = noi.wav(1:FS*10);
    [~,gf] = fgammaton(noi.wav',gamma,FS,numChan);
    noi.cch = cochleagram(gf');

    [w,fsw] = wavread(['F50A1.WAV']); 
     w = resample(w,FS,fsw); 
    [~,gf] = fgammaton(w',gamma,FS,numChan);
    scch = cochleagram(gf');
    v = noi.wav;

    nLen = min(length(w),length(v));
    w = w(1:nLen); %w是純淨的語音
    v = v(1:nLen); %v是噪聲
    db = 0;%混合語音的信噪比是0db
    [m,v] = wavmix(w,v,db);%m是混合語音的波形

     winsize = 256;  
     shiftsize = 80;  
     window = hamming(winsize);  
     winc_w = getframe(w,winsize,shiftsize,window);%純淨語音分幀  
     winc_v = getframe(v,winsize,shiftsize,window); %噪聲分幀
     winc_m = getframe(m,winsize,shiftsize,window); %混合語音分幀
     w_fft = abs(fft(winc_w));%純淨語音的fft
     v_fft = abs(fft(winc_v));%噪聲語音的fft
     m_fft = abs(fft(winc_m));%混合語音的fft
     amp = zeros(size(w_fft));%掩碼的size和原始頻譜圖的size一樣大
     amp(w_fft>v_fft) = 1;

     wavwrite(m,FS,'F50A1_babble.WAV');

     figure;imagesc(w_fft);title('純淨語音')
     figure;imagesc(v_fft);title('噪聲')
     figure;imagesc(m_fft);title('混合語音')
     figure;imagesc(amp);title('IBM')
     figure;imagesc(m_fft.*amp);title('IBM蓋在混合語音語譜圖上得到降噪之後的語音語譜')
     close(figure);
     %如果正確,那麼將IBM蓋在混合語音的語譜圖上 則會得到和純淨語音一樣的語譜圖
     %純淨語音的語譜圖與IBM之後的語譜還是不太一樣
end      
function [m,v] = wavmix(w,v,db)
   amp = sqrt(sum(w.^2)/sum(v.^2)/10^(db/10));
   m = w + v*amp;
   v = v*amp;
end
  function winc = getframe(wav,winsize,shiftsize,window)  
    slen = length(wav);  
    numframe = ceil(slen/shiftsize);  
    winc = zeros(winsize,numframe);  
    for n = 1:numframe  
        st = (n-1)*shiftsize;  
        ed = min(st+winsize,slen);  
        winc(1:ed-st,n) = wav(st+1:ed);  
        winc(:,n) = winc(:,n).*window;          
    end  
end
function gt = gen_gammaton(sampFreq, numChannel)
    % Create gammatone filterbank
    % Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11

    if ~exist('sampFreq', 'var')
        sampFreq = 8000; % sampling frequency, default is 8000
    end


    if  ~exist('numChannel', 'var')
        numChannel = 64; % number of channels, default is 64
    end


    stepBound=200000; 

    filterOrder=4;     % filter order

    phase(1:numChannel)=zeros(numChannel,1);        % original phase
    erb_b=hz2erb([50,sampFreq/2]);       % upper and lower bound of ERB
    erb=[erb_b(1):diff(erb_b)/(numChannel-1):erb_b(2)];     % ERB segment
    cf=erb2hz(erb);       % center frequency
    b=1.019*24.7*(4.37*cf/1000+1);       % rate of decay

    gt=zeros(numChannel,stepBound);

    tmp_t=[1:stepBound]/sampFreq;

    for i=1:numChannel

        %gain=10^((loudness(cf(i))-60)/20)/3*(2*pi*b(i)/sampFreq).^4;
        gain = (2*pi*b(i)/sampFreq).^4/3;
        gt(i,:)=gain*sampFreq^3*tmp_t.^(filterOrder-1).*exp(-2*pi*b(i)*tmp_t).*cos(2*pi*cf(i)*tmp_t+phase(i));
    end
end
function erb=hz2erb(hz)
    % Convert normal frequency scale in hz to ERB-rate scale.
    % Units are number of Hz and number of ERBs.
    % ERB stands for Equivalent Rectangular Bandwidth.
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    erb=21.4*log10(4.37e-3*hz+1);

end
function hz=erb2hz(erb)
    % Convert ERB-rate scale to normal frequency scale.
    % Units are number of ERBs and number of Hz.
    % ERB stands for Equivalent Rectangular Bandwidth.
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    hz=(10.^(erb/21.4)-1)/4.37e-3;
end
function [f, tmp]=fgammaton(sig, gamma, sampFreq, numChannel)
    % Filter input signal with a gammatone filterbank and generate GF features
    % sig: input signal
    % gamma: gammtone filter impulse responses
    % sampFreq: sampling frequency
    % numChannel: number of channels
    % Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11

    [ignore,stepBound]=size(sig);

    d = 2^ceil(log2(length(sig)));

    tmp =ifft((fft((ones(numChannel,1)*sig)',d).*fft(gamma(:,1:stepBound)',d)));
    tmp = tmp(1:stepBound,:);
    f=(abs(resample(abs(tmp),100,sampFreq)')).^(1/3);
end
function a = cochleagram(r, winLength,winShift)
    % Generate a cochleagram from responses of a Gammatone filterbank.
    % It gives the log energy of T-F units
    % The first variable is required.
    % winLength: window (frame) length in samples
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    if nargin < 2
        winLength = 320;      % default window length in sample points which is 20 ms for 16 KHz sampling frequency
    end

    [numChan,sigLength] = size(r);     % number of channels and input signal length
    if nargin < 3 
        winShift = winLength/2;        % frame shift (default is half frame)
    end   
    increment = winLength/winShift;    % special treatment for first increment-1 frames

    M = floor(sigLength/winShift);     % number of time frames

    coswin = (1 + cos(2*pi*(0:winLength-1)/winLength - pi))/2;  % calculate a raised cosine window

    % calculate energy for each frame in each channel
    a = zeros(numChan,M);
    for m = 1:M      
        for i = 1:numChan
            if m < increment        % shorter frame lengths for beginning frames
                a(i,m) = r(i,1:m*winShift)*r(i,1:m*winShift)';
            else
                startpoint = (m-increment)*winShift;
                a(i,m) = (r(i,startpoint+1:startpoint+winLength)).*coswin*((r(i,startpoint+1:startpoint+winLength)).*coswin)';
    %             a(i,m) = sum(abs((r(i,startpoint+1:startpoint+winLength))).*coswin);%*((r(i,startpoint+1:startpoint+winLength)))';
            end
        end
    end
end

這裏寫圖片描述

經過掩碼掩蓋之後,那麼怎樣從頻譜圖合成降噪之後的語音呢?!

function wav = syn_mask(n,tt)
% n 噪聲譜,n = stft(wav)
% tt 爲估計的mask,語音部分爲1,其餘爲0,大小與n相同
if nargin < 2
    tt = ones(size(n));
end
if size(tt,1)~=129
    tt = tt';
end
if size(n,1)~=129
    n = n';
end
wav = istft(n.*tt);
% ang = angle(n);
% wav = istft(exp(tt').*(cos(ang)+1i*sin(ang)))';
function wav = syn_spec(n,tt)
% n 噪聲譜,n = stft(wav)
% tt 爲估計的幅度譜,大小與n相同
if size(tt,1)==129
    tt = tt';
end
%wav = istft(n.*tt);
ang = angle(n);
wav = istft((tt').*(cos(ang')+1i*sin(ang')))';`這裏寫代碼片`
function d = stft(x)
s = length(x);
f = 256;

win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800];

c = 1;
h = 80;

% pre-allocate output array
d = complex(zeros((1+f/2),1+fix((s-f)/h)));

for b = 0:h:(s-f)
  u = win.*x((b+1):(b+f));
  t = fft(u);
  d(:,c) = t(1:(1+f/2))';
  c = c+1;
end;
function x = istft(d)
% X = istft(D, F, W, H)                   Inverse short-time Fourier transform.
%   Performs overlap-add resynthesis from the short-time Fourier transform 
%   data in D.  Each column of D is taken as the result of an F-point 
%   fft; each successive frame was offset by H points. Data is 
%   hamm-windowed at W pts.  
%       W = 0 gives a rectangular window; W as a vector uses that as window.
% dpwe 1994may24.  Uses built-in 'ifft' etc.
% $Header: /homes/dpwe/public_html/resources/matlab/pvoc/RCS/istft.m,v 1.4 2009/01/07 04:20:00 dpwe Exp $

ftsize = 256;
h = 80;
s = size(d);

cols = s(2);
xlen = ftsize + (cols-1)*h;
x = zeros(1,xlen);

win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800]';



for b = 0:h:(h*(cols-1))
  ft = d(:,1+b/h)';
  ft = [ft, conj(ft([((ftsize/2)):-1:2]))];
  px = real(ifft(ft));
  x((b+1):(b+ftsize)) = x((b+1):(b+ftsize))+px.*win;
end;
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章