基於語音信號MFCC的情感識別

信號處理課程結束了,沒有像期望的那樣學到很多東西,買的書(Discrete-Time)也沒怎麼看,作業算是完成了,但是要達到該坑的國際領先水平,距離有些遙遠。我只是撫摸了一下信號處理的皮毛,我只是用了一下支持向量機。然而,我對語音情感的實際使用價值卻沒一個月之前那麼看好了,或許面部表情和生理信號更加靠譜,也或許……

基於MFCC的語音情感識別

在人類的面對面交流場景中,語音所傳達的信息量佔有很大的比重,僅次於面部表情。基於比較成熟的信號處理技術、認知心理學模型以及計算機技術,語音情感計算模型在情感計算領域已經發展了十多年。它聚焦於語音數字信號的特徵提取與分析、情感語音庫的建立和語音信號的情感分類。本文的目的是給出語音情感計算的概況以及最新進展,探討語音情感計算的整個過程。首先,介紹了語音情感計算的定義以及它的交叉學科的本質。接下來,從數字信號處理的角度研究了基於最常用的數字語音信號特徵——MFCC的語音情感檢測的全過程。然後,利用柏林語音情感數據庫分析了在提取MFCC的過程中一些具體的參數的選擇對檢測結果準確率的影響。最後,闡述了語音情感計算領域面臨的挑戰以及今後的可能發展趨勢。
實驗使用的是柏林語音情感數據庫,最終的識別率只是勉強接近50%;其中存在的問題我也思考過,但一時還解決不了。以下是主流程腳本:
    % process
% ---- Dataset setup -------------------------------------------------------
% Start from an empty workspace so feature matrices left over from an
% earlier run cannot be mixed into this one.
clear all;%
%cd D:\Program Files\MATLAB\R2014a\toolbox\libsvm-3.21\matlab
% Work inside the folder that holds the wav files; the relative names
% returned by dir() below are later handed to audioread as-is.
cd C:\Users\mayax\Desktop\現代信號處理【報告】——參考文獻\samples\data\wav
file = dir('*.wav');
%load rawdata.mat;

% Emotion name -> integer class id (1..7) used as the SVM label.
class_label = containers.Map({'anger','boredom','disgust','fear','happiness','neutral','sadness'},{1,2,3,4,5,6,7});
% Emotion names in class-id order. This must be a cell array: square
% brackets would concatenate the names into one long char row.
class_key = {'anger','boredom','disgust','fear','happiness','neutral','sadness'};

% The slices below address the listing by fixed position, so fail early with
% a readable message when the folder does not hold enough files.
expected_file_count = 535;
if numel(file) < expected_file_count
    error('process:fileCount', ...
        'Expected at least %d wav files in %s but found %d.', ...
        expected_file_count, pwd, numel(file));
end

% Partition the listing into the seven emotion groups by position.
% NOTE(review): this assumes dir() returns the files grouped by emotion in
% exactly this order (e.g. files renamed with an emotion prefix) - confirm
% against the actual folder contents.
anger = file(1:127);
boredom = file(128:208);
disgust = file(209:254);
fear = file(255:323);
happiness = file(324:394);
neutral = file(395:473);
sadness = file(474:535);
%讀取音頻原始序列並得到特徵參數

% ---- Feature extraction --------------------------------------------------
% Read every recording of every emotion group and turn it into one feature
% row via mfcc_extract_func. Rows are collected in a preallocated cell array
% and stacked once per group, instead of growing a matrix inside the loop.
file_groups = {anger, boredom, disgust, fear, happiness, neutral, sadness};
group_features = cell(1, numel(file_groups));

for g = 1:numel(file_groups)
    group_files = file_groups{g};
    feature_rows = cell(numel(group_files), 1);
    for k = 1:numel(group_files)
        [y, fs] = audioread(group_files(k).name);
        feature_rows{k} = mfcc_extract_func(y, fs);
    end
    % One row per recording, in the same order as the file listing.
    group_features{g} = vertcat(feature_rows{:});
end

% Unpack into the per-emotion matrices used by the rest of the script.
[feature_anger, feature_boredom, feature_disgust, feature_fear, ...
    feature_happiness, feature_neutral, feature_sadness] = group_features{:};
%
% ---- Train / test split --------------------------------------------------
% Number of leading rows of each class used for training, indexed by class
% id (1..7); all remaining rows of that class are used for testing.
train_matrix_size = [42,27,15,23,24,26,21];

% Class names paired with their feature matrices. Keeping both in one place
% means a name, its label and its data cannot drift out of sync.
split_class_names = {'anger','boredom','disgust','fear','happiness','neutral','sadness'};
features_by_class = {feature_anger, feature_boredom, feature_disgust, feature_fear, ...
    feature_happiness, feature_neutral, feature_sadness};

n_classes = numel(split_class_names);
train_parts = cell(n_classes, 1);
train_label_parts = cell(n_classes, 1);
test_parts = cell(n_classes, 1);
test_label_parts = cell(n_classes, 1);

for c = 1:n_classes
    label_id = class_label(split_class_names{c});
    n_train = train_matrix_size(label_id);
    class_features = features_by_class{c};

    % First n_train rows train the model, the rest are held out.
    train_parts{c} = class_features(1:n_train, :);
    test_parts{c} = class_features(n_train+1:end, :);

    % One label per row, all equal to this class's id.
    train_label_parts{c} = label_id * ones(size(train_parts{c}, 1), 1);
    test_label_parts{c} = label_id * ones(size(test_parts{c}, 1), 1);
end

% Stack the per-class pieces in class order (anger ... sadness).
training_matrix = vertcat(train_parts{:});
training_label = vertcat(train_label_parts{:});
testing_matrix = vertcat(test_parts{:});
testing_label = vertcat(test_label_parts{:});
%training svm model
% ---- SVM training and evaluation -----------------------------------------
% Switch to the libsvm MATLAB folder so that the svmtrain / svmpredict found
% in the current directory are the ones that get called.
cd 'D:\Program Files\MATLAB\R2014a\toolbox\libsvm-3.21\matlab'

% Scale both sets to [-1, 1], then reduce dimensionality with PCA.
% NOTE(review): scaleForSVM and pcaForSVM are external helpers not shown
% here. The calls pass the testing set first and read the testing result
% first - confirm this pairing against the helpers' signatures. The 90 is
% presumably the percentage of variance to keep - verify.
[testing_matrix_scale, training_matrix_scale] = scaleForSVM(testing_matrix, training_matrix,-1,1);
[testing_matrix_pca,training_matrix_pca] = pcaForSVM(testing_matrix_scale, training_matrix_scale, 90);

% Train with libsvm's default options and predict the held-out rows.
model = svmtrain(training_label, training_matrix_pca);
[predicted_label] = svmpredict(testing_label, testing_matrix_pca, model);

% Confusion counts: acc(t, p) is the number of test samples whose true
% class is t and whose predicted class is p.
acc = accumarray([testing_label, predicted_label], 1, [7 7]);

% Per-emotion recognition rate: correctly classified samples of a class
% divided by the number of test samples of that class. The max(...,1)
% keeps a class without test samples at rate 0 instead of NaN.
class_recognition_rate = diag(acc) ./ max(sum(acc, 2), 1);

特徵提取函數 mfcc_extract_func(需單獨保存為 mfcc_extract_func.m,並依賴 VOICEBOX 工具箱中的 enframe 和 melbankm):

function [ features ] = mfcc_extract_func( y,fs )
%MFCC_EXTRACT_FUNC Utterance-level MFCC statistics of one audio signal.
%   features = mfcc_extract_func(y, fs) returns a 1x48 row vector made of
%   the per-coefficient mean, maximum, minimum and variance (12 values
%   each, in that order) of the MFCCs c1..c12 taken over all frames.
%
%   y  - audio samples. A vector is used as-is; a matrix is treated as
%        samples-by-channels and averaged to a single channel.
%   fs - sampling rate in Hz, forwarded to melbankm.
%
%   Uses enframe and melbankm (not defined in this file) and hamming.

    frame_len = 512;   % samples per frame, also the FFT length
    frame_hop = 285;   % samples between the starts of consecutive frames
    n_filters = 26;    % number of mel filters
    n_ceps = 12;       % cepstral coefficients kept (c1..c12)

    % Reduce multi-channel audio to one channel and force a column vector.
    if ~isvector(y)
        y = mean(y, 2);
    end
    y = y(:);
    % Zero-pad very short input so that at least one full frame exists;
    % otherwise the statistics below would be taken over zero frames.
    if numel(y) < frame_len
        y(frame_len, 1) = 0;
    end

    % Pre-emphasis: first-order high-pass y[n] - 0.97*y[n-1].
    yy = filter([1 -0.97], 1, y);

    % Split into frames (one frame per row) and apply a Hamming window to
    % every frame at once.
    frames = enframe(yy, frame_len, frame_hop);
    windowed = bsxfun(@times, frames, hamming(frame_len)');

    % Power spectrum of each frame; keep the first frame_len/2+1 = 257 bins.
    spectrum = fft(windowed, frame_len, 2);
    power_spectrum = abs(spectrum(:, 1:frame_len/2 + 1)).^2;

    % Mel filter bank energies, one column per frame.
    bank = melbankm(n_filters, frame_len, fs, 0, 0.5, 't');
    mel_energy = full(bank * power_spectrum');

    % Floor the energies before the logarithm: a silent frame has zero
    % energy, log10(0) is -Inf, and -Inf turns into NaN in the DCT and in
    % the variance below.
    log_mel_energy = log10(max(mel_energy, eps));

    % DCT-II basis rows for k = 1..12. Row k = 0 (the c0 / overall-energy
    % term, which is the coefficient that is conventionally discarded) is
    % simply never built, so the result is c1..c12 directly.
    k = (1:n_ceps)';
    n = 0:(n_filters - 1);
    dctcoef = cos(k * (2*n + 1) * pi / (2 * n_filters));
    mfcc = dctcoef * log_mel_energy;   % n_ceps rows, one column per frame

    % Summarise each coefficient's trajectory over time.
    mean_mfcc = mean(mfcc, 2);
    max_mfcc = max(mfcc, [], 2);
    min_mfcc = min(mfcc, [], 2);
    var_mfcc = var(mfcc, 0, 2);
    features = [mean_mfcc', max_mfcc', min_mfcc', var_mfcc'];

    % Last line of defence (e.g. NaN samples in the input): use a fixed,
    % reproducible value rather than random numbers, so the same file
    % always yields the same feature vector.
    features(~isfinite(features)) = 0;
end

報告和ppt鏈接: http://pan.baidu.com/s/1o8Uxmwa 密碼: diwc
參考資料:
[1] MATLAB添加VOICEBOX工具箱
[2]語音特徵參數MFCC提取過程詳解
[3]mfcc詳解
[4]支持向量機的MATLAB工具箱
[5]頂尖水平(並沒有多頂尖,只是數據比較漂亮)——-Speech Emotion Recognition Using Fourier Parameters

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章