可供參考的項目有:
https://github.com/jayaram1125/Single-Word-Speech-Recognition-using-GMM-HMM-
數據集得自己造:
# -----------------------------------------------------------------------------------------------------
'''
&usage: HMM-GMM的孤立詞識別模型
@author: hongwen sun
'''
# -----------------------------------------------------------------------------------------------------
# 導入依賴庫,特別需要注意hmmlearn
from python_speech_features import mfcc
from scipy.io import wavfile
from hmmlearn import hmm
from sklearn.externals import joblib
import numpy as np
import os
# -----------------------------------------------------------------------------------------------------
'''
&usage: 準備所需數據
'''
# -----------------------------------------------------------------------------------------------------
# 生成wavdict,key=wavid,value=wavfile
def gen_wavlist(wavpath):
    """Index every ``.wav`` file found under *wavpath*.

    The directory tree is walked recursively. Each file id is the file name
    without its ``.wav`` extension, and the label is the second
    underscore-separated field of that id (``<prefix>_<label>[_...]``).

    Args:
        wavpath: Root directory to scan.

    Returns:
        A ``(wavdict, labeldict)`` tuple where ``wavdict`` maps file id to
        file path and ``labeldict`` maps file id to label.

    Raises:
        IndexError: If a ``.wav`` file name contains no underscore.

    Note:
        Ids are derived from the bare file name, so two files with the same
        name in different sub-directories share one id and the later one
        overwrites the earlier one.
    """
    wavdict = {}
    labeldict = {}
    for dirpath, _dirnames, filenames in os.walk(wavpath):
        for filename in filenames:
            if not filename.endswith('.wav'):
                continue
            # str.strip('.wav') removes any of the characters '.', 'w', 'a',
            # 'v' from BOTH ends (e.g. 'wave_1.wav' -> 'e_1'); drop only the
            # extension instead.
            fileid = os.path.splitext(filename)[0]
            wavdict[fileid] = os.path.join(dirpath, filename)
            labeldict[fileid] = fileid.split('_')[1]
    return wavdict, labeldict
# 特徵提取,feat = compute_mfcc(wavdict[wavid])
def compute_mfcc(file):
    """Read the WAV file at *file* and return its MFCC features.

    Args:
        file: Path (or file object) accepted by ``scipy.io.wavfile.read``.

    Returns:
        Whatever ``python_speech_features.mfcc`` returns for the audio when
        asked for 26 cepstral coefficients.
    """
    sample_rate, signal = wavfile.read(file)
    # The author deliberately passes half the real sample rate, which acts a
    # bit like shrinking the analysis step. It is a shortcut hack and is not
    # recommended.
    halved_rate = sample_rate / 2
    return mfcc(signal, samplerate=halved_rate, numcep=26)
# -----------------------------------------------------------------------------------------------------
'''
&usage: 搭建HMM-GMM的孤立詞識別模型
參數意義:
CATEGORY: 所有標籤的列表
n_comp: 每個孤立詞中的狀態數
n_mix: 每個狀態包含的混合高斯數量
cov_type: 協方差矩陣的類型
n_iter: 訓練迭代次數
'''
# -----------------------------------------------------------------------------------------------------
class Model():
    """Isolated-word recognizer holding one GMM-HMM per label.

    Args:
        CATEGORY: Sequence of all labels; one model is created per entry.
            ``None`` is treated as an empty sequence.
        n_comp: Number of hidden states in each word model.
        n_mix: Number of Gaussian mixture components per state.
        cov_type: Covariance matrix type passed to ``hmm.GMMHMM``.
        n_iter: Maximum number of training iterations.
    """

    def __init__(self, CATEGORY=None, n_comp=3, n_mix=3, cov_type='diag', n_iter=1000):
        super().__init__()
        # The declared default is None; len(None) used to crash here.
        self.CATEGORY = CATEGORY if CATEGORY is not None else []
        self.category = len(self.CATEGORY)
        self.n_comp = n_comp
        self.n_mix = n_mix
        self.cov_type = cov_type
        self.n_iter = n_iter
        # Key step: build one identically-configured model per label.
        self.models = [
            hmm.GMMHMM(n_components=self.n_comp, n_mix=self.n_mix,
                       covariance_type=self.cov_type, n_iter=self.n_iter)
            for _ in range(self.category)
        ]

    def train(self, wavdict=None, labeldict=None):
        """Fit each label's model on all training utterances of that label.

        Args:
            wavdict: Mapping of file id to WAV path.
            labeldict: Mapping of file id to label.

        Labels that have no utterance in *wavdict* leave their model unfitted.
        """
        wavdict = wavdict or {}
        labeldict = labeldict or {}
        # Iterate over the actual categories instead of a hard-coded 10.
        for target, model in zip(self.CATEGORY, self.models):
            feats = [compute_mfcc(wavdict[x])
                     for x in wavdict if labeldict[x] == target]
            if not feats:
                continue
            # Fitting once per file would restart training on every call and
            # keep only the last utterance. Pass all sequences at once,
            # concatenated, with their individual lengths.
            model.fit(np.vstack(feats), lengths=[len(f) for f in feats])

    def test(self, wavdict=None, labeldict=None):
        """Classify every utterance and print/return the recognition rate.

        Each utterance is scored by every model and assigned the label of the
        highest-scoring one.

        Args:
            wavdict: Mapping of file id to WAV path.
            labeldict: Mapping of file id to the true label.

        Returns:
            The fraction of correctly recognized utterances (0.0 when
            *wavdict* is empty).

        Raises:
            ValueError: If there are no models to score against.
        """
        if not self.models:
            raise ValueError('no models available to score against')
        ids = list(wavdict or {})
        label = [labeldict[x] for x in ids]
        # Extract features once per file rather than once per (model, file).
        feats = [compute_mfcc(wavdict[x]) for x in ids]
        if feats:
            # Rows are models, columns are utterances.
            scores = np.array([[model.score(f) for f in feats]
                               for model in self.models])
            # Pick the best-scoring model per utterance and map to its label.
            result = [self.CATEGORY[i] for i in scores.argmax(axis=0)]
        else:
            result = []
        print('識別得到結果:\n',result)
        print('原始標籤類別:\n',label)
        # Recognition rate = correctly recognized / total.
        totalnum = len(label)
        correctnum = sum(1 for got, want in zip(result, label) if got == want)
        accuracy = correctnum / totalnum if totalnum else 0.0
        print('識別率:', accuracy)
        return accuracy

    def save(self, path="models.pkl"):
        """Persist the list of HMM models to *path* with joblib.

        NOTE(review): ``sklearn.externals.joblib`` was removed in
        scikit-learn 0.23; on newer versions the file-level import has to be
        the standalone ``joblib`` package.
        """
        joblib.dump(self.models, path)

    def load(self, path="models.pkl"):
        """Replace ``self.models`` with the models stored at *path*.

        WARNING: joblib files are pickle-based; only load files you trust.
        """
        self.models = joblib.load(path)
# -----------------------------------------------------------------------------------------------------
'''
&usage: 使用模型進行訓練和識別
'''
# -----------------------------------------------------------------------------------------------------
# Prepare the data needed for training and evaluation.
CATEGORY = [str(digit) for digit in range(1, 11)]
wavdict, labeldict = gen_wavlist('training_data')
testdict, testlabel = gen_wavlist('test_data')
# Train, round-trip the models through disk, then evaluate on both the
# training set and the held-out test set.
models = Model(CATEGORY=CATEGORY)
models.train(wavdict, labeldict)
models.save()
models.load()
models.test(wavdict, labeldict)
models.test(testdict, testlabel)