文章目錄
特徵相關腳本分析
在以下腳本
steps/make_mfcc.sh
steps/make_mfcc_pitch.sh
steps/make_fbank.sh
steps/make_fbank_pitch.sh
steps/compute_cmvn_stats.sh
中涉及到的命令以及腳本情況:
命令行 | 作用 |
---|---|
腳本 | 作用 |
---|---|
steps/compute_cmvn_stats.sh
compute_cmvn_stats.sh 用於計算所提取特徵的 CMVN 統計量;CMVN 即倒譜均值方差歸一化(Cepstral Mean and Variance Normalization)
提取特徵腳本 (steps/make_mfcc.sh steps/make_mfcc_pitch.sh steps/make_fbank.sh steps/make_fbank_pitch.sh )
NOTE:以上所有腳本中所必需的文件爲wav.scp
這個腳本的輸入參數有三個:1. data/train 2. exp/make_mfcc/train 3. mfcc
1. 中有數據預處理後的一些文件:text utt2spk wav.scp
2. 用於保存程序運行的日誌文件
3. 用於保存提取出的特徵文件
1 是輸入目錄,2、3 是輸出目錄
kaldi中以上腳本在進行提取特徵的過程中,存在着相似的結構。因此將以上四個腳本放在一塊進行記錄分析。
在此記錄中,使用 steps/make_mfcc_pitch.sh 進行基本分析,其腳本與其他腳本的區別僅僅在特徵提取的過程。
以下內容首先完成特徵提取的配置,包括各參數默認值以及配置文件路徑的初始化
# Begin configuration section.
# Default settings; each one has a matching --option in the usage text below.
nj=4                          # number of parallel jobs
cmd=run.pl                    # how to run jobs
mfcc_config=conf/mfcc.conf    # config passed to compute-mfcc-feats
pitch_config=conf/pitch.conf  # config passed to compute-kaldi-pitch-feats
pitch_postprocess_config=     # config passed to process-kaldi-pitch-feats (optional)
paste_length_tolerance=2      # length tolerance passed to paste-feats
compress=true                 # forwarded to copy-feats as --compress
write_utt2num_frames=false    # if true writes utt2num_frames
# End configuration section.

# Print the script name together with all of its arguments, for logging.
echo "$0 $*"

# Source the environment set-up when a path.sh is present in the current
# directory, then let parse_options.sh process the command-line options.
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1

# Between one and three positional arguments are expected at this point;
# anything else prints the usage text and stops.
if (( $# < 1 || $# > 3 )); then
  printf '%s\n' \
    "Usage: $0 [options] <data-dir> [<log-dir> [<mfcc-dir>] ]" \
    "e.g.: $0 data/train exp/make_mfcc/train mfcc" \
    "Note: <log-dir> defaults to <data-dir>/log, and <mfcc-dir> defaults to <data-dir>/data" \
    "Options: " \
    " --mfcc-config <mfcc-config-file> # config passed to compute-mfcc-feats " \
    " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats " \
    " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats " \
    " --paste-length-tolerance <tolerance> # length tolerance passed to paste-feats" \
    " --nj <nj> # number of parallel jobs" \
    " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." \
    " --write-utt2num-frames <true|false> # If true, write utt2num_frames file."
  exit 1
fi
以下腳本根據傳入的位置參數個數,決定日誌目錄和特徵輸出目錄的位置
# Positional arguments:
#   $1  data directory
#   $2  log directory; falls back to <data-dir>/log when not supplied
#   $3  feature output directory; falls back to <data-dir>/data when not supplied
# The "-" expansion form (without a colon) substitutes the default only when
# the argument is absent, so an explicitly passed empty string is kept as-is.
data=$1
logdir=${2-$data/log}
mfcc_pitch_dir=${3-$data/data}
特徵提取過程中必需的文件爲 wav.scp,以及 $mfcc_config、$pitch_config 所指定的兩個配置文件
# make $mfcc_pitch_dir an absolute pathname.
# A path that does not already start with "/" gets the current working
# directory prepended. Arguments are quoted so that a directory name
# containing whitespace or glob characters reaches perl as a single word.
mfcc_pitch_dir=$(perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' "$mfcc_pitch_dir" "${PWD}")

# use "name" as part of name of the archive.
name=$(basename "$data")

# Create the feature output directory and the log directory.
mkdir -p "$mfcc_pitch_dir" || exit 1
mkdir -p "$logdir" || exit 1

# A feats.scp left over from an earlier run is moved into $data/.backup
# instead of being overwritten.
if [ -f "$data/feats.scp" ]; then
  mkdir -p "$data/.backup"
  echo "$0: moving $data/feats.scp to $data/.backup"
  mv "$data/feats.scp" "$data/.backup"
fi

# wav.scp is the list of audio files that features are extracted from.
scp=$data/wav.scp

# wav.scp and both config files must exist before any work starts. Each path
# is checked as one quoted word, so an empty or space-containing value is
# reported here rather than being silently skipped or split.
for f in "$scp" "$mfcc_config" "$pitch_config"; do
  if [ ! -f "$f" ]; then
    echo "make_mfcc_pitch.sh: no such file $f"
    exit 1
  fi
done

# Check the contents of $data with validate_data_dir.sh (text and feats are
# excluded from the check via --no-text / --no-feats).
utils/validate_data_dir.sh --no-text --no-feats "$data" || exit 1
TODO:隨後補充內容
# Turn the optional pitch post-processing config into a --config option
# (it is later placed on the process-kaldi-pitch-feats command line);
# left empty when no config was given.
if [ -n "$pitch_postprocess_config" ]; then
  postprocess_config_opt="--config=$pitch_postprocess_config"
else
  postprocess_config_opt=
fi

# VTLN warp factors: $data/spk2warp is preferred (used together with utt2spk),
# otherwise $data/utt2warp is used if present.
if [ -f "$data/spk2warp" ]; then
  echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
  vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
elif [ -f "$data/utt2warp" ]; then
  echo "$0 [info]: using VTLN warp factors from $data/utt2warp"
  vtln_opts="--vtln-map=ark:$data/utt2warp"
else
  # Neither file exists: reset explicitly so that a vtln_opts value inherited
  # from the calling environment cannot end up on the compute-mfcc-feats
  # command line.
  vtln_opts=
fi
以下內容根據是否存在 segments 文件選擇相應的處理方式,並調用腳本並行提取特徵,如下:
# One call per job for the output archive name.
for n in $(seq $nj); do
  # the next command does nothing unless $mfcc_pitch_dir/storage/ exists, see
  # utils/create_data_link.pl for more info.
  utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.ark
done

# Extra copy-feats option; stays empty unless write_utt2num_frames is true,
# in which case frame counts go to $logdir/utt2num_frames.JOB.
write_num_frames_opt=
if $write_utt2num_frames; then
  write_num_frames_opt="--write-num-frames=ark,t:$logdir/utt2num_frames.JOB"
fi

# Build the two input pipelines (MFCC and pitch). The only difference between
# the branches is where the audio comes from: wav.scp directly, or pieces cut
# out by extract-segments when a segments file exists.
if [ ! -f $data/segments ]; then
  echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
  # Collect the $nj per-job list names and hand them to utils/split_scp.pl
  # together with wav.scp.
  split_scps=""
  for n in $(seq $nj); do
    split_scps+=" $logdir/wav_${name}.$n.scp"
  done
  utils/split_scp.pl $scp $split_scps || exit 1

  mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
  pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
else
  echo "$0 [info]: segments file exists: using that."
  # Same splitting step, applied to the segments file instead of wav.scp.
  split_segments=""
  for n in $(seq $nj); do
    split_segments+=" $logdir/segments.$n"
  done
  utils/split_scp.pl $data/segments $split_segments || exit 1
  rm $logdir/.error 2>/dev/null

  mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
  pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
fi

# Launch jobs 1..$nj through $cmd: paste the MFCC and pitch streams together
# and let copy-feats write raw_mfcc_pitch_$name.JOB.ark / .scp into
# $mfcc_pitch_dir (JOB is presumably replaced by the job number by $cmd).
# The pipe is escaped so that it is passed on to $cmd rather than being
# interpreted by this shell.
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
  paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
  copy-feats --compress=$compress $write_num_frames_opt ark:- \
  ark,scp:$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.scp \
  || exit 1
特徵提取完成之後,進行後續處理,如下:
# If an error marker for this data set exists in the log directory, show the
# tail of the first job's log and stop.
if [ -f "$logdir/.error.$name" ]; then
  echo "Error producing mfcc & pitch features for $name:"
  tail "$logdir/make_mfcc_pitch_${name}.1.log"
  exit 1
fi

# concatenate the .scp files together.
# The per-job lists are appended in job order to form $data/feats.scp.
for n in $(seq "$nj"); do
  cat "$mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.scp" || exit 1
done > "$data/feats.scp"

# Likewise merge the per-job frame counts, then drop the per-job pieces.
if $write_utt2num_frames; then
  for n in $(seq "$nj"); do
    cat "$logdir/utt2num_frames.$n" || exit 1
  done > "$data/utt2num_frames" || exit 1
  rm "$logdir"/utt2num_frames.*
fi

# Remove the intermediate split lists. Only one of the two kinds was created,
# so the complaint about the missing one is deliberately silenced.
rm "$logdir"/wav_"${name}".*.scp "$logdir"/segments.* 2>/dev/null

# Compare the number of feature entries with the number of utterances.
nf=$(wc -l < "$data/feats.scp")
nu=$(wc -l < "$data/utt2spk")
if (( nf != nu )); then
  echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
  echo "consider using utils/fix_data_dir.sh $data"
fi

# More than 1/20 of the utterances missing (integer division) is treated as
# a fatal error.
if (( nf < nu - nu / 20 )); then
  echo "Less than 95% the features were successfully generated. Probably a serious error."
  exit 1
fi

echo "Succeeded creating MFCC & Pitch features for $name"