從findsound中選取了8個大類,並且對每個大類中的5個小類(其中一個大類只有3個小類)進行了音頻特徵的提取,至多提取100個clip。提取的特徵以mean和variance的形式分類保存在了文件之中,並且每一個clip都有相應的特徵文件,這些文件和相應的clip保存在相應的目錄之中。
功能:根據給定的特徵的類型,從這些文件中選擇相應特徵值,組成針對每一個小類的arff weka文件,這些weka文件保存到一個統一的地方,這裏是保存在了list所在的目錄。
list文件也是針對每一個小類的,並且按照原有目錄的層次結構進行了組織。
function [ret,contents] = parseAfile(fpath,target)
%PARSEAFILE Extract the value lines of selected features from a feature file.
%   [RET,CONTENTS] = PARSEAFILE(FPATH,TARGET) reads the text file FPATH line
%   by line. Whenever a (trimmed) line is equal to one of the names in TARGET,
%   the line that follows it is taken as that feature's values and is split
%   at every single space. Lines that do not match a name are skipped one at
%   a time.
%
%   Inputs
%     FPATH  - path of the feature file.
%     TARGET - cell array of feature names; a single name may also be given
%              as a char array.
%
%   Outputs
%     RET      - 1 if the file was read, 0 if it does not exist or cannot be
%                opened (a message is printed in that case).
%     CONTENTS - 1-by-N cell, preallocated with one slot per TARGET name.
%                Slot k holds a 1-by-M cell of value strings for the k-th
%                matching feature in FILE order (not TARGET order). Every
%                value string keeps the single space that terminated it.
%                Slots for features that were not found stay empty.
%                On failure CONTENTS is cell(1,1).

% Accept a single feature name given as plain text.
if ischar(target)
    target={target};
end

if exist(fpath,'file')~=2
    fprintf('not exist %s \n',fpath);
    ret=0;
    contents=cell(1,1);
    return;
end

fid=fopen(fpath);
if fid<0
    % exist() said the file is there, but it still could not be opened.
    fprintf('cannot open %s \n',fpath);
    ret=0;
    contents=cell(1,1);
    return;
end
% Close the file on every exit path, including errors raised while parsing.
closer=onCleanup(@() fclose(fid)); %#ok<NASGU>

contents=cell(1,length(target));
count=1;
while ~feof(fid)
    featurename=fgetl(fid);
    if ~ischar(featurename)
        % fgetl returns numeric -1 once the end of the file is reached.
        break;
    end
    featurename=strtrim(featurename);
    if ~any(strcmp(featurename,target))
        continue;
    end

    featurevalue=fgetl(fid);
    if ~ischar(featurevalue)
        % A feature name on the very last line has no value line.
        break;
    end
    % Append one space so that every value, including the last one, is
    % terminated by a space and can be cut out in the loop below.
    featurevalue=sprintf('%s ',strtrim(featurevalue));
    index=strfind(featurevalue,' ');
    aline=cell(1,length(index));
    start=1;
    for j=1:length(index)
        aline{j}=featurevalue(1,start:index(j));
        start=index(j)+1;
    end
    contents{count}=aline;
    count=count+1;
end
ret=1;
% Build one Weka ARFF file per sound sub-category.
%
% For every list file named in alllist (looked up under the list2 folder) the
% script reads one entry per line, appends '-mean.txt' to it to get the clip's
% feature file, parses that file with parseAfile and writes one data row per
% clip to 'list2\<list file>.arff'. The attribute header of each ARFF file is
% taken from the first clip of the list that parses successfully, using
% getattributename. The class label of a row is the last folder name in the
% list entry (the text between its last two '/' characters).

% List files, one row per top-level category. The 7th category has only three
% sub-categories; its row is padded with empty entries that are skipped below.
alllist={ ...
    'animal\bat.txt','animal\cat.txt','animal\cow.txt','animal\dog.txt','animal\lamb.txt'; ...
    'household\bubbles.txt','household\clock.txt','household\door.txt','household\phone.txt','household\toilet.txt'; ...
    'musical+instruments\drum+rimshot.txt','musical+instruments\flute.txt','musical+instruments\guitar.txt','musical+instruments\piano.txt','musical+instruments\trumpet.txt'; ...
    'nature\fire.txt','nature\ocean.txt','nature\rain.txt','nature\thunder.txt','nature\wind.txt'; ...
    'office\coins.txt','office\modem.txt','office\mouse+click.txt','office\paper.txt','office\typewriter.txt'; ...
    'people\applause.txt','people\baby.txt','people\cough.txt','people\cry.txt','people\heartbeat.txt'; ...
    'sports+and+recreation\bowling.txt','sports+and+recreation\camera.txt','sports+and+recreation\cards.txt',[],[]; ...
    'vehicles\collision.txt','vehicles\engine.txt','vehicles\helicopter.txt','vehicles\ship.txt','vehicles\train.txt'};
[r,c]=size(alllist);

% Names of the features to copy into the ARFF files.
target={'fluctuation','attacktime','attackslope','centroid','brightness', ...
    'skewness','kurtosis','spectentropy','flatness','irregularity', ...
    'zerocross','mfcc'};

% Every class label that may appear; written into each ARFF header.
allcategory={'bat','cat','cow','dog','lamb', ...
    'bubbles','clock','door','phone','toilet', ...
    'drum+rimshot','flute','guitar','piano','trumpet', ...
    'fire','ocean','rain','thunder','wind', ...
    'coins','modem','mouse+click','paper','typewriter', ...
    'applause','baby','cough','cry','heartbeat', ...
    'bowling','camera','cards', ...
    'collision','engine','helicopter','ship','train'};

% Comma-separated class list for the '@attribute class {...}' line.
classlist=sprintf('%s,',allcategory{:});
classlist(end)=[];

% sprintf format that turns a list entry into the full feature-file path.
featurefmt='F:\\研究——音頻事件檢測\\findsound result\\%s';

for i=1:r
    for j=1:c
        alist=alllist{i,j};
        if isempty(alist)
            continue;
        end

        aalist=sprintf('list2\\%s',alist);
        fid=fopen(aalist);
        if fid<0
            fprintf('cannot open list %s \n',aalist);
            continue;
        end

        output=sprintf('list2\\%s.arff',alist);
        fidoutput=fopen(output,'w+');
        if fidoutput<0
            fprintf('cannot create %s \n',output);
            fclose(fid);
            continue;
        end
        fprintf('%s -> %s\n',aalist,output);

        fprintf(fidoutput,'@relation ''cpu''\n');
        hasattribute=0;   % the header is written once per ARFF file
        while ~feof(fid)
            entry=fgetl(fid);
            if ~ischar(entry)
                % fgetl returns numeric -1 at end of file.
                break;
            end
            if isempty(strtrim(entry))
                continue;
            end

            clipfile=sprintf('%s-mean.txt',entry);
            slashes=strfind(clipfile,'/');
            if length(slashes)<2
                % The class label is the text between the last two '/'.
                fprintf('skip malformed list entry %s \n',entry);
                continue;
            end
            category=clipfile(1,slashes(end-1)+1:slashes(end)-1);

            fullpath=sprintf(featurefmt,clipfile);
            [ret,contents]=parseAfile(fullpath,target);
            if ret~=1
                continue;
            end

            if hasattribute==0
                [retstat,order]=getattributename(fullpath,target);
                if retstat~=1
                    continue;
                end
                % order has one row per feature; length(order) would return
                % the larger dimension of the N-by-2 cell, so count rows.
                for k=1:size(order,1)
                    name=order{k,1};
                    le=order{k,2};
                    for kk=1:le
                        fprintf(fidoutput,'@attribute %s%d numeric\n',name,kk);
                    end
                end
                fprintf(fidoutput,'@attribute class {%s}\n',classlist);
                fprintf(fidoutput,'@data\n');
                hasattribute=1;
            end

            % One data row: every value of every feature, then the label.
            for k=1:length(contents)
                avalue=contents{1,k};
                for kk=1:length(avalue)
                    fprintf(fidoutput,'%s,',avalue{kk});
                end
            end
            fprintf(fidoutput,'%s\n',category);
        end
        fclose(fid);
        fclose(fidoutput);
    end
end
上面的代碼指定了所有list的位置、所有的類別以及所要提取的特徵的類型,通過調用腳本一(parseAfile)得到這些特徵,並且把這些特徵輸出爲arff文件。
function [ret,orderattribute] = getattributename(fpath,target)
%GETATTRIBUTENAME List the selected features of a file with their value counts.
%   [RET,ORDERATTRIBUTE] = GETATTRIBUTENAME(FPATH,TARGET) reads the text file
%   FPATH line by line. Whenever a (trimmed) line is equal to one of the names
%   in TARGET, the line that follows it is taken as that feature's values and
%   the number of values is counted (number of single spaces in the trimmed
%   line, plus one).
%
%   Inputs
%     FPATH  - path of the feature file.
%     TARGET - cell array of feature names; a single name may also be given
%              as a char array.
%
%   Outputs
%     RET            - 1 if the file was read, 0 if it does not exist or
%                      cannot be opened (a message is printed in that case).
%     ORDERATTRIBUTE - N-by-2 cell, preallocated with one row per TARGET
%                      name. Row k holds {name, count} for the k-th matching
%                      feature in FILE order (not TARGET order). Rows for
%                      features that were not found stay empty. On failure
%                      ORDERATTRIBUTE is cell(1,1).

% Accept a single feature name given as plain text.
if ischar(target)
    target={target};
end

if exist(fpath,'file')~=2
    fprintf('not exist %s \n',fpath);
    ret=0;
    orderattribute=cell(1,1);
    return;
end

fid=fopen(fpath);
if fid<0
    % exist() said the file is there, but it still could not be opened.
    fprintf('cannot open %s \n',fpath);
    ret=0;
    orderattribute=cell(1,1);
    return;
end
% Close the file on every exit path, including errors raised while parsing.
closer=onCleanup(@() fclose(fid)); %#ok<NASGU>

orderattribute=cell(length(target),2);
count=1;
while ~feof(fid)
    featurename=fgetl(fid);
    if ~ischar(featurename)
        % fgetl returns numeric -1 once the end of the file is reached.
        break;
    end
    featurename=strtrim(featurename);
    if ~any(strcmp(featurename,target))
        continue;
    end

    featurevalue=fgetl(fid);
    if ~ischar(featurevalue)
        % A feature name on the very last line has no value line.
        break;
    end
    % Values are separated by single spaces, so the count is the number of
    % spaces in the trimmed line plus one.
    attrlen=length(strfind(strtrim(featurevalue),' '))+1;
    orderattribute{count,1}=featurename;
    orderattribute{count,2}=attrlen;
    count=count+1;
end
ret=1;
getattributename的功能是返回文件中實際出現的所有目標特徵及其維數,並且按照特徵在文件中的實際順序,而不是按照target指定的順序,其結果可以作爲arff文件中的attribute輸出。