Time Series Classification Algorithm: LPS (Code Analysis)

// Weka imports needed by this listing. AbstractClassifierWithTrainingData,
// ParameterSplittable and RandomRegressionTree are support classes from the
// surrounding codebase and are not shown here.
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class LPS extends AbstractClassifierWithTrainingData
        implements ParameterSplittable{
    RandomRegressionTree[] trees;

    public static final int PARASEARCH_NOS_TREES=25;
    public static final int DEFAULT_NOS_TREES=200;
    int nosTrees=DEFAULT_NOS_TREES;//number of trees in the forest
    int nosSegments=20;
    double[] ratioLevels={0.01,0.1,0.25,0.5};//candidate minimum-leaf-size ratios, used in the parameter search
    double[] segmentProps={0.05,0.1,0.25,0.5,0.75,0.95};//candidate segment-length proportions
    double segmentProp=segmentProps[0];
    double ratioLevel=ratioLevels[0];//minimum number of instances per leaf, as a ratio of the data set size
    int[] treeDepths={2,4,6};//candidate tree depths, used in the parameter search
    int treeDepth=treeDepths[2];//tree depth
    int[] segLengths;//subsequence (segment) length chosen for each tree
    int[][] segStarts;//subsequence start indices
    int[][] segDiffStarts;//start indices of the difference (derivative) subsequences
    Instances sequences;
    int[] nosLeafNodes;//number of leaf nodes in each tree
    int[][][] leafNodeCounts;//how often each instance falls into each leaf node of each tree
    double[] trainClassVals;
    int[] classAtt;
    boolean paramSearch=true;//whether to run the parameter search
    double acc=0;

    public LPS(){
        trees=new RandomRegressionTree[nosTrees];
    }

    public String globalInfo() {
        return "Blah";
    }

    @Override
    public void setParamSearch(boolean b) {
        paramSearch=b;
    }

    @Override
    public void setParametersFromIndex(int x) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public String getParas() {
        return ratioLevel+","+treeDepth;
    }

    @Override
    public double getAcc() {
        return acc;
    }

    @Override
    public void buildClassifier(Instances data) throws Exception {
        trainResults.buildTime=System.currentTimeMillis();
        //parameter search: choose the minimum-leaf-size ratio and the tree depth by cross-validation
        if(paramSearch){
            double bestErr=1;
            int bestRatio=0;
            int bestTreeDepth=0;
            LPS trainer=new LPS();
            trainer.nosTrees=50;
            trainer.setParamSearch(false);
            int folds=10;
            for(int i=0;i<ratioLevels.length;i++){
                trainer.ratioLevel=ratioLevels[i];
                for(int j=0;j<treeDepths.length;j++){
                    trainer.treeDepth=treeDepths[j];
                    Evaluation eval=new Evaluation(data);
                    eval.crossValidateModel(trainer, data, folds, new Random());
                    double e=eval.errorRate();
                    if(e<bestErr){
                        bestErr=e;
                        bestTreeDepth=j;
                        bestRatio=i;
                    }
                }
            }
            ratioLevel=ratioLevels[bestRatio];
            treeDepth=treeDepths[bestTreeDepth];
        }
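        // The search above tries every combination in the 4x3 grid of
        // ratioLevels x treeDepths (12 settings), scoring each with 10-fold
        // cross-validation on a smaller 50-tree forest, and keeps the setting
        // with the lowest cross-validated error rate.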
        int seriesLength=data.numAttributes()-1;
        int minSegment=(int)(seriesLength*0.1);//minimum segment (subsequence) length
        int maxSegment=(int)(seriesLength*0.9);//maximum segment (subsequence) length
        //initialise the arrays
        segLengths=new int[nosTrees];
        nosLeafNodes=new int[nosTrees];
        segStarts=new int[nosTrees][nosSegments];
        segDiffStarts=new int[nosTrees][nosSegments];
        leafNodeCounts=new int[data.numInstances()][nosTrees][];
        trainClassVals=new double[data.numInstances()];
        //store the class label of each training instance
        for(int i=0;i<data.numInstances();i++)
            trainClassVals[i]=data.instance(i).classValue();
        //stores the attribute used as the regression target (dependent variable) of each tree
        classAtt=new int[nosTrees];
        //random number generator
        Random r=new Random();
        //build the trees one by one
        for(int i=0;i<nosTrees;i++){
            //choose a random segment length for this tree
            segLengths[i]=minSegment+r.nextInt(maxSegment-minSegment);
            //randomly choose the segment start positions for this tree
            for(int j=0;j<nosSegments;j++){
                segStarts[i][j]=r.nextInt(seriesLength-segLengths[i]);
                segDiffStarts[i][j]=r.nextInt(seriesLength-segLengths[i]-1);
            }
            //generate the training instances for this tree
            FastVector atts=new FastVector();
            String name;
            for(int j=0;j<2*nosSegments;j++){
                name="SegFeature"+j;
                atts.addElement(new Attribute(name));
            }
            sequences=new Instances("SubsequenceIntervals",
                                    atts,segLengths[i]*data.numInstances());
            //fill in the instance values, including the difference (derivative) values
            for(int j=0;j<data.numInstances();j++){
                Instance series=data.instance(j);
                for(int k=0;k<segLengths[i];k++){
                    DenseInstance in=new DenseInstance(sequences.numAttributes());
                    for(int m=0;m<nosSegments;m++)
                        in.setValue(m, series.value(segStarts[i][m]+k));
                    for(int m=0;m<nosSegments;m++)
                        in.setValue(nosSegments+m, series.value(segDiffStarts[i][m]+k)-series.value(segDiffStarts[i][m]+k+1));
                    sequences.add(in);
                }
            }
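            // Example of the transform (with hypothetical numbers): if nosSegments=20 and
            // segLengths[i]=30, each training series contributes 30 rows to 'sequences',
            // and each row has 40 attributes: 20 raw values read at offset k from the
            // randomly chosen segment starts, plus 20 first differences read at offset k
            // from the randomly chosen difference-segment starts.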
            //pick one attribute at random as the tree's regression target (dependent variable)
            classAtt[i]=r.nextInt(sequences.numAttributes());
            sequences.setClassIndex(classAtt[i]);
            //build the regression tree
            trees[i]=new RandomRegressionTree();
            trees[i].setMaxDepth(treeDepth);
            trees[i].setKValue(1);
            trees[i].setMinNum((int)(sequences.numInstances()*ratioLevel));
            trees[i].buildClassifier(sequences);
            nosLeafNodes[i]=trees[i].nosLeafNodes;
            //count how often each training instance falls into each leaf node of this tree
            for(int j=0;j<data.numInstances();j++){
                leafNodeCounts[j][i]=new int[trees[i].nosLeafNodes];
                for(int k=0;k<segLengths[i];k++){
                    trees[i].distributionForInstance(sequences.instance(j*segLengths[i]+k));
                    int leafID=RandomRegressionTree.lastNode;
                    leafNodeCounts[j][i][leafID]++;
                }
            }
        }
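        // At this point every training series j is represented by leafNodeCounts[j]:
        // one histogram per tree recording how many of its subsequence rows ended up
        // in each leaf. classifyInstance compares test series against these
        // bag-of-leaf-node histograms rather than against the raw series.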
        sequences=null;
        System.gc();
    }

    //distance between two transformed instances (sum of absolute differences of the leaf-node counts)
    public double distance(int[][] test, int[][] train){
        double d=0;
        for(int i=0;i<test.length;i++)
            for(int j=0;j<test[i].length;j++){
                double x=(test[i][j]-train[i][j]);
                if(x>0)
                    d+=x;
                else
                    d+=-x;
            }
        return d;
    }
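    // Worked example (hypothetical counts): with two trees of two leaves each,
    // test={{2,1},{0,3}} and train={{1,1},{1,2}} give
    // |2-1| + |1-1| + |0-1| + |3-2| = 3.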
    //classification
    public double classifyInstance(Instance ins) throws Exception{
        //stores the transformed test series
        int[][] testNodeCounts=new int[nosTrees][];
        //transform the test series with the regression trees built during training
        for(int i=0;i<nosTrees;i++){
            FastVector atts=new FastVector();
            String name;
            for(int j=0;j<2*nosSegments;j++){
                name="SegFeature"+j;
                atts.addElement(new Attribute(name));
            }
            sequences=new Instances("SubsequenceIntervals",atts,segLengths[i]);
            for(int k=0;k<segLengths[i];k++){
                DenseInstance in=new DenseInstance(sequences.numAttributes());
                for(int m=0;m<nosSegments;m++)
                    in.setValue(m, ins.value(segStarts[i][m]+k));
                for(int m=0;m<nosSegments;m++)
                    in.setValue(nosSegments+m, ins.value(segDiffStarts[i][m]+k)
                                               -ins.value(segDiffStarts[i][m]+k+1));
                sequences.add(in);
            }
            sequences.setClassIndex(classAtt[i]);
            testNodeCounts[i]=new int[trees[i].nosLeafNodes];
            for(int k=0;k<sequences.numInstances();k++){
                trees[i].distributionForInstance(sequences.instance(k));
                int leafID=RandomRegressionTree.lastNode;
                testNodeCounts[i][leafID]++;
            }
        }
        //1-nearest-neighbour search over the training leaf-node counts
        double minDist=Double.MAX_VALUE;
        int closest=0;
        for(int i=0;i<leafNodeCounts.length;i++){
            double d=distance(testNodeCounts,leafNodeCounts[i]);
            if(d<minDist){
                minDist=d;
                closest=i;
            }
        }
        return trainClassVals[closest];
    }
}
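
A minimal sketch of how the classifier above might be driven, assuming Weka-style ARFF files whose last attribute is the class label. The file names and the LPSExample wrapper are illustrative only, and LPS plus its support classes (AbstractClassifierWithTrainingData, ParameterSplittable, RandomRegressionTree) must be on the classpath:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class LPSExample {
    public static void main(String[] args) throws Exception {
        // Load train/test splits; the class attribute is assumed to be the last one.
        Instances train = DataSource.read("ItalyPowerDemand_TRAIN.arff"); // hypothetical file
        Instances test  = DataSource.read("ItalyPowerDemand_TEST.arff");  // hypothetical file
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        LPS lps = new LPS();
        lps.setParamSearch(false);  // skip the cross-validated grid search for a quick run
        lps.buildClassifier(train);

        // Simple 0/1 accuracy over the test split.
        int correct = 0;
        for (int i = 0; i < test.numInstances(); i++) {
            if (lps.classifyInstance(test.instance(i)) == test.instance(i).classValue())
                correct++;
        }
        System.out.println("Accuracy: " + (double) correct / test.numInstances());
    }
}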

 
