时间序列分类算法之LPS(代码分析)

   public class LPS extends AbstractClassifierWithTrainingData 
02:    implements ParameterSplittable{
03:    RandomRegressionTree[] trees;
04:    
05:    public static final int PARASEARCH_NOS_TREES=25;
06:    public static final int DEFAULT_NOS_TREES=200;
07:    int nosTrees=DEFAULT_NOS_TREES;//森林中树的个数
08:    int nosSegments=20;
09:    double[] ratioLevels={0.01,0.1,0.25,0.5};
10:    double[] segmentProps={0.05,0.1,0.25,0.5,0.75,0.95};//每棵树最少实例个数比例数组,用于参数搜索
11:    double segmentProp=segmentProps[0];
12:    double ratioLevel=ratioLevels[0];//每棵树最少实例个数比例
13:    int[] treeDepths={2,4,6};//树的深度数组,用于搜索最优参数
14:    int treeDepth=treeDepths[2];//树的深度
15:    int[] segLengths;//子序列长度数组
16:    int[][] segStarts;//子序列开始下标
17:    int[][] segDiffStarts;//导数子序列开始下标
18:    Instances sequences;
19:    int[] nosLeafNodes;//每棵树叶子节点的个数
20:    int[][][] leafNodeCounts;//每个实例在每棵树中各个叶子节点出现的次数
21:    double[] trainClassVals;
22:    int[] classAtt;
23:    boolean paramSearch=true;//是否进行最优化参数搜索
24:    double acc=0;
25:    public LPS(){
26:        trees=new RandomRegressionTree[nosTrees];
27:    }
28:
29:    public String globalInfo() {
30:        return "Blah";
31:    }
32:  
33:    @Override
34:    public void setParamSearch(boolean b) {
35:        paramSearch=b;
36:    }
37:
38:    @Override
39:    public void setParametersFromIndex(int x) {
40:        throw new UnsupportedOperationException("Not supported yet."); 
41:    }
42:    @Override
43:    public String getParas() {
44:        return ratioLevel+","+treeDepth;
45:    }
46:    @Override
47:    public double getAcc() {
48:        return acc;
49:    }
50:    
51:    @Override
52:    public void buildClassifier(Instances data) throws Exception {
53:         trainResults.buildTime=System.currentTimeMillis();
54:        //最优化参数搜索,确定最小实例树,树的深度
55:        if(paramSearch){
56:            double bestErr=1;
57:            int bestRatio=0;
58:            int bestTreeDepth=0;
59:            LPS trainer=new LPS();
60:            trainer.nosTrees=50;
61:            trainer.setParamSearch(false);
62:            int folds=10;
63:            for(int i=0;i<ratioLevels.length;i++){
64:                trainer.ratioLevel=ratioLevels[i];
65:                for(int j=0;j<treeDepths.length;j++){
66:                    trainer.treeDepth=treeDepths[j];
67:                    Evaluation eval=new Evaluation(data);
68:                    eval.crossValidateModel(trainer, data, folds,new
69:                                            Random());
70:                    double e=eval.errorRate();
71:                    if(e<bestErr){
72:                        bestErr=e;
73:                        bestTreeDepth=j;
74:                        bestRatio=i;
75:                    }
76:                }
77:            }
78:            ratioLevel=ratioLevels[bestRatio];
79:            treeDepth=treeDepths[bestTreeDepth];
80:          }
81:        int seriesLength=data.numAttributes()-1;
82:        int minSegment=(int)(seriesLength*0.1);//最小片段(子序列)长度
83:        int maxSegment=(int)(seriesLength*0.9);//最大片段(子序列)长度
84:        //初始化数组
85:        segLengths=new int[nosTrees];
86:        nosLeafNodes=new int[nosTrees];
87:        segStarts=new int[nosTrees][nosSegments];
88:        segDiffStarts=new int[nosTrees][nosSegments];
89:        leafNodeCounts=new int[data.numInstances()][nosTrees][];
90:        trainClassVals=new double[data.numInstances()];
91:        //存储实例类标签
92:        for(int i=0;i<data.numInstances();i++)
93:            trainClassVals[i]=data.instance(i).classValue();
94:        //用于保存自回归树中的因变量属性
95:        classAtt=new int[nosTrees];
96:        //随机函数
97:        Random r= new Random();
98:         //遍历建立树
99:        for(int i=0;i<nosTrees;i++){    
100:            //为每棵树选择随机的段长度
101:            segLengths[i]=minSegment+r.nextInt(maxSegment-minSegment);
102:            //    为每棵树随机选择目标细分
103:            for(int j=0;j<nosSegments;j++){
104:                segStarts[i][j]=r.nextInt(seriesLength-segLengths[i]);
105:                segDiffStarts[i][j]=r.nextInt(seriesLength-segLengths[i]-1);
106:            }
107:            //为每一棵树生成实例
108:            FastVector atts=new FastVector();
109:            String name;
110:            for(int j=0;j<2*nosSegments;j++){
111:                    name = "SegFeature"+j;
112:                    atts.addElement(new Attribute(name));
113:            }
114:            sequences = new Instances("SubsequenceIntervals",
115:                              atts,segLengths[i]*data.numInstances());            
116:            //填充实例的值,包括导数值
117:            for(int j=0;j<data.numInstances();j++){
118:                Instance series=data.instance(j);
119:                for(int k=0;k<segLengths[i];k++){
120:                    DenseInstance in=new DenseInstance(sequences.numAttributes());
121:                    for(int m=0;m<nosSegments;m++)
122:                        in.setValue(m, series.value(segStarts[i][m]+k));
123:                    for(int m=0;m<nosSegments;m++)
124:                        in.setValue(nosSegments+m, series.value(segDiffStarts[i][m]+k)-series.value(segDiffStarts[i][m]+k+1));                     
125:                    sequences.add(in);    
126:                }
127:            }
128:            //选择随机的一个属性作为树的因变量
129:            classAtt[i]=r.nextInt(sequences.numAttributes());//
130:            sequences.setClassIndex(classAtt[i]);
131:            //建立回归树
132:            trees[i]= new RandomRegressionTree();
133:            trees[i].setMaxDepth(treeDepth);
134:            trees[i].setKValue(1);
135:            trees[i].setMinNum((int)(sequences.numInstances()*ratioLevel));
136:            trees[i].buildClassifier(sequences);
137:            nosLeafNodes[i]=trees[i].nosLeafNodes;
138:            //统计每个实例在每棵树的各个叶子节点上出现的次数
139:            for(int j=0;j<data.numInstances();j++){
140:                leafNodeCounts[j][i]=new int[trees[i].nosLeafNodes];
141:                for(int k=0;k<segLengths[i];k++){
142:                    trees[i].distributionForInstance(sequences.instance
143:                                                   (j*segLengths[i]+k));
144:                    int leafID=RandomRegressionTree.lastNode;
145:                    leafNodeCounts[j][i][leafID]++;
146:                }
147:            }
148:        }
149:        sequences=null;
150:        System.gc();
151:     }
152:     //转化后两实例的距离度量
153:    public double distance(int[][] test, int[][] train){
154:        double d=0;
155:        for(int i=0;i<test.length;i++)
156:            for(int j=0;j<test[i].length;j++){
157:                double x=(test[i][j]-train[i][j]);
158:                if(x>0)
159:                    d+=x;
160:                else
161:                    d+=-x;
162:            }
163:        return d;
164:    }
165:    //分类
166:    public double classifyInstance(Instance ins) throws Exception{
167:        //用于存储转化后的测试序列
168:        int[][] testNodeCounts=new int[nosTrees][];
169:        //使用训练阶段生成的多个回归树来转化测试序列
170:        for(int i=0;i<nosTrees;i++){    
171:            FastVector atts=new FastVector();
172:            String name;
173:            for(int j=0;j<2*nosSegments;j++){
174:                    name = "SegFeature"+j;
175:                    atts.addElement(new Attribute(name));
176:            }
177:            sequences = new Instances("SubsequenceIntervals",
178:                                             atts,segLengths[i]);            
179:            for(int k=0;k<segLengths[i];k++){
180:                DenseInstance in=new DenseInstance(sequences
181:                                         .numAttributes());
182:                for(int m=0;m<nosSegments;m++)
183:                  in.setValue(m, ins.value(segStarts[i][m]+k));
184:                for(int m=0;m<nosSegments;m++)
185:                  in.setValue(nosSegments+m, ins.value(segDiffStarts[i][m]+k)
186:                               -ins.value(segDiffStarts[i][m]+k+1));
187:                sequences.add(in);
188:            }            
189:            sequences.setClassIndex(classAtt[i]);
190:            testNodeCounts[i]=new int[trees[i].nosLeafNodes];
191:            for(int k=0;k<sequences.numInstances();k++){
192:                trees[i].distributionForInstance(sequences.instance(k));
193:                int leafID=RandomRegressionTree.lastNode;
194:                testNodeCounts[i][leafID]++;
195:            }
196:        }
197:        //使用最近邻分类器来进行分类
198:        double minDist=Double.MAX_VALUE;
199:        int closest=0;
200:        for(int i=0;i<leafNodeCounts.length;i++){
201:            double d=distance(testNodeCounts,leafNodeCounts[i]);
202:            if(d<minDist){
203:                minDist=d;
204:                closest=i;
205:            }
206:        }
207:        return trainClassVals[closest];
208:    }
209: }

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章