public class LPS extends AbstractClassifierWithTrainingData
02: implements ParameterSplittable{
03: RandomRegressionTree[] trees;
04:
05: public static final int PARASEARCH_NOS_TREES=25;
06: public static final int DEFAULT_NOS_TREES=200;
07: int nosTrees=DEFAULT_NOS_TREES;//森林中树的个数
08: int nosSegments=20;
09: double[] ratioLevels={0.01,0.1,0.25,0.5};
10: double[] segmentProps={0.05,0.1,0.25,0.5,0.75,0.95};//每棵树最少实例个数比例数组,用于参数搜索
11: double segmentProp=segmentProps[0];
12: double ratioLevel=ratioLevels[0];//每棵树最少实例个数比例
13: int[] treeDepths={2,4,6};//树的深度数组,用于搜索最优参数
14: int treeDepth=treeDepths[2];//树的深度
15: int[] segLengths;//子序列长度数组
16: int[][] segStarts;//子序列开始下标
17: int[][] segDiffStarts;//导数子序列开始下标
18: Instances sequences;
19: int[] nosLeafNodes;//每棵树叶子节点的个数
20: int[][][] leafNodeCounts;//每个实例在每棵树中各个叶子节点出现的次数
21: double[] trainClassVals;
22: int[] classAtt;
23: boolean paramSearch=true;//是否进行最优化参数搜索
24: double acc=0;
25: public LPS(){
26: trees=new RandomRegressionTree[nosTrees];
27: }
28:
29: public String globalInfo() {
30: return "Blah";
31: }
32:
33: @Override
34: public void setParamSearch(boolean b) {
35: paramSearch=b;
36: }
37:
38: @Override
39: public void setParametersFromIndex(int x) {
40: throw new UnsupportedOperationException("Not supported yet.");
41: }
42: @Override
43: public String getParas() {
44: return ratioLevel+","+treeDepth;
45: }
46: @Override
47: public double getAcc() {
48: return acc;
49: }
50:
51: @Override
52: public void buildClassifier(Instances data) throws Exception {
53: trainResults.buildTime=System.currentTimeMillis();
54: //最优化参数搜索,确定最小实例树,树的深度
55: if(paramSearch){
56: double bestErr=1;
57: int bestRatio=0;
58: int bestTreeDepth=0;
59: LPS trainer=new LPS();
60: trainer.nosTrees=50;
61: trainer.setParamSearch(false);
62: int folds=10;
63: for(int i=0;i<ratioLevels.length;i++){
64: trainer.ratioLevel=ratioLevels[i];
65: for(int j=0;j<treeDepths.length;j++){
66: trainer.treeDepth=treeDepths[j];
67: Evaluation eval=new Evaluation(data);
68: eval.crossValidateModel(trainer, data, folds,new
69: Random());
70: double e=eval.errorRate();
71: if(e<bestErr){
72: bestErr=e;
73: bestTreeDepth=j;
74: bestRatio=i;
75: }
76: }
77: }
78: ratioLevel=ratioLevels[bestRatio];
79: treeDepth=treeDepths[bestTreeDepth];
80: }
81: int seriesLength=data.numAttributes()-1;
82: int minSegment=(int)(seriesLength*0.1);//最小片段(子序列)长度
83: int maxSegment=(int)(seriesLength*0.9);//最大片段(子序列)长度
84: //初始化数组
85: segLengths=new int[nosTrees];
86: nosLeafNodes=new int[nosTrees];
87: segStarts=new int[nosTrees][nosSegments];
88: segDiffStarts=new int[nosTrees][nosSegments];
89: leafNodeCounts=new int[data.numInstances()][nosTrees][];
90: trainClassVals=new double[data.numInstances()];
91: //存储实例类标签
92: for(int i=0;i<data.numInstances();i++)
93: trainClassVals[i]=data.instance(i).classValue();
94: //用于保存自回归树中的因变量属性
95: classAtt=new int[nosTrees];
96: //随机函数
97: Random r= new Random();
98: //遍历建立树
99: for(int i=0;i<nosTrees;i++){
100: //为每棵树选择随机的段长度
101: segLengths[i]=minSegment+r.nextInt(maxSegment-minSegment);
102: // 为每棵树随机选择目标细分
103: for(int j=0;j<nosSegments;j++){
104: segStarts[i][j]=r.nextInt(seriesLength-segLengths[i]);
105: segDiffStarts[i][j]=r.nextInt(seriesLength-segLengths[i]-1);
106: }
107: //为每一棵树生成实例
108: FastVector atts=new FastVector();
109: String name;
110: for(int j=0;j<2*nosSegments;j++){
111: name = "SegFeature"+j;
112: atts.addElement(new Attribute(name));
113: }
114: sequences = new Instances("SubsequenceIntervals",
115: atts,segLengths[i]*data.numInstances());
116: //填充实例的值,包括导数值
117: for(int j=0;j<data.numInstances();j++){
118: Instance series=data.instance(j);
119: for(int k=0;k<segLengths[i];k++){
120: DenseInstance in=new DenseInstance(sequences.numAttributes());
121: for(int m=0;m<nosSegments;m++)
122: in.setValue(m, series.value(segStarts[i][m]+k));
123: for(int m=0;m<nosSegments;m++)
124: in.setValue(nosSegments+m, series.value(segDiffStarts[i][m]+k)-series.value(segDiffStarts[i][m]+k+1));
125: sequences.add(in);
126: }
127: }
128: //选择随机的一个属性作为树的因变量
129: classAtt[i]=r.nextInt(sequences.numAttributes());//
130: sequences.setClassIndex(classAtt[i]);
131: //建立回归树
132: trees[i]= new RandomRegressionTree();
133: trees[i].setMaxDepth(treeDepth);
134: trees[i].setKValue(1);
135: trees[i].setMinNum((int)(sequences.numInstances()*ratioLevel));
136: trees[i].buildClassifier(sequences);
137: nosLeafNodes[i]=trees[i].nosLeafNodes;
138: //统计每个实例在每棵树的各个叶子节点上出现的次数
139: for(int j=0;j<data.numInstances();j++){
140: leafNodeCounts[j][i]=new int[trees[i].nosLeafNodes];
141: for(int k=0;k<segLengths[i];k++){
142: trees[i].distributionForInstance(sequences.instance
143: (j*segLengths[i]+k));
144: int leafID=RandomRegressionTree.lastNode;
145: leafNodeCounts[j][i][leafID]++;
146: }
147: }
148: }
149: sequences=null;
150: System.gc();
151: }
152: //转化后两实例的距离度量
153: public double distance(int[][] test, int[][] train){
154: double d=0;
155: for(int i=0;i<test.length;i++)
156: for(int j=0;j<test[i].length;j++){
157: double x=(test[i][j]-train[i][j]);
158: if(x>0)
159: d+=x;
160: else
161: d+=-x;
162: }
163: return d;
164: }
165: //分类
166: public double classifyInstance(Instance ins) throws Exception{
167: //用于存储转化后的测试序列
168: int[][] testNodeCounts=new int[nosTrees][];
169: //使用训练阶段生成的多个回归树来转化测试序列
170: for(int i=0;i<nosTrees;i++){
171: FastVector atts=new FastVector();
172: String name;
173: for(int j=0;j<2*nosSegments;j++){
174: name = "SegFeature"+j;
175: atts.addElement(new Attribute(name));
176: }
177: sequences = new Instances("SubsequenceIntervals",
178: atts,segLengths[i]);
179: for(int k=0;k<segLengths[i];k++){
180: DenseInstance in=new DenseInstance(sequences
181: .numAttributes());
182: for(int m=0;m<nosSegments;m++)
183: in.setValue(m, ins.value(segStarts[i][m]+k));
184: for(int m=0;m<nosSegments;m++)
185: in.setValue(nosSegments+m, ins.value(segDiffStarts[i][m]+k)
186: -ins.value(segDiffStarts[i][m]+k+1));
187: sequences.add(in);
188: }
189: sequences.setClassIndex(classAtt[i]);
190: testNodeCounts[i]=new int[trees[i].nosLeafNodes];
191: for(int k=0;k<sequences.numInstances();k++){
192: trees[i].distributionForInstance(sequences.instance(k));
193: int leafID=RandomRegressionTree.lastNode;
194: testNodeCounts[i][leafID]++;
195: }
196: }
197: //使用最近邻分类器来进行分类
198: double minDist=Double.MAX_VALUE;
199: int closest=0;
200: for(int i=0;i<leafNodeCounts.length;i++){
201: double d=distance(testNodeCounts,leafNodeCounts[i]);
202: if(d<minDist){
203: minDist=d;
204: closest=i;
205: }
206: }
207: return trainClassVals[closest];
208: }
209: }