用weka進行分類的小程序。
package test.weka;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Random;
import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.classifiers.Evaluation;
import weka.core.Instance;
import java.util.Date;
/**
 * Small helper around the Weka API: trains a classifier on one ARFF file and
 * reports how well it performs.
 *
 * <p>Three evaluation modes are offered: testing on the training file itself
 * ({@link #selfTestResult}), testing on a separate file ({@link #useTestset})
 * and 10-fold cross-validation ({@link #GetTenCrossResult}). Every run prints
 * its statistics to the console and appends them to {@code result.txt} in the
 * working directory.
 *
 * <p>A new {@link Evaluation} is created for every run on purpose. The original
 * author observed that sharing a single instance between runs changes the
 * reported numbers; Weka's Evaluation keeps adding evaluated instances to its
 * running statistics, so a reused object would merge the results of
 * successive runs.
 */
public class Myweka {
    /** Training data; stays {@code null} when loading failed in the constructor. */
    private Instances trainInstances = null;
    /** Evaluation of the most recent run; replaced at the start of each run. */
    private Evaluation evaluation = null;
    /** File the training data was read from. */
    private final File trainfile;
    /** Index of the class attribute, applied to training and test data alike. */
    private final int classindex;

    /**
     * Loads the training data.
     *
     * <p>Loading problems are reported on stderr and do not propagate; in that
     * case the instance holds no training data and later runs report an error.
     *
     * @param trainfile  ARFF file with the training data
     * @param classindex index of the class attribute
     */
    public Myweka(File trainfile, int classindex) {
        this.trainfile = trainfile;
        this.classindex = classindex;
        try {
            trainInstances = loadInstances(trainfile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Uses the training file as the test set and prints the index of every
     * misclassified instance, followed by the evaluation statistics.
     *
     * @param classifier classifier to train and evaluate
     */
    public void selfTestResult(Classifier classifier) {
        try {
            requireTrainingData();
            classifier.buildClassifier(trainInstances);
            // Fresh Evaluation per run, see the class comment.
            evaluation = new Evaluation(trainInstances);
            // The file is read again so the test set is a separate Instances object.
            Instances testInstances = loadInstances(trainfile);
            evaluateAndReport(classifier, testInstances);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Trains on the training data and evaluates on a separate test file.
     * The test file is assumed to use the same class index as the training file.
     *
     * @param classifier classifier to train and evaluate
     * @param testFile   ARFF file with the test data
     */
    public void useTestset(Classifier classifier, File testFile) {
        try {
            requireTrainingData();
            evaluation = new Evaluation(trainInstances);
            classifier.buildClassifier(trainInstances);
            Instances testInstances = loadInstances(testFile);
            evaluateAndReport(classifier, testInstances);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a 10-fold cross-validation (fixed random seed 1) on the training
     * data and reports the statistics.
     *
     * @param classifier classifier to evaluate
     */
    public void GetTenCrossResult(Classifier classifier) {
        try {
            requireTrainingData();
            // NOTE(review): crossValidateModel presumably trains its own copies per
            // fold, which would make this build redundant for the statistics. It is
            // kept so the passed classifier is still trained on the full data
            // afterwards, as callers may rely on that.
            classifier.buildClassifier(trainInstances);
            evaluation = new Evaluation(trainInstances);
            evaluation.crossValidateModel(classifier, trainInstances, 10, new Random(1));
            writeResult();
            consoleResult();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Fails with a clear message when the constructor could not load the training data. */
    private void requireTrainingData() {
        if (trainInstances == null) {
            throw new IllegalStateException("training data was not loaded from " + trainfile);
        }
    }

    /**
     * Reads a data set from a file and sets its class index.
     * The reader is closed before returning, also when reading fails.
     */
    private Instances loadInstances(File file) throws Exception {
        // Opened outside the try so the finally block never sees a null reader.
        FileReader reader = new FileReader(file);
        try {
            Instances data = new Instances(reader);
            data.setClassIndex(classindex);
            return data;
        } finally {
            reader.close();
        }
    }

    /**
     * Prints every misclassified test instance (0-based index) and the error
     * count, then evaluates the classifier on the test set and reports the result.
     */
    private void evaluateAndReport(Classifier classifier, Instances testInstances) throws Exception {
        int count = 0;
        for (int i = 0; i < testInstances.numInstances(); i++) {
            Instance current = testInstances.instance(i);
            if (classifier.classifyInstance(current) != current.classValue()) {
                count++;
                System.out.println("第"+i+"個樣本實例分類錯誤!");
            }
        }
        System.out.println("有"+count+"個樣本分類錯誤!");
        evaluation.evaluateModel(classifier, testInstances);
        writeResult();
        consoleResult();
    }

    /**
     * Appends a timestamp, the size of the training data and the statistics of
     * the current evaluation to {@code result.txt}. Failures are reported on the
     * console and do not propagate.
     */
    private void writeResult() {
        BufferedWriter writer = null;
        try {
            // Second FileWriter argument: append instead of overwrite.
            writer = new BufferedWriter(new FileWriter(new File("result.txt"), true));
            writer.write(getCurrentTime());
            writer.newLine();
            writer.write("the number of Attributes: "+trainInstances.numAttributes());
            writer.newLine();
            writer.write("the number if instances: "+trainInstances.numInstances());
            writer.newLine();
            writer.write(evaluation.toSummaryString());
            writer.newLine();
            writer.write(evaluation.toClassDetailsString());
            writer.newLine();
            writer.write(evaluation.toMatrixString());
            writer.newLine();
            writer.flush();
        } catch (IOException e) {
            System.out.println("文件非法或arff格式錯誤");
            // Keep the real cause visible; the fixed message alone may not match it.
            e.printStackTrace();
        } catch (Exception e) {
            System.out.println("分類器創建失敗");
            e.printStackTrace();
        } finally {
            // The writer is null when result.txt could not be opened.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e0) {
                    e0.printStackTrace();
                }
            }
        }
    }

    /** Prints summary, per-class details and confusion matrix of the current evaluation. */
    private void consoleResult() throws Exception {
        System.out.println(evaluation.toSummaryString());
        System.out.println(evaluation.toClassDetailsString());
        System.out.println(evaluation.toMatrixString());
    }

    /** Returns the current time formatted as {@code yyyy-MM-dd HH:mm:ss}. */
    private String getCurrentTime() {
        Date date = new Date();
        DateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return format.format(date);
    }
}
經過測試發現,在main方法中調用GetTenCrossResult()方法後,繼續調用selfTestResult(),若是共用同一個evaluation(即在構造函數中new),與在每個方法中各自new一個evaluation相比,得到的結果不一樣。難道是使用同一個evaluation時,第二次的結果會受第一次影響?
看到的大神,解釋一下。