分類算法:
1.調用C4.5
分類算法我們會額外計算它的分類準確率,實現代碼如下,有註釋了。分類算法的實現有很多共通之處,往後的幾個分類算法就不再逐一說明了:
必須提醒的是,設置分類屬性所在的列號(即屬性索引)是必須的。
- import weka.classifiers.*;
- import weka.core.Instances;
- import weka.core.converters.*;
- import weka.classifiers.trees.J48; // C45算法(1)
- import java.io.File;
- import java.io.IOException;
- import javax.swing.*;
- public class callC45
- {
- public callC45()
- {}
- public void Main() throws Exception
- {
- J48 m_classifier = new J48();
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff"); //訓練語料文件
- ArffLoader atf = new ArffLoader();
- atf.setFile(inputFile);
- Instances instancesTrain = atf.getDataSet(); //讀入訓練文件
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff"); // 測試語料文件
- atf.setFile(inputFile);
- Instances instancesTest = atf.getDataSet(); //讀入訓練文件
- instancesTest.setClassIndex(0); //設置分類屬性所在行號(第一行爲0號),instancesTest.numAttributes()可以取得屬性總數
- double sum = instancesTest.numInstances(),right=0.0f; //測試語料實例數
- instancesTrain.setClassIndex(0);
- m_classifier.buildClassifier(instancesTrain);
- System.out.println(m_classifier.toString());
- System.out.println("");
- for(int i = 0; i < sum ; i++)
- {
- if(m_classifier.classifyInstance(instancesTest.instance(i))==instancesTest.instance(i).classValue()) //如果預測值和答案值相等(測試語料中的分類列提供的須爲正確答案,結果纔有意義)
- {
- right++;
- }
- }
- System.out.println("J48 classification precision:"+(right/sum));
- }
- // public static void main(String[] args) throws Exception
- // {
- // callC45 a = new callC45();
- // a.Main();
- // }
- }
2.調用AdaBoost算法
- import weka.classifiers.meta.AdaBoostM1;
- import weka.classifiers.*;
- import weka.core.converters.*;
- import weka.core.Instances;
- import java.io.*;
- public class callAdaBoostM1
- {
- public callAdaBoostM1()
- {
- }
- public void Main() throws Exception
- {
- AdaBoostM1 m_classifier = new AdaBoostM1();
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff");
- ArffLoader arf = new ArffLoader();
- arf.setFile(inputFile);
- Instances instancesTrain = arf.getDataSet();
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff");
- arf.setFile(inputFile);
- Instances instancesTest = arf.getDataSet();
- instancesTest.setClassIndex(0);
- double sum = instancesTest.numInstances(),right=0.0f;
- instancesTrain.setClassIndex(0);
- m_classifier.buildClassifier(instancesTrain);
- System.out.println(m_classifier.toString());
- System.out.println("");
- for(int i = 0; i<sum ; i++)
- {
- if(m_classifier.classifyInstance(instancesTest.instance(i)) == instancesTest.instance(i).classValue())
- {
- right++;
- }
- }
- System.out.println("AdaBoostM1 classification precision:"+(right/sum));
- }
- public static void main(String[] args) throws Exception
- {
- callAdaBoostM1 a = new callAdaBoostM1();
- a.Main();
- }
- }
3.調用樸素bayes算法;
- import java.io.*;
- import weka.classifiers.*;
- import weka.classifiers.bayes.*;
- import weka.core.Instances;
- import weka.core.converters.*;
- import java.io.File;
- import java.io.IOException;
- public class callbayes
- {
- public callbayes()
- {}
- public void Main() throws Exception
- {
- NaiveBayes m_classifier = new NaiveBayes(); //樸素貝葉斯的調用
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\labor.arff");
- ArffLoader atf = new ArffLoader();
- atf.setFile(inputFile);
- Instances instancesTrain = atf.getDataSet();
- instancesTrain.setClassIndex(instancesTrain.numAttributes()-1);
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\labor.arff");
- atf.setFile(inputFile);
- Instances instancesTest = atf.getDataSet();
- instancesTest.setClassIndex(instancesTest.numAttributes()-1);
- double sum = instancesTest.numInstances(),right = 0.0f;
- m_classifier.buildClassifier(instancesTrain);
- System.out.println(m_classifier.toString());
- System.out.println("");
- for(int i = 0; i < sum ; i++)
- {
- if(m_classifier.classifyInstance(instancesTest.instance(i)) == instancesTest.instance(i).classValue())
- {
- right++;
- }
- }
- System.out.println("Navisbayes classification precision:" + (right/sum));
- }
- // public static void main(String[] args) throws Exception
- // {
- // callbayes a = new callbayes();
- // a.Main();
- // }
- }
4.調用KNN算法:
- import weka.classifiers.*;
- import weka.classifiers.lazy.IBk;
- import weka.core.converters.*;
- import weka.core.Instances;
- import java.io.*;
- public class callKNN
- {
- public callKNN()
- {}
- public void Main() throws Exception
- {
- IBk m_classifier = new IBk(3);
- // int k=3;
- // Classifier m_classifier = new IBk(k);
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff");
- ArffLoader arf = new ArffLoader();
- arf.setFile(inputFile);
- Instances instancesTrain = arf.getDataSet();
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.with.vendor.arff");
- arf.setFile(inputFile);
- Instances instancesTest = arf.getDataSet();
- instancesTest.setClassIndex(0);
- double sum = instancesTest.numInstances(),right=0.0f;
- instancesTrain.setClassIndex(0);
- m_classifier.buildClassifier(instancesTrain);
- System.out.println("The k is : "+m_classifier.getKNN());
- System.out.println("");
- System.out.println(m_classifier.toString());
- System.out.println("");
- for(int i = 0; i<sum ; i++)
- {
- if(m_classifier.classifyInstance(instancesTest.instance(i)) == instancesTest.instance(i).classValue())
- {
- right++;
- }
- }
- System.out.println("KNN classification precision:"+(right/sum));
- }
- public static void main(String[] args) throws Exception
- {
- callKNN a = new callKNN();
- a.Main();
- }
- }
5.調用ID3算法(注意:weka.classifiers.trees.Id3實現的是ID3,並非CART)
- import weka.classifiers.*;
- import weka.classifiers.trees.Id3;
- import weka.core.Instances;
- import weka.core.converters.*;
- import java.io.*;
- public class callId3
- {
- public callId3()
- {}
- public void Main() throws Exception
- {
- Id3 m_classifier = new Id3();
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\contact-lenses.arff");
- ArffLoader atf = new ArffLoader();
- atf.setFile(inputFile);
- Instances instancesTrain = atf.getDataSet();
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\contact-lenses.arff");
- atf.setFile(inputFile);
- Instances instancesTest = atf.getDataSet();
- instancesTest.setClassIndex(0);
- double sum = instancesTest.numInstances(),right=0.0f;
- instancesTrain.setClassIndex(0);
- m_classifier.buildClassifier(instancesTrain);
- System.out.println(m_classifier.toString());
- System.out.println("");
- for(int i = 0; i < sum ; i++)
- {
- if(m_classifier.classifyInstance(instancesTest.instance(i)) == instancesTest.instance(i).classValue())
- {
- right++;
- }
- }
- System.out.println("Id3 classification precision:"+(right/sum));
- }
- // public static void main(String[] args) throws Exception
- //{
- // callId3 a = new callId3();
- // a.Main();
- // }
- }
聚類算法:
主要是調用了EM算法和KM算法。SVM由於還不會用libsvm,就先放着了。聚類算法的調用與分類算法不同之處在於:不必設置分類屬性的列號:
1.調用EM算法;
- import weka.clusterers.EM;
- import weka.core.Instances;
- import weka.core.converters.*;
- import weka.clusterers.*;
- import java.io.*;
- public class callEM
- {
- public callEM()
- {
- }
- public void Main() throws Exception
- {
- EM m_cluster = new EM();
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.arff");
- ArffLoader arf = new ArffLoader();
- arf.setFile(inputFile);
- Instances instancesTrain = arf.getDataSet();
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.arff");
- arf.setFile(inputFile);
- Instances instancesTest = arf.getDataSet();
- m_cluster.buildClusterer(instancesTrain);
- System.out.println("The number of cluster : "+m_cluster.numberOfClusters());
- int num = m_cluster.numberOfClusters();
- System.out.println("");
- System.out.println(m_cluster.toString());
- System.out.println("");
- double[] predict = m_cluster.clusterPriors();
- for(int i = 0; i<num ; i++)
- {
- System.out.println("第 "+i+" 個 聚類的先驗爲 : "+predict[i]);
- }
- }
- public static void main(String[] args) throws Exception
- {
- callEM a = new callEM();
- a.Main();
- }
- }
2.調用KM算法;
- import weka.clusterers.*;
- import weka.core.converters.*;
- import weka.core.Instances;
- import java.io.*;
- public class callKM
- {
- public callKM()
- {
- }
- public void Main() throws Exception
- {
- SimpleKMeans m_cluster = new SimpleKMeans();
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.arff");
- ArffLoader arf = new ArffLoader();
- arf.setFile(inputFile);
- Instances instancesTrain = arf.getDataSet();
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\cpu.arff");
- arf.setFile(inputFile);
- Instances instancesTest = arf.getDataSet();
- m_cluster.buildClusterer(instancesTrain);
- System.out.println("The number of cluster : "+m_cluster.numberOfClusters());
- int num = m_cluster.numberOfClusters();
- System.out.println("");
- System.out.println(m_cluster.toString());
- System.out.println("");
- int[] size = m_cluster.getClusterSizes();
- int sum = 0;
- for(int i = 0; i<num ; i++)
- {
- sum += size[i];
- }
- for(int i = 0; i<num ; i++)
- {
- System.out.println("第 "+i+" 個 聚類的大小爲 : "+size[i]+" 所佔比例爲 : "+(double)size[i]/(double)sum);
- }
- }
- public static void main(String[] args) throws Exception
- {
- callKM a = new callKM();
- a.Main();
- }
- }
關聯規則算法
只是調用了Apriori算法,調用方法和聚類算法的基本一樣了。。。
1.調用Apriori算法
- import weka.associations.*;
- import weka.core.converters.*;
- import weka.core.Instances;
- import java.io.*;
- public class callApriori
- {
- public callApriori()
- {
- }
- public void Main() throws Exception
- {
- Apriori m_association = new Apriori();
- File inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\contact-lenses.arff");
- ArffLoader arf = new ArffLoader();
- arf.setFile(inputFile);
- Instances instancesTrain = arf.getDataSet();
- inputFile = new File("E:\\資料\\數據挖掘\\weka-3-5-8\\data\\contact-lenses.arff");
- arf.setFile(inputFile);
- Instances instancesTest = arf.getDataSet();
- m_association.buildAssociations(instancesTrain);
- System.out.println("The Number of Rules : "+m_association.getNumRules());
- System.out.println(m_association.toString());
- System.out.println("");
- }
- // public static void main(String[] args) throws Exception
- // {
- // callApriori a = new callApriori();
- // a.Main();
- // }
- }