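1.Naive Bayes公式
$$c(x) = \arg\max_{c \in C} \hat{P}(c) \prod_{j=1}^{m} \hat{P}(a_j \mid c)$$
其中採用拉普拉斯糾正：$\hat{P}(c) = \frac{n_c + 1}{n + k}$，$\hat{P}(a_j \mid c) = \frac{n_{c,a_j} + 1}{n_c + v_j}$（$n$ 為訓練實例數，$k$ 為類別數，$n_c$ 為類別 $c$ 的實例數，$n_{c,a_j}$ 為類別 $c$ 中第 $j$ 個屬性取值為 $a_j$ 的實例數，$v_j$ 為第 $j$ 個屬性的取值個數）。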
2.代碼
package weka.classifiers.myAlgorithm;
import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.Instances;
/**
 * Naive Bayes classifier for nominal attributes, using Laplace-corrected
 * frequency estimates.
 *
 * <p>{@link #buildClassifier(Instances)} collects the class counts and the
 * per-class attribute-value counts; {@link #distributionForInstance(Instance)}
 * combines them into a normalized class distribution.
 */
public class NB extends Classifier
{
    /** Number of training instances that were counted (those with a known class). */
    public int m_NumInstance;

    /** Number of attributes in the training data, including the class attribute. */
    public int m_NumAttribute;

    /** Number of distinct class values. */
    public int m_NumClass;

    /**
     * Row offset of the j-th attribute inside {@link #m_CountAttandClass}:
     * value v of attribute j is stored in row {@code m_StartIndex[j] + v}.
     * The entry for the class attribute is unused.
     */
    public int m_StartIndex[] = null;

    /** Number of training instances observed for each class value. */
    public double[] m_CountClass = null;

    /**
     * Number of training instances per (attribute value, class) pair, indexed as
     * {@code [m_StartIndex[j] + value][class]}.
     */
    public double[][] m_CountAttandClass = null;

    /**
     * Gathers the counts needed for prediction from the training data.
     *
     * <p>Instances whose class is missing are ignored, and missing attribute
     * values do not contribute to any count. The caller's data set is not
     * modified.
     *
     * @param data the training data; all attributes must be non-numeric
     * @throws Exception if the classifier cannot be built, in particular an
     *         {@link IllegalArgumentException} when a numeric attribute is present
     */
    @Override
    public void buildClassifier(Instances data) throws Exception
    {
        // Work on a copy so that dropping instances does not touch the caller's data.
        Instances train = new Instances(data);
        train.deleteWithMissingClass();

        m_NumInstance = train.numInstances();
        m_NumAttribute = train.numAttributes();
        m_NumClass = train.classAttribute().numValues();
        m_CountClass = new double[m_NumClass];
        m_StartIndex = new int[m_NumAttribute];

        int classIndex = train.classIndex();

        // Lay the attributes out one after another: each attribute starts at the
        // running total of the value counts of the attributes before it.
        int allAttValue = 0;
        for (int i = 0; i < m_NumAttribute; i++)
        {
            if (train.attribute(i).isNumeric())
            {
                // Numeric attributes report zero nominal values, so they would get no
                // rows in the count table; fail early with a clear message instead.
                throw new IllegalArgumentException(
                    "NB cannot handle numeric attribute: " + train.attribute(i).name());
            }
            if (i != classIndex)
            {
                m_StartIndex[i] = allAttValue;
                allAttValue += train.attribute(i).numValues();
            }
        }
        m_CountAttandClass = new double[allAttValue][m_NumClass];

        // Collect the class counts and the per-class attribute-value counts.
        for (int i = 0; i < m_NumInstance; i++)
        {
            Instance instance = train.instance(i);
            int classValue = (int) instance.classValue();
            m_CountClass[classValue]++;
            for (int j = 0; j < m_NumAttribute; j++)
            {
                // A missing value is NaN; casting it to int would count it as value 0.
                if (j == classIndex || instance.isMissing(j))
                {
                    continue;
                }
                int attValue = (int) instance.value(j);
                m_CountAttandClass[m_StartIndex[j] + attValue][classValue]++;
            }
        }
    }

    /**
     * Computes the class distribution for the given instance.
     *
     * <p>Each class score is the Laplace-corrected class prior multiplied by the
     * Laplace-corrected conditional estimate of every non-missing attribute
     * value; the scores are then normalized to sum to one.
     *
     * @param instance the instance to classify
     * @return an array with one normalized score per class value
     */
    @Override
    public double[] distributionForInstance(Instance instance)
    {
        double[] prob = new double[m_NumClass];
        int classIndex = instance.classIndex();
        for (int i = 0; i < m_NumClass; i++)
        {
            // Laplace-corrected class prior.
            prob[i] = (m_CountClass[i] + 1.0) / (m_NumInstance + m_NumClass);
            for (int j = 0; j < m_NumAttribute; j++)
            {
                // Missing values carry no evidence, so they are left out of the product.
                if (j == classIndex || instance.isMissing(j))
                {
                    continue;
                }
                int attValue = (int) instance.value(j);
                // Laplace-corrected conditional estimate of this value given class i.
                prob[i] *= (m_CountAttandClass[m_StartIndex[j] + attValue][i] + 1.0)
                    / (m_CountClass[i] + instance.attribute(j).numValues());
            }
        }
        weka.core.Utils.normalize(prob); // scale the scores so they sum to one
        return prob;
    }
}