基於WEKA的樸素貝葉斯(Naive Bayes)代碼實現

1.Naive Bayes公式

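NB分類公式:$c(x) = \arg\max_{c \in C} P(c) \prod_{j=1}^{m} P(a_j \mid c)$,其中 $C$ 為類別集合,$m$ 為屬性個數,$a_j$ 為實例 $x$ 第 $j$ 個屬性的取值。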
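先驗概率與後驗概率公式(拉普拉斯糾正):$P(c) = \frac{n_c + 1}{n + |C|}$,$P(a_j \mid c) = \frac{n_{c,a_j} + 1}{n_c + n_j}$,其中 $n$ 為訓練實例總數,$n_c$ 為類別 $c$ 的實例數,$n_{c,a_j}$ 為類別 $c$ 中第 $j$ 個屬性取值為 $a_j$ 的實例數,$|C|$ 為類別個數,$n_j$ 為第 $j$ 個屬性的取值個數。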

2.代碼

package weka.classifiers.myAlgorithm;

import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.Instances;

/**
 * A minimal Naive Bayes classifier for nominal attributes with Laplace
 * (add-one) smoothing.
 *
 * <p>Training counts how often each class value occurs and how often each
 * attribute value co-occurs with each class value. Prediction multiplies the
 * smoothed class prior by the smoothed conditional probability of every
 * non-missing attribute value and normalizes the result.
 *
 * <p>Instance weights are ignored; every training instance counts as 1.
 */
public class NB extends Classifier
{

	/** Number of training instances that have a class label. */
	public int m_NumInstance;
	/** Number of attributes in the training data, including the class attribute. */
	public int m_NumAttribute;
	/** Number of distinct values of the class attribute. */
	public int m_NumClass;
	/**
	 * Row offset of the first value of attribute j inside
	 * {@link #m_CountAttandClass}. The entry at the class index is unused.
	 */
	public int m_StartIndex[] = null;
	/**
	 * Number of training instances observed for each class value.
	 */
	public double[] m_CountClass = null;
	/**
	 * {@code m_CountAttandClass[m_StartIndex[j] + v][c]} is the number of
	 * training instances of class c whose attribute j has value index v.
	 */
	public double[][] m_CountAttandClass = null;

	/**
	 * Collects the class counts and the attribute-value/class co-occurrence
	 * counts from the training data.
	 *
	 * @param data training instances; the class index must be set and every
	 *             attribute (including the class) must be nominal
	 * @throws IllegalArgumentException if any attribute is not nominal
	 * @throws Exception declared by the overridden WEKA method
	 */
	@Override
	public void buildClassifier(Instances data) throws Exception
	{
		// Work on a copy so the caller's dataset is not modified, and drop
		// instances without a class label: casting their NaN class value to
		// int would silently count them as class 0.
		Instances train = new Instances(data);
		train.deleteWithMissingClass();

		m_NumInstance = train.numInstances();
		m_NumAttribute = train.numAttributes();
		int classIndex = train.classIndex();

		// The count tables are indexed by nominal value index, so any other
		// attribute type would produce colliding or out-of-range indices.
		for (int i = 0; i < m_NumAttribute; i++)
		{
			if (!train.attribute(i).isNominal())
			{
				throw new IllegalArgumentException(
						"NB handles nominal attributes only; attribute '"
						+ train.attribute(i).name() + "' is not nominal");
			}
		}

		m_NumClass = train.classAttribute().numValues();
		m_CountClass = new double[m_NumClass];
		m_StartIndex = new int[m_NumAttribute];

		// Lay the attributes out one after another: each attribute starts
		// where the previous one ended (a running sum of value counts).
		int allAttValue = 0;
		for (int i = 0; i < m_NumAttribute; i++)
		{
			if (i != classIndex)
			{
				m_StartIndex[i] = allAttValue;
				allAttValue += train.attribute(i).numValues();
			}
		}
		m_CountAttandClass = new double[allAttValue][m_NumClass];

		// Gather the counts.
		for (int i = 0; i < m_NumInstance; i++)
		{
			Instance instance = train.instance(i);
			int classValue = (int) instance.classValue();
			m_CountClass[classValue]++;
			for (int j = 0; j < m_NumAttribute; j++)
			{
				// A missing value carries no evidence for any attribute value.
				if (j == classIndex || instance.isMissing(j))
				{
					continue;
				}
				int attValue = (int) instance.value(j);
				m_CountAttandClass[m_StartIndex[j] + attValue][classValue]++;
			}
		}
	}

	/**
	 * Computes the class membership probabilities for an instance.
	 *
	 * @param instance the instance to classify; must have the same attribute
	 *                 layout as the training data
	 * @return an array with one probability per class value, summing to 1
	 */
	public double[] distributionForInstance(Instance instance)
	{
		double prob[] = new double[m_NumClass];
		int classIndex = instance.classIndex();

		for (int i = 0; i < m_NumClass; i++)
		{
			// Laplace-smoothed class prior.
			prob[i] = (m_CountClass[i] + 1.0) / (m_NumInstance + m_NumClass);
			for (int j = 0; j < m_NumAttribute; j++)
			{
				// Missing attribute values contribute no factor.
				if (j == classIndex || instance.isMissing(j))
				{
					continue;
				}
				int numValues = instance.attribute(j).numValues();
				int attValue = (int) instance.value(j);
				// Use the number of training instances of class i that actually
				// had a value for attribute j as the denominator. This equals
				// m_CountClass[i] when the training data had no missing values.
				double observed = observedCount(j, numValues, i);
				// Laplace-smoothed conditional probability.
				prob[i] *= (m_CountAttandClass[m_StartIndex[j] + attValue][i] + 1.0)
						/ (observed + numValues);
			}
		}

		weka.core.Utils.normalize(prob); // scale so the probabilities sum to 1
		return prob;
	}

	/**
	 * Sums the counts of all values of one attribute within one class.
	 *
	 * @param att       attribute index
	 * @param numValues number of values of that attribute
	 * @param cls       class value index
	 * @return number of training instances of class {@code cls} with a
	 *         non-missing value for attribute {@code att}
	 */
	private double observedCount(int att, int numValues, int cls)
	{
		double total = 0.0;
		int start = m_StartIndex[att];
		for (int v = 0; v < numValues; v++)
		{
			total += m_CountAttandClass[start + v][cls];
		}
		return total;
	}
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章