# coding=utf-8
from numpy import *
def loadSimpleDate():
    dataMat = matrix([[1.0, 2.1],
                      [2.0, 1.1],
                      [1.3, 1.0],
                      [1.0, 1.0],
                      [2.0, 1.0]])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return dataMat, classLabels
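# Note on the toy set above: no single threshold on one feature classifies all
# five points correctly (the best stump below still gets one of them wrong),
# which is why AdaBoost has to combine several stumps.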
# Classify samples by comparing one feature (dimen) against a threshold.
# threshIneq decides which side of the threshold is labelled -1.
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    retArry = ones((shape(dataMatrix)[0], 1))
    if threshIneq == 'lt':
        retArry[dataMatrix[:, dimen] <= threshVal] = -1.0
    else:
        retArry[dataMatrix[:, dimen] > threshVal] = -1.0
    return retArry
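# A small worked example using the toy data from loadSimpleDate: with dimen=0,
# threshVal=1.3 and threshIneq='lt', the first feature column is
# [1.0, 2.0, 1.3, 1.0, 2.0], so the predictions are [-1, +1, -1, -1, +1];
# against the labels [1, 1, -1, -1, 1] only the first sample is misclassified.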
# D is the vector of sample weights; buildStump finds the decision stump that
# minimises the weighted classification error under D.
def buildStump(dataArr, classLabels, D):
    dataMatrix = mat(dataArr)
    labelMat = mat(classLabels).T
    m, n = shape(dataMatrix)
    numSteps = 10.0                   # number of steps across each feature's range
    bestStump = {}                    # parameters of the best stump found so far
    bestClasEst = mat(zeros((m, 1)))
    minError = inf
    for i in range(n):                # loop over every feature
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps) + 1):
            for inequal in ['lt', 'gt']:
                threshVal = rangeMin + float(j) * stepSize   # candidate threshold
                # classify with this threshold and inequality
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                errArr = mat(ones((m, 1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T * errArr   # each sample contributes its own weight to the error
                if weightedError < minError:
                    minError = weightedError
                    bestClasEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClasEst
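# A minimal usage sketch with uniform weights over the five toy samples; on this
# data the search should settle on the first feature with threshold 1.3 and the
# 'lt' rule, at a weighted error of 0.2:
#   dataMat, classLabels = loadSimpleDate()
#   D = mat(ones((5, 1)) / 5)
#   bestStump, minError, bestClasEst = buildStump(dataMat, classLabels, D)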
def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    weakClassArr = []
    m = shape(dataArr)[0]
    D = mat(ones((m, 1)) / m)           # start with equal weight on every sample
    aggClassEst = mat(zeros((m, 1)))    # running aggregated class estimate for every sample
    for i in range(numIt):
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        # alpha is this weak classifier's weight: the lower its weighted error,
        # the larger alpha. max(error, 1e-16) avoids division by zero when the
        # stump classifies everything correctly.
        alpha = float(0.5 * log((1.0 - error) / max(error, 1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        # Re-weight the samples: weights shrink by exp(-alpha) where the stump was
        # right (classEst agrees with the label) and grow by exp(alpha) where it
        # was wrong, then D is normalised to sum to 1.
        expon = multiply(-1 * alpha * mat(classLabels).T, classEst)
        D = multiply(D, exp(expon))
        D = D / D.sum()
        # Training error of the combined classifier so far; stop early if it reaches 0.
        aggClassEst += alpha * classEst
        # print("aggClassEst: ", aggClassEst.T)
        aggErrors = multiply(sign(aggClassEst) != mat(classLabels).T, ones((m, 1)))
        errorRate = aggErrors.sum() / m
        print("total error: ", errorRate)
        if errorRate == 0.0:
            break
    return weakClassArr
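# In formula form, each round t fits a stump h_t, sets
#     alpha_t = 0.5 * ln((1 - eps_t) / eps_t)
# for its weighted error eps_t, and updates the sample weights
#     D_i <- D_i * exp(-alpha_t * y_i * h_t(x_i)) / Z
# where Z renormalises D to sum to 1; the final classifier is
#     H(x) = sign(sum_t alpha_t * h_t(x)).
# On the toy data, training should stop after three rounds (total error 0),
# leaving three stumps in weakClassArr.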
# datToClass is the point (or set of points) to classify.
def adaClassify(datToClass, classifierArr):
    dataMatrix = mat(datToClass)
    m = shape(dataMatrix)[0]
    aggClassEst = mat(zeros((m, 1)))   # same aggregation as at the end of adaBoostTrainDS
    for i in range(len(classifierArr)):
        classEst = stumpClassify(dataMatrix, classifierArr[i]['dim'],
                                 classifierArr[i]['thresh'],
                                 classifierArr[i]['ineq'])   # apply each trained stump
        aggClassEst += classifierArr[i]['alpha'] * classEst
        print(aggClassEst)
    return sign(aggClassEst)
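# A quick sketch of the expected behaviour: classifying the point [0, 0] with the
# classifiers trained on the toy data should print an increasingly negative
# aggClassEst and return -1, while a point such as [5, 5] should come out as +1.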
def main():
    dataMat, classLabels = loadSimpleDate()
    classifierArr = adaBoostTrainDS(dataMat, classLabels, 30)
    t = adaClassify([0, 0], classifierArr)
    print(t)

if __name__ == "__main__":
    main()