Logistic Regression: a Python Implementation

from numpy import *

def sigmoid(x):
    return 1/(1+exp(-x))
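# sigmoid(x) squashes any real x into (0, 1), with sigmoid(0) == 0.5. The model
# below treats sigmoid(w . x) as P(y = 1 | x) and fits w by gradient ascent on the
# log-likelihood, whose update rule is  w := w + alpha * X^T (y - sigmoid(X w)).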

class LogRegressionClassifier(object):

    def __init__(self):
        self.dataMat = list()
        self.labelMat = list()
        self.weights = list()

    def loadDataSet(self, filename):
        """Load whitespace-separated samples; the last field on each line is the label."""
        with open(filename) as fr:
            for line in fr.readlines():
                lineArr = line.strip().split()
                dataLine = [1.0]  # prepend the constant bias term
                for i in lineArr:
                    dataLine.append(float(i))
                label = dataLine.pop()  # the last column is the class label
                self.dataMat.append(dataLine)
                self.labelMat.append(int(label))
        self.dataMat = mat(self.dataMat)
        self.labelMat = mat(self.labelMat).transpose()

    def train(self):
        self.weights = self.stocGradAscent1()

    def batchGradAscent(self):
        """Batch gradient ascent: every step uses the full data set."""
        m, n = shape(self.dataMat)
        alpha = 0.001    # learning rate
        maxCycles = 500  # number of iterations
        weights = ones((n, 1))
        for k in range(maxCycles):
            h = sigmoid(self.dataMat * weights)  # predictions for all m samples
            error = self.labelMat - h            # m x 1 error vector
            weights += alpha * self.dataMat.transpose() * error
        return weights
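
    # batchGradAscent touches all m samples on every one of its 500 cycles; the two
    # stochastic variants below instead update the weights from one sample at a
    # time, trading per-step accuracy for much cheaper updates on large data sets.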

    def stocGradAscent1(self):
        """Stochastic gradient ascent: one pass over the data, updating per sample."""
        m, n = shape(self.dataMat)
        alpha = 0.01
        weights = ones((n, 1))
        for i in range(m):
            h = sigmoid(sum(self.dataMat[i] * weights))
            error = self.labelMat[i] - h
            weights += (alpha * error * self.dataMat[i]).transpose()
        return weights

    def stocGradAscent2(self):
        """Stochastic gradient ascent with a decaying alpha and random sample order."""
        numIter = 2
        m, n = shape(self.dataMat)
        weights = ones((n, 1))
        for j in range(numIter):
            dataIndex = list(range(m))  # samples not yet used in this pass
            for i in range(m):
                alpha = 4 / (1.0 + j + i) + 0.0001  # alpha decreases with iteration
                randIndex = int(random.uniform(0, len(dataIndex)))
                sample = dataIndex[randIndex]       # pick a random remaining sample
                h = sigmoid(sum(self.dataMat[sample] * weights))
                error = self.labelMat[sample] - h
                weights += (alpha * error * self.dataMat[sample]).transpose()
                del dataIndex[randIndex]            # use each sample once per pass
        return weights

    def classify(self, X):
        """Classify a sample X (including the leading 1.0 bias term)."""
        prob = sigmoid(sum(X * self.weights))
        if prob > 0.5:
            return 1.0
        else:
            return 0.0

    def test(self):
        self.loadDataSet("testData.dat")
        weights0 = self.batchGradAscent()
        weights1 = self.stocGradAscent1()
        weights2 = self.stocGradAscent2()
        print("batchGradAscent:", weights0)
        print("stocGradAscent1:", weights1)
        print("stocGradAscent2:", weights2)

if __name__=='__main__':
    lr = LogRegressionClassifier()
    lr.test()
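
For reference, loadDataSet expects a plain-text file with one sample per line: whitespace-separated feature values followed by a 0/1 class label. A minimal usage sketch (the file name points.dat, its contents, and the query point are made up for illustration):

# points.dat might contain lines such as:
#   1.2   0.7   1
#   -0.3  1.5   0
lr = LogRegressionClassifier()
lr.loadDataSet("points.dat")
lr.train()  # fits self.weights via stocGradAscent1
print(lr.classify(mat([1.0, 1.2, 0.7])))  # caller prepends the 1.0 bias term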