Machine Learning with Logistic Regression (Part 2): Python Implementation

Reference: http://blog.csdn.net/han_xiaoyang/article/details/49123419

        We run logistic regression on a dataset stored as comma-separated values, one sample per line, with two feature columns followed by a 0/1 label column:
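        The original post shows the raw file as an image. A few hypothetical lines (the numbers below are made up; only the three-column, comma-separated layout matters, matching how the code reads data1.txt) would look like this:

55.0, 70.0, 0
62.0, 81.0, 1
78.0, 75.0, 1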


         First, let's look at the data distribution. The code is as follows:

from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel

data = loadtxt("data1.txt", delimiter=',')
X = data[:, 0:2]          # the two feature columns
y = data[:, 2]            # the 0/1 label column
pos = where(y == 1)[0]    # indices of positive (pass) samples
neg = where(y == 0)[0]    # indices of negative (fail) samples
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel("Feature1")
ylabel("Feature2")
legend(["Pass", "Fail"])  # legend entries follow plot order: positive samples first
show()


Next we implement the full pipeline: load the data, train the weights by gradient ascent, measure accuracy, and plot the decision boundary.

import matplotlib.pyplot as plt
import numpy as np

def loadData():
    '''Load the comma-separated data: two feature columns plus a 0/1 label column'''
    data = np.loadtxt("data1.txt", delimiter=',')
    m, n = data.shape
    train_x = data[:, 0:2]
    train_y = data[:, 2]
    # prepend a column of ones so weight[0] acts as the intercept term
    X1 = np.ones((m, 1))
    train_x = np.concatenate((X1, train_x), axis=1)
    return np.mat(train_x), np.mat(train_y).transpose()

def sigmoid(z):
    '''Compute the sigmoid (logistic) function'''
    gz = 1.0 / (1.0 + np.exp(-z))
    return gz

def trainLogRegress(train_x, train_y):
    '''Fit the weights by gradient ascent on the log-likelihood'''
    m = train_x.shape[0]       # number of training examples
    weight = np.zeros((3, 1))  # intercept plus two feature weights
    alpha = 0.001              # learning rate
    maxCycles = 9999           # number of iterations
    for k in range(maxCycles):
        h = sigmoid(train_x * weight)   # predicted probabilities, m x 1
        error = train_y - h             # prediction error
        weight = weight + alpha * train_x.transpose() * error  # gradient-ascent step
        # cost (negative average log-likelihood), useful for monitoring convergence:
        # J = -1.0/m * (train_y.T * np.log(h) + (1 - train_y).T * np.log(1 - h))
    return weight

def testLogRegress(weight, test_x, test_y):
    '''Return classification accuracy: predict 1 when the estimated probability exceeds 0.5'''
    m = test_x.shape[0]
    match = 0
    for i in range(m):
        predict = sigmoid(test_x[i, :] * weight)[0, 0] > 0.5
        if predict == bool(test_y[i, 0]):
            match += 1
    return float(match) / m
		
def showLogRegress(weight, train_x, train_y):
    '''Plot the samples and the learned decision boundary'''
    m = train_x.shape[0]
    # draw all samples
    for i in range(m):
        if int(train_y[i, 0]) == 0:
            plt.plot(train_x[i, 1], train_x[i, 2], 'xr')
        elif int(train_y[i, 0]) == 1:
            plt.plot(train_x[i, 1], train_x[i, 2], 'ob')
    # draw the decision boundary: weight[0] + weight[1]*x1 + weight[2]*x2 = 0
    min_x = train_x[:, 1].min()
    max_x = train_x[:, 1].max()
    y_min_x = float(-weight[0, 0] - weight[1, 0] * min_x) / weight[2, 0]
    y_max_x = float(-weight[0, 0] - weight[1, 0] * max_x) / weight[2, 0]
    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
    plt.xlabel("Feature1")
    plt.ylabel("Feature2")
    plt.show()
	
train_x, train_y = loadData()
weight = trainLogRegress(train_x, train_y)
accuracy = testLogRegress(weight, train_x, train_y)
print("accuracy:", accuracy)
showLogRegress(weight, train_x, train_y)
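
The commented-out line inside trainLogRegress corresponds to the cross-entropy cost (the negative average log-likelihood). As a minimal sketch (not part of the original post; the helper name computeCost is my own), it can be pulled out into its own function and printed every few hundred iterations to check that training is converging:

def computeCost(weight, train_x, train_y):
    '''Average cross-entropy cost; it should decrease as training proceeds'''
    m = train_x.shape[0]
    h = sigmoid(train_x * weight)
    J = -1.0 / m * (train_y.T * np.log(h) + (1 - train_y).T * np.log(1 - h))
    return J[0, 0]

# example usage, e.g. inside the training loop:
# if k % 1000 == 0:
#     print(k, computeCost(weight, train_x, train_y))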

Summary
       Finally, a brief summary of logistic regression. It grows out of linear regression, whose output is a meaningful continuous value; but linear regression cannot separate classes accurately and robustly, so logistic regression was designed as an algorithm whose output represents the probability that a sample belongs to a given class.
       The key to logistic regression is that θᵀx, whose range can be very large, is mapped into (0, 1) by the sigmoid function, turning it into a probability estimate.
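       Written out in the standard notation (consistent with the sigmoid function in the code above):

$$h_\theta(x) = g(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}, \qquad 0 < h_\theta(x) < 1$$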
        Viewed intuitively in two dimensions, it is the shape of the sigmoid function that lets the decision threshold (0.5) map to a decision boundary in the plane. As the features become more complex the boundary can take many forms, but it separates the two classes of samples reasonably well and thereby solves the classification problem.
       The classical way to solve for the parameters of logistic regression is gradient descent: after constructing a convex cost function, each iteration takes a small step along the gradient direction (the direction of fastest descent) until, after N iterations, the minimum is reached.
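       Concretely, the convex cost is the cross-entropy

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big]$$

and each step updates the parameters in the direction that reduces it,

$$\theta := \theta + \alpha\, X^T\big(y - h_\theta(X)\big),$$

which, up to the constant 1/m factor (absorbed into the learning rate), is exactly the line weight = weight + alpha * train_x.transpose() * error in the training code above.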
