Implementing simple logistic regression with Python and NumPy

# -*- coding: utf-8 -*-
"""
Created on Fri Mar  2 22:45:41 2018

@author: Administrator
"""

import numpy as np
import matplotlib.pyplot as plt

def generateData(opt='linear'):
    # numbers of positive and negative samples
    m_pos = 100
    m_neg = 100
    X = np.zeros((2,m_pos+m_neg))
    Y = np.zeros((1,m_pos+m_neg))

    # distribution type: circular ring, linear, or cubic
    if opt=='circle':  
        R1_range = 10
        R2_range = 5
        R_pos = R1_range*np.random.rand(1,m_pos)
        R_neg = R2_range*np.random.rand(1,m_neg)+0.9*R1_range
        Theta_pos = np.pi*np.random.randn(1,m_pos)
        Theta_neg = np.pi*np.random.randn(1,m_neg)

    # Note: X[0,0:m_pos] = R_pos*np.cos(Theta_pos) would fail here,
    # because the left-hand side is a rank-1 array
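    # For example, the right-hand side has shape (1, m_pos), and NumPy
    # cannot broadcast a (1, m) array into an (m,) target, so the
    # assignment raises a ValueError; slicing with 0:1 keeps the
    # left-hand side 2-D with shape (1, m_pos), and the shapes match.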

        X[0:1,0:m_pos] = R_pos*np.cos(Theta_pos)
        X[1:2,0:m_pos] = R_pos*np.sin(Theta_pos)
        Y[0,0:m_pos] = 1

        X[0:1,-m_neg:] = R_neg*np.cos(Theta_neg)
        X[1:2,-m_neg:] = R_neg*np.sin(Theta_neg)
        Y[0,-m_neg:] = 0

    if opt=='linear':        
        x1 = np.random.normal(loc=-1,scale=3,size=(1,m_pos))
        X[0:1,0:m_pos] = x1
        # overall linear trend
        X[1:2,0:m_pos] = 2*x1+10+0.1*x1**2
        # add noise
        X[1:2,0:m_pos] += np.random.normal(loc=0,scale=5,size=(1,m_pos))
        Y[0,0:m_pos] = 1
        # Note: Y[0:m_pos] would index along axis 0, i.e. select rows 0:m_pos,
        # not the columns we want

        x1 = np.random.normal(loc=1,scale=3,size=(1,m_neg))
        X[0:1,-m_neg:] = x1
        X[1:2,-m_neg:] = 2*x1-5-0.1*x1**2
        X[1:2,-m_neg:] += np.random.normal(loc=0,scale=5,size=(1,m_neg))

    if opt == 'cubic':
        x1 = np.random.normal(loc=0,scale=1.2,size=(1,m_pos))
        X[0:1,0:m_pos] = x1
        X[1:2,0:m_pos] = 8-12*x1+3*x1**2+2*x1**3
        X[1:2,0:m_pos] += np.maximum(np.random.normal(loc=10,scale=10,size=(1,m_pos)), -10)
        Y[0,0:m_pos] = 1

        x1 = np.random.normal(loc=0,scale=1.2,size=(1,m_neg))
        X[0:1,-m_neg:] = x1
        X[1:2,-m_neg:] = -5-10*x1+3*x1**2+2*x1**3
        X[1:2,-m_neg:] += np.minimum(np.random.normal(loc=-10,scale=10,size=(1,m_neg)),10)

    return X,Y


def plotData(X,Y):

    plt.figure()
    pos_idx = (Y==1)
    # shape (1, m)
    pos_idx = pos_idx[0,:]
    # shape (m,); only a 1-D mask like this can be used to index a single axis
    neg_idx = (Y==0)
    neg_idx = neg_idx[0,:]

    plt.plot(X[0,pos_idx],X[1,pos_idx],'r+')
    plt.plot(X[0,neg_idx],X[1,neg_idx],'bo')

def sigmoid(z):
    return 1/(1+np.exp(-z))
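# Note: for a large negative z, np.exp(-z) overflows and NumPy emits a
# RuntimeWarning; the result still saturates to 0, so it is harmless here.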

def lossFunction(Y,A):
    return -(Y*np.log(A)+(1-Y)*np.log(1-A))
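# lossFunction is the per-sample cross-entropy:
#   L(y, a) = -[y*log(a) + (1-y)*log(1-a)]
# It is small when the prediction a agrees with the label y.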

def costFunction(Y,A):
    m=Y.shape[1]
    return 1/m*np.sum(lossFunction(Y,A))
    # np.sum with no axis argument sums over all elements

def paraInitialization(size):
    return np.zeros(size),0

def plotDecisionBoundary(X,Y,w,b):
    # linear decision boundary: x2 = -(w[0]*x1 + b)/w[1]
    if np.abs(w[1]) < 0.0001:
        w[1] = 0.0001
    # avoid division by zero
    plotData(X,Y)
    x_plot = np.linspace(start=X[0,:].min(),stop=X[0,:].max(),num=100)
    y_plot = -w[0]/w[1]*x_plot - b/w[1]    
    plt.plot(x_plot,y_plot)


def predict(X,w,b):
    pred = sigmoid(np.dot(w.transpose(),X)+b)
    return pred

def gradientDescentOneStep(X,Y,w,b,alpha):
    m = Y.shape[1]
    Z = np.dot(w.transpose(),X) + b
    A = sigmoid(Z)
    dZ = A-Y
    dw = 1/m*np.dot(X,dZ.transpose())
    db = 1/m*np.sum(dZ)
    w = w - alpha * dw
    b = b - alpha * db
    return w,b
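# The simple form dZ = A - Y comes from combining the sigmoid with the
# cross-entropy loss: dL/dZ = A - Y, which gives the vectorized gradients
# dw = (1/m) * X.dot(dZ.T) and db = (1/m) * sum(dZ).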

def gradientDescent(X,Y,w,b,alpha,iternum):
    for i in range(iternum):
        w,b = gradientDescentOneStep(X,Y,w,b,alpha)
    return w,b

def polyFeature(X,p):
    m = X.shape[1]
    X_poly = np.zeros((p*(p+3)//2,m))
    x1 = X[0,:]
    x2 = X[1,:]
    row = 0
    for i in range(1,p+1):
        for j in range(i+1):
            X_poly[row,:] = x1**j * x2**(i-j)
            row += 1
    return X_poly
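# polyFeature maps (x1, x2) to every monomial x1**j * x2**(i-j) of total
# degree 1..p; e.g. for p=2 the rows are x2, x1, x2**2, x1*x2, x1**2,
# which is p*(p+3)/2 = 5 features.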

def featureNormalize(X):
    mu = np.mean(X,axis=1,keepdims=True)
    sigma = np.std(X,axis=1,keepdims=True)
    X_norm = (X-mu)/sigma
    return mu,sigma,X_norm
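# featureNormalize performs per-feature (per-row) z-score scaling; mu and
# sigma are returned so the same transform can be applied to new points,
# e.g. the plotting grid in plotDecisionBoundaryPoly below.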

def plotDecisionBoundaryPoly(X,Y,w,b,mu,sigma,p):
    plotData(X,Y)

    # Drawing the decision boundary with contour requires evaluating the
    # prediction function on a grid.
    # The grid is plot_num x plot_num, which sets the drawing resolution.
    # For vectorized computation, X_plot and Y_plot are reshaped into a
    # 2 x plot_num**2 array.

    plot_num = 50
    plot_num_2D = plot_num**2

    x_plot = np.linspace(start=X[0,:].min(),stop=X[0,:].max(),num=plot_num)
    y_plot = np.linspace(start=X[1,:].min(),stop=X[1,:].max(),num=plot_num)
    X_plot,Y_plot = np.meshgrid(x_plot,y_plot)

    X_array = np.zeros((2,plot_num_2D))
    X_array[0:1,:] = X_plot.reshape(1,plot_num_2D)
    X_array[1:2,:] = Y_plot.reshape(1,plot_num_2D)

    X_norm = (polyFeature(X_array,p)-mu)/sigma

    p_array = predict(X_norm,w,b)
    P_matrix = p_array.reshape((plot_num,plot_num))

    plt.contour(X_plot,Y_plot,P_matrix,levels=[0.5])
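    # The 0.5 probability level is exactly where w.T @ x + b = 0,
    # i.e. the decision boundary.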


# Generate data
X,Y = generateData('linear')

m=X.shape[1]
n=X.shape[0]

plotData(X,Y)

# Feature mapping
p=4
X_poly = polyFeature(X,p)

n=X_poly.shape[0]

# Feature normalize
mu,sigma,X_norm = featureNormalize(X_poly)

# Plot the sigmoid as a sanity check
plt.figure()
a=np.linspace(-10,10,100)
plt.plot(a,sigmoid(a))


# Initialization
w,b = paraInitialization((n,1))

# Gradient descent
w,b = gradientDescent(X_norm,Y,w,b,alpha=0.1,iternum=10000)

# Display result
plotDecisionBoundaryPoly(X,Y,w,b,mu,sigma,p)

print('Predict Accuracy:', ( (predict(X_norm,w,b) > 0.5) == Y ).astype('float').mean()*100, '%' )
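To try the other two distributions, change the argument passed to generateData at the top of the script and re-run the rest unchanged (a minimal sketch; the circular and cubic sets are not linearly separable, which is exactly what the polynomial feature mapping above is for):

X,Y = generateData('circle')
# or
X,Y = generateData('cubic')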

[Figure: linearly distributed data]
[Figure: circularly distributed data]
[Figure: cubic-function distributed data]
