# -*- coding: utf-8 -*-
"""
Created on Fri Mar 2 22:45:41 2018
@author: Administrator
"""
import numpy as np
import matplotlib.pyplot as plt
def generateData(opt='linear'):
# 正負樣本個數
m_pos = 100;
m_neg = 100;
X = np.zeros((2,m_pos+m_neg))
Y = np.zeros((1,m_pos+m_neg))
# 分佈類型:環形、線性函數、三次函數
if opt=='circle':
R1_range = 10
R2_range = 5
R_pos = R1_range*np.random.rand(1,m_pos)
R_neg = R2_range*np.random.rand(1,m_neg)+0.9*R1_range
Theta_pos = np.pi*np.random.randn(1,m_pos)
Theta_neg = np.pi*np.random.randn(1,m_neg)
# !不能用 X[0,0:m_pos] = R_pos*np.cos(Theta_pos);
# 左邊是 "rank-1 array"
X[0:1,0:m_pos]=R_pos*np.cos(Theta_pos);
X[1:2,0:m_pos]=R_pos*np.sin(Theta_pos);
Y[0,0:m_pos]=1;
X[0:1,-m_neg:]=R_neg*np.cos(Theta_neg);
X[1:2,-m_neg:]=R_neg*np.sin(Theta_neg);
Y[0,-m_neg:]=0;
if opt=='linear':
x1 = np.random.normal(loc=-1,scale=3,size=(1,m_pos))
X[0:1,0:m_pos] = x1;
# 整體線性分佈
X[1:2,0:m_pos] = 2*x1+10+0.1*x1**2;
# 加噪聲
X[1:2,0:m_pos] += np.random.normal(loc=0,scale=5,size=(1,m_pos));
Y[0,0:m_pos] = 1;
# !不能用y[0:m_pos],這樣會在axis 0也就是行上面索引,0:m_pos行
x1 = np.random.normal(loc=1,scale=3,size=(1,m_neg))
X[0:1,-m_neg:] = x1;
X[1:2,-m_neg:] = 2*x1-5-0.1*x1**2
X[1:2,-m_neg:] += np.random.normal(loc=0,scale=5,size=(1,m_neg))
if opt == 'cubic':
x1 = np.random.normal(loc=0,scale=1.2,size=(1,m_pos))
X[0:1,0:m_pos] = x1;
X[1:2,0:m_pos] = 8-12*x1+3*x1**2+2*x1**3;
X[1:2,0:m_pos] += np.maximum(np.random.normal(loc=10,scale=10,size=(1,m_pos)) , -10);
Y[0,0:m_pos] = 1;
x1 = np.random.normal(loc=0,scale=1.2,size=(1,m_neg))
X[0:1,-m_neg:] = x1;
X[1:2,-m_neg:] = -5-10*x1+3*x1**2+2*x1**3;
X[1:2,-m_neg:] += np.minimum(np.random.normal(loc=-10,scale=10,size=(1,m_neg)),10)
return X,Y
def plotData(X,Y):
plt.figure()
pos_idx = (Y==1);
# size 1,m
pos_idx = pos_idx[0,:];
# size m, 這時纔可用來索引某[一]個維度
neg_idx = (Y==0);
neg_idx = neg_idx[0,:];
plt.plot(X[0,pos_idx],X[1,pos_idx],'r+')
plt.plot(X[0,neg_idx],X[1,neg_idx],'bo')
def sigmoid(z):
return 1/(1+np.exp(-z))
def lossFunction(Y,A):
return -(Y*np.log(A)+(1-Y)*np.log(1-A))
def costFunction(Y,A):
m=Y.shape[1]
return 1/m*np.sum(lossFunction(Y,A))
# np.sum :對全體而言
def paraInitialization(size):
return np.zeros(size),0
def plotDecisioinBoundary(X,Y,w,b):
# 線性決策邊界
if(np.abs(w[1])<0.0001):
w[1]=0.0001
# 避免分母爲0
plotData(X,Y)
x_plot = np.linspace(start=X[0,:].min(),stop=X[0,:].max(),num=100)
y_plot = -w[0]/w[1]*x_plot - b/w[1]
plt.plot(x_plot,y_plot)
def predict(X,w,b):
pred = sigmoid(np.dot(w.transpose(),X)+b)
return pred
def gradientDescentOneStep(X,Y,w,b,alpha):
m = Y.shape[1]
Z = np.dot(w.transpose(),X) + b;
A = sigmoid(Z);
dZ = A-Y;
dw = 1/m*np.dot(X,dZ.transpose())
db = 1/m*np.sum(dZ)
w = w - alpha * dw
b = b - alpha * db
return w,b
def gradientDescent(X,Y,w,b,alpha,iternum):
for i in range(iternum):
w,b = gradientDescentOneStep(X,Y,w,b,alpha)
return w,b
def polyFeature(X,p):
m = X.shape[1];
X_poly = np.zeros((p*(p+3)//2,m))
x1 = X[0,:];
x2 = X[1,:];
row=0;
for i in range(1,p+1):
for j in range(i+1):
X_poly[row,:] = x1**j * x2**(i-j)
row += 1;
return X_poly
def featureNormalize(X):
mu = np.mean(X,axis=1,keepdims=True)
sigma = np.std(X,axis=1,keepdims=True)
X_norm = (X-mu)/sigma
return mu,sigma,X_norm
def plotDecisioinBoundaryPoly(X,Y,w,b,mu,sigma,p):
plotData(X,Y)
# 利用contour繪製決策邊界,需要在網格上計算預測函數
# 網格大小爲plot_num × plot_num,其決定了邊界的繪製精度
# 爲了向量化計算,要將X_plot,Y_plot轉換爲2 × plot_num^2大小
plot_num=50;
plot_num_2D=plot_num**2;
x_plot = np.linspace(start=X[0,:].min(),stop=X[0,:].max(),num=plot_num)
y_plot = np.linspace(start=X[1,:].min(),stop=X[1,:].max(),num=plot_num)
X_plot,Y_plot = np.meshgrid(x_plot,y_plot)
X_array = np.zeros((2,plot_num_2D))
X_array[0:1,:] = X_plot.reshape(1,plot_num_2D)
X_array[1:2,:] = Y_plot.reshape(1,plot_num_2D)
X_norm = (polyFeature(X_array,p)-mu)/sigma
p_array = predict(X_norm,w,b)
P_matrix = p_array.reshape((plot_num,plot_num))
plt.contour(X_plot,Y_plot,P_matrix,0.5)
# Gnertate data
X,Y=generateData('linear');
m=X.shape[1]
n=X.shape[0]
plotData(X,Y)
# Feature mapping
p=4
X_poly = polyFeature(X,p);
n=X_poly.shape[0]
# Featre normalize
mu,sigma,X_norm =featureNormalize(X_poly)
# Implement sigmoid
plt.figure()
a=np.linspace(-10,10,100)
plt.plot(a,sigmoid(a))
# Initialization
w,b = paraInitialization((n,1))
# Gradient descent
w,b = gradientDescent(X_norm,Y,w,b,alpha=0.1,iternum=10000)
# Dislay result
plotDecisioinBoundaryPoly(X,Y,w,b,mu,sigma,p)
print('Predict Accuracy:', ( (predict(X_norm,w,b) > 0.5) == Y ).astype('float').mean()*100, '%' )
線性分佈數據
環形分佈數據
三次函數分佈數據