數據集:
代碼:
import pandas as pd
import numpy as np
from pandas import *
from numpy import *
import matplotlib.pyplot as plt
def readdata(path=r'C:\Users\zmy\Desktop\titanic\watermelon.csv'):
    """Load the feature and label matrices from a comma-separated file.

    The file is read with one variable per row and one sample per column.
    Rows 0 and 1 become the two feature columns and row 3 becomes the label
    column.

    NOTE(review): row 2 of the file is never read -- confirm against the
    actual CSV layout that this is intentional.

    Args:
        path: Location of the CSV file. Defaults to the path the script was
            originally written against, so ``readdata()`` keeps working.

    Returns:
        A ``(df1, df2)`` tuple of ``numpy.matrix`` objects: ``df1`` has shape
        (n_samples, 2) and ``df2`` has shape (n_samples, 1).
    """
    dataset = np.loadtxt(path, delimiter=",")
    feature_rows = dataset[0:2, :]
    label_row = dataset[3, :]
    # np.asmatrix replaces the `mat` alias, which NumPy 2.0 removed from the
    # main namespace; the returned type is still numpy.matrix.
    df1 = np.asmatrix(feature_rows).transpose()
    df2 = np.asmatrix(label_row).transpose()
    return df1, df2
def LDA(df1, df2):
    """Compute the Fisher linear-discriminant direction for two classes.

    Samples whose label equals 1 form one class and samples whose label
    equals 0 form the other; rows with any other label are ignored.

    Args:
        df1: (n_samples, n_features) array-like or ``numpy.matrix`` of
            feature values.
        df2: Labels, one per sample; either a flat sequence or an
            (n_samples, 1) column.

    Returns:
        A (1, n_features) ``numpy.matrix`` equal to
        ``(mean0 - mean1) * Sw^-1``, where ``Sw`` is the within-class
        scatter matrix summed over both classes.

    Raises:
        ValueError: If the number of labels differs from the number of
            samples, or if either class has no samples.
        numpy.linalg.LinAlgError: If the within-class scatter matrix is
            singular.
    """
    samples = np.asarray(df1, dtype=float)
    labels = np.asarray(df2).ravel()
    if samples.ndim != 2 or samples.shape[0] != labels.shape[0]:
        raise ValueError("df1 must be 2-D with one label in df2 per row")

    X1 = samples[labels == 1]
    X0 = samples[labels == 0]
    if X1.shape[0] == 0 or X0.shape[0] == 0:
        # Without this check the class mean would silently become NaN.
        raise ValueError("both classes (label 0 and label 1) need samples")

    mean1 = X1.mean(axis=0)
    mean0 = X0.mean(axis=0)

    # Within-class scatter: sum over samples of (x - mean)^T (x - mean),
    # written as one matrix product per class instead of a Python loop.
    centered1 = X1 - mean1
    centered0 = X0 - mean0
    sw = np.dot(centered1.T, centered1) + np.dot(centered0.T, centered0)

    # np.asmatrix replaces the `mat` alias removed in NumPy 2.0; the result
    # stays a numpy.matrix so callers can keep indexing it as w[0, k].
    w = np.asmatrix(mean0 - mean1) * np.asmatrix(sw).I
    return w
def plot(w,df1,df2):
    """Scatter the two classes and draw the line ``w[0,0]*x + w[0,1]*y = 0``.

    Samples labelled 1 are drawn as red squares; every other sample is drawn
    as a green dot. The shapes of the line's coordinate arrays and the
    vector ``w`` are printed, then the figure is shown.

    Args:
        w: Direction indexable as ``w[0, 0]`` and ``w[0, 1]``.
        df1: (n_samples, 2) array-like of feature values; column 0 goes on
            the x axis and column 1 on the y axis.
        df2: Labels, one per sample.

    Returns:
        None.
    """
    points = np.asarray(df1)
    labels = np.asarray(df2).ravel()
    is_positive = labels == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(points[is_positive, 0], points[is_positive, 1],
               s=30, c='red', marker='s')
    ax.scatter(points[~is_positive, 0], points[~is_positive, 1],
               s=30, c='green')

    # Line through the origin satisfying w[0,0]*x + w[0,1]*y = 0.
    # NOTE(review): if w[0,1] is zero this division produces inf/nan values
    # rather than a drawable line.
    x = np.arange(-0.2, 0.8, 0.1)
    y = np.asarray((-w[0, 0] * x) / w[0, 1])
    # Single-argument print(...) calls give the same output on Python 2 and
    # Python 3; the original print statements are Python 2 only.
    print(np.shape(x))
    print(np.shape(y))

    plt.sca(ax)
    plt.plot(x, y)
    plt.xlabel('density')
    plt.ylabel('ratio_sugar')
    plt.title('LDA')
    print(w)
    plt.show()
# Script driver: load the data, fit the LDA direction, then plot the result.
df1, df2 = readdata()
w = LDA(df1=df1, df2=df2)
plot(w=w, df1=df1, df2=df2)
結果: