py實現LDA降維wine葡萄酒數據(無框架)

LDA代碼

理論部分看https://blog.csdn.net/jirong5206/article/details/105825057
相對於 PCA 降維,LDA 是有監督的,即算法中有 labels 參與,體現在先用 labels 把樣本分類、再取各類自己的 mean;而前一篇 PCA 中的 labels 只用來給散點染色以作區分而已。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class LDA:  # class names start with a capital letter by Python convention
    """Linear Discriminant Analysis written directly on top of NumPy.

    Typical call order: ``getSample()`` -> ``getSw()`` -> ``getSb()`` ->
    ``getU()`` -> ``getCoord()``.

    Attributes (all per-instance, created in ``__init__``):
        fileName: path or file-like object handed to ``pandas.read_csv``.
        labels: distinct class labels; ``labels[i]`` belongs to ``features[i]``.
        features: one 2-D array of samples (rows) per class.
        featureMean: one mean vector per class, aligned with ``features``.
        Sw: within-class scatter matrix (ndarray), set by ``getSw``.
        Sb: between-class scatter matrix (np.matrix), set by ``getSb``.
        Coord: projected samples per class (np.matrix), set by ``getCoord``.
        feature_test: unused placeholder kept for compatibility.
    """

    def __init__(self, filename="wine.txt"):
        self.fileName = filename
        self.labels = []
        self.features = []
        self.featureMean = []
        # Every mutable container is created here rather than on the class,
        # so that two LDA objects never share (and corrupt) each other's state.
        self.Sw = np.zeros((0, 0))
        self.Sb = np.zeros((0, 0))
        self.Coord = []
        self.feature_test = []

    def _classCounts(self):
        """Return the number of samples per class; raise if any class is empty."""
        counts = [len(group) for group in self.features]
        if not counts or min(counts) == 0:
            raise ValueError("no samples loaded; call getSample() first")
        return counts

    def _updateClassMeans(self):
        """Recompute ``featureMean`` from the current ``features``."""
        self.featureMean = [
            np.mean(np.asarray(group, dtype=float), axis=0)
            for group in self.features
        ]

    def getSample(self):
        """Load the data file and split its rows into one array per class.

        The file is read without a header row. Column 0 is taken as the class
        label and every remaining column as a feature. Classes are ordered by
        ascending label value. Any previously loaded data is discarded.
        """
        dataSet = pd.read_csv(self.fileName, header=None).values
        classColumn = dataSet[:, 0]
        self.labels = []
        self.features = []
        self.featureMean = []
        # Group by the label itself instead of by fixed row ranges so the
        # split stays correct for any class sizes and any row order.
        for label in np.unique(classColumn):
            self.labels.append(label)
            self.features.append(dataSet[classColumn == label, 1:])

    def getSw(self):
        """Compute the within-class scatter matrix and store it in ``Sw``.

        Each class is centred on its own mean; the summed outer products are
        divided by the total number of samples. ``featureMean`` is refreshed
        as a side effect.

        Raises:
            ValueError: if no samples are loaded or a class is empty.
        """
        counts = self._classCounts()
        self._updateClassMeans()
        dim = np.shape(self.features[0])[1]
        # Start from a fresh accumulator on every call; calling getSw twice
        # must not double the result.
        scatter = np.zeros((dim, dim))
        for group, mean in zip(self.features, self.featureMean):
            centred = np.asarray(group, dtype=float) - mean
            scatter += centred.T @ centred
        self.Sw = scatter / sum(counts)

    def getSb(self):
        """Compute the between-class scatter matrix and store it in ``Sb``.

        Every class contributes the outer product of (class mean - overall
        mean), weighted by the fraction of samples that belong to the class.
        The class means are recomputed here, so this method does not depend
        on ``getSw`` having been called first.

        Raises:
            ValueError: if no samples are loaded or a class is empty.
        """
        counts = np.asarray(self._classCounts(), dtype=float)
        self._updateClassMeans()
        priors = counts / counts.sum()
        means = np.asarray(self.featureMean, dtype=float)
        overallMean = priors @ means
        offsets = means - overallMean
        # sum_c prior_c * offset_c^T * offset_c, written as one product.
        self.Sb = np.asmatrix((offsets * priors[:, None]).T @ offsets)

    def getU(self, n=2):
        """Return the projection matrix made of the ``n`` leading directions.

        The eigenvectors of ``Sw^-1 * Sb`` are ranked by eigenvalue (largest
        first) and the first ``n`` are returned as columns. Only the
        direction matters, so the vectors are left as ``eig`` scales them.
        The eigenvalues are printed, unsorted, for inspection.

        Args:
            n: number of target dimensions (default 2).

        Returns:
            np.matrix whose columns are the selected eigenvectors.

        Raises:
            ValueError: if ``n < 1`` or ``Sw``/``Sb`` have not been computed.
            numpy.linalg.LinAlgError: if ``Sw`` is singular.
        """
        if n < 1:
            raise ValueError("n must be at least 1")
        Sw = np.asarray(self.Sw, dtype=float)
        Sb = np.asarray(self.Sb, dtype=float)
        if Sw.size == 0 or Sb.size == 0:
            raise ValueError("Sw and Sb are not computed; call getSw() and getSb() first")
        # solve(Sw, Sb) equals inv(Sw) @ Sb without forming the inverse.
        featureValues, featureVector = np.linalg.eig(np.linalg.solve(Sw, Sb))
        print(featureValues)
        # eig returns eigenvalues in no particular order, so rank them
        # explicitly rather than trusting fixed column positions.
        order = np.argsort(featureValues.real)[::-1][:n]
        # The product is not symmetric, so eig may hand back a complex array
        # with negligible imaginary parts; keep the real component.
        return np.asmatrix(featureVector[:, order].real)

    def getCoord(self, Vector):
        """Project every class onto ``Vector`` and store the result in ``Coord``.

        ``Coord`` is rebuilt on each call, so ``Coord[i]`` always corresponds
        to ``features[i]``.

        Args:
            Vector: projection matrix with one column per target dimension,
                for example the value returned by ``getU``.
        """
        projection = np.asmatrix(Vector)
        self.Coord = [
            np.asmatrix(np.asarray(group, dtype=float)) * projection
            for group in self.features
        ]


# Driver: load the wine data, build both scatter matrices, project the
# samples onto two dimensions and draw one scatter series per class.
lda = LDA()
lda.getSample()
lda.getSw()
lda.getSb()
# getU() is called without arguments: passing a positional value to a method
# declared as getU(self) raises TypeError.
Vector = lda.getU()
lda.getCoord(Vector)
# One colour per class, in the order the classes were loaded.
for classCoord, classColor in zip(lda.Coord, ("b", "r", "g")):
    plt.scatter(classCoord[:, 0].tolist(), classCoord[:, 1].tolist(), color=classColor)
plt.show()

效果:在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章