# Python3《機器學習實戰》代碼筆記(十四)--- SVD算法
#
# 參考資料:
#
# 《機器學習實戰》

'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 21:09:57
@LastEditTime: 2019-11-12 21:58:07
@FilePath: \\機器學習實戰\\14-SVD算法\\SVD.py
@Description: SVD可從有噪聲的數據中抽取相關特徵:利用SVD逼近原矩陣並從中提取重要特徵,只要保留矩陣80%~90%的能量,
就可以得到重要的特徵並去掉噪聲
'''


import numpy as np


def loadExData():
    """Return a small hard-coded sample table with 7 rows and 5 columns.

    The callers in this file wrap the result in a matrix and index it as
    ``dataMat[user, item]``, i.e. rows are users and columns are items;
    a value of 0 is treated by those callers as "not rated".

    Returns:
        list[list[int]] -- a freshly built nested list on every call, so
        callers may mutate it without affecting later calls.
    """
    sampleRatings = [
        [0, 0, 0, 2, 2],
        [0, 5, 0, 3, 3],
        [0, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 1, 2, 4, 0],
        [5, 5, 5, 3, 0],
        [1, 1, 1, 2, 0],
    ]
    return sampleRatings


def loadExData2():
    """Return a larger hard-coded sample table with 11 rows and 11 columns.

    The callers in this file wrap the result in a matrix and index it as
    ``dataMat[user, item]``; a value of 0 is treated as "not rated".

    Returns:
        list[list[int]] -- a freshly built nested list on every call.
    """
    sampleRatings = [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
    return sampleRatings


def ecludSim(inA, inB):
    """Return a similarity score derived from the Euclidean distance.

    The distance ``d = ||inA - inB||`` is mapped to ``1 / (1 + d)``, so
    identical vectors score 1.0 and the score shrinks towards 0 as the
    vectors move apart.

    Arguments:
        inA -- first vector (any type supporting ``inA - inB``)
        inB -- second vector, same shape as ``inA``

    Returns:
        float -- similarity in (0, 1]
    """
    distance = np.linalg.norm(inA - inB)
    return 1.0 / (1.0 + distance)


def pearsSim(inA, inB):
    """Return the Pearson correlation of two column vectors, rescaled to [0, 1].

    The raw coefficient ``r`` in [-1, 1] is mapped to ``0.5 + 0.5 * r``.
    Inputs with fewer than 3 entries are reported as fully similar (1.0)
    without computing a correlation.

    Arguments:
        inA -- first column vector
        inB -- second column vector, same length as ``inA``

    Returns:
        float -- rescaled correlation
    """
    if len(inA) < 3:
        return 1.0
    # rowvar=False: each column is a variable, each row an observation.
    corrMatrix = np.corrcoef(inA, inB, rowvar=False)
    pearson = corrMatrix[0][1]
    return 0.5 + 0.5 * pearson


def cosSim(inA, inB):
    """Return the cosine similarity of two vectors, rescaled to [0, 1].

    The raw cosine in [-1, 1] is mapped to ``0.5 + 0.5 * cos``.

    Both inputs are flattened to 1-D float arrays first, so column
    matrices, column arrays and plain 1-D sequences are all accepted.
    This also avoids calling ``float()`` on a 1x1 matrix.

    Arguments:
        inA -- first vector (np.matrix, ndarray or sequence)
        inB -- second vector with the same number of elements

    Returns:
        float -- rescaled cosine similarity; 0.0 when either vector has
        zero norm, because the cosine is undefined there and would
        otherwise come out as NaN.
    """
    vecA = np.asarray(inA, dtype=float).ravel()
    vecB = np.asarray(inB, dtype=float).ravel()
    denom = np.linalg.norm(vecA) * np.linalg.norm(vecB)
    if denom == 0:
        # A zero vector has no direction: report "no similarity", not NaN.
        return 0.0
    num = float(np.dot(vecA, vecB))
    return 0.5 + 0.5 * (num / float(denom))


def test_Sim():
    """Print the three similarity measures for columns 0 and 4 of the sample data.

    Output order is: Euclidean-based, cosine, Pearson.
    ``np.asmatrix`` is used instead of ``np.mat``, which was removed from
    the NumPy namespace in NumPy 2.0.
    """
    myDat = np.asmatrix(loadExData())
    firstCol = myDat[:, 0]
    lastCol = myDat[:, 4]
    print(ecludSim(firstCol, lastCol))
    print(cosSim(firstCol, lastCol))
    print(pearsSim(firstCol, lastCol))


def standEst(dataMat, user, simMeas, item):
    """Estimate the rating ``user`` would give ``item`` using item-item similarity.

    For every other item the user has rated (non-zero entry), the
    similarity between that item and ``item`` is computed over the rows
    in which both columns are positive. The estimate is the
    similarity-weighted average of the user's ratings. Each similarity
    is also printed.

    Arguments:
        dataMat -- np.matrix indexed as [user, item]; 0 means "not rated"
                   (the code relies on the matrix-only ``.A`` attribute)
        user    -- row index of the user
        simMeas -- callable(colA, colB) returning a similarity score
        item    -- column index of the item to estimate

    Returns:
        0 when the accumulated similarity is zero, otherwise the
        weighted-average rating.
    """
    numItems = dataMat.shape[1]  # number of columns, i.e. items
    weightSum = 0.0
    weightedRatings = 0.0
    targetRated = dataMat[:, item].A > 0

    for other in range(numItems):
        rating = dataMat[user, other]
        # Only items the user actually rated, and never the target itself.
        if j_is_skipped(rating, other, item):
            continue
        # Rows where both the target item and this item have a rating.
        bothRated = np.nonzero(targetRated & (dataMat[:, other].A > 0))[0]
        if len(bothRated) > 0:
            sim = simMeas(dataMat[bothRated, other], dataMat[bothRated, item])
        else:
            # No shared rows at all: treat the items as unrelated.
            sim = 0
        print('the {:d} and {:d} similarity is:{:.6f}'.format(
            item, other, sim))
        weightSum += sim
        weightedRatings += sim * rating

    if weightSum == 0:
        return 0
    return weightedRatings / weightSum


def j_is_skipped(rating, column, item):
    """Return True when ``column`` is unrated by the user or is the target item."""
    return rating == 0 or column == item


def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the top-N unrated items for ``user`` with their estimated ratings.

    Steps:
        (1) find the items the user has not rated (zero entries in the
            user's row);
        (2) estimate a rating for each of them with ``estMethod``;
        (3) sort the estimates from high to low and keep the first N.

    Arguments:
        dataMat -- np.matrix indexed as [user, item]; 0 means "not rated"
        user    -- row index of the user

    Keyword Arguments:
        N         -- maximum number of recommendations (default: 3)
        simMeas   -- similarity function handed to estMethod (default: cosSim)
        estMethod -- callable(dataMat, user, simMeas, item) returning an
                     estimated rating (default: standEst)

    Returns:
        The string 'all rated' when the user has no zero entries,
        otherwise a list of at most N ``(item, estimatedScore)`` tuples
        ordered by descending score.
    """
    userRow = dataMat[user, :].A
    # Column indices of the zero entries in the user's 1 x n row.
    unratedItems = np.nonzero(userRow == 0)[1]
    if len(unratedItems) == 0:
        return 'all rated'

    itemScores = []
    for item in unratedItems:
        estimatedScore = estMethod(dataMat, user, simMeas, item)
        itemScores.append((item, estimatedScore))

    # Stable sort, highest estimate first.
    itemScores.sort(key=lambda pair: pair[1], reverse=True)
    return itemScores[:N]


def test_recommend():
    """Print recommendations for user 2 on a modified copy of the sample data.

    A few entries of the sample matrix are overwritten before calling
    ``recommend`` with its default similarity and estimator.
    ``np.asmatrix`` is used instead of ``np.mat``, which was removed from
    the NumPy namespace in NumPy 2.0.
    """
    myDat = np.asmatrix(loadExData())
    myDat[0, 1] = myDat[0, 0] = myDat[1, 0] = myDat[2, 0] = 4
    myDat[3, 3] = 2
    print(recommend(myDat, 2))


def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate the rating ``user`` would give ``item`` in an SVD-reduced space.

    The rating matrix is decomposed with SVD and every item is projected
    onto the leading ``numSV`` singular directions. Similarities between
    ``item`` and each item the user has rated are computed on these
    reduced vectors, and the estimate is the similarity-weighted average
    of the user's ratings. Each similarity is also printed.

    Arguments:
        dataMat -- rating matrix indexed as [user, item]; 0 means "not rated"
        user    -- row index of the user
        simMeas -- callable(vecA, vecB) taking two column vectors
        item    -- column index of the item to estimate

    Keyword Arguments:
        numSV -- number of singular values to keep (default: 4, the value
                 that used to be hard-coded; the original comment says
                 this holds about 90% of the energy for the sample data,
                 which is not verified here). Clamped to the number of
                 singular values available, so matrices with fewer than
                 4 rows or columns no longer fail.

    Returns:
        0 when the accumulated similarity is zero, otherwise the
        weighted-average rating.

    Raises:
        ValueError -- if ``numSV`` is smaller than 1.

    NOTE: the decomposition is recomputed on every call; callers that
    estimate many items for the same matrix pay for it each time.
    """
    if numSV < 1:
        raise ValueError('numSV must be at least 1')

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    dataMat = np.asmatrix(dataMat)
    numItems = dataMat.shape[1]  # number of columns, i.e. items
    simTotal = 0.0
    ratSimTotal = 0.0

    U, Sigma, _ = np.linalg.svd(dataMat)
    keep = min(numSV, len(Sigma))
    # Diagonal matrix of the retained singular values.
    sigKeep = np.asmatrix(np.diag(Sigma[:keep]))
    # Project items into the reduced space: one row per item, `keep` columns.
    xformedItems = dataMat.T * np.asmatrix(U)[:, :keep] * sigKeep.I

    for j in range(numItems):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        print('the {:d} and {:d} similarity is:{:.6f}'.format(
            item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating

    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def test_svdEst():
    """Print SVD-based recommendations for user 1 on the larger sample data.

    Uses ``svdEst`` as the estimator with Pearson similarity.
    ``np.asmatrix`` is used instead of ``np.mat``, which was removed from
    the NumPy namespace in NumPy 2.0.
    """
    myDat = np.asmatrix(loadExData2())
    # print(recommend(myDat, 1, estMethod=svdEst))
    print(recommend(myDat, 1, estMethod=svdEst, simMeas=pearsSim))


# Print a matrix as rows of 0/1 characters.
def printMat(inMat, thresh=0.8):
    """Print ``inMat`` as text, one line per row, thresholded to 0/1.

    Every element strictly greater than ``thresh`` is printed as ``1``,
    everything else as ``0``. Each line ends with a single space before
    the newline, as in the original output format.

    The loop bounds come from the matrix's own shape rather than a fixed
    32x32, so smaller inputs no longer raise IndexError and larger ones
    are printed in full.

    Arguments:
        inMat -- 2-D matrix or array supporting ``inMat[i, k]`` indexing

    Keyword Arguments:
        thresh -- cut-off between 0 and 1 output (default: 0.8)
    """
    numRows, numCols = np.shape(inMat)
    for i in range(numRows):
        rowText = ''.join(
            '1' if float(inMat[i, k]) > thresh else '0'
            for k in range(numCols))
        print(rowText + ' ')


def imgCompress(numSV=3, thresh=0.8, fileName=r'14-SVD算法\0_5.txt'):
    """Load a digit image from a text file and show its low-rank SVD reconstruction.

    The first 32 characters of every line in the file are parsed as
    digits. The resulting matrix is printed, decomposed with SVD,
    rebuilt from the leading ``numSV`` singular values, and printed again.

    Keyword Arguments:
        numSV    -- number of singular values used for the reconstruction
                    (default: 3)
        thresh   -- threshold passed to printMat (default: 0.8)
        fileName -- path of the image text file; the default is the path
                    that used to be hard-coded (relative, with a Windows
                    style separator)

    Raises:
        OSError    -- if the file cannot be opened.
        IndexError -- if a line has fewer than 32 characters.
        ValueError -- if one of the first 32 characters is not a digit.
    """
    # The context manager closes the file even when parsing fails.
    with open(fileName) as imgFile:
        myl = [[int(line[i]) for i in range(32)] for line in imgFile]

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    myMat = np.asmatrix(myl)
    print("****original matrix******")
    printMat(myMat, thresh)

    U, Sigma, VT = np.linalg.svd(myMat)
    # Diagonal matrix built from the leading singular values.
    SigRecon = np.asmatrix(np.diag(Sigma[:numSV]))
    reconMat = (np.asmatrix(U)[:, :numSV] * SigRecon
                * np.asmatrix(VT)[:numSV, :])
    print("****reconstructed matrix using %d singular values******" % numSV)
    printMat(reconMat, thresh)


if __name__ == '__main__':
    # Script entry point. The other demos are kept commented out; only the
    # image-compression demo runs, keeping the first 2 singular values.
    # test_Sim()
    # test_recommend()
    # test_svdEst()
    imgCompress(2)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章