# Reference:
# Machine Learning in Action
'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 21:09:57
@LastEditTime: 2019-11-12 21:58:07
@FilePath: \\機器學習實戰\\14-SVD算法\\SVD.py
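@Description: SVD extracts the relevant features from noisy data. SVD is used to approximate a matrix and pull out its important features; by keeping 80%~90% of the matrix's energy,
the important features are retained and the noise is discarded.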
'''
import numpy as np
def loadExData():
    """Return the small 7x5 example rating table as a fresh list of row lists."""
    ratings = [
        [0, 0, 0, 2, 2],
        [0, 5, 0, 3, 3],
        [0, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 1, 2, 4, 0],
        [5, 5, 5, 3, 0],
        [1, 1, 1, 2, 0],
    ]
    return ratings
def loadExData2():
return [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
[0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
[0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
[3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
[5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
[0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
[4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
[0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
[0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
[0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]
def ecludSim(inA, inB):
    """Return a similarity score derived from the Euclidean distance of two vectors.

    The score is 1 / (1 + distance): identical vectors give 1.0 and the
    value shrinks towards 0 as the vectors move apart.
    """
    distance = np.linalg.norm(inA - inB)
    return 1.0 / (1.0 + distance)
def pearsSim(inA, inB):
    """Return the Pearson correlation of two column vectors mapped onto [0, 1].

    Inputs with fewer than three entries are reported as fully similar (1.0)
    without computing a correlation. Otherwise the coefficient r is
    rescaled as 0.5 + 0.5 * r.
    """
    too_short = len(inA) < 3
    if too_short:
        return 1.0
    corr_matrix = np.corrcoef(inA, inB, rowvar=False)
    # Off-diagonal entry is the correlation between inA and inB.
    return 0.5 + 0.5 * corr_matrix[0][1]
def cosSim(inA, inB):
    """Return the cosine similarity of two vectors mapped onto [0, 1].

    The cosine of the angle between the vectors is rescaled as
    0.5 + 0.5 * cos, so opposite vectors give 0.0 and parallel vectors 1.0.

    Arguments:
        inA -- first vector (np.matrix column, ndarray or sequence of numbers)
        inB -- second vector with the same number of elements as inA
    Returns:
        The rescaled cosine similarity. When either vector has zero norm the
        cosine is undefined; it is treated as 0, so 0.5 is returned instead
        of nan.
    """
    # Flatten to 1-D so the dot product is a true scalar for matrices,
    # arrays and plain sequences alike (float() on a 1x1 matrix is deprecated
    # and fails outright for 1-D arrays multiplied element-wise).
    vecA = np.asarray(inA, dtype=float).ravel()
    vecB = np.asarray(inB, dtype=float).ravel()
    denom = np.linalg.norm(vecA) * np.linalg.norm(vecB)
    if denom == 0:
        return 0.5
    num = float(np.dot(vecA, vecB))
    return 0.5 + 0.5 * (num / denom)
def test_Sim():
    """Print the three similarity scores for columns 0 and 4 of the small example data.

    The scores are printed in the order: Euclidean, cosine, Pearson.
    """
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    myDat = np.asmatrix(loadExData())
    colA, colB = myDat[:, 0], myDat[:, 4]
    for measure in (ecludSim, cosSim, pearsSim):
        print(measure(colA, colB))
def standEst(dataMat, user, simMeas, item):
    """Estimate a user's rating for an item from the user's other ratings.

    Every item the user has rated is compared with the target item using
    only the rows in which both items have a rating above zero. The estimate
    is the similarity-weighted average of the user's ratings. Each
    similarity is printed as it is computed.

    Arguments:
        dataMat -- np.matrix of ratings, indexed as dataMat[user, item]
        user -- row index of the user
        simMeas -- callable taking two column vectors and returning a similarity
        item -- column index of the item whose rating is estimated
    Returns:
        The weighted-average rating, or 0 when the similarities sum to zero.
    """
    itemCount = np.shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(itemCount):
        rating = dataMat[user, other]
        if rating != 0 and other != item:
            # Rows where both the target item and the other item are rated.
            ratedBoth = np.logical_and(dataMat[:, item].A > 0,
                                       dataMat[:, other].A > 0)
            rows = np.nonzero(ratedBoth)[0]
            if len(rows) != 0:
                similarity = simMeas(dataMat[rows, other], dataMat[rows, item])
            else:
                similarity = 0
            print('the {:d} and {:d} similarity is:{:.6f}'.format(
                item, other, similarity))
            weightSum += similarity
            weightedRatings += similarity * rating
    if weightSum == 0:
        return 0
    return weightedRatings / weightSum
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the N unrated items with the highest estimated rating for a user.

    Steps:
    (1) find the items the user has not rated, i.e. the zeros in the user's row;
    (2) estimate a rating for each of those items with estMethod;
    (3) sort the estimates from high to low and keep the first N.

    Arguments:
        dataMat -- np.matrix of ratings, indexed as dataMat[user, item]
        user -- row index of the user
    Keyword Arguments:
        N -- number of recommendations to return (default: 3)
        simMeas -- similarity function handed to estMethod (default: cosSim)
        estMethod -- rating estimator called as
            estMethod(dataMat, user, simMeas, item) (default: standEst)
    Returns:
        A list of (item, estimated rating) tuples, best first, or the string
        'all rated' when the user has no unrated items.
    """
    userRow = dataMat[user, :].A
    _, unratedItems = np.nonzero(userRow == 0)
    if len(unratedItems) == 0:
        return 'all rated'
    itemScores = []
    for item in unratedItems:
        score = estMethod(dataMat, user, simMeas, item)
        itemScores.append((item, score))
    itemScores.sort(key=lambda pair: pair[1], reverse=True)
    return itemScores[:N]
def test_recommend():
    """Print the default recommendations for user 2 on a modified copy of the example data."""
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    myDat = np.asmatrix(loadExData())
    # Overwrite a few ratings before asking for recommendations.
    myDat[0, 1] = myDat[0, 0] = myDat[1, 0] = myDat[2, 0] = 4
    myDat[3, 3] = 2
    print(recommend(myDat, 2))
def svdEst(dataMat, user, simMeas, item):
    """Estimate a user's rating for an item using SVD-reduced item vectors.

    The rating matrix is decomposed with SVD and every item is projected
    onto the leading singular directions (at most 4). Similarities between
    the target item and each item the user has rated are computed in that
    reduced space, and the estimate is the similarity-weighted average of
    the user's ratings. Each similarity is printed as it is computed.

    Note that the SVD is recomputed on every call.

    Arguments:
        dataMat -- rating matrix, indexed as dataMat[user, item]
        user -- row index of the user
        simMeas -- callable taking two column vectors and returning a similarity
        item -- column index of the item whose rating is estimated
    Returns:
        The weighted-average rating, or 0 when the similarities sum to zero.
    """
    # asmatrix is a no-op for np.matrix input and makes `*` mean matrix
    # multiplication for plain arrays too (np.mat is gone in NumPy 2.0).
    dataMat = np.asmatrix(dataMat)
    n = np.shape(dataMat)[1]
    U, Sigma, _ = np.linalg.svd(dataMat)
    # Keep 4 singular values as before, but never more than actually exist,
    # so matrices with fewer than 4 rows or columns no longer fail.
    k = min(4, len(Sigma))
    SigK = np.asmatrix(np.diag(Sigma[:k]))
    # One row per item, expressed in the k-dimensional reduced space.
    xformedItems = dataMat.T * U[:, :k] * SigK.I
    targetVec = xformedItems[item, :].T
    simTotal = 0.0
    ratSimTotal = 0.0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(targetVec, xformedItems[j, :].T)
        print('the {:d} and {:d} similarity is:{:.6f}'.format(
            item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal
def test_svdEst():
    """Print recommendations for user 1 on the larger example data using svdEst with pearsSim."""
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    myDat = np.asmatrix(loadExData2())
    print(recommend(myDat, 1, estMethod=svdEst, simMeas=pearsSim))
def printMat(inMat, thresh=0.8):
    """Print the top-left 32x32 region of a matrix as rows of 1s and 0s.

    A cell prints as 1 when its value is strictly greater than thresh and
    as 0 otherwise. Each row is followed by a space and a newline.
    """
    for row in range(32):
        for col in range(32):
            pixel = 1 if float(inMat[row, col]) > thresh else 0
            print(pixel, end='')
        print(' ')
def imgCompress(numSV=3, thresh=0.8, filename=r'14-SVD算法\0_5.txt'):
    """Print a 32-column text image before and after a truncated-SVD reconstruction.

    Each non-blank line of the input file contributes one matrix row built
    from its first 32 characters, each converted with int(). The matrix is
    decomposed with SVD, rebuilt from the first numSV singular values, and
    both versions are printed with printMat.

    Keyword Arguments:
        numSV -- number of singular values kept for the reconstruction (default: 3)
        thresh -- threshold handed to printMat (default: 0.8)
        filename -- path of the text image to read (default: the path that
            was previously hard-coded)
    """
    rows = []
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as imageFile:
        for line in imageFile:
            if not line.strip():
                # A blank line has no pixels; skipping avoids an IndexError.
                continue
            rows.append([int(line[i]) for i in range(32)])
    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.0.
    myMat = np.asmatrix(rows)
    print("****original matrix******")
    printMat(myMat, thresh)
    U, Sigma, VT = np.linalg.svd(myMat)
    # Diagonal matrix holding the leading numSV singular values.
    SigRecon = np.asmatrix(np.diag(Sigma[:numSV]))
    reconMat = U[:, :numSV] * SigRecon * VT[:numSV, :]
    print("****reconstructed matrix using %d singular values******" % numSV)
    printMat(reconMat, thresh)
if __name__ == '__main__':
    # When run as a script, run the image-compression demo keeping two singular values.
    imgCompress(numSV=2)