物品相似度計算
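餘弦相似度公式: $$w_{ij} = \frac{|N(i) \cap N(j)|}{\sqrt{|N(i)|\,|N(j)|}}$$
其中,$|N(i)|$、$|N(j)|$ 分別表示喜歡物品 $i$、物品 $j$ 的用戶數,$|N(i) \cap N(j)|$ 爲同時喜歡 $i$ 和 $j$ 的人數。我們這裏還是使用漫威英雄舉例:假設目前共有5個用戶: A、B、C、D、E;共有5個漫威英雄人物:死侍、鋼鐵俠、美國隊長、黑豹、蜘蛛俠。用戶與人物之間的愛好程度如下圖所示: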
共現矩陣,記錄了同時愛好 i 和 j 的數量:
根據餘弦相似度公式計算相似度:
python 實現
import math
class ItemCF:
    """Item-based collaborative filtering over a user -> {item: score} table.

    A score greater than 0.0 means the user has interacted with (liked) the
    item; a score of 0.0 means no interaction. On construction the cosine
    item-item similarity matrix is computed once and stored in
    ``self.items_sim``.
    """

    def __init__(self, user_score_dict=None):
        """Build the recommender.

        Args:
            user_score_dict: Optional mapping ``{user: {item: score}}``. When
                omitted, the built-in sample data from ``initUserScore`` is
                used.
        """
        if user_score_dict is None:
            user_score_dict = self.initUserScore()
        self.user_score_dict = user_score_dict
        self.items_sim = self.ItemSimilarityBest()

    def initUserScore(self):
        """Return the built-in sample score table (4 users x 5 items)."""
        user_score_dict = {
            "A": {"a": 3.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 0.0},
            "B": {"a": 4.0, "b": 0.0, "c": 4.5, "d": 0.0, "e": 3.5},
            "C": {"a": 0.0, "b": 3.5, "c": 0.0, "d": 0.0, "e": 3.0},
            "D": {"a": 0.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 3.0},
        }
        return user_score_dict

    def _count_interactions(self):
        """Count per-item users and pairwise co-occurrences.

        Returns:
            A tuple ``(item_user_count, count)`` where ``item_user_count[i]``
            is the number of users with a positive score for item ``i`` and
            ``count[i][j]`` is the number of users with a positive score for
            both ``i`` and ``j`` (always 0 on the diagonal).
        """
        item_user_count = {}
        count = {}
        for scores in self.user_score_dict.values():
            for i, score_i in scores.items():
                item_user_count.setdefault(i, 0)
                row = count.setdefault(i, {})
                liked_i = score_i > 0.0
                if liked_i:
                    item_user_count[i] += 1
                for j, score_j in scores.items():
                    # Every pair seen together gets an entry, even when the
                    # co-occurrence stays 0, so the matrix rows are complete.
                    row.setdefault(j, 0)
                    if liked_i and score_j > 0.0 and i != j:
                        row[j] += 1
        return item_user_count, count

    def ItemSimilarity(self):
        """Compute the asymmetric similarity ``|N(i) & N(j)| / |N(i)|``.

        Returns:
            Nested dict ``{i: {j: similarity}}``. An item that no user has
            scored positively gets similarity 0.0 to everything instead of
            raising ``ZeroDivisionError``.
        """
        item_user_count, count = self._count_interactions()
        item_sim = {}
        for i, related_items in count.items():
            liked_i = item_user_count[i]
            row = item_sim.setdefault(i, {})
            for j, cuv in related_items.items():
                row[j] = cuv / liked_i if liked_i else 0.0
        return item_sim

    def ItemSimilarityBest(self):
        """Compute cosine similarity ``|N(i) & N(j)| / sqrt(|N(i)| * |N(j)|)``.

        Returns:
            Nested dict ``{i: {j: similarity}}``. When either item has no
            positive score the denominator is zero; the similarity is then
            defined as 0.0 instead of raising ``ZeroDivisionError``.
        """
        item_user_count, count = self._count_interactions()
        item_sim = {}
        for i, related_items in count.items():
            row = item_sim.setdefault(i, {})
            for j, cuv in related_items.items():
                norm = math.sqrt(item_user_count[i] * item_user_count[j])
                row[j] = cuv / norm if norm else 0.0
        return item_sim

    def preUserItemScore(self, userA, item):
        """Predict the score of ``userA`` for ``item``.

        The prediction is the sum, over every other item related to ``item``,
        of ``similarity(item, other) * userA's score for other``.

        Args:
            userA: Key of the user in ``self.user_score_dict``.
            item: Key of the item in ``self.items_sim``.

        Returns:
            The predicted score as a float.

        Raises:
            KeyError: If ``userA`` or ``item`` is unknown.
        """
        user_scores = self.user_score_dict[userA]
        score = 0.0
        for other, similarity in self.items_sim[item].items():
            if other != item:
                # Users need not have scored every item; a missing score is
                # treated as "no interaction" (0.0).
                score += similarity * user_scores.get(other, 0.0)
        return score

    def recommend(self, userA, *, only_unrated=False):
        """Predict scores for the items in ``userA``'s score table.

        Args:
            userA: Key of the user in ``self.user_score_dict``.
            only_unrated: When true, only items whose current score is <= 0
                are included. The default (false) keeps every item.

        Returns:
            Dict ``{item: predicted_score}``.

        Raises:
            KeyError: If ``userA`` is unknown.
        """
        user_item_score_dict = {}
        for item, current in self.user_score_dict[userA].items():
            if only_unrated and current > 0:
                continue
            user_item_score_dict[item] = self.preUserItemScore(userA, item)
        return user_item_score_dict
# Script entry point: build the recommender on the sample data and print the
# predicted scores for user "C".
if __name__ == "__main__":
    recommender = ItemCF()
    print(recommender.recommend("C"))