協同過濾與隱語義模型推薦系統實例2: 基於相似度的推薦

[ 協同過濾與隱語義模型推薦系統實例1: 數據處理 ]

基於相似度的推薦系統

一. 排行榜單推薦

from sklearn.model_selection import train_test_split

# Alias (not a copy) of the merged dataset; the split below reads from it.
triplet_dataset_sub_song_merged_set = triplet_dataset_sub_song_merged
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
train_data, test_data = train_test_split(
    triplet_dataset_sub_song_merged_set,
    test_size=0.3,
    random_state=42,
)
# Preview the first rows of the training split.
train_data.head()

在這裏插入圖片描述

def create_popularity_recommendation(train_data, user_id, item_id, top_n=20):
    """Return the most popular items, ranked by how many rows mention them.

    Args:
        train_data: DataFrame holding one row per (user, item) interaction.
        user_id: Name of the column that is counted for each item.
        item_id: Name of the column identifying the item.
        top_n: Number of rows to keep from the top of the ranking
            (defaults to 20, the length previously hard-coded).

    Returns:
        A DataFrame with the columns ``item_id``, ``score`` (count of
        non-null ``user_id`` values for the item) and ``Rank`` (1 is the
        most popular), limited to the first ``top_n`` rows.
    """
    scores = (
        train_data.groupby([item_id])
        .agg({user_id: 'count'})
        .reset_index()
        .rename(columns={user_id: 'score'})
    )

    # Highest score first; equal scores are ordered by item so the output
    # is deterministic.
    ranked = scores.sort_values(['score', item_id], ascending=[False, True])
    # method='first' hands out distinct ranks in the sorted order above.
    ranked['Rank'] = ranked['score'].rank(ascending=False, method='first')
    return ranked.head(top_n)
# Build the popularity ranking of titles from the merged dataset.
# The helper is named create_popularity_recommendation; the previous call
# misspelled it and raised NameError.
commendations = create_popularity_recommendation(triplet_dataset_sub_song_merged, 'user', 'title')
commendations

在這裏插入圖片描述

二. 基於歌曲相似度的推薦

  • 選擇一小部分歌曲來實驗
# Work with the first 1000 rows of the song-count table only.
song_count_subset = song_count_df.head(1000)
user_subset = list(play_count_subset['user'])
song_subset = list(song_count_subset['song'])
# Keep the rows of the merged dataset whose song is in the chosen subset.
triplet_dataset_sub_song_merged_sub = triplet_dataset_sub_song_merged.loc[
    triplet_dataset_sub_song_merged['song'].isin(song_subset)
]
triplet_dataset_sub_song_merged_sub.head()

在這裏插入圖片描述

# 推薦系統
class item_similarity_recommender_py(object):
    """Item-similarity recommender based on shared users.

    Two items are considered similar when many of the same users appear
    with both of them: the similarity of items A and B is
    ``|users(A) & users(B)| / |users(A) | users(B)|`` (the Jaccard index).
    A candidate item's score is the mean of its similarities to the items
    in a reference list (a user's items, or an explicit item list).

    Call :meth:`create` before any other method.
    """

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        # NOTE: this attribute name (including its spelling) is kept
        # unchanged so outside code reading it keeps working; the class
        # itself never reads or updates it.
        self.coccurence_matrix = None
        self.songs_dict = None
        self.rev_songs_dict = None
        self.item_similarity_recommendations = None

    def get_user_items(self, user):
        """Return the list of distinct items found in ``user``'s rows."""
        user_data = self.train_data[self.train_data[self.user_id] == user]
        return list(user_data[self.item_id].unique())

    def get_item_users(self, item):
        """Return the set of distinct users found in ``item``'s rows."""
        item_data = self.train_data[self.train_data[self.item_id] == item]
        return set(item_data[self.user_id].unique())

    def get_all_items_train_data(self):
        """Return the list of distinct items in the training data."""
        return list(self.train_data[self.item_id].unique())

    def construct_cooccurence_matrix(self, user_songs, all_songs):
        """Build the similarity matrix between two item lists.

        Args:
            user_songs: Reference items (one matrix row each).
            all_songs: Candidate items (one matrix column each).

        Returns:
            An ``np.matrix`` of shape ``(len(user_songs), len(all_songs))``
            where entry ``[j, i]`` is the Jaccard index of the user sets of
            ``user_songs[j]`` and ``all_songs[i]`` (0 when they share no
            user).
        """
        # Index the users of every item in a single pass instead of
        # re-filtering the whole training frame once per item.
        users_by_item = {
            item: set(users.unique())
            for item, users in self.train_data.groupby(self.item_id)[self.user_id]
        }
        no_users = frozenset()  # items that never occur in the training data
        user_songs_users = [users_by_item.get(song, no_users) for song in user_songs]

        # np.matrix is kept so the return type stays what callers received
        # before.
        cooccurence_matrix = np.matrix(
            np.zeros(shape=(len(user_songs), len(all_songs))), float
        )
        for i, song in enumerate(all_songs):
            users_i = users_by_item.get(song, no_users)
            for j, users_j in enumerate(user_songs_users):
                shared = len(users_i & users_j)  # size of the intersection
                if shared:
                    # A non-empty intersection implies a non-empty union.
                    cooccurence_matrix[j, i] = shared / len(users_i | users_j)
        return cooccurence_matrix

    def generate_top_recommendations(self, user, cooccurence_matrix, all_songs, user_songs):
        """Rank candidate items by their mean similarity to ``user_songs``.

        Args:
            user: Value written to the ``user_id`` column of the result.
            cooccurence_matrix: Matrix with one row per entry of
                ``user_songs`` and one column per entry of ``all_songs``.
            all_songs: Candidate items, in matrix column order.
            user_songs: Items to exclude from the recommendations.

        Returns:
            A DataFrame with columns ``user_id``, ``song``, ``score`` and
            ``rank`` holding at most 10 rows, best score first; or ``-1``
            (after printing a message) when nothing can be recommended.
        """
        print('Non zero values in cooccurence_matrix:%d' % np.count_nonzero(cooccurence_matrix))

        columns = ['user_id', 'song', 'score', 'rank']
        max_recommendations = 10
        rows = []

        # With no reference items the mean would be a division by zero, so
        # skip scoring entirely; the empty result is reported below.
        if cooccurence_matrix.shape[0] > 0:
            mean_scores = cooccurence_matrix.sum(axis=0) / float(cooccurence_matrix.shape[0])
            # ravel() flattens both the 1xN np.matrix and a plain 1-D array.
            user_sim_scores = np.asarray(mean_scores).ravel().tolist()
            # Sort (score, column index) pairs so the best scores come first.
            sort_index = sorted(
                ((score, i) for i, score in enumerate(user_sim_scores)),
                reverse=True,
            )
            excluded = set(user_songs)
            for score, i in sort_index:
                if len(rows) == max_recommendations:
                    break
                song = all_songs[i]
                if np.isnan(score) or song in excluded:
                    continue
                rows.append([user, song, score, len(rows) + 1])

        if not rows:
            print('The current user has no songs for training the item similarity based recommendation model.')
            return -1
        return pd.DataFrame(rows, columns=columns)

    def create(self, train_data, user_id, item_id):
        """Store the training DataFrame and its user/item column names."""
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

    def recommend(self, user):
        """Recommend items for ``user`` based on the items in their rows.

        Returns the result of :meth:`generate_top_recommendations`: a
        DataFrame of recommendations, or ``-1`` when there are none.
        """
        user_songs = self.get_user_items(user)
        print('No. of unique songs for the user: %d' % len(user_songs))
        all_songs = self.get_all_items_train_data()
        print('No. of unique songs in the training set: %d' % len(all_songs))
        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
        return self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)

    def get_similar_items(self, item_list):
        """Return the items most similar to those in ``item_list``.

        The result has the same form as :meth:`recommend`, with an empty
        string in the ``user_id`` column.
        """
        user_songs = item_list
        all_songs = self.get_all_items_train_data()
        print('No. of unique songs in the training set: %d' % len(all_songs))
        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
        user = ''
        return self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)
# Split again, this time on the song-filtered subset (30% held out, fixed seed).
train_data, test_data = train_test_split(
    triplet_dataset_sub_song_merged_sub,
    test_size=0.3,
    random_state=42,
)
# Point a fresh recommender at the training split, keyed by user and title.
is_model = item_similarity_recommender_py()
is_model.create(train_data, 'user', 'title')

# Use the user found in the eighth training row as the example.
user_id = list(train_data['user'])[7]
user_items = is_model.get_user_items(user_id)
is_model.recommend(user_id)

No. of unique songs for the user: 19
No. of unique songs in the training set: 997
Non zero values in cooccurence_matrix:18876
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章