利用pandas和數據集ml-100k計算男女生對電影的看法

初學Python兩三天,代碼寫得很亂~

import pandas as pd
import numpy as np

# Load the three MovieLens 100k tables used below. Column names are passed
# explicitly through `names=` for every file.
DATA_DIR = '/Users/yuanwei/Downloads/ml-100k/'

# u.user: one '|'-separated row per user.
unames = ['userid', 'age', 'gender', 'occupation', 'zip code']
# A plain '|' separator replaces the former '\|' pattern, which was an invalid
# string escape and, being a regex, forced a fallback to the python engine.
users = pd.read_table(DATA_DIR + 'u.user', sep='|', names=unames)

# u.data: one rating per row, tab-separated (the read_table default).
rnames = ['userid', 'itemid', 'rating', 'timestamp']
ratings = pd.read_table(DATA_DIR + 'u.data', names=rnames)

# u.item: movie metadata followed by one 0/1 flag column per genre.
# The genre names 'Documentary' and 'Fantasy' were previously misspelled.
inames = [
    'itemid', 'movie title', 'release date', 'video release date', 'IMDB URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
# u.item holds accented movie titles stored as ISO-8859-1, so the default
# UTF-8 decoding fails on Python 3; decode it as latin-1 explicitly.
items = pd.read_table(DATA_DIR + 'u.item', sep='|', names=inames,
                      encoding='latin-1')

# Task 1: mean and standard deviation of movie ratings for male and female
# users. Each user is first reduced to their own average rating, and the
# statistics are then taken over those per-user averages.

# Keep only the columns needed to attach a gender to every rating.
users_df = users[['userid', 'gender']].copy()
ratings_df = ratings[['userid', 'rating']].copy()
rating_df = users_df.merge(ratings_df)  # joins on the shared 'userid' column

# pivot_table aggregates with the mean by default: one row per (gender, userid).
gender_table = rating_df.pivot_table(index=['gender', 'userid'], values='rating')
gender_df = pd.DataFrame(gender_table)

# Split the per-user averages by the 'gender' level of the row index.
gender_level = gender_df.index.get_level_values('gender')
Female_df = gender_df[gender_level == 'F']
Male_df = gender_df[gender_level == 'M']

male_scores = Male_df['rating']
female_scores = Female_df['rating']

male_mean = male_scores.sum() / len(male_scores)
female_mean = female_scores.sum() / len(female_scores)
# ddof=0 gives the population standard deviation, which is what np.std uses.
male_std = male_scores.std(ddof=0)
female_std = female_scores.std(ddof=0)

print("男生電影評分平均值:" + str(male_mean))
print("女生電影評分平均值:" + str(female_mean))
print("男生電影評分標準差:" + str(male_std))
print("女生電影評分標準差:" + str(female_std))
# Task 2: the ten movies with the highest mean rating for each gender.


def _top_rated_titles(frame, n=10):
    """Return the n highest mean ratings in `frame`, indexed by movie title.

    `frame` must provide 'movietitle' and 'rating' columns. The result is a
    Series sorted from the highest mean rating downwards.
    """
    mean_by_title = frame.groupby('movietitle')['rating'].mean()
    # head() replaces the former `[0:10,]` tuple index, which raises KeyError
    # on current pandas for a Series without a MultiIndex.
    return mean_by_title.sort_values(ascending=False).head(n)


# Keep only the columns needed to link a rating to its user and its movie.
ratings_df_2 = ratings[['userid', 'rating', 'itemid']].copy()
items_df = pd.DataFrame()
items_df['itemid'] = items['itemid']
items_df['movietitle'] = items['movie title']

# Attach gender to every rating, then the movie title; keys are spelled out so
# the joins do not depend on which column names happen to overlap.
tmp = pd.merge(users_df, ratings_df_2, on='userid')
gender_item_df = pd.merge(tmp, items_df, on='itemid')
Female_item_df = gender_item_df[gender_item_df['gender'] == 'F']
Male_item_df = gender_item_df[gender_item_df['gender'] == 'M']

# NOTE: the ranking uses the mean rating only, with no minimum number of
# ratings, so a title rated by very few users can top the list.
# The headings previously said "five" although ten rows are printed.
print("女生最愛看的十部電影")
print(_top_rated_titles(Female_item_df))
print("男生最愛看的十部電影")
print(_top_rated_titles(Male_item_df))

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章