基於Python的推薦系統

1、算法1:基於召喚師信息推薦

我們編寫程序,對從帶玩公司提供的接口所抓取下來的數據進行處理:從這些數據中提取每個召喚師在上路、中路、下路ADC、輔助、打野五個位置的使用頻率作爲向量,計算該向量與其他召喚師向量之間的歐氏距離,並把距離最小(即最相近)的一位召喚師推薦給他(推薦結果的信息包含該召喚師最常用的三位英雄,及其各位置的使用頻次)。

#coding=utf-8
import sqlite3
import numpy
def jisuan(vec1,vec2):
    """Return the Euclidean distance between two vectors.

    Args:
        vec1: Array-like of numbers (list, tuple or numpy array).
        vec2: Array-like of numbers with a shape broadcastable to ``vec1``.

    Returns:
        The distance as a numpy float.
    """
    # Convert to float arrays first: plain lists do not support "-", and
    # unsigned / small integer dtypes would silently wrap around.
    diff = numpy.asarray(vec1, dtype=float) - numpy.asarray(vec2, dtype=float)
    return numpy.sqrt(numpy.sum(numpy.square(diff)))
def tuijian(usename1, id1,
            userlist_path=r'C:\Users\win10\Desktop\LOL\userlist.txt',
            db_path=r'C:\Users\win10\Desktop\LOL\shujuku.db'):
    """Recommend the summoner whose position-usage vector is closest to a user's.

    The user is looked up in the comma-separated user list (field 0 = name,
    field 1 = qquin, field 2 = id) to obtain the qquin, which is then matched
    against column 1 of the ``catalog`` table.  Columns 9-13 of every catalog
    row form the comparison vector (presumably the top / mid / ADC / support /
    jungle usage frequencies -- confirm against the table schema).

    Args:
        usename1: Summoner name, compared with field 0 of each list line.
        id1: Id string, compared with field 2 of each list line.
        userlist_path: Path of the user list text file.
        db_path: Path of the SQLite database containing ``catalog``.

    Returns:
        The full ``catalog`` row (tuple) of the nearest *other* summoner by
        Euclidean distance, or ``None`` when no other comparable row exists.

    Raises:
        ValueError: If the user is missing from the list or from ``catalog``.
    """
    qquin = None
    with open(userlist_path, 'r') as listing:
        for line in listing:
            # Drop the line terminator so the id still matches when it is
            # the last field on the line.
            fields = line.rstrip('\r\n').split(',')
            if len(fields) < 3:
                continue
            if fields[0] == usename1 and fields[2] == id1:
                qquin = fields[1]  # no break: the last matching line wins
    if qquin is None:
        raise ValueError('user %r with id %r not found in %s'
                         % (usename1, id1, userlist_path))

    cx = sqlite3.connect(db_path)
    try:
        cu = cx.cursor()
        cu.execute("select * from catalog")
        alluser = cu.fetchall()
    finally:
        cx.close()

    me = None
    for idx, row in enumerate(alluser):
        if row[1] == qquin:
            me = idx
            break
    if me is None:
        raise ValueError('qquin %r not found in table catalog' % (qquin,))

    vectors = numpy.array([row[9:14] for row in alluser], dtype=float)
    dists = numpy.sqrt(numpy.sum(numpy.square(vectors - vectors[me]), axis=1))
    # Never recommend the user to themselves, even when another summoner has
    # an identical vector (distance 0).
    dists[me] = numpy.inf
    # Rows with missing (NULL) frequencies cannot be compared.
    dists[numpy.isnan(dists)] = numpy.inf

    best = int(numpy.argmin(dists))  # first index on ties
    if not numpy.isfinite(dists[best]):
        return None
    return alluser[best]
# Demo call: print the catalog row of the summoner recommended for this user.
print(tuijian('殺你0沒商量', '9'))

2、算法2:基於英雄信息推薦

通過爲各個英雄的位置、標籤、上手難度,以及在法師、坦克等方面的能力建立向量,再比較向量之間歐氏距離的大小,爲召喚師推薦三個和他最擅長使用的三個英雄最相似的英雄。

#coding=utf-8
import sqlite3
import numpy
import xlrd
def jisuan(vec1,vec2):
    """Compute the Euclidean distance between two feature vectors.

    Args:
        vec1: Array-like of numbers (list, tuple or numpy array).
        vec2: Array-like of numbers with a shape broadcastable to ``vec1``.

    Returns:
        The distance as a numpy float.
    """
    # Work on float arrays so that plain lists are accepted and integer
    # dtypes cannot overflow or wrap around during subtraction/squaring.
    delta = numpy.asarray(vec1, dtype=float) - numpy.asarray(vec2, dtype=float)
    return numpy.sqrt(numpy.sum(numpy.square(delta)))
# Sheet columns that make up a hero's feature vector (column 19 is skipped,
# exactly as in the original hand-written index lists).
_HERO_FEATURE_COLUMNS = tuple(range(6, 19)) + tuple(range(20, 26))


def _hero_features(row_values):
    """Return the 19 feature cells of one sheet row as a float array."""
    return numpy.array([row_values[c] for c in _HERO_FEATURE_COLUMNS],
                       dtype=float)


def hero_tuijian(u_name, id1,
                 userlist_path=r'C:\Users\win10\Desktop\LOL\userlist.txt',
                 db_path=r'C:\Users\win10\Desktop\LOL\shujuku.db',
                 heroes_path=r'C:\Users\win10\Desktop\LOL\dataForHeroes.xlsx'):
    """Recommend heroes similar to a summoner's three most-used heroes.

    The summoner's qquin is read from the user list (field 0 = name,
    field 1 = qquin, field 2 = id) and matched against column 1 of the
    ``catalog`` table; columns 3, 5 and 7 of that row are taken as the ids of
    the three favourite heroes.  Each favourite is located in sheet
    ``sheet1`` of the workbook by its id in column 0, and compared (Euclidean
    distance over the feature columns) with every other hero row.

    Args:
        u_name: Summoner name, compared with field 0 of each list line.
        id1: Id string, compared with field 2 of each list line.
        userlist_path: Path of the user list text file.
        db_path: Path of the SQLite database containing ``catalog``.
        heroes_path: Path of the hero feature workbook.

    Returns:
        A list with the column-1 value (presumably the hero name) of the
        nearest hero for the first, second and third favourite, in that
        order.  Ties contribute every tied hero; the favourites themselves
        are never recommended.

    Raises:
        ValueError: If the user, the catalog row, or one of the favourite
            heroes cannot be found.
    """
    qquin = None
    with open(userlist_path, 'r') as listing:
        for line in listing:
            # Drop the line terminator so the id still matches when it is
            # the last field on the line.
            fields = line.rstrip('\r\n').split(',')
            if len(fields) < 3:
                continue
            if fields[0] == u_name and fields[2] == id1:
                qquin = fields[1]  # no break: the last matching line wins
    if qquin is None:
        raise ValueError('user %r with id %r not found in %s'
                         % (u_name, id1, userlist_path))

    cx = sqlite3.connect(db_path)
    try:
        cu = cx.cursor()
        cu.execute("select * from catalog")
        alluser = cu.fetchall()
    finally:
        cx.close()

    record = None
    for row in alluser:
        if qquin == row[1]:
            record = row
            break
    if record is None:
        raise ValueError('qquin %r not found in table catalog' % (qquin,))
    favourite_ids = [float(record[3]), float(record[5]), float(record[7])]

    table = xlrd.open_workbook(heroes_path).sheet_by_name(u'sheet1')
    # Read every row once instead of calling row_values() per cell.
    sheet_rows = [table.row_values(r) for r in range(table.nrows)]

    favourite_rows = []
    for hero_id in favourite_ids:
        found = None
        for r, values in enumerate(sheet_rows):
            if values[0] == hero_id:
                found = r  # last matching row wins, as before
        if found is None:
            raise ValueError('hero id %r not found in %s'
                             % (hero_id, heroes_path))
        favourite_rows.append(found)

    # Row 0 is skipped (as in the original loop; presumably the header) and
    # the favourites are not candidates.  The sheet row number is kept next
    # to each vector so a distance can be mapped back to the correct row.
    candidate_rows = [r for r in range(1, len(sheet_rows))
                      if r not in favourite_rows]
    candidate_vecs = [_hero_features(sheet_rows[r]) for r in candidate_rows]

    listhero = []
    for fav_row in favourite_rows:
        target = _hero_features(sheet_rows[fav_row])
        dists = [jisuan(vec, target) for vec in candidate_vecs]
        if not dists:
            continue
        nearest = min(dists)
        for r, dist in zip(candidate_rows, dists):
            if dist == nearest:
                listhero.append(sheet_rows[r][1])
    return listhero
# Demo call: print the heroes recommended for this summoner.
print(hero_tuijian('你怎麼這麼可愛啊', '1'))

3、算法3:SVD主成分分析相似度

首先根據收集到的數據建立user-item的映射關係矩陣,再使用numpy庫中的SVD將該矩陣分解成U矩陣、sigma矩陣和V矩陣。其中sigma矩陣只有對角線上有數據,即是一系列奇異值。可採取類似PCA的方法對奇異值進行截斷:只保留能量約佔90%的前若干個奇異值,即能大約表示原矩陣;該方法是一個有損壓縮,但會大大減少計算量。由原矩陣的轉置、U矩陣(取前若干列)和截斷後sigma矩陣的逆相乘,得到item-特徵的映射矩陣。當對某一用戶推薦item時,首先找到該用戶對所有已用過的item的評價,再對其每一件沒使用過的item進行評分預測。預測方法基於相似度,相似度的計算方法有三種:歐氏距離、皮爾遜相關係數、餘弦相似度,本次實驗主要選用了餘弦相似度。對每一個沒使用過的item,與該用戶使用過的各個item進行相似度比較,比較因子爲已壓縮過的特徵。然後將用戶對使用過的item的評分作爲權值與相應的相似度相乘並求和,再除以相似度之和,即爲沒使用過的item的預測評分;按評分排序,取最高的幾位作爲推薦。


# coding: utf-8



from numpy import *  
from numpy import linalg as la #用到別名
import test3
def loadExData2():
    """Return a small hard-coded 11x11 rating matrix as a fresh nested list."""
    ratings = [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
    return ratings
def loadExData3():
    """Build the user-item rating matrix from ``test3.loadExData()``.

    ``test3.loadExData()`` is used as a mapping whose values are themselves
    mappings from an item id string to a rating (inferred from the
    ``.keys()`` / ``.get()`` calls; confirm against ``test3``).

    Returns:
        A list with one row per entry of that mapping, in the mapping's
        iteration order.  Each row holds the ratings for the ids in
        ``items``, in that order; an id missing from a user's mapping is
        stored as 0 (unrated) so the matrix stays purely numeric.
    """
    # NOTE(review): recommend() picks a row by the user's line number in
    # userlist.txt, so the iteration order here must match that file --
    # verify, since plain dict order is arbitrary on Python 2.
    items=['1','2','3','4','5','6','7','8','9','10',
           '11','12','13','14','15','16','17','18','19','20',
           '21','22','23','24','25','26','27','28','29','30',
           '31','32','33','34','35','36','37','38','39','40',
           '41','42','43','44','45','48','50','51','53','54',
           '55','56','57','58','59','60','61','62','63','64',
           '67','68','69','72','74','75','76','77','78','79',
           '80','81','82','83','84','85','86','89','90','91',
           '92','96','98','99','101','102','103','104','105','106',
           '107','110','111','112','113','114','115','117','119','120',
           '121','122','126','127','131','133','134','136','143','150',
           '154','157','161','163','164','201','202','203','222','223',
           '236','238','240','245','254','266','267','268','412','420','421','427','429','432','497','498']
    data_temp=test3.loadExData()
    data=[]
    for key in data_temp.keys():
        ratings=data_temp.get(key)
        # Default to 0: a None entry would turn the matrix into object dtype
        # and break the numeric operations (comparison with 0, SVD).
        data.append([ratings.get(item, 0) for item in items])
    return data
def ecludSim(inA,inB):
    """Return a similarity in (0, 1] derived from the Euclidean distance.

    Identical vectors give 1.0; the value shrinks towards 0 as the
    distance (2-norm of ``inA - inB``) grows.
    """
    distance = la.norm(inA - inB)
    return 1.0/(1.0 + distance)
  
def pearsSim(inA,inB):
    """Return the Pearson correlation of two vectors rescaled to [0, 1].

    Inputs with fewer than three entries are treated as fully similar
    (1.0) without computing a correlation.
    """
    if len(inA) >= 3:
        # corrcoef yields a 2x2 matrix; the off-diagonal entry is the
        # correlation between inA and inB, mapped from [-1, 1] to [0, 1].
        correlation = corrcoef(inA, inB, rowvar = 0)[0][1]
        return 0.5+0.5*correlation
    return 1.0
  
def cosSim(inA,inB):
    """Return the cosine similarity of two vectors rescaled to [0, 1].

    Args:
        inA: Vector as a column/row matrix or a 1-D array-like.
        inB: Vector with the same number of elements as ``inA``.

    Returns:
        ``0.5 + 0.5 * cos(angle)``: 1.0 for the same direction, 0.5 for
        orthogonal vectors, 0.0 for opposite directions.  If either vector
        has zero length the angle is undefined and the neutral value 0.5 is
        returned instead of NaN, so that one empty item cannot poison the
        score totals and the final ranking.
    """
    # Flatten both inputs and take a plain dot product; this avoids calling
    # float() on a 1x1 matrix (deprecated in recent NumPy) and also accepts
    # 1-D arrays like the other similarity functions do.
    vecA = asarray(inA, dtype=float).ravel()
    vecB = asarray(inB, dtype=float).ravel()
    num = float(vecA.dot(vecB))
    denom = la.norm(inA)*la.norm(inB)
    if denom == 0:
        return 0.5
    return 0.5+0.5*(num/denom)
def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

    For every item the user has rated (non-zero entry), the similarity to
    ``item`` is measured with ``simMeas`` over the users who rated both
    items; the estimate is the similarity-weighted average of the user's
    ratings.  Returns 0 when the similarities sum to 0.

    ``dataMat`` must be a numpy matrix (``.A`` is used), with users as rows
    and items as columns.
    """
    itemCount = shape(dataMat)[1]  # number of columns = number of items
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(itemCount):
        rating = dataMat[user,other]
        if rating != 0:  # only items this user actually rated contribute
            bothRated = logical_and(dataMat[:,item].A>0,
                                    dataMat[:,other].A>0)
            # Row indices of the users who rated both items.
            sharedUsers = nonzero(bothRated)[0]
            if len(sharedUsers) == 0:
                score = 0
            else:
                score = simMeas(dataMat[sharedUsers,item],
                                dataMat[sharedUsers,other])
            weightSum += score
            # similarity to the target item, weighted by the user's rating
            weightedRatings += score * rating
    if weightSum == 0:
        return 0
    return weightedRatings/weightSum
def svdEst(dataMat, user, simMeas, item,n,xformedItems):
    """Estimate ``user``'s rating of ``item`` in the reduced feature space.

    ``xformedItems`` holds one row of features per item (it is computed by
    the caller rather than inside this function).  For each of the first
    ``n`` items that the user has rated, other than ``item`` itself, the
    similarity between the two feature rows is measured with ``simMeas``;
    the estimate is the similarity-weighted average of the user's ratings.
    Returns 0 when the similarities sum to 0.
    """
    simSum = 0.0
    weightedSum = 0.0
    for other in range(n):
        rating = dataMat[user,other]
        if rating != 0 and other != item:
            # Rows are transposed so simMeas receives column vectors.
            score = simMeas(xformedItems[item,:].T, xformedItems[other,:].T)
            simSum += score
            weightedSum += score * rating
    if simSum == 0:
        return 0
    return weightedSum/simSum
def recommend(dataMat, userid, N=3, simMeas=cosSim, estMethod=svdEst,
              k=106, userlist_path=r'C:\Users\dell\Desktop\shujuji\userlist.txt'):
    """Recommend the ``N`` best-scoring unrated items for one user.

    The user's row in ``dataMat`` is the zero-based number of the line in
    the user list whose first comma-separated field equals ``userid`` (a
    UTF-8 byte-order mark is removed from that field first).

    Args:
        dataMat: numpy matrix of ratings, users as rows and items as
            columns, 0 meaning "not rated".
        userid: User name to look up in the user list.
        N: Number of items to return.
        simMeas: Similarity function handed to ``estMethod``.
        estMethod: ``standEst`` (called with 4 arguments) or an estimator
            with the ``svdEst`` signature.
        k: Number of singular values kept for the reduced item features;
            clamped to the number of singular values available.
        userlist_path: Path of the comma-separated user list.

    Returns:
        A tuple with the column indices of up to ``N`` unrated items,
        ordered by descending estimated score, or the string
        ``'you rated everything'`` when the user has no unrated item.

    Raises:
        ValueError: If ``userid`` does not appear in the user list (the
            original silently fell through to the last user).
    """
    user = None
    with open(userlist_path, 'r') as usr:
        for row_index, line in enumerate(usr):
            fields = line.split(',')
            te = fields[0].replace('\xef\xbb\xbf', '')
            if te == userid:
                # Same diagnostic output as before: third + second field,
                # then the row index.
                print(fields[2] + fields[1])
                print(row_index)
                user = row_index
                break
    if user is None:
        raise ValueError('user %r not found in %s' % (userid, userlist_path))

    # Column indices of the entries equal to 0 in the user's row.
    unratedItems = nonzero(dataMat[user,:].A==0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'

    n = shape(dataMat)[1]
    xformedItems = None
    if estMethod != standEst:
        # The decomposition is only needed by the SVD-based estimators and
        # is computed once here instead of once per item.
        U,Sigma,VT = la.svd(dataMat)
        k = min(k, len(Sigma))  # small matrices have fewer singular values
        SigK = mat(eye(k)*Sigma[:k])  # diagonal matrix of the kept values
        xformedItems = dataMat.T * U[:,:k] * SigK.I  # one feature row per item

    itemScores = []
    for item in unratedItems:
        if estMethod == standEst:
            estimatedScore = estMethod(dataMat, user, simMeas, item)
        else:
            estimatedScore = estMethod(dataMat, user, simMeas, item, n, xformedItems)
        itemScores.append((item, estimatedScore))
    top = sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]
    return tuple(item for item, _ in top)

# Build the rating matrix and print the SVD-based recommendations for one user.
myMat = mat(loadExData3())
print(recommend(myMat, 'cao奈何橋斷了', estMethod=svdEst))
# Earlier experiments, kept for reference:
# print(recommend(myMat, 0, estMethod=standEst))
# print(recommend(myMat, 2, estMethod=svdEst))
# print(recommend(myMat, 3, estMethod=svdEst))
# print(recommend(myMat, 4, estMethod=svdEst))
# print(recommend(myMat, 5, estMethod=svdEst))





發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章