關聯規則 Apriori 算法及 Python 3 實現

數據挖掘課程正在學習關聯規則,老師要求寫出 Apriori 算法。我找了很多資料,但很多博客的內容都不能使用,於是上嗶哩嗶哩找教程,找到一個挺好的:這個 UP 主講得非常棒,最重要的是有代碼,所以我照着差不多打了一遍。爲了以後用着方便,就寫成這篇博客。

在這裏插入代碼片

def shu():
    """Return the raw sample data: a list of transactions.

    Each transaction is a list of integer item ids. A fresh list is built
    on every call.
    """
    return [
        [1, 2, 3, 4, 10],
        [2, 3, 4],
        [1, 2, 3, 4],
        [3, 4],
        [5, 6],
        [2, 3, 4, 5],
    ]


def jishu():
    """Build the size-1 candidate itemsets from the sample data.

    Collects every distinct item that occurs in the transactions returned
    by shu() and wraps each one in its own frozenset, so the itemsets are
    hashable and can be used as dictionary keys.

    Returns:
        A list of single-item frozensets, ordered by ascending item value.
    """
    # Sort the raw items rather than the one-element sets: "<" between sets
    # is the proper-subset test, so sorting a list of distinct singleton
    # sets never reorders anything.
    distinct_items = {item for transaction in shu() for item in transaction}
    return [frozenset([item]) for item in sorted(distinct_items)]


def scanD(D, CK, minsupport):
    """Count candidate itemsets in the transactions and filter by support.

    Args:
        D: sized collection of transactions (each an iterable of items).
        CK: iterable of candidate itemsets (frozensets).
        minsupport: minimum support, as a fraction of len(D), that an
            itemset needs in order to be kept.

    Returns:
        A tuple (retList, supportData): retList holds the candidates whose
        support is >= minsupport, and supportData maps every candidate that
        occurred in at least one transaction to its support.
    """
    occurrences = {}
    for transaction in D:
        for candidate in CK:
            # Only candidates fully contained in the transaction are counted.
            if not candidate.issubset(transaction):
                continue
            occurrences[candidate] = occurrences.get(candidate, 0) + 1

    total = float(len(D))
    supportData = {
        itemset: hits / total for itemset, hits in occurrences.items()
    }
    retList = [
        itemset
        for itemset, support in supportData.items()
        if support >= minsupport
    ]
    return retList, supportData

def aprioriGen(lk, k):
    """Generate size-k candidate itemsets by joining pairs of itemsets.

    Two itemsets from lk are joined (set union) when their first k-2 items,
    taken in sorted order, are identical.

    Args:
        lk: list of itemsets (frozensets) to join, normally the frequent
            itemsets of size k-1.
        k: size of the candidate itemsets to produce.

    Returns:
        A list with the union of every pair of itemsets in lk that share
        the same sorted (k-2)-item prefix, in pair order (i < j).
    """
    # Sort BEFORE slicing: a set has no defined iteration order, so taking
    # the first k-2 elements of list(itemset) and sorting afterwards can
    # pick the wrong prefix and miss or mis-join candidates.
    keyed = [(sorted(itemset)[:k - 2], itemset) for itemset in lk]

    ck = []
    for position, (prefix_a, itemset_a) in enumerate(keyed):
        for prefix_b, itemset_b in keyed[position + 1:]:
            if prefix_a == prefix_b:
                ck.append(itemset_a | itemset_b)
    return ck

def apriori(minsupport=0.5):
    """Run the level-wise Apriori search over the sample data from shu().

    Args:
        minsupport: minimum support an itemset needs to count as frequent.

    Returns:
        A tuple (levels, supportData). levels[i] is the list of frequent
        itemsets found at pass i (size i+1); the last entry is the empty
        list that ended the search. supportData maps every counted itemset
        to its support.
    """
    transactions = shu()
    frequent, supportData = scanD(transactions, jishu(), minsupport)

    levels = [frequent]
    size = 2
    # Keep growing itemsets until a pass yields no frequent itemsets.
    while levels[-1]:
        candidates = aprioriGen(levels[-1], size)
        frequent, level_support = scanD(transactions, candidates, minsupport)
        supportData.update(level_support)
        levels.append(frequent)
        size += 1
    return levels, supportData

def main():
    """Run Apriori with a minimum support of 0.6 and print the supports.

    Prints one line per counted itemset: the itemset followed by its support.
    """
    _, supports = apriori(minsupport=0.6)
    for itemset in supports:
        print(itemset, supports[itemset])
    
# Run the demo at module load; this executes both when the file is run as a
# script and when it is imported.
main()
    
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章