數據挖掘課程在學習關聯規則,老師要求寫出Apriori算法,找了很多資料很多博客的內容都不能使用,於是上嗶哩嗶哩看看教程,找到一個挺好的教程,這個UP主講的非常棒,最重要的是有代碼,所以照着差不多打了一遍。爲了以後用着方便,所以就寫個博客吧
以下是完整的 Apriori 算法實現代碼:
def shu():
    """Return the hard-coded demo transaction dataset (one list per transaction)."""
    transactions = [
        [1, 2, 3, 4, 10],
        [2, 3, 4],
        [1, 2, 3, 4],
        [3, 4],
        [5, 6],
        [2, 3, 4, 5],
    ]
    return transactions
def jishu(dataset=None):
    """Build the candidate 1-itemsets (C1) from a transaction dataset.

    Args:
        dataset: optional list of transactions (each an iterable of items).
            Defaults to the built-in demo data from shu(), so existing
            callers keep working unchanged.

    Returns:
        list[frozenset]: one singleton frozenset per distinct item,
        sorted by item value. frozenset is used because the itemsets
        later serve as dict keys in scanD().
    """
    if dataset is None:
        dataset = shu()
    # Collect distinct items with a set instead of a linear membership scan.
    items = set()
    for transaction in dataset:
        items.update(transaction)
    # Sort by the item itself: the original sorted a list of *sets*, but
    # `<` on sets is subset comparison — only a partial order — so that
    # sort produced an arbitrary ordering.
    return [frozenset([item]) for item in sorted(items)]
def scanD(D, CK, minsupport):
    """Count candidate itemsets in D and keep those meeting minimum support.

    Args:
        D: list of transactions, each an iterable of items.
        CK: list of candidate itemsets (frozensets).
        minsupport: minimum support threshold in [0, 1].

    Returns:
        (retList, supportData): the candidates whose support >= minsupport,
        and a dict mapping every candidate that appeared at least once to
        its support. Candidates that never occur are absent from both,
        matching the original behavior.
    """
    if not D:
        # No transactions: nothing can be frequent; also avoids a 0-length
        # denominator below.
        return [], {}
    ssCnt = {}
    for tid in D:
        for can in CK:
            # frozenset.issubset accepts any iterable, so tid may be a list.
            if can.issubset(tid):
                # Single lookup with a default instead of `in ssCnt.keys()`.
                ssCnt[can] = ssCnt.get(can, 0) + 1
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        supportData[key] = support
        if support >= minsupport:
            retList.append(key)
    return retList, supportData
def aprioriGen(lk, k):
    """Generate candidate k-itemsets (Ck) from the frequent (k-1)-itemsets.

    Two frequent itemsets are merged when their first k-2 elements agree
    (after sorting), which produces each k-candidate exactly once.
    """
    candidates = []
    for i, first in enumerate(lk):
        for second in lk[i + 1:]:
            prefix_a = sorted(list(first)[:k - 2])
            prefix_b = sorted(list(second)[:k - 2])
            if prefix_a == prefix_b:
                candidates.append(first | second)
    return candidates
def apriori(minsupport=0.5):
    """Run the full Apriori pass over the built-in demo dataset.

    Returns:
        (levels, supportData): a list of frequent-itemset lists, one entry
        per itemset size (the last entry is empty), plus the support of
        every candidate itemset that was scanned.
    """
    dataset = shu()
    candidates1 = jishu()
    frequent1, supportData = scanD(dataset, candidates1, minsupport)
    levels = [frequent1]
    k = 2
    # Grow itemsets level by level until no frequent k-itemset survives.
    while levels[k - 2]:
        ck = aprioriGen(levels[k - 2], k)
        frequent_k, support_k = scanD(dataset, ck, minsupport)
        supportData.update(support_k)
        levels.append(frequent_k)
        k += 1
    return levels, supportData
def main():
    """Mine the demo dataset at 60% support and print each itemset's support."""
    _, supportData = apriori(minsupport=0.6)
    for itemset, support in supportData.items():
        print(itemset, support)
# Run the demo only when executed as a script, so importing this module
# for its functions does not trigger the printing side effect.
if __name__ == "__main__":
    main()