# 1. Apriori算法:
def loadData():
    """Return the built-in sample database: nine transactions of integer items."""
    transactions = [
        [1, 2, 5],
        [2, 4],
        [2, 3],
        [1, 2, 4],
        [1, 3],
        [2, 3],
        [1, 3],
        [1, 2, 3, 5],
        [1, 2, 3],
    ]
    return transactions
def find_frequent_1_itemsets(D, minsupport):
    """Return the frequent 1-itemsets of D as a sorted list of one-element lists.

    Args:
        D: iterable of transactions; each transaction is an iterable of
            hashable items.
        minsupport: minimum number of transactions that must contain an item
            for it to be reported.

    Returns:
        ``[[item], ...]`` in ascending order, one entry per item that occurs
        in at least ``minsupport`` transactions.
    """
    cnt = {}
    for transcation in D:
        # Support is a count of transactions, not of occurrences: an item that
        # is repeated inside one transaction must still contribute only 1.
        for item in set(transcation):
            cnt[item] = cnt.get(item, 0) + 1
    return sorted([item] for item, n in cnt.items() if n >= minsupport)
def aproiri_gen(L, k):
    """Build the next level of candidates by self-joining L and pruning.

    Two itemsets are joined when they agree on everything but their last
    item and the first one's last item is smaller. A joined candidate is
    kept only if ``has_infrequent_subset`` does not reject it.

    Args:
        L: list of itemsets (lists of mutually comparable, hashable items),
            all of the same size and each in ascending order.
        k: passed by the caller alongside L; not used by this function.

    Returns:
        List of candidate itemsets, each a list in ascending order.
    """
    res = []
    for i, first in enumerate(L):
        for second in L[i + 1:]:
            if first[:-1] == second[:-1] and first[-1] < second[-1]:
                # A set has no defined iteration order, so sort explicitly:
                # the prefix comparison above and the list-equality lookup in
                # the prune step both need candidates in ascending order.
                candidate = sorted(set(first).union(second))
                if not has_infrequent_subset(candidate, L):
                    res.append(candidate)
    return res
def has_infrequent_subset(candidate, L):
    """Tell whether dropping some single item of candidate gives a list not in L.

    Args:
        candidate: list of items to examine.
        L: list of itemsets (lists) looked up by list equality.

    Returns:
        True as soon as one "candidate minus one item" list is missing from
        L, otherwise False (also for an empty candidate).
    """
    def _without(value):
        # Copy first so the caller's candidate is never modified.
        rest = candidate[:]
        rest.remove(value)
        return rest

    return any(_without(value) not in L for value in candidate)
def compareList(l1, l2):
    """Return True when every element of l1 is contained in l2, else False."""
    return all(item in l2 for item in l1)
def Aproiri(D, minsupport):
    """Mine the frequent itemsets of D level by level and print progress.

    Args:
        D: list of transactions (each a collection of items).
        minsupport: minimum number of transactions that must contain an
            itemset for it to be kept.

    Returns:
        A list ``L`` in which ``L[k]`` holds the frequent itemsets of size k.
        ``L[0]`` is an empty placeholder so that the index equals the itemset
        size. The last element is the first level that came back empty.
    """
    L = [[], find_frequent_1_itemsets(D, minsupport)]
    # Continue until a level is empty instead of stopping at a fixed size:
    # the former hard-coded range(2, 5) silently dropped every frequent
    # itemset with more than four items.
    while L[-1]:
        k = len(L)
        Ck = aproiri_gen(L[-1], k - 1)
        print("自鏈接加剪枝後得到的候選Ck:" , Ck)
        print("遍歷D對每個候選計數")
        Lk = []
        for candi in Ck:
            # Support = number of transactions containing the whole candidate.
            cnt = sum(1 for transcation in D if compareList(candi, transcation))
            if cnt >= minsupport:
                print ("符合要求的項集: ", candi, "出現次數: ",cnt)
                Lk.append(candi)
        L.append(Lk)
    return L
# Demo run: mine the built-in sample database with a minimum support count
# of 2 and print the resulting list of frequent itemsets, indexed by size.
D = loadData()
L = Aproiri(D, 2)
print (L)
# 2. MaxMiner算法:
# 在Apriori算法基礎上進行修改,得到MaxMiner算法。
import sys
import time
def loadData():
    """Read the training data file and return its lines as the transactions.

    Returns:
        List of the file's lines as strings, exactly as ``readlines()``
        yields them (line terminators are kept).
    """
    # The context manager closes the file even if reading raises; the handle
    # was previously left open for the lifetime of the process.
    with open("D:\\data\\traindata04.data") as cf:
        return cf.readlines()
def find_frequent_1_itemsets(D, minsupport):
    """Return the frequent 1-itemsets of D as a sorted list of one-element lists.

    Args:
        D: iterable of transactions; each transaction is an iterable of
            hashable items (for a text line, its individual characters).
            Items equal to ``'\\n'`` or ``' '`` are ignored.
        minsupport: minimum number of transactions that must contain an item
            for it to be reported.

    Returns:
        ``[[item], ...]`` in ascending order, one entry per item that occurs
        in at least ``minsupport`` transactions.
    """
    cnt = {}
    for transcation in D:
        # Support is a count of transactions, not of occurrences: an item that
        # is repeated inside one transaction must still contribute only 1.
        for item in set(transcation):
            if item == '\n' or item == ' ':
                continue
            cnt[item] = cnt.get(item, 0) + 1
    return sorted([item] for item, n in cnt.items() if n >= minsupport)
def aproiri_gen(L, L1, D , minsupport):
    """Generate the next level: extend each itemset of L by one larger item.

    An itemset is extended with a 1-itemset from L1 only when the itemset's
    last item is smaller than that single item. The extension is kept when
    at least ``minsupport`` transactions of D contain all of its items.

    Args:
        L: itemsets of the current level, each a list in ascending order.
        L1: the frequent 1-itemsets (one-element lists).
        D: the transactions used for support counting.
        minsupport: minimum number of containing transactions.

    Returns:
        List of frequent itemsets one item larger than those in L, each a
        list in ascending order.
    """
    res = []
    for itemset in L:
        for single in L1:
            if itemset[-1] < single[0]:
                # A set has no defined iteration order, so sort explicitly.
                # The "last item is the largest" test above relies on every
                # itemset being kept in ascending order.
                candidate = sorted(set(itemset).union(single))
                cnt = sum(1 for transcation in D if compareList(candidate, transcation))
                if cnt >= minsupport:
                    res.append(candidate)
    return res
def compareList(l1, l2):
    """Report whether l2 contains every item of l1 (True for an empty l1)."""
    return not any(item not in l2 for item in l1)
def Aproiri(D, minsupport):
    """Mine the frequent itemsets of D and pick out the maximal ones.

    Args:
        D: the transactions.
        minsupport: minimum number of transactions that must contain an
            itemset for it to count as frequent.

    Returns:
        Tuple ``(L, maximal)``. ``L[k]`` holds the frequent itemsets of size
        k (``L[0]`` is an empty placeholder). ``maximal`` holds the frequent
        itemsets that are not contained in any frequent itemset of the next
        size.
    """
    L1 = find_frequent_1_itemsets(D, minsupport)
    L = [[], L1]
    # An itemset can hold up to len(L1) items, so sizes 2..len(L1) inclusive
    # must be tried. range(2, len(L1)) stopped one level short and, with only
    # two frequent items, never generated a pair at all.
    for k in range(2, len(L1) + 1):
        if not L[k - 1]:
            break
        L.append(aproiri_gen(L[k - 1], L1, D, minsupport))
    # Start from every frequent itemset and strike out the non-maximal ones.
    maximal = [itemset for level in L for itemset in level]
    # L[0] is empty and contributes nothing, so len(maximal) already is the
    # exact number of frequent itemsets (no "- 1" correction needed).
    print("頻繁項集個數爲:",len(maximal))
    for smaller, larger in zip(L[1:], L[2:]):
        for transcation in smaller:
            for candidate in larger:
                # Contained in a frequent itemset one size up: not maximal.
                if compareList(transcation, candidate):
                    if transcation in maximal:
                        maximal.remove(transcation)
    return L,maximal
# Benchmark run: load the data file, mine it with a minimum support count of
# 60, then report the number of maximal frequent itemsets and the elapsed
# wall-clock time in seconds.
start=time.time()
D = loadData()
L ,maximal = Aproiri(D, 60 )
# (Disabled debug output: printing L itself and len(L).)
print ("極大頻繁項集爲:",len(maximal))
end=time.time()
print (end-start,'s')
# 3. CLOSET算法:
import sys
import time
'''''
def loadData():
itemset=[]
cf=open("D:\\data\\traindata02.data")
lines =cf.readlines()
for line in lines:
itemset.append(line)
return itemset
'''''
def loadData():
    """Return the built-in sample database: five transactions of integer items."""
    transactions = [
        [1, 3, 4, 5, 6],
        [1, 2, 5],
        [3, 5, 6],
        [1, 3, 4, 6],
        [3, 5, 6],
    ]
    return transactions
def find_frequent_1_itemsets(D, minsupport):
    """Return the frequent 1-itemsets of D as a sorted list of one-element lists.

    Args:
        D: iterable of transactions; each transaction is an iterable of
            hashable items. Items equal to ``'\\n'`` or ``' '`` are ignored.
        minsupport: minimum number of transactions that must contain an item
            for it to be reported.

    Returns:
        ``[[item], ...]`` in ascending order, one entry per item that occurs
        in at least ``minsupport`` transactions.
    """
    cnt = {}
    for transcation in D:
        # Support is a count of transactions, not of occurrences: an item that
        # is repeated inside one transaction must still contribute only 1.
        for item in set(transcation):
            if item == '\n' or item == ' ':
                continue
            cnt[item] = cnt.get(item, 0) + 1
    return sorted([item] for item, n in cnt.items() if n >= minsupport)
def aproiri_gen(L, L1, D , minsupport):
    """Generate the next level: extend each itemset of L by one larger item.

    An itemset is extended with a 1-itemset from L1 only when the itemset's
    last item is smaller than that single item. The extension is kept when
    at least ``minsupport`` transactions of D contain all of its items.

    Args:
        L: itemsets of the current level, each a list in ascending order.
        L1: the frequent 1-itemsets (one-element lists).
        D: the transactions used for support counting.
        minsupport: minimum number of containing transactions.

    Returns:
        List of frequent itemsets one item larger than those in L, each a
        list in ascending order.
    """
    res = []
    for itemset in L:
        for single in L1:
            if itemset[-1] < single[0]:
                # A set has no defined iteration order, so sort explicitly.
                # The "last item is the largest" test above relies on every
                # itemset being kept in ascending order.
                candidate = sorted(set(itemset).union(single))
                cnt = sum(1 for transcation in D if compareList(candidate, transcation))
                if cnt >= minsupport:
                    res.append(candidate)
    return res
def compareList(l1, l2):
    """Check containment: True if each item of l1 occurs in l2, False otherwise."""
    return all(entry in l2 for entry in l1)
def Aproiri(D, minsupport):
    """Mine the frequent itemsets of D and pick out the closed ones.

    Args:
        D: the transactions.
        minsupport: minimum number of transactions that must contain an
            itemset for it to count as frequent.

    Returns:
        Tuple ``(L, close)``. ``L[k]`` holds the frequent itemsets of size k
        (``L[0]`` is an empty placeholder). ``close`` holds the frequent
        itemsets for which no frequent itemset of the next size contains
        them with at least the same support.
    """
    L1 = find_frequent_1_itemsets(D, minsupport)
    L = [[], L1]
    # An itemset can hold up to len(L1) items, so sizes 2..len(L1) inclusive
    # must be tried. range(2, len(L1)) stopped one level short and, with only
    # two frequent items, never generated a pair at all.
    for k in range(2, len(L1) + 1):
        if not L[k - 1]:
            break
        L.append(aproiri_gen(L[k - 1], L1, D, minsupport))
    # Start from every frequent itemset and strike out the non-closed ones.
    close = [itemset for level in L for itemset in level]
    for smaller, larger in zip(L[1:], L[2:]):
        for transcation in smaller:
            cnt = sum(1 for item in D if compareList(transcation, item))
            for candidate in larger:
                if compareList(transcation, candidate):
                    cnm = sum(1 for item in D if compareList(candidate, item))
                    # A superset that is at least as frequent means the
                    # smaller itemset is not closed.
                    if cnt <= cnm and transcation in close:
                        close.remove(transcation)
    return L,close
# Demo run: mine the built-in sample database with a minimum support count
# of 2, print the frequent itemsets and the closed itemsets, then print the
# elapsed wall-clock time in seconds.
start=time.time()
D = loadData()
L ,close = Aproiri(D, 2)
print ("頻繁項集爲:",L)
# (Disabled debug output: printing len(close).)
print("閉合項集爲:",close)
end=time.time()
print ('程序運行時間爲:',end-start,'s')
# 4. 已知閉合項集恢復頻繁項集:
import os
def find_frequent_1_itemsets(D):
    """Wrap each distinct item of D in its own one-element list.

    Args:
        D: a single itemset (iterable of items). Items equal to ``'\\n'`` or
            ``' '`` are skipped.

    Returns:
        ``[[item], ...]`` without duplicates, in order of first appearance.
    """
    singles = []
    for item in D:
        if item == '\n' or item == ' ':
            continue
        wrapped = [item]
        if wrapped not in singles:
            singles.append(wrapped)
    return singles
def aproiri_gen(L,L1):
    """Generate the next level: extend each itemset of L by one larger item.

    An itemset is extended with a 1-itemset from L1 only when the itemset's
    last item is smaller than that single item. Duplicates are dropped.

    Args:
        L: itemsets of the current level, each a list in ascending order.
        L1: the 1-itemsets (one-element lists) to extend with.

    Returns:
        List of distinct itemsets one item larger than those in L, each a
        list in ascending order.
    """
    res = []
    for itemset in L:
        for single in L1:
            if itemset[-1] < single[0]:
                # A set has no defined iteration order, so sort explicitly.
                # The "last item is the largest" test above and the
                # duplicate check below both need a canonical ascending order.
                candidate = sorted(set(itemset).union(single))
                if candidate not in res:
                    res.append(candidate)
    return res
def recover(L):
    """Expand a list of itemsets into the sorted list of all their sub-itemsets.

    For every itemset in L, the 1-item lists come from
    ``find_frequent_1_itemsets`` and the larger, smaller-than-whole levels
    from repeated ``aproiri_gen`` calls. The itemset itself is added as well.

    Args:
        L: list of itemsets (lists of items).

    Returns:
        Sorted list of the distinct itemsets collected from all inputs.
    """
    groups = []
    for closed in L:
        singles = find_frequent_1_itemsets(closed)
        levels = [[], singles]
        # Sizes 2 .. len(closed) - 1; the full itemset is appended below.
        for size in range(2, len(closed)):
            if not levels[size - 1]:
                break
            levels.append(aproiri_gen(levels[size - 1], singles))
        for level in levels:
            if level not in groups:
                groups.append(level)
        groups.append([closed])

    # Flatten the per-level groups, keeping only the first copy of each itemset.
    frequent = []
    for group in groups:
        for candidate in group:
            if candidate not in frequent:
                frequent.append(candidate)
    frequent.sort()
    return frequent
def compareList(l1, l2):
    """Return whether all items of l1 are present in l2 (True if l1 is empty)."""
    return not any(member not in l2 for member in l1)
def support(frequent ,D, dic):
    """Look up a support value for each itemset in frequent.

    Args:
        frequent: list of itemsets whose support is wanted.
        D: list of itemsets whose supports are known.
        dic: maps the key of each itemset in D to its support. A key is the
            itemset's items, each converted with ``str`` and followed by a
            comma (``[1, 5]`` -> ``'1,5,'``).

    Returns:
        Dict mapping each frequent itemset's key to the largest ``dic`` value
        among the itemsets of D that contain all of its items, or 0 when none
        does. For a repeated itemset the first computed value is kept.

    Raises:
        KeyError: if an itemset of D that contains a frequent itemset has no
            entry in dic.
    """
    def _key(items):
        return ''.join(str(p) + ',' for p in items)

    itemset = {}
    for transaction in frequent:
        count = 0
        for candidate in D:
            if all(p in candidate for p in transaction):
                known = dic[_key(candidate)]
                if count < known:
                    count = known
        itemset.setdefault(_key(transaction), count)
    return itemset
# Demo run: D lists the known closed itemsets and dic gives the support of
# each one, keyed by its items joined with trailing commas.
D=[[1], [5], [1, 5], [3, 6], [3, 5, 6], [1, 3, 4, 6]]
dic={'1,':3,'5,':4,'1,5,':2,'3,6,':4,'3,5,6,':3,'1,3,4,6,':2}
# Rebuild the itemsets from the closed ones, then look up their supports.
L=recover(D)
m=support(L,D,dic)
print("恢復原頻繁項集爲:",L)
print("原頻繁項集支持度爲:",m)
# (Disabled debug output: print(itemset).)