Apriori、MaxMiner、CLOSET算法的Python實現

1.Apriori算法:

	def loadData():
	    """Return the nine-transaction toy database used by the Apriori demo.
	
	    Returns:
	        A new list of transactions; each transaction is a list of integer item ids.
	    """
	    transactions = [
	        [1, 2, 5],
	        [2, 4],
	        [2, 3],
	        [1, 2, 4],
	        [1, 3],
	        [2, 3],
	        [1, 3],
	        [1, 2, 3, 5],
	        [1, 2, 3],
	    ]
	    return transactions
	def find_frequent_1_itemsets(D, minsupport):
	    """Return the frequent 1-itemsets of D in sorted order.
	
	    Every occurrence of an item is counted, including repeats inside a
	    single transaction.
	
	    Args:
	        D: iterable of transactions, each an iterable of hashable items.
	        minsupport: minimum occurrence count (absolute, not a ratio).
	
	    Returns:
	        A sorted list of one-element lists, one for every item whose
	        occurrence count is at least ``minsupport``.
	    """
	    # Tally with a dict: one O(1) lookup per item instead of scanning a
	    # list of already-seen items for every occurrence.
	    cnt = {}
	    for transaction in D:
	        for item in transaction:
	            cnt[item] = cnt.get(item, 0) + 1
	    L1 = [[item] for item, occurrences in cnt.items() if occurrences >= minsupport]
	    L1.sort()
	    return L1
	
	def aproiri_gen(L, k):
	    """Build the next level's candidates by self-joining L, then prune them.
	
	    Two itemsets are joined when they agree on everything except their
	    last item. A candidate is dropped when has_infrequent_subset()
	    reports that one of its one-item-shorter subsets is missing from L.
	
	    Args:
	        L: list of itemsets of equal length, each a list sorted ascending.
	        k: not used; kept so existing callers keep working.
	
	    Returns:
	        List of candidate itemsets, each a sorted list one item longer
	        than the itemsets in L.
	    """
	    res = []
	    for i, first in enumerate(L):
	        for second in L[i + 1:]:
	            if first[:-1] == second[:-1] and first[-1] < second[-1]:
	                # Append the larger last item instead of going through a
	                # set: set iteration order is arbitrary, while both the
	                # prune step and the next join need sorted lists.
	                candidate = first + [second[-1]]
	                if not has_infrequent_subset(candidate, L):
	                    res.append(candidate)
	    return res
	
	def has_infrequent_subset(candidate, L):
	    """Tell whether some one-item-shorter subset of candidate is absent from L.
	
	    Args:
	        candidate: itemset (list) to check.
	        L: list of itemsets (lists) regarded as frequent.
	
	    Returns:
	        True as soon as a subset obtained by removing one item is not
	        found in L, otherwise False.
	    """
	    for dropped in candidate:
	        remaining = candidate[:]
	        remaining.remove(dropped)
	        if remaining in L:
	            continue
	        return True
	    return False
	
	def compareList(l1, l2):
	    """Return True when every element of l1 also occurs in l2."""
	    return all(element in l2 for element in l1)
	
	def Aproiri(D, minsupport):
	    """Run level-wise Apriori mining over D and print progress for each level.
	
	    Args:
	        D: list of transactions, each a collection of items.
	        minsupport: minimum number of supporting transactions (absolute).
	
	    Returns:
	        A list L in which L[0] is an empty placeholder and L[k] holds the
	        frequent k-itemsets; the last entry is the first empty level.
	    """
	    L = [[], find_frequent_1_itemsets(D, minsupport)]
	    k = 2
	    # Continue until a level comes back empty. The previous fixed
	    # range(2, 5) silently stopped at 4-itemsets.
	    while L[k - 1]:
	        Ck = aproiri_gen(L[k - 1], k - 1)
	        print("自鏈接加剪枝後得到的候選Ck:" , Ck)
	        print("遍歷D對每個候選計數")
	        Lk = []
	        for candi in Ck:
	            # Support = number of transactions containing every item.
	            cnt = sum(1 for transaction in D if compareList(candi, transaction))
	            if cnt >= minsupport:
	                print ("符合要求的項集: ", candi, "出現次數: ",cnt)
	                Lk.append(candi)
	        L.append(Lk)
	        k += 1
	    return L
	# Demo: mine the toy database with an absolute minimum support of 2
	# and print the resulting list of levels.
	D = loadData()
	L = Aproiri(D, 2)
	print (L)

2.MaxMiner算法:

    在Apriori算法基礎上進行修改,得到MaxMiner算法。

	import sys
	import time
	
	
	def loadData(path="D:\\data\\traindata04.data"):
	    """Read the transaction file and return its lines.
	
	    Args:
	        path: file to read; defaults to the location the script has
	            always used, so existing ``loadData()`` calls are unchanged.
	
	    Returns:
	        List of the file's lines as strings, line endings included.
	    """
	    # The context manager closes the handle, which was previously leaked.
	    with open(path) as cf:
	        return cf.readlines()
	
	def find_frequent_1_itemsets(D, minsupport):
	    """Return the frequent 1-itemsets of D in sorted order.
	
	    Each transaction is iterated element by element (for a text line
	    that means character by character); newline and space elements are
	    ignored. Every remaining occurrence is counted.
	
	    Args:
	        D: iterable of transactions, each an iterable of hashable items.
	        minsupport: minimum occurrence count (absolute, not a ratio).
	
	    Returns:
	        A sorted list of one-element lists, one for every item whose
	        occurrence count is at least ``minsupport``.
	    """
	    # Tally with a dict: one O(1) lookup per item instead of scanning a
	    # list of already-seen items for every occurrence.
	    cnt = {}
	    for transaction in D:
	        for item in transaction:
	            if item == '\n' or item == ' ':
	                continue
	            cnt[item] = cnt.get(item, 0) + 1
	    L1 = [[item] for item, occurrences in cnt.items() if occurrences >= minsupport]
	    L1.sort()
	    return L1
	
	def aproiri_gen(L, L1, D, minsupport):
	    """Generate the next level: extend each itemset in L by one item and keep the frequent results.
	
	    Args:
	        L: itemsets of the current level, each a list sorted ascending.
	        L1: the frequent 1-itemsets, each a one-element list.
	        D: transactions against which support is counted.
	        minsupport: minimum number of supporting transactions (absolute).
	
	    Returns:
	        List of frequent itemsets one item longer than those in L, each
	        a list sorted ascending.
	    """
	    res = []
	    for itemset in L:
	        for single in L1:
	            # Extend only with an item greater than the current last
	            # item, so each combination is produced exactly once.
	            if itemset[-1] < single[0]:
	                # Concatenate rather than union two sets: set iteration
	                # order is arbitrary, and the test above relies on the
	                # last element being the largest.
	                candidate = itemset + single
	                cnt = sum(1 for transaction in D if compareList(candidate, transaction))
	                if cnt >= minsupport:
	                    res.append(candidate)
	    return res
	
	def compareList(l1, l2):
	    """Return True when every element of l1 also occurs in l2."""
	    return all(element in l2 for element in l1)
	
	
	def Aproiri(D, minsupport):
	    """Mine all frequent itemsets level by level and pick out the maximal ones.
	
	    An itemset is treated as maximal when no itemset of the next level
	    contains it.
	
	    Args:
	        D: transactions to mine.
	        minsupport: minimum number of supporting transactions (absolute).
	
	    Returns:
	        A tuple ``(L, maximal)``: L[0] is an empty placeholder and L[k]
	        holds the frequent k-itemsets; ``maximal`` lists the frequent
	        itemsets that have no frequent superset.
	    """
	    L1 = find_frequent_1_itemsets(D, minsupport)  # level 1
	    L = [[], L1]
	    # An itemset can contain up to len(L1) items, so that level must be
	    # included; range(2, len(L1)) stopped one level short and produced
	    # no pairs at all when only two items were frequent.
	    for k in range(2, len(L1) + 1):
	        if not L[k - 1]:
	            break
	        L.append(aproiri_gen(L[k - 1], L1, D, minsupport))
	    # Start from every frequent itemset, then strike the non-maximal ones.
	    maximal = [itemset for level in L for itemset in level]
	    # L[0] is empty and adds nothing to the list, so the count needs no
	    # "- 1" correction.
	    print("頻繁項集個數爲:",len(maximal))
	    for k in range(2, len(L)):
	        for itemset in L[k - 1]:
	            for superset in L[k]:
	                if compareList(itemset, superset) and itemset in maximal:
	                    maximal.remove(itemset)
	    return L,maximal
	# Demo: time one full run with an absolute minimum support of 60.
	start=time.time()
	D = loadData()
	L ,maximal = Aproiri(D, 60 )
	# Debug aids (disabled): print L, or len(L), to inspect the mined levels.
	# Only the number of maximal itemsets is printed, not the itemsets.
	print ("極大頻繁項集爲:",len(maximal))
	end=time.time()
	# Elapsed wall-clock time in seconds.
	print (end-start,'s')
	

3.CLOSET算法:

	import sys
	import time
	'''''
	def loadData():
	    itemset=[]
	    cf=open("D:\\data\\traindata02.data")
	    lines =cf.readlines()
	    for line in lines:
	        itemset.append(line)
	    return itemset
	'''''
	def loadData():
	    """Return the five-transaction toy database used by the CLOSET demo.
	
	    Returns:
	        A new list of transactions; each transaction is a list of integer item ids.
	    """
	    transactions = [
	        [1, 3, 4, 5, 6],
	        [1, 2, 5],
	        [3, 5, 6],
	        [1, 3, 4, 6],
	        [3, 5, 6],
	    ]
	    return transactions
	def find_frequent_1_itemsets(D, minsupport):
	    """Return the frequent 1-itemsets of D in sorted order.
	
	    Newline and space elements are ignored; every other occurrence of
	    an item is counted.
	
	    Args:
	        D: iterable of transactions, each an iterable of hashable items.
	        minsupport: minimum occurrence count (absolute, not a ratio).
	
	    Returns:
	        A sorted list of one-element lists, one for every item whose
	        occurrence count is at least ``minsupport``.
	    """
	    # Tally with a dict: one O(1) lookup per item instead of scanning a
	    # list of already-seen items for every occurrence.
	    cnt = {}
	    for transaction in D:
	        for item in transaction:
	            if item == '\n' or item == ' ':
	                continue
	            cnt[item] = cnt.get(item, 0) + 1
	    L1 = [[item] for item, occurrences in cnt.items() if occurrences >= minsupport]
	    L1.sort()
	    return L1
	
	def aproiri_gen(L, L1, D, minsupport):
	    """Generate the next level: extend each itemset in L by one item and keep the frequent results.
	
	    Args:
	        L: itemsets of the current level, each a list sorted ascending.
	        L1: the frequent 1-itemsets, each a one-element list.
	        D: transactions against which support is counted.
	        minsupport: minimum number of supporting transactions (absolute).
	
	    Returns:
	        List of frequent itemsets one item longer than those in L, each
	        a list sorted ascending.
	    """
	    res = []
	    for itemset in L:
	        for single in L1:
	            # Extend only with an item greater than the current last
	            # item, so each combination is produced exactly once.
	            if itemset[-1] < single[0]:
	                # Concatenate rather than union two sets: set iteration
	                # order is arbitrary (list({1, 8}) can be [8, 1]), and
	                # the test above relies on the last element being the
	                # largest.
	                candidate = itemset + single
	                cnt = sum(1 for transaction in D if compareList(candidate, transaction))
	                if cnt >= minsupport:
	                    res.append(candidate)
	    return res
	
	def compareList(l1, l2):
	    """Return True when every element of l1 also occurs in l2."""
	    return all(element in l2 for element in l1)
	                
	def Aproiri(D, minsupport):
	    """Mine all frequent itemsets level by level and keep the closed ones.
	
	    An itemset is dropped from the closed list when some itemset of the
	    next level contains it and is supported by at least as many
	    transactions.
	
	    Args:
	        D: transactions to mine.
	        minsupport: minimum number of supporting transactions (absolute).
	
	    Returns:
	        A tuple ``(L, close)``: L[0] is an empty placeholder and L[k]
	        holds the frequent k-itemsets; ``close`` lists the frequent
	        itemsets that survived the check described above.
	    """
	    L1 = find_frequent_1_itemsets(D, minsupport)  # level 1
	    L = [[], L1]
	    # An itemset can contain up to len(L1) items, so that level must be
	    # included; range(2, len(L1)) stopped one level short and produced
	    # no pairs at all when only two items were frequent.
	    for k in range(2, len(L1) + 1):
	        if not L[k - 1]:
	            break
	        L.append(aproiri_gen(L[k - 1], L1, D, minsupport))
	    # Start from every frequent itemset, then strike the non-closed ones.
	    close = [itemset for level in L for itemset in level]
	    for k in range(2, len(L)):
	        for itemset in L[k - 1]:
	            cnt = sum(1 for transaction in D if compareList(itemset, transaction))
	            for superset in L[k]:
	                if compareList(itemset, superset):
	                    cnm = sum(1 for transaction in D if compareList(superset, transaction))
	                    # A superset with no smaller support makes the
	                    # shorter itemset redundant.
	                    if cnt <= cnm and itemset in close:
	                        close.remove(itemset)
	    return L,close
	# Demo: mine the toy database with an absolute minimum support of 2 and time the run.
	start=time.time()
	D = loadData()
	
	L ,close = Aproiri(D, 2)
	print ("頻繁項集爲:",L)
	# Debug aid (disabled): print len(close) to see how many closed itemsets were found.
	print("閉合項集爲:",close)
	end=time.time()
	# Elapsed wall-clock time in seconds.
	print ('程序運行時間爲:',end-start,'s')

4.已知閉合項集恢復頻繁項集:

	import os
	def find_frequent_1_itemsets(D):
	    """Wrap each distinct element of D in its own one-element list.
	
	    Newline and space elements are skipped; the order of first
	    appearance is kept.
	
	    Args:
	        D: a single itemset (iterable of items).
	
	    Returns:
	        List of one-element lists without duplicates.
	    """
	    singles = []
	    for element in D:
	        if element == '\n' or element == ' ':
	            continue
	        wrapped = [element]
	        if wrapped not in singles:
	            singles.append(wrapped)
	    return singles
	
	
	def aproiri_gen(L, L1):
	    """Generate the next level by extending each itemset in L with one larger item.
	
	    Args:
	        L: itemsets of the current level, each a list sorted ascending.
	        L1: the single items available, each a one-element list.
	
	    Returns:
	        List of distinct itemsets one item longer than those in L, each
	        a list sorted ascending.
	    """
	    res = []
	    for itemset in L:
	        for single in L1:
	            # Extend only with an item greater than the current last
	            # item, so each combination is produced exactly once.
	            if itemset[-1] < single[0]:
	                # Concatenate rather than union two sets: set iteration
	                # order is arbitrary (list({1, 8}) can be [8, 1]), and
	                # the test above relies on the last element being the
	                # largest.
	                candidate = itemset + single
	                if candidate not in res:
	                    res.append(candidate)
	    return res
	def recover(L):
	    """Expand a list of closed itemsets back into the itemsets they cover.
	
	    For every closed itemset the levels of shorter itemsets are built
	    with find_frequent_1_itemsets() and aproiri_gen(), and the closed
	    itemset itself is added as a final level.
	
	    Args:
	        L: list of closed itemsets, each a list of items.
	
	    Returns:
	        Sorted list of the distinct itemsets collected from all levels.
	    """
	    collected_levels = []
	    for closed in L:
	        singles = find_frequent_1_itemsets(closed)
	        layers = [[], singles]
	        # Levels 2 .. len(closed) - 1; the full itemset is added below.
	        for size in range(2, len(closed)):
	            if not layers[size - 1]:
	                break
	            layers.append(aproiri_gen(layers[size - 1], singles))
	        for layer in layers:
	            if layer not in collected_levels:
	                collected_levels.append(layer)
	        collected_levels.append([closed])
	
	    frequent = []
	    for layer in collected_levels:
	        for subset in layer:
	            if subset not in frequent:
	                frequent.append(subset)
	    frequent.sort()
	    return frequent
	
	def compareList(l1, l2):
	    """Return True when every element of l1 also occurs in l2."""
	    return all(element in l2 for element in l1)
	
	def support(frequent, D, dic):
	    """Derive the support of each itemset from the supports of the closed itemsets.
	
	    The support assigned to an itemset is the largest ``dic`` value among
	    the closed itemsets in D that contain it (0 when none does).
	
	    Args:
	        frequent: itemsets whose support is wanted.
	        D: closed itemsets.
	        dic: maps the key of each closed itemset to its support; a key is
	            every item followed by a comma, e.g. ``'1,5,'``.
	
	    Returns:
	        Dict mapping the key of each itemset in ``frequent`` to its support.
	    """
	    def _key(items):
	        # Same "item," concatenation used for the keys of ``dic``.
	        return ''.join(str(p) + ',' for p in items)
	
	    itemset = {}
	    for pattern in frequent:
	        best = 0
	        for closed in D:
	            if compareList(pattern, closed):
	                closed_support = dic[_key(closed)]
	                if best < closed_support:
	                    best = closed_support
	        itemset.setdefault(_key(pattern), best)
	    return itemset
	
	            
	# Closed itemsets used as input for the demo.
	D=[[1], [5], [1, 5], [3, 6], [3, 5, 6], [1, 3, 4, 6]]
	# Support of each closed itemset, keyed by its items joined with trailing commas.
	dic={'1,':3,'5,':4,'1,5,':2,'3,6,':4,'3,5,6,':3,'1,3,4,6,':2}
	# Rebuild the itemsets covered by the closed ones, then look up their supports.
	L=recover(D)
	m=support(L,D,dic)
	print("恢復原頻繁項集爲:",L)
	print("原頻繁項集支持度爲:",m)
	#print(itemset)
	            
	            

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章