import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict
from itertools import combinations,product
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.semi_supervised import LabelPropagation
from sklearn import metrics
from sklearn import datasets
from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error,f1_score
def getDistCut(distList, distPercent):
    """Return the cutoff distance used by the density kernel.

    The cutoff is ``distPercent`` percent of the largest value in
    ``distList``.

    Args:
        distList: Non-empty iterable of pairwise distances.
        distPercent: Percentage (0-100 scale) of the largest distance to use.

    Returns:
        The largest distance multiplied by ``distPercent`` and divided by 100.
    """
    largest = max(distList)
    # Multiply before dividing so the rounding matches the one-line formula.
    scaled = largest * distPercent
    return scaled / 100
def getRho(n, distMatrix, distCut):
    """Compute the Gaussian-kernel local density of the first ``n`` samples.

    The density of sample ``i`` is the sum, over every other sample ``j``, of
    ``exp(-(d / distCut) ** 2)`` where ``d`` is the distance of the pair.

    Args:
        n: Number of samples; the leading ``n x n`` part of ``distMatrix``
            is used.
        distMatrix: 2-D array of pairwise distances. Each pair is read from
            the entry above the diagonal (``distMatrix[i, j]`` with
            ``i < j``); the diagonal and lower triangle are ignored.
        distCut: Cutoff distance that scales the kernel width.

    Returns:
        ``numpy.ndarray`` of ``n`` floats (all zeros when ``n < 2``, since
        there are no pairs to accumulate).

    Raises:
        IndexError: If ``distMatrix`` is smaller than ``n x n``.
    """
    rho = np.zeros(n, dtype=float)
    if n < 2:
        return rho

    dist = np.asarray(distMatrix, dtype=float)[:n, :n]
    if dist.shape != (n, n):
        raise IndexError("distMatrix must be at least n x n")

    # Evaluate the kernel once per pair and keep only the strict upper
    # triangle, i.e. exactly the (i, j) entries with i < j.
    weights = np.triu(np.exp(-(dist / distCut) ** 2), k=1)
    # Each pair (i, j) contributes to both endpoints: the row sum credits
    # sample i, the column sum credits sample j.
    return weights.sum(axis=1) + weights.sum(axis=0)
#------------ Density peak clustering ------------------#
def DPCA(n, distMatrix, rho, blockNum):
    """Partition ``n`` samples into ``blockNum`` clusters by density peaks.

    Every sample gets a ``delta`` (distance to the nearest sample of higher
    density) and a ``leader`` (that nearest sample). The densest sample has
    no leader; its ``delta`` is its largest distance to any sample. The
    ``blockNum`` samples with the largest ``delta * rho`` become cluster
    centres, and each remaining sample inherits the label of its leader,
    visiting samples from densest to sparsest.

    Args:
        n: Number of samples.
        distMatrix: ``n x n`` array of pairwise distances (expected to be
            finite).
        rho: Sequence of ``n`` local densities.
        blockNum: Number of clusters to produce.

    Returns:
        ``OrderedDict`` mapping each label ``0 .. blockNum - 1`` to the list
        of sample indices (ascending) assigned to it. When ``n == 0`` every
        list is empty.

    Raises:
        ValueError: If ``n > 0`` and ``blockNum`` is not in ``1 .. n``.
    """
    clusterSet = OrderedDict((k, []) for k in range(blockNum))
    if n == 0:
        # Nothing to cluster: every requested cluster stays empty.
        return clusterSet
    if not 1 <= blockNum <= n:
        raise ValueError(
            "blockNum must be between 1 and n (got blockNum={!r}, n={!r})".format(
                blockNum, n
            )
        )

    dist = np.asarray(distMatrix)
    # Sample indices ordered from highest to lowest density.
    rhoOrdIndex = np.flipud(np.argsort(rho))
    densest = rhoOrdIndex[0]

    delta = np.zeros(n, dtype=float)
    leader = np.full(n, -1, dtype=int)

    # Delta of the densest sample: its largest distance to any sample
    # (never below zero). It keeps leader == -1.
    delta[densest] = max(0.0, dist[densest, :n].max())

    # Delta and leader of every other sample: the nearest sample among those
    # ranked denser. argmin returns the first minimum, so ties go to the
    # densest candidate.
    for rank in range(1, n):
        current = rhoOrdIndex[rank]
        denser = rhoOrdIndex[:rank]
        gaps = dist[current, denser]
        nearest = np.argmin(gaps)
        delta[current] = gaps[nearest]
        leader[current] = denser[nearest]

    gamma = delta * rho
    gammaOrdIdx = np.flipud(np.argsort(gamma))

    # Initialise the cluster centres with the blockNum largest gamma values.
    centers = gammaOrdIdx[:blockNum].copy()
    if densest not in centers:
        # The densest sample has no leader to inherit a label from, so it
        # must be a centre; otherwise leader == -1 would be used as an index
        # and silently wrap around to the last sample.
        centers[-1] = densest
    clusterIdx = np.full(n, -1, dtype=int)
    clusterIdx[centers] = np.arange(blockNum)

    # Label the non-centre samples. Leaders are denser, so they are always
    # labelled before the samples that follow them.
    for sample in rhoOrdIndex:
        if clusterIdx[sample] == -1:
            clusterIdx[sample] = clusterIdx[leader[sample]]

    # Collect the members of each cluster in a dictionary.
    for sample, label in enumerate(clusterIdx.tolist()):
        clusterSet[label].append(sample)
    return clusterSet
# Demo driver: build a synthetic 2-D data set of 500 samples drawn around
# 3 centres, compute the density inputs, then cluster the samples.
X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=[1, 1, 1], random_state=104)
n = len(X)
# NOTE(review): Gamma is only passed to SpecClust below; presumably a kernel
# coefficient for spectral clustering -- confirm against SpecClust.
Gamma = 0.5
# Cutoff distance is taken as 5 percent of the largest pairwise distance.
distPercent = 5
# Condensed pairwise Euclidean distances, then the square matrix form.
distList = pdist(X,metric='euclidean')
distMatrix = squareform(distList)
distCut = getDistCut(distList,distPercent)
# rho is consumed only by the DPCA call, which is currently commented out.
rho = getRho(n,distMatrix,distCut)
blockNum = 3
# Alternative clustering back ends; exactly one assignment is active.
# clusterSet = DPCA(n,distMatrix,rho,blockNum)
# clusterSet = K_means(n,X,blockNum)
# NOTE(review): SpecClust (and K_means) are not defined in the visible part
# of this file; they must be defined before this line executes.
clusterSet = SpecClust(n,X,Gamma,blockNum)