import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict
def getDistCut(distList, distPercent):
    """Return the cutoff distance as a percentage of the largest distance.

    Args:
        distList: Non-empty iterable of distances.
        distPercent: Percentage (0-100 scale) of the largest distance to use.

    Returns:
        ``max(distList) * distPercent / 100``.
    """
    largest = max(distList)
    scaled = largest * distPercent
    return scaled / 100
def getRho(n, distMatrix, distCut):
    """Count, for each of the first ``n`` points, its neighbours within the cutoff.

    Every pair ``(i, j)`` with ``i < j < n`` whose distance
    ``distMatrix[i, j]`` is strictly less than ``distCut`` adds one to both
    ``rho[i]`` and ``rho[j]``. Only the strict upper triangle of the matrix
    is read.

    Args:
        n: Number of points to consider.
        distMatrix: Two-dimensional array of pairwise distances, at least
            ``n`` x ``n``.
        distCut: Cutoff distance (strict comparison).

    Returns:
        Float array of length ``n`` holding the neighbour counts.

    Raises:
        IndexError: If ``n >= 2`` and ``distMatrix`` is not a 2-D array of
            at least ``n`` x ``n`` entries.
    """
    rho = np.zeros(n, dtype=float)
    if n < 2:
        # No pairs exist, so the matrix is never consulted.
        return rho

    dist = np.asarray(distMatrix)
    if dist.ndim != 2 or min(dist.shape) < n:
        raise IndexError("distMatrix must be a 2-D array of at least n x n entries")

    # Strict upper triangle: each unordered pair is examined exactly once.
    close = np.triu(dist[:n, :n] < distCut, k=1)
    rho += close.sum(axis=1)  # pairs where the point is the smaller index
    rho += close.sum(axis=0)  # pairs where the point is the larger index
    return rho
def getGammaLeader(X, y, rho, distMatrix, Block):
    """Compute leaders and the density / gamma orderings for one block.

    All returned indices are positions inside ``Block`` (block-local), not
    indices into the full data set. ``distMatrix`` is addressed with the
    original indices stored in ``Block``.

    Args:
        X: Unused by this function.
        y: Unused by this function.
        rho: Array of per-point densities, indexed by original point index.
        distMatrix: Pairwise distance matrix, indexed by original point index.
        Block: Sequence of original point indices forming the block. An
            empty block raises ``IndexError``.

    Returns:
        Tuple ``(blockLeader, blockGammaOrdIndex, blockRhoOrdIndex)``:
        ``blockLeader[k]`` is the block-local position of the nearest member
        that precedes ``k`` in the descending-density order (``-1`` for the
        densest member); ``blockGammaOrdIndex`` lists block-local positions
        by descending ``delta * rho``; ``blockRhoOrdIndex`` lists them by
        descending density.
    """
    size = len(Block)
    localRho = rho[Block]
    # Block-local positions ordered from highest to lowest density.
    byRho = np.argsort(localRho)[::-1]
    delta = np.zeros(size, dtype=float)
    leader = np.full(size, -1, dtype=int)

    # Densest member: its delta is the largest distance to any block member.
    peakLocal = byRho[0]
    peakGlobal = Block[peakLocal]  # original index, needed for distMatrix
    farthest = 0
    for member in Block:
        gap = distMatrix[peakGlobal, member]
        if gap > farthest:
            farthest = gap
    delta[peakLocal] = farthest
    leader[peakLocal] = -1

    # Every other member: nearest member that comes earlier in density order.
    for rank in range(1, size):
        here = byRho[rank]
        hereGlobal = Block[here]
        nearest = np.inf
        nearestLocal = -1
        for denser in byRho[:rank]:
            gap = distMatrix[hereGlobal, Block[denser]]
            if gap < nearest:
                nearest = gap
                nearestLocal = denser  # keep the block-local position
        delta[here] = nearest
        leader[here] = nearestLocal

    gamma = delta * localRho
    byGamma = np.argsort(gamma)[::-1]
    return leader, byGamma, byRho
def getInformationBlock(X, y, Block, rho, distMatrix, blockNum):
    """Split ``Block`` into two lists of original indices by cluster label.

    The first ``blockNum`` entries of the gamma ordering returned by
    ``getGammaLeader`` are seeded with labels ``0 .. blockNum - 1``. The
    remaining members are then visited in descending-density order and each
    unlabelled one copies the label of its leader. The densest member is
    skipped by this pass, so it only has a label if it was one of the seeds.

    Args:
        X: Forwarded to ``getGammaLeader``.
        y: Forwarded to ``getGammaLeader``.
        Block: Sequence of original point indices to split.
        rho: Per-point densities, indexed by original point index.
        distMatrix: Pairwise distance matrix, indexed by original point index.
        blockNum: Number of seed labels to assign.

    Returns:
        Tuple ``(leftBlock, rightBlock)``: original indices labelled ``0``
        and ``1`` respectively. Members with any other label are reported
        with a printed message and placed in neither list.
    """
    leaders, gammaOrder, rhoOrder = getGammaLeader(X, y, rho, distMatrix, Block)
    size = len(Block)
    labels = np.full(size, -1, dtype=int)

    # Seed one label per top-gamma member.
    for label in range(blockNum):
        labels[gammaOrder[label]] = label

    # Propagate labels down the density order; a leader is always visited
    # before the members that follow it.
    for member in rhoOrder[1:]:
        if labels[member] == -1:
            labels[member] = labels[leaders[member]]

    distinct = set(labels)
    if len(distinct) != blockNum:
        print("密度峯值聚類環節出錯了:類簇索引不是兩個:", distinct)

    leftBlock, rightBlock = [], []
    for original, label in zip(Block, labels):
        if label == 0:
            leftBlock.append(original)
        elif label == 1:
            rightBlock.append(original)
        else:
            print("出錯了:沒有分配類簇標記")
    return leftBlock, rightBlock
if __name__ == "__main__":
    # Demo data: 500 two-dimensional samples drawn around 3 centres.
    # (The iris data set from ``datasets.load_iris()`` is an alternative.)
    X, y = datasets.make_blobs(
        n_samples=500,
        n_features=2,
        centers=3,
        cluster_std=[1.0, 1.0, 1.0],
        random_state=100,
    )
    n = len(X)
    distPercent = 2  # cutoff as a percentage of the largest pairwise distance
    blockNum = 2     # number of seed labels per split

    # Pairwise city-block distances, in condensed and square form.
    distList = pdist(X, metric='cityblock')
    distMatrix = squareform(distList)
    distCut = getDistCut(distList, distPercent)
    rho = getRho(n, distMatrix, distCut)

    # First split: the whole data set.
    currentBlock = list(range(n))
    leftBlock, rightBlock = getInformationBlock(X, y, currentBlock, rho, distMatrix, blockNum)
    A = X[leftBlock]
    B = X[rightBlock]
    print("A塊的長度:", len(A), "B塊的長度:", len(B))
    for points, marker in ((A, '+'), (B, 'o')):
        plt.scatter(points[:, 0], points[:, 1], marker=marker)
    plt.show()

    # Second split: the left block only, plotted next to the untouched right block.
    ll, rr = getInformationBlock(X, y, leftBlock, rho, distMatrix, blockNum)
    C = X[ll]
    D = X[rr]
    for points, marker in ((B, 'o'), (C, '*'), (D, '+')):
        plt.scatter(points[:, 0], points[:, 1], marker=marker)
    plt.show()
# Note: this is an unconventional implementation; readers should use it with caution.