# Python：相對標準的DPC

``````
import numpy as np
import pandas as pd
import copy
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import metrics
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy.io as scio

############%%%%%%-------------------%%%%%%%%%############

############%%%%%%-------------------%%%%%%%%%############

# -------- Cutoff-distance helper (marked "no problems" by the author) --------#
def getDistCut(distList, distPercent):
    """Return the cutoff distance as a percentage of the largest distance.

    Args:
        distList: Sequence or array of pairwise distances (the script passes
            the condensed vector returned by ``pdist``).
        distPercent: Percentage, on a 0-100 scale, of the largest distance.

    Returns:
        ``max(distList) * distPercent / 100``, or ``0.0`` when ``distList``
        is empty (a single sample has no pairwise distances).
    """
    if np.size(distList) == 0:
        return 0.0
    # np.max reduces in native code; the builtin max() would iterate the
    # array element by element in Python.
    return np.max(distList) * distPercent / 100
# -------- Sample-density helper (marked "no problems" by the author) --------#
def getRho(n, distMatrix, distCut):
    """Count, for each sample, how many other samples lie within ``distCut``.

    Only the entries ``distMatrix[i, j]`` with ``i < j < n`` are read; every
    such pair whose distance is strictly below ``distCut`` adds one to the
    density of both ``i`` and ``j``.

    Args:
        n: Number of samples to consider (leading ``n`` rows/columns).
        distMatrix: Square matrix of pairwise distances.
        distCut: Cutoff distance (strict ``<`` comparison).

    Returns:
        Float array of length ``n`` holding the neighbour counts.
    """
    # Strict upper triangle == the (i, j) pairs with i < j, each visited once.
    close = np.triu(np.asarray(distMatrix)[:n, :n] < distCut, k=1)
    # Row sums credit sample i, column sums credit sample j.
    return (close.sum(axis=1) + close.sum(axis=0)).astype(float)

def getGammaOrderIndex(n, rho, distMatrix):
    """Compute delta, gamma and the density / gamma orderings of the samples.

    NOTE(review): the listing this function was recovered from had lost
    several lines (the nearest-denser-neighbour bookkeeping, the ``return``
    and the header of ``getDPCA``).  The split below follows the call site in
    the script section, which unpacks four values from this function and then
    calls ``getDPCA(n, rhoOrdIndex, gammaOrderIndex, leader, blockNum)``.

    Args:
        n: Number of samples.
        rho: Density of each sample (length ``n``).
        distMatrix: Square matrix of pairwise distances.

    Returns:
        Tuple ``(rhoOrdIndex, gamma, gammaOrderIndex, leader)``:
        sample indices by decreasing ``rho``; ``delta * rho`` per sample;
        sample indices by decreasing ``gamma``; and, per sample, the index of
        its nearest sample that comes earlier in ``rhoOrdIndex`` (``-1`` for
        the first one).
    """
    distMatrix = np.asarray(distMatrix)
    rhoOrdIndex = np.flipud(np.argsort(rho))
    delta = np.zeros(n, dtype=float)
    leader = np.full(n, -1, dtype=int)

    # ----------- delta of the densest sample: its largest distance ----------#
    densest = rhoOrdIndex[0]
    delta[densest] = np.max(distMatrix[densest, :n], initial=0)

    # ----------- delta of every other sample --------------------------------#
    for rank in range(1, n):
        sample = rhoOrdIndex[rank]
        denser = rhoOrdIndex[:rank]
        dists = distMatrix[sample, denser]
        # argmin keeps the first minimum, like the original strict `<` scan.
        nearest = np.argmin(dists)
        delta[sample] = dists[nearest]
        leader[sample] = denser[nearest]

    gamma = delta * rho
    gammaOrderIndex = np.flipud(np.argsort(gamma))
    return rhoOrdIndex, gamma, gammaOrderIndex, leader


def getDPCA(n, rhoOrdIndex, gammaOrderIndex, leader, blockNum):
    """Assign every sample to one of ``blockNum`` density-peak clusters.

    Args:
        n: Number of samples.
        rhoOrdIndex: Sample indices sorted by decreasing density.
        gammaOrderIndex: Sample indices sorted by decreasing gamma.
        leader: Per-sample index of the nearest denser sample (``-1`` if none).
        blockNum: Number of clusters; the top ``blockNum`` gamma samples
            become the cluster centres.

    Returns:
        ``OrderedDict`` mapping each label ``0..blockNum-1`` to the list of
        sample indices carrying that label.

    Raises:
        ValueError: If ``blockNum`` is not between 1 and ``n``.
    """
    if not 1 <= blockNum <= n:
        raise ValueError("blockNum must be between 1 and n")

    # ----------- every sample starts unlabelled (-1) ------------------------#
    clusterIndex = np.full(n, -1, dtype=int)
    # ----------- give the cluster centres their labels ----------------------#
    for label in range(blockNum):
        clusterIndex[gammaOrderIndex[label]] = label
    # ----------- propagate labels in decreasing-density order ---------------#
    # A sample's leader always precedes it in rhoOrdIndex, so the leader is
    # already labelled by the time the sample is visited.
    for sample in rhoOrdIndex:
        if clusterIndex[sample] == -1:
            parent = leader[sample]
            # The densest sample has no leader; if ties kept it out of the
            # centres, fall back to the first cluster instead of indexing -1.
            clusterIndex[sample] = clusterIndex[parent] if parent >= 0 else 0

    # ----------- group sample indices by label ------------------------------#
    clusterSet = OrderedDict((label, []) for label in range(blockNum))
    for sample in range(n):
        clusterSet[clusterIndex[sample]].append(sample)
    return clusterSet

if __name__ == '__main__':
    # NOTE(review): `data` is never assigned in the visible listing; it has to
    # be loaded before this point, with the labels in the last column.
    #
    # The original listing carried a long run of commented-out dataset
    # alternatives here, almost all of the form
    #     X = data[:, :-1]; y = data[:, -1]
    # plus three variants:
    #     pca = PCA(0.9); X = pca.fit_transform(data[:, :-1]); y = data[:, -1]
    #     mnist = fetch_mldata('MNIST original'); X = mnist['data']; y = mnist['target']
    #     X, y = fetch_covtype(return_X_y=True)
    X = data[:, :-1]
    y = data[:, -1]
    # ----------------- everything above selects the data -----------------#
    n = X.shape[0]
    classNum = len(set(y))
    blockNum = 7

    # Manhattan distances: condensed vector first, then the square matrix.
    distList = pdist(X, metric='cityblock')
    distMatrix = squareform(distList)

    distCut = getDistCut(distList, distPercent=7)
    rho = getRho(n, distMatrix, distCut)
    rhoOrdIndex, gamma, gammaOrderIndex, leader = getGammaOrderIndex(
        n, rho, distMatrix
    )
    clusterSet = getDPCA(n, rhoOrdIndex, gammaOrderIndex, leader, blockNum)

    budget = 50
    # One scatter call per cluster, using the first two feature columns.
    for members in clusterSet.values():
        cluster_points = X[members]
        plt.scatter(cluster_points[:, 0], cluster_points[:, 1])
plt.show()
``````

``````
import numpy as np
import pandas as pd
import copy
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import metrics
from scipy.spatial.distance import pdist, squareform
from collections import OrderedDict
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scipy.io as scio

############%%%%%%-------------------%%%%%%%%%############

############%%%%%%-------------------%%%%%%%%%############
class DPC(object):
    """Density Peak Clustering with a percentage-based cutoff distance.

    Typical use is ``DPC(X, clusterNum, distPercent).getDPC()``.

    NOTE(review): the listing this class was recovered from had lost the
    statements that record each sample's nearest denser neighbour and that
    copy the neighbour's label to unlabelled samples; both are restored here
    following the usual density-peak assignment rule.
    """

    def __init__(self, X, clusterNum, distPercent):
        """Store the data and precompute the Euclidean pairwise distances.

        Args:
            X: Sample array; ``X.shape[0]`` is taken as the sample count.
            clusterNum: Number of clusters (density peaks) to extract.
            distPercent: Percentage (0-100 scale) of the largest pairwise
                distance used as the cutoff distance.
        """
        self.X = X
        self.N = X.shape[0]
        self.clusterNum = clusterNum
        self.distPercent = distPercent
        self.distCut = 0
        self.rho = np.zeros(self.N, dtype=float)
        self.delta = np.zeros(self.N, dtype=float)
        self.gamma = np.zeros(self.N, dtype=float)
        # Index of the nearest denser sample for each sample; -1 = none.
        self.leader = np.full(self.N, -1, dtype=int)
        self.distList = pdist(self.X, metric='euclidean')
        self.distMatrix = squareform(self.distList)
        self.clusterIdx = np.ones(self.N, dtype=int) * (-1)

    def getDistCut(self):
        """Return ``distPercent`` percent of the largest pairwise distance.

        Returns ``0.0`` when there are no pairwise distances at all.
        """
        if np.size(self.distList) == 0:
            return 0.0
        return np.max(self.distList) * self.distPercent / 100

    def getRho(self):
        """Return, per sample, the number of other samples closer than the cutoff.

        Also refreshes ``self.distCut``.
        """
        self.distCut = self.getDistCut()
        # Strict upper triangle visits every unordered pair exactly once.
        close = np.triu(self.distMatrix < self.distCut, k=1)
        return (close.sum(axis=1) + close.sum(axis=0)).astype(float)

    def getGammaOrderIndex(self):
        """Compute rho, delta, leader and gamma, and the two orderings.

        Returns:
            Tuple ``(gammaOrderIndex, rhoOrdIndex)``: sample indices sorted by
            decreasing gamma and by decreasing density respectively.
        """
        self.rho = self.getRho()
        rhoOrdIndex = np.flipud(np.argsort(self.rho))
        self.leader = np.full(self.N, -1, dtype=int)

        # ----------- delta of the densest sample: its largest distance ------#
        densest = rhoOrdIndex[0]
        self.delta[densest] = np.max(self.distMatrix[densest], initial=0)

        # ----------- delta of every other sample ----------------------------#
        for rank in range(1, self.N):
            sample = rhoOrdIndex[rank]
            denser = rhoOrdIndex[:rank]
            dists = self.distMatrix[sample, denser]
            # argmin keeps the first minimum, like a strict `<` scan.
            nearest = np.argmin(dists)
            self.delta[sample] = dists[nearest]
            self.leader[sample] = denser[nearest]

        self.gamma = self.delta * self.rho
        gammaOrderIndex = np.flipud(np.argsort(self.gamma))
        return gammaOrderIndex, rhoOrdIndex

    def getDPC(self):
        """Run the clustering and return the cluster membership.

        Returns:
            ``OrderedDict`` mapping each label ``0..clusterNum-1`` to the list
            of sample indices assigned to it.

        Raises:
            ValueError: If ``clusterNum`` is not between 1 and the sample count.
        """
        if not 1 <= self.clusterNum <= self.N:
            raise ValueError("clusterNum must be between 1 and the number of samples")

        gammaOrderIndex, rhoOrdIndex = self.getGammaOrderIndex()

        # Start from a clean slate so repeated calls give the same answer.
        self.clusterIdx = np.full(self.N, -1, dtype=int)
        # ----------- give the cluster centres their labels ------------------#
        for label in range(self.clusterNum):
            self.clusterIdx[gammaOrderIndex[label]] = label
        # ----------- propagate labels in decreasing-density order -----------#
        # A sample's leader precedes it in rhoOrdIndex, so it is labelled first.
        for sample in rhoOrdIndex:
            if self.clusterIdx[sample] == -1:
                parent = self.leader[sample]
                # The densest sample has no leader; if ties kept it out of the
                # centres, fall back to the first cluster.
                self.clusterIdx[sample] = self.clusterIdx[parent] if parent >= 0 else 0

        # ----------- group sample indices by label --------------------------#
        clusterSet = OrderedDict((label, []) for label in range(self.clusterNum))
        for sample in range(self.N):
            clusterSet[self.clusterIdx[sample]].append(sample)
        return clusterSet

if __name__ == '__main__':
    # NOTE(review): `data` is never assigned in the visible listing; it has to
    # be loaded before this point, with the labels in the last column.
    X, y = data[:, :-1], data[:, -1]

    dpc = DPC(X, clusterNum=7, distPercent=7)
    clusterSet = dpc.getDPC()

    budget = 50
    # One scatter call per cluster, using the first two feature columns.
    for members in clusterSet.values():
        cluster_points = X[members]
        plt.scatter(cluster_points[:, 0], cluster_points[:, 1])
plt.show()
``````

``````
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import OrderedDict
from scipy.spatial.distance import pdist,squareform

class DPCA(object):
    """Density Peak Clustering with a neighbour-based cutoff and Gaussian density.

    Intended call order (as used by the script section):
    ``get_distCut()`` -> ``get_Rho()`` -> ``DPCA()``.

    NOTE(review): the listing this class was recovered from had lost the
    statements that record each sample's nearest denser neighbour and that
    copy the neighbour's label to unlabelled samples; both are restored here
    following the usual density-peak assignment rule.
    """

    def __init__(self, X, neighborNum, blockNum):
        """Store the data and precompute the Euclidean distance matrix.

        Args:
            X: Sample array; ``X.shape[0]`` is taken as the sample count.
            neighborNum: Neighbour rank ``K`` used by ``get_distCut``.
            blockNum: Number of clusters (density peaks) to extract.
        """
        self.X = X
        self.N = X.shape[0]
        self.K = neighborNum
        self.blockNum = blockNum
        self.distCut = 0
        self.rho = np.zeros(self.N, dtype=float)
        self.delta = np.zeros(self.N, dtype=float)
        self.gamma = np.zeros(self.N, dtype=float)
        # Index of the nearest denser sample for each sample; -1 = none.
        self.leader = np.full(self.N, -1, dtype=int)
        self.distMatrix = squareform(pdist(self.X, metric='euclidean'))
        self.clusterIdx = np.ones(self.N, dtype=int) * (-1)

    def get_distCut(self):
        """Set ``self.distCut`` to mean + sample std of a per-sample neighbour distance.

        For every sample the distance at position ``K + 1`` of its ascending
        distance row is taken.
        """
        # NOTE(review): position 0 of a sorted row is the zero self-distance,
        # so K + 1 selects the (K+1)-th nearest neighbour rather than the
        # K-th — kept as in the original; confirm this is intended.
        deltaK = np.sort(self.distMatrix, axis=1)[:, self.K + 1]
        # ddof=1 gives the (N - 1)-normalised standard deviation.
        self.distCut = np.mean(deltaK) + np.std(deltaK, ddof=1)

    def get_Rho(self):
        """Set ``self.rho`` to the Gaussian-kernel density of every sample.

        ``rho[i]`` is the sum over all other samples ``j`` of
        ``exp(-(d_ij / distCut) ** 2)``.  The result is recomputed from
        scratch, so calling this method repeatedly does not accumulate.
        """
        if self.distCut == 0:
            # Cutoff not computed yet; dividing by zero would yield NaNs.
            self.get_distCut()
        kernel = np.exp(-(self.distMatrix / self.distCut) ** 2)
        # A sample does not contribute to its own density.
        np.fill_diagonal(kernel, 0.0)
        self.rho = kernel.sum(axis=1)

    def DPCA(self):
        """Run the clustering and return the cluster membership.

        Returns:
            ``OrderedDict`` mapping each label ``0..blockNum-1`` to the list
            of sample indices assigned to it.

        Raises:
            ValueError: If ``blockNum`` is not between 1 and the sample count.
        """
        if not 1 <= self.blockNum <= self.N:
            raise ValueError("blockNum must be between 1 and the number of samples")
        if not np.any(self.rho):
            # Densities were never computed; do it now instead of clustering
            # on an all-zero rho.
            self.get_Rho()

        rhoOrdIndex = np.flipud(np.argsort(self.rho))
        self.leader = np.full(self.N, -1, dtype=int)

        # Delta of the densest sample: its largest distance to any sample.
        densest = rhoOrdIndex[0]
        self.delta[densest] = np.max(self.distMatrix[densest], initial=0)

        # Delta of every other sample: distance to the nearest denser sample.
        for rank in range(1, self.N):
            sample = rhoOrdIndex[rank]
            denser = rhoOrdIndex[:rank]
            dists = self.distMatrix[sample, denser]
            # argmin keeps the first minimum, like a strict `<` scan.
            nearest = np.argmin(dists)
            self.delta[sample] = dists[nearest]
            self.leader[sample] = denser[nearest]

        self.gamma = self.delta * self.rho
        gammaOrdIdx = np.flipud(np.argsort(self.gamma))

        # Initialise the cluster centres (top blockNum gamma values).
        self.clusterIdx = np.full(self.N, -1, dtype=int)
        for k in range(self.blockNum):
            self.clusterIdx[gammaOrdIdx[k]] = k

        # Cluster the remaining samples in decreasing-density order; a
        # sample's leader precedes it in rhoOrdIndex, so it is labelled first.
        for sample in rhoOrdIndex:
            if self.clusterIdx[sample] == -1:
                parent = self.leader[sample]
                # The densest sample has no leader; if ties kept it out of the
                # centres, fall back to the first cluster.
                self.clusterIdx[sample] = self.clusterIdx[parent] if parent >= 0 else 0

        clusterSet = OrderedDict((k, []) for k in range(self.blockNum))
        for sample in range(self.N):
            clusterSet[self.clusterIdx[sample]].append(sample)
        return clusterSet

if __name__ == '__main__':
    # Aggregation data set.  The original header comment mentions
    # neighborNum=3, while the code below uses 10.
    # NOTE(review): `data` is never assigned in the visible listing; it has to
    # be loaded before this point, with the labels in the last column.
    X, y = data[:, :-1], data[:, -1]
    neighborNum = 10
    blockNum = 7

    dpc = DPCA(X, neighborNum, blockNum)
    # The cutoff must exist before densities, and densities before clustering.
    dpc.get_distCut()
    dpc.get_Rho()
    clusterSet = dpc.DPCA()

    # One scatter call per cluster, using the first two feature columns.
    for members in clusterSet.values():
        cluster_points = X[members]
        plt.scatter(cluster_points[:, 0], cluster_points[:, 1])
plt.show()
``````