# Python: authentic density-peak clustering (正宗的密度峯值聚類)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict
from itertools import combinations,product
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.semi_supervised import LabelPropagation
from sklearn import metrics
from sklearn import datasets
from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error,f1_score

def getDistCut(distList,distPercent):
    """Return the cutoff distance as a percentage of the largest distance.

    Args:
        distList: non-empty iterable of pairwise distances.
        distPercent: percentage (0-100 scale) of the maximum distance to use.

    Returns:
        ``max(distList) * distPercent / 100``.
    """
    largest = max(distList)
    scaled = largest * distPercent
    return scaled / 100

def getRho(n,distMatrix,distCut):
    """Compute the Gaussian-kernel local density of the first ``n`` samples.

    For every unordered pair ``(i, j)`` with ``i < j < n`` the weight
    ``exp(-(distMatrix[i, j] / distCut) ** 2)`` is added to both ``rho[i]``
    and ``rho[j]``.  Only the strict upper triangle of ``distMatrix`` is
    read, so the diagonal and the lower triangle never contribute.

    Args:
        n: number of samples to process.
        distMatrix: 2-D array-like of pairwise distances, at least ``n x n``.
        distCut: cutoff distance used as the kernel bandwidth.

    Returns:
        A float ``numpy.ndarray`` of length ``n`` holding the densities.
    """
    # With fewer than two samples there are no pairs; every density is zero.
    if n < 2:
        return np.zeros(n,dtype=float)
    dist = np.asarray(distMatrix,dtype=float)[:n, :n]
    # Evaluate the kernel once for the whole matrix instead of twice per pair
    # inside a Python-level double loop.
    kernel = np.exp(-(dist / distCut) ** 2)
    # Keep each pair once (i < j); row sums credit sample i, column sums
    # credit sample j.
    upper = np.triu(kernel,k=1)
    return upper.sum(axis=1) + upper.sum(axis=0)
#------------密度峯值聚類------------------#
def DPCA(n,distMatrix,rho,blockNum):
    """Cluster ``n`` samples with density-peak clustering.

    Each sample gets a ``delta`` (distance to its nearest sample of higher
    density) and a ``leader`` (that nearest sample).  The ``blockNum``
    samples with the largest ``delta * rho`` become cluster centres, and
    every other sample inherits the label of its leader, visiting samples
    in order of decreasing density so the leader is always labelled first.

    Args:
        n: number of samples.
        distMatrix: ``n x n`` array-like of pairwise distances.
        rho: length-``n`` array-like of local densities.
        blockNum: number of clusters to produce; must satisfy
            ``1 <= blockNum <= n``.

    Returns:
        An ``OrderedDict`` mapping each cluster label ``0 .. blockNum - 1``
        to the list of sample indices (in ascending order) in that cluster.

    Raises:
        ValueError: if ``blockNum`` is not in ``1 .. n``.
    """
    if not 1 <= blockNum <= n:
        raise ValueError(
            "blockNum must be between 1 and n (got blockNum=%r, n=%r)" % (blockNum, n)
        )
    distMatrix = np.asarray(distMatrix)
    # Sample indices sorted by decreasing density.
    rhoOrdIndex = np.argsort(rho)[::-1]
    delta = np.zeros(n,dtype=float)
    leader = np.full(n,-1,dtype=int)

    # Densest sample: it has no higher-density neighbour, so its delta is the
    # largest distance from it to any sample (never below 0); leader stays -1.
    top = rhoOrdIndex[0]
    delta[top] = max(0.0, float(distMatrix[top, :n].max()))

    # Every other sample: delta/leader come from the nearest sample among
    # those that precede it in the density ordering.
    for i in range(1,n):
        current = rhoOrdIndex[i]
        denser = rhoOrdIndex[:i]
        # argmin returns the first minimum, so ties resolve to the denser
        # candidate that appears earliest in the ordering.
        nearest = denser[np.argmin(distMatrix[current, denser])]
        delta[current] = distMatrix[current, nearest]
        leader[current] = nearest

    gamma = delta * rho
    gammaOrdIdx = np.argsort(gamma)[::-1]

    # Start clustering: -1 marks a sample that has no label yet.
    clusterIdx = np.full(n,-1,dtype=int)
    # Initialise the cluster centres with labels 0 .. blockNum - 1.
    clusterIdx[gammaOrdIdx[:blockNum]] = np.arange(blockNum)
    # Label the remaining samples from their leaders, densest first.
    for sample in rhoOrdIndex:
        if clusterIdx[sample] == -1:
            clusterIdx[sample] = clusterIdx[leader[sample]]

    # Collect the members of each cluster in a dictionary.
    clusterSet = OrderedDict((k, []) for k in range(blockNum))
    for i, label in enumerate(clusterIdx):
        clusterSet[int(label)].append(i)
    return clusterSet


if __name__ == "__main__":
    # Demo driver.  In the original text these statements were indented under
    # DPCA after its return statement and therefore could never run.
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=[1, 1, 1], random_state=104)

    n = len(X)
    distPercent = 5
    distList = pdist(X,metric='euclidean')
    distMatrix = squareform(distList)
    distCut = getDistCut(distList,distPercent)
    rho = getRho(n,distMatrix,distCut)
    blockNum = 3
    # NOTE(review): the original driver called SpecClust(n,X,Gamma,blockNum)
    # with Gamma = 0.5 and listed K_means(n,X,blockNum) as an alternative;
    # neither function is defined in the visible part of this file, so the
    # demo runs DPCA instead.
    clusterSet = DPCA(n,distMatrix,rho,blockNum)

 

# Web-page residue (comment form / related-articles footer), not program code:
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章