Python：正宗的密度峯值聚類

原創

2020-02-23 21:19

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict
from itertools import combinations,product
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.semi_supervised import LabelPropagation
from sklearn import metrics
from sklearn import datasets
from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error,f1_score

def getDistCut(distList,distPercent):
    """Return the cutoff distance as a percentage of the largest distance.

    Args:
        distList: Non-empty iterable of pairwise distances.
        distPercent: Percentage (on a 0-100 scale) of the maximum distance
            to use as the cutoff.

    Returns:
        ``max(distList) * distPercent / 100``.
    """
    largest = max(distList)
    scaled = largest * distPercent
    return scaled / 100

def getRho(n,distMatrix,distCut):
    """Compute the Gaussian-kernel local density of each of the first n samples.

    For every pair ``i < j`` the value ``exp(-(distMatrix[i, j] / distCut) ** 2)``
    is added to both ``rho[i]`` and ``rho[j]``; a sample does not contribute
    to its own density.

    Args:
        n: Number of samples to process.
        distMatrix: 2-D array-like of pairwise distances, at least n x n.
            Only entries above the diagonal are read.
        distCut: Cutoff distance used as the kernel width.

    Returns:
        1-D float ``numpy.ndarray`` of length n holding the densities.

    Raises:
        IndexError: If ``distMatrix`` is not 2-D or is smaller than n x n.
    """
    # With fewer than two samples there are no pairs to accumulate.
    if n < 2:
        return np.zeros(n,dtype=float)
    dist = np.asarray(distMatrix,dtype=float)
    if dist.ndim != 2 or dist.shape[0] < n or dist.shape[1] < n:
        raise IndexError("distMatrix must be a 2-D array of at least n x n")
    dist = dist[:n,:n]
    # Evaluate the kernel once for the whole matrix, then keep only the
    # strict upper triangle so each unordered pair is counted exactly once.
    kernel = np.triu(np.exp(-(dist / distCut) ** 2),k=1)
    # Row sums give the contribution of pairs (i, j>i) to sample i; column
    # sums give the contribution of pairs (i<j, j) to sample j.
    return kernel.sum(axis=1) + kernel.sum(axis=0)
#------------密度峯值聚類------------------#
def DPCA(n,distMatrix,rho,blockNum):
    """Cluster n samples with density-peak clustering.

    Each sample gets ``delta`` (distance to its nearest sample that comes
    earlier in descending-density order) and a ``leader`` (that sample).
    The ``blockNum`` samples with the largest ``gamma = delta * rho`` become
    cluster centres; every other sample inherits the label of its leader.

    Args:
        n: Number of samples.
        distMatrix: 2-D array of pairwise distances (at least n x n).
        rho: Sequence of n local densities.
        blockNum: Number of clusters to produce; must satisfy
            ``1 <= blockNum <= n``.

    Returns:
        ``OrderedDict`` mapping each cluster label ``0 .. blockNum-1`` to the
        list of sample indices assigned to it.

    Raises:
        ValueError: If ``n < 1`` or ``blockNum`` is outside ``[1, n]``.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    if not 1 <= blockNum <= n:
        raise ValueError("blockNum must satisfy 1 <= blockNum <= n")

    dist = np.asarray(distMatrix)
    rho = np.asarray(rho,dtype=float)

    # Sample indices ordered from highest to lowest density.
    order = np.argsort(rho)[::-1]
    peak = order[0]

    delta = np.zeros(n,dtype=float)
    leader = np.full(n,-1,dtype=int)

    # The densest sample has no denser neighbour: its delta is the largest
    # distance from it to any sample, and it keeps leader == -1.
    delta[peak] = dist[peak,:n].max()

    # Every other sample: nearest sample among those preceding it in the
    # density ordering. argmin returns the first minimum, matching a strict
    # "<" scan over the candidates in density order.
    for pos in range(1,n):
        sample = order[pos]
        denser = order[:pos]
        nearest = denser[np.argmin(dist[sample,denser])]
        delta[sample] = dist[sample,nearest]
        leader[sample] = nearest

    gamma = delta * rho
    byGamma = np.argsort(gamma)[::-1]

    # The density peak has no leader to inherit a label from, so it must be
    # a centre. It normally has the largest gamma anyway; listing it first
    # explicitly keeps that true under ties, so the label propagation below
    # never dereferences leader == -1 (which would silently wrap around to
    # the last sample).
    centers = [peak]
    centers.extend([idx for idx in byGamma if idx != peak][:blockNum - 1])

    labels = np.full(n,-1,dtype=int)
    for k,center in enumerate(centers):
        labels[center] = k

    # Walk in descending-density order so a sample's leader (which always
    # precedes it in that order) is already labelled when it is visited.
    for sample in order:
        if labels[sample] == -1:
            labels[sample] = labels[leader[sample]]

    # Collect the members of each cluster, keyed by label.
    clusterSet = OrderedDict((k,[]) for k in range(blockNum))
    for i in range(n):
        clusterSet[int(labels[i])].append(i)
    return clusterSet

    # NOTE(review): demo/driver statements. As indented here they follow the
    # `return` statement of DPCA and are therefore unreachable; presumably
    # they originally lived at module level or under an
    # `if __name__ == "__main__":` guard that was lost — TODO confirm.
    # Build a synthetic data set: 500 two-feature samples around 3 centres.
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=[1, 1, 1], random_state=104)

    n = len(X)
    Gamma = 0.5  # passed to SpecClust below
    distPercent = 5  # percentage handed to getDistCut
    # Condensed pairwise Euclidean distances, then the square matrix form.
    distList = pdist(X,metric='euclidean')
    distMatrix = squareform(distList)
    distCut = getDistCut(distList,distPercent)
    rho = getRho(n,distMatrix,distCut)
    blockNum = 3  # number of clusters requested
    # Alternative clustering back-ends, left disabled by the author.
    # clusterSet = DPCA(n,distMatrix,rho,blockNum)
    # clusterSet = K_means(n,X,blockNum)
    # NOTE(review): SpecClust (and K_means above) are not defined in the
    # visible part of this file — verify they exist before running.
    clusterSet = SpecClust(n,X,Gamma,blockNum)

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

Python：正宗的密度峯值聚類

HTML頁面關於高分屏的設置

北歐瑞典挪威芬蘭瑞士TikTok海外網紅與YouTube博主的合作模式

druid數據源 xml配置

Python：計算類別分佈CalculateClassDistribution

Python調用matlab 函數

Python：將sklearn自帶數據轉存爲CSV文件

Python:一排三個子圖

MATLAB：生成一個雙環二維數據

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結