Python：密度峯值聚類DPCA，分裂兩簇（版本：2）

原創

2020-02-23 21:19

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict

def getDistCut(distList,distPercent):
    """Return the cutoff distance as a percentage of the largest distance.

    Parameters
    ----------
    distList : array-like
        Pairwise distances (the caller in this file passes the condensed
        vector produced by ``pdist``).
    distPercent : number
        Percentage, on a 0-100 scale, of the maximum distance to use.

    Returns
    -------
    The value ``max(distList) * distPercent / 100``.

    Raises
    ------
    ValueError
        If ``distList`` is empty.
    """
    # np.max reduces in native code; the builtin max() would iterate a
    # NumPy array one Python object at a time.
    maxDist = np.max(distList)
    return maxDist * distPercent / 100

def getRho(n,distMatrix,distCut):
    """Count, for each of the first ``n`` samples, the samples closer than ``distCut``.

    Parameters
    ----------
    n : int
        Number of samples; only the leading ``n`` x ``n`` part of
        ``distMatrix`` is inspected.
    distMatrix : 2-D array
        Pairwise distance matrix indexed as ``distMatrix[i, j]``.
    distCut : number
        Cutoff; a pair counts only when its distance is strictly smaller.

    Returns
    -------
    numpy.ndarray of float, length ``n``
        ``rho[i]`` is the number of pairs ``(i, j)``, ``i != j``, whose
        distance is below the cutoff.

    Raises
    ------
    IndexError
        If ``distMatrix`` is smaller than ``n`` x ``n`` (and ``n >= 2``).
    """
    rho = np.zeros(n,dtype=float)
    if n < 2:
        # No pairs exist, so the distance matrix is never consulted.
        return rho
    window = np.asarray(distMatrix)[:n, :n]
    if window.shape != (n, n):
        raise IndexError("distMatrix is smaller than n x n")
    # Only the strict upper triangle (i < j) is tested, so each pair is seen
    # once; a qualifying pair increments both of its endpoints.
    closePairs = np.triu(window < distCut, k=1)
    rho += closePairs.sum(axis=1)   # contributions as the row index i
    rho += closePairs.sum(axis=0)   # contributions as the column index j
    return rho

def getGammaLeader(X,y,rho,distMatrix,Block):
    """Rank the samples of ``Block`` by density and by gamma, and find each one's leader.

    All returned indices are *block-local* (positions inside ``Block``),
    not original sample indices.

    Parameters
    ----------
    X, y :
        Unused; kept so existing callers keep working.
    rho : 1-D array
        Density value per sample, indexed by original sample index.
    distMatrix : 2-D array
        Pairwise distances, indexed by original sample index.
    Block : sequence of int
        Original indices of the samples that make up the current block.

    Returns
    -------
    blockLeader : numpy.ndarray of int
        For each block member, the block-local index of the nearest member
        that precedes it in descending-density order; ``-1`` for the densest
        member (and for a member with no finite distance to a denser one).
    blockGammaOrdIndex : numpy.ndarray of int
        Block-local indices sorted by descending ``gamma = delta * rho``.
    blockRhoOrdIndex : numpy.ndarray of int
        Block-local indices sorted by descending density.

    For an empty ``Block`` three empty integer arrays are returned.
    """
    blockIds = np.asarray(Block, dtype=int)
    blockLength = len(blockIds)
    if blockLength == 0:
        # There is no densest sample to anchor the ranking on.
        return (np.zeros(0, dtype=int), np.zeros(0, dtype=int),
                np.zeros(0, dtype=int))

    distMatrix = np.asarray(distMatrix)
    blockRho = np.asarray(rho)[blockIds]
    blockRhoOrdIndex = np.flipud(np.argsort(blockRho))
    blockDelta = np.zeros(blockLength,dtype=float)
    blockLeader = np.full(blockLength, -1, dtype=int)

    # The densest member has nobody denser: its delta is the largest distance
    # from it to any block member (0 when no distance is positive).
    # distMatrix needs original indices, hence the blockIds translation.
    top = blockRhoOrdIndex[0]
    fromTop = distMatrix[blockIds[top], blockIds]
    positive = fromTop[fromTop > 0]
    blockDelta[top] = positive.max() if positive.size else 0

    for pos in range(1,blockLength):
        current = blockRhoOrdIndex[pos]
        denser = blockRhoOrdIndex[:pos]          # members ranked before current
        toDenser = distMatrix[blockIds[current], blockIds[denser]]
        # NaN / inf can never be "strictly smaller" than a running minimum
        # that starts at inf, so treat them as non-candidates.
        toDenser = np.where(toDenser < np.inf, toDenser, np.inf)
        # argmin returns the first minimum, i.e. the densest among ties.
        nearest = np.argmin(toDenser)
        blockDelta[current] = toDenser[nearest]
        if toDenser[nearest] < np.inf:
            blockLeader[current] = denser[nearest]   # block-local index

    blockGamma = blockDelta * blockRho
    blockGammaOrdIndex = np.flipud(np.argsort(blockGamma))
    return blockLeader,blockGammaOrdIndex,blockRhoOrdIndex

def getInformationBlock(X,y,Block,rho,distMatrix,blockNum):
    """Split ``Block`` into a left and a right sub-block around its top-gamma samples.

    The ``blockNum`` samples with the largest gamma become cluster centres
    labelled ``0 .. blockNum-1``; every other sample inherits the label of
    its leader. Samples labelled 0 go left, samples labelled 1 go right.

    Parameters
    ----------
    X, y :
        Passed through to ``getGammaLeader``.
    Block : sequence of int
        Original indices of the samples to split.
    rho : 1-D array
        Density per sample, indexed by original sample index.
    distMatrix : 2-D array
        Pairwise distances, indexed by original sample index.
    blockNum : int
        Number of centres to pick; only labels 0 and 1 are returned.

    Returns
    -------
    (leftBlock, rightBlock) : two lists of original sample indices.
        Both are empty when ``Block`` is empty.
    """
    Length = len(Block)
    if Length == 0:
        # An empty block has nothing to rank or split.
        return [], []

    blockLeader, blockGammaOrdIndex, blockRhoOrdIndex = getGammaLeader(X,y,rho,distMatrix,Block)
    blockClusterIndex = np.full(Length, -1, dtype=int)

    # Cap at Length so a block with fewer samples than blockNum does not
    # index past the end of the gamma ranking.
    for label in range(min(blockNum, Length)):
        blockClusterIndex[blockGammaOrdIndex[label]] = label

    # Walk in descending density so a sample's leader (which ranks earlier)
    # has already been visited. The densest sample has no leader and is skipped.
    for local in blockRhoOrdIndex[1:]:
        if blockClusterIndex[local] == -1:
            blockClusterIndex[local] = blockClusterIndex[blockLeader[local]]

    if len(set(blockClusterIndex)) != blockNum:
        print("密度峯值聚類環節出錯了：類簇索引不是兩個：",set(blockClusterIndex))

    leftBlock = []
    rightBlock = []
    for member, label in zip(Block, blockClusterIndex):
        if label == 0:
            leftBlock.append(member)
        elif label == 1:
            rightBlock.append(member)
        else:
            print("出錯了：沒有分配類簇標記")
    return leftBlock,rightBlock


if __name__ == "__main__":
    # Demo data: three 2-D Gaussian blobs (datasets.load_iris() is an
    # alternative source of X / y).
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=[1.0, 1.0, 1.0], random_state=100)
    n = len(X)
    blockNum = 2
    distPercent = 2

    # Manhattan distances: condensed vector for the cutoff, square matrix
    # for the per-pair lookups.
    distList = pdist(X, metric='cityblock')
    distMatrix = squareform(distList)
    rho = getRho(n, distMatrix, getDistCut(distList, distPercent))

    # First split: the whole data set into two blocks.
    leftBlock, rightBlock = getInformationBlock(X, y, list(range(n)), rho, distMatrix, blockNum)
    A, B = X[leftBlock], X[rightBlock]
    print("A塊的長度：",len(A),"B塊的長度：",len(B))
    for pts, mark in ((A, '+'), (B, 'o')):
        plt.scatter(pts[:, 0], pts[:, 1], marker=mark)
    plt.show()

    # Second split: the left block again, shown next to the untouched right block.
    ll, rr = getInformationBlock(X, y, leftBlock, rho, distMatrix, blockNum)
    C, D = X[ll], X[rr]
    for pts, mark in ((B, 'o'), (C, '*'), (D, '+')):
        plt.scatter(pts[:, 0], pts[:, 1], marker=mark)
    plt.show()

非常規寫法，讀者慎用！

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。

Python：密度峯值聚類DPCA，分裂兩簇（版本：2）

HTML頁面關於高分屏的設置

北歐瑞典挪威芬蘭瑞士TikTok海外網紅與YouTube博主的合作模式

druid數據源 xml配置

Python：計算類別分佈CalculateClassDistribution

Python調用matlab 函數

Python：將sklearn自帶數據轉存爲CSV文件

Python:一排三個子圖

MATLAB：生成一個雙環二維數據

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結