藉助 sklearn 庫實現 KMeans 聚類並計算輪廓係數:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Candidate cluster counts to evaluate (the original note calls these "topic" counts).
K = range(2, 20)
# Silhouette coefficient for each k, appended in the same order as K.
coef = []
for k in K:
    # Fit a k-means model with k clusters; the fixed seed keeps the fit reproducible.
    km = KMeans(n_clusters=k, random_state=0).fit(feature)
    # Score on a random subsample (sample_size=1000) of the data. The sampling is
    # seeded as well, so repeated runs produce the same silhouette curve.
    score = silhouette_score(
        feature, km.labels_, sample_size=1000, random_state=0
    )
    coef.append(score)
可視化:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(K,coef) # K爲x軸輸出,coef是y軸輸出
plt.show()