library("factoextra")
library("NbClust")
data("USArrests")
# Standardize the data: scale() centers every column and divides it by its
# standard deviation, so all variables contribute on a comparable scale to the
# Euclidean distances used by the clustering steps in this script.
data <- scale(USArrests)
# Preview the first five rows. print() is explicit because source() does not
# auto-print top-level values by default.
print(head(data, n = 5))
# Determine the number of clusters with NbClust.
# The seed is fixed because method = "kmeans" starts from random centers, so
# without it the result may differ from run to run.
set.seed(1234)
nb_clust <- NbClust(
  data,
  distance = "euclidean",
  min.nc = 2, max.nc = 15,
  method = "kmeans",
  index = "alllong",
  alphaBeale = 0.1
)
# Cross-check the number of clusters with the "wss" criterion of
# fviz_nbclust(), marking k = 4 with a vertical line (linetype = 2).
# print() is explicit so the plot is also drawn when the script is source()d.
print(
  fviz_nbclust(data, kmeans, method = "wss") +
    geom_vline(xintercept = 4, linetype = 2)
)
# Cluster the standardized data with k-means, k = 4.
# Seed the RNG here so this fit is reproducible on its own instead of depending
# on whatever random state the preceding calls left behind.
set.seed(1234)
# nstart = 25: try 25 random sets of initial centers and keep the best fit.
km_fit <- kmeans(data, centers = 4, nstart = 25)
print(km_fit)
# Plot the clusters as points with convex hulls and segments to each cluster
# center. print() is explicit so the plot is also drawn under source().
print(
  fviz_cluster(
    km_fit, data,
    palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
    geom = "point",
    ellipse.type = "convex",
    star.plot = TRUE,
    repel = TRUE,
    ggtheme = theme_grey()
  )
)
# Compute the pairwise Euclidean distances between observations.
result <- dist(data, method = "euclidean")
# Build the hierarchical clustering tree using the "ward.D2" agglomeration
# method.
result_hc <- hclust(d = result, method = "ward.D2")
# First look at the dendrogram. print() is explicit so the plot is also drawn
# when the script is source()d.
print(fviz_dend(result_hc, cex = 0.6))
# Same dendrogram cut into k = 4 groups, one color per group.
print(
  fviz_dend(
    result_hc,
    k = 4,
    cex = 0.5,
    k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07")
  )
)