模式識別課作業,C均值聚類步驟:
- 選取隨機類心
- 根據集合中點與各類心最小距離,對集合每個元素分類
- 將分類後各類元素的均值作爲新的類心
- 若類心不再更新,則結果收斂,算法結束
#include<bits/stdc++.h>
using namespace std;
int N; // number of input data points
int C; // number of clusters
// A point in the 2-D plane; both coordinates default to 0.
struct Node {
    double x;
    double y;

    Node(double px = 0, double py = 0) : x(px), y(py) {}
};

Node centers[100];    // current cluster centers (capacity: 100 clusters)
Node preCenters[100]; // cluster centers saved from the previous iteration
Node datas[1000];     // input data points (capacity: 1000 points)
vector<Node> cluster[100]; // current clustering result: cluster[i] holds the points assigned to centers[i]
// Reports whether two points coincide, i.e. whether both their x and y
// coordinates differ by less than 1e-4.
bool equals(Node x, Node y)
{
    const double tolerance = 0.0001;
    const double gapX = std::fabs(x.x - y.x);
    const double gapY = std::fabs(x.y - y.y);
    // Written as !(a < b) so that a NaN gap still yields "not equal".
    if (!(gapX < tolerance))
        return false;
    return gapY < tolerance;
}
// Reports whether the clustering has converged: true when each of the first C
// entries of preCenters equals (per equals()) the matching entry of centers.
bool check()
{
    int idx = 0;
    // Advance while the previous and current centers still agree.
    while (idx < C && equals(preCenters[idx], centers[idx]))
        ++idx;
    // Converged only if no mismatch stopped the scan early.
    return idx >= C;
}
void updateCluster() //更新聚類集合
{
for (int i = 0; i < C; i++)
cluster[i].clear();
for (int i = 0; i < N; i++) {
double minDis = 1e9;
int index = 0;
for (int j = 0; j < C; j++) {
double dis = sqrt(pow(datas[i].x-centers[j].x, 2)+pow(datas[i].y-centers[j].y, 2));
if (dis < minDis) {
minDis = dis;
index = j;
}
}
cluster[index].push_back(datas[i]);
}
}
void updateCenter() //更新類心集合
{
for (int i = 0; i < C; i++) {
double sumX = 0, sumY = 0;
int len = cluster[i].size();
for (int j = 0; j < len; j++) {
sumX += cluster[i][j].x;
sumY += cluster[i][j].y;
}
if (len != 0)
centers[i] = Node(sumX/len, sumY/len);
}
}
int main() {
freopen("Iris.txt", "r", stdin);
C = 3;
N = 0;
double x, y;
while (cin >> x >> y) {
datas[N++] = Node(x, y); //讀取數據
}
for (int i = 0; i < C; i++)
centers[i] = datas[i]; //隨機初始化類心
while (true) {
updateCluster();
updateCenter();
printf("當前類心:(%f, %f)、(%f, %f)、(%f, %f)\n", centers[0].x, centers[0].y, centers[1].x, centers[1].y, centers[2].x, centers[2].y);
if (check()) {
break;
}
memcpy(preCenters, centers, sizeof(centers));
}
freopen("C均值聚類結果.txt", "w", stdout);
for (int i = 0; i < C; i++) {
printf("第%d類類心:(%f, %f),該類中數據個數:%d\n", i, centers[i].x, centers[i].y, cluster[i].size());
for (int j = 0; j < cluster[i].size(); j++) {
printf("(%f, %f)\n", cluster[i][j].x, cluster[i][j].y);
}
}
return 0;
}