分级聚类原理
具体原理参见清华大学出版社出版、张学工编著的《模式识别》一书,或者任意一本模式识别教材。其类间距离的度量规则有最大距离、最小距离、均值距离三种。本练习实现最小距离规则:两个类之间的距离取两类中各点两两距离的最小值,每一步把类间距离最小的两个类合并为一类,直到剩下指定数目的类为止。
C++代码
1.cluster.cpp
// cluster.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "Point.h"
#include "ClassItem.h"
#include "iostream"
#include "cmath"
// Coordinates of every sample point; filled in by _tmain and indexed by calDistance.
vector<Point> points;
// One ClassItem per cluster; filled in by _tmain and updated by julei when two clusters merge.
vector<ClassItem> clas;
// Prints every point in `point`, one per line, as "x y z".
void outPutPoint(vector<Point> point){
    for (vector<Point>::size_type idx = 0; idx < point.size(); ++idx) {
        const Point &current = point[idx];
        cout << current.x << " " << current.y << " " << current.z << endl;
    }
}
// Prints a header line, then for every cluster in `clas` the point indices
// stored in it followed by the cluster's member count.
void outPutClass(vector<ClassItem> clas){
    cout << "输出类" << endl;
    for (ClassItem &cluster : clas) {
        // getItem() returns a reference, so the whole loop walks one vector.
        for (int pointIndex : cluster.getItem()) {
            cout << pointIndex << " ";
        }
        cout << "类中包含的数目" << cluster.getNum() << endl;
    }
}
double calDistance(int index1,int index2){//计算距离(两个点之间)
double dis;
dis = pow((points[index1].x-points[index2].x),2)+pow((points[index1].y-points[index2].y),2)
+pow((points[index1].z-points[index2].z),2);
return dis;
}
void julei(int cl1,int cl2){
clas[cl1].addNum(clas[cl2].getNum());
clas[cl2].setNum(0);
ClassItem::CLASSNUM--;
vector<int> cl2Item = clas[cl2].getItem();
vector<int>::iterator it = cl2Item.begin();
while(it!= cl2Item.end()){
clas[cl1].getItem().push_back(*it);
it++;
}
}
int _tmain(int argc, _TCHAR* argv[])
{
int m_x[] = {3,0,2,2,2};
int m_y[] = {1,2,3,0,1};
int m_z[] = {3,4,2,3,3};
for(int i = 0; i!= 5;++i){
ClassItem cla;
cla = ClassItem(1,i);
clas.push_back(cla); //构造类的向量
Point point;
point= Point(m_x[i],m_y[i],m_z[i]);//构造点的数组
points.push_back(point);
}
while(ClassItem::CLASSNUM!= 2){
//判断距离求得最小距离
double minDis = pow(10,3);
int cl1,cl2;
for(int i = 0;i!= 4;++i){
for(int j = i+1;j != 5;++j){
if(clas[i].getNum()!= 0 && clas[j].getNum()!= 0){
vector<int> item1 = clas[i].getItem();
vector<int> item2 = clas[j].getItem();
vector<int>::iterator it1 = item1.begin();
vector<int>::iterator it2 = item2.begin();
while(it1 != item1.end()){
while(it2 != item2.end()){
double distance = calDistance(*it1,*it2);
it2++;
if(distance < minDis){
cl1 = i;
cl2 = j;
minDis = distance;
}
}
it1++;
}
}
}
}
//聚类
julei(cl1,cl2);
}
outPutPoint(points);//输出点测试
outPutClass(clas); //输出类的测试。
while(1);
return 0;
}
2.ClassItem.cpp
#include "stdafx.h"
#include "ClassItem.h"
int ClassItem::CLASSNUM = 5; // Total number of clusters; decremented by 1 on every merge
// Increases this cluster's member count by `num`.
void ClassItem::addNum(int num){
    this->num = this->num + num;
}
void ClassItem::addPoint(int index){
item.push_back(index);
}
// Constructors
// Default constructor: creates an empty cluster. The count is set to 0 so that
// getNum() never reads an indeterminate value.
ClassItem::ClassItem(void)
    : num(0)
{
}
// Initialising constructor: sets the member count to `num` and stores `index`
// as the only entry of the index list.
ClassItem::ClassItem(int num, int index)
    : num(num), item(1, index)
{
}
int ClassItem::getNum(){
return num;
}
void ClassItem::setNum(int num){
this->num = num;
}
// Returns a reference to this cluster's list of point indices, so changes made
// through the result affect the cluster itself.
vector<int> &ClassItem::getItem(){
    return this->item;
}
// Destructor: performs no explicit work.
ClassItem::~ClassItem() {}
3.Point.cpp
#include "stdafx.h"
#include "Point.h"
// Builds a point from its three coordinates.
Point::Point(int x,int y,int z)
    : x(x), y(y), z(z)
{
}
// Default constructor: places the point at the origin so the coordinates are
// never left indeterminate.
Point::Point(void)
    : x(0), y(0), z(0)
{
}
// Destructor: performs no explicit work.
Point::~Point() {}
注意
1.具体类的头文件就不上了
问题
1.在ClassItem.cpp中定义的getItem函数
vector<int> &ClassItem::getItem(){
return item;
}
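最初定义的时候返回类型没有加上引用符号(即写成了 vector<int> ClassItem::getItem()),从而造成了错误。这个错误真心不好找:打印 class 时,系统提示迭代器不匹配,捣鼓了很久才有些许理解。原因在于:按值返回时,每次调用 getItem() 得到的都是 item 的一份新的临时拷贝,因此 it->getItem().begin() 和 it->getItem().end() 分别属于两个不同的临时 vector,拿它们做比较时就会提示迭代器不匹配;同理,julei 函数中 clas[cl1].getItem().push_back(*it) 修改的也只是临时拷贝,类里真正的 item 并没有被修改。改为返回引用之后,每次调用拿到的都是同一个成员 item,问题就解决了。如果有理解不对的地方,还请各位大神不吝赐教。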
结果
可见点被分成了两类,1,3,4,5点为一类,2点为一类