上一篇博客(https://blog.csdn.net/viafcccy/article/details/90724073)完成了對於決策樹的數據處理 下面我們來建立決策樹
和上篇博客一樣還是這些屬性 在上一篇中我們計算了信息熵、信息增益這兩個重要的數據 並且將數據以鏈表的形式建立起來
最後我們使用較爲簡單的ID3算法建立決策樹
ID3算法是決策樹的一種,它是基於奧卡姆剃刀原理的,即儘量用較少的東西做更多的事。ID3算法,
即Iterative Dichotomiser 3,迭代二叉樹3代,是Ross Quinlan發明的一種決策樹算法,這個
算法的基礎就是上面提到的奧卡姆剃刀原理,越是小型的決策樹越優於大的決策樹,儘管如此,也不總
是生成最小的樹型結構,而是一個啓發式算法。
在信息論中,期望信息越小,那麼信息增益就越大,從而純度就越高。ID3算法的核心思想就是以信息
增益來度量屬性的選擇,選擇分裂後信息增益最大的屬性進行分裂。該算法採用自頂向下的貪婪搜索遍
歷可能的決策空間。
在建立中注意的問題是
1.按照信息增益的高低來決定屬性的重要程度 從而確定屬性在決策樹中的位置
2.何時停止分裂
給定節點的所有樣本屬於同一類
沒有剩餘屬性可以用來進一步劃分樣本。在此情況下,使用多數表決,將給定的節點轉換成樹葉,並用樣本中的多數所在的類標記它。另外,可以存放節點樣本的類分佈。
分支test_attribute=a(i),沒有樣本。在這種情況下,以samples中的多數類創建一個樹葉。
// Releases every node of a training list, including its dummy head node.
static void FreeTrainingSubset(struct TRAINING *head)
{
    while(head)
    {
        struct TRAINING *next=head->next;
        free(head);
        head=next;
    }
}

// Allocates a zeroed tree node whose attribute field is set to `value`.
// Allocation failure is not handled, as elsewhere in this file.
static struct TREE *NewValueNode(int value)
{
    struct TREE *node=(struct TREE *)malloc(sizeof(struct TREE));
    memset(node,0,sizeof(struct TREE));
    node->attribute=value;
    return node;
}

// Builds a decision tree with the ID3 algorithm.
//
// classHead: list (with dummy head) of the non-class attributes still
//            available for splitting. The chosen attribute is removed from
//            this list by DeleteClassSet, i.e. the caller's list is modified.
// Class:     the two class values; Class[0] wins a majority vote only when it
//            is strictly more frequent than Class[1].
// head:      training list (with dummy head) for this node. It is only read;
//            the caller keeps ownership.
//
// Returns a newly allocated tree. A leaf carries NOTHING (empty sample set) or
// a class value. An inner node carries the attribute id; its left/mid/right
// children carry the attribute's values, and each value node has exactly one
// child holding the subtree for that value. Returns NULL if the selected
// attribute is not one of the four known ones.
//
// NOTE(review): the same classHead is handed to every sibling branch, so an
// attribute consumed while building one branch is no longer available to the
// branches built after it. Confirm whether a per-branch copy is intended.
struct TREE *CreatTree(struct SET *classHead,int Class[],struct TRAINING *head)
{
    struct TREE *root;
    struct TRAINING *p,*sub;
    struct SET *r;
    double *gainPoint;
    int i,best;
    int classSetCount=0;
    int count1=0,count2=0;

    root=NewValueNode(0);

    // No samples reach this node: mark it as an empty leaf.
    if(ClassIsEmpty(head))
    {
        root->attribute=NOTHING;
        return root;
    }
    // All samples share one class: the node becomes a leaf with that class.
    if(ClassAllSame(head))
    {
        root->attribute=head->next->play;
        return root;
    }
    // No attribute left to split on: label the leaf by majority vote.
    if(ClassSetIsEmpty(classHead))
    {
        for(p=head->next;p;p=p->next)   // advance p, otherwise the loop never ends
        {
            if(p->play==Class[0])
                count1++;
            if(p->play==Class[1])
                count2++;
        }
        root->attribute=(count1>count2)?Class[0]:Class[1];
        return root;
    }

    // Count the remaining attributes and compute the gain of each one.
    for(r=classHead->next;r;r=r->next)
        classSetCount++;
    gainPoint=(double *)malloc(classSetCount*sizeof(double));
    memset(gainPoint,0,classSetCount*sizeof(double));
    r=classHead->next;
    for(i=0;i<classSetCount;i++)
    {
        gainPoint[i]=Gain(r->item,head);
        printf("gainPoint=%f,item=%d\n",gainPoint[i],r->item);
        r=r->next;
    }

    // Pick the attribute with the largest gain and remove it from the set.
    best=GetMaxGainClass(classSetCount,gainPoint,classHead);
    free(gainPoint);    // the gain table is only needed for the selection
    DeleteClassSet(best,classHead);
    printf("%d=M\n",best);

    // Each subset is a private copy, so it is released once its subtree exists.
    switch(best)
    {
    case OUTLOOK:
        root->attribute=OUTLOOK;
        root->left=NewValueNode(VOERCAST);
        root->mid=NewValueNode(SUNNY);
        root->right=NewValueNode(RAIN);
        sub=GetSubSet(VOERCAST,head);
        root->left->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet(SUNNY,head);
        root->mid->mid=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet(RAIN,head);
        root->right->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    case TEMPERATURE:
        root->attribute=TEMPERATURE;
        root->left=NewValueNode(HIGH_TEMPERATURE);
        root->mid=NewValueNode(MID_TEMPERATURE);
        root->right=NewValueNode(COLD_TEMPERATURE);
        // Grow the three temperature branches like every other attribute.
        sub=GetSubSet1(HIGH_TEMPERATURE,head);
        root->left->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet1(MID_TEMPERATURE,head);
        root->mid->mid=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet1(COLD_TEMPERATURE,head);
        root->right->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    case HUMIDITY:
        root->attribute=HUMIDITY;
        root->left=NewValueNode(HIGH_HUMIDITY);
        root->mid=NewValueNode(NORMAL_HUMIDITY);
        sub=GetSubSet2(HIGH_HUMIDITY,head);
        root->left->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet2(NORMAL_HUMIDITY,head);
        root->mid->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    case WINDY:
        root->attribute=WINDY;
        root->left=NewValueNode(TRUE);
        root->mid=NewValueNode(FALSE);
        sub=GetSubSet3(TRUE,head);
        root->left->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet3(FALSE,head);
        root->mid->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    default:
        free(root);     // unknown attribute: do not leak the unused node
        return NULL;
    }
    return root;
}
爲了便於理解我畫了圖(只畫了一種情況,即可以分裂成二叉樹的那一種)
對照相應數值 宏定義 根據輸出結果可以還原二叉樹
// Leaf marker stored in a tree node when no training sample reaches it
#define NOTHING 10
// Outlook attribute values ("VOERCAST" is this file's spelling of overcast)
#define SUNNY 11
#define VOERCAST 12
#define RAIN 13
// Windy attribute values
#define TRUE 1
#define FALSE 0
// Class (target) attribute values
#define CAN_PLAY 14
#define NOT_PLAY 15
// Non-class attribute identifiers
#define OUTLOOK 16
#define TEMPERATURE 17
#define HUMIDITY 18
#define WINDY 19
// Temperature is split into three classes, compared against temperature/10
#define HIGH_TEMPERATURE 8
#define MID_TEMPERATURE 7
#define COLD_TEMPERATURE 6
// Humidity is split into two classes: >=75 is high, <=74 is normal
#define HIGH_HUMIDITY 75
#define NORMAL_HUMIDITY 74
// Number of rows in the built-in training table
#define TRAINING_NUMBER 14
輸出決策樹的過程涉及到樹的遍歷
// Prints the tree in pre-order: the node itself, then its left, mid and right
// subtrees. Each node is printed as "tree=<attribute>" on its own line.
// A NULL tree prints nothing.
void DisplayTree(struct TREE *t)
{
    if(t==NULL)
        return;
    printf("tree=%d\n",t->attribute);
    DisplayTree(t->left);
    DisplayTree(t->mid);
    DisplayTree(t->right);
}
這樣我們建立了一種模型通過這個模型 我們通過輸入的數據就可以預測明天能不能打籃球了(打個籃球可真是累啊)
關於決策樹更多的算法這裏不再多說 看下面幾篇博客吧
https://blog.csdn.net/weixin_39750084/article/details/83449866
https://www.cnblogs.com/starfire86/p/5749328.html
https://blog.csdn.net/qq_36330643/article/details/77415451
到這裏決策樹應該不是問題了
附上所有源代碼 (代碼我都註釋好了 應該沒啥問題 不會的往後讀 有些地方寫的不太好)
#include<stdio.h>
#include<conio.h>
#include<malloc.h>
#include<math.h>
#include<string.h>
// Leaf marker stored in a tree node when no training sample reaches it
#define NOTHING 10
// Outlook attribute values ("VOERCAST" is this file's spelling of overcast)
#define SUNNY 11
#define VOERCAST 12
#define RAIN 13
// Windy attribute values
#define TRUE 1
#define FALSE 0
// Class (target) attribute values
#define CAN_PLAY 14
#define NOT_PLAY 15
// Non-class attribute identifiers
#define OUTLOOK 16
#define TEMPERATURE 17
#define HUMIDITY 18
#define WINDY 19
// Temperature is split into three classes, compared against temperature/10
#define HIGH_TEMPERATURE 8
#define MID_TEMPERATURE 7
#define COLD_TEMPERATURE 6
// Humidity is split into two classes: >=75 is high, <=74 is normal
#define HIGH_HUMIDITY 75
#define NORMAL_HUMIDITY 74
// Number of rows in the built-in training table
#define TRAINING_NUMBER 14
// One training sample; samples are chained into a singly linked list.
struct TRAINING
{
int outLook; // SUNNY, VOERCAST or RAIN
int temperature; // raw reading; bucketed elsewhere via temperature/10
int humidity; // raw reading; compared against HIGH_HUMIDITY / NORMAL_HUMIDITY
int windy; // TRUE or FALSE
int play; // class label: CAN_PLAY or NOT_PLAY
struct TRAINING *next; // next sample, NULL at the end of the list
};
// Per-bucket counters used when computing the gain of temperature and humidity.
struct TEMPER_HUMID_GAIN
{
int value; // bucket constant (e.g. HIGH_TEMPERATURE, HIGH_HUMIDITY)
int playCount; // samples in this bucket whose class is CAN_PLAY
int c; // total samples in this bucket
};
// Node of the linked list of non-class attributes still available for splitting.
struct SET
{
int item; // attribute identifier (OUTLOOK, TEMPERATURE, HUMIDITY or WINDY)
struct SET *next; // next attribute, NULL at the end of the list
};
// Decision-tree node with up to three children.
struct TREE
{
int attribute; // one of the macros above: attribute id, attribute value, class label or NOTHING
struct TREE *left;
struct TREE *mid;
struct TREE *right;
};
// Program-wide state filled in by main():
struct TRAINING *trainHead; // head of the training list built from trainingSet
struct SET *noClassSetHead; // head of the non-class attribute list
struct TREE *treeT; // root of the generated decision tree
// Training data of the weather example.
// Columns follow struct TRAINING: outLook, temperature, humidity, windy, play, next.
struct TRAINING trainingSet[TRAINING_NUMBER]=
{
{SUNNY ,85,85,FALSE,NOT_PLAY,NULL},
{SUNNY ,80,90,TRUE ,NOT_PLAY,NULL},
{VOERCAST,83,78,FALSE,CAN_PLAY,NULL},
{RAIN ,70,96,FALSE,CAN_PLAY,NULL},
{RAIN ,68,80,FALSE,CAN_PLAY,NULL},
{RAIN ,65,70,TRUE ,NOT_PLAY,NULL},
{VOERCAST,64,65,TRUE ,CAN_PLAY,NULL},
{SUNNY ,72,95,FALSE,NOT_PLAY,NULL},
{SUNNY ,69,70,FALSE,CAN_PLAY,NULL},
{RAIN ,75,80,FALSE,CAN_PLAY,NULL},
{SUNNY ,75,70,TRUE ,CAN_PLAY,NULL},
{VOERCAST,72,90,TRUE ,CAN_PLAY,NULL},
{VOERCAST,81,75,FALSE,CAN_PLAY,NULL},
{RAIN ,71,80,TRUE ,NOT_PLAY,NULL}
};
// The non-class attributes (candidates for splitting) and the class values
int noClassSet[4]={OUTLOOK,TEMPERATURE,HUMIDITY,WINDY};
int classSet[2]={CAN_PLAY,NOT_PLAY};
//計算以2爲底的對數
double Log2(double x)
{
if(x<=0)
return 0.0;
else
return (double)log10(x)/log10(2);
}
// Entropy of a two-outcome distribution: -(p1*log2(p1) + p2*log2(p2)).
// Log2 returns 0 for non-positive input, so a zero probability adds nothing.
double I(double p1,double p2)
{
    double first=p1*Log2(p1);
    double second=p2*Log2(p2);
    return -(first+second);
}
// Computes the class entropy of a training list.
// head: dummy head node; the samples start at head->next.
// Every sample whose play field is not CAN_PLAY counts as "not play".
// Returns I(canPlay/total, notPlay/total), or 0.0 for an empty list
// (avoids the 0/0 division that would otherwise produce NaN).
double Information(struct TRAINING *head)
{
    struct TRAINING *node;
    int total=0;
    int canPlay=0;

    for(node=head->next;node;node=node->next)
    {
        total++;
        if(node->play==CAN_PLAY)
            canPlay++;
    }
    if(total==0)
        return 0.0;
    return I((double)canPlay/total,(double)(total-canPlay)/total);
}
// Tells whether every sample in the list carries the same class label.
// head: dummy head node; the samples start at head->next.
// Returns 1 if all play values are equal, 0 otherwise. An empty list returns 1
// (vacuously true) instead of dereferencing a NULL first node, so callers must
// check for emptiness before reading head->next->play.
int ClassAllSame(struct TRAINING *head)
{
    struct TRAINING *node=head->next;
    int same;

    if(node==NULL)
        return 1;
    same=node->play;    // label of the first sample is the reference
    for(node=node->next;node;node=node->next)
    {
        if(node->play!=same)
            return 0;
    }
    return 1;
}
// Tells whether a training list has no samples.
// head: dummy head node; the samples start at head->next.
// Returns 1 when the list is empty and 0 when it holds at least one sample.
int ClassIsEmpty(struct TRAINING *head)
{
    if(head->next)
        return 0;
    return 1;
}
// Tells whether the non-class attribute list has no entries.
// head: dummy head node; the attributes start at head->next.
// Returns 1 when the list is empty and 0 when it holds at least one attribute.
int ClassSetIsEmpty(struct SET *head)
{
    if(head->next)
        return 0;
    return 1;
}
// Removes the first attribute equal to `value` from the non-class attribute
// list and frees its node.
// head: dummy head node; the attributes start at head->next.
// Does nothing when `value` is not in the list (previously a NULL dereference).
void DeleteClassSet(int value,struct SET *head)
{
    struct SET *prev=head;
    struct SET *cur=head->next;

    while(cur && cur->item!=value)
    {
        prev=cur;
        cur=cur->next;
    }
    if(cur==NULL)
        return;
    prev->next=cur->next;   // unlink before freeing
    free(cur);
}
// Returns the attribute whose gain is the largest.
// c:    number of gain values in p (expected to be at least 1).
// p:    gain values, in the same order as the attributes in the list.
// head: dummy head of the attribute list; entry k of p belongs to the
//       (k+1)-th node after head.
// On ties the earliest attribute wins, because only a strictly larger gain
// replaces the current best.
int GetMaxGainClass(int c,double *p,struct SET *head)
{
    struct SET *node=head;
    double best=p[0];
    int bestIndex=0;
    int k;

    for(k=1;k<c;k++)
    {
        if(p[k]>best)
        {
            best=p[k];
            bestIndex=k;
        }
    }
    // Step past the dummy head and on to the bestIndex-th attribute.
    for(k=bestIndex;k>=0;k--)
        node=node->next;
    return node->item;
}
// Extracts the samples whose outLook equals `attr` into a new list.
// attr: SUNNY, VOERCAST or RAIN.
// head: dummy head of the source list; it is only read.
// Returns the dummy head of a freshly allocated list of copies (the caller
// owns it), or NULL when attr is not an outlook value. The validity check
// runs before any allocation, so the invalid case no longer leaks a node.
struct TRAINING *GetSubSet(int attr,struct TRAINING *head)
{
    struct TRAINING *subHead,*tail,*node,*src;

    if(attr!=SUNNY && attr!=VOERCAST && attr!=RAIN)
        return NULL;

    subHead=(struct TRAINING *)malloc(sizeof(struct TRAINING));
    memset(subHead,0,sizeof(struct TRAINING));
    tail=subHead;
    for(src=head->next;src;src=src->next)
    {
        if(src->outLook!=attr)
            continue;
        node=(struct TRAINING *)malloc(sizeof(struct TRAINING));
        *node=*src;         // copy all fields of the sample
        node->next=NULL;    // the copy becomes the new list tail
        tail->next=node;
        tail=node;
    }
    return subHead;
}
// Extracts the samples whose temperature bucket (temperature/10) equals `attr`
// into a new list.
// attr: HIGH_TEMPERATURE, MID_TEMPERATURE or COLD_TEMPERATURE.
// head: dummy head of the source list; it is only read.
// Returns the dummy head of a freshly allocated list of copies (the caller
// owns it), or NULL when attr is not a temperature bucket. The validity check
// runs before any allocation, so the invalid case no longer leaks a node.
struct TRAINING *GetSubSet1(int attr,struct TRAINING *head)
{
    struct TRAINING *subHead,*tail,*node,*src;

    if(attr!=HIGH_TEMPERATURE && attr!=MID_TEMPERATURE && attr!=COLD_TEMPERATURE)
        return NULL;

    subHead=(struct TRAINING *)malloc(sizeof(struct TRAINING));
    memset(subHead,0,sizeof(struct TRAINING));
    tail=subHead;
    for(src=head->next;src;src=src->next)
    {
        if(src->temperature/10!=attr)
            continue;
        node=(struct TRAINING *)malloc(sizeof(struct TRAINING));
        *node=*src;         // copy all fields of the sample
        node->next=NULL;    // the copy becomes the new list tail
        tail->next=node;
        tail=node;
    }
    return subHead;
}
// Extracts the samples of one humidity class into a new list.
// attr: HIGH_HUMIDITY selects humidity >= HIGH_HUMIDITY,
//       NORMAL_HUMIDITY selects humidity <= NORMAL_HUMIDITY.
// head: dummy head of the source list; it is only read.
// Returns the dummy head of a freshly allocated list of copies (the caller
// owns it), or NULL when attr is neither humidity class. The validity check
// runs before any allocation, so the invalid case no longer leaks a node.
struct TRAINING *GetSubSet2(int attr,struct TRAINING *head)
{
    struct TRAINING *subHead,*tail,*node,*src;
    int wanted;

    if(attr!=HIGH_HUMIDITY && attr!=NORMAL_HUMIDITY)
        return NULL;

    subHead=(struct TRAINING *)malloc(sizeof(struct TRAINING));
    memset(subHead,0,sizeof(struct TRAINING));
    tail=subHead;
    for(src=head->next;src;src=src->next)
    {
        if(attr==HIGH_HUMIDITY)
            wanted=(src->humidity>=HIGH_HUMIDITY);
        else
            wanted=(src->humidity<=NORMAL_HUMIDITY);
        if(!wanted)
            continue;
        node=(struct TRAINING *)malloc(sizeof(struct TRAINING));
        *node=*src;         // copy all fields of the sample
        node->next=NULL;    // the copy becomes the new list tail
        tail->next=node;
        tail=node;
    }
    return subHead;
}
// Extracts the samples whose windy flag equals `attr` into a new list.
// attr: TRUE or FALSE.
// head: dummy head of the source list; it is only read.
// Returns the dummy head of a freshly allocated list of copies (the caller
// owns it), or NULL when attr is neither TRUE nor FALSE. The validity check
// runs before any allocation, so the invalid case no longer leaks a node.
struct TRAINING *GetSubSet3(int attr,struct TRAINING *head)
{
    struct TRAINING *subHead,*tail,*node,*src;

    if(attr!=TRUE && attr!=FALSE)
        return NULL;

    subHead=(struct TRAINING *)malloc(sizeof(struct TRAINING));
    memset(subHead,0,sizeof(struct TRAINING));
    tail=subHead;
    for(src=head->next;src;src=src->next)
    {
        if(src->windy!=attr)
            continue;
        node=(struct TRAINING *)malloc(sizeof(struct TRAINING));
        *node=*src;         // copy all fields of the sample
        node->next=NULL;    // the copy becomes the new list tail
        tail->next=node;
        tail=node;
    }
    return subHead;
}
// Builds the linked list of non-class attributes.
// noClass: array whose first four entries are the attribute identifiers.
// Returns a zeroed dummy head node followed by one node per attribute,
// in array order.
struct SET *CreateNoClassSet(int noClass[])
{
    struct SET *sentinel,*tail,*node;
    int idx=0;

    sentinel=(struct SET *)malloc(sizeof(struct SET));
    memset(sentinel,0,sizeof(struct SET));
    tail=sentinel;
    while(idx<4)
    {
        node=(struct SET *)malloc(sizeof(struct SET));
        memset(node,0,sizeof(struct SET));
        node->item=noClass[idx];
        tail->next=node;    // append behind the current tail
        tail=node;
        idx++;
    }
    return sentinel;
}
// Builds the training list from an array of samples.
// set: array holding TRAINING_NUMBER samples.
// Returns a zeroed dummy head node followed by one copy of each sample,
// in array order; the last node's next pointer is NULL.
struct TRAINING *CreateTrainingLink(struct TRAINING set[])
{
    struct TRAINING *sentinel,*tail,*node;
    int row;

    sentinel=(struct TRAINING *)malloc(sizeof(struct TRAINING));
    memset(sentinel,0,sizeof(struct TRAINING));
    tail=sentinel;
    for(row=0;row<TRAINING_NUMBER;row++)
    {
        node=(struct TRAINING *)malloc(sizeof(struct TRAINING));
        memset(node,0,sizeof(struct TRAINING));
        node->outLook=set[row].outLook;
        node->temperature=set[row].temperature;
        node->humidity=set[row].humidity;
        node->windy=set[row].windy;
        node->play=set[row].play;
        node->next=NULL;
        tail->next=node;    // append behind the current tail
        tail=node;
    }
    return sentinel;
}
//反正你知道這個函數返回了信息嫡就行了
double InformationAttr(int attribute,struct TRAINING *head)
{
struct TRAINING *p;
//每一個數組代表一種屬性 和 屬性的屬性
//爲啥其他兩個屬性的屬性沒有結構體? 因爲懶得寫了
struct TEMPER_HUMID_GAIN tGain[3];
struct TEMPER_HUMID_GAIN mGain[2];
int c=0;
int i=0,j=0;
int sunny=0,overcast=0,rain=0;
int sPlay=0,oPlay=0,rPlay=0;
int wind=0,windPlay=0;
int noWind=0,noWindPlay=0;
double p1=0,p2=0,p3=0;
double i1=0,i2=0,i3=0;
p=head->next;
switch(attribute)
{
case OUTLOOK:
while(p)
{
c++; //深度 個數
if(p->outLook==SUNNY)
{
sunny++;//晴天總數
if(p->play==CAN_PLAY)
sPlay++;//是晴天時且可以打球的天數
}
if(p->outLook==VOERCAST)
{
overcast++;
if(p->play==CAN_PLAY)
oPlay++;
}
if(p->outLook==RAIN)
{
rain++;
if(p->play==CAN_PLAY)
rPlay++;
}
p=p->next;
}
//各個天氣的概率
p1=(double)sunny/c;
p2=(double)overcast/c;
p3=(double)rain/c;
if(sunny)
i1=I((double)sPlay/sunny,
(double)(sunny-sPlay)/sunny);
if(overcast)
i2=I((double)oPlay/overcast,
(double)(overcast-oPlay)/overcast);
if(rain)
i3=I((double)rPlay/rain,
(double)(rain-rPlay)/rain);
return p1*i1+p2*i2+p3*i3;
case TEMPERATURE:
for(i=0;i<3;i++)
{
tGain[i].c=0;
tGain[i].playCount=0;
if(i==0)
tGain[i].value=HIGH_TEMPERATURE;
else if(i==1)
tGain[i].value=MID_TEMPERATURE;
else
tGain[i].value=COLD_TEMPERATURE;
}
while(p)
{
c++;
//why/10? 看數據集 you know
if(p->temperature/10==HIGH_TEMPERATURE)
{
tGain[0].c++;
if(p->play==CAN_PLAY)
tGain[0].playCount++;
}
else if(p->temperature/10==MID_TEMPERATURE)
{
tGain[1].c++;
if(p->play==CAN_PLAY)
tGain[1].playCount++;
}
else if(p->temperature/10==COLD_TEMPERATURE)
{
tGain[2].c++;
if(p->play==CAN_PLAY)
tGain[2].playCount++;
}
p=p->next;
}
p1=(double)tGain[0].c/c;
p2=(double)tGain[1].c/c;
p3=(double)tGain[2].c/c;
if(tGain[0].c)
i1=I((double)tGain[0].playCount/tGain[0].c,
(double)(tGain[0].c-tGain[0].playCount)/tGain[0].c);
if(tGain[1].c)
i2=I((double)tGain[1].playCount/tGain[1].c,
(double)(tGain[1].c-tGain[1].playCount)/tGain[1].c);
if(tGain[2].c)
i3=I((double)tGain[2].playCount/tGain[2].c,
(double)(tGain[2].c-tGain[2].playCount)/tGain[2].c);
return p1*i1+p2*i2+p3*i3;
case HUMIDITY:
for(i=0;i<2;i++)
{
mGain[i].c=0;
mGain[i].playCount=0;
if(i==0)
mGain[i].value=HIGH_HUMIDITY;
else
mGain[i].value=NORMAL_HUMIDITY;
}
while(p)
{
c++;
if(p->humidity>=mGain[0].value)
{
mGain[0].c++;
if(p->play==CAN_PLAY)
mGain[0].playCount++;
}
else if(p->humidity<mGain[1].value)
{
mGain[1].c++;
if(p->play==CAN_PLAY)
mGain[1].playCount++;
}
p=p->next;
}
p1=(double)mGain[0].c/c;
p2=(double)mGain[1].c/c;
if(mGain[0].c)
i1=I((double)mGain[0].playCount/mGain[0].c,
(double)(mGain[0].c-mGain[0].playCount)/mGain[0].c);
if(mGain[1].c)
i2=I((double)mGain[1].playCount/mGain[1].c,
(double)(mGain[1].c-mGain[1].playCount)/mGain[1].c);
return p1*i1+p2*i2;
case WINDY:
while(p)
{
c++;
if(p->windy==TRUE)
{
wind++;
if(p->play==CAN_PLAY)
windPlay++;
}
if(p->windy==FALSE)
{
noWind++;
if(p->play==CAN_PLAY)
noWindPlay++;
}
p=p->next;
}
p1=(double)wind/c;
p2=(double)noWind/c;
if(wind)
i1=I((double)windPlay/wind,
(double)(wind-windPlay)/wind);
if(noWind)
i2=I((double)noWindPlay/noWind,
(double)(noWind-noWindPlay)/noWind);
return p1*i1+p2*i2;
}
return 0.0;
}
// Information gain of splitting the training list on `attribute`:
// the entropy of the whole list minus the entropy remaining after the split.
double Gain(int attribute,struct TRAINING *head)
{
    double before=Information(head);
    double after=InformationAttr(attribute,head);
    return before-after;
}
// Sorts the first len integers of p into ascending order, in place.
// Exchange sort: slot `lo` is swapped with any later element smaller than it.
void Compositor(int len,int *p)
{
    int lo,hi;

    for(lo=0;lo<len;lo++)
    {
        for(hi=lo+1;hi<len;hi++)
        {
            if(p[hi]<p[lo])
            {
                int held=p[hi];
                p[hi]=p[lo];
                p[lo]=held;
            }
        }
    }
}
// Collapses runs of equal adjacent elements in p[0..len-1] to one element each.
// The kept elements are packed at the front in their original order, and the
// vacated slots at the end are set to 0. Only adjacent duplicates are removed,
// so sort the array first to remove all duplicates.
// Returns the number of elements removed (0 when len <= 0).
// All accesses stay inside p[0..len-1]; the element one past the end is never
// read or written.
int DeleteTheSameElement(int len,int *p)
{
    int kept=0;
    int i;

    if(len<=0)
        return 0;
    for(i=0;i<len;i++)
    {
        // Keep an element only if it differs from the last kept one.
        if(kept==0 || p[i]!=p[kept-1])
            p[kept++]=p[i];
    }
    for(i=kept;i<len;i++)
        p[i]=0;
    return len-kept;
}
// Releases every node of a training list, including its dummy head node.
static void FreeTrainingSubset(struct TRAINING *head)
{
    while(head)
    {
        struct TRAINING *next=head->next;
        free(head);
        head=next;
    }
}

// Allocates a zeroed tree node whose attribute field is set to `value`.
// Allocation failure is not handled, as elsewhere in this file.
static struct TREE *NewValueNode(int value)
{
    struct TREE *node=(struct TREE *)malloc(sizeof(struct TREE));
    memset(node,0,sizeof(struct TREE));
    node->attribute=value;
    return node;
}

// Builds a decision tree with the ID3 algorithm.
//
// classHead: list (with dummy head) of the non-class attributes still
//            available for splitting. The chosen attribute is removed from
//            this list by DeleteClassSet, i.e. the caller's list is modified.
// Class:     the two class values; Class[0] wins a majority vote only when it
//            is strictly more frequent than Class[1].
// head:      training list (with dummy head) for this node. It is only read;
//            the caller keeps ownership.
//
// Returns a newly allocated tree. A leaf carries NOTHING (empty sample set) or
// a class value. An inner node carries the attribute id; its left/mid/right
// children carry the attribute's values, and each value node has exactly one
// child holding the subtree for that value. Returns NULL if the selected
// attribute is not one of the four known ones.
//
// NOTE(review): the same classHead is handed to every sibling branch, so an
// attribute consumed while building one branch is no longer available to the
// branches built after it. Confirm whether a per-branch copy is intended.
struct TREE *CreatTree(struct SET *classHead,int Class[],struct TRAINING *head)
{
    struct TREE *root;
    struct TRAINING *p,*sub;
    struct SET *r;
    double *gainPoint;
    int i,best;
    int classSetCount=0;
    int count1=0,count2=0;

    root=NewValueNode(0);

    // No samples reach this node: mark it as an empty leaf.
    if(ClassIsEmpty(head))
    {
        root->attribute=NOTHING;
        return root;
    }
    // All samples share one class: the node becomes a leaf with that class.
    if(ClassAllSame(head))
    {
        root->attribute=head->next->play;
        return root;
    }
    // No attribute left to split on: label the leaf by majority vote.
    if(ClassSetIsEmpty(classHead))
    {
        for(p=head->next;p;p=p->next)   // advance p, otherwise the loop never ends
        {
            if(p->play==Class[0])
                count1++;
            if(p->play==Class[1])
                count2++;
        }
        root->attribute=(count1>count2)?Class[0]:Class[1];
        return root;
    }

    // Count the remaining attributes and compute the gain of each one.
    for(r=classHead->next;r;r=r->next)
        classSetCount++;
    gainPoint=(double *)malloc(classSetCount*sizeof(double));
    memset(gainPoint,0,classSetCount*sizeof(double));
    r=classHead->next;
    for(i=0;i<classSetCount;i++)
    {
        gainPoint[i]=Gain(r->item,head);
        printf("gainPoint=%f,item=%d\n",gainPoint[i],r->item);
        r=r->next;
    }

    // Pick the attribute with the largest gain and remove it from the set.
    best=GetMaxGainClass(classSetCount,gainPoint,classHead);
    free(gainPoint);    // the gain table is only needed for the selection
    DeleteClassSet(best,classHead);
    printf("%d=M\n",best);

    // Each subset is a private copy, so it is released once its subtree exists.
    switch(best)
    {
    case OUTLOOK:
        root->attribute=OUTLOOK;
        root->left=NewValueNode(VOERCAST);
        root->mid=NewValueNode(SUNNY);
        root->right=NewValueNode(RAIN);
        sub=GetSubSet(VOERCAST,head);
        root->left->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet(SUNNY,head);
        root->mid->mid=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet(RAIN,head);
        root->right->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    case TEMPERATURE:
        root->attribute=TEMPERATURE;
        root->left=NewValueNode(HIGH_TEMPERATURE);
        root->mid=NewValueNode(MID_TEMPERATURE);
        root->right=NewValueNode(COLD_TEMPERATURE);
        // Grow the three temperature branches like every other attribute.
        sub=GetSubSet1(HIGH_TEMPERATURE,head);
        root->left->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet1(MID_TEMPERATURE,head);
        root->mid->mid=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet1(COLD_TEMPERATURE,head);
        root->right->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    case HUMIDITY:
        root->attribute=HUMIDITY;
        root->left=NewValueNode(HIGH_HUMIDITY);
        root->mid=NewValueNode(NORMAL_HUMIDITY);
        sub=GetSubSet2(HIGH_HUMIDITY,head);
        root->left->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet2(NORMAL_HUMIDITY,head);
        root->mid->right=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    case WINDY:
        root->attribute=WINDY;
        root->left=NewValueNode(TRUE);
        root->mid=NewValueNode(FALSE);
        sub=GetSubSet3(TRUE,head);
        root->left->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        sub=GetSubSet3(FALSE,head);
        root->mid->left=CreatTree(classHead,Class,sub);
        FreeTrainingSubset(sub);
        break;
    default:
        free(root);     // unknown attribute: do not leak the unused node
        return NULL;
    }
    return root;
}
// Prints the tree in pre-order: the node itself, then its left, mid and right
// subtrees. Each node is printed as "tree=<attribute>" on its own line.
// A NULL tree prints nothing.
void DisplayTree(struct TREE *t)
{
    if(t==NULL)
        return;
    printf("tree=%d\n",t->attribute);
    DisplayTree(t->left);
    DisplayTree(t->mid);
    DisplayTree(t->right);
}
int main()
{
trainHead=CreateTrainingLink(trainingSet);
noClassSetHead=CreateNoClassSet(noClassSet);
treeT=CreatTree(noClassSetHead,classSet,trainHead);
DisplayTree(treeT);
free(treeT);
free(noClassSetHead);
free(trainHead);
printf("\n按任意鍵退出");
getch();
return 0;
}