1 訓練樣本::
對於理論知識,大家可以參考http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression和http://blog.sina.com.cn/s/blog_6982136301015asd.html。這裏還是說一下實現的基本細節。步長(學習率)這裏選取0.001,這個數據大家可以根據實驗的需要來調節,這裏主要實現了多分類的算法,對於觀察結果(y)這裏不只屬於{0, 1},而是屬於更多類別{1, 2, 3, 4, 5, 6, ...},參考文獻中說這種分類可以達到上限10個。
對於要求的分類方程y = theta1*x1 + theta2*x2+theta3*x3 ,這裏我只談一點對參數迭代公式的理解,也就是http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression參數的更新,這裏在迭代公式中對於theta是根據(y)的貢獻值來確定的,也就是說如果x屬於這個類,則把迭代中的y值設置爲1,如果不是設置爲0.
2 識別
識別過程分別計算每個類別的概率爲歸一化方程 也就是g函數。大家可以參考大尾巴龍的代碼。
#include <iostream>
#include <cmath>
#include <assert.h>
using namespace std;
const int K = 2;// there are K+1 = 3 classes; the (K+1)-th class's theta is implicitly 0
const int M = 9;// number of training samples
const int N = 4;// number of features per sample (including the bias feature x0)
double x[M][N]={{1,47,76,24}, //include x0=1
{1,46,77,23},
{1,48,74,22},
{1,34,76,21},
{1,35,75,24},
{1,34,77,25},
{1,55,76,21},
{1,56,74,22},
{1,55,72,22},
};
// class label for each training sample, in {1, 2, 3}
double y[M]={1,
1,
1,
2,
2,
2,
3,
3,
3,};
// model parameters: one weight vector per explicit class
double theta[K][N]={
{0.3,0.3,0.01,0.01},
{0.5,0.5,0.01,0.01}}; // include theta0 (bias weight); the implicit last class keeps theta = 0
double h_value[K];// h(x) vector: per-class softmax probabilities, filled in by h()
// Compute exp(theta^T * x) for one class.
//   x: feature vector of length N (x[0] is the bias term, 1).
//   q: parameter vector theta for one class, length N.
// Returns the unnormalized softmax weight exp(q . x).
double fun_eqx(double* x, double* q)
{
    double sum = 0;
    for (int i = 0; i < N; i++)
    {
        sum += x[i] * q[i];
    }
    // Fix: use std::exp instead of pow(2.718281828, sum) — the truncated
    // Euler constant introduced a small systematic error in every call,
    // and exp() is both faster and intent-revealing.
    return exp(sum);
}
// Fill h_value with the softmax probabilities of the K explicit classes
// for feature vector x.  The implicit (K+1)-th class has theta = 0, so
// its exp term is exactly 1 — which is why the normalizer starts at 1.
void h(double* x)
{
    double norm = 1.0; // exp(0 . x) = 1 for the implicit last class
    for (int c = 0; c < K; ++c)
    {
        h_value[c] = fun_eqx(x, theta[c]);
        norm += h_value[c];
    }
    assert(norm != 0);
    for (int c = 0; c < K; ++c)
        h_value[c] /= norm;
}
// One pass of stochastic gradient descent over the training set:
// theta is updated immediately after each sample, learning rate 0.001.
void modify_stochostic()
{
    for (int sample = 0; sample < M; ++sample)
    {
        h(x[sample]); // refresh class probabilities for this sample
        for (int cls = 0; cls < K; ++cls)
        {
            // indicator: 1 if this sample's label is class cls+1, else 0
            double indicator = (y[sample] == cls + 1) ? 1.0 : 0.0;
            for (int f = 0; f < N; ++f)
                theta[cls][f] += 0.001 * x[sample][f] * (indicator - h_value[cls]);
        }
    }
}
void modify_batch()
{
//批量梯度下降,訓練參數
int i, j, k ;
for (i = 0; i < K; i++)
{
double sum[N] = {0.0};
for (j = 0; j < M; j++)
{
h(x[j]);
for (k = 0; k < N; k++)
{
sum[k] += x[j][k] * ((y[j] == i+1?1:0) - h_value[i]);
}
}
for (k = 0; k < N; k++)
{
theta[i][k] += 0.001 * sum[k] / N;
}
}
}
// Fit theta by running 10000 iterations of batch gradient descent.
// (Swap in modify_stochostic() to compare against the stochastic variant.)
void train(void)
{
    const int iterations = 10000;
    for (int iter = 0; iter < iterations; ++iter)
    {
        //modify_stochostic();
        modify_batch();
    }
}
// Train the model, then print the predicted class-probability vector for
// the feature vector `pre` (K explicit classes followed by the implicit
// last class).  NOTE(review): train() runs inside every predict() call,
// so the 10000-iteration fit is repeated — and theta keeps improving —
// on each invocation; callers observe that behavior, so it is preserved.
void predict(double* pre)
{
    for (int c = 0; c < K; ++c)
        h_value[c] = 0; // clear stale probabilities before (re)training
    train();
    h(pre);
    for (int c = 0; c < K; ++c)
        cout << h_value[c] << " ";
    // remaining mass is the probability of the implicit (K+1)-th class
    cout << 1 - h_value[0] - h_value[1] << endl;
}
// Print predicted class probabilities for every training sample, then
// for one unseen feature vector.
int main(void)
{
    for (int row = 0; row < M; ++row)
        predict(x[row]);
    cout << endl;
    double sample[] = {1, 20, 80, 50};
    predict(sample);
    return 0;
}
}