Neural Networks and the BP Algorithm: C and Python Code

This post gives only the code. For the underlying BP principles and related neural-network background, see: Derivation of Neural Networks and the Backpropagation Algorithm.

First, the forward-propagation computation:

Input:
First, four positive integers n, m, p, t: the number of features, training samples, hidden-layer neurons, and output-layer neurons, respectively, with 1 < n <= 100, 1 < m <= 1000, 1 < p <= 100, 1 < t <= 10.
Next come m lines of n+1 numbers each: the n feature values (x_1, x_2, ..., x_n) of one sample followed by its observed label y. Feature values are real numbers; the label is a positive integer in 1..t.
Last come the initial values of the two weight matrices:
the first is the input-to-hidden weight matrix, of size p*(n+1);
the second is the hidden-to-output weight matrix, of size t*(p+1).
Output:
Three parts:
The first line is one floating-point number: the cost J computed by the network using the initial weight matrices.
Then m lines of p floating-point numbers each: the hidden-layer outputs (not counting the bias unit).
Finally, m lines of t floating-point numbers each: the output-layer outputs (not counting the bias unit).
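For reference, the cost J is the cross-entropy cost of a sigmoid network averaged over the m samples, which is exactly what both programs below compute (no regularization term):

J = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{t} \Big[ -y_k^{(i)} \log h_k^{(i)} - \big(1 - y_k^{(i)}\big) \log\big(1 - h_k^{(i)}\big) \Big]

where h^{(i)} is the output-layer activation for sample i and y^{(i)} is the one-hot encoding of its label.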
Sample Input1:
3 3 5 3
0.084147 0.090930 0.014112 3
0.090930 0.065699 -0.053657 2
2 3 4 1
0.084147 -0.027942 -0.099999 -0.028790
0.090930 0.065699 -0.053657 -0.096140
0.014112 0.098936 0.042017 -0.075099
-0.075680 0.041212 0.099061 0.014988
-0.095892 -0.054402 0.065029 0.091295
0.084147 -0.075680 0.065699 -0.054402 0.042017 -0.028790
0.090930 -0.095892 0.098936 -0.099999 0.099061 -0.096140
0.014112 -0.027942 0.041212 -0.053657 0.065029 -0.075099
Sample Output1:
2.094661
0.518066 0.522540 0.506299 0.484257 0.476700
0.519136 0.524614 0.507474 0.483449 0.474655
0.404465 0.419895 0.509409 0.589979 0.587968
0.514583 0.511113 0.497424
0.514587 0.511139 0.497447
0.515313 0.511164 0.496748

A clarification: this computes only a network with a single hidden layer, and the labels 3, 2, 1 are interpreted one-hot: label 3 for the first sample means its third output unit should be 1, label 2 for the second sample means its second output unit should be 1, and so on, as sketched below.
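A minimal sketch of that encoding (one_hot is a hypothetical helper; the Python code below builds the same matrix):

# Labels are 1..t; label k maps to a vector with a 1 at position k-1
def one_hot(labels, t):
    return [[1 if k == y - 1 else 0 for k in range(t)] for y in labels]

print(one_hot([3, 2, 1], 3))  # [[0, 0, 1], [0, 1, 0], [1, 0, 0]]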

The Python code is as follows:

# coding=utf-8
import math
from numpy import *

f = open(r'test')
lines = []
# Preprocessing: read the file and split each line into tokens
for each in f:
    lines.append(each.strip().split())
n, m, p, t = [int(x) for x in lines[0]]
sample = lines[1:m + 1]
w_in_hidden = lines[m + 1:m + 1 + p]
w_hidden_out = lines[m + 1 + p:]
feature = []  # feature matrix
label = []    # labels
for each in sample:
    feature.append(each[:-1])
    label.append(each[-1])
# Convert the lists into matrices
feature = mat(feature)
label = mat(label)
w_in_hidden = mat(w_in_hidden)    # input-to-hidden weight matrix
w_hidden_out = mat(w_hidden_out)  # hidden-to-output weight matrix
# Transpose so that each column is one sample
feature = feature.T
# The entries were read as strings; convert them to floats
feature = feature.astype(dtype=float)
w_in_hidden = w_in_hidden.astype(dtype=float)
label = label.astype(dtype=float)
w_hidden_out = w_hidden_out.astype(dtype=float)
# Prepend a row of ones as the bias unit (a matrix-concatenation step)
bias = mat(ones(feature.shape[1]))
feature = row_stack((bias, feature))
hidden_output = dot(w_in_hidden, feature)
hidden_output = hidden_output.T
# exp here is numpy's element-wise exponential
hidden_output = 1 / (1 + exp(-1 * hidden_output))
print(hidden_output)  # hidden-layer output, one row per sample
hidden_output = hidden_output.T
bias = mat(ones(hidden_output.shape[1]))
hidden_output = row_stack((bias, hidden_output))
output = dot(w_hidden_out, hidden_output)
output = output.T
output = 1 / (1 + exp(-1 * output))
print(output)  # output-layer output, one row per sample
# A label value of k means the k-th output unit should be 1 (one-hot):
# labels 3, 2, 1 give the rows (0,0,1), (0,1,0), (1,0,0)
one_hot_label = zeros((m, t))
for i in range(m):
    one_hot_label[i, int(label[0, i]) - 1] = 1
label = one_hot_label
output = output.tolist()  # convert the matrices back to lists
label = label.tolist()
cost = 0.0
# Compute the cost; this could also be done with matrix operations
# (see the vectorized sketch after this code, which uses numpy's trace())
for i in range(len(output)):
    for j in range(len(output[0])):
        cost += -label[i][j] * math.log(output[i][j]) - (1 - label[i][j]) * math.log(1 - output[i][j])
print(cost / m)
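As the comment above notes, the loop can also be written with matrix operations; the diagonal-sum function the original was looking for is numpy's trace(). A minimal vectorized sketch, assuming label and output are the m*t one-hot and output matrices built above:

# Element-wise form of the same cross-entropy cost
Y = mat(label)
H = mat(output)
J = (multiply(-Y, log(H)) - multiply(1 - Y, log(1 - H))).sum() / m
# Equivalent form using the diagonal sum (numpy trace)
J2 = float(trace(-log(H) * Y.T - log(1 - H) * (1 - Y).T)) / m
print(J, J2)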

The print order here differs from the required output (the cost is printed last instead of first); please overlook this minor issue.

The output is as follows:

The C code is as follows:

#include <stdio.h>
#include <math.h>

#define MAX_SAMPLE_NUMBER 1024
#define MAX_FEATURE_DIMENSION 128
#define MAX_LABEL_NUMBER 12

double sigmoid(double z){
	return 1 / (1 + exp(-z));
}

double hypothesis(double x[], double theta[], int feature_number){
// hypothesis computes the output of a single neuron: the sigmoid of its weighted input
	double h = 0;
	for (int i = 0; i <= feature_number; i++){
		h += x[i] * theta[i];
	}
	return sigmoid(h);
}

void forward_propagation(double a[],
						 int feature_number,
						 double W[][MAX_FEATURE_DIMENSION],
						 int neuron_num,
						 double output[]){

	for (int i = 0; i < neuron_num; i++){
		output[i+1] = hypothesis(a, W[i], feature_number);
		// W[i] holds the weights from the previous layer into neuron i of this layer
	}
}
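Layer by layer this is the usual vectorized recurrence, with a^{(1)} = x and a bias unit fixed at index 0:

z^{(l+1)} = W^{(l)} a^{(l)}, \qquad a^{(l+1)} = \sigma\big(z^{(l+1)}\big)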

double compute_cost(double X[][MAX_FEATURE_DIMENSION], 
					int y[],
					int feature_number,
					int sample_number,
					double W1[][MAX_FEATURE_DIMENSION],
					int hidden_layer_size,
					double W2[][MAX_FEATURE_DIMENSION],
					int label_num,
					double a2[][MAX_FEATURE_DIMENSION],
					double a3[][MAX_FEATURE_DIMENSION]){
					// a2: hidden-layer outputs; a3: output-layer outputs; W1 and W2 follow the same layout
	double sum = 0;
	for (int i = 0; i < sample_number; i++){
		X[i][0] = 1;
		forward_propagation(X[i], feature_number, W1, hidden_layer_size, a2[i]);
		a2[i][0] = 1;
		forward_propagation(a2[i], hidden_layer_size, W2, label_num, a3[i]);
		double yy[MAX_LABEL_NUMBER] = {0};
		yy[y[i]] = 1;
		for (int j = 1; j <= label_num; j++){
			sum += -yy[j] * log(a3[i][j]) - (1 - yy[j]) * log(1 - a3[i][j]);
		}
	}
	return sum / sample_number;
}

double X[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
int y[MAX_SAMPLE_NUMBER];
double W1[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION];
double W2[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION];
double a2[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];
double a3[MAX_SAMPLE_NUMBER][MAX_FEATURE_DIMENSION];

int main(){
	int feature_number;
	int sample_number;
	int hidden_layer_size;
	int label_num;
	scanf("%d %d %d %d", &feature_number, &sample_number, &hidden_layer_size, &label_num);
	for (int i = 0; i < sample_number; i++){
		for (int j = 1; j <= feature_number; j++){
			scanf("%lf", &X[i][j]);
		}
		scanf("%d", &y[i]);
	}
	for (int i = 0; i < hidden_layer_size; i++){
		for (int j = 0; j <= feature_number; j++){
			scanf("%lf", &W1[i][j]);
		}
	}
	for (int i = 0; i < label_num; i++){
		for (int j = 0; j <= hidden_layer_size; j++){
			scanf("%lf", &W2[i][j]);
		}
	}
	double J = compute_cost(X, y, feature_number, sample_number,
		W1, hidden_layer_size, W2, label_num, a2, a3);
	printf("%lf\n", J);
	for (int i = 0; i < sample_number; i++){
		for (int j = 1; j < hidden_layer_size; j++){
			printf("%lf ", a2[i][j]);
		}
		printf("%lf\n", a2[i][hidden_layer_size]);
	}
	for (int i = 0; i < sample_number; i++){
		for (int j = 1; j < label_num; j++){
			printf("%lf ", a3[i][j]);
		}
		printf("%lf\n", a3[i][label_num]);
	}
	return 0;
}

The result is as follows:

For the BP algorithm itself no suitable test case was found, so only the C++ version with self-made test data is given here; there is no validation set. (A numerical gradient check, sketched after the code, is one way to verify it.)

C++ code:

#include <stdio.h>
#include <math.h>

double sigmoid(double z){
	return 1 / (1 + exp(-z));
}

double hypothesis(double x[], double theta[], int feature_number){
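	// Note: unlike the C version above, this returns the raw weighted sum z;
	// the caller applies sigmoid separately so that z can be reused by
	// sigmoid_gradient during backpropagation.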
	double h = 0;
	for (int i = 0; i <= feature_number; i++){
		h += x[i] * theta[i];
	}
	return h;
}

#define MAX_FEATURE_DIMENSION 128
#define MAX_LABEL_NUMBER 12

void forward_propagation(double input[],
						 int feature_number,
						 double W[][MAX_FEATURE_DIMENSION],
						 int neuron_num,
						 double z[],
						 double a[]){

	for (int i = 0; i < neuron_num; i++){
		z[i+1] = hypothesis(input, W[i], feature_number);
		a[i+1] = sigmoid(z[i+1]);
		// indices are offset by 1 because slot 0 is reserved for the bias unit
	}
}

double sigmoid_gradient(double z){
	return sigmoid(z) * (1 - sigmoid(z));
	// the derivative of the sigmoid simplifies to this form; note that z, not sigmoid(z), is the independent variable
}
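The identity used above follows directly from the definition of the sigmoid:

\sigma'(z) = \frac{e^{-z}}{(1 + e^{-z})^2} = \sigma(z)\big(1 - \sigma(z)\big)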

void compute_layer_error(double layer_error[],
						double W[][MAX_FEATURE_DIMENSION],
						int neuron_num,
						int feature_number,
						double next_layer_error[],
						double z[]){
// this computes the layer error δ(l); see the previous post for the derivation
	for (int i = 1; i <= feature_number; i++){
		for (int j = 0; j < neuron_num; j++){
			layer_error[i] += W[j][i] * next_layer_error[j + 1];  // next_layer_error[j + 1] = δ(l+1) for neuron j
		}
	}
	for (int i = 1; i <= feature_number; i++){
		layer_error[i] = layer_error[i] * sigmoid_gradient(z[i]);
	}
}
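These two loops implement the standard backpropagated error for a hidden layer (feature_number is the number of units in layer l, neuron_num the number in layer l+1; the bias component is excluded, hence the loops start at i = 1):

\delta^{(l)} = \big(W^{(l)}\big)^{\top} \delta^{(l+1)} \circ \sigma'\big(z^{(l)}\big)

where \circ denotes element-wise multiplication.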
void accumulate_gradient(double sum[][MAX_FEATURE_DIMENSION], 
						 double layer_error[],
						 int neuron_num,
						 int feature_number,
						 double a[]){
						 // accumulate each sample's contribution to the gradient sums
	for (int i = 0; i < neuron_num; i++){
		for (int j = 0; j <= feature_number; j++){
			sum[i][j] += layer_error[i+1] * a[j];
		}
	}
}
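Accumulated over the m training samples, this builds up the gradient of the unregularized cost (the division by m happens in compute_gradient below):

\frac{\partial J}{\partial W^{(l)}} = \frac{1}{m} \sum_{i=1}^{m} \delta^{(l+1)} \big(a^{(l)}\big)^{\top}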

void compute_gradient(double X[][MAX_FEATURE_DIMENSION], 
						int y[],
						int feature_number,
						int sample_number,
						double W1[][MAX_FEATURE_DIMENSION],
						int hidden_layer_size,
						double W2[][MAX_FEATURE_DIMENSION],
						int label_num,
						double w1_grad[][MAX_FEATURE_DIMENSION],
						double w2_grad[][MAX_FEATURE_DIMENSION]){

	double grad1_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0};
	double grad2_sum[MAX_FEATURE_DIMENSION][MAX_FEATURE_DIMENSION] = {0};
	for (int i = 0; i < sample_number; i++){
		X[i][0] = 1;
		double z2[MAX_FEATURE_DIMENSION] = {0, 0};
		double a2[MAX_FEATURE_DIMENSION] = {1, 0};		
		forward_propagation(X[i], feature_number, W1, hidden_layer_size, z2, a2);
		double z3[MAX_FEATURE_DIMENSION] = {0};
		double a3[MAX_FEATURE_DIMENSION] = {0};		
		forward_propagation(a2, hidden_layer_size, W2, label_num, z3, a3);
		double yy[MAX_LABEL_NUMBER] = {0};
		yy[y[i]] = 1;
		
		double layer3_error[MAX_FEATURE_DIMENSION] = {0};
		for (int j = 1; j <= label_num; j++){
			layer3_error[j] = a3[j] - yy[j];
		}		
		double layer2_error[MAX_FEATURE_DIMENSION] = {0};
		compute_layer_error(layer2_error, W2, label_num, hidden_layer_size, layer3_error, z2);
		accumulate_gradient(grad2_sum, layer3_error, label_num, hidden_layer_size, a2);
		accumulate_gradient(grad1_sum, layer2_error, hidden_layer_size, feature_number, X[i]);
	}
	for (int i = 0; i < hidden_layer_size; i++){
		for (int j = 0; j <= feature_number; j++){
			w1_grad[i][j] = grad1_sum[i][j] / sample_number;
		}
	}
	for (int i = 0; i < label_num; i++){
		for (int j = 0; j <= hidden_layer_size; j++){
			w2_grad[i][j] = grad2_sum[i][j] / sample_number;
		}
	}
}

int main(){
	double X[][MAX_FEATURE_DIMENSION] = {
		{0, 0.084147, 0.090930},
		{0, 0.090930, 0.065699},
		{0, 2, 3}
	};
	int y[] = {1, 2, 2};
	int hidden_layer_size = 4;
	int label_num = 2;
	int feature_number = 2;
	int sample_number = 3;
	double W1[][MAX_FEATURE_DIMENSION] = {
		{0.084147, -0.027942, -0.099999},
		{0.090930, 0.065699, -0.053657},
		{0.014112, 0.098936, 0.042017},
		{-0.075680, 0.041212, 0.099061},
	};
	double W2[][MAX_FEATURE_DIMENSION] = {
		{0.084147, -0.075680, 0.065699, -0.054402, 0.042017},
		{0.090930, -0.095892, 0.098936, -0.099999, 0.099061}
	};
	double a2[10][MAX_FEATURE_DIMENSION] = {0};
	double a3[10][MAX_FEATURE_DIMENSION] = {0};

	double w1_grad[10][MAX_FEATURE_DIMENSION] = {0};
	double w2_grad[10][MAX_FEATURE_DIMENSION] = {0};

	compute_gradient(X, y, feature_number, 3, W1,
					hidden_layer_size, W2, label_num, w1_grad, w2_grad);

	printf("w1_grad:\n");
	for (int i = 0; i < hidden_layer_size; i++){
		for (int j = 0; j <= feature_number; j++){
			printf("%lf ", w1_grad[i][j]);
		}
		printf("\n");
	}

	printf("w2_grad:\n");
	for (int i = 0; i < label_num; i++){
		for (int j = 0; j <= hidden_layer_size; j++){
			printf("%lf ", w2_grad[i][j]);
		}
		printf("\n");
	}
	
	return 0;
}

Run screenshot:
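Since there is no reference output, one way to verify compute_gradient is a numerical gradient check. Below is a minimal numpy sketch of that idea (cost and numerical_grad are hypothetical helpers, not part of the original post; they re-implement the same forward pass and cost as the C code):

# coding=utf-8
# Numerical gradient check: compare finite differences of the cost
# against the analytical gradient produced by backpropagation.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost(W1, W2, X, Y):
    # X: m x (n+1) with a leading bias column, Y: m x t one-hot labels
    a2 = sigmoid(X.dot(W1.T))                        # hidden layer, m x p
    a2 = np.hstack([np.ones((X.shape[0], 1)), a2])   # prepend bias unit
    a3 = sigmoid(a2.dot(W2.T))                       # output layer, m x t
    return (-Y * np.log(a3) - (1 - Y) * np.log(1 - a3)).sum() / X.shape[0]

def numerical_grad(W, compute_J, eps=1e-5):
    # Central finite difference dJ/dW, perturbing one weight at a time
    grad = np.zeros_like(W)
    for idx in np.ndindex(*W.shape):
        W[idx] += eps
        J_plus = compute_J()
        W[idx] -= 2 * eps
        J_minus = compute_J()
        W[idx] += eps  # restore the original weight
        grad[idx] = (J_plus - J_minus) / (2 * eps)
    return grad

If the backpropagation code is correct, numerical_grad(W1, lambda: cost(W1, W2, X, Y)) should match w1_grad (and likewise for W2) to several decimal places.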

