用C/C++實現BP神經網絡

緣起

最近跟着老師在學習神經網絡,爲了更加深刻地理解這個黑盒,我打算自己用C/C++將其實現一遍。今天忙活了好一會兒,終於實現了一個BP神經網絡,後期還會陸續實現CNN(卷積神經網絡)之類的模型,也會發上來和大家一起分享的~

因爲最近比較忙,所以這裏直接放代碼了,關於一些原理以及自己的一點見解會在有空的時候整理出來的~



代碼

main.cpp

#include <iostream>
#include <vector>
#include "BPUtils.h"
using namespace std;
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
// Definitions of the data sets that BPUtils.h declares `extern`.
// Feature rows of the test set, one inner vector per sample.
std::vector<std::vector<double>> dataTest;
// Labels of the test set; dataTestY[i] is compared against the prediction for dataTest[i].
std::vector<double> dataTestY;
// Feature rows of the training set, one inner vector per sample.
std::vector<std::vector<double>> trainDataX;
// Labels of the training set, passed to NeuralNetwork::train together with trainDataX.
std::vector<double> trainDataY;
// Entry point: loads train.txt / test.txt into the global data sets,
// normalizes both, then trains a 2-44-2 network on the training set.
// Returns 0 on success and 1 when no usable training data could be loaded.
int main() {
    createTrainSet();
    createTestSet();

    // Training samples one row by a random index, so an empty training set
    // (for example a missing train.txt) would lead to a modulo-by-zero.
    // Stop here with a clear message instead.
    if(trainDataX.empty() || trainDataY.size() < trainDataX.size()){
        std::cerr<<"No usable training data loaded (expected rows of \"x1<TAB>x2<TAB>label\" in train.txt)"<<std::endl;
        return 1;
    }

    // NOTE(review): each set is scaled with its own min/max, so the same raw
    // value can map to different inputs in training and testing — confirm
    // this is intended.
    guiYiHua(dataTest);
    guiYiHua(trainDataX);

    // 2 input units, 44 hidden units, 2 output units.
    NeuralNetwork nn(2,44,2);
    nn.train(trainDataX,trainDataY);

    return 0;
}


BPUtils.h

#ifndef BP_UTILS
#define BP_UTILS
#include <string.h>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// random(x): pseudo-random integer in [0, x) drawn with rand(); x must be
// non-zero. The argument is parenthesised so that an expression such as
// random(a + b) expands to rand() % (a + b) rather than rand() % a + b.
// NOTE(review): a function-like macro named `random` can clash with the
// POSIX random() declaration if a system header is included after this
// point — confirm include order wherever this header is used.
#define random(x) (rand()%(x))
using namespace std;
// Size of the fixed character buffers used when parsing data-file fields.
#define MAXSIZE 99
// Global data sets shared with the translation unit that defines them.
// Test set: feature rows and their labels.
extern std::vector<std::vector<double>> dataTest;
extern std::vector<double> dataTestY;
// Training set: feature rows and their labels.
extern std::vector<std::vector<double>> trainDataX;
extern std::vector<double> trainDataY;

// Splits `str` into tokens separated by any character contained in `delim`.
// Runs of consecutive delimiters are treated as one separator, so no empty
// tokens are produced (the same tokenisation rules as strtok). An empty
// `str` yields an empty result; an empty `delim` yields the whole string as
// a single token.
// Works directly on the std::string, so nothing is heap-allocated by hand
// (the previous strtok-based version leaked two buffers per call) and the
// function no longer relies on strtok's hidden global state.
std::vector<std::string> split(const std::string& str, const std::string& delim) {
    std::vector<std::string> tokens;
    // Start of the first token: skip any leading delimiters.
    std::string::size_type tokenBegin = str.find_first_not_of(delim);
    while(tokenBegin != std::string::npos) {
        const std::string::size_type tokenEnd = str.find_first_of(delim, tokenBegin);
        if(tokenEnd == std::string::npos) {
            // Last token: runs to the end of the string.
            tokens.push_back(str.substr(tokenBegin));
            break;
        }
        tokens.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
        // Skip the delimiter run that follows this token.
        tokenBegin = str.find_first_not_of(delim, tokenEnd);
    }
    return tokens;
}

// Returns the largest value stored anywhere in `dataSet`.
// The running maximum is seeded from the first value encountered, so data
// sets whose values are all below -999 are handled correctly. NaN entries
// are ignored. When the data set holds no (non-NaN) values the legacy
// sentinel -999 is returned.
// The data set is taken by const reference to avoid copying it.
double getMax(const std::vector<std::vector<double>>& dataSet){
    bool found=false;
    double largest=-999;
    for(const std::vector<double>& row : dataSet){
        for(const double value : row){
            if(std::isnan(value)){
                continue;
            }
            if(!found || largest<value){
                largest=value;
                found=true;
            }
        }
    }
    return largest;
}

// Returns the smallest value stored anywhere in `dataSet`.
// The running minimum is seeded from the first value encountered, so data
// sets whose values are all above 999 are handled correctly. NaN entries
// are ignored. When the data set holds no (non-NaN) values the legacy
// sentinel 999 is returned.
// The data set is taken by const reference to avoid copying it.
double getMin(const std::vector<std::vector<double>>& dataSet){
    bool found=false;
    double smallest=999;
    for(const std::vector<double>& row : dataSet){
        for(const double value : row){
            if(std::isnan(value)){
                continue;
            }
            if(!found || smallest>value){
                smallest=value;
                found=true;
            }
        }
    }
    return smallest;
}

// Min-max normalization, in place: x = (x - min) / (max - min), where min and
// max are taken over every value in the whole data set (not per column).
// - An empty data set is left untouched.
// - When all values are equal (max == min) every entry becomes 0 instead of
//   the NaN a division by zero would produce.
// Min and max are found in a single pass over the data, without copying it.
void guiYiHua(std::vector<std::vector<double>>&dataSet){
    bool found=false;
    double lowest=0;
    double highest=0;
    for(const std::vector<double>& row : dataSet){
        for(const double value : row){
            if(!found){
                lowest=value;
                highest=value;
                found=true;
            }else{
                if(value<lowest){
                    lowest=value;
                }
                if(highest<value){
                    highest=value;
                }
            }
        }
    }
    if(!found){
        return;
    }

    const double range=highest-lowest;
    for(std::vector<double>& row : dataSet){
        for(double& value : row){
            value=(range>0) ? (value-lowest)/range : 0.0;
        }
    }
}



// Loads the training set from "train.txt" into the globals trainDataX /
// trainDataY. Each line holds tab-separated numbers; the field at index 2 is
// the label and every other field is a feature.
// - The file is opened read-only, so a write-protected file still loads.
// - A file that cannot be opened is reported on stderr and loads nothing.
// - Lines with fewer than three fields (e.g. a trailing blank line) are
//   skipped so that feature rows and labels always stay aligned.
// - Fields are converted straight from the std::string, so a long field
//   cannot overflow a fixed-size buffer.
void createTrainSet(){
    std::ifstream input("train.txt");
    if(!input){
        std::cerr<<"createTrainSet: cannot open train.txt"<<std::endl;
        return;
    }
    // Index of the label column within a line.
    const std::size_t labelColumn=2;
    std::string line;
    while(std::getline(input,line)){
        const std::vector<std::string> fields=split(line,"\t");
        if(fields.size()<=labelColumn){
            continue;
        }
        std::vector<double> features;
        for(std::size_t i=0;i<fields.size();i++){
            const double value=std::atof(fields[i].c_str());
            if(i!=labelColumn){
                features.push_back(value);
            }else{
                trainDataY.push_back(value);
            }
        }
        trainDataX.push_back(features);
    }
}

// Loads the test set from "test.txt" into the globals dataTest / dataTestY.
// Each line holds tab-separated numbers; the field at index 2 is the label
// and every other field is a feature.
// - The file is opened read-only, so a write-protected file still loads.
// - A file that cannot be opened is reported on stderr and loads nothing.
// - Lines with fewer than three fields (e.g. a trailing blank line) are
//   skipped so that feature rows and labels always stay aligned.
// - Fields are converted straight from the std::string, so a long field
//   cannot overflow a fixed-size buffer.
void createTestSet(){
    std::ifstream input("test.txt");
    if(!input){
        std::cerr<<"createTestSet: cannot open test.txt"<<std::endl;
        return;
    }
    // Index of the label column within a line.
    const std::size_t labelColumn=2;
    std::string line;
    while(std::getline(input,line)){
        const std::vector<std::string> fields=split(line,"\t");
        if(fields.size()<=labelColumn){
            continue;
        }
        std::vector<double> features;
        for(std::size_t i=0;i<fields.size();i++){
            const double value=std::atof(fields[i].c_str());
            if(i!=labelColumn){
                features.push_back(value);
            }else{
                dataTestY.push_back(value);
            }
        }
        dataTest.push_back(features);
    }
}


// Sigmoid (logistic) activation: maps x to 1 / (1 + e^(-x)).
double sigmoid(double x){
    const double decay=std::exp(-x);
    return 1.0/(decay+1.0);
}

// Derivative of the sigmoid, expressed through the sigmoid's OUTPUT:
// the argument x is an already-activated value s = sigmoid(z), and the
// function returns s * (1 - s). It must not be given the raw pre-activation.
double dsigmoid(double x){
    const double complement=1.0-x;
    return complement*x;
}

class NeuralNetwork{
public:
    //輸入層單元個數
    int inputLayers;
    //隱藏層單元個數
    int hidenLayers;
    //輸出層單元個數
    int outputLayers;
    //輸入層到隱藏層的權值
    //行數爲輸入層單元個數+1(因爲有偏置)
    //列數爲隱藏層單元個數
    vector<vector<double>>VArr;
    //隱藏層到輸出層的權值
    //行數爲隱藏層單元個數
    //列數爲輸出層單元個數
    vector<vector<double>>WArr;
private:
    //矩陣乘積
    void dot(const double* m1,const double* m2,double *m3,int m,int n,int p){
        for(int i=0;i<m;++i)                 //點乘運算
            {
                for(int j=0;j<p;++j)
                {
                    (*(m3+i*p+j))=0;
                        for(int k=0;k<n;++k)
                        {
                                (*(m3+i*p+j))+=(*(m1+i*n+k))*(*(m2+k*p+j));
                         }
                }
            }
    }

    void vectorToArr1(vector<vector<double>>vec,double *arr,int n){
        for(int i=0;i<vec.size();i++){
            for(int j=0;j<vec[i].size();j++){
                    //cout<<endl<<vec[i][j]<<"******"<<i<<"*********"<<j<<"***";
                (*(arr+i*n+j))=vec[i][j];
            }
            //cout<<endl;
        }
    }

    void vectorToArr2(vector<double>vec,double *arr){
        for(int i=0;i<vec.size();i++){
            (*(arr+i))=vec[i];
        }
    }

    void arrToVector1(double *arr,vector<double>&vec,int m){
        for(int i=0;i<m;i++){
            vec.push_back((*(arr+i)));
        }

    }

    //矩陣轉置
    void ZhuanZhi(const double*m1,double *m2,int n1,int n2){
        for(int i=0;i<n1;i++){
            for(int j=0;j<n2;j++){
                (*(m2+j*n1+i))=(*(m1+i*n2+j));
            }
        }
    }

    //驗證準確率時的預測
    //輸入測試集的一行數據
    //ArrL2爲輸出層的輸出
    //eg.當我們要分成10類的時候,輸出10個數,類似於該樣本屬於這10個類別的概率
    //我們選取其中概率最大的類別作爲最終分類得到的類別
    void predict(vector<double>test,double *ArrL2){
//        for(int i=0;i<test.size();i++){
//            cout<<"test[i]:"<<test[i]<<endl;
//        }
        //添加轉置
        test.push_back(1);
        double testArr[1][inputLayers+1];
        //轉成矩陣
        vectorToArr2(test,&testArr[0][0]);
//        for(int i=0;i<inputLayers+1;i++){
//            cout<<"testArr:"<<testArr[0][i]<<endl;
//        }
        double dotL1[1][hidenLayers];

        double VArr_temp[inputLayers+1][hidenLayers];
        vectorToArr1(VArr,&VArr_temp[0][0],hidenLayers);
//        for(int i=0;i<inputLayers+1;i++){
//            for(int j=0;j<hidenLayers;j++){
//                cout<<VArr_temp[i][j]<<" ";
//            }
//            cout<<endl;
//        }
        //testArr[1][inputLayers+1] dot VArr[inputLayers+1][hidenLayers]
        dot(&testArr[0][0],&VArr_temp[0][0],&dotL1[0][0],1,inputLayers+1,hidenLayers);
//        for(int i=0;i<1;i++){
//            for(int j=0;j<hidenLayers;j++){
//                cout<<dotL1[i][j]<<" ";
//            }
//            cout<<endl;
//        }
        //隱藏層輸出
        double ArrL1[1][hidenLayers];
        //double ArrL2[1][outputLayers];
        for(int i=0;i<hidenLayers;i++){
            ArrL1[0][i]=sigmoid(dotL1[0][i]);
            //cout<<ArrL1[0][i]<<endl;
        }
        double dotL2[1][outputLayers];

        double WArr_temp[hidenLayers][outputLayers];
        vectorToArr1(WArr,&WArr_temp[0][0],outputLayers);
        //ArrL1[1][hidenLayers] dot WArr[hidenLayers][outputLayers]
        dot(&ArrL1[0][0],&WArr_temp[0][0],&dotL2[0][0],1,hidenLayers,outputLayers);
        //輸出層輸出
        for(int i=0;i<outputLayers;i++){
            //ArrL2[0][i]=sigmoid(dotL2[0][1]);
            (*(ArrL2+i))=sigmoid(dotL2[0][i]);
            //cout<<*(ArrL2+i)<<endl;
        }
    }

    int getMaxIndex(vector<double>vec){
        int index=-1;
        double MYMAX=-999;
        for(int i=0;i<vec.size();i++){
            //cout<<vec.size()<<"*********"<<endl;
            //cout<<i<<"::::"<<vec[i]<<endl;
            if(MYMAX<vec[i]){
                MYMAX=vec[i];
                index=i;
            }
        }
        return index;
    }

public:
    //構造函數,傳入輸入層,隱藏層,輸出層單元個數
    //並且構造權值矩陣
    NeuralNetwork(int _inputLayers,int _hidenLayers,int _outputLayers){
        this->inputLayers=_inputLayers;
        hidenLayers=_hidenLayers;
        outputLayers=_outputLayers;

        //構造V權值矩陣
        for(int i=0;i<inputLayers+1;i++){
            vector<double>vec;
            for(int j=0;j<hidenLayers;j++){
                vec.push_back((double)rand()/RAND_MAX*2-1);
            }
            VArr.push_back(vec);
        }

        for(int i=0;i<hidenLayers;i++){
            vector<double>vec;
            for(int j=0;j<outputLayers;j++){
                vec.push_back((double)rand()/RAND_MAX*2-1);
            }
            WArr.push_back(vec);
        }
    }
    //開始訓練
    //傳入訓練集,預期的y值,學習效率,以及訓練迭代的次數
    //這裏規定輸入的數據爲2列的數據
    void train(vector<vector<double>>dataX,vector<double>dataY,double lr=0.03,int epochs=1000000){
        double arrL1[1][hidenLayers];
        //將VArr由vector轉成arr
        double VArr_temp[inputLayers+1][hidenLayers];
        double hangx_temp[1][inputLayers+1];
        vectorToArr1(VArr,&VArr_temp[0][0],hidenLayers);
        double hangxT[inputLayers+1][1];
        double hangxDotVArr[1][hidenLayers];
        double arrL2[1][outputLayers];
        double WArr_temp[hidenLayers][outputLayers];
        double arrL2_delta[1][outputLayers];
        double arrL1_delta[1][hidenLayers];
        double E;
        double dao;
        double dotTemp[hidenLayers][outputLayers];
        double WArr_tempT[outputLayers][hidenLayers];
        double arrL1T[hidenLayers][1];
        double dotTempp[inputLayers+1][hidenLayers];
        srand((int)time(0));
        //爲數據集添加偏置
        //eg.當我們輸入的數據集爲4X2的時候,需要爲其在最後添加一列偏置,讓其變成一個4X3的矩陣
        for(int i=0;i<dataX.size();i++){
            //最後一列爲偏置
            dataX[i].push_back(1);
        }

        //進行權值訓練更新
        for(int n=0;n<epochs;n++){
            //隨機選取一行樣本進行更新
            int iii=random(dataX.size());
            //cout<<"iii:"<<iii<<endl;
            //得到隨機選取的一行數據
            vector<double>hangx=dataX[iii];
//            for(int i=0;i<hangx.size();i++){
//                cout<<hangx[i]<<"***"<<endl;
//            }
            //隱藏層輸出
            //這裏先計算輸入矩陣與權值矩陣的點乘,再將其輸入sigmoid函數中,得到最終的輸出
            //eg.輸入4X2的dataX,我們先加上偏置變成4X3
            //選取其中的一行數據1X3
            //然後計算dataX與arrV(3XhidenLayers)的dot,得到一個1XhidenLayers的矩陣

//            for(int ii=0;ii<inputLayers+1;ii++){
//                for(int jj=0;jj<hidenLayers;jj++){
//                    cout<<VArr[ii][jj]<<"---";
//                    cout<<VArr_temp[ii][jj]<<" ";
//                }
//                cout<<endl;
//            }

            vectorToArr2(hangx,&hangx_temp[0][0]);
//            for(int i=0;i<inputLayers+1;i++){
//                cout<<hangx[i]<<"---"<<endl;
//                cout<<hangx_temp[0][i]<<"**"<<endl;
//            }

            //hangx[1][inputLayers+1] dot VArr[inputLayers+1][hidenLayers]
            dot(&hangx_temp[0][0],&VArr_temp[0][0],&arrL1[0][0],1,inputLayers+1,hidenLayers);
            //將點乘後的值輸入到sigmoid函數中
            for(int k1=0;k1<hidenLayers;k1++){
                arrL1[0][k1]=sigmoid(arrL1[0][k1]);
                //cout<<arrL1[0][k1]<<endl;
            }




            vectorToArr1(WArr,&WArr_temp[0][0],outputLayers);
//            for(int ii=0;ii<hidenLayers;ii++){
//                for(int jj=0;jj<outputLayers;jj++){
//                    cout<<WArr_temp[ii][jj]<<endl;
//                }
//            }
            //arrL1[1][hidenLayers] dot WArr_temp[hidenLayers][outputLayers]
            dot(&arrL1[0][0],&WArr_temp[0][0],&arrL2[0][0],1,hidenLayers,outputLayers);
            //cout<<outputLayers<<endl;
            //cout<<arrL2[0][1]<<endl;
//            for(int k1=0;k1<outputLayers;k1++){
//                arrL2[0][k1]=sigmoid(arrL2[0][k1]);
////               // cout<<k1<<endl;
////                cout<<arrL2[0][k1]<<endl;
//            }





            //求權值的delta

            //根據公式計算權值更新的delta
           for(int k1=0;k1<outputLayers;k1++){
                arrL2[0][k1]=sigmoid(arrL2[0][k1]);
               // cout<<k1<<endl;
                //cout<<"arrL2[0][k1]:"<<arrL2[0][k1]<<endl;
                E=dataY[iii]-arrL2[0][k1];
                //cout<<"E:"<<E<<endl;
                dao=dsigmoid(arrL2[0][k1]);
                //cout<<"dao:"<<dao<<endl;
                arrL2_delta[0][k1]=E*dao;
                //cout<<"arrL2_delta[0][k1]:"<<arrL2_delta[0][k1]<<endl;
            }

//            for(int k1=0;k1<outputLayers;k1++){
//                //計算誤差
//                E=dataY[iii]-arrL2[0][k1];
//                //對L2輸出的結果求導
//                dao=dsigmoid(arrL2[0][k1]);
////                cout<<"arrL2[0][k1]:"<<arrL2[0][k1]<<endl;
////                cout<<"dataY[iii]:"<<dataY[iii]<<endl;
////                cout<<"E:"<<E<<endl;
////                cout<<"dao:"<<dao<<endl;
//                //計算delta
//                arrL2_delta[0][k1]=E*dao;
//            }

//            for(int i=0;i<outputLayers;i++){
//                cout<<arrL2_delta[0][i]<<endl;
//            }

            //W矩陣轉置

            ZhuanZhi(&WArr_temp[0][0],&WArr_tempT[0][0],hidenLayers,outputLayers);
//            for(int i=0;i<outputLayers;i++){
//                for(int j=0;j<hidenLayers;j++){
//                    cout<<WArr_temp[j][i]<<"**";
//                    cout<<WArr_tempT[i][j]<<" ";
//                }
//                cout<<endl;
//            }
            //arrL2_delta[1][outputLayers] dot WArr_tempT[outputLayers][hidenLayers]
            dot(&arrL2_delta[0][0],&WArr_tempT[0][0],&arrL1_delta[0][0],1,outputLayers,hidenLayers);                       //乘上L1輸出的導數
//            for(int k1=0;k1<hidenLayers;k1++){
//                cout<<dsigmoid(arrL1[0][k1])<<endl;
//            }
            //乘上L1輸出的導數
            for(int k1=0;k1<hidenLayers;k1++){
                double ii=arrL1_delta[0][k1];
                arrL1_delta[0][k1]=ii*dsigmoid(arrL1[0][k1]);
                //cout<<ii<<"**"<<dsigmoid(arrL1[0][k1])<<"**"<<arrL1_delta[0][k1]<<endl;
            }

            //通過上面的delta更新權值WV

            ZhuanZhi(&arrL1[0][0],&arrL1T[0][0],1,hidenLayers);

//            for(int i=0;i<hidenLayers;i++){
//                cout<<arrL1T[i][0]<<endl;
//            }
            //arrL1T[hidenLayers][1] dot arrL2_delta[1][outputLayers]



            dot(&arrL1T[0][0],&arrL2_delta[0][0],&dotTemp[0][0],hidenLayers,1,outputLayers);
//                                    for(int k1=0;k1<outputLayers;k1++){
//                cout<<arrL2_delta[0][k1]<<endl;
//            }
//            for(int k1=0;k1<hidenLayers;k1++){
//                for(int k2=0;k2<outputLayers;k2++){
//                    cout<<dotTemp[k1][k2]<<" ";
//                }
//                cout<<endl;
//            }

//            for(int k1=0;k1<outputLayers;k1++){
//                cout<<arrL2_delta[0][k1]<<endl;
//            }
            for(int k1=0;k1<hidenLayers;k1++){
                for(int k2=0;k2<outputLayers;k2++){
                    //根據學習效率進行更新
                    //cout<<dotTemp[k1][k2]<<endl;
                    WArr[k1][k2]+=(lr*dotTemp[k1][k2]);
                    //cout<<"WArr[k1][k2]:"<<WArr[k1][k2]<<endl;
                }
            }

            //轉置

            ZhuanZhi(&hangx_temp[0][0],&hangxT[0][0],1,inputLayers+1);
//            for(int i=0;i<inputLayers+1;i++){
//                cout<<hangxT[i][0]<<"))"<<endl;
//            }
            //hangxT[inputLayers+1][1] dot arrL1_delta[1][hidenLayers]

//            for(int k1=0;k1<hidenLayers;k1++){
//                //double ii=arrL1_delta[0][k1];
//                //arrL1_delta[0][k1]=ii*dsigmoid(arrL1[0][k1]);
//                cout<<arrL1_delta[0][k1]<<"** ";
//            }
            //cout<<endl;
            dot(&hangxT[0][0],&arrL1_delta[0][0],&dotTempp[0][0],inputLayers+1,1,hidenLayers);
//            for(int i=0;i<inputLayers+1;i++){
//                for(int j=0;j<hidenLayers;j++){
//                    cout<<dotTempp[i][j]<<" ";
//                }
//                cout<<endl;
//            }
            for(int k1=0;k1<inputLayers+1;k1++){
                for(int k2=0;k2<hidenLayers;k2++){
                    VArr[k1][k2]+=(lr*dotTempp[k1][k2]);
                    //cout<<"(lr*dotTempp[k1][k2]):"<<(lr*dotTempp[k1][k2])<<endl;
                    //cout<<VArr[k1][k2]<<"*****"<<endl;
                }
            }


            //每訓練100次預測一下準確率
            if(n%10000==0){
                //使用測試集驗證一下準確率
                //存放預測返回的結果
                double resultArr[1][outputLayers];

                int index;
                //整個樣本集中預測結果正確的樣本個數
                int num=0;
                //準確率
                double accuracy=0;
                //遍歷整個測試樣本
                for(int k1=0;k1<dataTest.size();k1++){
                    vector<double>result;
                    //取測試集中的第k1行進行測試,結果保存在resultArr中
                    predict(dataTest[k1],&resultArr[0][0]);
                    //將arr轉成vector
                    arrToVector1(&resultArr[0][0],result,outputLayers);
//                    for(int kk=0;kk<result.size();kk++){
//                            //cout<<resultArr[0][kk]<<"%%%%%%%%"<<endl;
//                        cout<<result[kk]<<"&&&&&&&&&7"<<endl;
//                    }
                    //取得結果中的最大值(概率最大)的index
                    index=getMaxIndex(result);
//                        cout<<"**k1:"<<k1<<endl;
//                        cout<<"**index:"<<index<<endl;
//                        cout<<"**Y:"<<dataTestY[k1]<<endl;
                    if(index==dataTestY[k1]){
//                        cout<<"k1:"<<k1<<endl;
//                        cout<<"index:"<<index<<endl;
//                        cout<<"Y:"<<dataTestY[k1]<<endl;
                        num++;
                    }
                }
                accuracy=(double)num/dataTestY.size();
                //if(num>5)cout<<"num:!!!!!!!!!!!!!!!!!!!!!!!111"<<num<<endl;
                cout<<"epoch: "<<n<<",   "<<"accuracy: "<<accuracy<<endl;
            }
         }
    }
};


#endif



訓練效果

圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章