緣起
最近跟著老師在學(xué)習(xí)神經(jīng)網(wǎng)絡(luò)削茁,為了更加深刻地理解這個(gè)黑盒聪建,我打算自己用C/C++將其實(shí)現(xiàn)一遍。今天忙活了好一會(huì)兒氧腰,終于實(shí)現(xiàn)了一個(gè)BP神經(jīng)網(wǎng)絡(luò)枫浙,后期還會(huì)陸續(xù)實(shí)現(xiàn)CNN神經(jīng)網(wǎng)絡(luò)之類的,也會(huì)發(fā)上來(lái)和大家一起分享的~
因?yàn)樽罱容^忙古拴,所以這里直接放代碼了箩帚,關(guān)于一些原理以及自己的一點(diǎn)見(jiàn)解會(huì)在有空的時(shí)候整理出來(lái)的~
代碼
main.cpp
#include <iostream>
#include <vector>
#include "BPUtils.h"
using namespace std;
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
// Test-set feature rows and their labels (filled by createTestSet()).
vector<vector<double>>dataTest;
vector<double>dataTestY;
// Training-set feature rows and their labels (filled by createTrainSet()).
vector<vector<double>>trainDataX;
vector<double>trainDataY;
int main() {
// double m1[3][1]={{1},{2},{3}};
// double m2[1][4]={1,2,3,4};
// double m3[3][4];
// dott(&m1[0][0],&m2[0][0],&m3[0][0],3,1,4);
// for(int i=0;i<3;i++){
// for(int j=0;j<4;j++){
// cout<<m3[i][j]<<" ";
// }
// cout<<endl;
// }
createTrainSet();
createTestSet();
guiYiHua(dataTest);
guiYiHua(trainDataX);
NeuralNetwork nn(2,44,2);
nn.train(trainDataX,trainDataY);
// for(int i=0;i<trainDataX.size();i++){
// for(int j=0;j<trainDataX[i].size();j++){
// cout<<trainDataX[i][j]<<" ";
// }
// cout<<endl;
// }
// for(int i=0;i<trainDataX.size();i++){
// cout<<trainDataY[i]<<" ";
// }
//
// cout<<endl<<"---------------------------------------------------------"<<endl;
//
// for(int i=0;i<dataTest.size();i++){
// for(int j=0;j<dataTest[i].size();j++){
// cout<<dataTest[i][j]<<" ";
// }
// cout<<endl;
// }
// for(int i=0;i<dataTestY.size();i++){
// cout<<dataTestY[i]<<" ";
// }
// NeuralNetwork nn(2,4,3);
// vector<vector<double>>dataX;
// vector<double>dataY;
// for(int i=0;i<4;i++){
// vector<double>vec;
// for(int j=0;j<2;j++){
// vec.push_back(i+j);
// }
// dataX.push_back(vec);
// }
// for(int i=0;i<4;i++){
// for(int j=0;j<2;j++){
// cout<<dataX[i][j]<<" ";
// }
// cout<<endl;
// }
// for(int i=0;i<4;i++){
// dataY.push_back(i);
// }
// nn.train(dataX,dataY);
return 0;
}
BPUtils.h
#ifndef BP_UTILS
#define BP_UTILS
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <ctime>
#include <string.h>
#include <cstdio>
#include <fstream>
// Pseudo-random integer in [0, x) via rand(); x must be > 0.
#define random(x) (rand()%x)
using namespace std;
// Capacity of the fixed C-string buffers used when parsing fields.
#define MAXSIZE 99
// Globals shared with main.cpp (defined there).
// Test set: feature rows and labels.
extern vector<vector<double>>dataTest;
extern vector<double>dataTestY;
// Training set: feature rows and labels.
extern vector<vector<double>>trainDataX;
extern vector<double>trainDataY;
// Split `str` on any character contained in `delim`.
// Keeps strtok's semantics: runs of delimiter characters collapse, so
// empty tokens are never produced; an empty input yields an empty list.
// Rewritten with std::string searches — the previous version leaked the
// two `new char[]` scratch buffers and relied on strtok's global state.
vector<string> split(const string& str, const string& delim) {
	vector<string> res;
	// Start of the next token: first character that is NOT a delimiter.
	string::size_type start = str.find_first_not_of(delim);
	while (start != string::npos) {
		// End of the token: next delimiter character (or end of string).
		string::size_type end = str.find_first_of(delim, start);
		if (end == string::npos) {
			res.push_back(str.substr(start));
			break;
		}
		res.push_back(str.substr(start, end - start));
		start = str.find_first_not_of(delim, end);
	}
	return res;
}
// Largest value in a 2-D data set.
// Seeded with -HUGE_VAL instead of the old -999 sentinel, which silently
// returned -999 whenever every element was below it.
// Returns -HUGE_VAL for an empty data set. Takes the data by const
// reference to avoid copying the whole matrix.
double getMax(const vector<vector<double>>& dataSet){
	double best = -HUGE_VAL;
	for (size_t i = 0; i < dataSet.size(); i++) {
		for (size_t j = 0; j < dataSet[i].size(); j++) {
			if (best < dataSet[i][j]) {
				best = dataSet[i][j];
			}
		}
	}
	return best;
}
// Smallest value in a 2-D data set.
// Seeded with HUGE_VAL instead of the old 999 sentinel, which silently
// returned 999 whenever every element was above it.
// Returns HUGE_VAL for an empty data set. Takes the data by const
// reference to avoid copying the whole matrix.
double getMin(const vector<vector<double>>& dataSet){
	double best = HUGE_VAL;
	for (size_t i = 0; i < dataSet.size(); i++) {
		for (size_t j = 0; j < dataSet[i].size(); j++) {
			if (best > dataSet[i][j]) {
				best = dataSet[i][j];
			}
		}
	}
	return best;
}
//數(shù)據(jù)歸一化
//一般是x=(x-x.min)/x.max-x.min
void guiYiHua(vector<vector<double>>&dataSet){
double MYMAX=getMax(dataSet);
double MYMIN=getMin(dataSet);
for(int i=0;i<dataSet.size();i++){
for(int j=0;j<dataSet[i].size();j++){
dataSet[i][j]=(dataSet[i][j]-MYMIN)/(MYMAX-MYMIN);
}
}
}
//創(chuàng)建測(cè)試集的數(shù)據(jù)
void createTrainSet(){
fstream f("train.txt");
//保存讀入的每一行
string line;
vector<string>res;
int ii=0;
while(getline(f,line)){
res=split(line,"\t");
vector<double>vec1;
for(int i=0;i<res.size();i++){
//cout<<res[i]<<endl;
char ch[MAXSIZE];
strcpy(ch,res[i].c_str());
if(i!=2){
vec1.push_back(atof(ch));
}else{
trainDataY.push_back(atof(ch));
}
}
trainDataX.push_back(vec1);
ii++;
}
}
//創(chuàng)建訓(xùn)練集的數(shù)據(jù)
void createTestSet(){
fstream f("test.txt");
//保存讀入的每一行
string line;
vector<string>res;
int ii=0;
while(getline(f,line)){
res=split(line,"\t");
vector<double>vec1;
for(int i=0;i<res.size();i++){
//cout<<res[i]<<endl;
char ch[MAXSIZE];
strcpy(ch,res[i].c_str());
if(i!=2){
vec1.push_back(atof(ch));
}else{
dataTestY.push_back(atof(ch));
}
}
dataTest.push_back(vec1);
ii++;
}
}
//sigmoid激活函數(shù)
double sigmoid(double x){
return 1/(1+exp(-x));
}
//sigmoid函數(shù)的導(dǎo)數(shù)
double dsigmoid(double x){
return x*(1-x);
}
// A 3-layer (input / hidden / output) BP neural network trained by
// single-sample stochastic gradient descent with sigmoid activations.
// NOTE(review): the matrix scratch buffers inside the methods are C-style
// variable-length arrays (VLAs) — a GCC/Clang extension, not standard C++.
class NeuralNetwork{
public:
// Number of input units.
int inputLayers;
// Number of hidden units.
int hidenLayers;
// Number of output units.
int outputLayers;
// Input->hidden weight matrix.
// Rows: inputLayers + 1 (the extra row carries the bias weight).
// Cols: hidenLayers.
vector<vector<double>>VArr;
// Hidden->output weight matrix.
// Rows: hidenLayers. Cols: outputLayers.
vector<vector<double>>WArr;
private:
// Matrix product m3 = m1 * m2, where m1 is m x n, m2 is n x p and m3 is
// m x p; all three are flat row-major buffers.
void dot(const double* m1,const double* m2,double *m3,int m,int n,int p){
for(int i=0;i<m;++i)
{
for(int j=0;j<p;++j)
{
(*(m3+i*p+j))=0;
for(int k=0;k<n;++k)
{
(*(m3+i*p+j))+=(*(m1+i*n+k))*(*(m2+k*p+j));
}
}
}
}
// Copy a 2-D vector into a flat row-major array whose row stride is n.
void vectorToArr1(vector<vector<double>>vec,double *arr,int n){
for(int i=0;i<vec.size();i++){
for(int j=0;j<vec[i].size();j++){
(*(arr+i*n+j))=vec[i][j];
}
}
}
// Copy a 1-D vector into a flat array.
void vectorToArr2(vector<double>vec,double *arr){
for(int i=0;i<vec.size();i++){
(*(arr+i))=vec[i];
}
}
// Append the first m entries of a flat array onto vec.
void arrToVector1(double *arr,vector<double>&vec,int m){
for(int i=0;i<m;i++){
vec.push_back((*(arr+i)));
}
}
// Matrix transpose: m2 (n2 x n1) = transpose of m1 (n1 x n2).
void ZhuanZhi(const double*m1,double *m2,int n1,int n2){
for(int i=0;i<n1;i++){
for(int j=0;j<n2;j++){
(*(m2+j*n1+i))=(*(m1+i*n2+j));
}
}
}
// Forward pass used while measuring accuracy.
// test: one row of the test set (features only; the bias 1 is appended here).
// ArrL2: caller-supplied output buffer of length outputLayers; receives
// one sigmoid activation per output unit, interpreted as per-class
// scores — the largest is taken as the predicted class.
void predict(vector<double>test,double *ArrL2){
// Append the bias input.
test.push_back(1);
double testArr[1][inputLayers+1];
// Flatten the sample into a row vector [1 x (inputLayers+1)].
vectorToArr2(test,&testArr[0][0]);
double dotL1[1][hidenLayers];
double VArr_temp[inputLayers+1][hidenLayers];
vectorToArr1(VArr,&VArr_temp[0][0],hidenLayers);
// testArr [1 x (inputLayers+1)] dot VArr [(inputLayers+1) x hidenLayers].
dot(&testArr[0][0],&VArr_temp[0][0],&dotL1[0][0],1,inputLayers+1,hidenLayers);
// Hidden-layer activations.
double ArrL1[1][hidenLayers];
for(int i=0;i<hidenLayers;i++){
ArrL1[0][i]=sigmoid(dotL1[0][i]);
}
double dotL2[1][outputLayers];
double WArr_temp[hidenLayers][outputLayers];
vectorToArr1(WArr,&WArr_temp[0][0],outputLayers);
// ArrL1 [1 x hidenLayers] dot WArr [hidenLayers x outputLayers].
dot(&ArrL1[0][0],&WArr_temp[0][0],&dotL2[0][0],1,hidenLayers,outputLayers);
// Output-layer activations.
for(int i=0;i<outputLayers;i++){
(*(ArrL2+i))=sigmoid(dotL2[0][i]);
}
}
// Index of the largest element (argmax); -1 for an empty vector.
// The -999 floor is safe here because sigmoid outputs lie in (0, 1).
int getMaxIndex(vector<double>vec){
int index=-1;
double MYMAX=-999;
for(int i=0;i<vec.size();i++){
if(MYMAX<vec[i]){
MYMAX=vec[i];
index=i;
}
}
return index;
}
public:
// Build a network with the given input / hidden / output layer sizes and
// initialise both weight matrices with uniform random values in [-1, 1].
NeuralNetwork(int _inputLayers,int _hidenLayers,int _outputLayers){
this->inputLayers=_inputLayers;
hidenLayers=_hidenLayers;
outputLayers=_outputLayers;
// Input->hidden weights, including the extra bias row.
for(int i=0;i<inputLayers+1;i++){
vector<double>vec;
for(int j=0;j<hidenLayers;j++){
vec.push_back((double)rand()/RAND_MAX*2-1);
}
VArr.push_back(vec);
}
// Hidden->output weights.
for(int i=0;i<hidenLayers;i++){
vector<double>vec;
for(int j=0;j<outputLayers;j++){
vec.push_back((double)rand()/RAND_MAX*2-1);
}
WArr.push_back(vec);
}
}
// Train on dataX / dataY with learning rate lr for `epochs` iterations of
// single-sample SGD. Every 10000 iterations the accuracy over the global
// test set (dataTest / dataTestY) is printed to stdout.
// NOTE(review): dataY holds one numeric label per sample, yet the output
// delta below compares that same scalar against EVERY output unit —
// confirm this is the intended target encoding for multi-class data.
void train(vector<vector<double>>dataX,vector<double>dataY,double lr=0.03,int epochs=1000000){
double arrL1[1][hidenLayers];
// Flat copy of VArr used by the forward pass.
// NOTE(review): this copy is made ONCE here and never refreshed inside
// the epoch loop, so the hidden-layer forward pass keeps using the
// initial V weights even though VArr itself is updated every epoch
// (WArr_temp, by contrast, IS rebuilt each iteration). Likely a bug.
double VArr_temp[inputLayers+1][hidenLayers];
double hangx_temp[1][inputLayers+1];
vectorToArr1(VArr,&VArr_temp[0][0],hidenLayers);
double hangxT[inputLayers+1][1];
// NOTE(review): unused scratch buffer.
double hangxDotVArr[1][hidenLayers];
double arrL2[1][outputLayers];
double WArr_temp[hidenLayers][outputLayers];
double arrL2_delta[1][outputLayers];
double arrL1_delta[1][hidenLayers];
// Per-unit error (target - output).
double E;
// Derivative of the sigmoid at the unit's output.
double dao;
double dotTemp[hidenLayers][outputLayers];
double WArr_tempT[outputLayers][hidenLayers];
double arrL1T[hidenLayers][1];
double dotTempp[inputLayers+1][hidenLayers];
// Seed the RNG used for random sample selection.
srand((int)time(0));
// Append the bias column: an n x inputLayers data set becomes
// n x (inputLayers+1), with a constant 1 in the last column.
for(int i=0;i<dataX.size();i++){
dataX[i].push_back(1);
}
// Weight-update loop.
for(int n=0;n<epochs;n++){
// Pick one training row at random for this update.
int iii=random(dataX.size());
vector<double>hangx=dataX[iii];
// Forward pass, hidden layer: (row dot V) fed through the sigmoid.
vectorToArr2(hangx,&hangx_temp[0][0]);
// hangx [1 x (inputLayers+1)] dot VArr [(inputLayers+1) x hidenLayers].
dot(&hangx_temp[0][0],&VArr_temp[0][0],&arrL1[0][0],1,inputLayers+1,hidenLayers);
for(int k1=0;k1<hidenLayers;k1++){
arrL1[0][k1]=sigmoid(arrL1[0][k1]);
}
vectorToArr1(WArr,&WArr_temp[0][0],outputLayers);
// arrL1 [1 x hidenLayers] dot WArr_temp [hidenLayers x outputLayers].
dot(&arrL1[0][0],&WArr_temp[0][0],&arrL2[0][0],1,hidenLayers,outputLayers);
// Output layer: activation, then delta = error * sigmoid'(output).
for(int k1=0;k1<outputLayers;k1++){
arrL2[0][k1]=sigmoid(arrL2[0][k1]);
E=dataY[iii]-arrL2[0][k1];
dao=dsigmoid(arrL2[0][k1]);
arrL2_delta[0][k1]=E*dao;
}
// Back-propagate: hidden delta = (output delta dot W^T) * sigmoid'(L1).
ZhuanZhi(&WArr_temp[0][0],&WArr_tempT[0][0],hidenLayers,outputLayers);
// arrL2_delta [1 x outputLayers] dot WArr_tempT [outputLayers x hidenLayers].
dot(&arrL2_delta[0][0],&WArr_tempT[0][0],&arrL1_delta[0][0],1,outputLayers,hidenLayers);
// Multiply by the derivative of the hidden-layer output.
for(int k1=0;k1<hidenLayers;k1++){
double ii=arrL1_delta[0][k1];
arrL1_delta[0][k1]=ii*dsigmoid(arrL1[0][k1]);
}
// Update W: W += lr * (L1^T dot output delta).
ZhuanZhi(&arrL1[0][0],&arrL1T[0][0],1,hidenLayers);
// arrL1T [hidenLayers x 1] dot arrL2_delta [1 x outputLayers].
dot(&arrL1T[0][0],&arrL2_delta[0][0],&dotTemp[0][0],hidenLayers,1,outputLayers);
for(int k1=0;k1<hidenLayers;k1++){
for(int k2=0;k2<outputLayers;k2++){
// Scale the step by the learning rate.
WArr[k1][k2]+=(lr*dotTemp[k1][k2]);
}
}
// Update V: V += lr * (input^T dot hidden delta).
ZhuanZhi(&hangx_temp[0][0],&hangxT[0][0],1,inputLayers+1);
// hangxT [(inputLayers+1) x 1] dot arrL1_delta [1 x hidenLayers].
dot(&hangxT[0][0],&arrL1_delta[0][0],&dotTempp[0][0],inputLayers+1,1,hidenLayers);
for(int k1=0;k1<inputLayers+1;k1++){
for(int k2=0;k2<hidenLayers;k2++){
VArr[k1][k2]+=(lr*dotTempp[k1][k2]);
}
}
// Every 10000 iterations, report accuracy on the global test set.
if(n%10000==0){
// Per-class scores for one prediction.
double resultArr[1][outputLayers];
int index;
// Count of correctly classified test samples.
int num=0;
// Fraction of the test set classified correctly.
double accuracy=0;
// Walk the whole test set.
for(int k1=0;k1<dataTest.size();k1++){
vector<double>result;
// Predict row k1; scores land in resultArr.
predict(dataTest[k1],&resultArr[0][0]);
// Convert the flat buffer back to a vector.
arrToVector1(&resultArr[0][0],result,outputLayers);
// Predicted class = argmax of the scores.
index=getMaxIndex(result);
if(index==dataTestY[k1]){
num++;
}
}
accuracy=(double)num/dataTestY.size();
cout<<"epoch: "<<n<<", "<<"accuracy: "<<accuracy<<endl;
}
}
}
};
#endif
訓(xùn)練效果