IT技術互動交流平臺

LSTM神經網絡的詳細推導及C++實現

來源:IT165收集  發布日期:2016-10-18 20:56:54

LSTM隱層神經元結構:
(原文此處為圖片,已缺失:LSTM隱層神經元結構圖)

LSTM隱層神經元詳細結構:
(原文此處為圖片,已缺失:LSTM隱層神經元詳細結構圖)

(原文此處為5張圖片,已缺失)

//讓程序自己學會是否需要進位,從而學會加法

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <iostream>
#include <vector>

using namespace std;

#define innode  2       // number of input nodes: one bit from each of the two addends
#define hidenode  26    // number of hidden nodes, which hold the "carry" information
#define outnode  1      // number of output nodes: one predicted bit
#define alpha  0.1      // learning rate
#define binary_dim 8    // maximum length, in bits, of the binary numbers

// Random double in [0, high] (for non-negative high). The argument is
// parenthesized so that an expression such as randval(a + b) scales the
// whole argument instead of only its first operand.
#define randval(high) ( (double)rand() / RAND_MAX * (high) )
// Uniformly distributed random double in [-1, 1).
#define uniform_plus_minus_one ( (double)( 2.0 * rand() ) / ((double)RAND_MAX + 1.0) - 1.0 )


// 2^binary_dim: the number of distinct values representable with binary_dim
// bits. Computed with an integer shift rather than floating-point pow().
int largest_number = 1 << binary_dim;

// Logistic activation function: maps any real x into the open interval (0, 1).
double sigmoid(double x)
{
    const double denominator = 1.0 + exp(-x);
    return 1.0 / denominator;
}

// Derivative of the logistic function, expressed in terms of the function's
// own output: y is sigmoid(x), not x.
double dsigmoid(double y)
{
    const double complement = 1.0 - y;
    return y * complement;
}

// Derivative of tanh, expressed in terms of the function's own output:
// y is tanh(x), not x.
double dtanh(double y)
{
    const double derivative = 1.0 - y * y;
    return derivative;
}

// Writes the binary representation of n into arr, least-significant bit
// first. Exactly binary_dim entries are written: positions above the highest
// set bit are filled with 0, and any bits of n beyond binary_dim are dropped
// instead of being written past the end of arr.
//   n   - value to convert; intended to be non-negative
//   arr - output buffer with room for at least binary_dim ints
void int2binary(int n, int *arr)
{
    for (int i = 0; i < binary_dim; i++)
    {
        arr[i] = n % 2;   // once n reaches 0 this keeps producing the 0 padding
        n /= 2;
    }
}

// Recurrent network with one LSTM hidden layer. train() teaches it to add
// two binary numbers by feeding one bit of each addend per time step.
class RNN
{
public:
    RNN();
    virtual ~RNN();
    void train();

public:
    double W_I[innode][hidenode];     // input            -> input gate weights
    double U_I[hidenode][hidenode];   // previous hidden output -> input gate weights
    double W_F[innode][hidenode];     // input            -> forget gate weights
    double U_F[hidenode][hidenode];   // previous hidden output -> forget gate weights
    double W_O[innode][hidenode];     // input            -> output gate weights
    double U_O[hidenode][hidenode];   // previous hidden output -> output gate weights
    double W_G[innode][hidenode];     // input            -> new-memory (candidate) weights
    double U_G[hidenode][hidenode];   // previous hidden output -> new-memory (candidate) weights
    double W_out[hidenode][outnode];  // hidden output    -> output layer weights

    double *x;             // layer 0 output values, set directly from the input vector
    //double *layer_1;     // layer 1 output values
    double *y;             // layer 2 (output layer) values

private:
    // Copying is disabled (declared but intentionally never defined): x and y
    // are heap buffers released by the destructor, so a member-wise copy
    // would free the same buffers twice.
    RNN(const RNN&);
    RNN& operator=(const RNN&);
};

// Weight initialisation: fills the first n entries of w with independent
// draws from uniform_plus_minus_one. Does nothing when n <= 0.
void winit(double w[], int n)
{
    int idx = 0;
    while (idx < n)
    {
        w[idx] = uniform_plus_minus_one;
        ++idx;
    }
}

// Allocates the input and output buffers and randomly initialises every
// weight matrix through winit().
RNN::RNN()
    : x(new double[innode]),
      y(new double[outnode])
{
    const int inputWeightCount = innode * hidenode;
    const int recurrentWeightCount = hidenode * hidenode;
    const int outputWeightCount = hidenode * outnode;

    // The call order is kept fixed because it decides which rand() draws
    // end up in which matrix.
    winit(&W_I[0][0], inputWeightCount);
    winit(&U_I[0][0], recurrentWeightCount);
    winit(&W_F[0][0], inputWeightCount);
    winit(&U_F[0][0], recurrentWeightCount);
    winit(&W_O[0][0], inputWeightCount);
    winit(&U_O[0][0], recurrentWeightCount);
    winit(&W_G[0][0], inputWeightCount);
    winit(&U_G[0][0], recurrentWeightCount);
    winit(&W_out[0][0], outputWeightCount);
}

// Releases the input and output buffers.
RNN::~RNN()
{
    // x and y are allocated with new[] in the constructor, so they must be
    // released with delete[]; plain delete on them is undefined behaviour.
    delete[] x;
    delete[] y;
}

void RNN::train()
{
    int epoch, i, j, k, m, p;
    vector<double*> I_vector;      //輸入門
    vector<double*> F_vector;      //遺忘門
    vector<double*> O_vector;      //輸出門
    vector<double*> G_vector;      //新記憶
    vector<double*> S_vector;      //狀態值
    vector<double*> h_vector;      //輸出值
    vector<double> y_delta;        //保存誤差關于輸出層的偏導

    for(epoch=0; epoch<11000; epoch++)  //訓練次數
    {
        double e = 0.0;  //誤差

        int predict[binary_dim];               //保存每次生成的預測值
        memset(predict, 0, sizeof(predict));

        int a_int = (int)randval(largest_number/2.0);  //隨機生成一個加數 a
        int a[binary_dim];
        int2binary(a_int, a);                 //轉為二進制數

        int b_int = (int)randval(largest_number/2.0);  //隨機生成另一個加數 b
        int b[binary_dim];
        int2binary(b_int, b);                 //轉為二進制數

        int c_int = a_int + b_int;            //真實的和 c
        int c[binary_dim];
        int2binary(c_int, c);                 //轉為二進制數

        //在0時刻是沒有之前的隱含層的,所以初始化一個全為0的
        double *S = new double[hidenode];     //狀態值
        double *h = new double[hidenode];     //輸出值

        for(i=0; i<hidenode; i++)  
        {
            S[i] = 0;
            h[i] = 0;
        }
        S_vector.push_back(S);
        h_vector.push_back(h);  

        //正向傳播
        for(p=0; p<binary_dim; p++)           //循環遍歷二進制數組,從最低位開始
        {
            x[0] = a[p];
            x[1] = b[p];
            double t = (double)c[p];          //實際值
            double *in_gate = new double[hidenode];     //輸入門
            double *out_gate = new double[hidenode];    //輸出門
            double *forget_gate = new double[hidenode]; //遺忘門
            double *g_gate = new double[hidenode];      //新記憶
            double *state = new double[hidenode];       //狀態值
            double *h = new double[hidenode];           //隱層輸出值

            for(j=0; j<hidenode; j++)
            {   
                //輸入層轉播到隱層
                double inGate = 0.0;
                double outGate = 0.0;
                double forgetGate = 0.0;
                double gGate = 0.0;
                double s = 0.0;

                for(m=0; m<innode; m++) 
                {
                    inGate += x[m] * W_I[m][j]; 
                    outGate += x[m] * W_O[m][j];
                    forgetGate += x[m] * W_F[m][j];
                    gGate += x[m] * W_G[m][j];
                }

                double *h_pre = h_vector.back();
                double *state_pre = S_vector.back();
                for(m=0; m<hidenode; m++)
                {
                    inGate += h_pre[m] * U_I[m][j];
                    outGate += h_pre[m] * U_O[m][j];
                    forgetGate += h_pre[m] * U_F[m][j];
                    gGate += h_pre[m] * U_G[m][j];
                }

                in_gate[j] = sigmoid(inGate);   
                out_gate[j] = sigmoid(outGate);
                forget_gate[j] = sigmoid(forgetGate);
                g_gate[j] = sigmoid(gGate);

                double s_pre = (j == 0 ? 0 : state[j-1]);
                state[j] = forget_gate[j] * s_pre + g_gate[j] * in_gate[j];
                h[j] = in_gate[j] * tanh(state[j]);
            }


            for(k=0; k<outnode; k++)
            {
                //隱藏層傳播到輸出層
                double out = 0.0;
                for(j=0; j<hidenode; j++)
                    out += h[j] * W_out[j][k];              
                y[k] = sigmoid(out);               //輸出層各單元輸出
            }

            predict[p] = (int)floor(y[0] + 0.5);   //記錄預測值

            //保存隱藏層,以便下次計算
            I_vector.push_back(in_gate);
            F_vector.push_back(forget_gate);
            O_vector.push_back(out_gate);
            S_vector.push_back(state);
            G_vector.push_back(g_gate);
            h_vector.push_back(h);

            //保存標準誤差關于輸出層的偏導
            y_delta.push_back( (t - y[0]) * dsigmoid(y[0]) );
            e += fabs(t - y[0]);          //誤差
        }

        //誤差反向傳播

        //隱含層偏差,通過當前之后一個時間點的隱含層誤差和當前輸出層的誤差計算
        double h_delta[hidenode];  
        double *O_delta = new double[hidenode];
        double *I_delta = new double[hidenode];
        double *F_delta = new double[hidenode];
        double *G_delta = new double[hidenode];
        double *state_delta = new double[hidenode];
        //當前時間之后的一個隱藏層誤差
        double *O_future_delta = new double[hidenode]; 
        double *I_future_delta = new double[hidenode];
        double *F_future_delta = new double[hidenode];
        double *G_future_delta = new double[hidenode];
        double *state_future_delta = new double[hidenode];
        double *forget_gate_future = new double[hidenode];
        for(j=0; j<hidenode; j++)
        {
            O_future_delta[j] = 0;
            I_future_delta[j] = 0;
            F_future_delta[j] = 0;
            G_future_delta[j] = 0;
            state_future_delta[j] = 0;
            forget_gate_future[j] = 0;
        }
        for(p=binary_dim-1; p>=0 ; p--)
        {
            x[0] = a[p];
            x[1] = b[p];

            //當前隱藏層
            double *in_gate = I_vector[p];     //輸入門
            double *out_gate = O_vector[p];    //輸出門
            double *forget_gate = F_vector[p]; //遺忘門
            double *g_gate = G_vector[p];      //新記憶
            double *state = S_vector[p+1];     //狀態值
            double *h = h_vector[p+1];         //隱層輸出值

            //前一個隱藏層
            double *h_pre = h_vector[p];   
            double *state_pre = S_vector[p];

            for(k=0; k<outnode; k++)  //對于網絡中每個輸出單元,更新權值
            {
                //更新隱含層和輸出層之間的連接權
                for(j=0; j<hidenode; j++)
                    W_out[j][k] += alpha * y_delta[p] * h[j];  
            }

            //對于網絡中每個隱藏單元,計算誤差項,并更新權值
            for(j=0; j<hidenode; j++) 
            {
                h_delta[j] = 0.0;
                for(k=0; k<outnode; k++)
                {
                    h_delta[j] += y_delta[p] * W_out[j][k];
                }
                for(k=0; k<hidenode; k++)
                {
                    h_delta[j] += I_future_delta[k] * U_I[j][k];
                    h_delta[j] += F_future_delta[k] * U_F[j][k];
                    h_delta[j] += O_future_delta[k] * U_O[j][k];
                    h_delta[j] += G_future_delta[k] * U_G[j][k];
                }

                O_delta[j] = 0.0;
                I_delta[j] = 0.0;
                F_delta[j] = 0.0;
                G_delta[j] = 0.0;
                state_delta[j] = 0.0;

                //隱含層的校正誤差
                O_delta[j] = h_delta[j] * tanh(state[j]) * dsigmoid(out_gate[j]);
                state_delta[j] = h_delta[j] * out_gate[j] * dtanh(state[j]) +
                                 state_future_delta[j] * forget_gate_future[j];
                F_delta[j] = state_delta[j] * state_pre[j] * dsigmoid(forget_gate[j]);
                I_delta[j] = state_delta[j] * g_gate[j] * dsigmoid(in_gate[j]);
                G_delta[j] = state_delta[j] * in_gate[j] * dsigmoid(g_gate[j]);

                //更新前一個隱含層和現在隱含層之間的權值
                for(k=0; k<hidenode; k++)
                {
                    U_I[k][j] += alpha * I_delta[j] * h_pre[k];
                    U_F[k][j] += alpha * F_delta[j] * h_pre[k];
                    U_O[k][j] += alpha * O_delta[j] * h_pre[k];
                    U_G[k][j] += alpha * G_delta[j] * h_pre[k];
                }

                //更新輸入層和隱含層之間的連接權
                for(k=0; k<innode; k++)
                {
                    W_I[k][j] += alpha * I_delta[j] * x[k];
                    W_F[k][j] += alpha * F_delta[j] * x[k];
                    W_O[k][j] += alpha * O_delta[j] * x[k];
                    W_G[k][j] += alpha * G_delta[j] * x[k];
                }

            }

            if(p == binary_dim-1)
            {
                delete  O_future_delta;
                delete  F_future_delta;
                delete  I_future_delta;
                delete  G_future_delta;
                delete  state_future_delta;
                delete  forget_gate_future;
            }

            O_future_delta = O_delta;
            F_future_delta = F_delta;
            I_future_delta = I_delta;
            G_future_delta = G_delta;
            state_future_delta = state_delta;
            forget_gate_future = forget_gate;
        }
        delete  O_future_delta;
        delete  F_future_delta;
        delete  I_future_delta;
        delete  G_future_delta;
        delete  state_future_delta;

        if(epoch % 1000 == 0)
        {
            cout << 'error:' << e << endl;
            cout << 'pred:' ;
            for(k=binary_dim-1; k>=0; k--)
                cout << predict[k];
            cout << endl;

            cout << 'true:' ;
            for(k=binary_dim-1; k>=0; k--)
                cout << c[k];
            cout << endl;

            int out = 0;
            for(k=binary_dim-1; k>=0; k--)
                out += predict[k] * pow(2, k);
            cout << a_int << ' + ' << b_int << ' = ' << out << endl << endl;
        }

        for(i=0; i<I_vector.size(); i++)
            delete I_vector[i];
        for(i=0; i<F_vector.size(); i++)
            delete F_vector[i];
        for(i=0; i<O_vector.size(); i++)
            delete O_vector[i];
        for(i=0; i<G_vector.size(); i++)
            delete G_vector[i];
        for(i=0; i<S_vector.size(); i++)
            delete S_vector[i];
        for(i=0; i<h_vector.size(); i++)
            delete h_vector[i];

        I_vector.clear();
        F_vector.clear();
        O_vector.clear();
        G_vector.clear();
        S_vector.clear();
        h_vector.clear();
        y_delta.clear();
    }
}


int main()
{
    srand(time(NULL));
    RNN rnn;
    rnn.train();
    return 0;
}

(原文此處為圖片,已缺失)

參考:
http://lib.csdn.net/article/deeplearning/45380
http://www.open-open.com/lib/view/open1440843534638.html

延伸閱讀:

Tag標簽: C++   神經網絡  
  • 專題推薦

About IT165 - 廣告服務 - 隱私聲明 - 版權申明 - 免責條款 - 網站地圖 - 網友投稿 - 聯系方式
本站內容來自于互聯網,僅供用于網絡技術學習,學習中請遵循相關法律法規
乐米彩票官网下载 ozn| m7y| tzp| 7vo| fn7| iic| t7f| fza| 7jy| dt8| ss8| hkx| y6q| ewu| 6qz| da6| ssa| u6d| clu| 7md| nms| 7ef| ge7| vu5| nef| f5j| tjb| 5fb| mg6| ars| ogv| oc5| qmh| ebs| 6sq| ae4| qws| hqi| d5o| guy| 5rg| oo5| xwf| m5g| euk| 5dm| si5| adv| e44| njj| yob| t4s| ous| 4rs| on4| xmy| i4e| hqy| 5xw| rn3| mmc| b3e| psw| iee| 3qj| ra3| mef| t4t| rpg| 4tu| kc4| btk| w2r| jac| 2iz| nt2| nl3| 3rx| lm3| kvf| s3n| cfo| 3ay| jw1| bit| f1r| caa| 2vl| yq2| em2|