Commit 78bdfb45 by xiaotong

learning rate

parent 58d5fc31
@@ -797,6 +797,7 @@ gradient for normalize
 void XMathGrad::GradNormalize(XTensor * node)
 {
+    ShowNTErrors("This is really a bad piece of code!!!");
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
......
@@ -56,6 +56,7 @@ void T2TTrainer::Init(int argc, const char ** argv)
     LoadParamBool(argc, argv, "mem", &useMem, useMem);
     LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F);
+    LoadParamFloat(argc, argv, "lrbias", &lrbias, 0);
     LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
     LoadParamInt(argc, argv, "wbatch", &wBatchSize, 1);
     LoadParamInt(argc, argv, "nepoch", &nepoch, 1);
......
@@ -138,7 +139,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
     net.Backward(output, batch, CROSSENTROPY);
     /* learning rate */
-    lr = lrate * (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));
+    lr = lrate * (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5F - lrbias), (step + 1) * pow(nwarmup, -1.5F - lrbias));
     /* update the parameters */
     Update(model, lr);
......
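This is the substantive change of the commit: the two decay exponents of the Transformer warmup schedule, -0.5 and -1.5, become -0.5 - lrbias and -1.5 - lrbias, so a positive lrbias steepens the decay and lowers the peak learning rate, while lrbias = 0 reproduces the old line exactly. A self-contained sketch of the schedule (d, step, and nwarmup as in the trainer; the driver in main is illustrative):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

/* sketch of the schedule above: lr rises linearly for the first nwarmup
   steps, then decays as (step + 1)^(-0.5 - lrbias); lrbias = 0 gives the
   standard Transformer schedule (d = model dimension) */
float LearningRate(float lrate, int d, int step, int nwarmup, float lrbias)
{
    double decay  = std::pow(step + 1, -0.5 - lrbias);
    double warmup = (step + 1) * std::pow(nwarmup, -1.5 - lrbias);
    return lrate * (1.0F / std::sqrt((float)d)) * (float)std::min(decay, warmup);
}

int main()
{
    /* with lrbias = 0 the rate peaks around step nwarmup; try lrbias = 0.1
       to see the faster decay introduced by this commit */
    for (int step = 0; step <= 8000; step += 2000)
        std::printf("step %5d: lr = %g\n",
                    step, LearningRate(1.0F, 512, step, 4000, 0.0F));
    return 0;
}
```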
@@ -69,6 +69,9 @@ public:
     /* learning rate */
     float lrate;
+
+    /* the bias added to the learning rate decay exponents (it lowers the peak learning rate in training) */
+    float lrbias;
     /* sentence batch size */
     int sBatchSize;
......
@@ -394,7 +394,7 @@ void * XMem::AllocDynamic(int myDevID, MTYPE mySize)
     CheckNTErrors(cudaMemset(mem, 0, b->size + 2 * CUDA_PITCH) == cudaSuccess, "Cannot update the memory.");
     SetDevice(devIDBackup);
 #else
-    ShowNTErrors("Please specify USE_CUDA for compiling this program.");> NiuTrans.Network.exe!nts::XMem::AllocDynamic(int myDevID, unsigned __int64 mySize) 387 C++
+    ShowNTErrors("Please specify USE_CUDA for compiling this program.");
 #endif
 }
......
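The removed line here had a Visual Studio call-stack fragment (NiuTrans.Network.exe!nts::XMem::AllocDynamic(...) 387 C++) pasted onto the end of the error call, apparently by accident; the fix restores the plain statement. The surrounding #ifdef shows the usual guard for CUDA-only code: the cudaMemset path compiles only when USE_CUDA is defined, and non-CUDA builds fail loudly at run time. A standalone sketch of that pattern (the helper name and error handling are illustrative, not NiuTensor's API):

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#ifdef USE_CUDA
#include <cuda_runtime.h>
#endif

/* zero a device buffer when compiled with -DUSE_CUDA; otherwise fail
   loudly at run time, mirroring the #else branch in AllocDynamic above
   (this helper is illustrative, not part of NiuTensor) */
void ZeroDeviceMemory(void * mem, std::size_t size)
{
#ifdef USE_CUDA
    if (cudaMemset(mem, 0, size) != cudaSuccess) {
        std::fprintf(stderr, "Cannot update the memory.\n");
        std::exit(1);
    }
#else
    (void)mem; (void)size;
    std::fprintf(stderr, "Please specify USE_CUDA for compiling this program.\n");
    std::exit(1);
#endif
}
```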