Commit 78bdfb45 by xiaotong

learning rate

parent 58d5fc31
@@ -797,6 +797,7 @@ gradient for normalize
 void XMathGrad::GradNormalize(XTensor * node)
 {
+    ShowNTErrors("This is really a bad piece of code!!!");
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
......
@@ -56,6 +56,7 @@ void T2TTrainer::Init(int argc, const char ** argv)
     LoadParamBool(argc, argv, "mem", &useMem, useMem);
     LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F);
+    LoadParamFloat(argc, argv, "lrbias", &lrbias, 0);
     LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
     LoadParamInt(argc, argv, "wbatch", &wBatchSize, 1);
     LoadParamInt(argc, argv, "nepoch", &nepoch, 1);
......
@@ -138,7 +139,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
     net.Backward(output, batch, CROSSENTROPY);
     /* learning rate */
-    lr = lrate * (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));
+    lr = lrate * (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5F - lrbias), (step + 1) * pow(nwarmup, -1.5F - lrbias));
     /* update the parameters */
     Update(model, lr);
......
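This is the substantive change of the commit: the two decay exponents of the Transformer warmup schedule, -0.5 and -1.5, become -0.5 - lrbias and -1.5 - lrbias, so a positive lrbias steepens the decay and lowers the peak learning rate, while lrbias = 0 reproduces the old line exactly. A self-contained sketch of the schedule (d, step, and nwarmup as in the trainer; the driver in main is illustrative):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

/* sketch of the schedule above: lr rises linearly for the first nwarmup
   steps, then decays as (step + 1)^(-0.5 - lrbias); lrbias = 0 gives the
   standard Transformer schedule (d = model dimension) */
float LearningRate(float lrate, int d, int step, int nwarmup, float lrbias)
{
    double decay  = std::pow(step + 1, -0.5 - lrbias);
    double warmup = (step + 1) * std::pow(nwarmup, -1.5 - lrbias);
    return lrate * (1.0F / std::sqrt((float)d)) * (float)std::min(decay, warmup);
}

int main()
{
    /* with lrbias = 0 the rate peaks around step nwarmup; try lrbias = 0.1
       to see the faster decay introduced by this commit */
    for (int step = 0; step <= 8000; step += 2000)
        std::printf("step %5d: lr = %g\n",
                    step, LearningRate(1.0F, 512, step, 4000, 0.0F));
    return 0;
}
```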
@@ -69,6 +69,9 @@ public:
     /* learning rate */
     float lrate;
+
+    /* the bias added to the learning rate decay exponents (it lowers the peak learning rate in training) */
+    float lrbias;
     /* sentence batch size */
     int sBatchSize;
......
@@ -394,7 +394,7 @@ void * XMem::AllocDynamic(int myDevID, MTYPE mySize)
     CheckNTErrors(cudaMemset(mem, 0, b->size + 2 * CUDA_PITCH) == cudaSuccess, "Cannot update the memory.");
     SetDevice(devIDBackup);
 #else
-    ShowNTErrors("Please specify USE_CUDA for compiling this program.");> NiuTrans.Network.exe!nts::XMem::AllocDynamic(int myDevID, unsigned __int64 mySize) 387 C++
+    ShowNTErrors("Please specify USE_CUDA for compiling this program.");
 #endif
 }
......
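The removed line here had a Visual Studio call-stack fragment (NiuTrans.Network.exe!nts::XMem::AllocDynamic(...) 387 C++) pasted onto the end of the error call, apparently by accident; the fix restores the plain statement. The surrounding #ifdef shows the usual guard for CUDA-only code: the cudaMemset path compiles only when USE_CUDA is defined, and non-CUDA builds fail loudly at run time. A standalone sketch of that pattern (the helper name and error handling are illustrative, not NiuTensor's API):

```cpp
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#ifdef USE_CUDA
#include <cuda_runtime.h>
#endif

/* zero a device buffer when compiled with -DUSE_CUDA; otherwise fail
   loudly at run time, mirroring the #else branch in AllocDynamic above
   (this helper is illustrative, not part of NiuTensor) */
void ZeroDeviceMemory(void * mem, std::size_t size)
{
#ifdef USE_CUDA
    if (cudaMemset(mem, 0, size) != cudaSuccess) {
        std::fprintf(stderr, "Cannot update the memory.\n");
        std::exit(1);
    }
#else
    (void)mem; (void)size;
    std::fprintf(stderr, "Please specify USE_CUDA for compiling this program.\n");
    std::exit(1);
#endif
}
```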