Commit dee31741 by liyinqiao

Merge branch liyinqiao.

parent 174d7757
差异被折叠。 点击展开。
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
# NiuTrans.Tensor环境配置
## 注意事项
CUDA最新版本9.2尚且不支持VS2017最新版本,因此建议使用CUDA版本为9.0或9.1,建议使用VS版本为VS2015,或使用VS2017时安装v140工具集,解决方案平台设置为x64。
## CUDA配置
在已安装好VS、CUDA并配置好环境变量后,一些关键的CUDA配置选项如下所示,以下配置选项在 **项目 -> 属性** 中可以找到。
>$(CUDA_PATH)\include
加入到 **VC++目录 -> 包含** 中。
>$(CUDA_PATH)\lib\Win32
加入到 **VC++目录 -> 库** 中。
>cuda.lib;cudadevrt.lib;cudart.lib;cudart_static.lib;nvcuvid.lib;OpenCL.lib;cublas.lib;curand.lib;
加入到 **链接器->输入->附加依赖项** 中。
配置完成后,右键 **工程->项目依赖性** ,选择CUDA9。
在.cu文件上右键属性,在项类型中选择"CUDA C/C++"(最好搜索.cu文件,然后全选设置)。
## 其他配置
**C/C++->常规->SDL检查**,设为否。
**C/C++->预处理器->预处理器定义** 中,添加
>USE_CUDA;USE_BLAS;WIN32;MKL;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CONSOLE;
**链接器->系统->子系统**,设置为控制台。
**常规->字符集**,使用Unicode字符集。
**调试->命令参数**中设置可执行文件所需要的参数。
This source diff could not be displayed because it is too large. You can view the blob instead.
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "./network/XNet.h"
#include "./tensor/XUtility.h"
#include "./tensor/function/FHeader.h"
#include "./tensor/core/CHeader.h"
#include "./tensor/test/Test.h"
#include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/NMT.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace fnnlm;
using namespace nmt;
/*
entry point: dispatch on the first command-line option.
"-test"  -> run the unit-test suite
"-fnnlm" -> run the FNN language-model sample
"-t2t"   -> run the Transformer NMT sample
anything else prints a short usage message.
*/
int main( int argc, const char ** argv )
{
    const bool hasOption = (argc > 1);

    if (hasOption && strcmp(argv[1], "-test") == 0) {
        /* unit tests */
        Test();
    }
    else if (hasOption && strcmp(argv[1], "-fnnlm") == 0) {
        /* the option itself is consumed; pass the rest through */
        FNNLMMain(argc - 1, argv + 1);
    }
    else if (hasOption && strcmp(argv[1], "-t2t") == 0) {
        NMTMain(argc - 1, argv + 1);
    }
    else {
        fprintf(stderr, "Thanks for using NiuTensor! This is a library for building\n");
        fprintf(stderr, "neural networks in an easy way. \n\n");
        fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
        fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
    }

    return 0;
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "XNet.h"
#include "../tensor/XUtility.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/test/Test.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
void BackwardTest();
void TransposeTest();
void SumDimTest();
using namespace nts;
using namespace fnnlm;
using namespace transformer;
/*
entry point: choose a mode from the first command-line option.
"-test"  -> unit tests; "-fnnlm" -> FNNLM sample; "-t2t" -> Transformer sample.
With no recognized option a usage message is printed.
*/
int main( int argc, const char ** argv )
{
    //_CrtSetBreakAlloc(896);
    const bool hasArg = (argc > 1);

    if (hasArg && strcmp(argv[1], "-test") == 0) {
        Test();
    }
    else if (hasArg && strcmp(argv[1], "-fnnlm") == 0) {
        /* strip the mode flag before forwarding the arguments */
        FNNLMMain(argc - 1, argv + 1);
    }
    else if (hasArg && strcmp(argv[1], "-t2t") == 0) {
        TransformerMain(argc - 1, argv + 1);
    }
    else {
        fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
        fprintf(stderr, "neural networks in an easy way. \n\n");
        fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
    }
    //_CrtDumpMemoryLeaks();

    return 0;
}
/*
small end-to-end exercise of automatic differentiation:
build c = DivDim(a, b, 0) for a 2x3 tensor a and a length-2 tensor b,
dump the network, and run back-propagation from c.
All output goes to stderr.
*/
void BackwardTest()
{
    XNet net;

    XTensor a;
    XTensor b;
    XTensor c;
    XTensor mean;
    XTensor origin;

    InitTensor2D(&a, 2, 3);
    InitTensor1D(&b, 2);

    a.SetZeroAll();
    b.SetZeroAll();

    /* fill a row-by-row with 1..6 */
    float v = 1.0F;
    for (int row = 0; row < 2; row++) {
        for (int col = 0; col < 3; col++) {
            a.Set2D(v, row, col);
            v += 1.0F;
        }
    }

    b.Set1D(2.0F, 0);
    b.Set1D(1.0F, 1);

    /* divide each column block of a by the matching entry of b */
    c = DivDim(a, b, 0);
    c.Dump(stderr, "c:");

    XLink::ShowNetwork(stderr, &c);

    net.Backward(c);
    net.Dump(stderr);
}
/*
benchmark the throughput of _Split, _Merge and _Sum on GPU 0.

Each operation runs `loops` times; timing is reported twice, once via
wall-clock (GetClock) and once via CUDA events (device time). Only
compiled when USE_CUDA is defined; otherwise the function is a no-op.

Fix: the six cudaEvent_t handles were created but never destroyed,
leaking CUDA driver resources on every call — cudaEventDestroy is now
called before returning. The unused local `nnn` was removed.
*/
void TransposeTest()
{
#ifdef USE_CUDA
    /* memory pool on device 0; constructing it reserves the pool */
    XMem mem0(0, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
    //XMem mem1(1, UNI_FREE, MILLION * 64, 1024, MILLION * 64);

    XTensor x;
    XTensor y;
    XTensor z;

    int loops = 2000;

    /* tensor sizes: x and z are B x N x H; y is the K-way split of x
       along the last dimension */
    int B = 3 * 2 * 4;
    int K = 8 * 1;
    int N = 50;
    int H = 512 * 4;

    InitTensor3D(&x, B, N, H, X_FLOAT, 0);
    InitTensor4D(&y, K, B, N, H/K, X_FLOAT, 0);
    InitTensor3D(&z, B, N, H, X_FLOAT, 0);

    cudaEvent_t ctime0;
    cudaEvent_t ctime1;
    cudaEvent_t ctime2;
    cudaEvent_t ctime3;
    cudaEvent_t ctime4;
    cudaEvent_t ctime5;

    float elapsedSplit = 0.0;
    float elapsedMerge = 0.0;
    float elapsedSum = 0.0;

    cudaEventCreate(&ctime0);
    cudaEventCreate(&ctime1);
    cudaEventCreate(&ctime2);
    cudaEventCreate(&ctime3);
    cudaEventCreate(&ctime4);
    cudaEventCreate(&ctime5);

    /* time _Split */
    cudaEventRecord(ctime0, 0);
    double time0 = GetClock();

    for(int i = 0; i < loops; i++)
        _Split(&x, &y, 2, K);

    double time1 = GetClock();
    cudaEventRecord(ctime1, 0);
    cudaEventSynchronize(ctime1);
    cudaEventElapsedTime(&elapsedSplit, ctime0, ctime1);

    /* time _Merge */
    cudaEventRecord(ctime2, 0);
    double time2 = GetClock();

    for(int i = 0; i < loops; i++)
        _Merge(&y, &x, 3);

    double time3 = GetClock();
    cudaEventRecord(ctime3, 0);
    cudaEventSynchronize(ctime3);
    cudaEventElapsedTime(&elapsedMerge, ctime2, ctime3);

    /* time _Sum */
    cudaEventRecord(ctime4, 0);
    double time4 = GetClock();

    for(int i = 0; i < loops; i++)
        _Sum(&x, &z, &x);

    double time5 = GetClock();
    cudaEventRecord(ctime5, 0);
    cudaEventSynchronize(ctime5);
    cudaEventElapsedTime(&elapsedSum, ctime4, ctime5);

    fprintf(stderr, "split:%f merge:%f sum:%f\n", time1 - time0, time3 - time2, time5 - time4);
    fprintf(stderr, "split:%f merge:%f sum:%f\n", elapsedSplit, elapsedMerge, elapsedSum);

    /* release the CUDA events (previously leaked) */
    cudaEventDestroy(ctime0);
    cudaEventDestroy(ctime1);
    cudaEventDestroy(ctime2);
    cudaEventDestroy(ctime3);
    cudaEventDestroy(ctime4);
    cudaEventDestroy(ctime5);
#endif
}
/*
exercise _SumDim: z = x + y where y (length c) is broadcast over
dimension 2 of the a x b x c tensor x. The result is dumped to stderr.
*/
void SumDimTest()
{
    XTensor x;
    XTensor y;
    XTensor z;

    int a = 5;
    int b = 7;
    int c = 3;

    InitTensor3D(&x, a, b, c, X_FLOAT, -1);
    InitTensor1D(&y, c, X_FLOAT, -1);
    InitTensor3D(&z, a, b, c, X_FLOAT, -1);

    x.SetZeroAll();
    y.SetZeroAll();
    z.SetZeroAll();

    /* one scratch buffer, sized for the largest tensor, reused for both */
    float * buf = new float[x.unitNum];

    /* x gets 0, 1, 2, ... */
    for (int i = 0; i < x.unitNum; i++)
        buf[i] = (DTYPE)i;
    x.SetData(buf, x.unitNum);

    /* y gets 0, -1, -2, ... (only the first y.unitNum entries are used) */
    for (int i = 0; i < y.unitNum; i++)
        buf[i] = -(DTYPE)i;
    y.SetData(buf, y.unitNum);

    _SumDim(&x, &y, &z, 2);

    z.Dump(stderr, "z:");

    delete[] buf;
}
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -29,41 +29,67 @@ ...@@ -29,41 +29,67 @@
namespace nts{ namespace nts{
/* compute dE/dx of a node */ /* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node) void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
{ {
if (!isEfficient) {
CheckNTErrors(node->grad != NULL, "No gradient found!");
}
else {
CheckNTErrors(!node->isGrad || node->grad != NULL, "No gradient found!");
}
XLink &income = node->income; XLink &income = node->income;
int operID = income.typeID; int operID = income.typeID;
CheckNTErrors(node->grad != NULL, "No gradient found!");
CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!"); CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");
XTensor * input = income.tails[0]; XTensor * input = income.tails[0];
XTensor * output = node; XTensor * output = node;
XNoder::MakeGrad(input); if (!isEfficient || input->isGrad) {
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH) XTensor * dedx = input->grad;
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS); XTensor * dedy = output->grad;
else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS); XTensor* tmp;
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0); /* store the result to a temporary node if the input has multiple children */
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!"); if (input->outgo.tailNum > 1) {
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS); tmp = NewTensor(output);
} tmp->SetZeroAll();
else if(operID == FUNC_RECTIFY) }
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS); /* otherwise, the result is directly stored into the input node */
else if(operID == FUNC_SIGMOID) else {
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS); tmp = dedx;
else if(operID == FUNC_SOFTMAX){ }
int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!"); if (operID == FUNC_HARDTANH)
_SoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS); _HardTanHBackward(output, input, dedy, tmp);
} else if (operID == FUNC_IDENTITY)
else{ _IdentityBackward(output, input, dedy, tmp);
ShowNTErrors("Wrong activation function type!"); else if (operID == FUNC_LOGSOFTMAX) {
int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
_LogSoftmaxBackward(NULL, output, input, dedy, tmp, NULL, leadDim, NOLOSS);
}
else if (operID == FUNC_RECTIFY)
_RectifyBackward(output, input, dedy, tmp);
else if (operID == FUNC_SIGMOID)
_SigmoidBackward(output, input, dedy, tmp);
else if (operID == FUNC_SOFTMAX) {
int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
_SoftmaxBackward(NULL, output, input, dedy, tmp, NULL, leadDim, NOLOSS);
}
else {
ShowNTErrors("Unsupported backward computation! TODO!");
}
if (input->outgo.tailNum > 1) {
_SumMe(dedx, tmp);
DelTensor(tmp);
}
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -35,7 +35,7 @@ class XFuncGrad ...@@ -35,7 +35,7 @@ class XFuncGrad
public: public:
/* compute dE/dx of a node */ /* compute dE/dx of a node */
static static
void MakeGrad(XTensor * node); void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for an activation function */ /* indicates whether the node is for an activation function */
static static
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -20,7 +20,10 @@ ...@@ -20,7 +20,10 @@
*/ */
#include "XBackwardLoss.h" #include "XBackwardLoss.h"
#include "XNoder.h"
#include "../tensor/XName.h" #include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/getandset/SetData.h"
#include "../tensor/function/HardTanH.h" #include "../tensor/function/HardTanH.h"
#include "../tensor/function/Identity.h" #include "../tensor/function/Identity.h"
#include "../tensor/function/LogSoftmax.h" #include "../tensor/function/LogSoftmax.h"
...@@ -30,63 +33,69 @@ ...@@ -30,63 +33,69 @@
namespace nts{ namespace nts{
/* /* compute dE/dx of a node */
compute dE/dx for a given function y = f(x) void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> x - input of the function
>> dedy - dE/dy
>> dedx - dE/dx
>> funcID - id of the function f
>> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName)
{ {
CheckNTErrors(gold && y && x, "Empty input tensors!"); XLink &income = node->income;
CheckNTErrors(dedx, "Empty gradient tensors!"); int operID = income.typeID;
CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
if(funcID == FUNC_HARDTANH){ CheckNTErrors(income.tailNum >= 1, "Wrong number of tensors for loss computation!");
_HardTanHBackward(gold, y, x, dedy, dedx, lossName);
} XTensor * output = income.tails[0];
else if(funcID == FUNC_IDENTITY){ XTensor * gold = NULL;
_IdentityBackward(gold, y, x, dedy, dedx, lossName); XTensor * weight = NULL;
} XTensor * padding = NULL;
else if(funcID == FUNC_LOGSOFTMAX){ int leadingDim;
int leadDim = *(int*)params;
_LogSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName); bool isRoot = XNoder::IsRoot(node);
}
else if(funcID == FUNC_RECTIFY){ if (!isEfficient || output->isGrad) {
_RectifyBackward(gold, y, x, dedy, dedx, lossName); XNoder::MakeGrad(output);
} XTensor * dedy = output->grad;
else if(funcID == FUNC_SIGMOID){
_SigmoidBackward(gold, y, x, dedy, dedx, lossName); if (income.tailNum == 1) {
}else if(funcID == FUNC_SOFTMAX){ dedy->SetDataFixed(1);
int leadDim = *(int*)params; return;
_SoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName); }
}
else{ gold = income.tails[1];
ShowNTErrors("wrong function found when call the backward process!");
XTensor* tmp;
if (!isRoot) {
tmp = NewTensor(output);
tmp->SetZeroAll();
}
else{
tmp = dedy;
}
if (operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3)
padding = income.tails[2];
leadingDim = income.GetParamInt(0);
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!");
_CrossEntropyBackward(tmp, output, gold, weight, padding, leadingDim);
if (isRoot)
gold->DestroyData();
else
_SumMe(dedy, tmp);
}
else {
ShowNTErrors("Unsupported backward computation! TODO!");
}
if (!isRoot)
DelTensor(tmp);
} }
node->visitMark = NODE_FINISHED;
} }
/* /* indicates whether the node is for a loss computation */
compute dE/dy for variable y and error(loss) function E bool XLossGrad::IsLossOP(XTensor * node)
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName)
{ {
_LossBackward(dedy, gold, y, lossName); XLink &income = node->income;
return (income.typeID & LOSS_BASE) != 0;
} }
} }
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h" #include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h" #include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XBACKWARDLOSS_H__ #ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__ #define __XBACKWARDLOSS_H__
...@@ -34,15 +35,23 @@ namespace nts{ ...@@ -34,15 +35,23 @@ namespace nts{
class XLossGrad class XLossGrad
{ {
public: public:
/* compute dE/dx for a given function y = f(x) */ /* compute dE/dx of a node */
void Compute(XTensor * gold, XTensor * y, XTensor * x, static
XTensor * dedy, XTensor * dedx, void MakeGrad(XTensor * node, bool isEfficient);
int funcID, void * params,
LOSS_FUNCTION_NAME lossName); /* indicates whether the node is for a Loss computation */
static
bool IsLossOP(XTensor * node);
///* compute dE/dx for a given function y = f(x) */
//void Compute(XTensor * gold, XTensor * y, XTensor * x,
// XTensor * dedy, XTensor * dedx, XTensor * padding,
// int funcID, void * params,
// LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E */ /* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y, void Compute(XTensor * gold, XTensor * y,
XTensor * dedy, XTensor * dedy, XTensor * padding,
LOSS_FUNCTION_NAME lossName); LOSS_FUNCTION_NAME lossName);
}; };
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -33,7 +33,7 @@ class XMathGrad ...@@ -33,7 +33,7 @@ class XMathGrad
public: public:
/* compute dE/dx of a node */ /* compute dE/dx of a node */
static static
void MakeGrad(XTensor * node); void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a math operation */ /* indicates whether the node is for a math operation */
static static
...@@ -43,121 +43,167 @@ private: ...@@ -43,121 +43,167 @@ private:
/* gradient for absolute */ /* gradient for absolute */
static static
void GradAbsolute(XTensor * node); void GradAbsolute(XTensor * node, bool isEfficient);
/* gradient for cos */ /* gradient for cos */
static static
void GradCos(XTensor * node); void GradCos(XTensor * node, bool isEfficient);
/* gradient for exp */ /* gradient for exp */
static static
void GradExp(XTensor * node); void GradExp(XTensor * node, bool isEfficient);
/* gradient for log: c = log(a) */ /* gradient for log: c = log(a) */
static static
void GradLog(XTensor * node); void GradLog(XTensor * node, bool isEfficient);
/* gradient for round */ /* gradient for round */
static static
void GradRound(XTensor * node); void GradRound(XTensor * node, bool isEfficient);
/* gradient for sign */ /* gradient for sign */
static static
void GradSign(XTensor * node); void GradSign(XTensor * node, bool isEfficient);
/* gradient for sin */ /* gradient for sin */
static static
void GradSin(XTensor * node); void GradSin(XTensor * node, bool isEfficient);
/* gradient for tan */ /* gradient for tan */
static static
void GradTan(XTensor * node); void GradTan(XTensor * node, bool isEfficient);
/* gradient for clip */ /* gradient for clip */
static static
void GradClip(XTensor * node); void GradClip(XTensor * node, bool isEfficient);
/* gradient for Divide */ /* gradient for Divide */
static static
void GradDiv(XTensor * node); void GradDiv(XTensor * node, bool isEfficient);
/* gradient for DivideDim */ /* gradient for DivideDim */
static static
void GradDivDim(XTensor * node); void GradDivDim(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */ /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static static
void GradMatrixMul(XTensor * node); void GradMatrixMul(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */ /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static static
void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA, void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB, XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
XTensor * dedc, DTYPE alpha); XTensor * dedc, DTYPE alpha, bool isEfficient);
/* gradient for matrix multiply in batch mode. /* gradient for matrix multiply in batch mode.
for each batch: c_i = matmul(a_i, b_i) * \alpha */ for each batch: c_i = matmul(a_i, b_i) * \alpha */
static static
void GradMatrixMulBatched(XTensor * node); void GradMatrixMulBatched(XTensor * node, bool isEfficient);
/* gradient for multiply (dot production): c = a * b * \alpha */ /* gradient for multiply (dot production): c = a * b * \alpha */
static static
void GradMultiply(XTensor * node); void GradMultiply(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b * \alpha /* gradient for multiply one dimension: c = a * b * \alpha
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradMultiplyDim(XTensor * node); void GradMultiplyDim(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b
where some dimensions of b are of size 1 */
static
void GradMultiplyBroadcast(XTensor * node, bool isEfficient);
/* gradient for negate */ /* gradient for negate */
static static
void GradNegate(XTensor * node); void GradNegate(XTensor * node, bool isEfficient);
/* gradient for normalize */ /* gradient for normalize */
static static
void GradNormalize(XTensor * node); void GradNormalize(XTensor * node, bool isEfficient);
/* gradient for power */
static
void GradPower(XTensor * node, bool isEfficient);
/* gradient for power */ /* gradient for power */
static static
void GradPower(XTensor * node); void GradReciprocal(XTensor* node, bool isEfficient);
/* gradient for sqrt */
static
void GradSqrt(XTensor* node, bool isEfficient);
/* gradient for square */
static
void GradSquare(XTensor* node, bool isEfficient);
/* gradient for ScaleAndShift */ /* gradient for ScaleAndShift */
static static
void GradScaleAndShift(XTensor * node); void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Scale */
static
void GradScale(XTensor * node, bool isEfficient);
/* gradient for Shift */
static
void GradShift(XTensor * node, bool isEfficient);
/* gradient for Descale */
static
void GradDescale(XTensor * node, bool isEfficient);
/* gradient for Minus */ /* gradient for Minus */
static static
void GradSub(XTensor * node); void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta /* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSubDim(XTensor * node); void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */ /* gradient for sum: c = a + b * \beta */
static static
void GradSum(XTensor * node); void GradSum(XTensor * node, bool isEfficient);
/* gradient for sum with one dimension: c = a + b * \beta /* gradient for sum with one dimension: c = a + b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSumDim(XTensor * node); void GradSumDim(XTensor * node, bool isEfficient);
/* gradient for sum by broadcasting: c = a + b * \beta
where some dimensions of b are of size 1 */
static
void GradSumBroadcast(XTensor * node, bool isEfficient);
/* gradient for reduceMean */ /* gradient for reduceMean */
static static
void GradReduceMean(XTensor * node); void GradReduceMean(XTensor * node, bool isEfficient);
/* gradient for reduceSum */ /* gradient for reduceSum */
static static
void GradReduceSum(XTensor * node); void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumAll */
static
void GradReduceSumAll(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */ /* gradient for reduceSumSquared */
static static
void GradReduceSumSquared(XTensor * node); void GradReduceSumSquared(XTensor * node, bool isEfficient);
/* gradient for reduceVariance */ /* gradient for reduceVariance */
static static
void GradReduceVariance(XTensor * node); void GradReduceVariance(XTensor * node, bool isEfficient);
/* gradient for operation */
static
void GradMulAndShift(XTensor * node, bool isEfficient);
/* gradient for MLP */
static
void GradMLP(XTensor* node, bool isEfficient);
}; };
} }
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -34,7 +34,7 @@ class XShapeGrad ...@@ -34,7 +34,7 @@ class XShapeGrad
public: public:
/* compute dE/dx of a node */ /* compute dE/dx of a node */
static static
void MakeGrad(XTensor * node); void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a shaping operation */ /* indicates whether the node is for a shaping operation */
static static
...@@ -42,39 +42,60 @@ public: ...@@ -42,39 +42,60 @@ public:
/* post processing of a node */ /* post processing of a node */
static static
void PostProcessing(XTensor * node, int typeId); void PostProcessing(XTensor * node, int typeId, bool isEfficient);
private: private:
/* gradient computation for convertdatatype: b = convertdatatype(a) */
static
void GradConvertDataType(XTensor * node, bool isEfficient);
/* gradient computation for copying indexed sub-tensors: b = copyindexed(a, srcIndex, indexSize, tgtIndex, copyNum) */
static
void GradCopyIndexed(XTensor * node, bool isEfficient);
/* gradient computation for copying indexed sub-tensors: b = gather(a, index) */
static
void GradGather(XTensor * node, bool isEfficient);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficient);
/* gradient computation for merge: c = merge(a, b, ...) */ /* gradient computation for merge: c = merge(a, b, ...) */
static static
void GradMerge(XTensor * node); void GradMerge(XTensor * node, bool isEfficient);
/* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */ /* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
static static
void GradMergeList(XTensor * node); void GradMergeList(XTensor * node, bool isEfficient);
/* gradient computation for transposing a tensor : b = transpose(a) */
static
void GradTranspose(XTensor * node, bool isEfficient);
/* gradient computation for reshaping a tensor: c = reshape(a) */
static
void GradReshape(XTensor * node, bool isEfficient);
/* gradient computation for split: c = split(a) */ /* gradient computation for split: c = split(a) */
static static
void GradSplit(XTensor * node); void GradSplit(XTensor * node, bool isEfficient);
/* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a) */ /* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a) */
static static
void GradSplitList(XTensor * node); void GradSplitList(XTensor * node, bool isEfficient);
/* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a). /* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a).
this method is called only when all nodes of spliting have been processed. We do this in a post-processing this method is called only when all nodes of spliting have been processed. We do this in a post-processing
manner because we can fuze multiple memory copy jobs one time. This is good for system speed up. */ manner because we can fuze multiple memory copy jobs one time. This is good for system speed up. */
static static
void GradSplitListPost(XTensor * node); void GradSplitListPost(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */ /* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static static
void GradUnsqueeze(XTensor * node); void GradUnsqueeze(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradTranspose(XTensor * node);
}; };
} }
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -55,6 +55,7 @@ void XNetClearAll() ...@@ -55,6 +55,7 @@ void XNetClearAll()
XNet::XNet() XNet::XNet()
{ {
nodes.Clear(); nodes.Clear();
isGradEfficient = true;
} }
/* de-constructor */ /* de-constructor */
...@@ -72,129 +73,96 @@ void XNet::Clear() ...@@ -72,129 +73,96 @@ void XNet::Clear()
} }
/* /*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(&gold);
Backward(roots, golds, loss);
}
/*
backward propagation to obtain gradient backward propagation to obtain gradient
>> root - root node (output) of the network >> root - root node (output) of the network
>> loss - name of loss function >> loss - name of loss function
*/ */
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss) void XNet::Backward(XTensor &root)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
XList golds(1); Backward(roots);
golds.Add(NULL);
Backward(roots, golds, loss);
} }
/* /*
backward propagation to obtain gradient wrt. the loss/error function backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes with a number of root nodes
>> root - a list of root nodes (output) of the network >> roots - a list of root nodes (output) of the network
>> gold - a list of gold standard for the output
>> loss - name of loss function
*/ */
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss) void XNet::Backward(TensorList &roots)
{ {
Traverse(roots); Traverse(roots);
/* label tensors where the backward computation is neccessary */
if(isGradEfficient)
MakeEfficientNet();
for(int i = 0; i < nodes.count; i++){ for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i); XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED; node->visitMark = NODE_UNFINISHED;
} }
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is no use in the
folloing process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
/* we compuate dE/dy (y is the output) if no predefined activation function is used */
else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, loss);
}
}
/* back-propagation from output to input */ /* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){ for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);; XTensor * node = (XTensor*)nodes.Get(i);
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
}
}
/* if(node->mem != NULL){
backward propagation to obtain gradient CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
with a number of root nodes }
>> root - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
{
XList golds(roots.count);
for(int i = 0; i < roots.count; i++)
golds.Add(NULL);
Backward(roots, golds, loss); if(node->visitMark != NODE_FINISHED)
BackwardNode(node, isGradEfficient);
if(isGradEfficient){
XLink & outgo = node->outgo;
for(int i = 0; i < outgo.tailNum; i++){
XTensor * parent = outgo.tails[i];
ClearGrad(parent);
}
if (XNoder::IsLeaf(node)) {
ClearGrad(node);
if (node->outgo.tailNum == 0) {
delete node;
}
}
}
}
} }
/* /*
backward computation for a given node backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function) >> node - the node keeps the result of an operation (e.g., activation function)
>> isEfficient - indicates whether the back-propagation is compuated in an
efficient manner
*/ */
void XNet::BackwardNode(XTensor * node) void XNet::BackwardNode(XTensor * node, bool isEfficent)
{ {
if(node == NULL || node->visitMark == NODE_FINISHED) if(node == NULL || node->visitMark == NODE_FINISHED)
return; return;
if(!XNoder::IsLeaf(node)){ if(!XNoder::IsLeaf(node)){
/* post processing for parent nodes */ /* post processing for parent nodes */
BackwardNodePost(node); BackwardNodePost(node, isEfficent);
/* process the current node */ /* process the current node */
if(XMathGrad::IsMathOP(node)) if(XMathGrad::IsMathOP(node))
XMathGrad::MakeGrad(node); XMathGrad::MakeGrad(node, isEfficent);
else if(XFuncGrad::IsFunc(node)) else if(XFuncGrad::IsFunc(node))
XFuncGrad::MakeGrad(node); XFuncGrad::MakeGrad(node, isEfficent);
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node); XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
} }
else{
node->visitMark = NODE_FINISHED;
}
} }
/* /*
...@@ -202,7 +170,7 @@ backward computation (in post processing) for a given node ...@@ -202,7 +170,7 @@ backward computation (in post processing) for a given node
>> node - the node whose parent nodes are not processed yet. So >> node - the node whose parent nodes are not processed yet. So
we do the job at the child node. we do the job at the child node.
*/ */
void XNet::BackwardNodePost(XTensor * node) void XNet::BackwardNodePost(XTensor * node, bool isEfficent)
{ {
bool isSplitList = false; bool isSplitList = false;
XLink &outgo = node->outgo; XLink &outgo = node->outgo;
...@@ -212,7 +180,7 @@ void XNet::BackwardNodePost(XTensor * node) ...@@ -212,7 +180,7 @@ void XNet::BackwardNodePost(XTensor * node)
} }
if(isSplitList) if(isSplitList)
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST); XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST, isEfficent);
} }
/* /*
...@@ -222,7 +190,7 @@ depth-first search (Tarjan's algorithm) ...@@ -222,7 +190,7 @@ depth-first search (Tarjan's algorithm)
*/ */
void XNet::Traverse(XTensor &root) void XNet::Traverse(XTensor &root)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
Traverse(roots); Traverse(roots);
...@@ -233,7 +201,7 @@ traverse the net and find the topological order by ...@@ -233,7 +201,7 @@ traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) depth-first search (Tarjan's algorithm)
>> roots - a list of roots (or output nodes) >> roots - a list of roots (or output nodes)
*/ */
void XNet::Traverse(XList &roots) void XNet::Traverse(TensorList &roots)
{ {
id = MakeNetID(); id = MakeNetID();
nodes.Clear(); nodes.Clear();
...@@ -258,7 +226,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering) ...@@ -258,7 +226,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering)
>> orders - topological order of the nodes >> orders - topological order of the nodes
>> code - code of the network >> code - code of the network
*/ */
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code) void XNet::TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code)
{ {
if(node == NULL) if(node == NULL)
return; return;
...@@ -279,6 +247,8 @@ void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code) ...@@ -279,6 +247,8 @@ void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
node->visitMark = code + 2; node->visitMark = code + 2;
orders.Add(node); orders.Add(node);
} }
else if(node->visitMark == code + 2){
}
} }
/* /*
...@@ -299,4 +269,92 @@ void XNet::Dump(FILE * file) ...@@ -299,4 +269,92 @@ void XNet::Dump(FILE * file)
} }
} }
} /*
\ No newline at end of file set the flag of gradient-efficient
>> flag - the flag
*/
void XNet::SetGradEfficientFlag(bool flag)
{
isGradEfficient = flag;
}
/* generate the gradient-efficient flag for every node */
void XNet::MakeEfficientNet()
{
/* back-propagation from output to input */
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
XLink &income = node->income;
for(int j = 0; j < income.tailNum; j++){
XTensor * child = income.tails[j];
if(child->isGrad || child->isVar){
node->SetGradFlag(true);
break;
}
}
}
}
/*
clear the graident information if the node is no use
>> node - the node that we want to clear
*/
void XNet::ClearGrad(XTensor * node)
{
if(node->isVar)
return;
if(node->grad == NULL)
return;
if(node->visitMark != NODE_FINISHED)
return;
XLink & income = node->income;
bool finished = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
if(child->visitMark != NODE_FINISHED){
finished = false;
break;
}
}
if(finished){
delete node->grad;
node->grad = NULL;
}
}
/*
show network topology
>> file - file to dump information
>> node - pointer to the node
*/
void XNet::ShowNetwork(FILE * file, XTensor * node)
{
TensorList roots(1);
roots.Add(node);
Traverse(roots);
//XLink::ShowNode(file, node);
/* go over nodes in its topological order */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * n = (XTensor*)nodes.Get(i);
XLink::ShowNode(file, n);
}
}
/*
search for a node in a top-down manner by its name
>> top - the top most node
<< return - the node we found
*/
//XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{
//return XLink::SearchNode(top, name);
//}
}
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h" #include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h" #include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XNET_H__ #ifndef __XNET_H__
#define __XNET_H__ #define __XNET_H__
...@@ -36,16 +37,19 @@ struct XNet ...@@ -36,16 +37,19 @@ struct XNet
unsigned int id; unsigned int id;
/* tensor nodes of the network (in order) */ /* tensor nodes of the network (in order) */
XList nodes; TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/ /* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes; TensorList gradNodes;
/* output nodes of the network */ /* output nodes of the network */
XList outputs; TensorList outputs;
/* input nodes of the network */ /* input nodes of the network */
XList inputs; TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient;
/* constructor */ /* constructor */
XNet(); XNet();
...@@ -56,25 +60,18 @@ struct XNet ...@@ -56,25 +60,18 @@ struct XNet
/* clear the network */ /* clear the network */
void Clear(); void Clear();
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient */ /* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(XTensor &root);
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node); void BackwardNode(XTensor * node, bool isEfficent = false);
/* backward computation (in post processing) for a given node */ /* backward computation (in post processing) for a given node */
void BackwardNodePost(XTensor * node); void BackwardNodePost(XTensor * node, bool isEfficent = false);
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
...@@ -82,13 +79,29 @@ struct XNet ...@@ -82,13 +79,29 @@ struct XNet
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots); void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */ /* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code); void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */ /* dump network information */
void Dump(FILE * file); void Dump(FILE * file);
/* set the flag of gradient-efficient */
void SetGradEfficientFlag(bool flag = true);
/* generate the gradient-efficient flag for every node */
void MakeEfficientNet();
/* clear the graident information if the node is no use */
void ClearGrad(XTensor * node);
/* show network topology */
void ShowNetwork(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */
//static
//XTensor * SearchNode(XTensor * top, const char * name);
}; };
/* we make a unique id for every tensor */ /* we make a unique id for every tensor */
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node) ...@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL) if(node == NULL)
return; return;
if(!XTensor::IsSameShaped(node, node->grad)){ if(!_IsSameShaped(node, node->grad)){
delete node->grad; delete node->grad;
node->grad = NewTensor(node); node->grad = NewTensor(node);
node->grad->SetZeroAll(); node->grad->SetZeroAll();
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/ */
#include "../tensor/XTensor.h" #include "../tensor/core/CHeader.h"
#ifndef __XNODER_H__ #ifndef __XNODER_H__
#define __XNODER_H__ #define __XNODER_H__
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-10-09
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/
#include "Decoder.h"
#include "Utility.h"
#include "module/LayerNorm.h"
#include "module/CommonModules.h"
#include "../../tensor/core/CHeader.h"
namespace nmt
{
/* constructor */
AttDecoder::AttDecoder()
{
    selfAtt = NULL;
    fnns = NULL;
    selfAttLayerNorms = NULL;
    fnnLayerNorms = NULL;
    enDeAtt = NULL;
    enDeAttLayerNorms = NULL;
    decoderLayerNorm = NULL;
    selfAttCache = NULL;
    enDeAttCache = NULL;
    /* scalar members are assigned in InitModel(); give them safe defaults so
       that destroying a decoder that was never initialized is well-defined
       (the destructor previously read an uninitialized preNorm flag) */
    devID = -1;
    nlayer = 0;
    hSize = 0;
    eSize = 0;
    vSize = 0;
    dropoutP = 0.0F;
    preNorm = false;
}
/* de-constructor: releases all per-layer sub-modules owned by this decoder */
AttDecoder::~AttDecoder()
{
    delete[] selfAttCache;
    delete[] enDeAttCache;
    delete[] selfAtt;
    delete[] fnns;
    delete[] selfAttLayerNorms;
    delete[] fnnLayerNorms;
    delete[] enDeAtt;
    delete[] enDeAttLayerNorms;
    /* deleting a null pointer is a no-op, so there is no need to test
       preNorm here; this also avoids reading preNorm before InitModel()
       has ever run (it used to be uninitialized in that case) */
    delete decoderLayerNorm;
}
/*
initialize the model
Allocates one self-attention, encoder-decoder attention, FNN, layer-norm
pair and decoding cache per layer, then initializes each of them.
NOTE(review): the order of the InitModel() calls is kept as-is; changing it
may change the order in which random parameters are drawn.
>> config - configurations of the model (layer count, sizes, dropout, etc.)
*/
void AttDecoder::InitModel(Config& config)
{
    devID = config.devID;
    nlayer = config.nDecLayer;
    hSize = config.modelSize;
    eSize = config.embSize;
    vSize = config.tgtVocabSize;
    dropoutP = config.dropout;
    preNorm = config.preNorm;

    CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
    CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");

    /* embedding model (false: use the target-side vocabulary) */
    embedder.InitModel(config, false);

    /* one sub-module of each kind per decoder layer */
    selfAtt = new Attention[nlayer];
    fnns = new FNN[nlayer];
    selfAttLayerNorms = new LN[nlayer];
    enDeAtt = new Attention[nlayer];
    enDeAttLayerNorms = new LN[nlayer];
    fnnLayerNorms = new LN[nlayer];
    selfAttCache = new Cache[nlayer];
    enDeAttCache = new Cache[nlayer];

    /* the final layer normalization is only used in the pre-norm setting */
    if (preNorm)
        decoderLayerNorm = new LN;

    /* initialize the stacked layers */
    for (int i = 0; i < nlayer; i++) {
        selfAtt[i].InitModel(config);
        fnns[i].InitModel(config);
        selfAttLayerNorms[i].InitModel(config);
        fnnLayerNorms[i].InitModel(config);
        enDeAtt[i].InitModel(config);
        enDeAttLayerNorms[i].InitModel(config);
        /* caches are switched on for step-by-step decoding
           (presumably they keep previous key/value states — confirm in
           module/Attention) */
        selfAttCache[i].enable = true;
        enDeAttCache[i].enable = true;
    }
    if (preNorm)
        decoderLayerNorm->InitModel(config);
}
/*
make the decoding network
Runs the embedded input through nlayer decoder layers, each consisting of
self-attention, encoder-decoder attention and an FNN, with residual
connections. The pre-/post-norm choice is handled inside LayerNorm via its
two trailing flags (apply-before / apply-after — see module/CommonModules).
>> inputDec - the input tensor of the decoder
>> outputEnc - the output tensor of the encoder
>> mask - mask that indicates which position is valid
>> maskEncDec - mask for the encoder-decoder attention
>> nstep - the current length of the decoder input
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the decoder
*/
XTensor AttDecoder::Make(XTensor& inputDec, XTensor& outputEnc, XTensor* mask,
    XTensor* maskEncDec, int nstep, bool isTraining)
{
    XTensor x;

    /* word + position embedding (true: target side) */
    x = embedder.Make(inputDec, true, isTraining, nstep);

    /* dropout */
    if (isTraining && dropoutP > 0)
        x = Dropout(x, dropoutP);

    for (int i = 0; i < nlayer; i++) {
        XTensor att;
        XTensor ende;
        XTensor fnn;
        XTensor res;
        XTensor selfAttnBefore;
        XTensor selfAttnAfter;
        XTensor endeAttnBefore;
        XTensor endeAttnAfter;
        XTensor fnnBefore;

        /* layer normalization with pre-norm for self-attn */
        selfAttnBefore = LayerNorm(x, selfAttLayerNorms[i], preNorm, true, false);

        /******************/
        /* self attention (query = key = value; the per-layer cache enables
           incremental decoding) */
        att = selfAtt[i].Make(selfAttnBefore, selfAttnBefore, selfAttnBefore,
            mask, isTraining, &selfAttCache[i], SELF_ATT);

        /* dropout */
        if (isTraining && dropoutP > 0)
            att = Dropout(att, dropoutP);

        /* residual connection */
        res = Sum(att, x);

        /* layer normalization with post-norm for self-attention */
        selfAttnAfter = LayerNorm(res, selfAttLayerNorms[i], preNorm, false, true);

        /* layer normalization with pre-norm for encoder-decoder attention */
        endeAttnBefore = LayerNorm(selfAttnAfter, enDeAttLayerNorms[i], preNorm, true, false);

        /* encoder-decoder attention: keys/values come from the encoder
           output, queries from the decoder state */
        ende = enDeAtt[i].Make(outputEnc, endeAttnBefore, outputEnc, maskEncDec,
            isTraining, &enDeAttCache[i], EN_DE_ATT);

        /* dropout */
        if (isTraining && dropoutP > 0)
            ende = Dropout(ende, dropoutP);

        /* residual connection */
        res = Sum(ende, selfAttnAfter);

        /* layer normalization with post-norm for encoder-decoder attention */
        endeAttnAfter = LayerNorm(res, enDeAttLayerNorms[i], preNorm, false, true);

        /* layer normalization with pre-norm for fnn */
        fnnBefore = LayerNorm(endeAttnAfter, fnnLayerNorms[i], preNorm, true, false);

        /* fnn */
        fnn = fnns[i].Make(fnnBefore, isTraining);

        /* dropout */
        if (isTraining && dropoutP > 0)
            fnn = Dropout(fnn, dropoutP);

        /* residual connection */
        res = Sum(fnn, endeAttnAfter);

        /* layer normalization with post-norm for fnn */
        x = LayerNorm(res, fnnLayerNorms[i], preNorm, false, true);
    }

    /* the final normalization layer only exists in the pre-norm setting */
    if (preNorm)
        return decoderLayerNorm->Make(x);

    return x;
}
/*
make the decoding network (pre-norm variant)
Same computation as Make() restricted to the pre-norm arrangement:
normalize -> sub-layer -> dropout -> residual.
>> inputDec - the input tensor of the decoder
>> outputEnc - the output tensor of the encoder
>> mask - mask that indicates which position is valid
>> maskEncDec - mask for the encoder-decoder attention
>> nstep - the current length of the decoder input
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the decoder
*/
XTensor AttDecoder::MakeFast(XTensor& inputDec, XTensor& outputEnc, XTensor* mask,
    XTensor* maskEncDec, int nstep, bool isTraining)
{
    XTensor x;

    x = embedder.Make(inputDec, true, isTraining, nstep);

    /* dropout */
    if (isTraining && dropoutP > 0)
        x = Dropout(x, dropoutP);

    for (int i = 0; i < nlayer; i++) {
        XTensor res;

        res = x;

        /* layer normalization with pre-norm for self-attn */
        x = selfAttLayerNorms[i].Make(x);

        /******************/
        /* self attention */
        x = selfAtt[i].Make(x, x, x, mask, isTraining, &selfAttCache[i], SELF_ATT);

        /* dropout */
        if (isTraining && dropoutP > 0)
            x = Dropout(x, dropoutP);

        /* residual connection */
        x = Sum(res, x);

        res = x;

        /* layer normalization with pre-norm for encoder-decoder attention */
        x = enDeAttLayerNorms[i].Make(x);

        /* encoder-decoder attention */
        x = enDeAtt[i].Make(outputEnc, x, outputEnc, maskEncDec,
            isTraining, &enDeAttCache[i], EN_DE_ATT);

        /* dropout */
        if (isTraining && dropoutP > 0)
            x = Dropout(x, dropoutP);

        /* residual connection */
        x = Sum(res, x);

        res = x;

        /* layer normalization with pre-norm for fnn */
        x = fnnLayerNorms[i].Make(x);

        /* fnn */
        x = fnns[i].Make(x, isTraining);

        /* dropout */
        if (isTraining && dropoutP > 0)
            x = Dropout(x, dropoutP);

        /* residual connection */
        x = Sum(res, x);
    }

    /* guard the final normalization: decoderLayerNorm is only allocated when
       preNorm is set (see InitModel), so the previous unconditional call
       dereferenced a null pointer whenever MakeFast was used with
       preNorm == false */
    if (preNorm)
        x = decoderLayerNorm->Make(x);

    return x;
}
}
\ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/
#ifndef __DECODER_H__
#define __DECODER_H__
#include "Encoder.h"
#include "Utility.h"
namespace nmt
{
/* transformer-style decoder: embedding + a stack of {self-attention,
   encoder-decoder attention, FNN} layers. All pointer members are arrays of
   length nlayer owned by this object (allocated in InitModel, released in
   the destructor), except decoderLayerNorm which is a single object that is
   only allocated when preNorm is true. */
class AttDecoder
{
public:
    /* device id */
    int devID;

    /* number of stacked decoder layers */
    int nlayer;

    /* hidden layer size of the FNN layer */
    int hSize;

    /* embedding size */
    int eSize;

    /* (target-side) vocabulary size */
    int vSize;

    /* dropout probability */
    DTYPE dropoutP;

    /* embedding of word at each position */
    Embedder embedder;

    /* FNN model of each layer */
    FNN* fnns;

    /* self-attention model of each layer */
    Attention* selfAtt;

    /* layer normalization for self-attention */
    LN* selfAttLayerNorms;

    /* layer normalization for fnn */
    LN* fnnLayerNorms;

    /* final layer normalization of the decoder (pre-norm setting only) */
    LN* decoderLayerNorm;

    /* encoder-decoder attention model of each layer */
    Attention* enDeAtt;

    /* layer normalization for encoder-decoder attention */
    LN* enDeAttLayerNorms;

    /* per-layer cache for self-attention (incremental decoding) */
    Cache* selfAttCache;

    /* per-layer cache for encoder-decoder attention */
    Cache* enDeAttCache;

    /* the location of layer normalization: true = pre-norm, false = post-norm */
    bool preNorm;

public:
    /* constructor */
    AttDecoder();

    /* de-constructor */
    ~AttDecoder();

    /* initialize the model */
    void InitModel(Config& config);

    /* make the decoding network */
    XTensor Make(XTensor& inputDec, XTensor& outputEnc, XTensor* mask,
        XTensor* maskEncDec, int nstep, bool isTraining);

    /* make the decoding network (pre-norm only; assumes preNorm is set) */
    XTensor MakeFast(XTensor& inputDec, XTensor& outputEnc, XTensor* mask,
        XTensor* maskEncDec, int nstep, bool isTraining);
};
}
#endif
\ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/
#include "Encoder.h"
#include "Utility.h"
#include "module/LayerNorm.h"
#include "module/CommonModules.h"
#include "../../tensor/core/CHeader.h"
namespace nmt
{
/* constructor */
AttEncoder::AttEncoder()
{
    selfAtt = NULL;
    fnns = NULL;
    attLayerNorms = NULL;
    fnnLayerNorms = NULL;
    encoderLayerNorm = NULL;
    /* scalar members are assigned in InitModel(); give them safe defaults so
       that destroying an encoder that was never initialized is well-defined
       (the destructor previously read an uninitialized preNorm flag) */
    devID = -1;
    nlayer = 0;
    eSize = 0;
    hSize = 0;
    vSize = 0;
    ignored = 0;
    dropoutP = 0.0F;
    preNorm = false;
}
/* de-constructor: releases all per-layer sub-modules owned by this encoder */
AttEncoder::~AttEncoder()
{
    delete[] selfAtt;
    delete[] fnns;
    delete[] attLayerNorms;
    delete[] fnnLayerNorms;
    /* deleting a null pointer is a no-op, so there is no need to test
       preNorm here; this also avoids reading preNorm before InitModel()
       has ever run (it used to be uninitialized in that case) */
    delete encoderLayerNorm;
}
/*
initialize the model
Allocates one self-attention, FNN and two layer-norm modules per layer,
then initializes each of them.
NOTE(review): the order of the InitModel() calls is kept as-is; changing it
may change the order in which random parameters are drawn.
>> config - configurations for the model (layer count, sizes, dropout, etc.)
*/
void AttEncoder::InitModel(Config& config)
{
    devID = config.devID;
    nlayer = config.nEncLayer;
    eSize = config.embSize;
    hSize = config.modelSize;
    vSize = config.srcVocabSize;
    preNorm = config.preNorm;
    dropoutP = config.dropout;

    CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
    CheckNTErrors(vSize > 1, "Set vocabulary size by \"-vsize\"");

    /* embedding model (source-side vocabulary) */
    embedder.InitModel(config);

    /* one sub-module of each kind per encoder layer */
    selfAtt = new Attention[nlayer];
    fnns = new FNN[nlayer];
    attLayerNorms = new LN[nlayer];
    fnnLayerNorms = new LN[nlayer];

    /* the final layer normalization is only used in the pre-norm setting */
    if (preNorm)
        encoderLayerNorm = new LN;

    /* initialize the stacked layers */
    for (int i = 0; i < nlayer; i++) {
        selfAtt[i].InitModel(config);
        fnns[i].InitModel(config);
        attLayerNorms[i].InitModel(config);
        fnnLayerNorms[i].InitModel(config);
    }
    if (preNorm)
        encoderLayerNorm->InitModel(config);
}
/*
make the encoding network
Runs the embedded input through nlayer encoder layers, each consisting of
self-attention and an FNN with residual connections. The pre-/post-norm
choice is handled inside LayerNorm via its two trailing flags
(apply-before / apply-after — see module/CommonModules).
>> input - the input tensor of the encoder
>> mask - the mask that indicate each position is valid
>> maskEncDec - no use
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
XTensor AttEncoder::Make(XTensor& input, XTensor* mask, XTensor& maskEncDec, bool isTraining)
{
    XTensor x;

    /* word + position embedding (false: source side) */
    x = embedder.Make(input, false, isTraining);

    /* dropout */
    if (isTraining && dropoutP > 0)
        x = Dropout(x, dropoutP);

    for (int i = 0; i < nlayer; i++) {
        XTensor att;
        XTensor fnn;
        XTensor res;
        XTensor attnBefore;
        XTensor attnAfter;
        XTensor fnnBefore;

        /* layer normalization with pre-norm for self-attn */
        attnBefore = LayerNorm(x, attLayerNorms[i], preNorm, true, false);

        /* self attention (query = key = value; no cache on the encoder side) */
        att = selfAtt[i].Make(attnBefore, attnBefore, attnBefore, mask, isTraining, NULL, SELF_ATT);

        /* dropout */
        if (isTraining && dropoutP > 0)
            att = Dropout(att, dropoutP);

        /* residual connection */
        res = Sum(att, x);

        /* layer normalization with post-norm for self-attn */
        attnAfter = LayerNorm(res, attLayerNorms[i], preNorm, false, true);

        /* layer normalization with pre-norm for fnn */
        fnnBefore = LayerNorm(attnAfter, fnnLayerNorms[i], preNorm, true, false);

        /* fnn */
        fnn = fnns[i].Make(fnnBefore, isTraining);

        /* dropout */
        if (isTraining && dropoutP > 0)
            fnn = Dropout(fnn, dropoutP);

        /* residual connection */
        res = Sum(fnn, attnAfter);

        /* layer normalization with post-norm for fnn */
        x = LayerNorm(res, fnnLayerNorms[i], preNorm, false, true);
    }

    /* the final normalization layer only exists in the pre-norm setting */
    if (preNorm)
        return encoderLayerNorm->Make(x);

    return x;
}
/*
make the encoding network (pre-norm variant)
Same computation as Make() restricted to the pre-norm arrangement:
normalize -> sub-layer -> dropout -> residual.
>> input - the input tensor of the encoder
>> mask - the mask that indicate each position is valid
>> maskEncDec - no use
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
XTensor AttEncoder::MakeFast(XTensor& input, XTensor* mask, XTensor& maskEncDec, bool isTraining)
{
    XTensor x;

    x = embedder.Make(input, false, isTraining);

    /* dropout */
    if (isTraining && dropoutP > 0)
        x = Dropout(x, dropoutP);

    for (int i = 0; i < nlayer; i++) {
        XTensor res;

        res = x;

        /* layer normalization with pre-norm for self-attn */
        x = attLayerNorms[i].Make(x);

        /* self attention */
        x = selfAtt[i].Make(x, x, x, mask, isTraining, NULL, SELF_ATT);

        /* dropout */
        if (isTraining && dropoutP > 0)
            x = Dropout(x, dropoutP);

        /* residual connection */
        x = Sum(res, x);

        res = x;

        /* layer normalization with pre-norm for fnn */
        x = fnnLayerNorms[i].Make(x);

        /* fnn */
        x = fnns[i].Make(x, isTraining);

        /* dropout */
        if (isTraining && dropoutP > 0)
            x = Dropout(x, dropoutP);

        /* residual connection */
        x = Sum(res, x);
    }

    /* guard the final normalization: encoderLayerNorm is only allocated when
       preNorm is set (see InitModel), so the previous unconditional call
       dereferenced a null pointer whenever MakeFast was used with
       preNorm == false */
    if (preNorm)
        x = encoderLayerNorm->Make(x);

    return x;
}
/*
make the encoding network (wrapper)
Forwards to the full Make() with a placeholder encoder-decoder mask, which
that overload ignores.
>> input - the input tensor of the encoder
>> mask - the mask that indicate each position is valid
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
XTensor AttEncoder::Make(XTensor& input, XTensor* mask, bool isTraining)
{
    /* unused placeholder for the maskEncDec argument */
    XTensor dummyMask;

    return Make(input, mask, dummyMask, isTraining);
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2020 NiuTrans Research. All rights reserved.
* All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
...@@ -17,54 +16,42 @@ ...@@ -17,54 +16,42 @@
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/ */
#ifndef __T2TENCODER_H__ #ifndef __ENCODER_H__
#define __T2TENCODER_H__ #define __ENCODER_H__
#include "T2TFNN.h" #include "Utility.h"
#include "T2TAttention.h" #include "module/FNN.h"
#include "T2TEmbedding.h" #include "module/Attention.h"
#include "T2TLayerNormal.h" #include "module/Embedding.h"
#include "module/LayerNorm.h"
#include "../../network/XNet.h" #include "../../network/XNet.h"
using namespace nts; using namespace nts;
namespace transformer namespace nmt
{ {
/* /*
base class of the encoder base class of the encoder
*/
class T2TEncoder
{
public:
virtual
XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes) = 0;
};
/*
the encoder based on RNN
*/ */
class RNNEncoder : T2TEncoder class Encoder
{ {
public: public:
XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes); virtual XTensor Make(XTensor& input, XTensor* mask, XTensor& mask2, bool isTraining) = 0;
}; };
/*
/* the encoder based on self-attention
the encoder based on self-attention
*/ */
class AttEncoder : T2TEncoder class AttEncoder : Encoder
{ {
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -77,31 +64,34 @@ public: ...@@ -77,31 +64,34 @@ public:
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
/* dropout probability */
DTYPE dropoutP;
/* some positions can be ignored in attention. this is useful in lm where the first position needs /* some positions can be ignored in attention. this is useful in lm where the first position needs
special design for the attention model. */ special design for the attention model. */
int ignored; int ignored;
/* embedding of word at each position */ /* embedding of word at each position */
T2TEmbedder embedder; Embedder embedder;
/* FNN model of each layer */ /* FNN model of each layer */
T2TFNN * fnns; FNN* fnns;
/* attention model of each layer */ /* attention model of each layer */
T2TAttention * attentions; Attention* selfAtt;
/* layer normalizations for attention */
LN* attLayerNorms;
/* layer normalization for fnn */ /* layer normalization for fnn */
T2TLN * fnnLayerNorms; LN* fnnLayerNorms;
/* layer normalization for attention */ /* layer normalization for encoder */
T2TLN * attLayerNorms; LN* encoderLayerNorm;
/* input tensor of the encoder */ /* the location of layer normalization */
XTensor * input; bool preNorm;
/* output tensor of the encoder */
XTensor * output;
public: public:
/* constructor */ /* constructor */
AttEncoder(); AttEncoder();
...@@ -110,14 +100,17 @@ public: ...@@ -110,14 +100,17 @@ public:
~AttEncoder(); ~AttEncoder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, void InitModel(Config& config);
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
/* make the encoding network */ /* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes); XTensor Make(XTensor& input, XTensor* mask, XTensor& maskEncDec, bool isTraining);
};
/* make the encoding network */
XTensor MakeFast(XTensor& input, XTensor* mask, XTensor& maskEncDec, bool isTraining);
/* make the encoding network (wrapper) */
XTensor Make(XTensor& input, XTensor* mask, bool isTraining);
};
} }
......
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/
#ifndef __MODEL_H__
#define __MODEL_H__
#include "Encoder.h"
#include "Decoder.h"
#include "module/FNN.h"
#include "module/Output.h"
#include "Utility.h"
#include "module/Attention.h"
namespace nmt
{
/* a nmt model that keeps parameters of the encoder,
   the decoder and the output layer (softmax). */
class Model
{
public:
    /* device id (-1 presumably means CPU — TODO confirm against XDevice) */
    int devID;

    /* the encoder */
    AttEncoder* encoder;

    /* the decoder */
    AttDecoder* decoder;

    /* output layer (projection + softmax) */
    Output* outputLayer;

    /* indicates whether the model is running for language modeling */
    bool isLM;

    /* indicates whether the model is running for machine translation */
    bool isMT;

    /* indicates whether the model is running with FP16 data type */
    bool useFP16;

    /* number of heads in the attention model */
    int nhead;

    /* indicates whether to share encoder embeddings with decoder
       embeddings (stored as int, used as a flag) */
    int shareAllEmbeddings;

    /* indicates whether to share decoder embeddings with output
       weights (stored as int, used as a flag) */
    int shareDecInputOutputWeight;

public:
    /* constructor */
    Model();

    /* de-constructor */
    ~Model();

    /* initialize the model */
    void InitModel(Config& config);

    /* print model configurations */
    void ShowModelConfig(Config& config);

    /* make the encoding network */
    XTensor MakeEncoder(XTensor& input, XTensor* mask, bool isTraining);

    /* make the decoding network */
    XTensor MakeDecoder(XTensor& inputEnc, XTensor& inputDec, XTensor* mask,
                        XTensor& MaskEncDec, bool isTraining);

    /* make the network for language modeling (with the output softmax layer) */
    void MakeLM(XTensor& input, XTensor& output, XTensor& padding, bool isTraining);

    /* make the network for machine translation (with the output softmax layer) */
    void MakeMT(XTensor& inputEnc, XTensor& inputDec, XTensor& output,
                XTensor& paddingEnc, XTensor& paddingDec, bool isTraining);

    /* make the mask for training MT models */
    void MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
                    XTensor& paddingEnc, XTensor& paddingDec,
                    XTensor& maskEnc, XTensor& maskDec, XTensor& maskEncDec);

    /* make the mask of the encoder */
    void MakeMTMaskEnc(XTensor& paddingEnc, XTensor& maskEnc);

    /* make the mask of the decoder */
    void MakeMTMaskDec(XTensor& paddingEnc, XTensor& paddingDec,
                       XTensor& maskDec, XTensor& maskEncDec);

    /* get parameter matrices */
    void GetParams(TensorList& list);

    /* dump the model to a file */
    void Dump(const char* fn);

    /* read the parameters */
    void Read(FILE* file);
};
}
#endif
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-06, 2020-07
*/
#include <ctime>
#include "NMT.h"
#include "train/Trainer.h"
#include "translate/Translator.h"
namespace nmt
{
int NMTMain(int argc, const char** argv)
{
if (argc == 0)
return 1;
/* load configurations */
Config config(argc, argv);
srand(1);
/* training */
if (strcmp(config.trainFN, "") != 0) {
Model model;
model.InitModel(config);
Trainer trainer;
trainer.Init(config);
trainer.Train(config.trainFN, config.validFN, config.modelFN, &model);
}
/* translating */
if (strcmp(config.testFN, "") != 0 && strcmp(config.outputFN, "") != 0) {
/* disable grad flow */
DISABLE_GRAD;
Model model;
model.InitModel(config);
Translator translator;
translator.Init(config);
translator.Translate(config.testFN, config.srcVocabFN,
config.tgtVocabFN, config.outputFN, &model);
}
return 0;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2020 NiuTrans Research. All rights reserved.
* All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
...@@ -16,26 +15,17 @@ ...@@ -16,26 +15,17 @@
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31 * An implementation of the NMT system.
*/ */
#ifndef __T2TDECODER_H__ #ifndef __NMT_H__
#define __T2TDECODER_H__ #define __NMT_H__
namespace transformer namespace nmt
{ {
class T2TDecoder /* entrance of the program */
{ int NMTMain(int argc, const char** argv);
};
class AttDecoder : T2TDecoder
{
public:
/* initialize the model */
void InitModel(int argc, const char ** argv);
};
} }
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include <math.h>
#include "T2TAttention.h"
#include "T2TUtility.h"
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
/* constructor: mark all hyper-parameters as unset (-1) */
T2TAttention::T2TAttention()
    : nhead(-1),
      dk(-1),
      dv(-1),
      d(-1),
      isMasked(false),
      ignored(0)
{
}
/* de-constructor: nothing to release explicitly; the XTensor
   members are destroyed automatically */
T2TAttention::~T2TAttention()
{
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the attention is with a mask
>> myIgnored - number of positions ignored in attention (from the beginning)
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TAttention::InitModel(int argc, const char ** argv,
                             bool myIsMasked, int myIgnored,
                             int myDevID, XMem * myMem)
{
    devID = myDevID;
    mem = myMem;
    isMasked = myIsMasked;
    ignored = myIgnored;

    float minmax = 0;

    /* dk, dv and d are all read from the same "-d" option,
       so they are identical here */
    LoadParamInt(argc, argv, "nhead", &nhead, 8);
    LoadParamInt(argc, argv, "d", &dk, DEFAULT_EMBEDDING_SIZE);
    LoadParamInt(argc, argv, "d", &dv, DEFAULT_EMBEDDING_SIZE);
    LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);

    /* NOTE(review): minmax is read but never used below — the weights
       are initialized with the sqrt(6/(fan_in+fan_out)) bound instead */
    LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);

    InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
    InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
    InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem);

    /* Xavier-style uniform initialization: bound = sqrt(6*scale/(fan_in+fan_out)) */
    float scale = 1.0F;
    float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
    float finfoutv = (float)sqrt(6.0F * scale/(d + dv));

    wk.SetDataRand(-finfoutk, finfoutk);
    wq.SetDataRand(-finfoutk, finfoutk);
    wv.SetDataRand(-finfoutv, finfoutv);
}
/*
make the network
>> k - keys. It might be of size B * L * H
       where B = batch size, L = sequence length,
       and H = vector size of each position
>> q - queries
>> v - values
>> mask - the additive attention mask (used only when isMasked is set)
<< return - multi-head attention result
*/
XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask)
{
    XTensor k2;
    XTensor q2;
    XTensor v2;

    /* linear transformation before self-attention */
    k2 = MMul(k, wk);
    q2 = MMul(q, wq);
    v2 = MMul(v, wv);

    XTensor kheads;
    XTensor qheads;
    XTensor vheads;

    /* multi head: split the last (hidden) dimension into nhead parts */
    kheads = Split(k2, k2.order - 1, nhead);
    qheads = Split(q2, q2.order - 1, nhead);
    vheads = Split(v2, v2.order - 1, nhead);

    XTensor att;
    XTensor dot;
    XTensor scalar;

    /* scalar = softmax(Q * K^T / sqrt(dk)) * V */
    dot = BMMul(qheads, X_NOTRANS, kheads, X_TRANS);

    /* the mask is added to the raw scores before the softmax */
    if(isMasked)
        dot = dot + mask;

    scalar = Softmax(Linear(dot, 1/(float)sqrt((float)dk)), -1);

    /* suppress the first `ignored` positions along dimension (order-2)
       by writing a tiny constant (1e-9) into the attention weights */
    if(ignored > 0)
        _SetDataDim(&scalar, 0, ignored, scalar.order - 2, 1e-9F);

    att = BMMul(scalar, vheads);

    /* concatenate the heads */
    return Merge(att, att.order - 1);
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#ifndef __T2TATTENTION_H__
#define __T2TATTENTION_H__
#include "../../network/XNet.h"
using namespace nts;
namespace transformer
{
/*
multi-head attention
y(Q, K, V) = cat(head_1, head_2, ..., head_n)
where head_i = Attention(Q * w_i^Q, K * w_i^K, V * w_i^V)
      attention(Q, K, V) = softmax(Q * K^T/d_k^0.5) V
      d_k = dimension size of K
*/
class T2TAttention
{
public:
    /* device id */
    int devID;

    /* memory pool (NULL by default — see InitModel) */
    XMem * mem;

    /* head number */
    int nhead;

    /* transformation matrix for K */
    XTensor wk;

    /* transformation matrix for Q */
    XTensor wq;

    /* transformation matrix for V */
    XTensor wv;

    /* size of transformed Q and K */
    int dk;

    /* size of transformed V */
    int dv;

    /* size of input Q, K and V */
    int d;

    /* indicates whether the attention is masked */
    bool isMasked;

    /* some positions can be ignored in attention. this is useful in lm where the first position needs
       special design for the attention model. */
    int ignored;

public:
    /* constructor */
    T2TAttention();

    /* de-constructor */
    ~T2TAttention();

    /* initialize the model */
    void InitModel(int argc, const char ** argv,
                   bool myIsMasked, int myIgnored,
                   int myDevID = -1, XMem * myMem = NULL);

    /* make the network */
    XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-08-01
*/
#include <math.h>
#include "T2TEmbedding.h"
#include "T2TUtility.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
/* constructor: mark the embedder as uninitialized */
T2TEmbedder::T2TEmbedder()
    : devID(-1),
      mem(NULL),
      vSize(-1),
      maxLength(-1)
{
}
/* de-constructor: nothing to release explicitly; the XTensor
   members are destroyed automatically */
T2TEmbedder::~T2TEmbedder()
{
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
{
    devID = myDevID;
    mem = myMem;

    /* note: eSize and d are read from the same "-d" option,
       so they are always equal here */
    LoadParamInt(argc, argv, "vsize", &vSize, -1);
    LoadParamInt(argc, argv, "maxlen", &maxLength, 512);
    LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
    LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);

    /* word embedding matrix of size vSize * eSize; the second argument
       of SetDataRandn is presumably the standard deviation — TODO confirm */
    InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);
    w.SetDataRandn(0, 1.0F/(float)sqrt((float)eSize));

    /* create the positional embedding matrix */
    MakePosEmbedding(eSize, d, maxLength);
}
/*
make positional embeddings (a table of size length * eSize)
>> eSize - embedding size
>> d - model dimension used in the frequency term
>> length - maximum length of the sequence
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{
    InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem);

    /* fill a host-side buffer first, then copy it into the tensor */
    float * data = new float[posEmbeddingBase.unitNum];

    for(int pos = 0; pos < length; pos++){
        float * dp = data + pos * eSize;

        /* standard sinusoidal encoding: even dimensions use sin,
           odd dimensions use cos, with frequency 1/10000^(2i/d) */
        for(int k = 0; k < eSize; k++){
            if(k % 2 == 0){
                int i = k/2;
                dp[k] = (float)sin(pos/pow(10000.0F, 2.0F*i/d));
            }
            else{
                int i = (k - 1)/2;
                dp[k] = (float)cos(pos/pow(10000.0F, 2.0F*i/d));
            }
        }
    }

    posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);

    delete[] data;
}
/*
make the network: word embedding lookup + positional embedding
>> input - the input tensor whose last dimension is the vocabulary
           (presumably a one-hot/distribution representation, since the
           lookup is done with a matrix product — TODO confirm)
<< return - wordEmbedding * sqrt(d) + posEmbedding
*/
XTensor T2TEmbedder::Make(XTensor &input)
{
    CheckNTErrors(input.GetDim(-1) == vSize, "Wrong vocabulary size!");
    CheckNTErrors(input.order > 1, "Wrong input tensor size!");
    CheckNTErrors(input.dimSize[input.order - 2] < maxLength, "The sequence is too long!");
    CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
    CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");

    /* the output shape is the input shape with the last (vocabulary)
       dimension replaced by the embedding size */
    int dims[MAX_TENSOR_DIM_NUM];
    memcpy(dims, input.dimSize, input.order * sizeof(int));
    dims[input.order - 1] = eSize;

    /* reuse the cached positional embedding when its shape already matches */
    bool match = (posEmbedding.order == input.order);
    if(match){
        for(int i = 0; i < input.order; i++){
            if(dims[i] != posEmbedding.GetDim(i))
                match = false;
        }
    }

    /* we make positional embeddings first */
    if(!match){
        InitTensor(&posEmbedding, input.order, dims, X_FLOAT, 1.0F, devID, mem);

        /* copy the leading rows of the base table into a temporary buffer,
           then broadcast it along dimension 0 (the batch dimension) */
        XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem);

        _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
        _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);

        DelTensorBuf(posTMP);
    }

    XTensor wordEmbedding;

    /* then we make word embeddings, scaled by sqrt(d) */
    wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)d));

    /* we sum over the two embeddings */
    return wordEmbedding + posEmbedding;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include <math.h>
#include "T2TEncoder.h"
#include "T2TLayerNormal.h"
#include "T2TUtility.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
/* constructor */
AttEncoder::AttEncoder()
{
    /* null the layer arrays so that the destructor — which
       unconditionally delete[]s them — is safe to run even when
       InitModel() was never called */
    attentions = NULL;
    fnns = NULL;
    attLayerNorms = NULL;
    fnnLayerNorms = NULL;
}
/* de-constructor: release the stacked layers.
   NOTE(review): these arrays are assigned in InitModel(); the code
   assumes the pointers are valid (or null) when the encoder dies */
AttEncoder::~AttEncoder()
{
    delete[] attentions;
    delete[] fnns;
    delete[] attLayerNorms;
    delete[] fnnLayerNorms;
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
>> myMem - the memory pool
*/
void AttEncoder::InitModel(int argc, const char ** argv,
                           bool myIsMasked, int myIgnored,
                           int myDevID, XMem * myMem)
{
    devID = myDevID;
    mem = myMem;
    ignored = myIgnored;

    /* read the hyper-parameters from the command line */
    LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
    LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
    LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
    LoadParamInt(argc, argv, "vsize", &vSize, -1);

    CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
    CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");

    /* embedding model */
    embedder.InitModel(argc, argv, devID, mem);

    /* one attention/fnn/layer-norm set per encoder layer */
    attentions = new T2TAttention[nlayer];
    fnns = new T2TFNN[nlayer];
    attLayerNorms = new T2TLN[nlayer];
    fnnLayerNorms = new T2TLN[nlayer];

    /* initialize the stacked layers */
    for(int i = 0; i < nlayer; i++){
        attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
        fnns[i].InitModel(argc, argv, myDevID, myMem);
        attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
        fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
    }
}
/*
make the encoding network
>> input - the input tensor of the encoder
>> mask - the mask that indicates which positions are valid
>> skipInputRes - indicates whether we skip the residual connection of the first layer
<< return - the output tensor of the encoder

changes vs. the original: the unused local `ln` was removed, and the
self-attention call — previously duplicated in both branches — is hoisted
above the branch. The computation itself is unchanged.
*/
XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes)
{
    XTensor x;

    x = embedder.Make(input);

    for(int i = 0; i < nlayer; i++){
        XTensor att;
        XTensor fnn;
        XTensor res;

        /* self attention (common to both branches below) */
        att = attentions[i].Make(x, x, x, mask);

        if(skipInputRes && i == 0){
            /* no residual connection for the first layer */

            /* TODO: dropout */

            /* layer normalization */
            x = attLayerNorms[i].Make(att);
        }
        else{
            /* residual connection */
            res = Sum(att, x);

            /* TODO: dropout */

            /* layer normalization */
            x = attLayerNorms[i].Make(res);
        }

        /* fnn */
        fnn = fnns[i].Make(x);

        /* residual connection */
        res = Sum(fnn, x);

        /* TODO: dropout */

        /* layer normalization */
        x = fnnLayerNorms[i].Make(res);
    }

    return x;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include "T2TModel.h"
#include "T2TUtility.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
/* constructor: no device, no memory pool, both task flags off */
T2TModel::T2TModel()
    : devID(-1),
      mem(NULL),
      isLM(false),
      isMT(false),
      nhead(1)
{
}
/* de-constructor: release the memory pool
   (deleting a NULL pool — when "-mem" was not used — is a no-op) */
T2TModel::~T2TModel()
{
    delete mem;
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
*/
void T2TModel::InitModel(int argc, const char ** argv)
{
    bool useMem = false;

    LoadParamInt(argc, argv, "dev", &devID, -1);
    LoadParamBool(argc, argv, "mem", &useMem, useMem);
    LoadParamBool(argc, argv, "lm", &isLM, true);
    LoadParamBool(argc, argv, "mt", &isMT, false);
    LoadParamInt(argc, argv, "nhead", &nhead, 8);

    /* optionally create a memory pool on the chosen device; the
       delete is a no-op on the first call (mem starts as NULL) */
    if(useMem){
        delete mem;
        mem = new XMem(devID);
    }

    /* for an LM the encoder runs with masked attention and ignores
       the first position in attention */
    encoder.InitModel(argc, argv, isLM, isLM ? 1 : 0, devID, mem);
    outputLayer.InitModel(argc, argv, devID, mem);
}
/*
make the encoding network
>> input - input tensor
>> mask - the mask for positions that are/are not involved in computation
>> skipInputRes - indicates whether we skip the residual connection of the first layer
<< return - encoding result
*/
XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes)
{
    /* simply forward to the attention-based encoder */
    return encoder.Make(input, mask, skipInputRes);
}
/*
make the entire network (with the output softmax layer)
>> input - input tensor
>> output - output tensor (distribution)
*/
void T2TModel::Make(XTensor &input, XTensor &output)
{
    XTensor encoding;

    if(isLM){
        /* generate mask to see "previous" words only */
        int len = input.GetDim(input.order - 2);
        int * dims = new int[input.order + 1];
        for(int i = 0; i < input.order; i++)
            dims[i + 1] = input.GetDim(i);

        /* mask shape: a head dimension is prepended and the last
           (vocabulary) dimension is replaced by len, giving
           nhead x ... x len x len */
        dims[0] = nhead;
        dims[input.order] = len;
        XTensor mask(input.order + 1, dims, X_FLOAT, 1.0F, input.devID, input.mem);

        /* set the lower-triangular part (shift -1) to 1e9, then
           scale-and-shift by -1e9: visible (past) cells end up 0 and
           all other cells -1e9, i.e. effectively -inf before softmax */
        _SetDataLowTri(&mask, 1e9F, -1);
        _ScaleAndShiftMe(&mask, 1.0F, -1e9F);

        /* the first layer's input residual connection is skipped */
        encoding = MakeEncoding(input, mask, true);
        outputLayer.Make(encoding, output);

        delete[] dims;
    }
    else{
        ShowNTErrors("TODO!");
    }
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#ifndef __T2TMODEL_H__
#define __T2TMODEL_H__
#include "T2TFNN.h"
#include "T2TAttention.h"
#include "T2TEncoder.h"
#include "T2TDecoder.h"
#include "T2TOutput.h"
namespace transformer
{
/* a transformer model that keeps the parameters of the encoder,
   the decoder and the output layer (softmax) */
class T2TModel
{
public:
    /* device id */
    int devID;

    /* memory pool (owned; released in the destructor) */
    XMem * mem;

    /* the encoder */
    AttEncoder encoder;

    /* the decoder */
    AttDecoder decoder;

    /* output layer */
    T2TOutput outputLayer;

    /* indicates whether the model is running for language modeling */
    bool isLM;

    /* indicates whether the model is running for machine translation */
    bool isMT;

    /* number of heads in the attention model */
    int nhead;

public:
    /* constructor */
    T2TModel();

    /* de-constructor */
    ~T2TModel();

    /* initialize the model */
    void InitModel(int argc, const char ** argv);

    /* make the encoding network */
    XTensor MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes);

    /* make the entire network (with the output softmax layer) */
    void Make(XTensor &input, XTensor &output);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include <math.h>
#include "T2TOutput.h"
#include "T2TUtility.h"
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
/* constructor: mark all sizes as unset (-1) */
T2TOutput::T2TOutput()
    : devID(-1),
      mem(NULL),
      vSize(-1),
      inSize(-1),
      hSize(-1)
{
}
/* de-constructor: nothing to release explicitly; the XTensor
   member is destroyed automatically */
T2TOutput::~T2TOutput()
{
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TOutput::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
{
    devID = myDevID;
    mem = myMem;

    float minmax = 0;

    /* note: inSize and hSize both come from the same "-d" option */
    LoadParamInt(argc, argv, "vsize", &vSize, -1);
    LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
    LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);

    /* NOTE(review): minmax is read but never used below — the weight
       is initialized with the sqrt(6/(fan_in+fan_out)) bound instead */
    LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);

    /* projection matrix from the hidden size to the vocabulary */
    InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem);

    float scale = 1.0F;
    float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
    w.SetDataRand(-finfout, finfout);
}
/*
make the network
y = logsoftmax(x * w)
>> input - input tensor
<< return - output tensor (log-probability distribution over the vocabulary)
*/
XTensor T2TOutput::Make(XTensor &input)
{
    /* project onto the vocabulary and take the log-softmax
       over the last dimension */
    return LogSoftmax(MMul(input, w), -1);
}
/*
make the network (redefined output tensor)
>> input - input tensor
>> output - output tensor, set to logsoftmax(input * w)
*/
void T2TOutput::Make(XTensor &input, XTensor &output)
{
    output = LogSoftmax(MMul(input, w), -1);
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-08-02
*/
#ifndef __T2TTRAINER_H__
#define __T2TTRAINER_H__
#include "T2TModel.h"
#include "../../tensor/function/FHeader.h"
#define MAX_SEQUENCE_LENGTH 1024 * 4
using namespace nts;
namespace transformer
{
/* trainer of the T2T (transformer) model */
class T2TTrainer
{
public:
    /* device id */
    int devID;

    /* memory pool */
    XMem * mem;

    /* buffer for loading words */
    int * buf;

    /* buffer size */
    int bufSize;

    /* length of each sequence */
    int * seqLen;

    /* offset of the first word for each sequence */
    int * seqOffset;

    /* number of sequences in the buffer */
    int nseqBuf;

    /* offset for next sequence in the buffer */
    int nextSeq;

    /* indicates whether the sequence is sorted by length */
    bool isLenSorted;

    /* dimension size of each inner layer */
    int d;

    /* step number of warm-up for training */
    int nwarmup;

    /* vocabulary size of the source side */
    int vSize;

    /* learning rate */
    float lrate;

    /* sentence batch size */
    int sBatchSize;

    /* word batch size */
    int wBatchSize;

    /* training epoch number */
    int nepoch;

    /* training step number */
    int nstep;

public:
    /* constructor */
    T2TTrainer();

    /* de-constructor */
    ~T2TTrainer();

    /* initialize the trainer */
    void Init(int argc, const char ** argv);

    /* train the model */
    void Train(const char * fn, T2TModel * model);

    /* load data to buffer */
    int LoadBuf(FILE * file);

    /* load a batch of sequences */
    int LoadBatch(FILE * file, XTensor * batch, int step, int vs, int sBatch, int wBatch, bool isSorted, int &wCount);

    /* get word probabilities for a batch of sequences */
    float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);

    /* update the model by delta rule */
    void Update(T2TModel * model, const float lr);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
namespace transformer
{
FILE * tmpFILE;
/*
load a string parameter from the command line
>> argc - number of arguments
>> argv - the argument list; an option is given as "-name value"
>> name - option name (without the leading '-')
>> p - buffer receiving the value (the caller must provide enough room)
>> defaultP - fallback value used when the option is absent
note: if the option occurs several times, the last occurrence wins
*/
void LoadParamString(int argc, const char ** argv, const char * name, char * p, const char * defaultP)
{
    /* build "-name"; snprintf bounds the write so that an over-long
       option name cannot overflow vname (the old strcpy could) */
    char vname[128];
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for(int i = 0; i < argc; i++){
        if(!strcmp(argv[i], vname) && i + 1 < argc){
            strcpy(p, argv[i + 1]);
            hit = true;
        }
    }

    if(!hit)
        strcpy(p, defaultP);
}
/*
load an integer parameter from the command line
>> argc - number of arguments
>> argv - the argument list; an option is given as "-name value"
>> name - option name (without the leading '-')
>> p - where the parsed value is written
>> defaultP - fallback value used when the option is absent
note: if the option occurs several times, the last occurrence wins
*/
void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int defaultP)
{
    /* build "-name" with a bounded write (the old strcpy could overflow) */
    char vname[128];
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for(int i = 0; i < argc; i++){
        if(!strcmp(argv[i], vname) && i + 1 < argc){
            /* the redundant (int*) cast of p was removed */
            *p = atoi(argv[i + 1]);
            hit = true;
        }
    }

    if(!hit)
        *p = defaultP;
}
/*
load a boolean flag from the command line
>> argc - number of arguments
>> argv - the argument list; a boolean option is a bare flag ("-name")
>> name - option name (without the leading '-')
>> p - set to true when the flag is present, defaultP otherwise
>> defaultP - fallback value used when the flag is absent
*/
void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bool defaultP)
{
    /* build "-name" with a bounded write (the old strcpy could overflow) */
    char vname[128];
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for(int i = 0; i < argc; i++){
        /* presence of the flag means true; no value is consumed */
        if(!strcmp(argv[i], vname)){
            /* the redundant (bool*) cast of p was removed */
            *p = true;
            hit = true;
        }
    }

    if(!hit)
        *p = defaultP;
}
/*
load a float parameter from the command line
>> argc - number of arguments
>> argv - the argument list; an option is given as "-name value"
>> name - option name (without the leading '-')
>> p - where the parsed value is written
>> defaultP - fallback value used when the option is absent
note: if the option occurs several times, the last occurrence wins
*/
void LoadParamFloat(int argc, const char ** argv, const char * name, float * p, float defaultP)
{
    /* build "-name" with a bounded write (the old strcpy could overflow) */
    char vname[128];
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for(int i = 0; i < argc; i++){
        if(!strcmp(argv[i], vname) && i + 1 < argc){
            *p = (float)atof(argv[i + 1]);
            hit = true;
        }
    }

    if(!hit)
        *p = defaultP;
}
/*
echo the recognized command-line options to stderr
>> argc - number of arguments
>> argv - the argument list
an option "-x" followed by a non-dash token prints "x=<value>";
a bare flag prints "x=yes"; "-<digit>" tokens (negative numbers)
are not treated as options
*/
void ShowParams(int argc, const char ** argv)
{
    fprintf(stderr, "args:\n");
    for(int i = 0; i < argc; i++){
        /* check argv[i][0] BEFORE argv[i][1]: the original order read
           index 1 of an empty-string argument, which is out of bounds */
        if(argv[i][0] != '-' || argv[i][1] == 0)
            continue;
        if(argv[i][1] < '1' || argv[i][1] > '9'){
            if(i + 1 < argc && argv[i + 1][0] != '-')
                fprintf(stderr, " %s=%s\n", argv[i], argv[i + 1]);
            else
                fprintf(stderr, " %s=yes\n", argv[i]);
        }
    }
    fprintf(stderr, "\n");
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include "Transformer.h"
#include "T2TModel.h"
#include "T2TUtility.h"
#include "T2TTrainer.h"
#include "../../tensor/XDevice.h"
namespace transformer
{
/*
entry point of the transformer tool
>> argc - number of arguments
>> argv - the argument list
<< return - 0 on success, 1 on error (no arguments, or tmp file not writable)
*/
int TransformerMain(int argc, const char ** argv)
{
    if(argc == 0)
        return 1;

    /* the original code did not check fopen's result, so a read-only
       working directory led to fclose(NULL) (undefined behavior) */
    tmpFILE = fopen("tmp.txt", "wb");
    if(tmpFILE == NULL){
        fprintf(stderr, "cannot open tmp.txt for writing\n");
        return 1;
    }

    ShowParams(argc, argv);

    char * trainFN = new char[MAX_LINE_LENGTH];
    LoadParamString(argc, argv, "train", trainFN, "");

    T2TModel model;
    model.InitModel(argc, argv);

    /* train the model only when a training file was given */
    if(strcmp(trainFN, "")){
        T2TTrainer trainer;
        trainer.Init(argc, argv);
        trainer.Train(trainFN, &model);
    }

    delete[] trainFN;
    fclose(tmpFILE);

    return 0;
}
}
\ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04, 2020-06
*/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <fstream>
#include <sstream>
#include "Utility.h"
#include "../../tensor/XGlobal.h"
using namespace nts;
using namespace std;
namespace nmt
{
/*
load configurations from the command
>> argc - number of arguments
>> argv - the list of arguments
*/
/*
load configurations from the command
>> argc - number of arguments
>> argv - the list of arguments
Options may also come from a file given by "-config"; in that case the
argument list is rebuilt from the file and argsNum reflects its length.
*/
Config::Config(int argc, const char** argv)
{
    /* copy the arguments into a mutable list so that options loaded
       from a file can replace/extend them */
    char** args = new char* [MAX_PARAM_NUM];
    for (int i = 0; i < argc; i++) {
        args[i] = new char[strlen(argv[i]) + 1];
        strcpy(args[i], argv[i]);
    }

    char* configFN = new char[1024];
    LoadParamString(argc, args, "config", configFN, "");

    int argsNum = argc;

    /* load configurations from a file */
    if (strcmp(configFN, "") != 0)
        argsNum = LoadFromFile(configFN, args);

    ShowParams(argsNum, args);

    /* options for the model */
    LoadParamInt(argsNum, args, "nhead", &nhead, 4);
    LoadParamInt(argsNum, args, "enclayer", &nEncLayer, 6);
    LoadParamInt(argsNum, args, "declayer", &nDecLayer, 6);
    LoadParamInt(argsNum, args, "maxrp", &maxRP, 8);
    LoadParamInt(argsNum, args, "embsize", &embSize, 512);
    LoadParamInt(argsNum, args, "modelsize", &modelSize, 512);
    LoadParamInt(argsNum, args, "maxpos", &maxPosLen, 1024);
    LoadParamInt(argsNum, args, "fnnhidden", &fnnHiddenSize, modelSize * 2);
    LoadParamInt(argsNum, args, "vsize", &srcVocabSize, 10152);
    LoadParamInt(argsNum, args, "vsizetgt", &tgtVocabSize, 10152);
    LoadParamInt(argsNum, args, "padid", &padID, 1);
    LoadParamInt(argsNum, args, "startid", &startID, 2);
    LoadParamInt(argsNum, args, "endid", &endID, 2);
    LoadParamBool(argsNum, args, "rpr", &useRPR, false);
    LoadParamBool(argsNum, args, "prenorm", &preNorm, true);
    // TODO: refactor the parameters type to support weight sharing during training
    LoadParamInt(argsNum, args, "shareemb", &shareAllEmbeddings, 0);
    LoadParamInt(argsNum, args, "sharedec", &shareDecInputOutputWeight, 0);
    LoadParamString(argsNum, args, "model", modelFN, "");
    LoadParamString(argsNum, args, "srcvocab", srcVocabFN, "vocab.src");
    LoadParamString(argsNum, args, "tgtvocab", tgtVocabFN, "vocab.tgt");

    /* options for training.
       NOTE: the original code passed 'argc' instead of 'argsNum' to many of
       the calls below, so those options were silently ignored whenever they
       came from a config file; 'argsNum' is now used consistently */
    LoadParamString(argsNum, args, "train", trainFN, "");
    LoadParamString(argsNum, args, "valid", validFN, "");
    LoadParamInt(argsNum, args, "dev", &devID, 0);
    LoadParamInt(argsNum, args, "wbatch", &wBatchSize, 4096);
    LoadParamInt(argsNum, args, "sbatch", &sBatchSize, 8);
    isTraining = (strcmp(trainFN, "") == 0) ? false : true;
    LoadParamBool(argsNum, args, "mt", &isMT, true);
    LoadParamFloat(argsNum, args, "dropout", &dropout, 0.3F);
    LoadParamFloat(argsNum, args, "fnndrop", &fnnDropout, 0.1F);
    LoadParamFloat(argsNum, args, "attdrop", &attDropout, 0.1F);
    LoadParamFloat(argsNum, args, "lrate", &lrate, 0.0015F);
    LoadParamFloat(argsNum, args, "lrbias", &lrbias, 0);
    LoadParamInt(argsNum, args, "nepoch", &nepoch, 50);
    LoadParamInt(argsNum, args, "maxcheckpoint", &maxCheckpoint, 10);
    LoadParamInt(argsNum, args, "nstep", &nstep, 100000);
    LoadParamInt(argsNum, args, "nwarmup", &nwarmup, 8000);
    LoadParamBool(argsNum, args, "adam", &useAdam, true);
    LoadParamFloat(argsNum, args, "adambeta1", &adamBeta1, 0.9F);
    LoadParamFloat(argsNum, args, "adambeta2", &adamBeta2, 0.98F);
    LoadParamFloat(argsNum, args, "adamdelta", &adamDelta, 1e-9F);
    LoadParamBool(argsNum, args, "shuffled", &isShuffled, true);
    LoadParamFloat(argsNum, args, "labelsmoothing", &labelSmoothingP, 0.1F);
    LoadParamInt(argsNum, args, "nstepcheckpoint", &nStepCheckpoint, -1);
    LoadParamBool(argsNum, args, "epochcheckpoint", &useEpochCheckpoint, true);
    LoadParamInt(argsNum, args, "updatestep", &updateStep, 1);
    LoadParamBool(argsNum, args, "sorted", &isLenSorted, false);
    LoadParamInt(argsNum, args, "bufsize", &bufSize, 50000);
    LoadParamBool(argsNum, args, "doubledend", &isDoubledEnd, false);
    LoadParamBool(argsNum, args, "smallbatch", &isSmallBatch, true);
    LoadParamBool(argsNum, args, "bigbatch", &isBigBatch, false);
    LoadParamBool(argsNum, args, "randbatch", &isRandomBatch, false);
    LoadParamInt(argsNum, args, "bucketsize", &bucketSize, wBatchSize * 10);

    /* options for translating */
    LoadParamString(argsNum, args, "test", testFN, "");
    LoadParamString(argsNum, args, "output", outputFN, "");
    LoadParamInt(argsNum, args, "beamsize", &beamSize, 1);
    LoadParamBool(argsNum, args, "fp16", &useFP16, false);
    LoadParamFloat(argsNum, args, "lenalpha", &lenAlpha, 0.6F);
    LoadParamFloat(argsNum, args, "maxlenalpha", &maxLenAlpha, 1.2F);

    /* only the first argc entries were allocated here; entries appended by
       LoadFromFile (if any) are intentionally left to the OS — TODO: track
       argsNum allocations explicitly */
    for (int i = 0; i < argc; i++)
        delete[] args[i];
    delete[] args;
    delete[] configFN;
}
/*
load configurations from a file
>> configFN - path to the configuration file
>> args - the list to store the configurations
format: one option per line, separated by a blank or a tab
*/
int Config::LoadFromFile(const char* configFN, char** args) {
ifstream f(configFN, ios::in);
CheckNTErrors(f.is_open(), "unable to open the config file");
int argsNum = 0;
/* parse arguments */
string key, value;
while (f >> key >> value) {
key += '-';
strcpy(args[argsNum++], key.c_str());
strcpy(args[argsNum++], value.c_str());
}
/* record the number of arguments */
return argsNum;
}
/*
read a string option from the command line
>> argc - number of arguments
>> argv - the argument list
>> name - option name (without the leading '-')
>> p - buffer receiving the value
>> defaultP - value used when the option is absent
The first occurrence of the option wins.
*/
void LoadParamString(int argc, char** argv, const char* name, char* p, const char* defaultP)
{
    /* the option as it appears on the command line: "-name" */
    char optName[128];
    optName[0] = '-';
    strcpy(optName + 1, name);

    for (int i = 0; i + 1 < argc; i++) {
        if (strcmp(argv[i], optName) == 0) {
            strcpy(p, argv[i + 1]);
            return;
        }
    }

    /* the option was not given: fall back to the default */
    strcpy(p, defaultP);
}
/*
read an integer option from the command line
>> argc - number of arguments
>> argv - the argument list
>> name - option name (without the leading '-')
>> p - where the parsed value is stored
>> defaultP - value used when the option is absent
The first occurrence of the option wins.
*/
void LoadParamInt(int argc, char** argv, const char* name, int* p, int defaultP)
{
    /* the option as it appears on the command line: "-name" */
    char optName[128];
    optName[0] = '-';
    strcpy(optName + 1, name);

    for (int i = 0; i + 1 < argc; i++) {
        if (strcmp(argv[i], optName) == 0) {
            *p = atoi(argv[i + 1]);
            return;
        }
    }

    /* the option was not given: fall back to the default */
    *p = defaultP;
}
/*
read a boolean flag from the command line
>> argc - number of arguments
>> argv - the argument list
>> name - option name (without the leading '-')
>> p - where the result is stored
>> defaultP - value used when the flag is absent
A bool option is a bare flag: its presence means "true".
*/
void LoadParamBool(int argc, char** argv, const char* name, bool* p, bool defaultP)
{
    /* the option as it appears on the command line: "-name" */
    char optName[128];
    optName[0] = '-';
    strcpy(optName + 1, name);

    for (int i = 0; i < argc; i++) {
        if (strcmp(argv[i], optName) == 0) {
            *p = true;
            return;
        }
    }

    /* the flag was not given: fall back to the default */
    *p = defaultP;
}
/*
read a float option from the command line
>> argc - number of arguments
>> argv - the argument list
>> name - option name (without the leading '-')
>> p - where the parsed value is stored
>> defaultP - value used when the option is absent
The first occurrence of the option wins.
*/
void LoadParamFloat(int argc, char** argv, const char* name, float* p, float defaultP)
{
    /* the option as it appears on the command line: "-name" */
    char optName[128];
    optName[0] = '-';
    strcpy(optName + 1, name);

    for (int i = 0; i + 1 < argc; i++) {
        if (strcmp(argv[i], optName) == 0) {
            *p = (float)atof(argv[i + 1]);
            return;
        }
    }

    /* the option was not given: fall back to the default */
    *p = defaultP;
}
/*
print the command-line options to stderr, one "name=value" pair per line
>> argc - number of arguments
>> argv - the argument list
An option token starts with '-' and its second character is not a digit
in 1..9 (so negative numbers are not mistaken for option names).
*/
void ShowParams(int argc, char** argv)
{
    fprintf(stderr, "args:\n");
    for (int i = 0; i < argc; i++) {
        /* check argv[i][0] before argv[i][1]: the original order read
           argv[i][1] first, which is past the terminator (out of bounds)
           when the argument is an empty string */
        if (argv[i][0] != '-' || argv[i][1] == 0)
            continue;
        if (argv[i][1] < '1' || argv[i][1] > '9') {
            /* a following non-option token is taken as this option's value */
            if (i + 1 < argc && argv[i + 1][0] != '-')
                fprintf(stderr, " %s=%s\n", argv[i], argv[i + 1]);
            else
                fprintf(stderr, " %s=yes\n", argv[i]);
        }
    }
    fprintf(stderr, "\n");
}
#define MAX_WORD_NUM 120
/*
split string by delimiter, this will return indices of all sub-strings
>> s - the original string
>> delimiter - as it is
<< indices - indices of all sub-strings
*/
/*
split string by delimiter, this will return indices of all sub-strings
>> s - the original string
>> delimiter - as it is
<< indices - start indices of all non-empty sub-strings
*/
UInt64List SplitToPos(const string& s, const string& delimiter)
{
    UInt64List indices;

    /* an empty delimiter cannot split anything: treat the whole string as a
       single piece and return. The original code fell through into the
       search loop, where find("") always succeeds at 'start' without ever
       advancing it — an infinite loop. */
    if (delimiter.length() == 0) {
        indices.Add(0);
        return indices;
    }

    size_t pos = 0;
    uint64_t start = 0;
    while ((pos = s.find(delimiter, start)) != string::npos) {
        /* skip empty pieces produced by adjacent/leading delimiters */
        if (pos != start) {
            indices.Add(start);
        }
        start = pos + delimiter.length();
    }
    /* the trailing piece, if any */
    if (start != s.length()) {
        indices.Add(start);
    }
    return indices;
}
/* split a string to a int64_t list */
/* split a string into a list of integers, one per delimited piece */
IntList SplitInt(const string& s, const string& delimiter)
{
    IntList result;
    const auto positions = SplitToPos(s, delimiter);
    const int count = (int)positions.Size();
    for (int idx = 0; idx != count; ++idx)
        result.Add(strtol(s.data() + positions[idx], nullptr, 10));
    return result;
}
/* split a string to a float list */
/* split a string into a list of floats, one per delimited piece */
FloatList SplitFloat(const string& s, const string& delimiter)
{
    FloatList result;
    const auto positions = SplitToPos(s, delimiter);
    const int count = (int)positions.Size();
    for (int idx = 0; idx != count; ++idx)
        result.Add(strtof(s.data() + positions[idx], nullptr));
    return result;
}
}
\ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-06
*/
#ifndef __UTILITY_H__
#define __UTILITY_H__
#include <string>
#include <cstdio>
#include "../../tensor/XList.h"
using namespace std;
using namespace nts;
namespace nmt
{
#define MAX_PARAM_NUM 100
/* load arguments */
void LoadParamInt(int argc, char** argv, const char* name, int* p, int defaultP);
void LoadParamBool(int argc, char** argv, const char* name, bool* p, bool defaultP);
void LoadParamFloat(int argc, char** argv, const char* name, float* p, float defaultP);
void LoadParamString(int argc, char** argv, const char* name, char* p, const char* defaultP);
/* show arguments */
void ShowParams(int argc, char** argv);
/* split string */
IntList SplitInt(const string& s, const string& delimiter);
FloatList SplitFloat(const string& s, const string& delimiter);
UInt64List SplitToPos(const string& s, const string& delimiter);
/* configurations for the model, training and translation */
class Config {
public:
    /* path to the model */
    char modelFN[1024];
    /* path to the source vocab */
    char srcVocabFN[1024];
    /* path to the target vocab */
    char tgtVocabFN[1024];
    /* path to the input file (for inference) */
    char testFN[1024];
    /* path to the output file (for inference) */
    char outputFN[1024];
    /* path to the training file */
    char trainFN[1024];
    /* path to the validation file */
    char validFN[1024];
    /* device id */
    int devID;
    /* beam size */
    int beamSize;
    /* word batch size */
    int wBatchSize;
    /* sentence batch size */
    int sBatchSize;
    /* number of heads in attention */
    int nhead;
    /* number of encoder layers */
    int nEncLayer;
    /* number of decoder layers */
    int nDecLayer;
    /* the maximum relative position in RPR attentions */
    int maxRP;
    /* the dimension of embeddings */
    int embSize;
    /* the dimension of hidden layer */
    int modelSize;
    /* the maximum length in positional embedding */
    int maxPosLen;
    /* the dimension of fnn hidden layer */
    int fnnHiddenSize;
    /* the vocab size of source sequence */
    int srcVocabSize;
    /* the vocab size of target sequence */
    int tgtVocabSize;
    /* the padding id */
    int padID;
    /* start symbol */
    int startID;
    /* end symbol (shares the default value with startID) */
    int endID;
    /* indicates whether the model uses pre-norm */
    bool preNorm;
    /* indicates whether the model is running for machine translation */
    bool isMT;
    /* indicates whether share encoder decoder embeddings
       (an int rather than bool — see the TODO in the constructor) */
    int shareAllEmbeddings;
    /* indicates whether share decoder embeddings and output weights */
    int shareDecInputOutputWeight;
    /* indicates whether the model is running with FP16 data type */
    bool useFP16;
    /* indicates whether we use the RPR attention */
    bool useRPR;
    /* indicates whether we train the model */
    bool isTraining;
    /* dropout rate for the model */
    float dropout;
    /* dropout rate for fnn layers */
    float fnnDropout;
    /* dropout rate for attention layers */
    float attDropout;
    /* the alpha parameter controls the length preference */
    float lenAlpha;
    /* scalar of the input sequence (for max number of search steps) */
    float maxLenAlpha;
    /* learning rate */
    float lrate;
    /* the parameter that controls the maximum learning rate in training */
    float lrbias;
    /* training epoch number */
    int nepoch;
    /* training step number */
    int nstep;
    /* the maximum number of saved checkpoints */
    int maxCheckpoint;
    /* indicates whether we use Adam */
    bool useAdam;
    /* hyper parameters of Adam */
    float adamBeta1;
    float adamBeta2;
    float adamDelta;
    /* step number of warm-up for training */
    int nwarmup;
    /* indicates whether the data file is shuffled for training */
    bool isShuffled;
    /* the factor of label smoothing */
    float labelSmoothingP;
    /* number of steps after which we make a checkpoint */
    int nStepCheckpoint;
    /* indicates whether we make a checkpoint after each training epoch */
    bool useEpochCheckpoint;
    /* number of batches on which we do model update */
    int updateStep;
    /* indicates whether the sequence is sorted by length */
    bool isLenSorted;
    /* buffer size */
    int bufSize;
    /* indicates whether we double the </s> symbol for the output of LM */
    bool isDoubledEnd;
    /* indicates whether we use batchsize = max * sc
       rather than batchsize = word-number, where max is the maximum
       length and sc is the sentence number */
    bool isSmallBatch;
    /* counterpart of "isSmallBatch" */
    bool isBigBatch;
    /* randomize batches */
    bool isRandomBatch;
    /* bucket size */
    int bucketSize;

public:
    /* load configurations from the command */
    Config(int argc, const char** argv);

    /* load configurations from a file */
    int LoadFromFile(const char* configFN, char** args);
};
}
#endif
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04, 2020-06
*/
#ifndef __ATTENTION_H__
#define __ATTENTION_H__
#include "NNUtil.h"
#include "../Utility.h"
#include "../../../network/XNet.h"
#include "../../../tensor/core/CHeader.h"
using namespace nts;
namespace nmt
{
/* attention type */
enum { NONE, SELF_ATT, EN_DE_ATT }; /* no attention / self-attention / encoder-decoder cross-attention */
/* layer cache for keys and values */
class Cache
{
public:
    /* cache for keys, (B, L, H) */
    XTensor key;

    /* cache for values, (B, L, H) */
    XTensor value;

public:
    /* indicates cache miss if 'true' */
    bool miss;

    /* indicates whether we use cache */
    bool enable;

    /* constructor */
    Cache();

    /* update the states cache (takes ownership of k/v via rvalue refs) */
    void Update(XTensor&& k, XTensor&& v);

    /* keep alive states; presumably keeps only the rows selected by
       aliveIdx during beam pruning — TODO confirm against the caller */
    void KeepAlive(XTensor& aliveIdx);

    /* reorder alive states according to the given index tensor */
    void Reorder(XTensor& reorder);
};
/* multi-head attention */
class Attention
{
public:
    /* device id */
    int devID;

    /* head number */
    int nhead;

    /* transformation matrix for Q */
    XTensor weightQ;

    /* bias for Q */
    XTensor biasQ;

    /* transformation matrix for K */
    XTensor weightK;

    /* bias for K */
    XTensor biasK;

    /* transformation matrix for V */
    XTensor weightV;

    /* bias for V */
    XTensor biasV;

    /* fused transformation matrix/bias; presumably a single big projection
       covering Q, K and V at once — TODO confirm in InitModel/Make */
    XTensor wBig;
    XTensor bBig;

    /* RPR emb (relative position embeddings for keys) */
    XTensor RPEmbK;

    /* transformation after dot-product attention */
    XTensor weightO;

    /* bias after dot-product attention */
    XTensor biasO;

    /* size of transformed Q and K */
    int dk;

    /* size of transformed V */
    int dv;

    /* size of input Q, K and V */
    int d;

    /* indicates whether we use the RPR attention */
    bool useRPR;

    /* dropout probability */
    DTYPE dropoutP;

    /* the maximum relative window size */
    int maxRP;

public:
    /* constructor */
    Attention();

    /* de-constructor */
    ~Attention();

    /* initialize the model */
    void InitModel(Config& config);

    /* make the network */
    XTensor Make(XTensor& k, XTensor& q, XTensor& v,
                 XTensor* mask, bool isTraining,
                 Cache* cache, int cacheType);

    /* make the attention network given keys, queries and values (after linear transformation) */
    XTensor MakeAttention(XTensor& k, XTensor& q, XTensor& v,
                          XTensor* mask, bool isTraining);

    /* make the RPR attention network given keys, queries and values (after linear transformation) */
    XTensor MakeRPRAttention(XTensor& k, XTensor& q, XTensor& v,
                             XTensor* mask, bool isTraining, bool isEnc);

    /* generate relative position embeddings */
    XTensor GetRPEmbedding(const int lenQ, const int lenKV, const int maxRelativeLen, const bool isEnc);

    /* relative position-aware dot-product attention inner calculation */
    XTensor RPDotProduct(XTensor& x, XTensor& y, XTensor& z, const bool is_key);
};
}
#endif
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Bei Li (libei_neu@outlook.com) 2020-02-05
* This file includes some common modules of the Transformer model
*/
#include "CommonModules.h"
#include "../../../tensor/core/CHeader.h"
#include "../../../tensor/function/FHeader.h"
namespace nmt
{
/*
flexible layer normalization for the Transformer
>> input - input tensor
>> ln - the layernorm network
>> prenorm - whether we use prenorm or not
>> before - whether we use layernorm before attention/fnn
>> after - whether we use layernorm after attention/fnn
*/
/*
flexible layer normalization for the Transformer
>> input - input tensor
>> ln - the layernorm network
>> prenorm - whether we use prenorm or not
>> before - whether we use layernorm before attention/fnn
           (note: currently unused in this function)
>> after - whether we use layernorm after attention/fnn
normalization is applied when exactly one of (after, prenorm) holds,
i.e. prenorm models normalize in the "before" call and postnorm models
in the "after" call.
*/
XTensor LayerNorm(XTensor& input, LN& ln, bool prenorm, bool before, bool after)
{
    const bool applyNorm = (after != prenorm);
    if (applyNorm)
        return ln.Make(input);
    return input;
}
}
\ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Bei Li (libei_neu@outlook.com) 2020-02-03
*/
#ifndef __COMMONMODULE_H__
#define __COMMONMODULE_H__
#include "LayerNorm.h"
#include "CommonModules.h"
using namespace nts;
namespace nmt
{
/* the layer normalization module to control pre-norm or post-norm*/
XTensor LayerNorm(XTensor& input, LN& ln, bool prenorm, bool before, bool after);
}
#endif
\ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-08-01
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-07
*/
#include "Embedding.h"
#include "../Utility.h"
#include "../../../tensor/core/CHeader.h"
namespace nmt
{
/* constructor */
/* constructor: put every scalar member into a well-defined "not yet
   initialized" state; the original left d, eSize and padIdx
   uninitialized until InitModel was called */
Embedder::Embedder()
{
    devID = -1;
    vSize = -1;
    maxLength = -1;
    d = -1;
    eSize = -1;
    padIdx = -1;
}
/* de-constructor */
Embedder::~Embedder()
{
    /* nothing to release explicitly: all members clean up themselves */
}
/*
initialize the model
>> config - configurations of the model
>> isEnc - indicates if it is used for the encoder
*/
/*
initialize the model
>> config - configurations of the model
>> isEnc - indicates if it is used for the encoder
*/
void Embedder::InitModel(Config& config, bool isEnc)
{
    devID = config.devID;
    d = config.modelSize;
    padIdx = config.padID;
    eSize = config.embSize;

    /* pick the vocabulary of the side this embedder serves */
    vSize = isEnc ? config.srcVocabSize : config.tgtVocabSize;

    InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);

    /* reserve two extra position slots beyond the configured maximum
       (same "+1+1" adjustment as the original code) */
    maxLength = config.maxPosLen + 2;

    /* random init of the word embeddings, stddev = 1/sqrt(eSize) */
    DTYPE stddev = 1.0F / (float)sqrt((float)eSize);
    w.SetDataRandn(0, stddev);

    /* create the positional embedding matrix */
    MakePosEmbedding(maxLength);
}
/*
make positional embeddings (of size eSize * length)
>> length - length of the sequence
*/
/*
make positional embeddings (of size eSize * length) with the usual
sin/cos scheme: the first eSize/2 channels are sines, the next eSize/2
are cosines
>> length - length of the sequence
*/
void Embedder::MakePosEmbedding(int length)
{
    InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);

    /* value-initialize the buffer so that any element not written below
       (e.g. the last column when eSize is odd, since 2*(eSize/2) < eSize)
       is 0 instead of garbage */
    float* data = new float[posEmbeddingBase.unitNum]();

    for (int pos = 0; pos < length; pos++) {
        float* dp = data + pos * eSize;

        int channelSize = eSize / 2;
        int offset = 0;
        for (int i = 0; i < channelSize; i++) {
            dp[offset++] = (float)sin(pos * exp(-i * log(10000.0F) / (channelSize - 1)));
        }
        for (int i = 0; i < channelSize; i++) {
            dp[offset++] = (float)cos(pos * exp(-i * log(10000.0F) / (channelSize - 1)));
        }
    }

    /* zero the row of the padding index so padded tokens get no position
       signal; guard against an out-of-range padIdx */
    if (padIdx >= 0 && padIdx < length) {
        int padStart = padIdx * eSize;
        for (int i = padStart; i < padStart + eSize; i++)
            data[i] = 0.F;
    }

    posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);

    if (w.dataType != posEmbeddingBase.dataType)
        posEmbeddingBase = ConvertDataType(posEmbeddingBase, w.dataType);

    delete[] data;
}
/*
make the network
>> input - the word indices
>> nstep - the length of current sequence
>> isDec - indicates whether it is decoder
>> isTraining - indicates whether it is training
<< return - word & position embeddings of the input
*/
/*
make the network
>> input - the word indices, shape (batch, length)
>> isDec - indicates whether it is decoder
>> isTraining - indicates whether it is training
>> nstep - the position of the current decoding step (used only when
           decoding one token at a time)
<< return - word & position embeddings of the input
*/
XTensor Embedder::Make(XTensor& input, bool isDec, bool isTraining, int nstep)
{
    /* make sure the padding index is 1 */
    CheckNTErrors(input.order > 1, "Wrong input tensor size!");
    CheckNTErrors(input.dimSize[input.order - 1] < maxLength, "The sequence is too long!");
    CheckNTErrors(vSize > 0, "Set vocabulary size by \"-vsize\"");
    CheckNTErrors(eSize > 0, "Set embedding size by \"-esize\"");

    XTensor wordEmbedding, position, posEmbedding;
    InitTensor1D(&position, input.GetDim(-1), X_INT, devID);

    if (!isDec || isTraining || input.GetDim(-1) > 1)
    {
        /* encoder input, training, or a multi-token decoder input:
           positions are 0..len-1 shifted past the padding index so that
           position padIdx itself is never used (its row is zeroed in
           MakePosEmbedding) */
        position.Range(0, position.unitNum, 1);

        // disable grad
        ScaleAndShiftMe(position, 1.0F, float(padIdx + 1));
    }
    else
    {
        /* decoder embeddings during decoding: a single token at absolute
           step 'nstep', shifted by the same padIdx + 1 offset */
        position.SetDataFixed(nstep + padIdx + 1);
    }

    /* we make positional embeddings first */
    XTensor embTMP;

    embTMP = Gather(posEmbeddingBase, position);
    /* broadcast the position row over the batch dimension */
    posEmbedding = Unsqueeze(embTMP, 0, input.GetDim(0));

    /* then we make word embeddings */
    //w.enableGrad = false;
    wordEmbedding = Gather(w, input);
    /* scale by sqrt(eSize) as in the Transformer paper */
    wordEmbedding = Linear(wordEmbedding, (float)sqrt((float)eSize));

    /* we sum over the two embeddings (in place, into wordEmbedding) */
    SumMe(wordEmbedding, posEmbedding);

    return wordEmbedding;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2020 NiuTrans Research. All rights reserved.
* All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
...@@ -17,33 +16,32 @@ ...@@ -17,33 +16,32 @@
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-08-01 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-08-01
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-07
*/ */
#ifndef __T2TEMBEDDING_H__ #ifndef __EMBEDDING_H__
#define __T2TEMBEDDING_H__ #define __EMBEDDING_H__
#include "../../network/XNet.h" #include "../Utility.h"
#include "../../../network/XNet.h"
using namespace nts; using namespace nts;
namespace transformer namespace nmt
{ {
#define DEFAULT_EMBEDDING_SIZE 512 #define DEFAULT_EMBEDDING_SIZE 512
/* /*
embedding (of word at position i): embedding (of word at position i):
word embedding + positional embedding word embedding + positional embedding
*/ */
class T2TEmbedder class Embedder
{ {
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -53,34 +51,34 @@ public: ...@@ -53,34 +51,34 @@ public:
/* maximum length of the sequence */ /* maximum length of the sequence */
int maxLength; int maxLength;
/* dimension size of the hidden layers in the t2t model */ /* dimension size of the hidden layers in the model */
int d; int d;
/* padding index */
int padIdx;
/* word embedding matrix */ /* word embedding matrix */
XTensor w; XTensor w;
/* predefined positional embeddings. It can speeds up /* predefined positional embeddings. It can speeds up
the embedding processing by re-loading. */ the embedding processing by re-loading. */
XTensor posEmbeddingBase; XTensor posEmbeddingBase;
/* positional embeddings */
XTensor posEmbedding;
public: public:
/* constructor */ /* constructor */
T2TEmbedder(); Embedder();
/* de-constructor */ /* de-constructor */
~T2TEmbedder(); ~Embedder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(Config& config, bool isEnc = true);
/* make positional embeddings */ /* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length); void MakePosEmbedding(int length);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor& input, bool isDec, bool isTraining, int nstep = 0);
}; };
} }
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2020 NiuTrans Research. All rights reserved.
* All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
...@@ -17,82 +16,81 @@ ...@@ -17,82 +16,81 @@
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/ */
#include <math.h> #include "FNN.h"
#include "T2TFNN.h" #include "Embedding.h"
#include "T2TUtility.h" #include "../Utility.h"
#include "T2TEmbedding.h" #include "../../../tensor/core/CHeader.h"
#include "../../tensor/core/CHeader.h" #include "../../../tensor/function/FHeader.h"
#include "../../tensor/function/FHeader.h"
namespace transformer namespace nmt
{ {
/* constructor */ /* constructor */
T2TFNN::T2TFNN() FNN::FNN()
{ {
inSize = -1; inSize = -1;
outSize = -1; outSize = -1;
hSize = -1; hSize = -1;
} }
/* deconstructor */ /* de-constructor */
T2TFNN::~T2TFNN() FNN::~FNN()
{ {
} }
/* /*
initialize the model initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> config - configurations of the model
>> myMem - the memory pool
*/ */
void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem) void FNN::InitModel(Config& config)
{ {
devID = myDevID; devID = config.devID;
mem = myMem;
float minmax = 0;
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE); inSize = config.modelSize;
LoadParamInt(argc, argv, "d", &outSize, DEFAULT_EMBEDDING_SIZE); outSize = config.modelSize;
LoadParamInt(argc, argv, "fnnh", &hSize, DEFAULT_EMBEDDING_SIZE); hSize = config.fnnHiddenSize;
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); dropoutP = config.fnnDropout;
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1D(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem); InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem); InitTensor1D(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize)); _SetDataFanInOut(&w1, scale);
float finfout2 = (float)sqrt(6.0F * scale/(hSize + outSize)); _SetDataFanInOut(&w2, scale);
w1.SetDataRand(-finfout1, finfout1); w1.SetDataRand(-(DTYPE)sqrt(6.0F / inSize), (DTYPE)sqrt(6.0F / inSize));
w2.SetDataRand(-(DTYPE)sqrt(6.0F / hSize), (DTYPE)sqrt(6.0F / hSize));
b1.SetZeroAll(); b1.SetZeroAll();
w2.SetDataRand(-finfout2, finfout2);
b2.SetZeroAll(); b2.SetZeroAll();
} }
/* /*
make the network make the network
y = max(0, x * w1 + b1) * w2 + b2 y = max(0, x * w1 + b1) * w2 + b2
>> input - the input tensor >> input - the input tensor
>> return - the output tensor >> return - the output tensor
*/ */
XTensor T2TFNN::Make(XTensor &input) XTensor FNN::Make(XTensor& input, bool isTraining)
{ {
XTensor t1; XTensor t1;
/* t1 = max(0, x * w1 + b1) */ /* t1 = max(0, x * w1 + b1) */
t1 = Rectify(MMul(input, w1) + b1); t1 = Rectify(MulAndShift(input, w1, b1));
if (isTraining && dropoutP > 0)
t1 = Dropout(t1, dropoutP);
/* result = t1 * w2 + b2 */ /* result = t1 * w2 + b2 */
return MMul(t1, w2) + b2; return MulAndShift(t1, w2, b2);
} }
}
} \ No newline at end of file
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
* $Modified by: HU Chi (huchinlp@gmail.com) 2020-04
*/
#ifndef __FNN_H__
#define __FNN_H__
#include "LayerNorm.h"
#include "../Utility.h"
#include "../../../tensor/XTensor.h"
using namespace nts;
namespace nmt
{
/* a fnn: y = max(0, x * w1 + b1) * w2 + b2 */
class FNN
{
public:
    /* device id */
    int devID;

    /* size of input vector */
    int inSize;

    /* size of output vector */
    int outSize;

    /* size of hidden layers */
    int hSize;

    /* matrix of transformation 1 (inSize x hSize) */
    XTensor w1;

    /* bias of transformation 1 */
    XTensor b1;

    /* matrix of transformation 2 (hSize x outSize) */
    XTensor w2;

    /* bias of transformation 2 */
    XTensor b2;

    /* dropout probability applied to the hidden layer during training */
    DTYPE dropoutP;

public:
    /* constructor */
    FNN();

    /* de-constructor */
    ~FNN();

    /* initialize the model */
    void InitModel(Config& config);

    /* make the network */
    XTensor Make(XTensor& input, bool isTraining);
};
}
#endif
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Bei Li (libei_neu@outlook.com) 2020-02-03
*/
#include "GLU.h"
#include "Embedding.h"
#include "../Utility.h"
#include "../../../tensor/core/CHeader.h"
#include "../../../tensor/function/FHeader.h"
namespace nmt
{
/*
constructor
set every scalar member to a safe sentinel; real values are
assigned later in InitModel()
*/
GLU::GLU()
{
    /* fix: devID was previously left uninitialized, holding an
       indeterminate value until InitModel() was called */
    devID = -1;
    inSize = -1;
    outSize = -1;
    hSize = -1;
}
/* de-constructor: nothing to release explicitly — the XTensor members
   are cleaned up by their own destructors */
GLU::~GLU()
{
}
/*
initialize the model: record sizes from the configuration and
allocate the two linear transformations (w1/b1 and w2/b2)
>> config - configurations of the model
*/
void GLU::InitModel(Config& config)
{
    devID = config.devID;
    inSize = config.modelSize;
    outSize = config.modelSize;

    /* fix: hSize was never assigned here, so the tensor shapes below
       used the constructor's -1 sentinel. Make() splits the input into
       two hSize-sized halves, so hSize is taken from the model size —
       TODO confirm against the caller's configuration */
    hSize = config.modelSize;

    InitTensor2D(&w1, hSize, outSize, X_FLOAT, devID);
    InitTensor1D(&b1, outSize, X_FLOAT, devID);
    InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID);
    InitTensor1D(&b2, outSize, X_FLOAT, devID);
}
/*
make the network (gated linear unit):
y = (W1 * x + b1) * sigmoid(W2 * x + b2)
>> input - the input tensor, size = 2 * hSize
>> return - the output tensor, size = hSize
*/
XTensor GLU::Make(XTensor& input)
{
    TensorList halves;

    /* split the input along the last dimension into two hSize-sized parts */
    Split(input, halves, -1, 2);

    /* linear value path: W1 * x + b1 */
    XTensor value = MulAndShift(halves.GetItem(0), w1, b1);

    /* gate path: W2 * x + b2 */
    XTensor gate = MulAndShift(halves.GetItem(1), w2, b2);

    /* element-wise modulation of the value by the sigmoid gate */
    return value * Sigmoid(gate);
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University. * Copyright (C) 2020 NiuTrans Research. All rights reserved.
* All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
...@@ -16,29 +15,27 @@ ...@@ -16,29 +15,27 @@
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31 * $Created by: Bei Li (libei_neu@outlook.com) 2020-02-03
*/ */
#ifndef __T2TFNN_H__
#define __T2TFNN_H__
#include "../../tensor/XTensor.h" #ifndef __GLU_H__
#define __GLU_H__
#include "LayerNorm.h"
using namespace nts; using namespace nts;
namespace transformer namespace nmt
{ {
/* a fnn: y = max(0, x * w1 + b1) * w2 + b2 */ /* a fnn: y = max(0, x * w1 + b1) * w2 + b2 */
class T2TFNN class GLU
{ {
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* size of input vector */ /* size of input vector */
int inSize; int inSize;
...@@ -63,19 +60,18 @@ public: ...@@ -63,19 +60,18 @@ public:
public: public:
/* constructor */ /* constructor */
T2TFNN(); GLU();
/* deconstructor */ /* de-constructor */
~T2TFNN(); ~GLU();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(Config& config);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor& input);
}; };
} }
#endif #endif
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论