Commit 7786f2b7 by 张裕浩

Test and merge.

parent 467c2ed7
...@@ -27,7 +27,6 @@
#include "../tensor/test/Test.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
//#include "../tensor/timer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
...@@ -36,37 +35,19 @@
void BackwardTest();
void TransposeTest();
void SumDimTest();
void SplitBackwardTest();
void MemTest();
using namespace nts;
using namespace fnnlm;
using namespace transformer;
void test()
{
XTensor a;
InitTensor2D(&a, 100, 100, X_FLOAT, 0);
XTensor b;
InitTensor2D(&b, 100, 100, X_FLOAT16, 0);
_ConvertDataType(&a, &b);
return;
}
int main( int argc, const char ** argv )
{
-    //timer_c asd;
-    test();
-    //MemTest();
-    //return 0;
-    //SplitBackwardTest();
-    //return 0;
-    //_CrtSetBreakAlloc(896);
-    //BackwardTest();
-    //return 0;
-    /*if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
+    //_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
+    //_CrtSetBreakAlloc(2708);
+    if(argc > 1 && !strcmp(argv[1], "-test"))
+        Test();
+    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
        FNNLMMain(argc - 1, argv + 1);
    else if(argc > 1 && !strcmp(argv[1], "-t2t"))
        TransformerMain(argc - 1, argv + 1);
...@@ -75,7 +56,8 @@ int main( int argc, const char ** argv )
        fprintf(stderr, "neural networks in an easy way. \n\n");
        fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
-    }*/
+        fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
+    }
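The dispatch above matches the help text: with no recognized option the program prints the usage hints, otherwise it runs the unit tests, the FNNLM sample, or the Transformer sample, forwarding the remaining arguments (argc - 1, argv + 1) unchanged. A usage sketch (the binary name "NiuTensor" is only an assumption; it depends on how the project is built):

    NiuTensor -test                          run the unit tests
    NiuTensor -fnnlm <fnnlm arguments>       run the sample FNN language model
    NiuTensor -t2t <transformer arguments>   run the sample Transformer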
//_CrtDumpMemoryLeaks();
...@@ -89,6 +71,9 @@ void BackwardTest()
XTensor a;
XTensor b;
XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean;
XTensor origin;
InitTensor2D(&a, 2, 3);
...@@ -106,14 +91,15 @@ void BackwardTest()
b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1);
-    c = DivDim(a, b, 0);
+    DivDim(a, b, c, 0);
c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c);
-    net.Backward(c);
+    net.Backward(loss);
-    net.Dump(stderr);
+    a.grad->Dump(stderr);
}
...@@ -229,67 +215,3 @@ void SumDimTest()
delete[] data;
}
void SplitBackwardTest()
{
int * dimSize = new int[2];
dimSize[0] = 2;
dimSize[1] = 4;
XTensor t1;
InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
XTensor t2;
InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
XTensor tensor;
//_SetDataFixedFloat(&t1, 1.0F);
//_SetDataFixedFloat(&t2, 2.0F);
t1.SetDataRand();
t2.SetDataRand();
tensor = t1 + t2;
XList smalls;
XTensor first;
XTensor second;
InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
smalls.Add(&first);
smalls.Add(&second);
Split(tensor, smalls, 1, 2);
XTensor mul;
mul = Sum(first, second);
XNet net;
net.Backward(mul);
net.Dump(stderr);
printf("Done!");
}
void MemTest()
{
XMem * mem;
mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION, 1024, MILLION);
XTensor tensor;
InitTensor2D(&tensor, 2, 4, X_FLOAT, 0, mem);
tensor.SetZeroAll();
tensor.Dump(stderr);
delete mem;
if (tensor.mem != NULL) {
printf("It isn't null!\n");
printf("%d\n", (int)tensor.mem->signature);
}
else {
printf("It's null\n");
}
tensor.Dump(stderr);
}
\ No newline at end of file
...@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH)
-    _HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+    _HardTanHBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_IDENTITY)
-    _IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+    _IdentityBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_LOGSOFTMAX){
    int leadDim = income.GetParamInt(0);
    CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
    _LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS);
}
else if(operID == FUNC_RECTIFY)
-    _RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+    _RectifyBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SIGMOID)
-    _SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+    _SigmoidBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SOFTMAX){
    int leadDim = income.GetParamInt(0);
    CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
......
...@@ -20,7 +20,9 @@
*/
#include "XBackwardLoss.h"
#include "XNoder.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/getandset/SetData.h"
#include "../tensor/function/HardTanH.h"
#include "../tensor/function/Identity.h"
...@@ -31,6 +33,60 @@
namespace nts{
/* compute dE/dx of a node */
void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(income.tailNum >= 1, "Wrong number of tensors for loss computation!");
XTensor * output = income.tails[0];
XTensor * gold = NULL;
XTensor * weight = NULL;
XTensor * padding = NULL;
int leadingDim;
XNoder::MakeGrad(output);
XTensor * dedy = output->grad;
if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else
ShowNTErrors("TODO");
return;
}
gold = income.tails[1];
if(operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3)
padding = income.tails[2];
leadingDim = income.GetParamInt(0);
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in cross entropy!");
_CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
}
else{
ShowNTErrors("Wrong activation function type!");
}
node->visitMark = NODE_FINISHED;
}
/* indicates whether the node is for a loss computation */
bool XLossGrad::IsLossOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & LOSS_BASE) != 0;
}
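With this change the loss is an ordinary node in the computation graph: CrossEntropy() links a LOSS_CROSSENTROPY node to the network output, XNet::Backward() is called on that node, and XLossGrad::MakeGrad() seeds dE/dy before the rest of the graph is visited. A minimal usage sketch, not part of the commit; the tensor sizes and the gold tensor are made up for illustration, and the calls simply mirror BackwardTest() above:

    /* sketch: driving the new loss-based backward pass */
    XTensor x;
    XTensor gold;
    InitTensor2DV2(&x, 2, 3, X_FLOAT, -1);      /* -1 = CPU; any device id works */
    InitTensor2DV2(&gold, 2, 3, X_FLOAT, -1);
    x.enableGrad = true;                        /* keep the gradient for x */
    x.SetDataRand();
    gold.SetDataRand();

    XTensor y = Softmax(x, -1);                 /* forward pass */
    XTensor loss = CrossEntropy(y, gold);       /* appends the loss node to the graph */

    XNet net;
    net.Backward(loss);                         /* XLossGrad::MakeGrad seeds dE/dy */
    x.grad->Dump(stderr, "dE/dx:");             /* gradients now live on x.grad */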
/* /*
compute dE/dx for a given function y = f(x) compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss) >> gold - gold standard to measure error (or loss)
...@@ -42,39 +98,39 @@ compute dE/dx for a given function y = f(x) ...@@ -42,39 +98,39 @@ compute dE/dx for a given function y = f(x)
>> params - parameters of the function >> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy >> lossName - name of the loss, e.g., cross entropy
*/ */
//void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
//                        XTensor * dedy, XTensor * dedx, XTensor * padding,
//                        int funcID, void * params,
//                        LOSS_FUNCTION_NAME lossName)
//{
//    CheckNTErrors(gold && y && x, "Empty input tensors!");
//    CheckNTErrors(dedx, "Empty gradient tensors!");
//    CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
//
//    if(funcID == FUNC_HARDTANH){
//        _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
//    }
//    else if(funcID == FUNC_IDENTITY){
//        _IdentityBackward(gold, y, x, dedy, dedx, lossName);
//    }
//    else if(funcID == FUNC_LOGSOFTMAX){
//        int leadDim = *(int*)params;
//        _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
//    }
//    else if(funcID == FUNC_RECTIFY){
//        _RectifyBackward(gold, y, x, dedy, dedx, lossName);
//    }
//    else if(funcID == FUNC_SIGMOID){
//        _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
//    }else if(funcID == FUNC_SOFTMAX){
//        int leadDim = *(int*)params;
//        _SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
//    }
//    else{
//        ShowNTErrors("wrong function found when call the backward process!");
//    }
//
//}
/*
compute dE/dy for variable y and error(loss) function E
...@@ -83,27 +139,27 @@ compute dE/dy for variable y and error(loss) function E
>> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy
*/
//void XLossGrad::Compute(XTensor * gold, XTensor * y,
//                        XTensor * dedy, XTensor * padding,
//                        LOSS_FUNCTION_NAME lossName)
//{
//    if(gold == NULL){
//        if(dedy->dataType == X_FLOAT)
//            _SetDataFixedFloat(dedy, 1.0F);
//        else if(dedy->dataType == X_DOUBLE)
//            _SetDataFixedDouble(dedy, 1.0);
//        else if(dedy->dataType == X_INT)
//            _SetDataFixedInt(dedy, 1);
//        else{
//            ShowNTErrors("TODO");
//        }
//        return;
//    }
//
//    //_LossBackward(dedy, gold, y, lossName);
//    if(lossName == CROSSENTROPY)
//        _CrossEntropyBackward(dedy, y, gold, NULL, padding);
//
//}
}
\ No newline at end of file
...@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__
...@@ -34,11 +35,19 @@ namespace nts{
class XLossGrad
{
public:
-    /* compute dE/dx for a given function y = f(x) */
-    void Compute(XTensor * gold, XTensor * y, XTensor * x,
-                 XTensor * dedy, XTensor * dedx, XTensor * padding,
-                 int funcID, void * params,
-                 LOSS_FUNCTION_NAME lossName);
+    /* compute dE/dx of a node */
+    static
+    void MakeGrad(XTensor * node, bool isEfficient);
+
+    /* indicates whether the node is for a loss computation */
+    static
+    bool IsLossOP(XTensor * node);
+
+    ///* compute dE/dx for a given function y = f(x) */
+    //void Compute(XTensor * gold, XTensor * y, XTensor * x,
+    //             XTensor * dedy, XTensor * dedx, XTensor * padding,
+    //             int funcID, void * params,
+    //             LOSS_FUNCTION_NAME lossName);

/* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y,
......
...@@ -109,6 +109,11 @@ private: ...@@ -109,6 +109,11 @@ private:
static static
void GradMultiplyDim(XTensor * node, bool isEfficient); void GradMultiplyDim(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b
where some dimensions of b are of size 1 */
static
void GradMultiplyBroadcast(XTensor * node, bool isEfficient);
/* gradient for negate */ /* gradient for negate */
static static
void GradNegate(XTensor * node, bool isEfficient); void GradNegate(XTensor * node, bool isEfficient);
...@@ -125,14 +130,26 @@ private: ...@@ -125,14 +130,26 @@ private:
static static
void GradScaleAndShift(XTensor * node, bool isEfficient); void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Scale */
static
void GradScale(XTensor * node, bool isEfficient);
/* gradient for Shift */
static
void GradShift(XTensor * node, bool isEfficient);
/* gradient for Descale */
static
void GradDescale(XTensor * node, bool isEfficient);
/* gradient for Minus */ /* gradient for Minus */
static static
void GradSub(XTensor * node, bool isEfficient); void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta /* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSubDim(XTensor * node, bool isEfficient); void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */ /* gradient for sum: c = a + b * \beta */
static static
...@@ -143,6 +160,11 @@ private: ...@@ -143,6 +160,11 @@ private:
static static
void GradSumDim(XTensor * node, bool isEfficient); void GradSumDim(XTensor * node, bool isEfficient);
/* gradient for sum by broadcasting: c = a + b * \beta
where some dimensions of b are of size 1 */
static
void GradSumBroadcast(XTensor * node, bool isEfficient);
/* gradient for reduceMean */ /* gradient for reduceMean */
static static
void GradReduceMean(XTensor * node, bool isEfficient); void GradReduceMean(XTensor * node, bool isEfficient);
...@@ -158,6 +180,10 @@ private: ...@@ -158,6 +180,10 @@ private:
/* gradient for reduceVariance */ /* gradient for reduceVariance */
static static
void GradReduceVariance(XTensor * node, bool isEfficient); void GradReduceVariance(XTensor * node, bool isEfficient);
/* gradient for operation */
static
void GradMulAndShift(XTensor * node, bool isEfficient);
}; };
} }
......
...@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent) ...@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent); GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER) else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent); GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE) else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent); GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST) else if(operID == SHAPE_MERGE_LIST)
...@@ -62,7 +64,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent) ...@@ -62,7 +64,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
} }
} }
/* indicates whether the node is for a shape operation */
bool XShapeGrad::IsShapeOP(XTensor * node) bool XShapeGrad::IsShapeOP(XTensor * node)
{ {
XLink &income = node->income; XLink &income = node->income;
...@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{
    XLink &income = node->income;
-    CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!");
+    CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
    XTensor * input = income.tails[0];
    XTensor * index = income.tails[1];
...@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
    node->visitMark = NODE_FINISHED;
}
/*
gradient computation for DropoutWithIndex function
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for DropoutWithIndex!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
DTYPE scale = income.GetParam(0);
XNoder::MakeGrad(input);
//_Identity(node->grad, input->grad);
_CopyValues(node->grad, input->grad);
int order = node->grad->order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = node->grad->dimSize[i];
}
int order1 = 1;
int * dimSize1 = new int[order1];
dimSize1[0] = input->grad->unitNum;
input->grad->Reshape(order1, dimSize1);
_DropoutWithIndex(node->grad, index, input->grad);
_ScaleAndShiftMe(input->grad, scale);
input->grad->Reshape(order, dimSize);
node->visitMark = NODE_FINISHED;
}
/* /*
gradient for merge gradient for merge
for for
...@@ -232,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient) ...@@ -232,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!"); CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL; XTensor * last = NULL;
XList smalls(income.tailNum); TensorList smalls(income.tailNum);
XList smallsGrad(income.tailNum); TensorList smallsGrad(income.tailNum);
bool mergeOnly = true; bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){ for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i]; XTensor * tail = income.tails[i];
...@@ -242,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient) ...@@ -242,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
smallsGrad.Add(tail->grad); smallsGrad.Add(tail->grad);
if(i > 1){ if(i > 1){
CheckNTErrors(XTensor::IsSameShaped(last, tail), CheckNTErrors(_IsSameShaped(last, tail),
"Input tensors must be of the same size!"); "Input tensors must be of the same size!");
} }
...@@ -401,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -401,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
/* we compute the gradient for current node, rather than for /* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */ child node, i.e., we use the outgoing edge here */
XLink &outgo = node->outgo; XLink &outgo = node->outgo;
XList splits(outgo.tailNum); TensorList splits(outgo.tailNum);
int whereToSplit = -1; int whereToSplit = -1;
int splitNum = 0; int splitNum = 0;
...@@ -411,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -411,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){ if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0); int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1); int splitID = income.GetParamInt(1);
if(whereToSplit < 0) if(whereToSplit < 0)
whereToSplit = w; whereToSplit = w;
splitNum++; splitNum++;
......
...@@ -54,6 +54,10 @@ private: ...@@ -54,6 +54,10 @@ private:
static static
void GradGather(XTensor * node, bool isEfficent); void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */ /* gradient computation for merge: c = merge(a, b, ...) */
static static
void GradMerge(XTensor * node, bool isEfficent); void GradMerge(XTensor * node, bool isEfficent);
......
...@@ -55,7 +55,7 @@ void XNetClearAll()
XNet::XNet()
{
    nodes.Clear();
-    isGradEfficient = false;
+    isGradEfficient = true;
}

/* de-constructor */
...@@ -77,104 +77,20 @@ backward propagation to obtain gradient
>> root - root node (output) of the network
>> loss - name of loss function
*/
-void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(XTensor &root)
{
-    XList roots(1);
+    TensorList roots(1);
    roots.Add(&root);
-    XList golds(1);
-    golds.Add(NULL);
-    XList paddings(1);
-    paddings.Add(NULL);
-    Backward(roots, golds, paddings, loss);
+    Backward(roots);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(&gold);
XList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> padding - specify a target value that is ignored and does not contribute to the gradient computation
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(&gold);
XList paddings(1);
paddings.Add(&padding);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
{
XList golds(roots.count);
XList paddings(roots.count);
for (int i = 0; i < roots.count; i++) {
golds.Add(NULL);
paddings.Add(NULL);
}
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
XList paddings(roots.count);
for (int i = 0; i < roots.count; i++)
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
} }
/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> roots - a list of root nodes (output) of the network
->> golds - a list of gold standard for the output
->> paddings - specify a target value that is ignored
->> loss - name of loss function
*/
-void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(TensorList &roots)
{
    Traverse(roots);
...@@ -187,39 +103,6 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N
        node->visitMark = NODE_UNFINISHED;
    }
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XTensor * padding = (XTensor*)paddings.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is no use in the
folloing process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, padding, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
else {
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, padding, loss);
}
}
/* we compuate dE/dy (y is the output) if no predefined activation function is used */
else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, NULL, loss);
}
}
/* back-propagation from output to input */ /* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){ for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i); XTensor * node = (XTensor*)nodes.Get(i);
...@@ -266,6 +149,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent) ...@@ -266,6 +149,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
XFuncGrad::MakeGrad(node, isEfficent); XFuncGrad::MakeGrad(node, isEfficent);
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent); XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
...@@ -300,7 +185,7 @@ depth-first search (Tarjan's algorithm) ...@@ -300,7 +185,7 @@ depth-first search (Tarjan's algorithm)
*/ */
void XNet::Traverse(XTensor &root) void XNet::Traverse(XTensor &root)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
Traverse(roots); Traverse(roots);
...@@ -311,7 +196,7 @@ traverse the net and find the topological order by ...@@ -311,7 +196,7 @@ traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) depth-first search (Tarjan's algorithm)
>> roots - a list of roots (or output nodes) >> roots - a list of roots (or output nodes)
*/ */
void XNet::Traverse(XList &roots) void XNet::Traverse(TensorList &roots)
{ {
id = MakeNetID(); id = MakeNetID();
nodes.Clear(); nodes.Clear();
...@@ -336,7 +221,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering) ...@@ -336,7 +221,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering)
>> orders - topological order of the nodes >> orders - topological order of the nodes
>> code - code of the network >> code - code of the network
*/ */
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code) void XNet::TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code)
{ {
if(node == NULL) if(node == NULL)
return; return;
...@@ -444,7 +329,7 @@ show network topology ...@@ -444,7 +329,7 @@ show network topology
*/ */
void XNet::ShowNetwork(FILE * file, XTensor * node) void XNet::ShowNetwork(FILE * file, XTensor * node)
{ {
XList roots(1); TensorList roots(1);
roots.Add(node); roots.Add(node);
Traverse(roots); Traverse(roots);
...@@ -458,4 +343,14 @@ void XNet::ShowNetwork(FILE * file, XTensor * node) ...@@ -458,4 +343,14 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
} }
} }
-}
-\ No newline at end of file
+/*
+search for a node in a top-down manner by its name
+>> top - the top most node
+<< return - the node we found
+*/
+//XTensor * XNet::SearchNode(XTensor * top, const char * name)
+//{
+//    return XLink::SearchNode(top, name);
+//}
+}
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h" #include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h" #include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XNET_H__ #ifndef __XNET_H__
#define __XNET_H__ #define __XNET_H__
...@@ -36,16 +37,16 @@ struct XNet ...@@ -36,16 +37,16 @@ struct XNet
unsigned int id; unsigned int id;
/* tensor nodes of the network (in order) */ /* tensor nodes of the network (in order) */
XList nodes; TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/ /* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes; TensorList gradNodes;
/* output nodes of the network */ /* output nodes of the network */
XList outputs; TensorList outputs;
/* input nodes of the network */ /* input nodes of the network */
XList inputs; TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */ /* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient; bool isGradEfficient;
...@@ -60,25 +61,11 @@ struct XNet ...@@ -60,25 +61,11 @@ struct XNet
void Clear(); void Clear();
/* backward propagation to obtain gradient */ /* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(XTensor &root);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false); void BackwardNode(XTensor * node, bool isEfficent = false);
...@@ -92,10 +79,10 @@ struct XNet ...@@ -92,10 +79,10 @@ struct XNet
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots); void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */ /* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code); void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */ /* dump network information */
void Dump(FILE * file); void Dump(FILE * file);
...@@ -111,6 +98,10 @@ struct XNet ...@@ -111,6 +98,10 @@ struct XNet
/* show network topology */ /* show network topology */
void ShowNetwork(FILE * file, XTensor * node); void ShowNetwork(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */
//static
//XTensor * SearchNode(XTensor * top, const char * name);
}; };
/* we make a unique id for every tensor */ /* we make a unique id for every tensor */
......
...@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node) ...@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL) if(node == NULL)
return; return;
if(!XTensor::IsSameShaped(node, node->grad)){ if(!_IsSameShaped(node, node->grad)){
delete node->grad; delete node->grad;
node->grad = NewTensor(node); node->grad = NewTensor(node);
node->grad->SetZeroAll(); node->grad->SetZeroAll();
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/ */
#include "../tensor/XTensor.h" #include "../tensor/core/CHeader.h"
#ifndef __XNODER_H__ #ifndef __XNODER_H__
#define __XNODER_H__ #define __XNODER_H__
......
...@@ -51,14 +51,12 @@ initialize the model
>> myIgnored - number of positions ignored in attention (from the beginning)
>> myIsMasked - indicates whether the attention is with a mask
>> myDevID - device id
->> myMem - the memory pool
*/
void T2TAttention::InitModel(int argc, char ** argv,
                             bool myIsMasked, int myIgnored,
-                            int myDevID, XMem * myMem)
+                            int myDevID)
{
    devID = myDevID;
-    mem = myMem;
isMasked = myIsMasked;
ignored = myIgnored;
...@@ -71,20 +69,18 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);

-    InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
-    InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
-    InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem);
-    InitTensor2D(&wa, d, d, X_FLOAT, devID, mem);
+    InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
+    InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
+    InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
+    InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
+    InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);

-    float scale = 1.0F;
-    float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
-    float finfoutv = (float)sqrt(6.0F * scale/(d + dv));
-    float finfouta = (float)sqrt(6.0F * scale / (d + d));
-    wk.SetDataRand(-finfoutk, finfoutk);
-    wq.SetDataRand(-finfoutk, finfoutk);
-    wv.SetDataRand(-finfoutv, finfoutv);
-    wa.SetDataRand(-finfouta, finfouta);
+    float scale = 1.0F;
+    _SetDataFanInOut(&wk, scale);
+    _SetDataFanInOut(&wq, scale);
+    _SetDataFanInOut(&wv, scale);
+    _SetDataFanInOut(&wa, scale);
+    _SetDataFanInOut(&wbig, scale);
}
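The deleted per-matrix initialization drew each weight uniformly from a symmetric interval derived from the matrix shape; _SetDataFanInOut presumably applies the same fan-in/fan-out rule in one call (this reading is an assumption based on the deleted finfout* lines rather than on the function's implementation). As a formula:

    W_{ij} ~ U( -sqrt(6 * scale / (n_in + n_out)),  +sqrt(6 * scale / (n_in + n_out)) )

with n_in + n_out = d + dk for wq and wk, d + dv for wv, and 2d for wa, exactly the bounds the removed code computed by hand.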
/*
...@@ -103,40 +99,88 @@ XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bo
XTensor k2;
XTensor q2;
XTensor v2;

-    /* linear transofmration before self-attention */
+    /* linear transformation before self-attention */
    k2 = MMul(k, wk);
    q2 = MMul(q, wq);
    v2 = MMul(v, wv);

    return MakeAttention(k2, q2, v2, mask, isTraining);
}
/*
make the network given a big tensor that keeps keys, queries and values
>> kqv - the big tensor
>> mask - as it is
>> isTraining - indicates whether the model is used for training
*/
XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
{
XTensor k2;
XTensor q2;
XTensor v2;
XTensor kqv2;
TensorList split;
kqv2 = MMul(kqv, wbig);
int d1 = kqv2.GetDim(0);
int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3;
InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2);
split.Add(&k2);
split.Add(&v2);
Split(kqv2, split, 2, 3);
return MakeAttention(k2, q2, v2, mask, isTraining);
}
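MakeBig above replaces the three separate projections with one fused projection: a single matrix wbig of shape d x 3d produces queries, keys and values in one MMul, and Split then slices the last dimension into three equal parts (the slices are collected in the order q, k, v, matching the order in which they are added to the split list). As an equation:

    [Q ; K ; V] = X * W_big,   W_big in R^{d x 3d},   Q, K, V each of size B x L x d

Functionally this should give the same result as Make(k, q, v, ...) when k = q = v = kqv, which is why the self-attention calls in the encoder and decoder switch to MakeBig.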
/*
make the attention network given keys, queries and values (after linear transformation)
>> k - keys. It might be of size B * L * H
where B = batch size, L = sequence length,
and H = vector size of each position
>> q - queries
>> v - values
>> mask - as it is
>> isTraining - indicates whether the model is used for training
*/
XTensor T2TAttention::MakeAttention(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining)
{
XTensor kheads;
XTensor qheads;
XTensor vheads;

/* multi head */
-    kheads = Split(k2, k2.order - 1, nhead);
-    qheads = Split(q2, q2.order - 1, nhead);
-    vheads = Split(v2, v2.order - 1, nhead);
+    kheads = Split(k, k.order - 1, nhead);
+    qheads = Split(q, q.order - 1, nhead);
+    vheads = Split(v, v.order - 1, nhead);

XTensor att;
XTensor dot;
XTensor scalar;

/* scalar = softmax(Q * K^T / sqrt(dk)) * V */
dot = BMMul(qheads, X_NOTRANS, kheads, X_TRANS);

if(isMasked)
    dot = dot + mask;

dot = Linear(dot, 1.0F/(float)sqrt((float)dk/nhead));
scalar = Softmax(dot, -1);

if(isTraining && dropoutP > 0)
    scalar = Dropout(scalar, dropoutP);

att = BMMul(scalar, vheads);

/* concatenate the heads */
return MMul(Merge(att, att.order - 1), wa);
}
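Written out, MakeAttention computes the standard scaled dot-product attention per head; note that dk here is the total key dimension, so dk/nhead is the per-head size used in the scaling factor:

    Attention(Q, K, V) = softmax( Q K^T / sqrt(dk / h) + M ) V

where h is the number of heads and M is the additive mask (applied only when isMasked is set). Dropout on the softmax output is used only at training time, and the per-head results are merged back and projected with wa.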
......
...@@ -42,9 +42,6 @@ public: ...@@ -42,9 +42,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* head number */ /* head number */
int nhead; int nhead;
...@@ -59,7 +56,9 @@ public: ...@@ -59,7 +56,9 @@ public:
/* transformation after dot-product attention */ /* transformation after dot-product attention */
XTensor wa; XTensor wa;
XTensor wbig;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
...@@ -92,10 +91,16 @@ public: ...@@ -92,10 +91,16 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining); XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
/* make the network given a big tensor that keeps keys, queries and values */
XTensor MakeBig(XTensor &kqv, XTensor &mask, bool isTraining);
/* make the attention network given keys, queries and values (after linear transformation) */
XTensor MakeAttention(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
}; };
} }
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-25
* it is cold today but i'll move to a warm place tomorrow :)
*/
#ifndef __T2TBATCHLOADER_H__
#define __T2TBATCHLOADER_H__
#include "../../network/XNet.h"
using namespace nts;
namespace transformer
{
#define MAX_SEQUENCE_LENGTH 1024 * 4
/* node to keep batch information */
struct BatchNode
{
/* beginning position */
int beg;
/* end position */
int end;
/* maximum word number on the encoder side */
int maxEnc;
/* maximum word number on the decoder side */
int maxDec;
/* a key for sorting */
int key;
};
class T2TBatchLoader
{
public:
/* buffer for loading words */
int * buf;
/* another buffer */
int * buf2;
/* batch buf */
BatchNode * bufBatch;
/* buffer size */
int bufSize;
/* size of batch buffer */
int bufBatchSize;
/* length of each sequence */
int * seqLen;
/* another array */
int * seqLen2;
/* offset of the first word for each sequence */
int * seqOffset;
/* number of sequences in the buffer */
int nseqBuf;
/* offset for next sequence in the buffer */
int nextSeq;
/* offset for next batch */
int nextBatch;
/* indicates whether we double the </s> symbol for the output of lms */
bool isDoubledEnd;
/* indicates whether we use batchsize = max * sc
rather than batchsize = word-number, where max is the maximum
length and sc is the sentence number */
bool isSmallBatch;
/* counterpart of "isSmallBatch" */
bool isBigBatch;
/* randomize batches */
bool isRandomBatch;
/* bucket size */
int bucketSize;
public:
/* constructor */
T2TBatchLoader();
/* de-constructor */
~T2TBatchLoader();
/* initialization */
void Init(int argc, char ** argv);
/* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step);
/* clear data buffer */
void ClearBuf();
/* set the random batch flag */
void SetRandomBatch(bool flag = true);
/* load a batch of sequences */
int LoadBatch(FILE * file, bool isLM,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, bool isTraining);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
};
}
#endif
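A rough usage sketch of this loader, based only on the declarations above; the file name, vocabulary sizes and batch limits are made-up placeholders, and the exact meaning of the return value (presumably the number of sequences loaded) is an assumption:

    T2TBatchLoader loader;
    loader.Init(argc, argv);                      /* read buffer size, bucket size, etc. from the command line */

    FILE * file = fopen("train.data", "rb");      /* hypothetical preprocessed training file */
    XTensor batchEnc, paddingEnc, batchDec, paddingDec, gold, label;
    int ws = 0, wCount = 0;

    /* fill one MT batch: vsEnc/vsDec are source/target vocabulary sizes,
       sBatch/wBatch the sentence- and word-level batch limits */
    int sCount = loader.LoadBatch(file, false,
                                  &batchEnc, &paddingEnc, &batchDec, &paddingDec,
                                  &gold, &label, NULL,
                                  30000, 30000, 64, 4096,
                                  true, ws, wCount, 0, true);
    fclose(file);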
\ No newline at end of file
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include <math.h> #include <math.h>
#include "T2TDecoder.h" #include "T2TDecoder.h"
#include "T2TUtility.h"
#include "T2TLayerNormal.h"
#include "../../tensor/core/CHeader.h" #include "../../tensor/core/CHeader.h"
namespace transformer namespace transformer
...@@ -29,6 +31,10 @@ namespace transformer ...@@ -29,6 +31,10 @@ namespace transformer
/* constructor */ /* constructor */
AttDecoder::AttDecoder() AttDecoder::AttDecoder()
{ {
attentions = NULL;
fnns = NULL;
attLayerNorms = NULL;
fnnLayerNorms = NULL;
attentionsEnde = NULL; attentionsEnde = NULL;
attEndeLayerNorms = NULL; attEndeLayerNorms = NULL;
} }
...@@ -36,6 +42,10 @@ AttDecoder::AttDecoder() ...@@ -36,6 +42,10 @@ AttDecoder::AttDecoder()
/* de-constructor */ /* de-constructor */
AttDecoder::~AttDecoder() AttDecoder::~AttDecoder()
{ {
delete[] attentions;
delete[] fnns;
delete[] attLayerNorms;
delete[] fnnLayerNorms;
delete[] attentionsEnde; delete[] attentionsEnde;
delete[] attEndeLayerNorms; delete[] attEndeLayerNorms;
} }
...@@ -47,21 +57,43 @@ initialize the model ...@@ -47,21 +57,43 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void AttDecoder::InitModel(int argc, char ** argv,
                           bool myIsMasked, int myIgnored,
-                          int myDevID, XMem * myMem)
+                          int myDevID)
{
-    AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
+    //AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);
CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
/* embedding model */
embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
attLayerNorms = new T2TLN[nlayer];
fnnLayerNorms = new T2TLN[nlayer];
attentionsEnde = new T2TAttention[nlayer];
attEndeLayerNorms = new T2TLN[nlayer];

/* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) {
-    attentionsEnde[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
-    attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
+    attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
+    fnns[i].InitModel(argc, argv, myDevID);
+    attLayerNorms[i].InitModel(argc, argv, myDevID);
+    fnnLayerNorms[i].InitModel(argc, argv, myDevID);
+    attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
+    attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
}
}
...@@ -93,7 +125,7 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/******************/
/* self attention */
-    att = attentions[i].Make(x, x, x, mask, isTraining);
+    att = attentions[i].MakeBig(x, mask, isTraining);
/* dropout */ /* dropout */
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
...@@ -133,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X ...@@ -133,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
} }
x.SetName(DECODING_NAME);
return x; return x;
} }
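Each decoder layer therefore follows the same post-norm residual pattern as the encoder: sublayer, dropout, residual add, then layer normalization. The residual additions themselves sit in the elided context lines, so the exact expressions below are inferred rather than quoted from the diff:

    x <- LN( x + SelfAtt(x) )
    x <- LN( x + EncDecAtt(x, outputEnc) )
    x <- LN( x + FNN(x) )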
......
...@@ -26,10 +26,57 @@ ...@@ -26,10 +26,57 @@
namespace transformer namespace transformer
{ {
#define DECODING_NAME "decoding"
#define DECODING_INPUT_NAME "decoding_input"
class AttDecoder : public AttEncoder class AttDecoder
{ {
public: public:
/* device id */
int devID;
/* layer number */
int nlayer;
/* hidden layer size of the FNN layer */
int hSize;
/* embedding size */
int eSize;
/* vocabulary size */
int vSize;
/* dropout probability */
DTYPE dropoutP;
/* some positions can be ignored in attention. this is useful in lm where the first position needs
* special design for the attention model. */
int ignored;
/* embedding of word at each position */
T2TEmbedder embedder;
/* FNN model of each layer */
T2TFNN * fnns;
/* attention model of each layer */
T2TAttention * attentions;
/* layer normalization for fnn */
T2TLN * fnnLayerNorms;
/* layer normalization for attention */
T2TLN * attLayerNorms;
/* input tensor of the decoder */
XTensor * input;
/* output tensor of the encoder */
XTensor * output;
/* encoder-decoder attention model of each layer */ /* encoder-decoder attention model of each layer */
T2TAttention * attentionsEnde; T2TAttention * attentionsEnde;
...@@ -45,7 +92,7 @@ public: ...@@ -45,7 +92,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the decoding network */ /* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder() T2TEmbedder::T2TEmbedder()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
maxLength = -1; maxLength = -1;
} }
...@@ -46,19 +45,23 @@ initialize the model ...@@ -46,19 +45,23 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
-void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{
    devID = myDevID;
-    mem = myMem;
-    LoadParamInt(argc, argv, "vsize", &vSize, -1);
+    if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1);
}
else{
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
}
//LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt(argc, argv, "maxlen", &maxLength, 512); LoadParamInt(argc, argv, "maxlen", &maxLength, 512);
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize); DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v); w.SetDataRandn(0, v);
...@@ -75,7 +78,7 @@ make positional embeddings (of size eSize * length) ...@@ -75,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/ */
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{ {
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum]; float * data = new float[posEmbeddingBase.unitNum];
...@@ -139,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input) ...@@ -139,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */ /* we make positional embeddings first */
//if(!match){ //if(!match){
if(true){ if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem); XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0); _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]); _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
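For reference, MakePosEmbedding fills posEmbeddingBase with one row per position; assuming it follows the usual Transformer sinusoidal scheme (the loop that writes the data array lies outside the shown context, so this is an assumption rather than a quote of the code):

    PE(pos, 2i)   = sin( pos / 10000^{2i/d} )
    PE(pos, 2i+1) = cos( pos / 10000^{2i/d} )

The resulting table is broadcast along dimension 0 by _Unsqueeze and then combined with the word embeddings.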
...@@ -41,9 +41,6 @@ public: ...@@ -41,9 +41,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -71,7 +68,7 @@ public: ...@@ -71,7 +68,7 @@ public:
~T2TEmbedder(); ~T2TEmbedder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
/* make positional embeddings */ /* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length); void MakePosEmbedding(int eSize, int d, int length);
......
...@@ -52,15 +52,12 @@ initialize the model
>> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
->> myDevID - device id
->> myMem - the memory pool
-*/
+>> myDevID - device id*/
void AttEncoder::InitModel(int argc, char ** argv,
                           bool myIsMasked, int myIgnored,
-                          int myDevID, XMem * myMem)
+                          int myDevID)
{
    devID = myDevID;
-    mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem); embedder.InitModel(argc, argv, devID);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){ for(int i = 0; i < nlayer; i++){
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
...@@ -103,8 +100,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo ...@@ -103,8 +100,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
x = embedder.Make(input); x = embedder.Make(input);
//x.Dump(tmpFILE, "embedding: ");
/* dropout */ /* dropout */
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
x = Dropout(x, dropoutP); x = Dropout(x, dropoutP);
...@@ -116,8 +111,8 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo ...@@ -116,8 +111,8 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
XTensor res; XTensor res;
/* self attention */ /* self attention */
att = attentions[i].Make(x, x, x, mask, isTraining); att = attentions[i].MakeBig(x, mask, isTraining);
/* dropout */ /* dropout */
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
att = Dropout(att, dropoutP); att = Dropout(att, dropoutP);
...@@ -141,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo ...@@ -141,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
} }
x.SetName(ENCODING_NAME);
input.SetName(ENCODING_INPUT_NAME);
return x; return x;
} }
......
...@@ -32,6 +32,9 @@ using namespace nts; ...@@ -32,6 +32,9 @@ using namespace nts;
namespace transformer namespace transformer
{ {
#define ENCODING_NAME "encoding"
#define ENCODING_INPUT_NAME "encoding_input"
/* /*
base class of the encoder base class of the encoder
...@@ -62,9 +65,6 @@ public: ...@@ -62,9 +65,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -115,7 +115,7 @@ public: ...@@ -115,7 +115,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the encoding network */ /* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -47,12 +47,10 @@ initialize the model ...@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -62,19 +60,17 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -62,19 +60,17 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem); InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem); InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize)); _SetDataFanInOut(&w1, scale);
float finfout2 = (float)sqrt(6.0F * scale/(hSize + outSize)); _SetDataFanInOut(&w2, scale);
w1.SetDataRand(-finfout1, finfout1);
b1.SetZeroAll(); b1.SetZeroAll();
w2.SetDataRand(-finfout2, finfout2);
b2.SetZeroAll(); b2.SetZeroAll();
} }
...@@ -89,13 +85,15 @@ XTensor T2TFNN::Make(XTensor &input, bool isTraining) ...@@ -89,13 +85,15 @@ XTensor T2TFNN::Make(XTensor &input, bool isTraining)
XTensor t1; XTensor t1;
/* t1 = max(0, x * w1 + b1) */ /* t1 = max(0, x * w1 + b1) */
t1 = Rectify(MMul(input, w1) + b1); //t1 = Rectify(MMul(input, w1) + b1);
t1 = Rectify(MulAndShift(input, w1, b1));
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
t1 = Dropout(t1, dropoutP); t1 = Dropout(t1, dropoutP);
/* result = t1 * w2 + b2 */ /* result = t1 * w2 + b2 */
return MMul(t1, w2) + b2; //return MMul(t1, w2) + b2;
return MulAndShift(t1, w2, b2);
} }
......
...@@ -36,9 +36,6 @@ public: ...@@ -36,9 +36,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* size of input vector */ /* size of input vector */
int inSize; int inSize;
...@@ -72,7 +69,7 @@ public: ...@@ -72,7 +69,7 @@ public:
~T2TFNN(); ~T2TFNN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input, bool isTraining); XTensor Make(XTensor &input, bool isTraining);
......
...@@ -32,7 +32,6 @@ namespace transformer ...@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN() T2TLN::T2TLN()
{ {
devID = -1; devID = -1;
mem = NULL;
d = 0; d = 0;
} }
...@@ -46,18 +45,16 @@ initialize the model ...@@ -46,18 +45,16 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
d = 0; d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem); InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID, mem); InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F); w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll(); b.SetZeroAll();
......
...@@ -36,9 +36,6 @@ class T2TLN ...@@ -36,9 +36,6 @@ class T2TLN
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the transformation matrix w */ /* the transformation matrix w */
XTensor w; XTensor w;
...@@ -57,7 +54,7 @@ public: ...@@ -57,7 +54,7 @@ public:
~T2TLN(); ~T2TLN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "../../tensor/core/CHeader.h"
#include "T2TLengthPenalty.h"
using namespace nts;
namespace transformer
{
/*
GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence
>> length - length of the sequence (for each entry)
>> alpha - the parameter controls the length preference
<< return - length penalty of the sequence (for each entry)
*/
XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
{
XTensor base;
XTensor lp;
//base = ScaleAndShift(ScaleAndShift(length, 0, 5.0F), 1.0F/(5 + 1));
base = (length + 5)/(1 + 5);
lp = Power(base, alpha);
return lp;
}
}
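As a quick check of the formula above (an illustrative sketch, not part of this commit; the helper name is hypothetical), the scalar form of the penalty can be computed directly; for example, with alpha = 0.6 a sequence of length 10 gets lp = ((5 + 10)/6)^0.6 ≈ 1.73:
#include <math.h>
/* illustrative only: scalar form of the GNMT-like length penalty above */
float GNMTPenaltyScalar(int n, float alpha)
{
    return powf((5.0F + (float)n) / (5.0F + 1.0F), alpha);
}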
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-08
* Start of a new week - I just finished several documents.
 * Writing documents is harder than writing code :)
*/
#ifndef __T2TLENGTHPENALTY_H__
#define __T2TLENGTHPENALTY_H__
#include "../../tensor/XTensor.h"
using namespace nts;
namespace transformer
{
/* We intend to penalize short sequences because they get a higher score
from the product of a sequence of probability-like terms and thus have more
chances to beat others in search. */
class T2TLengthPenalizer
{
public:
/* GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence */
static
XTensor GNMT(const XTensor & length, float alpha);
};
}
#endif
...@@ -31,15 +31,15 @@ ...@@ -31,15 +31,15 @@
namespace transformer namespace transformer
{ {
/* a transformer model that keeps the parameters of the encoder,
the decoder and the output layer (softmax). Also, it creates
the network used in the transformer. */
class T2TModel class T2TModel
{ {
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the encoder */ /* the encoder */
AttEncoder * encoder; AttEncoder * encoder;
...@@ -78,10 +78,24 @@ public: ...@@ -78,10 +78,24 @@ public:
void MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining); void MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining);
/* make the network for machine translation (with the output softmax layer) */ /* make the network for machine translation (with the output softmax layer) */
void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, XTensor &paddingDec, bool isTraining); void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output,
XTensor &paddingEnc, XTensor &paddingDec, bool isTraining);
/* make the mask for training MT models */
void MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
XTensor &paddingEnc, XTensor &paddingDec,
XTensor &maskEnc, XTensor &maskDec, XTensor &maskEncDec);
/* make the mask of the encoder */
void MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &maskEnc);
/* make the mask of the decoder */
void MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
XTensor &paddingEnc, XTensor &paddingDec,
XTensor &maskDec, XTensor &maskEncDec);
/* get parameter matrices */ /* get parameter matrices */
void GetParams(XList &list); void GetParams(TensorList &list);
/* dump the parameters */ /* dump the parameters */
void Dump(const char * fn); void Dump(const char * fn);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TOutput::T2TOutput() T2TOutput::T2TOutput()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
inSize = -1; inSize = -1;
hSize = -1; hSize = -1;
...@@ -47,21 +46,19 @@ initialize the model ...@@ -47,21 +46,19 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
LoadParamInt(argc, argv, "vsize", &vSize, -1); LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F); LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize)); float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
...@@ -93,8 +90,9 @@ void T2TOutput::Make(XTensor &input, XTensor &output) ...@@ -93,8 +90,9 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{ {
XTensor &x = input; XTensor &x = input;
output = LogSoftmax(MMul(x, w), -1); //output = LogSoftmax(MMul(x, w), -1);
//output = Softmax(MMul(x, w), -1); output = Softmax(MMul(x, w), -1);
output.SetName(OUTPUT_NAME);
} }
} }
...@@ -28,6 +28,8 @@ using namespace nts; ...@@ -28,6 +28,8 @@ using namespace nts;
namespace transformer namespace transformer
{ {
#define OUTPUT_NAME "output"
/* output layer */ /* output layer */
class T2TOutput class T2TOutput
...@@ -36,9 +38,6 @@ public: ...@@ -36,9 +38,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -59,7 +58,7 @@ public: ...@@ -59,7 +58,7 @@ public:
~T2TOutput(); ~T2TOutput();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/
#include "T2TPredictor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
namespace transformer
{
/* constructor */
T2TStateBundle::T2TStateBundle()
{
states = NULL;
isStart = false;
}
/* de-constructor */
T2TStateBundle::~T2TStateBundle()
{
if(states != NULL)
delete[] states;
}
/*
create states
>> num - number of states
*/
void T2TStateBundle::MakeStates(int num)
{
CheckNTErrors(num > 0, "invalid number");
if(states != NULL)
delete[] states;
states = new T2TState[num];
for(int i = 0; i < num; i++){
states[i].prediction = -1;
states[i].pid = T2T_PID_EMPTY;
states[i].isEnd = false;
states[i].isStart = false;
states[i].isCompleted = false;
states[i].prob = 0;
states[i].probPath = 0;
states[i].modelScore = 0;
states[i].nstep = 0;
states[i].last = NULL;
}
stateNum = num;
}
/* constructor */
T2TPredictor::T2TPredictor()
{
startSymbol = -1;
}
/* de-constructor */
T2TPredictor::~T2TPredictor()
{
}
/*
create an initial state
>> model - the t2t model
>> top - the top-most layer of the network
>> input - input of the network
>> beamSize - beam size
>> state - the state to be initialized
*/
void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state)
{
state->layersEnc.Clear();
state->layersDec.Clear();
XTensor * encoding = XLink::SearchNode(top, ENCODING_NAME);
CheckNTErrors(encoding != NULL, "No encoding layers found!");
state->layersEnc.Add(encoding);
state->layersDec.Add(NULL);
int dims[MAX_TENSOR_DIM_NUM];
for (int i = 0; i < input->order - 1; i++)
dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize;
InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);
state->probPath.SetZeroAll();
state->nstep.SetZeroAll();
state->endMark.SetZeroAll();
state->stateNum = 0;
}
/*
set start symbol
>> symbol - the symbol (in integer)
*/
void T2TPredictor::SetStartSymbol(int symbol)
{
startSymbol = symbol;
}
/*
read a state
>> model - the t2t model that keeps the network created so far
>> state - a set of states. It keeps
1) hypotheses (states)
2) probabilities of hypotheses
3) parts of the network for expanding toward the next state
*/
void T2TPredictor::Read(T2TModel * model, T2TStateBundle * state)
{
m = model;
s = state;
}
/*
predict the next state
>> next - next states (assuming that the current state has been read)
>> encoding - encoder output
>> inputEnc - input of the encoder
>> paddingEnc - padding of the encoder
*/
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
XTensor * inputEnc, XTensor * paddingEnc)
{
int dims[MAX_TENSOR_DIM_NUM];
next->layersEnc.Clear();
next->layersDec.Clear();
AttDecoder &decoder = *m->decoder;
/* word indices of previous positions */
XTensor * inputLast = (XTensor*)s->layersDec.GetItem(0);
/* word indices of positions up to next state */
XTensor inputDec;
/* the first token */
XTensor first;
CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!");
for(int i = 0; i < inputEnc->order - 1; i++)
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
if (inputLast == NULL) {
inputDec = Identity(first);
}
else{
inputDec = GeneratePaths(s);
inputDec.SetDevice(inputEnc->devID);
inputDec = Concatenate(first, inputDec, inputDec.order - 1);
}
/* prediction probabilities */
XTensor &output = next->prob;
XTensor decoding;
XTensor decodingStep;
for(int i = 0; i < inputDec.order - 1; i++)
dims[i] = inputDec.GetDim(i);
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1);
XTensor maskDec;
XTensor maskEncDec;
/* decoder mask */
m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec);
/* make the decoding network */
decoding = decoder.Make(inputDec, *encoding, maskDec, maskEncDec, false);
XTensor selectSrc;
XTensor selectTgt;
CheckNTErrors(decoding.order >= 2, "The tensor must be of order 2 or larger!");
int stride = decoding.GetDim(decoding.order - 2);
InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1DV2(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0);
selectSrc.SetDevice(decoding.devID);
selectTgt.SetDevice(decoding.devID);
/* the decoder output of the last position */
decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt);
/* generate the output probabilities */
m->outputLayer->Make(decodingStep, output);
next->layersEnc.AddList(&s->layersEnc);
next->layersDec.Add(&inputDec);
next->layersDec.Add(&output);
}
/*
generate paths up to the states of the current step
>> state - state bundle of the current step
*/
XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
{
CheckNTErrors(state->stateNum >= 0, "Illegal state!");
int distance = -1;
for(int i = 0; i < state->stateNum; i++){
T2TState * cur = state->states + i;
int nsteps = 0;
while(cur != NULL){
nsteps++;
cur = cur->last;
}
if(nsteps > distance)
distance = nsteps;
}
XTensor path;
InitTensor2DV2(&path, state->stateNum, distance, X_INT);
path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){
T2TState * cur = state->states + i;
int nsteps = 0;
while(cur != NULL){
nsteps++;
path.Set2DInt(cur->prediction, i, distance - nsteps);
cur = cur->last;
}
}
return path;
}
}
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start!
*/
#ifndef __T2TPREDICTOR_H__
#define __T2TPREDICTOR_H__
#include "T2TModel.h"
#include "T2TLengthPenalty.h"
namespace transformer
{
#define T2T_PID_EMPTY -1
/* state for search. It keeps the path (back-pointer), the prediction distribution,
etc. It can be regarded as a hypothesis in translation. */
class T2TState
{
public:
/* we assume that the prediction is an integer */
int prediction;
/* id of the problem. One can regard it as the sentence id when we
translate a number of sentences in a batched manner. The hypothesis
is empty if id = -1 */
int pid;
/* indicates whether the state is an end */
bool isEnd;
/* indicates whether the state is the start */
bool isStart;
/* indicates whether the state is completed */
bool isCompleted;
/* probability of every prediction (last state of the path) */
float prob;
/* probability of every path */
float probPath;
/* model score of every path. A model score = path probability + some other stuff */
float modelScore;
/* number of steps we have gone over so far */
int nstep;
/* pointer to the previous state */
T2TState * last;
};
/* a bundle of states */
class T2TStateBundle
{
public:
/* predictions */
XTensor prediction;
/* id of the previous state that generates the current one */
XTensor preID;
/* mark that indicates whether each hypothesis is completed */
XTensor endMark;
/* probability of every prediction (last state of the path) */
XTensor prob;
/* probability of every path */
XTensor probPath;
/* model score of every path */
XTensor modelScore;
/* step number of each hypothesis */
XTensor nstep;
/* layers on the encoder side. We actually use the encoder output instead
of all hidden layers. */
TensorList layersEnc;
/* layers on the decoder side */
TensorList layersDec;
/* list of states */
T2TState * states;
/* number of states */
int stateNum;
/* indicates whether it is the first state */
bool isStart;
public:
/* constructor */
T2TStateBundle();
/* de-constructor */
~T2TStateBundle();
/* create states */
void MakeStates(int num);
};
/* The predictor reads the current state and then predicts the next.
It is exactly the same procedure as MT inference -
we take the state of the previous words and then generate the next word.
Here, a state can be regarded as the representation of words (word
indices, hidden states, embeddings, etc.). */
class T2TPredictor
{
private:
/* pointer to the transformer model */
T2TModel * m;
/* current state */
T2TStateBundle * s;
/* start symbol */
int startSymbol;
public:
/* constructor */
T2TPredictor();
/* de-constructor */
~T2TPredictor();
/* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);
/* set the start symbol */
void SetStartSymbol(int symbol);
/* read a state */
void Read(T2TModel * model, T2TStateBundle * state);
/* predict the next state */
void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc);
/* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state);
};
}
#endif
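A minimal usage sketch of the predictor interface declared above (illustrative only; the real driver lives in T2TSearch, and "model", "encoderOutput", "inputEnc", "paddingEnc", "beamSize" and "startSymbol" are placeholders):
T2TPredictor predictor;
T2TStateBundle cur;
T2TStateBundle next;
/* build the initial state from the encoder output and set the start symbol (e.g. the <s> token id) */
predictor.Create(model, encoderOutput, inputEnc, beamSize, &cur);
predictor.SetStartSymbol(startSymbol);
/* one decoding step: read the current hypotheses, then predict distributions
   for the next position; beam pruning is done by the caller (T2TSearch) */
predictor.Read(model, &cur);
predictor.Predict(&next, encoderOutput, inputEnc, paddingEnc);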
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#ifndef __T2TSEARCH_H__
#define __T2TSEARCH_H__
#include "T2TModel.h"
#include "T2TPredictor.h"
namespace transformer
{
/* The class organizes the search process. It calls "predictors" to generate
distributions of the predictions and prunes the search space by beam pruning.
This makes a graph where each path represents a translation hypothesis.
The output can be the path with the highest model score. */
class T2TSearch
{
private:
/* the alpha parameter controls the length preference */
float alpha;
/* predictor */
T2TPredictor predictor;
/* max length of the generated sequence */
int maxLength;
/* beam size */
int beamSize;
/* batch size */
int batchSize;
/* we keep the final hypotheses in a heap for each sentence in the batch. */
XHeap<MIN_HEAP, float> * fullHypos;
/* array of the end symbols */
int * endSymbols;
/* number of the end symbols */
int endSymbolNum;
/* start symbol */
int startSymbol;
public:
/* constructor */
T2TSearch();
/* de-constructor */
~T2TSearch();
/* initialize the model */
void Init(int argc, char ** argv);
/* search for the most promising states */
void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);
/* preparation */
void Prepare(int myBatchSize,int myBeamSize);
/* compute the model score for each hypothesis */
void Score(T2TStateBundle * prev, T2TStateBundle * beam);
/* generate token indices via beam pruning */
void Generate(T2TStateBundle * beam);
/* expand the search graph */
void Expand(T2TStateBundle * prev, T2TStateBundle * beam);
/* collect hypotheses with ending symbol */
void Collect(T2TStateBundle * beam);
/* fill the hypothesis heap with incomplete hypotheses */
void FillHeap(T2TStateBundle * beam);
/* save the output sequences in a tensor */
void Dump(XTensor * output);
/* check if the token is an end symbol */
bool IsEnd(int token);
/* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum);
/* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam);
};
}
#endif
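For orientation, a heavily simplified sketch of how the per-step routines declared above could be chained (illustrative only, not the actual implementation in T2TSearch.cpp, which also initializes the first state bundle and drives the predictor to fill each beam's prediction distribution):
/* illustrative sketch only */
void SearchLoopSketch(T2TSearch &search, XTensor * output,
                      int batchSize, int beamSize, int maxLength)
{
    T2TStateBundle * states = new T2TStateBundle[maxLength + 1];
    search.Prepare(batchSize, beamSize);
    for (int step = 0; step < maxLength; step++) {
        T2TStateBundle * prev = states + step;
        T2TStateBundle * beam = states + step + 1;
        /* in the real code the predictor fills beam->prob before scoring */
        search.Score(prev, beam);   /* model score = path score (+ length penalty) */
        search.Generate(beam);      /* beam pruning over the predictions */
        search.Expand(prev, beam);  /* extend the back-pointers of the search graph */
        search.Collect(beam);       /* move finished hypotheses into the heap */
    }
    search.FillHeap(states + maxLength); /* keep incomplete hypotheses if nothing finished */
    search.Dump(output);                 /* write the best paths into the output tensor */
    delete[] states;
}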
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include <math.h>
#include "T2TUtility.h"
#include "T2TTester.h"
#include "T2TSearch.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h"
using namespace nts;
namespace transformer
{
/* constructor */
T2TTester::T2TTester()
{
}
/* de-constructor */
T2TTester::~T2TTester()
{
}
/* initialize the model */
void T2TTester::Init(int argc, char ** argv)
{
LoadParamInt(argc, argv, "vsize", &vSize, 1);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
batchLoader.Init(argc, argv);
seacher.Init(argc, argv);
}
/*
test the model
>> fn - test data file
>> ofn - output data file
>> model - model that is trained
*/
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
{
int wc = 0;
int ws = 0;
int wordCount = 0;
int wordCountTotal = 0;
int sentCount = 0;
int batchCount = 0;
float loss = 0;
/* data files */
FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot read the test file");
FILE * ofile = fopen(ofn, "wb");
CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID;
XNet net;
double startT = GetClockSec();
wordCount = 0;
/* batch of input sequences */
XTensor batchEnc;
XTensor batchDec;
/* label */
XTensor label;
/* padding */
XTensor paddingEnc;
XTensor paddingDec;
/* gold standard */
XTensor gold;
/* an array that keeps the sequences */
int * seqs = new int[MILLION];
batchLoader.SetRandomBatch(false);
batchLoader.ClearBuf();
while(batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label,
seqs, vSize, vSizeTgt,
1, 1, false, ws, wc, devID, false))
{
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!");
CheckNTErrors(!model->isLM, "Only MT model is supported!");
XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output);
Dump(ofile, &output);
float prob = 0;
loss += -prob;
wc = batchEnc.GetDim(-1);
wordCount += wc;
wordCountTotal += wc;
sentCount += batchEnc.GetDim(-2);
batchCount += 1;
if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
}
}
fclose(file);
fclose(ofile);
delete[] seqs;
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, exp(loss/wordCount));
}
/*
dump the result into the file
>> file - data file
>> output - output tensor
*/
void T2TTester::Dump(FILE * file, XTensor * output)
{
int seqLength = output->GetDim(-1);
for (int i = 0; i < output->unitNum; i += seqLength) {
for (int j = 0; j < seqLength; j++) {
int w = output->GetInt(i + j);
fprintf(file, "%d ", w);
if (w < 0)
break;
}
fprintf(file, "\n");
}
}
}
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :)
*/
#ifndef __T2TTESTER_H__
#define __T2TTESTER_H__
#include "T2TSearch.h"
#include "T2TBatchLoader.h"
namespace transformer
{
/* This class translates test sentences with a trained model. */
class T2TTester
{
public:
/* vocabulary size of the source side */
int vSize;
/* vocabulary size of the target side */
int vSizeTgt;
/* for batching */
T2TBatchLoader batchLoader;
/* decoder for inference */
T2TSearch seacher;
public:
/* constructor */
T2TTester();
/* de-constructor */
~T2TTester();
/* initialize the model */
void Init(int argc, char ** argv);
/* test the model */
void Test(const char * fn, const char * ofn, T2TModel * model);
/* dump the result into the file */
void Dump(FILE * file, XTensor * output);
};
}
#endif
\ No newline at end of file
...@@ -23,11 +23,9 @@ ...@@ -23,11 +23,9 @@
#define __T2TTRAINER_H__ #define __T2TTRAINER_H__
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TBatchLoader.h"
#include "../../tensor/function/FHeader.h" #include "../../tensor/function/FHeader.h"
#define MAX_SEQUENCE_LENGTH 1024 * 4
using namespace nts; using namespace nts;
namespace transformer namespace transformer
...@@ -42,33 +40,6 @@ public: ...@@ -42,33 +40,6 @@ public:
/* parameter array */ /* parameter array */
char ** argArray; char ** argArray;
/* buffer for loading words */
int * buf;
/* another buffer */
int * buf2;
/* buffer size */
int bufSize;
/* length of each sequence */
int * seqLen;
/* another array */
int * seqLen2;
/* offset of the first word for each sequence */
int * seqOffset;
/* number of sequences in the buffer */
int nseqBuf;
/* offset for next sequence in the buffer */
int nextSeq;
/* indicates whether the sequence is sorted by length */
bool isLenSorted;
/* dimension size of each inner layer */ /* dimension size of each inner layer */
int d; int d;
...@@ -111,10 +82,10 @@ public: ...@@ -111,10 +82,10 @@ public:
float adamBeta2T; float adamBeta2T;
/* list of the moment of the parameter matrices */ /* list of the moment of the parameter matrices */
XList moments; TensorList moments;
/* list of the 2nd order moment of the parameter matrices */ /* list of the 2nd order moment of the parameter matrices */
XList moments2nd; TensorList moments2nd;
/* indicates whether the data file is shuffled for training */ /* indicates whether the data file is shuffled for training */
bool isShuffled; bool isShuffled;
...@@ -130,20 +101,15 @@ public: ...@@ -130,20 +101,15 @@ public:
/* number of batches on which we do model update */ /* number of batches on which we do model update */
int updateStep; int updateStep;
/* indicates whether we double the </s> symbol for the output of lms */
bool isDoubledEnd;
/* indicates whether we use batchsize = max * sc
rather than batchsize = word-number, where max is the maximum
length and sc is the sentence number */
bool isSmallBatch;
/* counterpart of "isSmallBatch" */ /* indicates whether we intend to debug the net */
bool isBigBatch; bool isDebugged;
/* indicates whether we use small memory footprint for backward process */ /* indicates whether the sequence is sorted by length */
bool isSmallFootprint; bool isLenSorted;
/* for batching */
T2TBatchLoader batchLoader;
public: public:
/* constructor */ /* constructor */
...@@ -163,46 +129,6 @@ public: ...@@ -163,46 +129,6 @@ public:
/* make a checkpoint */ /* make a checkpoint */
void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id); void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id);
/* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step);
/* clear data buffer */
void ClearBuf();
/* load a batch of sequences */
int LoadBatch(FILE * file, bool isLM,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold,
int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold,
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
/* get word probabilities for a batch of sequences */ /* get word probabilities for a batch of sequences */
float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs); float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TUtility.h" #include "T2TUtility.h"
#include "T2TTrainer.h" #include "T2TTrainer.h"
#include "T2TPredictor.h"
#include "T2TTester.h"
#include "../../tensor/XDevice.h" #include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h" #include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h" #include "../../tensor/XGlobal.h"
...@@ -36,8 +38,6 @@ int TransformerMain(int argc, const char ** argv) ...@@ -36,8 +38,6 @@ int TransformerMain(int argc, const char ** argv)
{ {
if(argc == 0) if(argc == 0)
return 1; return 1;
fprintf(stderr, "%e\n", log(1e-8F));
char ** args = new char*[argc]; char ** args = new char*[argc];
for(int i = 0; i < argc; i++){ for(int i = 0; i < argc; i++){
...@@ -49,6 +49,7 @@ int TransformerMain(int argc, const char ** argv) ...@@ -49,6 +49,7 @@ int TransformerMain(int argc, const char ** argv)
ShowParams(argc, args); ShowParams(argc, args);
bool isBeamSearch = false;
char * trainFN = new char[MAX_LINE_LENGTH]; char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH]; char * modelFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH]; char * testFN = new char[MAX_LINE_LENGTH];
...@@ -58,8 +59,10 @@ int TransformerMain(int argc, const char ** argv) ...@@ -58,8 +59,10 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString(argc, args, "model", modelFN, ""); LoadParamString(argc, args, "model", modelFN, "");
LoadParamString(argc, args, "test", testFN, ""); LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, args, "output", outputFN, ""); LoadParamString(argc, args, "output", outputFN, "");
LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
T2TTrainer trainer; T2TTrainer trainer;
trainer.Init(argc, args); trainer.Init(argc, args);
...@@ -78,12 +81,22 @@ int TransformerMain(int argc, const char ** argv) ...@@ -78,12 +81,22 @@ int TransformerMain(int argc, const char ** argv)
if(strcmp(modelFN, "")) if(strcmp(modelFN, ""))
model.Read(modelFN); model.Read(modelFN);
T2TTrainer tester;
tester.Init(argc, args);
/* test the model on the new data */ /* test the model on the new data */
if(strcmp(testFN, "") && strcmp(outputFN, "")) if(strcmp(testFN, "") && strcmp(outputFN, "")){
tester.Test(testFN, outputFN, &model); /* beam search */
if(isBeamSearch){
T2TTester searcher;
searcher.Init(argc, args);
searcher.Test(testFN, outputFN, &model);
}
/* forced decoding */
else{
T2TTrainer tester;
tester.Init(argc, args);
tester.Test(testFN, outputFN, &model);
}
}
delete[] trainFN; delete[] trainFN;
delete[] modelFN; delete[] modelFN;
......
...@@ -30,7 +30,9 @@ ...@@ -30,7 +30,9 @@
#include "XDevice.h" #include "XDevice.h"
#include "./test/Test.h" #include "./test/Test.h"
#include "./core/CHeader.h" #include "./core/CHeader.h"
#include "./XBLAS.h"
#include "./core/sort/TopK.h"
#include "./core/movement/Gather.h"
//#define CRTDBG_MAP_ALLOC //#define CRTDBG_MAP_ALLOC
//#include <stdlib.h> //#include <stdlib.h>
//#include <crtdbg.h> //#include <crtdbg.h>
...@@ -39,9 +41,6 @@ using namespace nts; ...@@ -39,9 +41,6 @@ using namespace nts;
void SmallTest(); void SmallTest();
void TransposeTest(); void TransposeTest();
void LittleTest();
void T2TTest();
void T2TTest2();
void PowerTest(); void PowerTest();
int main( int argc, const char ** argv ) int main( int argc, const char ** argv )
...@@ -166,127 +165,5 @@ void TransposeTest() ...@@ -166,127 +165,5 @@ void TransposeTest()
delete[] data; delete[] data;
} }
void LittleTest()
{
int a = 5000;
int b = 100000;
int c = a*b;
printf("%d\n", c);
exit(1);
}
void T2TTest()
{
XTensor * input;
XTensor * weight;
XTensor * output;
XTensor * gold;
XTensor * dedy;
XTensor * dedx;
XTensor * dedxTmp;
XTensor * dedw;
XTensor * padding;
DTYPE loss;
int * dimSize = new int[2];
dimSize[0] = 256;
dimSize[1] = 10001;
int * dimSize2 = new int[3];
dimSize2[0] = 2;
dimSize2[1] = 31;
dimSize2[2] = 256;
int * dimSize3 = new int[3];
dimSize3[0] = 2;
dimSize3[1] = 31;
dimSize3[2] = 10001;
int * dimSize4 = new int[2];
dimSize4[0] = 2;
dimSize4[1] = 31;
input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);
//weight = NewTensor(2, dimSize);
//dedw = NewTensor(2, dimSize);
//input = NewTensor(3, dimSize2);
//gold = NewTensor(3, dimSize3);
//output = NewTensor(3, dimSize3);
//dedy = NewTensor(3, dimSize3);
//dedx = NewTensor(3, dimSize3);
//dedxTmp = NewTensor(3, dimSize3);
//padding = NewTensor(2, dimSize4);
myRead(input, "x.txt", "x");
myRead(weight, "w.txt", "w");
myRead(gold, "gold.txt", "gold");
myRead(padding, "padding.txt", "padding");
XTensor inter;
inter = MMul(*input, *weight);
_Softmax(&inter, output, 2);
//_LogMe(output);
loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
printf("loss: %f\n", loss);
_CrossEntropyBackward(dedy, output, gold, NULL);
//_CrossEntropyBackward(dedy, output, gold, NULL, padding);
myDump(dedy, "dedy.txt", "dedy");
_SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
_Sub(output, gold, dedxTmp);
myDump(dedx, "dedx.txt", "dedx");
dedx->Dump(stderr, "dedx", 200);
dedxTmp->Dump(stderr, "dedxTmp", 200);
input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));
_MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);
myDump(dedw, "dedw.txt", "dedw");
}
void T2TTest2()
{
int dimSize[3];
dimSize[0] = 161;
dimSize[1] = 47;
dimSize[2] = 10001;
XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
//XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//myRead(probs, "probs.txt", " ");
_SetDataFixedFloat(probs, 1.0F);
probs->Reshape(1, probs->unitNum);
DTYPE sum = _ReduceSumAll(probs);
printf("%e\n", sum);
//XTensor tmp;
//tmp = IsNonZero(*probs);
//DTYPE nonZeroNum = ReduceSumAll(tmp);
//printf("%f\n", nonZeroNum);
//
//DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
//printf("%e\n", gpu);
}
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#ifndef __XBLAS_H__ #ifndef __XBLAS_H__
#define __XBLAS_H__ #define __XBLAS_H__
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
...@@ -36,7 +35,7 @@ namespace nts{ ...@@ -36,7 +35,7 @@ namespace nts{
#define OPENBLAS_CONST const #define OPENBLAS_CONST const
typedef int BLASINT; typedef int BLASINT;
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-21
*/
#ifndef __XCALL_H__
#define __XCALL_H__
#include "XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
* we define the "new and delete" functions below
*/
/* initialize a XTensor */
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
/* generate a XTensor with no initialization */
XTensor * NewTensor();
/* generate a XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID, const bool isEnableGrad = true);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense vector by range */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
} // namespace nts(NiuTrans.Tensor)
#endif // __XCALL_H__
\ No newline at end of file
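A short usage sketch of the V2 interfaces declared above (illustrative only; devID = -1 selects the CPU):
/* illustrative only: create, use and free dense tensors with the V2 interface */
XTensor a;
InitTensor2DV2(&a, 2, 3, X_FLOAT, -1);    /* a 2 x 3 float matrix on the CPU */
a.SetZeroAll();
XTensor * b = NewTensor2DV2(2, 3, X_FLOAT, -1);
b->SetDataRand(-0.1F, 0.1F);              /* uniform values in [-0.1, 0.1] */
DelTensor(b);                             /* tensors from NewTensor* are freed with DelTensor */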
...@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName) ...@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName)
} }
} }
/**************************************************** /*
Below is for calling CPU BLAS for fast matrix operations Below is for calling CPU BLAS for fast matrix operations
I'm not sure how fast it is. But it seems that other I'm not sure how fast it is. But it seems that other
guys are crazy about this. So I decided to have a try. guys are crazy about this. So I decided to have a try.
...@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h) ...@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
return f; return f;
} }
/*
data type conversion
>> devID - device id
>> s - source data array
>> typeS - source data type
>> t - target data array
>> typeT - target data type
>> size - number of the items in s (and t)
*/
void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{
CheckNTErrors((devID < 0), "This code must be run on CPUs!");
if(typeS == typeT)
return;
if(typeS == X_FLOAT && typeT == X_FLOAT16){
for(int i = 0; i < size; i++){
((unsigned short*)t)[i] = FloatToFloat16(((float*)s)[i]);
}
}
else if(typeS == X_FLOAT16 && typeT == X_FLOAT){
for(int i = 0; i < size; i++){
((float*)t)[i] = Float16ToFloat(((unsigned short*)s)[i]);
}
}
else{
ShowNTErrors("Unsupported data types for conversion!");
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName); ...@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
/* data conversion (for lower precision computation) */ /* data conversion (for lower precision computation) */
unsigned short FloatToFloat16(float f); unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h); float Float16ToFloat(unsigned short h);
void ConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#ifdef USE_CUDA
void CudaConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#endif
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <time.h>
#include "XDevice.h" #include "XDevice.h"
#include "XGlobal.h" #include "XGlobal.h"
#include "XThread.h" #include "XThread.h"
...@@ -59,6 +60,7 @@ XDevice::~XDevice() ...@@ -59,6 +60,7 @@ XDevice::~XDevice()
cublasDestroy(cublasHandle); cublasDestroy(cublasHandle);
if(stream != NULL) if(stream != NULL)
delete stream; delete stream;
curandDestroyGenerator(gen);
#endif #endif
} }
...@@ -68,6 +70,7 @@ void XDevice::Init(int myDevID) ...@@ -68,6 +70,7 @@ void XDevice::Init(int myDevID)
Clear(); Clear();
devID = myDevID; devID = myDevID;
seed = rand();
/* CPU information */ /* CPU information */
if(devID < 0){ if(devID < 0){
...@@ -80,6 +83,10 @@ void XDevice::Init(int myDevID) ...@@ -80,6 +83,10 @@ void XDevice::Init(int myDevID)
cudaDeviceProp prop; cudaDeviceProp prop;
cudaSetDevice(myDevID); cudaSetDevice(myDevID);
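/* create a per-device cuRAND generator and seed it from the host-side RNG (seed = rand() above) */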
curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen, seed);
if(cudaGetDeviceProperties(&prop, devID) != cudaSuccess){ if(cudaGetDeviceProperties(&prop, devID) != cudaSuccess){
XPRINT1(0, stderr, "cannot get GPU(%d) information.", devID); XPRINT1(0, stderr, "cannot get GPU(%d) information.", devID);
exit(1); exit(1);
...@@ -194,7 +201,8 @@ void XDevice::SetGPUDevice(int devID) ...@@ -194,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID); cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
#else #else
...@@ -209,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID) ...@@ -209,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
SetFastFlags(); SetFastFlags();
} }
/* switch to a get current dev */ /* get the id of the current GPU device */
int XDevice::GetGPUDevice() int XDevice::GetGPUDevice()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -217,7 +225,8 @@ int XDevice::GetGPUDevice() ...@@ -217,7 +225,8 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID); cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
...@@ -241,7 +250,7 @@ void XDevice::SetFastFlags() ...@@ -241,7 +250,7 @@ void XDevice::SetFastFlags()
#endif #endif
} }
/* reset cuda flag for more efficient cuda execution (all devices) */ /* reset the cuda flag for more efficient cuda execution (all devices) */
void XDevice::SetFastFlagsAllDevices() void XDevice::SetFastFlagsAllDevices()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -267,9 +276,11 @@ XDevManager::~XDevManager() ...@@ -267,9 +276,11 @@ XDevManager::~XDevManager()
} }
/* initialize it and get the CPU and GPU information */ /* initialization */
void XDevManager::Init() void XDevManager::Init()
{ {
srand((unsigned int)time(NULL));
Clear(); Clear();
/* CPUs (we actually do not care about how many CPUs are using) */ /* CPUs (we actually do not care about how many CPUs are using) */
...@@ -309,7 +320,7 @@ void XDevManager::Clear() ...@@ -309,7 +320,7 @@ void XDevManager::Clear()
#ifdef USE_CUDA #ifdef USE_CUDA
/* get the handle of GPU */ /* get the handle of a given GPU */
cublasHandle_t * XDevManager::GetCudaHandle(const int devID) cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -317,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID) ...@@ -317,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
return GPUs[devID].GetCublasHandle(); return GPUs[devID].GetCublasHandle();
} }
/* get the stream of cuda */ /* get the stream of a given GPU */
cudaStream_t * XDevManager::GetCudaStream(const int devID) cudaStream_t * XDevManager::GetCudaStream(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -465,7 +476,7 @@ split a string ...@@ -465,7 +476,7 @@ split a string
>> items - splitting result >> items - splitting result
<< return - how many items are there << return - how many items are there
*/ */
int SplitALine(char * inputString, const char * seperator, XList * items) int SplitALine(char * inputString, const char * seperator, StrList* items)
{ {
items->Clear(); items->Clear();
...@@ -514,12 +525,12 @@ get device ids for the given device information ...@@ -514,12 +525,12 @@ get device ids for the given device information
devInfo = "0:CPU-1 1:GPU-0 2:CPU-1" devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
means that the first device is CPU, the second device means that the first device is CPU, the second device
is GPU-0, the third device is CPU. is GPU-0, the third device is CPU.
>> devIDs - device sequence specified by devInfo >> devIDs - device IDs specified by devInfo
<< return - number of devices << return - number of devices
*/ */
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{ {
XList * terms = new XList(1); StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms); SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){ for(int i = 0; i < terms->count; i++){
...@@ -556,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) ...@@ -556,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
return devCount; return devCount;
} }
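A hedged usage sketch for the device-string parsing above; the buffer size and the printed output are illustrative assumptions, only GDevs.GetDeviceIDs and the "id:TYPE" string convention come from this file.

/* a minimal sketch, assuming GDevs (the global XDevManager) has been initialized;
   devInfo follows the "0:CPU-1 1:GPU-0 2:CPU-1" convention documented above */
char devInfo[] = "0:CPU-1 1:GPU-0 2:CPU-1";
int devIDs[32];
int devNum = GDevs.GetDeviceIDs(devInfo, devIDs);
for (int i = 0; i < devNum; i++)
    fprintf(stderr, "device %d -> id %d\n", i, devIDs[i]);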
/* show id sequence */ /* show device IDs */
void XDevManager::ShowDeviceIDs(char * devInfo, char * msg) void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
{ {
msg[0] = 0; msg[0] = 0;
......
...@@ -99,6 +99,9 @@ public: ...@@ -99,6 +99,9 @@ public:
/* default stream for the device */ /* default stream for the device */
XStream * stream; XStream * stream;
/* seed for random number generation */
int seed;
#ifdef USE_CUDA #ifdef USE_CUDA
/* mutex for handle (GPU cublas) */ /* mutex for handle (GPU cublas) */
...@@ -109,6 +112,9 @@ public: ...@@ -109,6 +112,9 @@ public:
/* specify if the handle is initialized */ /* specify if the handle is initialized */
bool isHandleReady; bool isHandleReady;
/* generator of random numbers */
curandGenerator_t gen;
#endif #endif
...@@ -230,6 +236,18 @@ extern XDevManager GDevs; ...@@ -230,6 +236,18 @@ extern XDevManager GDevs;
cudaSetDevice(devIDBackup); \ cudaSetDevice(devIDBackup); \
} \ } \
#define CheckDev(a, b) \
{ \
if((a < 0 && b >= 0) || (a >= 0 && b < 0)){ \
fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, a, b); \
exit(1); \
} \
else if (a >= 0 && b >= 0 && a != b) { \
fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, a, b); \
exit(1); \
} \
} \
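A hedged sketch of how the new CheckDev macro might be used; the wrapper function below is an assumption for illustration, and it assumes XTensor exposes its device id as devID.

/* a minimal sketch: a negative devID means CPU, a non-negative one names a GPU,
   so CheckDev aborts when the two ids refer to different devices */
void CheckSameDevice(const XTensor &a, const XTensor &b)
{
    CheckDev(a.devID, b.devID);
}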
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif
...@@ -49,7 +49,7 @@ namespace nts { ...@@ -49,7 +49,7 @@ namespace nts {
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
#define DTYPE double #define DTYPE double
#define DTYPE_MIN (DTYPE)1.79E+308 #define DTYPE_MIN (DTYPE)-1.79E+308
#else #else
#define DTYPE float #define DTYPE float
#define DTYPE_MIN (DTYPE)-3.40E+38 #define DTYPE_MIN (DTYPE)-3.40E+38
...@@ -151,7 +151,9 @@ extern int verboseLevel; ...@@ -151,7 +151,9 @@ extern int verboseLevel;
#define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}} #define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}}
#define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}} #define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}}
#define B2I(V) V==0?false:true #define B2I(V) V == 0 ? false : true
#define MODX(a, b) int(b == 0 ? a : a - floor(double(a)/b) * b)
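A brief worked note on the new MODX macro: it computes a floored modulo and passes the value through when the divisor is zero.

/* worked examples for MODX (the result takes the sign of b, unlike the builtin %) */
int r1 = MODX(7, 3);    /* 7 - floor(7.0/3)*3  == 1 */
int r2 = MODX(-3, 5);   /* -3 - floor(-0.6)*5  == 2, while -3 % 5 == -3 in C/C++ */
int r3 = MODX(4, 0);    /* b == 0 falls back to a, so r3 == 4 */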
/* BLAS interfaces */ /* BLAS interfaces */
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
......
...@@ -31,15 +31,15 @@ namespace nts{ ...@@ -31,15 +31,15 @@ namespace nts{
/* constructor */ /* constructor */
template<HeapType hType, typename T> template<HeapType hType, typename T>
XHeap<hType, T>::XHeap()
{
}
/* constructor */
template<HeapType hType, typename T>
XHeap<hType, T>::XHeap(int mySize, XMem * myMem) XHeap<hType, T>::XHeap(int mySize, XMem * myMem)
{ {
mem = myMem; Init(mySize, myMem);
size = mySize;
count = 0;
if (mem == NULL)
items = new HeapNode<T>[mySize];
else
mem->Alloc(mem->devID, mySize * sizeof(T));
} }
/* deconstructor */ /* deconstructor */
...@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap() ...@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap()
} }
template<HeapType hType, typename T> template<HeapType hType, typename T>
void XHeap<hType, T>::Init(int mySize, XMem * myMem)
{
mem = myMem;
size = mySize;
count = 0;
if (mem == NULL)
items = new HeapNode<T>[mySize];
else
mem->Alloc(mem->devID, mySize * sizeof(T));
}
template<HeapType hType, typename T>
void XHeap<hType, T>::Clear(T initValue) void XHeap<hType, T>::Clear(T initValue)
{ {
count = 0; count = 0;
...@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End() ...@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End()
template<HeapType hType, typename T> template<HeapType hType, typename T>
_XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node) _XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node)
{ {
//CheckNTErrors((count < size), "Heap is full!"); if (count < size) {
items[count] = node; items[count] = node;
Up(count); Up(count);
count++; count++;
}
else if(count == size){
HeapNode<T> & item0 = items[0];
if (hType == MIN_HEAP && item0.value >= node.value)
return;
else if (hType == MAX_HEAP && item0.value <= node.value)
return;
items[0] = node;
Down(0);
}
else {
ShowNTErrors("Overflow of the heap!");
}
} }
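A hedged sketch of the new capacity handling in Push: once the heap is full, a MIN_HEAP only replaces its top when a larger value arrives, so a fixed-size heap keeps the k largest items. The values below are assumptions for illustration.

/* a minimal sketch, assuming the XHeap/HeapNode interface declared later in this commit */
XHeap<MIN_HEAP, float> heap;
heap.Init(3);                                  /* k = 3 */
float v[] = {5.0F, 1.0F, 9.0F, 7.0F, 2.0F};
for (int i = 0; i < 5; i++)
    heap.Push(HeapNode<float>(i, v[i]));       /* keeps {5, 7, 9} */
while (heap.Count() > 0) {
    HeapNode<float> node = heap.Pop();         /* pops 5, then 7, then 9 */
    fprintf(stderr, "%lld: %f\n", node.index, node.value);
}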
/* replace the top-most item and update the heap */ /* replace the top-most item and update the heap */
...@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node) ...@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node)
template<HeapType hType, typename T> template<HeapType hType, typename T>
_XINLINE_ HeapNode<T> XHeap<hType, T>::Pop() _XINLINE_ HeapNode<T> XHeap<hType, T>::Pop()
{ {
//CheckNTErrors((size > 0), "Empty heap!"); CheckNTErrors(count > 0, "Empty heap!");
HeapNode<T> node = items[0]; HeapNode<T> node = items[0];
items[0] = items[count - 1]; items[0] = items[count - 1];
count--; count--;
......
...@@ -39,7 +39,7 @@ template <typename T> ...@@ -39,7 +39,7 @@ template <typename T>
struct HeapNode struct HeapNode
{ {
/* node index */ /* node index */
int index; long long index;
/* value of the node */ /* value of the node */
T value; T value;
...@@ -52,9 +52,16 @@ struct HeapNode ...@@ -52,9 +52,16 @@ struct HeapNode
HeapNode(int i, T v) HeapNode(int i, T v)
{ {
index = i; index = (long long)i;
value = v; value = v;
}; };
HeapNode(void * i, T v)
{
index = (long long)i;
value = v;
}
}; };
/* a heap that keeps a data array of T */ /* a heap that keeps a data array of T */
...@@ -76,11 +83,17 @@ public: ...@@ -76,11 +83,17 @@ public:
public: public:
/* constructor */ /* constructor */
XHeap();
/* constructor */
XHeap(int mySize, XMem * myMem = NULL); XHeap(int mySize, XMem * myMem = NULL);
/* deconstructor */ /* deconstructor */
~XHeap(); ~XHeap();
/* initialization */
void Init(int mySize, XMem * myMem = NULL);
/* clear the data */ /* clear the data */
void Clear(T initValue); void Clear(T initValue);
...@@ -107,6 +120,9 @@ public: ...@@ -107,6 +120,9 @@ public:
/* move item k up the tree */ /* move item k up the tree */
void Up(int k); void Up(int k);
/* how many items are kept in the heap */
inline int Count() { return count; };
}; };
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -300,9 +300,36 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id ...@@ -300,9 +300,36 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL) if(h == NULL)
return; return;
XList list(2); if (!t1->enableGrad)
list.Add(t1); return;
list.Add(t2);
TensorList list(2);
list.Add((XTensor*)t1);
list.Add((XTensor*)t2);
MakeLink(&list, h, id);
}
/*
create a hyperedge with three input tensors and an output tensor
>> t1 - the first tail tensor
>> t2 - the second tail tensor
>> t3 - the third tail tensor
>> h - head tensor
>> id - id of the edge type
*/
void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id)
{
if (h == NULL)
return;
if (!t1->enableGrad || !t2->enableGrad)
return;
TensorList list(3);
list.Add((XTensor*)t1);
list.Add((XTensor*)t2);
list.Add((XTensor*)t3);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -313,7 +340,7 @@ create a hyper edge with a list of tensors and a output tensor ...@@ -313,7 +340,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor >> h - head tensor
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(const XList * list, XTensor * h, int id) void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
{ {
/* forward */ /* forward */
XLink &income = h->income; XLink &income = h->income;
...@@ -347,8 +374,11 @@ create a hyper edge with a input tensors and a list of output tensors ...@@ -347,8 +374,11 @@ create a hyper edge with a input tensors and a list of output tensors
>> list - a list of output tensors >> list - a list of output tensors
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(XTensor * t, XList * list, int id) void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{ {
if (!t->enableGrad)
return;
/* forward */ /* forward */
for(int i = 0; i < list->count; i++){ for(int i = 0; i < list->count; i++){
XTensor * h = (XTensor*)list->GetItem(i); XTensor * h = (XTensor*)list->GetItem(i);
...@@ -509,6 +539,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne) ...@@ -509,6 +539,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
} }
} }
/*
copy a node with another, i.e., we add the links to the new node
>> reference - the node to be copied
>> target - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes */
if (reference->income.typeID != 0) {
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
/* outgoing nodes */
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
/* /*
copy incoming edges of a given node copy incoming edges of a given node
>> reference - the node we copy from >> reference - the node we copy from
...@@ -521,7 +633,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target) ...@@ -521,7 +633,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
ClearIncoming(target); ClearIncoming(target);
int tailNum = reference->income.tailNum; int tailNum = reference->income.tailNum;
XList tails(tailNum); TensorList tails(tailNum);
for(int i = 0; i < tailNum; i++){ for(int i = 0; i < tailNum; i++){
XTensor * tail = (XTensor*)reference->income.tails[i]; XTensor * tail = (XTensor*)reference->income.tails[i];
tails.Add(tail); tails.Add(tail);
...@@ -634,6 +746,29 @@ void XLink::ShowNode(FILE * file, XTensor * node) ...@@ -634,6 +746,29 @@ void XLink::ShowNode(FILE * file, XTensor * node)
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
/*
search for a node in a top-down manner by its name
>> top - the top-most node
>> name - the name of the node we are looking for
<< return - the node we found (NULL if there is no matched node)
*/
XTensor * XLink::SearchNode(XTensor * top, const char * name)
{
if(!strcmp(top->name, name))
return top;
XLink &incoming = top->income;
for(int i = 0; i < incoming.tailNum; i++){
XTensor * child = incoming.tails[i];
XTensor * hit = SearchNode(child, name);
if(hit != NULL)
return hit;
}
return NULL;
}
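A hedged usage sketch for the new SearchNode; the tensor c and the node name "embedding" are assumptions for illustration.

/* a minimal sketch: walk the graph below c and return the first node whose name matches */
XTensor * hit = XLink::SearchNode(&c, "embedding");
if (hit != NULL)
    hit->Dump(stderr, "hit:");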
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */ /* cross reference */
struct XTensor; struct XTensor;
#define MAX_OP_NAME_LENGTH 16 #define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64 #define PARAM_UNTI_SIZE 64
/* /*
...@@ -138,13 +138,17 @@ struct XLink ...@@ -138,13 +138,17 @@ struct XLink
static static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with three input tensors and a output tensor */
static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
/* create a hyper edge with a list of input tensors and a output tensor */ /* create a hyper edge with a list of input tensors and a output tensor */
static static
void MakeLink(const XList * list, XTensor * h, int id); void MakeLink(const TensorList * list, XTensor * h, int id);
/* create a hyper edge with a input tensors and a list of output tensors */ /* create a hyper edge with a input tensors and a list of output tensors */
static static
void MakeLink(XTensor * h, XList * list, int id); void MakeLink(XTensor * h, TensorList * list, int id);
/* add a parameter */ /* add a parameter */
static static
...@@ -170,6 +174,10 @@ struct XLink ...@@ -170,6 +174,10 @@ struct XLink
static static
void Replace(const XTensor * oldOne, XTensor * newOne); void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy a node with another, i.e., we add the links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */ /* copy links of a given node */
static static
void CopyIncoming(const XTensor * reference, XTensor * target); void CopyIncoming(const XTensor * reference, XTensor * target);
...@@ -181,6 +189,10 @@ struct XLink ...@@ -181,6 +189,10 @@ struct XLink
/* show a node */ /* show a node */
static static
void ShowNode(FILE * file, XTensor * node); void ShowNode(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */
static
XTensor * SearchNode(XTensor * top, const char * name);
}; };
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -15,32 +15,31 @@ ...@@ -15,32 +15,31 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* *
* Implementation of list that keeps data items * Implementation of template list that keeps data items
* *
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17 * $Created by: HU Chi (huchinlp@foxmail.com)
* The first coding job this year! *
* */
*/
#ifndef __XLIST_H__
#define __XLIST_H__
#include "XMem.h" #include "XMem.h"
#include "XGlobal.h" #include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */ #ifndef __TensorList_H__
namespace nts{ #define __TensorList_H__
typedef int (* ListCompare)(const void * item1, const void * item2);
/* the XList class */ /* the nts (NiuTrans.Tensor) namespace */
class XList namespace nts {
{
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
public: public:
/* data items */ /* data items */
void ** items; T *items;
/* number of items */ /* number of items */
int count; int count;
...@@ -49,56 +48,105 @@ public: ...@@ -49,56 +48,105 @@ public:
int maxNum; int maxNum;
/* the memory pool for data array allocation */ /* the memory pool for data array allocation */
XMem * mem; XMem* mem;
/* indicates whether data items are integers */
bool isIntList;
public: public:
/* constructor */ /* constructor */
XList(); TensorListBase();
/* constructor */ /* constructor */
XList(int myMaxNum, bool isIntListOrNot = false); TensorListBase(int myMaxNum);
/* constructor */ /* constructor */
XList(int myMaxNum, XMem * myMem, bool isIntListOrNot = false); TensorListBase(int myMaxNum, XMem* myMem);
/* de-constructor */ /* de-constructor */
~XList(); ~TensorListBase();
/* utilities */ /* add an item into the list */
void Create(int myMaxNum, XMem * myMem); void Add(T&& item);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount); /* return number of elements */
void AddList(XList * l); size_t Size();
void AddInt(int i);
void Insert(int pos, void * item); /* add an item into the list */
void * GetItem(int i) const; void Add(const T& item);
int GetItemInt(int i);
void SetItem(int i, void * item); /* add a number of items into the list */
void SetItemInt(int i, int item); void Add(const T* inputItems, int inputItemCount);
int FindFirst(void * item); /* append a list to the current list */
void AddList(TensorListBase* l);
/* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
void Clear(); void Clear();
void ClearStringList();
void Sort(int itemSize, ListCompare comp); /* sort the list */
void Sort(int itemSize);
/* reverse the list */
void Reverse(); void Reverse();
/* remove the item at position i */
void Remove(int i); void Remove(int i);
XList * Copy(XMem * myMem);
/* reserve space for data entry */
void Reserve(int n);
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
_XINLINE_ void * Get(int i) {return GetItem(i);}; T& operator[] (int i) { return GetItem(i); };
_XINLINE_ int GetInt(int i) {return GetItemInt(i);}; T& Get(int i) { return GetItem(i); };
_XINLINE_ void Set(int i, void * item) {SetItem(i, item);}; void Set(int i, T item) { SetItem(i, item); };
_XINLINE_ void SetInt(int i, int item) {SetItemInt(i, item);}; };
struct XTensor;
typedef TensorListBase<void*> XList;
typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
struct Example {
int id;
IntList data;
};
struct Result {
int id;
IntList data;
}; };
extern XList NULLList; typedef TensorListBase<Result> ResultList;
typedef TensorListBase<Example> ExampleList;
typedef TensorListBase<XTensor*> TensorList;
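A hedged sketch of the new template list; TensorList stores XTensor pointers and IntList stores ints through the same TensorListBase (InitTensor2D and the sizes are assumptions for illustration).

/* a minimal sketch, assuming the typedefs above */
TensorList tensors(2);
XTensor a, b;
InitTensor2D(&a, 2, 3);
InitTensor2D(&b, 2, 3);
tensors.Add(&a);
tensors.Add(&b);
XTensor * first = tensors.GetItem(0);      /* the same element as tensors[0] */

IntList ids(4);
for (int i = 0; i < 4; i++)
    ids.Add(i);
int last = ids[(int)ids.Size() - 1];       /* last == 3 */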
} } /* end of the nts (NiuTrans.Tensor) namespace */
/* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif // __TensorList_H__
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#ifndef __XMEM_H__ #ifndef __XMEM_H__
#define __XMEM_H__ #define __XMEM_H__
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#ifdef CUDA_BLAS #ifdef CUDA_BLAS
...@@ -38,6 +39,15 @@ ...@@ -38,6 +39,15 @@
#include <curand.h> #include <curand.h>
#endif #endif
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#elif WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
...@@ -50,8 +60,10 @@ typedef long long INT_64; ...@@ -50,8 +60,10 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1 #define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH #define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256 #define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024 #define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024 #define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_MEM_NUM 16
#define MAX_GPU_MEM_NUM 16
/* /*
mode of running a memory pool mode of running a memory pool
...@@ -201,6 +213,9 @@ public: ...@@ -201,6 +213,9 @@ public:
MTYPE curUsedPin; MTYPE curUsedPin;
MTYPE bufUsedPin; MTYPE bufUsedPin;
/* indicates whether the memory pool is initialized */
bool isInitialized;
#ifdef USE_CUDA #ifdef USE_CUDA
/* handle used for cublas */ /* handle used for cublas */
cublasHandle_t cublasHandle; cublasHandle_t cublasHandle;
...@@ -402,6 +417,9 @@ public: ...@@ -402,6 +417,9 @@ public:
/* create a new cublas handle */ /* create a new cublas handle */
void CreateBLASHandle(); void CreateBLASHandle();
/* show profile of the memory pool */
void ShowMemUsage(FILE * file);
#ifdef USE_CUDA #ifdef USE_CUDA
/* get the handle of cublas */ /* get the handle of cublas */
cublasHandle_t * GetCublasHandle(); cublasHandle_t * GetCublasHandle();
...@@ -409,6 +427,61 @@ public: ...@@ -409,6 +427,61 @@ public:
}; };
/*
a class for the management of memory
*/
class XMemManager
{
private:
/* cpu memory pool information */
XMem CPUMems[MAX_CPU_MEM_NUM];
/* number of cpu memory pools */
int nCPUMem;
/* gpu memory pool information */
XMem GPUMems[MAX_GPU_MEM_NUM];
/* number of gpu memory pools */
int nGPUMem;
public:
/* constructor */
XMemManager();
/* de-constructor */
~XMemManager();
/* get the size of available CPU memory */
MTYPE GetAvailableMemory();
/* get the size of available memory on a given GPU */
MTYPE GetAvailableGPUMemory(int devID);
/* get buffer size */
void GetBufferSize(MTYPE freeMem, MTYPE * myBufSize);
/* initialize it and set the global memory information */
void Initialize();
/* free it */
void Free();
/* get global memory pool */
XMem * GetMem(const int devID);
/* get global memory size */
int GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize);
/* show memory information */
void ShowMemInfo();
};
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem; extern XMem * GMem;
extern int testxmemid; extern int testxmemid;
......
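A hedged sketch of how the new XMemManager/GMems might be driven; whether Initialize is called here or inside XDevManager::Init, and the mapping of devID -1 to the CPU pool, are assumptions.

/* a minimal sketch, assuming GMems is the global XMemManager declared above */
GMems.Initialize();                   /* set up the global memory pools */
GMems.ShowMemInfo();
XMem * cpuMem = GMems.GetMem(-1);     /* memory pool for the CPU (assumed id convention) */
XMem * gpuMem = GMems.GetMem(0);      /* memory pool for GPU 0, if any */
GMems.Free();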
...@@ -59,6 +59,8 @@ const char * GetOPName(int type) ...@@ -59,6 +59,8 @@ const char * GetOPName(int type)
return "M_DIV"; return "M_DIV";
else if (type == MATH_DIVDIM) else if (type == MATH_DIVDIM)
return "M_DIVDIM"; return "M_DIVDIM";
else if (type == MATH_MASK)
return "M_MASK";
else if (type == MATH_MATRIXMUL) else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL"; return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED) else if (type == MATH_MATRIXMULBATCHED)
...@@ -67,6 +69,8 @@ const char * GetOPName(int type) ...@@ -67,6 +69,8 @@ const char * GetOPName(int type)
return "M_MULTIPLY"; return "M_MULTIPLY";
else if (type == MATH_MULTIPLYDIM) else if (type == MATH_MULTIPLYDIM)
return "M_MULTIPLYDIM"; return "M_MULTIPLYDIM";
else if (type == MATH_MULTIPLYBROADCAST)
return "M_MULTIPLYBROADCAST";
else if (type == MATH_NEGATE) else if (type == MATH_NEGATE)
return "M_NEGATE"; return "M_NEGATE";
else if (type == MATH_NORMALIZE) else if (type == MATH_NORMALIZE)
...@@ -75,6 +79,14 @@ const char * GetOPName(int type) ...@@ -75,6 +79,14 @@ const char * GetOPName(int type)
return "M_POWER"; return "M_POWER";
else if (type == MATH_SCALEANDSHIFT) else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT"; return "M_SCALEANDSHIFT";
else if (type == MATH_SCALE)
return "M_SCALE";
else if (type == MATH_DESCALE)
return "M_DESCALE";
else if (type == MATH_SHIFT)
return "M_SHIFT";
else if (type == MATH_MULANDSHIFT)
return "M_OPERATION";
else if (type == MATH_SIGN) else if (type == MATH_SIGN)
return "M_SIGN"; return "M_SIGN";
else if (type == MATH_SUB) else if (type == MATH_SUB)
...@@ -85,6 +97,8 @@ const char * GetOPName(int type) ...@@ -85,6 +97,8 @@ const char * GetOPName(int type)
return "M_SUM"; return "M_SUM";
else if (type == MATH_SUMDIM) else if (type == MATH_SUMDIM)
return "M_SUMDIM"; return "M_SUMDIM";
else if (type == MATH_SUMBROADCAST)
return "M_SUMBROADCAST";
else if (type == REDUCE_REDUCEMAX) else if (type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX"; return "R_REDUCEMAX";
else if (type == REDUCE_REDUCEMEAN) else if (type == REDUCE_REDUCEMEAN)
...@@ -97,13 +111,7 @@ const char * GetOPName(int type) ...@@ -97,13 +111,7 @@ const char * GetOPName(int type)
return "R_REDUCEVARIANCE"; return "R_REDUCEVARIANCE";
} }
else if ((type & DATA_BASE) != 0){ else if ((type & DATA_BASE) != 0){
if (type == GETANDSET_CONVERTDATATYPE) if (type == GETANDSET_SELECT)
return "G_CONVERTDATATYPE";
else if (type == GETANDSET_INDEXTOONEHOT)
return "G_INDEXTOONEHOT";
else if (type == GETANDSET_ONEHOTTOINDEX)
return "G_ONEHOTTOINDEX";
else if (type == GETANDSET_SELECT)
return "G_SELECT"; return "G_SELECT";
else if (type == MOVEMENT_COPYINDEXED) else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED"; return "M_COPYINDEXED";
...@@ -111,6 +119,8 @@ const char * GetOPName(int type) ...@@ -111,6 +119,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES"; return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER) else if (type == MOVEMENT_GATHER)
return "M_GATHER"; return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE) else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE"; return "S_CONCATENATE";
else if (type == SHAPE_MERGE) else if (type == SHAPE_MERGE)
...@@ -152,6 +162,10 @@ const char * GetOPName(int type) ...@@ -152,6 +162,10 @@ const char * GetOPName(int type)
else if (type == FUNC_SOFTMAX) else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX"; return "F_SOFTMAX";
} }
else if ((type & LOSS_BASE) != 0) {
if (type == LOSS_CROSSENTROPY)
return "L_CROSSENTROPY";
}
return "NULL"; return "NULL";
} }
......
...@@ -48,21 +48,29 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -48,21 +48,29 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CLIP MATH_ROUND + 1 #define MATH_CLIP MATH_ROUND + 1
#define MATH_DIV MATH_CLIP + 1 #define MATH_DIV MATH_CLIP + 1
#define MATH_DIVDIM MATH_DIV + 1 #define MATH_DIVDIM MATH_DIV + 1
#define MATH_MATRIXMUL MATH_DIVDIM + 1 #define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1 #define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1 #define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_NEGATE MATH_MULTIPLYDIM + 1 #define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
#define MATH_NORMALIZE MATH_NEGATE + 1 #define MATH_NORMALIZE MATH_NEGATE + 1
#define MATH_POWER MATH_NORMALIZE + 1 #define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1 #define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_SIGN MATH_SCALEANDSHIFT + 1 #define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SCALE MATH_MULANDSHIFT + 1
#define MATH_DESCALE MATH_SCALE + 1
#define MATH_SHIFT MATH_DESCALE + 1
#define MATH_MOD MATH_SHIFT + 1
#define MATH_SIGN MATH_MOD + 1
#define MATH_SUB MATH_SIGN + 1 #define MATH_SUB MATH_SIGN + 1
#define MATH_SUBDIM MATH_SUB + 1 #define MATH_SUBDIM MATH_SUB + 1
#define MATH_SUM MATH_SUBDIM + 1 #define MATH_SUM MATH_SUBDIM + 1
#define MATH_SUMDIM MATH_SUM + 1 #define MATH_SUMDIM MATH_SUM + 1
#define MATH_SUMBROADCAST MATH_SUMDIM + 1
#define REDUCE MATH_SUMDIM + 1 #define REDUCE MATH_SUMBROADCAST + 1
#define REDUCE_REDUCEMAX REDUCE + 1 #define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1 #define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1 #define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
...@@ -73,16 +81,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -73,16 +81,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define DATA_BASE MATH_BASE * 2 #define DATA_BASE MATH_BASE * 2
#define GETANDSET DATA_BASE + 1 #define GETANDSET DATA_BASE + 1
#define GETANDSET_CONVERTDATATYPE GETANDSET + 1 #define GETANDSET_CONVERTDATATYPE GETANDSET + 1
#define GETANDSET_INDEXTOONEHOT GETANDSET_CONVERTDATATYPE + 1 #define GETANDSET_SELECT GETANDSET_CONVERTDATATYPE + 1
#define GETANDSET_ONEHOTTOINDEX GETANDSET_INDEXTOONEHOT + 1
#define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1
#define MOVEMENT GETANDSET_SELECT + 1 #define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1 #define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1 #define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1 #define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_GATHER + 1 #define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1 #define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1 #define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1 #define SHAPE_MERGE_LIST SHAPE_MERGE + 1
...@@ -108,6 +115,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -108,6 +115,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define FUNC_SIGMOID FUNC_RECTIFY + 1 #define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1 #define FUNC_SOFTMAX FUNC_SIGMOID + 1
#define LOSS_BASE FUNCTION_BASE * 2
#define LOSS_CROSSENTROPY LOSS_BASE + 1
/* get operator name */ /* get operator name */
const char * GetOPName(int type); const char * GetOPName(int type);
......
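A brief hedged note on the renumbered operator ids above: each family starts at its own base (DATA_BASE = MATH_BASE * 2, LOSS_BASE = FUNCTION_BASE * 2), which is what lets GetOPName test the family with a bit-mask before matching the exact id.

/* a minimal sketch of the id-to-name mapping (output strings taken from GetOPName above) */
fprintf(stderr, "%s\n", GetOPName(MATH_DIVDIM));        /* "M_DIVDIM" */
fprintf(stderr, "%s\n", GetOPName(MOVEMENT_GATHER));    /* "M_GATHER" */
fprintf(stderr, "%s\n", GetOPName(LOSS_CROSSENTROPY));  /* "L_CROSSENTROPY" */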
...@@ -146,7 +146,7 @@ run a set of jobs in parallel ...@@ -146,7 +146,7 @@ run a set of jobs in parallel
>> jobArgs - the list of arguments for each job >> jobArgs - the list of arguments for each job
>> sleepTime - time to sleep (in ms) for each round >> sleepTime - time to sleep (in ms) for each round
*/ */
void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
{ {
if(threadNum <= 0){ if(threadNum <= 0){
XPRINT(1, stderr, "Error! No threads were created!\n"); XPRINT(1, stderr, "Error! No threads were created!\n");
...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) ...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c); TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);
/* the arguments that are passed to the function */ /* the arguments that are passed to the function */
volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c); volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);
/* thread */ /* thread */
XThread * thread = threads + availableThreads[i]; XThread * thread = threads + availableThreads[i];
......
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
void KillThreads(); void KillThreads();
/* run a set of jobs in parallel */ /* run a set of jobs in parallel */
void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0); void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);
/* get the number of parallel jobs to run */ /* get the number of parallel jobs to run */
int GetJobNum(int size); int GetJobNum(int size);
......
...@@ -42,7 +42,7 @@ job item used in queues ...@@ -42,7 +42,7 @@ job item used in queues
JobQueueNode::JobQueueNode() JobQueueNode::JobQueueNode()
{ {
job = NULL; job = NULL;
args = new XList(1); args = new TensorList(1);
} }
/* de-constructor */ /* de-constructor */
...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize) ...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
head = 0; head = 0;
tail = 0; tail = 0;
isJobQueue = false; isJobQueue = false;
jobDequeuerArgs = new XList(1); jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false; jobDequeuerBreak = false;
runningJobCount = 0; runningJobCount = 0;
jobStream = NULL; jobStream = NULL;
...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID) ...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true; isJobQueue = true;
jobDequeuerArgs->Clear(); jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid); // warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
jobDequeuer.function = (TFunction)DequeueJobs; jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs; jobDequeuer.argv = jobDequeuerArgs;
...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer() ...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer()
} }
/* add a job item to process */ /* add a job item to process */
void XQueue::EnqueueJob(void * job, XList * jobArgs) void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{ {
MUTEX_LOCK(jobQueueMutex); MUTEX_LOCK(jobQueueMutex);
runningJobCount++; runningJobCount++;
...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs) ...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
} }
/* job item consumer */ /* job item consumer */
void XQueue::DequeueJobs(XList * args) void XQueue::DequeueJobs(TensorList * args)
{ {
CheckNTErrors((args->count == 2), "Illegal arguments!"); CheckNTErrors((args->count == 2), "Illegal arguments!");
......
...@@ -52,7 +52,7 @@ public: ...@@ -52,7 +52,7 @@ public:
void * job; void * job;
/* arguments of the job */ /* arguments of the job */
XList * args; TensorList * args;
public: public:
/* constructor */ /* constructor */
...@@ -102,7 +102,7 @@ private: ...@@ -102,7 +102,7 @@ private:
XThread jobDequeuer; XThread jobDequeuer;
/* argument list of jobDequeuer */ /* argument list of jobDequeuer */
XList * jobDequeuerArgs; TensorList * jobDequeuerArgs;
/* indicates whether jobDequeuer stops */ /* indicates whether jobDequeuer stops */
bool jobDequeuerBreak; bool jobDequeuerBreak;
...@@ -141,11 +141,11 @@ public: ...@@ -141,11 +141,11 @@ public:
void StopJobConsumer(); void StopJobConsumer();
/* add a job item to process */ /* add a job item to process */
void EnqueueJob(void * job, XList * jobArgs); void EnqueueJob(void * job, TensorList * jobArgs);
/* job item consumer */ /* job item consumer */
static static
void DequeueJobs(XList * args); void DequeueJobs(TensorList * args);
/* get the break flag */ /* get the break flag */
bool GetJobBreak(); bool GetJobBreak();
......
...@@ -85,7 +85,7 @@ namespace nts{ ...@@ -85,7 +85,7 @@ namespace nts{
#endif #endif
typedef void (*TFunction) (volatile XList*); typedef void (*TFunction) (volatile TensorList*);
/* /*
This is a class that wraps the standard implementation of threading This is a class that wraps the standard implementation of threading
...@@ -133,7 +133,7 @@ public: ...@@ -133,7 +133,7 @@ public:
/* arguments (for the function to run) */ /* arguments (for the function to run) */
volatile volatile
XList * argv; TensorList * argv;
/* a flag to break */ /* a flag to break */
volatile volatile
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "arithmetic/Div.h" #include "arithmetic/Div.h"
#include "arithmetic/DivDim.h" #include "arithmetic/DivDim.h"
#include "arithmetic/Mask.h"
#include "arithmetic/MatrixMul.h" #include "arithmetic/MatrixMul.h"
#include "arithmetic/MatrixMul2D.h" #include "arithmetic/MatrixMul2D.h"
#include "arithmetic/MatrixMul2DMultiTheading.h" #include "arithmetic/MatrixMul2DMultiTheading.h"
...@@ -35,25 +36,22 @@ ...@@ -35,25 +36,22 @@
#include "arithmetic/MatrixMulBatched.h" #include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/Multiply.h" #include "arithmetic/Multiply.h"
#include "arithmetic/MultiplyDim.h" #include "arithmetic/MultiplyDim.h"
#include "arithmetic/Negate.h"
#include "arithmetic/Sign.h"
#include "arithmetic/Sub.h" #include "arithmetic/Sub.h"
#include "arithmetic/SubDim.h" #include "arithmetic/SubDim.h"
#include "arithmetic/Sum.h" #include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "arithmetic/SumDim.h" #include "arithmetic/SumDim.h"
#include "arithmetic/XTensorBLAS.h" #include "arithmetic/XTensorBLAS.h"
#include "arithmetic/MulAndShift.h"
#include "getandset/ConvertDataType.h" #include "getandset/ConvertDataType.h"
#include "getandset/OnehotAndIndex.h" #include "getandset/OnehotAndIndex.h"
#include "getandset/Select.h" #include "getandset/Select.h"
#include "getandset/SetData.h" #include "getandset/SetData.h"
#include "math/Binary.h"
#include "math/Clip.h" #include "math/Clip.h"
#include "math/Compare.h" #include "math/Compare.h"
#include "math/Normalize.h" #include "math/Normalize.h"
#include "math/Power.h"
#include "math/ScaleAndShift.h" #include "math/ScaleAndShift.h"
#include "math/Unary.h" #include "math/Unary.h"
...@@ -87,11 +85,14 @@ ...@@ -87,11 +85,14 @@
#include "shape/Squeeze.h" #include "shape/Squeeze.h"
#include "shape/Transpose.h" #include "shape/Transpose.h"
#include "shape/Unsqueeze.h" #include "shape/Unsqueeze.h"
#include "shape/IsSameShaped.h"
#include "sort/Sort.h" #include "sort/Sort.h"
#include "sort/TopK.h" #include "sort/TopK.h"
#include "utilities/XMatrixSegment.h" #include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h" #include "utilities/FlushToMem.h"
#include "utilities/CheckData.h"
#include "utilities/SetAscendingOrder.h"
#endif // __CHEADER_H__ #endif // __CHEADER_H__