Commit 03b1a0fc by 单韦乔

2019.07.31合并最新NiuTensor并修改test接口。除了与先前一致的test以外,所有的test均测试通过

parent 7da1bec1
...@@ -35,111 +35,35 @@ ...@@ -35,111 +35,35 @@
void BackwardTest(); void BackwardTest();
void TransposeTest(); void TransposeTest();
void SumDimTest(); void SumDimTest();
void SplitBackwardTest();
void MemTest();
void xcTest();
using namespace nts; using namespace nts;
using namespace fnnlm; using namespace fnnlm;
using namespace transformer; using namespace transformer;
int main(int argc, const char ** argv ) int main( int argc, const char ** argv )
{ {
//xcTest(); //_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
//return 0; //_CrtSetBreakAlloc(2708);
//MemTest();
//return 0; if(argc > 1 && !strcmp(argv[1], "-test"))
//SplitBackwardTest(); Test();
//return 0; else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
//_CrtSetBreakAlloc(896); FNNLMMain(argc - 1, argv + 1);
//BackwardTest(); else if(argc > 1 && !strcmp(argv[1], "-t2t"))
//return 0; TransformerMain(argc - 1, argv + 1);
//Test(); else{
//return 0; fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
//if (argc > 1 && !strcmp(argv[1], "-test")) fprintf(stderr, "neural networks in an easy way. \n\n");
Test(); fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
//else if(argc > 1 && !strcmp(argv[1], "-fnnlm")) fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
// FNNLMMain(argc - 1, argv + 1); fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
//else if(argc > 1 && !strcmp(argv[1], "-t2t")) }
// TransformerMain(argc - 1, argv + 1);
//else{
// fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
// fprintf(stderr, "neural networks in an easy way. \n\n");
// fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
// fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
//}
//_CrtDumpMemoryLeaks(); //_CrtDumpMemoryLeaks();
return 0; return 0;
} }
/*
stack all tensors of "list" by merging them along "leadingDim" and then
splitting the merged tensor back into a new tensor with an extra axis.
>> list - list of tensors of identical shape (not verified here)
>> leadingDim - dimension along which the tensors are merged
<< return - a NEW heap-allocated tensor; the CALLER owns it and must delete it.
            NULL if the list is empty.
*/
XTensor * stack(XList& list, int leadingDim)
{
    /* keep everything in int: tensor orders/dims are int in NiuTensor,
       and mixing size_t with int caused signed/unsigned comparisons */
    int size = list.count;
    if (size == 0)
        return NULL;

    XTensor * sample = (XTensor*)list.Get(0);
    XTensor merge_tensor;
    int order = sample->order;

    /* allocate once with room for the extra axis (order + 1) so the
       buffer can be reused for the split shape below */
    int * dim = new int[order + 1];
    for (int i = 0; i < order; i++)
        dim[i] = sample->GetDim(i);
    dim[leadingDim] *= size;
    InitTensor(&merge_tensor, order, dim, DEFAULT_DTYPE, sample->denseRatio, sample->devID, sample->mem);
    _Merge(&list, &merge_tensor, leadingDim);

    /* shape of the split result: one extra leading axis of length "size" */
    order += 1;
    dim[0] = size;
    for (int i = 1; i < order; i++) {
        /* NOTE(review): "i" indexes the NEW (order+1) coordinate system while
           "leadingDim" is in the OLD one — verify this is intended for
           leadingDim > 0; only leadingDim == 0 is exercised by xcTest */
        if (i != leadingDim)
            dim[i] = sample->GetDim(i - 1);
        else
            dim[i] = sample->GetDim(i - 1) / size;
    }

    XTensor * split_tensor = new XTensor(order, dim, DEFAULT_DTYPE, sample->denseRatio, sample->devID, sample->mem);
    _Split(&merge_tensor, split_tensor, leadingDim, size);
    delete[] dim;

    return split_tensor;
}
void xcTest()
{
int * dimSize = new int[2];
dimSize[0] = 2;
dimSize[1] = 4;
XTensor t1;
InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
XTensor t2;
InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
XTensor tensor;
_SetDataFixedFloat(&t1, 1.0F);
_SetDataFixedFloat(&t2, 2.0F);
tensor = t1 + t2;
XList smalls;
XTensor first;
XTensor second;
InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
smalls.Add(&t1);
smalls.Add(&t2);
XTensor* result = stack(smalls, 0);
result->Dump(stderr, "", 100);
}
void BackwardTest() void BackwardTest()
{ {
XNet net; XNet net;
...@@ -147,6 +71,9 @@ void BackwardTest() ...@@ -147,6 +71,9 @@ void BackwardTest()
XTensor a; XTensor a;
XTensor b; XTensor b;
XTensor c; XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean; XTensor mean;
XTensor origin; XTensor origin;
InitTensor2D(&a, 2, 3); InitTensor2D(&a, 2, 3);
...@@ -164,14 +91,15 @@ void BackwardTest() ...@@ -164,14 +91,15 @@ void BackwardTest()
b.Set1D(2.0F, 0); b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1); b.Set1D(1.0F, 1);
c = DivDim(a, b, 0); DivDim(a, b, c, 0);
c.Dump(stderr, "c:"); c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c); //XLink::ShowNetwork(stderr, &c);
net.Backward(c); net.Backward(loss);
net.Dump(stderr); a.grad->Dump(stderr);
} }
...@@ -287,67 +215,3 @@ void SumDimTest() ...@@ -287,67 +215,3 @@ void SumDimTest()
delete[] data; delete[] data;
} }
/*
test backward propagation through Split: two random 2x4 tensors are summed,
the sum is split into two 2x2 halves along dimension 1, the halves are added,
and gradients are back-propagated and dumped to stderr.
Fix: removed the "dimSize" buffer that was allocated, filled and then never
used nor freed (memory leak).
*/
void SplitBackwardTest()
{
    XTensor t1;
    InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
    XTensor t2;
    InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
    XTensor tensor;

    t1.SetDataRand();
    t2.SetDataRand();
    tensor = t1 + t2;

    /* split the 2x4 sum into two 2x2 tensors along dimension 1 */
    XList smalls;
    XTensor first;
    XTensor second;
    InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
    InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
    smalls.Add(&first);
    smalls.Add(&second);
    Split(tensor, smalls, 1, 2);

    XTensor mul;
    mul = Sum(first, second);

    /* back-propagate from the final sum and dump the whole network */
    XNet net;
    net.Backward(mul);
    net.Dump(stderr);

    printf("Done!");
}
/*
test what happens to a tensor bound to a memory pool after the pool is
deleted. The tensor is created inside "mem", the pool is destroyed, and
then the (now dangling) tensor is inspected and dumped.
NOTE(review): reading tensor.mem->signature after "delete mem" dereferences
freed memory — undefined behavior. This appears intentional as a probe of
the FREE_ON_THE_FLY pool lifetime, but confirm whether XTensor is expected
to null its "mem" pointer when the pool dies.
*/
void MemTest()
{
    XMem * mem;
    /* a small pool with free-on-the-fly allocation */
    mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION, 1024, MILLION);
    XTensor tensor;
    InitTensor2D(&tensor, 2, 4, X_FLOAT, 0, mem);
    tensor.SetZeroAll();
    tensor.Dump(stderr);
    /* destroy the pool while "tensor" still points into it */
    delete mem;
    if (tensor.mem != NULL) {
        printf("It isn't null!\n");
        /* dangling read: mem was deleted above */
        printf("%d\n", (int)tensor.mem->signature);
    }
    else {
        printf("It's null\n");
    }
    tensor.Dump(stderr);
}
\ No newline at end of file
...@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
XNoder::MakeGrad(input); XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH) if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _HardTanHBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_IDENTITY) else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _IdentityBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_LOGSOFTMAX){ else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0); int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!"); CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS); _LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS);
} }
else if(operID == FUNC_RECTIFY) else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _RectifyBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SIGMOID) else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _SigmoidBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SOFTMAX){ else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0); int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!"); CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
......
...@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * dedy = output->grad; XTensor * dedy = output->grad;
if (income.tailNum == 1) { if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT) _SetDataFixed(dedy, 1.0F);
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else
ShowNTErrors("TODO");
return; return;
} }
...@@ -69,7 +61,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -69,7 +61,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
if(operID == LOSS_CROSSENTROPY) { if(operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3) if (income.tailNum == 3)
padding = income.tails[2]; padding = income.tails[2];
leadingDim = income.GetParamInt(0); leadingDim = income.GetParamInt(0);
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!"); CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!");
_CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim); _CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
} }
...@@ -98,39 +90,39 @@ compute dE/dx for a given function y = f(x) ...@@ -98,39 +90,39 @@ compute dE/dx for a given function y = f(x)
>> params - parameters of the function >> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy >> lossName - name of the loss, e.g., cross entropy
*/ */
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x, //void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding, // XTensor * dedy, XTensor * dedx, XTensor * padding,
int funcID, void * params, // int funcID, void * params,
LOSS_FUNCTION_NAME lossName) // LOSS_FUNCTION_NAME lossName)
{ //{
CheckNTErrors(gold && y && x, "Empty input tensors!"); // CheckNTErrors(gold && y && x, "Empty input tensors!");
CheckNTErrors(dedx, "Empty gradient tensors!"); // CheckNTErrors(dedx, "Empty gradient tensors!");
CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id"); // CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
//
if(funcID == FUNC_HARDTANH){ // if(funcID == FUNC_HARDTANH){
_HardTanHBackward(gold, y, x, dedy, dedx, lossName); // _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
} // }
else if(funcID == FUNC_IDENTITY){ // else if(funcID == FUNC_IDENTITY){
_IdentityBackward(gold, y, x, dedy, dedx, lossName); // _IdentityBackward(gold, y, x, dedy, dedx, lossName);
} // }
else if(funcID == FUNC_LOGSOFTMAX){ // else if(funcID == FUNC_LOGSOFTMAX){
int leadDim = *(int*)params; // int leadDim = *(int*)params;
_LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName); // _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
} // }
else if(funcID == FUNC_RECTIFY){ // else if(funcID == FUNC_RECTIFY){
_RectifyBackward(gold, y, x, dedy, dedx, lossName); // _RectifyBackward(gold, y, x, dedy, dedx, lossName);
} // }
else if(funcID == FUNC_SIGMOID){ // else if(funcID == FUNC_SIGMOID){
_SigmoidBackward(gold, y, x, dedy, dedx, lossName); // _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
}else if(funcID == FUNC_SOFTMAX){ // }else if(funcID == FUNC_SOFTMAX){
int leadDim = *(int*)params; // int leadDim = *(int*)params;
_SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName); // _SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
} // }
else{ // else{
ShowNTErrors("wrong function found when call the backward process!"); // ShowNTErrors("wrong function found when call the backward process!");
} // }
//
} //}
/* /*
compute dE/dy for variable y and error(loss) function E compute dE/dy for variable y and error(loss) function E
...@@ -139,27 +131,27 @@ compute dE/dy for variable y and error(loss) function E ...@@ -139,27 +131,27 @@ compute dE/dy for variable y and error(loss) function E
>> dedy - dE/dy >> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy >> lossName - name of the loss, e.g., cross entropy
*/ */
void XLossGrad::Compute(XTensor * gold, XTensor * y, //void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy, XTensor * padding, // XTensor * dedy, XTensor * padding,
LOSS_FUNCTION_NAME lossName) // LOSS_FUNCTION_NAME lossName)
{ //{
if(gold == NULL){ // if(gold == NULL){
if(dedy->dataType == X_FLOAT) // if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F); // _SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE) // else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0); // _SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT) // else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1); // _SetDataFixedInt(dedy, 1);
else{ // else{
ShowNTErrors("TODO"); // ShowNTErrors("TODO");
} // }
return; // return;
} // }
//
//_LossBackward(dedy, gold, y, lossName); // //_LossBackward(dedy, gold, y, lossName);
if(lossName == CROSSENTROPY) // if(lossName == CROSSENTROPY)
_CrossEntropyBackward(dedy, y, gold, NULL, padding); // _CrossEntropyBackward(dedy, y, gold, NULL, padding);
//
} //}
} }
\ No newline at end of file
...@@ -43,11 +43,11 @@ public: ...@@ -43,11 +43,11 @@ public:
static static
bool IsLossOP(XTensor * node); bool IsLossOP(XTensor * node);
/* compute dE/dx for a given function y = f(x) */ ///* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x, //void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding, // XTensor * dedy, XTensor * dedx, XTensor * padding,
int funcID, void * params, // int funcID, void * params,
LOSS_FUNCTION_NAME lossName); // LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E */ /* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y, void Compute(XTensor * gold, XTensor * y,
......
...@@ -530,7 +530,7 @@ void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient) ...@@ -530,7 +530,7 @@ void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient)
XTensor * dedc = node->grad; XTensor * dedc = node->grad;
XTensor * deda = a->grad; XTensor * deda = a->grad;
XTensor * dedb = b->grad; XTensor * dedb = b->grad;
if(a->order == 2 && b->order == 2) if(a->order == 2 && b->order == 2)
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient); GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){ else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){
...@@ -735,7 +735,7 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient) ...@@ -735,7 +735,7 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
if (!isEfficient || b->isGrad) { if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b); XNoder::MakeGrad(b);
_Multiply(node->grad, a, b->grad, 1.0F);; _Multiply(node->grad, a, b->grad, 1.0F);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient) ...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
/* dE/da */ /* dE/da */
_MultiplyDim(node->grad, b, a->grad, n, 1.0F); _MultiplyDim(node->grad, b, a->grad, n, 1.0F);
/* dE/db */ /* dE/db */
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem); XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
_Multiply(node->grad, a, bGradTMP); _Multiply(node->grad, a, bGradTMP);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum/dimSize[order - 1]; reshapedSize[0] = a->unitNum/dimSize[order - 1];
...@@ -855,7 +855,6 @@ void XMathGrad::GradMultiplyBroadcast(XTensor * node, bool isEfficient) ...@@ -855,7 +855,6 @@ void XMathGrad::GradMultiplyBroadcast(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
_MultiplyBroadcast(node->grad, b, a->grad, 1.0F); _MultiplyBroadcast(node->grad, b, a->grad, 1.0F);
...@@ -1079,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta ...@@ -1079,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/ */
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient) void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{ {
XLink &income = node->income; XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!"); CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
int n = income.GetParamInt(0); int n = income.GetParamInt(0);
DTYPE beta = income.GetParam(1); DTYPE beta = income.GetParam(1);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
XNoder::MakeGrad(b); XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad); _Sum(a->grad, node->grad, a->grad);
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum / dimSize[order - 1]; reshapedSize[0] = a->unitNum / dimSize[order - 1];
reshapedSize[1] = dimSize[order - 1]; reshapedSize[1] = dimSize[order - 1];
/* we reshape dE/dc to a matrix whose column number is equal to the /* we reshape dE/dc to a matrix whose column number is equal to the
size of b. Then we can reduce the matrix into a row vector. */ size of b. Then we can reduce the matrix into a row vector. */
node->grad->Reshape(2, reshapedSize); node->grad->Reshape(2, reshapedSize);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(node->grad, bGradTMP, 0); _ReduceSum(node->grad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(node->grad, b->grad, 0); _ReduceSum(node->grad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
} }
else{ else{
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = 1; reshapedSize[0] = 1;
reshapedSize[1] = dimSize[n]; reshapedSize[1] = dimSize[n];
reshapedSize[2] = 1; reshapedSize[2] = 1;
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
if(i < n) if(i < n)
reshapedSize[0] *= dimSize[i]; reshapedSize[0] *= dimSize[i];
} }
reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]); reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);
/* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|. /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
Then reduce along with z and x to obtain dE/db. */ Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize); node->grad->Reshape(3, reshapedSize);
XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2); _ReduceSum(node->grad, interGrad, 2);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(interGrad, bGradTMP, 0); _ReduceSum(interGrad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(interGrad, b->grad, 0); _ReduceSum(interGrad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad); DelTensorBuf(interGrad);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
/* /*
...@@ -1319,7 +1318,7 @@ void XMathGrad::GradSumBroadcast(XTensor * node, bool isEfficient) ...@@ -1319,7 +1318,7 @@ void XMathGrad::GradSumBroadcast(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0); //DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
_Sum(a->grad, node->grad, a->grad); _Sum(a->grad, node->grad, a->grad);
......
...@@ -146,10 +146,10 @@ private: ...@@ -146,10 +146,10 @@ private:
static static
void GradSub(XTensor * node, bool isEfficient); void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta /* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSubDim(XTensor * node, bool isEfficient); void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */ /* gradient for sum: c = a + b * \beta */
static static
......
...@@ -271,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient) ...@@ -271,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!"); CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL; XTensor * last = NULL;
XList smalls(income.tailNum); TensorList smalls(income.tailNum);
XList smallsGrad(income.tailNum); TensorList smallsGrad(income.tailNum);
bool mergeOnly = true; bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){ for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i]; XTensor * tail = income.tails[i];
...@@ -440,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -440,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
/* we compute the gradient for current node, rather than for /* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */ child node, i.e., we use the outgoing edge here */
XLink &outgo = node->outgo; XLink &outgo = node->outgo;
XList splits(outgo.tailNum); TensorList splits(outgo.tailNum);
int whereToSplit = -1; int whereToSplit = -1;
int splitNum = 0; int splitNum = 0;
...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){ if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0); int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1); int splitID = income.GetParamInt(1);
if(whereToSplit < 0) if(whereToSplit < 0)
whereToSplit = w; whereToSplit = w;
splitNum++; splitNum++;
......
...@@ -54,7 +54,7 @@ private: ...@@ -54,7 +54,7 @@ private:
static static
void GradGather(XTensor * node, bool isEfficent); void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with indexs */ /* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static static
void GradDropoutWithIndex(XTensor * node, bool isEfficent); void GradDropoutWithIndex(XTensor * node, bool isEfficent);
......
...@@ -37,16 +37,16 @@ struct XNet ...@@ -37,16 +37,16 @@ struct XNet
unsigned int id; unsigned int id;
/* tensor nodes of the network (in order) */ /* tensor nodes of the network (in order) */
XList nodes; TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/ /* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes; TensorList gradNodes;
/* output nodes of the network */ /* output nodes of the network */
XList outputs; TensorList outputs;
/* input nodes of the network */ /* input nodes of the network */
XList inputs; TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */ /* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient; bool isGradEfficient;
...@@ -71,15 +71,15 @@ struct XNet ...@@ -71,15 +71,15 @@ struct XNet
/* backward propagation to obtain gradient /* backward propagation to obtain gradient
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient /* backward propagation to obtain gradient
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false); void BackwardNode(XTensor * node, bool isEfficent = false);
...@@ -93,10 +93,10 @@ struct XNet ...@@ -93,10 +93,10 @@ struct XNet
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots); void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */ /* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code); void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */ /* dump network information */
void Dump(FILE * file); void Dump(FILE * file);
......
...@@ -51,14 +51,12 @@ initialize the model ...@@ -51,14 +51,12 @@ initialize the model
>> myIgnored - number of position ignored in attention (from the begining) >> myIgnored - number of position ignored in attention (from the begining)
>> myIsMasked - indicates whether the attention is with a mask >> myIsMasked - indicates whether the attention is with a mask
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TAttention::InitModel(int argc, char ** argv, void T2TAttention::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
isMasked = myIsMasked; isMasked = myIsMasked;
ignored = myIgnored; ignored = myIgnored;
...@@ -71,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv, ...@@ -71,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem); InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem); InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID, mem); InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk)); float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
...@@ -127,7 +125,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -127,7 +125,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
XTensor q2; XTensor q2;
XTensor v2; XTensor v2;
XTensor kqv2; XTensor kqv2;
XList split; TensorList split;
kqv2 = MMul(kqv, wbig); kqv2 = MMul(kqv, wbig);
...@@ -135,9 +133,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -135,9 +133,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1); int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3; int d3 = kqv2.GetDim(2) / 3;
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2); split.Add(&q2);
split.Add(&k2); split.Add(&k2);
......
...@@ -42,9 +42,6 @@ public: ...@@ -42,9 +42,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* head number */ /* head number */
int nhead; int nhead;
...@@ -61,7 +58,7 @@ public: ...@@ -61,7 +58,7 @@ public:
XTensor wa; XTensor wa;
XTensor wbig; XTensor wbig;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
...@@ -94,7 +91,7 @@ public: ...@@ -94,7 +91,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining); XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-25
* it is cold today but i'll move to a warm place tomorrow :)
*/
#ifndef __T2TBATCHLOADER_H__
#define __T2TBATCHLOADER_H__
#include "../../network/XNet.h"
using namespace nts;
namespace transformer
{
#define MAX_SEQUENCE_LENGTH 1024 * 4
/* node that records the location and size of one batch inside the
   sequence buffer of T2TBatchLoader */
struct BatchNode
{
    /* beginning position (index of the first sequence of the batch) */
    int beg;

    /* end position (one past the last sequence of the batch) */
    int end;

    /* maximum word number on the encoder side */
    int maxEnc;

    /* maximum word number on the decoder side */
    int maxDec;

    /* a key for sorting batches */
    int key;
};
/*
Batch loader for Transformer (T2T) training. It buffers sequences read
from a file and produces batched tensors (word indices, paddings, gold
standard and labels) for language modeling and machine translation.
*/
class T2TBatchLoader
{
public:
    /* buffer for loading words */
    int * buf;

    /* another word buffer (companion to "buf") */
    int * buf2;

    /* buffer of batch information nodes */
    BatchNode * bufBatch;

    /* buffer size */
    int bufSize;

    /* size of batch buffer */
    int bufBatchSize;

    /* length of each sequence */
    int * seqLen;

    /* another sequence-length array (companion to "seqLen") */
    int * seqLen2;

    /* offset of the first word for each sequence */
    int * seqOffset;

    /* number of sequences in the buffer */
    int nseqBuf;

    /* offset for the next sequence in the buffer */
    int nextSeq;

    /* offset for the next batch */
    int nextBatch;

    /* indicates whether we double the </s> symbol for the output of LMs */
    bool isDoubledEnd;

    /* indicates whether we use batchsize = max * sc
       rather than batchsize = word-number, where max is the maximum
       length and sc is the sentence number */
    bool isSmallBatch;

    /* counterpart of "isSmallBatch" */
    bool isBigBatch;

    /* randomize batches */
    bool isRandomBatch;

    /* bucket size */
    int bucketSize;

public:
    /* constructor */
    T2TBatchLoader();

    /* de-constructor */
    ~T2TBatchLoader();

    /* initialization */
    void Init(int argc, char ** argv);

    /* load data to the buffer */
    int LoadBuf(FILE * file, bool isSorted, int step);

    /* clear the data buffer */
    void ClearBuf();

    /* set the random batch flag */
    void SetRandomBatch(bool flag = true);

    /* load a batch of sequences */
    int LoadBatch(FILE * file, bool isLM,
                  XTensor * batchEnc, XTensor * paddingEnc,
                  XTensor * batchDec, XTensor * paddingDec,
                  XTensor * gold, XTensor * label,
                  int * seqs,
                  int vsEnc, int vsDec, int sBatch, int wBatch,
                  bool isSorted, int &ws, int &wCount,
                  int devID, bool isTraining);

    /* load a batch of sequences (for language modeling) */
    int LoadBatchLM(FILE * file,
                    XTensor * batchEnc, XTensor * paddingEnc,
                    XTensor * batchDec, XTensor * paddingDec,
                    XTensor * gold, XTensor * label,
                    int * seqs, int vs, int sBatch, int wBatch,
                    bool isSorted, int &wCount,
                    int devID, bool isTraining);

    /* load a batch of sequences (for machine translation) */
    int LoadBatchMT(FILE * file,
                    XTensor * batchEnc, XTensor * paddingEnc,
                    XTensor * batchDec, XTensor * paddingDec,
                    XTensor * gold, XTensor * label,
                    int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
                    bool isSorted, int &ws, int &wCount,
                    int devID, bool isTraining);

    /* shuffle the data file */
    void Shuffle(const char * srcFile, const char * tgtFile);
};
}
#endif
\ No newline at end of file
...@@ -31,6 +31,10 @@ namespace transformer ...@@ -31,6 +31,10 @@ namespace transformer
/* constructor */ /* constructor */
AttDecoder::AttDecoder() AttDecoder::AttDecoder()
{ {
attentions = NULL;
fnns = NULL;
attLayerNorms = NULL;
fnnLayerNorms = NULL;
attentionsEnde = NULL; attentionsEnde = NULL;
attEndeLayerNorms = NULL; attEndeLayerNorms = NULL;
} }
...@@ -38,6 +42,10 @@ AttDecoder::AttDecoder() ...@@ -38,6 +42,10 @@ AttDecoder::AttDecoder()
/* de-constructor */ /* de-constructor */
AttDecoder::~AttDecoder() AttDecoder::~AttDecoder()
{ {
delete[] attentions;
delete[] fnns;
delete[] attLayerNorms;
delete[] fnnLayerNorms;
delete[] attentionsEnde; delete[] attentionsEnde;
delete[] attEndeLayerNorms; delete[] attEndeLayerNorms;
} }
...@@ -49,16 +57,14 @@ initialize the model ...@@ -49,16 +57,14 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void AttDecoder::InitModel(int argc, char ** argv, void AttDecoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); //AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID; devID = myDevID;
mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -68,10 +74,10 @@ void AttDecoder::InitModel(int argc, char ** argv, ...@@ -68,10 +74,10 @@ void AttDecoder::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0); LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);
CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!"); CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem, false); embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -82,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv, ...@@ -82,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) { for (int i = 0; i < nlayer; i++) {
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID, myMem); attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
...@@ -159,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X ...@@ -159,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
} }
x.SetName(DECODING_NAME);
return x; return x;
} }
......
...@@ -26,6 +26,9 @@ ...@@ -26,6 +26,9 @@
namespace transformer namespace transformer
{ {
#define DECODING_NAME "decoding"
#define DECODING_INPUT_NAME "decoding_input"
class AttDecoder class AttDecoder
{ {
...@@ -34,9 +37,6 @@ public: ...@@ -34,9 +37,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -92,7 +92,7 @@ public: ...@@ -92,7 +92,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the decoding network */ /* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder() T2TEmbedder::T2TEmbedder()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
maxLength = -1; maxLength = -1;
} }
...@@ -46,12 +45,10 @@ initialize the model ...@@ -46,12 +45,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc) void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
if(isEnc){ if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1); LoadParamInt(argc, argv, "vsize", &vSize, -1);
...@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b ...@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize); DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v); w.SetDataRandn(0, v);
...@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length) ...@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/ */
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{ {
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum]; float * data = new float[posEmbeddingBase.unitNum];
...@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input) ...@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */ /* we make positional embeddings first */
//if(!match){ //if(!match){
if(true){ if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem); XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0); _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]); _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
...@@ -41,9 +41,6 @@ public: ...@@ -41,9 +41,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -71,7 +68,7 @@ public: ...@@ -71,7 +68,7 @@ public:
~T2TEmbedder(); ~T2TEmbedder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL, bool isEnc = true); void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
/* make positional embeddings */ /* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length); void MakePosEmbedding(int eSize, int d, int length);
......
...@@ -52,15 +52,12 @@ initialize the model ...@@ -52,15 +52,12 @@ initialize the model
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id*/
>> myMem - the memory pool
*/
void AttEncoder::InitModel(int argc, char ** argv, void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem); embedder.InitModel(argc, argv, devID);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){ for(int i = 0; i < nlayer; i++){
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
...@@ -139,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo ...@@ -139,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
} }
x.SetName(ENCODING_NAME);
input.SetName(ENCODING_INPUT_NAME);
return x; return x;
} }
......
...@@ -32,6 +32,9 @@ using namespace nts; ...@@ -32,6 +32,9 @@ using namespace nts;
namespace transformer namespace transformer
{ {
#define ENCODING_NAME "encoding"
#define ENCODING_INPUT_NAME "encoding_input"
/* /*
base class of the encoder base class of the encoder
...@@ -62,9 +65,6 @@ public: ...@@ -62,9 +65,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -115,7 +115,7 @@ public: ...@@ -115,7 +115,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the encoding network */ /* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -47,12 +47,10 @@ initialize the model ...@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -62,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -62,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem); InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem); InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize)); float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize));
......
...@@ -36,9 +36,6 @@ public: ...@@ -36,9 +36,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* size of input vector */ /* size of input vector */
int inSize; int inSize;
...@@ -72,7 +69,7 @@ public: ...@@ -72,7 +69,7 @@ public:
~T2TFNN(); ~T2TFNN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input, bool isTraining); XTensor Make(XTensor &input, bool isTraining);
......
...@@ -32,7 +32,6 @@ namespace transformer ...@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN() T2TLN::T2TLN()
{ {
devID = -1; devID = -1;
mem = NULL;
d = 0; d = 0;
} }
...@@ -46,18 +45,16 @@ initialize the model ...@@ -46,18 +45,16 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
d = 0; d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem); InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID, mem); InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F); w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll(); b.SetZeroAll();
......
...@@ -36,9 +36,6 @@ class T2TLN ...@@ -36,9 +36,6 @@ class T2TLN
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the transformation matrix w */ /* the transformation matrix w */
XTensor w; XTensor w;
...@@ -57,7 +54,7 @@ public: ...@@ -57,7 +54,7 @@ public:
~T2TLN(); ~T2TLN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "../../tensor/core/CHeader.h"
#include "T2TLengthPenalty.h"
using namespace nts;
namespace transformer
{
/*
GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence
>> length - length of the sequence (for each entry)
>> alpha - the parameter that controls the length preference
<< return - length penalty of the sequence (for each entry)
*/
XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
{
    /* the base term (5 + n)/(5 + 1) */
    XTensor base;
    base = (length + 5)/(1 + 5);

    /* raise the base to the power of alpha */
    XTensor penalty;
    penalty = Power(base, alpha);

    return penalty;
}
}
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-08
*/ * Start of a new week - I just finished several documents.
* Writing document is harder than writing code :)
*/
#ifndef __SUMBYCOLUMNTV_H__ #ifndef __T2TLENGTHPENALTY_H__
#define __SUMBYCOLUMNTV_H__ #define __T2TLENGTHPENALTY_H__
#include "../../XTensor.h" #include "../../tensor/XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) using namespace nts;
/* sum of a tensor and a (column) vector */ namespace transformer
void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); {
} // namespace nts(NiuTrans.Tensor) /* We intend to penalize short sequences because they have higher score
in product of a sequence of probability-like terms and have more chances
to beat others in search. */
class T2TLengthPenalizer
{
public:
/* GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence */
static
XTensor GNMT(const XTensor & length, float alpha);
};
#endif // __SUMBYCOLUMNTV_H__ }
#endif
...@@ -40,9 +40,6 @@ public: ...@@ -40,9 +40,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the encoder */ /* the encoder */
AttEncoder * encoder; AttEncoder * encoder;
...@@ -98,7 +95,7 @@ public: ...@@ -98,7 +95,7 @@ public:
XTensor &maskDec, XTensor &maskEncDec); XTensor &maskDec, XTensor &maskEncDec);
/* get parameter matrics */ /* get parameter matrics */
void GetParams(XList &list); void GetParams(TensorList &list);
/* dump the parameters */ /* dump the parameters */
void Dump(const char * fn); void Dump(const char * fn);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TOutput::T2TOutput() T2TOutput::T2TOutput()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
inSize = -1; inSize = -1;
hSize = -1; hSize = -1;
...@@ -47,12 +46,10 @@ initialize the model ...@@ -47,12 +46,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F); LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize)); float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
...@@ -95,6 +92,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output) ...@@ -95,6 +92,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
//output = LogSoftmax(MMul(x, w), -1); //output = LogSoftmax(MMul(x, w), -1);
output = Softmax(MMul(x, w), -1); output = Softmax(MMul(x, w), -1);
output.SetName(OUTPUT_NAME);
} }
} }
...@@ -28,6 +28,8 @@ using namespace nts; ...@@ -28,6 +28,8 @@ using namespace nts;
namespace transformer namespace transformer
{ {
#define OUTPUT_NAME "output"
/* output layer */ /* output layer */
class T2TOutput class T2TOutput
...@@ -36,9 +38,6 @@ public: ...@@ -36,9 +38,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -59,7 +58,7 @@ public: ...@@ -59,7 +58,7 @@ public:
~T2TOutput(); ~T2TOutput();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/
#include "T2TPredictor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
namespace transformer
{
/* constructor: an empty bundle with no state array allocated */
T2TStateBundle::T2TStateBundle()
{
    isStart = false;
    states = NULL;
}
/* de-constructor: release the state array (delete[] on NULL is a no-op) */
T2TStateBundle::~T2TStateBundle()
{
    delete[] states;
}
/*
create states
>> num - number of states
*/
void T2TStateBundle::MakeStates(int num)
{
CheckNTErrors(num > 0, "invalid number");
if(states != NULL)
delete[] states;
states = new T2TState[num];
for(int i = 0; i < num; i++){
states[i].prediction = -1;
states[i].pid = T2T_PID_EMPTY;
states[i].isEnd = false;
states[i].isStart = false;
states[i].isCompleted = false;
states[i].prob = 0;
states[i].probPath = 0;
states[i].modelScore = 0;
states[i].nstep = 0;
states[i].last = NULL;
}
stateNum = num;
}
/* constructor */
T2TPredictor::T2TPredictor()
{
    /* -1 means the start symbol has not been set yet (see SetStartSymbol) */
    startSymbol = -1;
}
/* de-constructor */
T2TPredictor::~T2TPredictor()
{
    /* nothing to release here */
}
/*
create an initial state bundle for search
>> model - the t2t model (note: not used in this function)
>> top - the top-most layer of the network
>> input - input of the network
>> beamSize - beam size
>> state - the state to be initialized
*/
void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state)
{
    state->layersEnc.Clear();
    state->layersDec.Clear();

    /* locate the encoder output in the network built so far */
    XTensor * encoding = XLink::SearchNode(top, ENCODING_NAME);
    CheckNTErrors(encoding != NULL, "No encoding layers found!");

    state->layersEnc.Add(encoding);
    /* a NULL placeholder: no decoder input exists before the first step */
    state->layersDec.Add(NULL);

    /* shape: same as the input except that the last dimension is beamSize */
    int dims[MAX_TENSOR_DIM_NUM];
    for (int i = 0; i < input->order - 1; i++)
        dims[i] = input->GetDim(i);
    dims[input->order - 1] = beamSize;

    InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
    InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
    InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);

    state->probPath.SetZeroAll();
    state->nstep.SetZeroAll();
    state->endMark.SetZeroAll();

    state->stateNum = 0;
}
/*
set the start symbol used as the first decoder input
>> symbol - the symbol (in integer)
*/
void T2TPredictor::SetStartSymbol(int symbol)
{
    startSymbol = symbol;
}
/*
read a state (keep the model and the state bundle for later prediction)
>> model - the t2t model that keeps the network created so far
>> state - a set of states. It keeps
           1) hypotheses (states)
           2) probabilities of hypotheses
           3) parts of the network for expanding toward the next state
*/
void T2TPredictor::Read(T2TModel * model, T2TStateBundle * state)
{
    m = model;
    s = state;
}
/*
predict the next state: run the decoder for one more position and produce
the output distribution for that position
>> next - next states (assuming that the current state has been read)
>> encoding - encoder output
>> inputEnc - input of the encoder
>> paddingEnc - padding of the encoder
*/
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
                           XTensor * inputEnc, XTensor * paddingEnc)
{
    int dims[MAX_TENSOR_DIM_NUM];

    next->layersEnc.Clear();
    next->layersDec.Clear();

    AttDecoder &decoder = *m->decoder;

    /* word indices of previous positions */
    XTensor * inputLast = (XTensor*)s->layersDec.GetItem(0);

    /* word indices of positions up to next state */
    XTensor inputDec;

    /* the first token (filled with the start symbol below) */
    XTensor first;

    CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!");

    /* shape of "first": same as the encoder input except that the last
       dimension is 1 */
    for(int i = 0; i < inputEnc->order - 1; i++)
        dims[i] = inputEnc->GetDim(i);
    dims[inputEnc->order - 1] = 1;

    InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
    _SetDataFixed(&first, startSymbol);

    /* add a new word into the input sequence of the decoder side */
    if (inputLast == NULL) {
        /* first step: the decoder input is just the start symbol */
        inputDec = Identity(first);
    }
    else{
        /* later steps: prepend the start symbol to the paths generated so far */
        inputDec = GeneratePaths(s);
        inputDec.SetDevice(inputEnc->devID);

        inputDec = Concatenate(first, inputDec, inputDec.order - 1);
    }

    /* prediction probabilities */
    XTensor &output = next->prob;
    XTensor decoding;
    XTensor decodingStep;

    for(int i = 0; i < inputDec.order - 1; i++)
        dims[i] = inputDec.GetDim(i);
    dims[inputDec.order - 1] = inputDec.GetDim(-1);

    /* decoder-side padding: all ones */
    XTensor paddingDec;
    InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
    SetDataFixed(paddingDec, 1);

    XTensor maskDec;
    XTensor maskEncDec;

    /* decoder mask */
    m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec);

    /* make the decoding network */
    decoding = decoder.Make(inputDec, *encoding, maskDec, maskEncDec, false);

    XTensor selectSrc;
    XTensor selectTgt;

    CheckNTErrors(decoding.order >= 2, "The tensor must be of order 2 or larger!");

    /* number of positions on the decoder side */
    int stride = decoding.GetDim(decoding.order - 2);

    /* index tensors that select the last position of the decoder output */
    InitTensor1DV2(&selectSrc, 1, X_INT);
    InitTensor1DV2(&selectTgt, 1, X_INT);

    selectSrc.SetInt(stride - 1, 0);
    selectTgt.SetInt(0, 0);

    selectSrc.SetDevice(decoding.devID);
    selectTgt.SetDevice(decoding.devID);

    /* the decoder output of the last position */
    decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt);

    /* generate the output probabilities */
    m->outputLayer->Make(decodingStep, output);

    next->layersEnc.AddList(&s->layersEnc);

    /* NOTE(review): "inputDec" is a function-local XTensor whose address is
       stored beyond this call - confirm that its lifetime is managed
       elsewhere (e.g., by the tensor/link machinery) before relying on it */
    next->layersDec.Add(&inputDec);
    next->layersDec.Add(&output);
}
/*
generate paths up to the states of the current step
>> state - state bundle of the current step
<< return - a (stateNum, distance) integer tensor; row i holds the predictions
            along the back-pointer chain of state i, right-aligned and
            left-padded with zeros for shorter paths
*/
XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
{
    /* an empty bundle would leave "distance" at -1 below and lead to an
       invalid (stateNum, -1) tensor, so require at least one state here.
       (the original check "stateNum >= 0" failed to rule this out) */
    CheckNTErrors(state->stateNum > 0, "Illegal state!");

    /* the longest back-pointer chain over all states */
    int distance = -1;
    for(int i = 0; i < state->stateNum; i++){
        T2TState * cur = state->states + i;
        int nsteps = 0;

        while(cur != NULL){
            nsteps++;
            cur = cur->last;
        }

        if(nsteps > distance)
            distance = nsteps;
    }

    XTensor path;
    InitTensor2DV2(&path, state->stateNum, distance, X_INT);
    path.SetZeroAll();

    /* fill each row from right to left by walking the back-pointers */
    for(int i = 0; i < state->stateNum; i++){
        T2TState * cur = state->states + i;
        int nsteps = 0;

        while(cur != NULL){
            nsteps++;
            path.Set2DInt(cur->prediction, i, distance - nsteps);
            cur = cur->last;
        }
    }

    return path;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start!
*/
#ifndef __T2TPREDICTOR_H__
#define __T2TPREDICTOR_H__
#include "T2TModel.h"
#include "T2TLengthPenalty.h"
namespace transformer
{
#define T2T_PID_EMPTY -1
/* state for search. It keeps the path (back-pointer), prediction distribution,
   and etc. It can be regarded as a hypothesis in translation. */
class T2TState
{
public:
    /* we assume that the prediction is an integer */
    int prediction;

    /* id of the problem. One can regard it as the sentence id when we
       translate a number of sentences in the batched manner. The hypothesis
       is empty if id = -1 (see T2T_PID_EMPTY) */
    int pid;

    /* indicates whether the state is an end */
    bool isEnd;

    /* indicates whether the state is the start */
    bool isStart;

    /* indicates whether the state is completed */
    bool isCompleted;

    /* probability of every prediction (last state of the path) */
    float prob;

    /* probability of every path */
    float probPath;

    /* model score of every path. A model score = path probability + some other stuff */
    float modelScore;

    /* number of steps we go over so far */
    int nstep;

    /* pointer to the previous state (back-pointer used to recover the path) */
    T2TState * last;
};
/* a bundle of states (all hypotheses kept at one search step) */
class T2TStateBundle
{
public:
    /* predictions */
    XTensor prediction;

    /* id of the previous state that generates the current one */
    XTensor preID;

    /* mark that indicates whether each hypothesis is completed */
    XTensor endMark;

    /* probability of every prediction (last state of the path) */
    XTensor prob;

    /* probability of every path */
    XTensor probPath;

    /* model score of every path */
    XTensor modelScore;

    /* step number of each hypothesis */
    XTensor nstep;

    /* layers on the encoder side. We actually use the encoder output instead
       of all hidden layers. */
    TensorList layersEnc;

    /* layers on the decoder side */
    TensorList layersDec;

    /* list of states */
    T2TState * states;

    /* number of states */
    int stateNum;

    /* indicates whether it is the first state */
    bool isStart;

public:
    /* constructor */
    T2TStateBundle();

    /* de-constructor */
    ~T2TStateBundle();

    /* create states (allocates the "states" array of the given size) */
    void MakeStates(int num);
};
/* The predictor reads the current state and then predicts the next.
   It is exactly the same procedure of MT inference -
   we get the state of previous words and then generate the next word.
   Here, a state can be regarded as the representation of words (word
   indices, hidden states, embeddings and etc.). */
class T2TPredictor
{
private:
    /* pointer to the transformer model */
    T2TModel * m;

    /* current state */
    T2TStateBundle * s;

    /* start symbol */
    int startSymbol;

public:
    /* constructor */
    T2TPredictor();

    /* de-constructor */
    ~T2TPredictor();

    /* create an initial state */
    void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);

    /* set the start symbol */
    void SetStartSymbol(int symbol);

    /* read a state */
    void Read(T2TModel * model, T2TStateBundle * state);

    /* predict the next state */
    void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc);

    /* generate paths up to the states of the current step */
    XTensor GeneratePaths(T2TStateBundle * state);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#ifndef __T2TSEARCH_H__
#define __T2TSEARCH_H__
#include "T2TModel.h"
#include "T2TPredictor.h"
namespace transformer
{
/* The class organizes the search process. It calls "predictors" to generate
   distributions of the predictions and prunes the search space by beam pruning.
   This makes a graph where each path represents a translation hypothesis.
   The output can be the path with the highest model score. */
class T2TSearch
{
private:
    /* the alpha parameter controls the length preference */
    float alpha;

    /* predictor (generates the distribution for the next step) */
    T2TPredictor predictor;

    /* max length of the generated sequence */
    int maxLength;

    /* beam size */
    int beamSize;

    /* batch size */
    int batchSize;

    /* we keep the final hypotheses in a heap for each sentence in the batch. */
    XHeap<MIN_HEAP, float> * fullHypos;

    /* array of the end symbols */
    int * endSymbols;

    /* number of the end symbols */
    int endSymbolNum;

    /* start symbol */
    int startSymbol;

public:
    /* constructor */
    T2TSearch();

    /* de-constructor */
    ~T2TSearch();

    /* initialize the model */
    void Init(int argc, char ** argv);

    /* search for the most promising states */
    void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);

    /* preparation */
    void Prepare(int myBatchSize,int myBeamSize);

    /* compute the model score for each hypothesis */
    void Score(T2TStateBundle * prev, T2TStateBundle * beam);

    /* generate token indices via beam pruning */
    void Generate(T2TStateBundle * beam);

    /* expand the search graph */
    void Expand(T2TStateBundle * prev, T2TStateBundle * beam);

    /* collect hypotheses with ending symbol */
    void Collect(T2TStateBundle * beam);

    /* fill the hypothesis heap with incomplete hypotheses */
    void FillHeap(T2TStateBundle * beam);

    /* save the output sequences in a tensor */
    void Dump(XTensor * output);

    /* check if the token is an end symbol */
    bool IsEnd(int token);

    /* set end symbols for search */
    void SetEnd(const int * tokens, const int tokenNum);

    /* make a mask to prevent duplicated entries in beam expansion for the first position */
    XTensor MakeFirstMask(T2TStateBundle * beam);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include <math.h>
#include "T2TUtility.h"
#include "T2TTester.h"
#include "T2TSearch.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h"
using namespace nts;
namespace transformer
{
/* constructor (members are set up later in Init()) */
T2TTester::T2TTester()
{
}

/* de-constructor */
T2TTester::~T2TTester()
{
}
/*
initialize the tester from command-line style arguments
>> argc - number of arguments
>> argv - list of arguments
*/
void T2TTester::Init(int argc, char ** argv)
{
    /* "vsize": source-side vocabulary size */
    LoadParamInt(argc, argv, "vsize", &vSize, 1);
    /* "vsizetgt": target-side vocabulary size (defaults to the source size) */
    LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
    batchLoader.Init(argc, argv);
    seacher.Init(argc, argv);
}
/*
test the model
>> fn - test data file
>> ofn - output data file
>> model - model that is trained
*/
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
{
int wc = 0;
int ws = 0;
int wordCount = 0;
int wordCountTotal = 0;
int sentCount = 0;
int batchCount = 0;
float loss = 0;
/* data files */
FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot read the test file");
FILE * ofile = fopen(ofn, "wb");
CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID;
XNet net;
double startT = GetClockSec();
wordCount = 0;
/* batch of input sequences */
XTensor batchEnc;
XTensor batchDec;
/* label */
XTensor label;
/* padding */
XTensor paddingEnc;
XTensor paddingDec;
/* gold standard */
XTensor gold;
/* an array that keeps the sequences */
int * seqs = new int[MILLION];
batchLoader.SetRandomBatch(false);
batchLoader.ClearBuf();
while(batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label,
seqs, vSize, vSizeTgt,
1, 1, false, ws, wc, devID, false))
{
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!");
CheckNTErrors(!model->isLM, "Only MT model is supported!");
XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output);
Dump(ofile, &output);
float prob = 0;
loss += -prob;
wc = batchEnc.GetDim(-1);
wordCount += wc;
wordCountTotal += wc;
sentCount += batchEnc.GetDim(-2);
batchCount += 1;
if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
}
}
fclose(file);
fclose(ofile);
delete[] seqs;
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, exp(loss/wordCount));
}
/*
dump the result into the file
>> file - data file
>> output - output tensor, viewed as rows of length GetDim(-1)
*/
void T2TTester::Dump(FILE * file, XTensor * output)
{
    const int rowLen = output->GetDim(-1);

    /* one line per sequence; a negative token ends the row early
       (the terminator itself is still written) */
    for (int beg = 0; beg < output->unitNum; beg += rowLen) {
        int pos = 0;
        while (pos < rowLen) {
            int token = output->GetInt(beg + pos);
            fprintf(file, "%d ", token);
            pos++;
            if (token < 0)
                break;
        }
        fprintf(file, "\n");
    }
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :)
*/
#ifndef __T2TTESTER_H__
#define __T2TTESTER_H__
#include "T2TSearch.h"
#include "T2TBatchLoader.h"
namespace transformer
{
/* This class translates test sentences with a trained model. */
class T2TTester
{
public:
    /* vocabulary size of the source side */
    int vSize;

    /* vocabulary size of the target side */
    int vSizeTgt;

    /* for batching */
    T2TBatchLoader batchLoader;

    /* decoder for inference (NOTE: "seacher" is a historical misspelling of
       "searcher"; the identifier is kept for source compatibility) */
    T2TSearch seacher;

public:
    /* constructor */
    T2TTester();

    /* de-constructor */
    ~T2TTester();

    /* initialize the tester from command-line style arguments */
    void Init(int argc, char ** argv);

    /* test the model: translate file fn and write results to ofn */
    void Test(const char * fn, const char * ofn, T2TModel * model);

    /* dump the result into the file */
    void Dump(FILE * file, XTensor * output);
};
}
#endif
\ No newline at end of file
...@@ -23,35 +23,14 @@ ...@@ -23,35 +23,14 @@
#define __T2TTRAINER_H__ #define __T2TTRAINER_H__
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TBatchLoader.h"
#include "../../tensor/function/FHeader.h" #include "../../tensor/function/FHeader.h"
#define MAX_SEQUENCE_LENGTH 1024 * 4
using namespace nts; using namespace nts;
namespace transformer namespace transformer
{ {
/* node to keep batch information */
struct BatchNode
{
/* begining position */
int beg;
/* end position */
int end;
/* maximum word number on the encoder side */
int maxEnc;
/* maximum word number on the decoder side */
int maxDec;
/* a key for sorting */
int key;
};
/* trainer of the T2T model */ /* trainer of the T2T model */
class T2TTrainer class T2TTrainer
{ {
...@@ -61,42 +40,6 @@ public: ...@@ -61,42 +40,6 @@ public:
/* parameter array */ /* parameter array */
char ** argArray; char ** argArray;
/* buffer for loading words */
int * buf;
/* another buffer */
int * buf2;
/* batch buf */
BatchNode * bufBatch;
/* buffer size */
int bufSize;
/* size of batch buffer */
int bufBatchSize;
/* length of each sequence */
int * seqLen;
/* another array */
int * seqLen2;
/* offset of the first word for each sequence */
int * seqOffset;
/* number of sequences in the buffer */
int nseqBuf;
/* offset for next sequence in the buffer */
int nextSeq;
/* offset for next batch */
int nextBatch;
/* indicates whether the sequence is sorted by length */
bool isLenSorted;
/* dimension size of each inner layer */ /* dimension size of each inner layer */
int d; int d;
...@@ -139,10 +82,10 @@ public: ...@@ -139,10 +82,10 @@ public:
float adamBeta2T; float adamBeta2T;
/* list of the moment of the parameter matrics */ /* list of the moment of the parameter matrics */
XList moments; TensorList moments;
/* list of the 2nd order moment of the parameter matrics */ /* list of the 2nd order moment of the parameter matrics */
XList moments2nd; TensorList moments2nd;
/* indicates whether the data file is shuffled for training */ /* indicates whether the data file is shuffled for training */
bool isShuffled; bool isShuffled;
...@@ -158,26 +101,15 @@ public: ...@@ -158,26 +101,15 @@ public:
/* number of batches on which we do model update */ /* number of batches on which we do model update */
int updateStep; int updateStep;
/* indicates whether we double the </s> symbol for the output of lms */
bool isDoubledEnd;
/* indicates whether we use batchsize = max * sc
rather rather than batchsize = word-number, where max is the maximum
length and sc is the sentence number */
bool isSmallBatch;
/* counterpart of "isSmallBatch" */
bool isBigBatch;
/* randomize batches */
bool isRandomBatch;
/* indicates whether we intend to debug the net */ /* indicates whether we intend to debug the net */
bool isDebugged; bool isDebugged;
/* bucket size */ /* indicates whether the sequence is sorted by length */
int bucketSize; bool isLenSorted;
/* for batching */
T2TBatchLoader batchLoader;
public: public:
/* constructor */ /* constructor */
...@@ -197,46 +129,6 @@ public: ...@@ -197,46 +129,6 @@ public:
/* make a checkpoint */ /* make a checkpoint */
void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id); void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id);
/* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step);
/* clear data buffer */
void ClearBuf();
/* load a batch of sequences */
int LoadBatch(FILE * file, bool isLM,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
/* get word probabilities for a batch of sequences */ /* get word probabilities for a batch of sequences */
float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs); float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TUtility.h" #include "T2TUtility.h"
#include "T2TTrainer.h" #include "T2TTrainer.h"
#include "T2TPredictor.h"
#include "T2TTester.h"
#include "../../tensor/XDevice.h" #include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h" #include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h" #include "../../tensor/XGlobal.h"
...@@ -47,6 +49,7 @@ int TransformerMain(int argc, const char ** argv) ...@@ -47,6 +49,7 @@ int TransformerMain(int argc, const char ** argv)
ShowParams(argc, args); ShowParams(argc, args);
bool isBeamSearch = false;
char * trainFN = new char[MAX_LINE_LENGTH]; char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH]; char * modelFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH]; char * testFN = new char[MAX_LINE_LENGTH];
...@@ -56,6 +59,7 @@ int TransformerMain(int argc, const char ** argv) ...@@ -56,6 +59,7 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString(argc, args, "model", modelFN, ""); LoadParamString(argc, args, "model", modelFN, "");
LoadParamString(argc, args, "test", testFN, ""); LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, args, "output", outputFN, ""); LoadParamString(argc, args, "output", outputFN, "");
LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
...@@ -64,28 +68,35 @@ int TransformerMain(int argc, const char ** argv) ...@@ -64,28 +68,35 @@ int TransformerMain(int argc, const char ** argv)
T2TModel model; T2TModel model;
model.InitModel(argc, args); model.InitModel(argc, args);
//if(strcmp(modelFN, ""))
// model.Read(modelFN);
/* learn model parameters */ /* learn model parameters */
if(strcmp(trainFN, "")) if(strcmp(trainFN, ""))
trainer.Train(trainFN, testFN, strcmp(modelFN, "") ? modelFN : "checkpoint.model", &model); trainer.Train(trainFN, testFN, strcmp(modelFN, "") ? modelFN : "checkpoint.model", &model);
/* save the final model */ /* save the final model */
//if(strcmp(modelFN, "") && strcmp(trainFN, "")) if(strcmp(modelFN, "") && strcmp(trainFN, ""))
//model.Dump(modelFN); model.Dump(modelFN);
/* load the model if neccessary */ /* load the model if neccessary */
//if(strcmp(modelFN, "")) if(strcmp(modelFN, ""))
//model.Read(modelFN); model.Read(modelFN);
T2TTrainer tester;
tester.Init(argc, args);
/* test the model on the new data */ /* test the model on the new data */
if(strcmp(testFN, "") && strcmp(outputFN, "")) if(strcmp(testFN, "") && strcmp(outputFN, "")){
tester.Test(testFN, outputFN, &model); /* beam search */
if(isBeamSearch){
T2TTester searcher;
searcher.Init(argc, args);
searcher.Test(testFN, outputFN, &model);
}
/* forced decoding */
else{
T2TTrainer tester;
tester.Init(argc, args);
tester.Test(testFN, outputFN, &model);
}
}
delete[] trainFN; delete[] trainFN;
delete[] modelFN; delete[] modelFN;
......
...@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName) ...@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName)
} }
} }
/**************************************************** /*
Below is for calling CPU BLAS for fast matrix operations Below is for calling CPU BLAS for fast matrix operations
I'm not sure how fast it is. But it seems that other I'm not sure how fast it is. But it seems that other
guys are crazy about this. So I decided to have a try. guys are crazy about this. So I decided to have a try.
...@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h) ...@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
return f; return f;
} }
/*
data type conversion
>> devID - device id
>> s - source data array
>> typeS - source data type
>> t - target data array
>> typeT - target data type
>> size - number of the items in s (and t)
*/
void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{
CheckNTErrors((devID < 0), "This code must be run on CPUs!");
if(typeS == typeT)
return;
if(typeS == X_FLOAT && typeT == X_FLOAT16){
for(int i = 0; i < size; i++){
((unsigned short*)t)[i] = FloatToFloat16(((float*)s)[i]);
}
}
else if(typeS == X_FLOAT16 && typeT == X_FLOAT){
for(int i = 0; i < size; i++){
((float*)t)[i] = Float16ToFloat(((unsigned short*)s)[i]);
}
}
else{
ShowNTErrors("Unsupported data types for conversion!");
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName); ...@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
/* data conversion (for lower precision computation) */ /* data conversion (for lower precision computation) */
unsigned short FloatToFloat16(float f); unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h); float Float16ToFloat(unsigned short h);
void ConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#ifdef USE_CUDA
void CudaConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#endif
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID) ...@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID); cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
#else #else
...@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID) ...@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
SetFastFlags(); SetFastFlags();
} }
/* switch to a get current dev */ /* get the id of the current GPU device */
int XDevice::GetGPUDevice() int XDevice::GetGPUDevice()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice() ...@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID); cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
...@@ -248,7 +250,7 @@ void XDevice::SetFastFlags() ...@@ -248,7 +250,7 @@ void XDevice::SetFastFlags()
#endif #endif
} }
/* reset cuda flag for more efficient cuda execution (all devices) */ /* reset the cuda flag for more efficient cuda execution (all devices) */
void XDevice::SetFastFlagsAllDevices() void XDevice::SetFastFlagsAllDevices()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -266,10 +268,6 @@ XDevManager::XDevManager() ...@@ -266,10 +268,6 @@ XDevManager::XDevManager()
{ {
Clear(); Clear();
Init(); Init();
#ifndef USE_CPP11
fprintf(stderr, "Warning!!! c++ 11 is RECOMMENDED for compilation.\n");
#endif
} }
/* de-constructor */ /* de-constructor */
...@@ -278,7 +276,7 @@ XDevManager::~XDevManager() ...@@ -278,7 +276,7 @@ XDevManager::~XDevManager()
} }
/* initialize it and get the CPU and GPU information */ /* initialization */
void XDevManager::Init() void XDevManager::Init()
{ {
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
...@@ -322,7 +320,7 @@ void XDevManager::Clear() ...@@ -322,7 +320,7 @@ void XDevManager::Clear()
#ifdef USE_CUDA #ifdef USE_CUDA
/* get the handle of GPU */ /* get the handle of a given GPU */
cublasHandle_t * XDevManager::GetCudaHandle(const int devID) cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -330,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID) ...@@ -330,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
return GPUs[devID].GetCublasHandle(); return GPUs[devID].GetCublasHandle();
} }
/* get the stream of cuda */ /* get the stream of a given GPU */
cudaStream_t * XDevManager::GetCudaStream(const int devID) cudaStream_t * XDevManager::GetCudaStream(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -478,7 +476,7 @@ split a string ...@@ -478,7 +476,7 @@ split a string
>> items - splitting result >> items - splitting result
<< return - how many items are there << return - how many items are there
*/ */
int SplitALine(char * inputString, const char * seperator, XList * items) int SplitALine(char * inputString, const char * seperator, StrList* items)
{ {
items->Clear(); items->Clear();
...@@ -527,12 +525,12 @@ get device ids for the given device information ...@@ -527,12 +525,12 @@ get device ids for the given device information
devInfo = "0:CPU-1 1:GPU-0 2:CPU-1" devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
means that the first device is CPU, the second device means that the first device is CPU, the second device
is GPU-0, the third device is CPU. is GPU-0, the third device is CPU.
>> devIDs - device sequence specified by devInfo >> devIDs - device IDs specified by devInfo
<< return - number of devices << return - number of devices
*/ */
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{ {
XList * terms = new XList(1); StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms); SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){ for(int i = 0; i < terms->count; i++){
...@@ -569,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) ...@@ -569,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
return devCount; return devCount;
} }
/* show id sequence */ /* show device IDs */
void XDevManager::ShowDeviceIDs(char * devInfo, char * msg) void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
{ {
msg[0] = 0; msg[0] = 0;
......
...@@ -236,6 +236,18 @@ extern XDevManager GDevs; ...@@ -236,6 +236,18 @@ extern XDevManager GDevs;
cudaSetDevice(devIDBackup); \ cudaSetDevice(devIDBackup); \
} \ } \
#define CheckDev(a, b) \
{ \
if((a < 0 && b >= 0) || (a >= 0 && b < 0)){ \
fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, a, b); \
exit(1); \
} \
else if (a >= 0 && b >= 0 && a != b) { \
fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, a, b); \
exit(1); \
} \
} \
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif
...@@ -51,7 +51,13 @@ bool CONST_TRUE = true; ...@@ -51,7 +51,13 @@ bool CONST_TRUE = true;
int verboseLevel = 0; int verboseLevel = 0;
bool useBLAS = false; bool useBLAS = false;
bool useCUDA = false;
#ifdef USE_CUDA
bool useCUDA = true;
#else
bool useCUDA = false;
#endif
FILE * tmpLog = NULL; FILE * tmpLog = NULL;
double myTime = 0; double myTime = 0;
......
...@@ -45,10 +45,6 @@ typedef int8_t __int8; ...@@ -45,10 +45,6 @@ typedef int8_t __int8;
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts { namespace nts {
#if (__cplusplus >= 201103L || _MSC_VER >= 1700)
#define USE_CPP11
#endif
#define _XINLINE_ #define _XINLINE_
//#define DOUBELPRICSION //#define DOUBELPRICSION
...@@ -159,7 +155,9 @@ extern bool useCUDA; ...@@ -159,7 +155,9 @@ extern bool useCUDA;
#define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}} #define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}}
#define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}} #define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}}
#define B2I(V) V==0?false:true #define B2I(V) V == 0 ? false : true
#define MODX(a, b) int(b == 0 ? a : a - floor(double(a)/b) * b)
/* BLAS interfaces */ /* BLAS interfaces */
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
......
...@@ -31,15 +31,15 @@ namespace nts{ ...@@ -31,15 +31,15 @@ namespace nts{
/* constructor */ /* constructor */
template<HeapType hType, typename T> template<HeapType hType, typename T>
XHeap<hType, T>::XHeap()
{
}
/* constructor */
template<HeapType hType, typename T>
XHeap<hType, T>::XHeap(int mySize, XMem * myMem) XHeap<hType, T>::XHeap(int mySize, XMem * myMem)
{ {
mem = myMem; Init(mySize, myMem);
size = mySize;
count = 0;
if (mem == NULL)
items = new HeapNode<T>[mySize];
else
mem->Alloc(mem->devID, mySize * sizeof(T));
} }
/* deconstructor */ /* deconstructor */
...@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap() ...@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap()
} }
template<HeapType hType, typename T> template<HeapType hType, typename T>
void XHeap<hType, T>::Init(int mySize, XMem * myMem)
{
mem = myMem;
size = mySize;
count = 0;
if (mem == NULL)
items = new HeapNode<T>[mySize];
else
mem->Alloc(mem->devID, mySize * sizeof(T));
}
template<HeapType hType, typename T>
void XHeap<hType, T>::Clear(T initValue) void XHeap<hType, T>::Clear(T initValue)
{ {
count = 0; count = 0;
...@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End() ...@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End()
template<HeapType hType, typename T> template<HeapType hType, typename T>
_XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node) _XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node)
{ {
//CheckNTErrors((count < size), "Heap is full!"); if (count < size) {
items[count] = node; items[count] = node;
Up(count); Up(count);
count++; count++;
}
else if(count == size){
HeapNode<T> & item0 = items[0];
if (hType == MIN_HEAP && item0.value >= node.value)
return;
else if (hType == MAX_HEAP && item0.value <= node.value)
return;
items[0] = node;
Down(0);
}
else {
ShowNTErrors("Overflow of the heap!");
}
} }
/* replace the top-most item and update the heap */ /* replace the top-most item and update the heap */
...@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node) ...@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node)
template<HeapType hType, typename T> template<HeapType hType, typename T>
_XINLINE_ HeapNode<T> XHeap<hType, T>::Pop() _XINLINE_ HeapNode<T> XHeap<hType, T>::Pop()
{ {
//CheckNTErrors((size > 0), "Empty heap!"); CheckNTErrors(count > 0, "Empty heap!");
HeapNode<T> node = items[0]; HeapNode<T> node = items[0];
items[0] = items[count - 1]; items[0] = items[count - 1];
count--; count--;
......
...@@ -39,7 +39,7 @@ template <typename T> ...@@ -39,7 +39,7 @@ template <typename T>
struct HeapNode struct HeapNode
{ {
/* node index */ /* node index */
int index; long long index;
/* value of the node */ /* value of the node */
T value; T value;
...@@ -52,9 +52,16 @@ struct HeapNode ...@@ -52,9 +52,16 @@ struct HeapNode
HeapNode(int i, T v) HeapNode(int i, T v)
{ {
index = i; index = (long long)i;
value = v; value = v;
}; };
HeapNode(void * i, T v)
{
index = (long long)i;
value = v;
}
}; };
/* a heap that keeps a data array of T */ /* a heap that keeps a data array of T */
...@@ -76,11 +83,17 @@ public: ...@@ -76,11 +83,17 @@ public:
public: public:
/* constructor */ /* constructor */
XHeap();
/* constructor */
XHeap(int mySize, XMem * myMem = NULL); XHeap(int mySize, XMem * myMem = NULL);
/* deconstructor */ /* deconstructor */
~XHeap(); ~XHeap();
/* initialization */
void Init(int mySize, XMem * myMem = NULL);
/* clear the data */ /* clear the data */
void Clear(T initValue); void Clear(T initValue);
...@@ -107,6 +120,9 @@ public: ...@@ -107,6 +120,9 @@ public:
/* move item k up the tree */ /* move item k up the tree */
void Up(int k); void Up(int k);
/* how many items are kept in the heap */
inline int Count() { return count; };
}; };
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id ...@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL) if(h == NULL)
return; return;
XList list(2); TensorList list(2);
list.Add(t1); list.Add((XTensor*)t1);
list.Add(t2); list.Add((XTensor*)t2);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, ...@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,
if (h == NULL) if (h == NULL)
return; return;
XList list(3); TensorList list(3);
list.Add(t1); list.Add((XTensor*)t1);
list.Add(t2); list.Add((XTensor*)t2);
list.Add(t3); list.Add((XTensor*)t3);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor ...@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor >> h - head tensor
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(const XList * list, XTensor * h, int id) void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
{ {
/* forward */ /* forward */
XLink &income = h->income; XLink &income = h->income;
...@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors ...@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors
>> list - a list of output tensors >> list - a list of output tensors
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(XTensor * t, XList * list, int id) void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{ {
/* forward */ /* forward */
for(int i = 0; i < list->count; i++){ for(int i = 0; i < list->count; i++){
...@@ -624,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target) ...@@ -624,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
ClearIncoming(target); ClearIncoming(target);
int tailNum = reference->income.tailNum; int tailNum = reference->income.tailNum;
XList tails(tailNum); TensorList tails(tailNum);
for(int i = 0; i < tailNum; i++){ for(int i = 0; i < tailNum; i++){
XTensor * tail = (XTensor*)reference->income.tails[i]; XTensor * tail = (XTensor*)reference->income.tails[i];
tails.Add(tail); tails.Add(tail);
...@@ -743,7 +743,7 @@ search for a node in a top-down manner by its name ...@@ -743,7 +743,7 @@ search for a node in a top-down manner by its name
>> top - the top most node >> top - the top most node
<< return - the node we found << return - the node we found
*/ */
/*XTensor * XLink::SearchNode(XTensor * top, const char * name) XTensor * XLink::SearchNode(XTensor * top, const char * name)
{ {
if(!strcmp(top->name, name)) if(!strcmp(top->name, name))
return top; return top;
...@@ -758,7 +758,7 @@ search for a node in a top-down manner by its name ...@@ -758,7 +758,7 @@ search for a node in a top-down manner by its name
} }
return NULL; return NULL;
}*/ }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -138,17 +138,17 @@ struct XLink ...@@ -138,17 +138,17 @@ struct XLink
static static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with two input tensors and a output tensor */ /* create a hyper edge with three input tensors and a output tensor */
static static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
/* create a hyper edge with a list of input tensors and a output tensor */ /* create a hyper edge with a list of input tensors and a output tensor */
static static
void MakeLink(const XList * list, XTensor * h, int id); void MakeLink(const TensorList * list, XTensor * h, int id);
/* create a hyper edge with a input tensors and a list of output tensors */ /* create a hyper edge with a input tensors and a list of output tensors */
static static
void MakeLink(XTensor * h, XList * list, int id); void MakeLink(XTensor * h, TensorList * list, int id);
/* add a parameter */ /* add a parameter */
static static
...@@ -191,8 +191,8 @@ struct XLink ...@@ -191,8 +191,8 @@ struct XLink
void ShowNode(FILE * file, XTensor * node); void ShowNode(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */ /* search a node in a top-down manner by its name */
//static static
//XTensor * SearchNode(XTensor * top, const char * name); XTensor * SearchNode(XTensor * top, const char * name);
}; };
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -15,32 +15,31 @@ ...@@ -15,32 +15,31 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* *
* Implementation of list that keeps data items * Implementation of template list that keeps data items
* *
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17 * $Created by: HU Chi (huchinlp@foxmail.com)
* The first coding job this year! *
* */
*/
#ifndef __XLIST_H__
#define __XLIST_H__
#include "XMem.h" #include "XMem.h"
#include "XGlobal.h" #include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */ #ifndef __TensorList_H__
namespace nts{ #define __TensorList_H__
typedef int (* ListCompare)(const void * item1, const void * item2);
/* the XList class */ /* the nts (NiuTrans.Tensor) namespace */
class XList namespace nts {
{
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
public: public:
/* data items */ /* data items */
void ** items; T *items;
/* number of items */ /* number of items */
int count; int count;
...@@ -49,56 +48,88 @@ public: ...@@ -49,56 +48,88 @@ public:
int maxNum; int maxNum;
/* the memory pool for data array allocation */ /* the memory pool for data array allocation */
XMem * mem; XMem* mem;
/* indicates whether data items are integers */
bool isIntList;
public: public:
/* constructor */ /* constructor */
XList(); TensorListBase();
/* constructor */ /* constructor */
XList(int myMaxNum, bool isIntListOrNot = false); TensorListBase(int myMaxNum);
/* constructor */ /* constructor */
XList(int myMaxNum, XMem * myMem, bool isIntListOrNot = false); TensorListBase(int myMaxNum, XMem* myMem);
/* de-constructor */ /* de-constructor */
~XList(); ~TensorListBase();
/* utilities */ /* add an item into the list */
void Create(int myMaxNum, XMem * myMem); void Add(T&& item);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount); /* add an item into the list */
void AddList(XList * l); void Add(const T& item);
void AddInt(int i);
void Insert(int pos, void * item); /* add a number of items into the list */
void * GetItem(int i) const; void Add(T* inputItems, int inputItemCount);
int GetItemInt(int i);
void SetItem(int i, void * item); /* append a list to the current list */
void SetItemInt(int i, int item); void AddList(TensorListBase* l);
int FindFirst(void * item); /* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
void Clear(); void Clear();
void ClearStringList();
void Sort(int itemSize, ListCompare comp); /* sort the list */
void Sort(int itemSize);
/* reverse the list */
void Reverse(); void Reverse();
/* remove the item at position i */
void Remove(int i); void Remove(int i);
XList * Copy(XMem * myMem);
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
_XINLINE_ void * Get(int i) {return GetItem(i);}; T& operator[] (int i) {
_XINLINE_ int GetInt(int i) {return GetItemInt(i);}; return GetItem(i);
_XINLINE_ void Set(int i, void * item) {SetItem(i, item);}; };
_XINLINE_ void SetInt(int i, int item) {SetItemInt(i, item);}; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
}; };
extern XList NULLList; struct XTensor;
typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
typedef TensorListBase<void*> XList;
typedef TensorListBase<XTensor*> TensorList;
} } /* end of the nts (NiuTrans.Tensor) namespace */
/* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif // __TensorList_H__
...@@ -34,6 +34,11 @@ namespace nts{ ...@@ -34,6 +34,11 @@ namespace nts{
int testxmemid = 0; int testxmemid = 0;
void * recordp = NULL; void * recordp = NULL;
/*
for managing the memories
*/
XMemManager GMems;
XMem * GMem; XMem * GMem;
/* constructor */ /* constructor */
...@@ -48,6 +53,7 @@ XMem::XMem() ...@@ -48,6 +53,7 @@ XMem::XMem()
strcpy(name, "xmem"); strcpy(name, "xmem");
signature = 0; signature = 0;
mergeFreeOTF = true; mergeFreeOTF = true;
isInitialized = false;
} }
/* /*
...@@ -58,7 +64,7 @@ constructor ...@@ -58,7 +64,7 @@ constructor
>> myMode - mode of running the memory pool >> myMode - mode of running the memory pool
UNI_FREE: free all the space at the end of using the memory pool UNI_FREE: free all the space at the end of using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block >> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks >> myBlockNum - number of memory blocks
>> myBufSize - size of buffer >> myBufSize - size of buffer
*/ */
...@@ -103,7 +109,7 @@ initialize it ...@@ -103,7 +109,7 @@ initialize it
>> myMode - mode of running the memory pool >> myMode - mode of running the memory pool
UNI_FREE: free all the space at the end of using the memory pool UNI_FREE: free all the space at the end of using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block >> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks >> myBlockNum - number of memory blocks
>> myBufSize - size of buffer >> myBufSize - size of buffer
*/ */
...@@ -164,6 +170,7 @@ void XMem::Initialize(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int m ...@@ -164,6 +170,7 @@ void XMem::Initialize(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int m
#endif #endif
signature++; signature++;
isInitialized = true;
} }
/* free memory */ /* free memory */
...@@ -216,9 +223,9 @@ void XMem::Free(int myDevID, void * mem) ...@@ -216,9 +223,9 @@ void XMem::Free(int myDevID, void * mem)
} }
} }
/* /*
get signature get the signature
<< return - return the signature << return - the signature
*/ */
MTYPE XMem::GetSignature() MTYPE XMem::GetSignature()
{ {
...@@ -226,7 +233,7 @@ MTYPE XMem::GetSignature() ...@@ -226,7 +233,7 @@ MTYPE XMem::GetSignature()
} }
/* /*
use string as the name of the memory pool set the name of the memory pool
>> myName - name of the memory pool >> myName - name of the memory pool
*/ */
void XMem::SetName(const char * myName) void XMem::SetName(const char * myName)
...@@ -259,7 +266,7 @@ void XMem::SetDevice(int myDevID) ...@@ -259,7 +266,7 @@ void XMem::SetDevice(int myDevID)
} }
/* /*
switch to the device (with fast cuda execution mode) we want to work switch to the device (with fast cuda execution mode) we intend to work on
>> myDevID - device id(-1: CPU memory, >=0: GPU device ID) >> myDevID - device id(-1: CPU memory, >=0: GPU device ID)
*/ */
void XMem::SetDeviceFast(int myDevID) void XMem::SetDeviceFast(int myDevID)
...@@ -275,7 +282,7 @@ void XMem::SetDeviceFast(int myDevID) ...@@ -275,7 +282,7 @@ void XMem::SetDeviceFast(int myDevID)
} }
/* /*
run in static mode run in the static mode
>> myIsStatic - specify if the memory allocation is static >> myIsStatic - specify if the memory allocation is static
*/ */
void XMem::SetStaticMode(bool myIsStatic) void XMem::SetStaticMode(bool myIsStatic)
...@@ -300,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation) ...@@ -300,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
cublasDestroy(cublasHandle); cublasDestroy(cublasHandle);
if(myIsForComputation) if(myIsForComputation)
CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS, CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
"Cannot create the cublas handle."); "Cannot create the cublas handle.");
SetDevice(devIDBackup); SetDevice(devIDBackup);
#endif #endif
...@@ -316,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -316,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{ {
delete[] memIndex; delete[] memIndex;
delete[] memIndex2; delete[] memIndex2;
delete[] minSizeIndex; delete[] minSizeIndex;
nodeNum = indexSize; nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2; nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum; indexEntryNum = minSizeNum;
memIndex = new MPieceNode[nodeNum]; memIndex = new MPieceNode[nodeNum];
memset(memIndex, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);
...@@ -328,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -328,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
memIndex2 = new MPieceNode[nodeNum]; memIndex2 = new MPieceNode[nodeNum];
memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);
minSizeIndex = new MTYPE[indexEntryNum]; minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum); memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex[0] = minSizeFirst; minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++) for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2; minSizeIndex[i] = minSizeIndex[i - 1] * 2;
indexOffset = GetMSB(minSizeFirst); indexOffset = GetMSB(minSizeFirst);
} }
...@@ -752,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -752,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
/* if all index nodes are used, we rebuild the index to release the nodes that are free */ /* if all index nodes are used, we rebuild the index to release the nodes that are free */
if(nodeNumUsed == nodeNum){ if(nodeNumUsed == nodeNum){
RebuildIndex(); RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!"); CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
} }
/*if(testxmemid == 30){ /*if(testxmemid == 30){
...@@ -956,8 +963,8 @@ release a piece of memory as "free" ...@@ -956,8 +963,8 @@ release a piece of memory as "free"
*/ */
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size) void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{ {
if(p == NULL) if(p == NULL)
return; return;
if(size <= minSizeIndex[0]) if(size <= minSizeIndex[0])
size = minSizeIndex[0]; size = minSizeIndex[0];
...@@ -1087,7 +1094,7 @@ void XMem::RebuildIndex() ...@@ -1087,7 +1094,7 @@ void XMem::RebuildIndex()
block->mem = NULL; block->mem = NULL;
} }
else{ else{
/* if the block is in use, we build the index */ /* if the block is in use, we build the index */
int pieceCount = 0; int pieceCount = 0;
MTYPE size = 0; MTYPE size = 0;
MHeader * newLast = NULL; MHeader * newLast = NULL;
...@@ -1488,4 +1495,179 @@ cublasHandle_t * XMem::GetCublasHandle() ...@@ -1488,4 +1495,179 @@ cublasHandle_t * XMem::GetCublasHandle()
#endif #endif
/* constructor */
XMemManager::XMemManager()
{
Initialize();
}
/* de-constructor */
XMemManager::~XMemManager()
{
}
/* get memory size */
MTYPE XMemManager::GetAvailableMemory()
{
unsigned long freeMem = 0;
#if __APPLE__
int mib[2] = {CTL_HW, HW_MEMSIZE};
unsigned int namelen = sizeof(mib) / sizeof(mib[0]);
unsigned long long size;
size_t len = sizeof(size);
if (sysctl(mib, namelen, &size, &len, NULL, 0) < 0){
ShowNTErrors("Cannot get memory size on Mac!");
}
else{
return size;
}
#elif _WIN32
MEMORYSTATUSEX memoryStatus;
memoryStatus.dwLength = sizeof(memoryStatus);
if (GlobalMemoryStatusEx(&memoryStatus)){
freeMem = memoryStatus.ullAvailPhys;
}
#else
long pages = sysconf(_SC_AVPHYS_PAGES);
long page_size = sysconf(_SC_PAGE_SIZE);
freeMem = pages * page_size;
#endif
return (MTYPE)freeMem;
}
/* get GPU memory size */
MTYPE XMemManager::GetAvailableGPUMemory(int devID)
{
size_t freeMem = 0;
#ifdef USE_CUDA
size_t totalMem = 0;
cudaSetDevice(devID);
if (cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess){
XPRINT(0, stderr, "cannot get GPU memory information.");
exit(1);
}
#endif
return (MTYPE)freeMem;
}
/* get buffer size */
void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
{
*myBufSize = 0;
if (freeMem >= MILLION * 128){
*myBufSize = MILLION * 32;
if (freeMem >= MILLION * 256){
*myBufSize = MILLION * 64;
if (freeMem >= MILLION * 512){
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 1024) {
*myBufSize = MILLION * 256;
if (freeMem >= MILLION * 2048)
*myBufSize = MILLION * 512;
}
}
}
}
}
/* initialize it and set the global memory information */
void XMemManager::Initialize()
{
srand((unsigned int)time(NULL));
Free();
/* CPUs (we actually do not care about how many CPUs are using) */
nCPUMem = 1;
/* GPUs */
nGPUMem = 0;
#ifdef USE_CUDA
if (cudaGetDeviceCount(&nGPUMem) != cudaSuccess) {
XPRINT(0, stderr, "cannot get GPU information.");
exit(1);
}
#endif
}
/* free it */
void XMemManager::Free()
{
for (int i = 0; i < MAX_CPU_MEM_NUM; i++)
CPUMems[i].Free();
for (int i = 0; i < MAX_GPU_MEM_NUM; i++)
GPUMems[i].Free();
}
/* get global memory pool */
XMem * XMemManager::GetMem(const int devID)
{
XMem * mem = NULL;
if (devID < 0){
if(!CPUMems[0].isInitialized){
MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, FREE_ON_THE_FLY,
MIN_BLOCK_SIZE_FOR_MEMPOOL,
MIN_BLOCK_NUM_FOR_MEMPOOL,
myBufSize);
}
mem = CPUMems;
}
else{
if (devID < nGPUMem){
if(!GPUMems[devID].isInitialized){
MTYPE freeMem = GetAvailableGPUMemory(devID);
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
GPUMems[devID].Initialize(devID, FREE_ON_THE_FLY,
MIN_BLOCK_SIZE_FOR_MEMPOOL,
MIN_BLOCK_NUM_FOR_MEMPOOL,
myBufSize);
}
mem = GPUMems + devID;
}
else{
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
}
}
return mem;
}
/* get global memory size */
int XMemManager::GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize)
{
XMem * mem = GetMem(devID);
int result = 0;
if (mem != NULL){
*myBlockSize = mem->maxBlockSize;
*myBlockNum = mem->blockNum;
*myBufSize = mem->bufSize;
result = 1;
}
return result;
}
/* show memory information */
void XMemManager::ShowMemInfo()
{
XPRINT(1, stderr, "Memory Information:\n");
MTYPE myBlockSize, myBufSize;
int myBlockNum;
for(int i = 0; i < nCPUMem; i++){
GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", myBlockSize, myBlockNum, myBufSize);
}
for(int i = 0; i < nGPUMem; i++){
GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", i, myBlockSize, myBlockNum, myBufSize);
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -39,6 +39,15 @@ ...@@ -39,6 +39,15 @@
#include <curand.h> #include <curand.h>
#endif #endif
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#elif WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
...@@ -51,8 +60,10 @@ typedef long long INT_64; ...@@ -51,8 +60,10 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1 #define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH #define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256 #define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024 #define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024 #define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_MEM_NUM 16
#define MAX_GPU_MEM_NUM 16
/* /*
mode of runnig a memory pool mode of runnig a memory pool
...@@ -202,6 +213,9 @@ public: ...@@ -202,6 +213,9 @@ public:
MTYPE curUsedPin; MTYPE curUsedPin;
MTYPE bufUsedPin; MTYPE bufUsedPin;
/* indicates whether the memory pool is initialized */
bool isInitialized;
#ifdef USE_CUDA #ifdef USE_CUDA
/* handle used for cublas */ /* handle used for cublas */
cublasHandle_t cublasHandle; cublasHandle_t cublasHandle;
...@@ -413,6 +427,61 @@ public: ...@@ -413,6 +427,61 @@ public:
}; };
/*
a class for the management of memory
*/
class XMemManager
{
private:
/* cpu memory pool information */
XMem CPUMems[MAX_CPU_MEM_NUM];
/* number of cpu memory pools */
int nCPUMem;
/* gpu memory pool information */
XMem GPUMems[MAX_GPU_MEM_NUM];
/* number of gpu memory pools */
int nGPUMem;
public:
/* constructor */
XMemManager();
/* de-constructor */
~XMemManager();
/* get memory size */
MTYPE GetAvailableMemory();
/* get GPU memory size */
MTYPE GetAvailableGPUMemory(int devID);
/* get buffer size */
void GetBufferSize(MTYPE freeMem, MTYPE * myBufSize);
/* initialize it and set the global memory information */
void Initialize();
/* free it */
void Free();
/* get global memory pool */
XMem * GetMem(const int devID);
/* get global memory size */
int GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize);
/* show memory information */
void ShowMemInfo();
};
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem; extern XMem * GMem;
extern int testxmemid; extern int testxmemid;
......
...@@ -59,6 +59,8 @@ const char * GetOPName(int type) ...@@ -59,6 +59,8 @@ const char * GetOPName(int type)
return "M_DIV"; return "M_DIV";
else if (type == MATH_DIVDIM) else if (type == MATH_DIVDIM)
return "M_DIVDIM"; return "M_DIVDIM";
else if (type == MATH_MASK)
return "M_MASK";
else if (type == MATH_MATRIXMUL) else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL"; return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED) else if (type == MATH_MATRIXMULBATCHED)
...@@ -108,7 +110,7 @@ const char * GetOPName(int type) ...@@ -108,7 +110,7 @@ const char * GetOPName(int type)
else if (type == REDUCE_REDUCEVARIANCE) else if (type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE"; return "R_REDUCEVARIANCE";
} }
else if ((type & DATA_BASE) != 0) { else if ((type & DATA_BASE) != 0) {
if (type == GETANDSET_CONVERTDATATYPE) if (type == GETANDSET_CONVERTDATATYPE)
return "G_CONVERTDATATYPE"; return "G_CONVERTDATATYPE";
else if (type == GETANDSET_INDEXTOONEHOT) else if (type == GETANDSET_INDEXTOONEHOT)
...@@ -118,8 +120,10 @@ const char * GetOPName(int type) ...@@ -118,8 +120,10 @@ const char * GetOPName(int type)
else if (type == GETANDSET_SELECT) else if (type == GETANDSET_SELECT)
return "G_SELECT"; return "G_SELECT";
} }
else if ((type & SHAPE_BASE) != 0) { else if ((type & SHAPE_BASE) != 0){
if (type == MOVEMENT_COPYINDEXED) if (type == GETANDSET_SELECT)
return "G_SELECT";
else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED"; return "M_COPYINDEXED";
else if (type == MOVEMENT_COPYVALUES) else if (type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES"; return "M_COPYVALUES";
......
...@@ -48,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -48,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CLIP MATH_ROUND + 1 #define MATH_CLIP MATH_ROUND + 1
#define MATH_DIV MATH_CLIP + 1 #define MATH_DIV MATH_CLIP + 1
#define MATH_DIVDIM MATH_DIV + 1 #define MATH_DIVDIM MATH_DIV + 1
#define MATH_MATRIXMUL MATH_DIVDIM + 1 #define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1 #define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1 #define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
...@@ -85,6 +86,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -85,6 +86,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1 #define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1
#define SHAPE_BASE DATA_BASE * 2 #define SHAPE_BASE DATA_BASE * 2
#define MOVEMENT SHAPE_BASE + 1 #define MOVEMENT SHAPE_BASE + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1 #define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1 #define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
......
...@@ -146,7 +146,7 @@ run a set of jobs in parallel ...@@ -146,7 +146,7 @@ run a set of jobs in parallel
>> jobArgs - the list of arguments for each job >> jobArgs - the list of arguments for each job
>> sleepTime - time to sleep (in ms) for each round >> sleepTime - time to sleep (in ms) for each round
*/ */
void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
{ {
if(threadNum <= 0){ if(threadNum <= 0){
XPRINT(1, stderr, "Error! No threads were created!\n"); XPRINT(1, stderr, "Error! No threads were created!\n");
...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) ...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c); TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);
/* the arguments that are passed to the function */ /* the arguments that are passed to the function */
volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c); volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);
/* thread */ /* thread */
XThread * thread = threads + availableThreads[i]; XThread * thread = threads + availableThreads[i];
......
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
void KillThreads(); void KillThreads();
/* run a set of jobs in parallel */ /* run a set of jobs in parallel */
void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0); void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);
/* get the number of parallel jobs to run */ /* get the number of parallel jobs to run */
int GetJobNum(int size); int GetJobNum(int size);
......
...@@ -42,7 +42,7 @@ job item used in queues ...@@ -42,7 +42,7 @@ job item used in queues
JobQueueNode::JobQueueNode() JobQueueNode::JobQueueNode()
{ {
job = NULL; job = NULL;
args = new XList(1); args = new TensorList(1);
} }
/* de-constructor */ /* de-constructor */
...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize) ...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
head = 0; head = 0;
tail = 0; tail = 0;
isJobQueue = false; isJobQueue = false;
jobDequeuerArgs = new XList(1); jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false; jobDequeuerBreak = false;
runningJobCount = 0; runningJobCount = 0;
jobStream = NULL; jobStream = NULL;
...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID) ...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true; isJobQueue = true;
jobDequeuerArgs->Clear(); jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid); // warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
jobDequeuer.function = (TFunction)DequeueJobs; jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs; jobDequeuer.argv = jobDequeuerArgs;
...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer() ...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer()
} }
/* add a job item to process */ /* add a job item to process */
void XQueue::EnqueueJob(void * job, XList * jobArgs) void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{ {
MUTEX_LOCK(jobQueueMutex); MUTEX_LOCK(jobQueueMutex);
runningJobCount++; runningJobCount++;
...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs) ...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
} }
/* job item consumer */ /* job item consumer */
void XQueue::DequeueJobs(XList * args) void XQueue::DequeueJobs(TensorList * args)
{ {
CheckNTErrors((args->count == 2), "Illegal arguments!"); CheckNTErrors((args->count == 2), "Illegal arguments!");
......
...@@ -52,7 +52,7 @@ public: ...@@ -52,7 +52,7 @@ public:
void * job; void * job;
/* arguments of the job */ /* arguments of the job */
XList * args; TensorList * args;
public: public:
/* constructor */ /* constructor */
...@@ -102,7 +102,7 @@ private: ...@@ -102,7 +102,7 @@ private:
XThread jobDequeuer; XThread jobDequeuer;
/* argument list of jobDequeuer */ /* argument list of jobDequeuer */
XList * jobDequeuerArgs; TensorList * jobDequeuerArgs;
/* indicates whether jobDequeuer stops */ /* indicates whether jobDequeuer stops */
bool jobDequeuerBreak; bool jobDequeuerBreak;
...@@ -141,11 +141,11 @@ public: ...@@ -141,11 +141,11 @@ public:
void StopJobConsumer(); void StopJobConsumer();
/* add a job item to process */ /* add a job item to process */
void EnqueueJob(void * job, XList * jobArgs); void EnqueueJob(void * job, TensorList * jobArgs);
/* job item consumer */ /* job item consumer */
static static
void DequeueJobs(XList * args); void DequeueJobs(TensorList * args);
/* get the break flag */ /* get the break flag */
bool GetJobBreak(); bool GetJobBreak();
......
...@@ -85,7 +85,7 @@ namespace nts{ ...@@ -85,7 +85,7 @@ namespace nts{
#endif #endif
typedef void (*TFunction) (volatile XList*); typedef void (*TFunction) (volatile TensorList*);
/* /*
This is a class that wraps the standard implementation of threading This is a class that wraps the standard implementation of threading
...@@ -133,7 +133,7 @@ public: ...@@ -133,7 +133,7 @@ public:
/* arguments (for the function to run) */ /* arguments (for the function to run) */
volatile volatile
XList * argv; TensorList * argv;
/* a flag to break */ /* a flag to break */
volatile volatile
......
...@@ -36,13 +36,9 @@ ...@@ -36,13 +36,9 @@
#include "arithmetic/MatrixMulBatched.h" #include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/Multiply.h" #include "arithmetic/Multiply.h"
#include "arithmetic/MultiplyDim.h" #include "arithmetic/MultiplyDim.h"
#include "arithmetic/Negate.h"
#include "arithmetic/Sign.h"
#include "arithmetic/Sub.h" #include "arithmetic/Sub.h"
#include "arithmetic/SubDim.h" #include "arithmetic/SubDim.h"
#include "arithmetic/Sum.h" #include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "arithmetic/SumDim.h" #include "arithmetic/SumDim.h"
#include "arithmetic/XTensorBLAS.h" #include "arithmetic/XTensorBLAS.h"
#include "arithmetic/MulAndShift.h" #include "arithmetic/MulAndShift.h"
...@@ -56,7 +52,6 @@ ...@@ -56,7 +52,6 @@
#include "math/Clip.h" #include "math/Clip.h"
#include "math/Compare.h" #include "math/Compare.h"
#include "math/Normalize.h" #include "math/Normalize.h"
#include "math/Power.h"
#include "math/ScaleAndShift.h" #include "math/ScaleAndShift.h"
#include "math/Unary.h" #include "math/Unary.h"
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "Div.h" #include "Div.h"
#include "Div.cuh" #include "Div.cuh"
#include "DivDim.h" #include "DivDim.h"
...@@ -41,12 +42,15 @@ where i is the index of the item ...@@ -41,12 +42,15 @@ where i is the index of the item
*/ */
void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim); _CudaDiv(a, b, c, alpha, leadingDim);
...@@ -138,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -138,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Div(a, b, a, alpha, leadingDim); _Div(a, b, a, alpha, leadingDim);
} }
/*
element-wise division of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Div(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the division is performed as DivDim (in more details in DivDim.h) return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor >> a - a tensor
...@@ -225,9 +246,8 @@ where i is the index of the item ...@@ -225,9 +246,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -241,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -241,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */ /* call _Div function */
_Div(&a, &b, &c, 0, leadingDim); _Div(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV); XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -252,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -252,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */ /* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-05 float16 added
*/ */
#include "../../XDevice.h" #include "../../XDevice.h"
...@@ -34,8 +35,9 @@ division of data arrays in a element-wise manner c(i) = a(i)/b(i) ...@@ -34,8 +35,9 @@ division of data arrays in a element-wise manner c(i) = a(i)/b(i)
>> c - result data array >> c - result data array
>> size - size of c >> size - size of c
*/ */
template <class T>
__global__ __global__
void KernelDivElementWise(DTYPE * a, DTYPE * b, DTYPE * c, int size) void KernelDivElementWise(T * a, T * b, T * c, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -51,8 +53,9 @@ division of data arrays in a element-wise manner c(i) = a(i)/b(i) + \alpha*c(i) ...@@ -51,8 +53,9 @@ division of data arrays in a element-wise manner c(i) = a(i)/b(i) + \alpha*c(i)
>> size - size of c >> size - size of c
>> alpha - the coefficient >> alpha - the coefficient
*/ */
template <class T>
__global__ __global__
void KernelDivElementWiseV2(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE alpha) void KernelDivElementWiseV2(T * a, T * b, T * c, int size, T alpha)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -75,13 +78,13 @@ where |a_lead| means the size of the leading dimension of a ...@@ -75,13 +78,13 @@ where |a_lead| means the size of the leading dimension of a
>> ldSizeC - size of the leading dimension of c >> ldSizeC - size of the leading dimension of c
>> blockNum - number of blocks >> blockNum - number of blocks
*/ */
template<int nonZeroAlpha> __global__ template<class T, int nonZeroAlpha> __global__
void KernelDivElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE alpha, void KernelDivElementWiseTensorDynamic(T * a, T * b, T * c, T alpha,
int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum) int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
{ {
__shared__ DTYPE* ap[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T* ap[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE* bp[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T* bp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE* cp[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T* cp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y; int j = blockDim.y * blockIdx.y + threadIdx.y;
...@@ -169,17 +172,48 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in ...@@ -169,17 +172,48 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
dim3 blocks(cudaGridSize[0], cudaGridSize[1]), threads(cudaBlockSize[0], cudaBlockSize[1]); dim3 blocks(cudaGridSize[0], cudaGridSize[1]), threads(cudaBlockSize[0], cudaBlockSize[1]);
if (alpha == 0) { if (alpha == 0) {
KernelDivElementWiseTensorDynamic<0> << <blocks, threads >> > KernelDivElementWiseTensorDynamic<DTYPE, 0> << <blocks, threads >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, 0, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, 0,
stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum); stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
} }
else { else {
KernelDivElementWiseTensorDynamic<1> << <blocks, threads >> > KernelDivElementWiseTensorDynamic<DTYPE, 1> << <blocks, threads >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, alpha, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, alpha,
stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum); stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
} }
} }
} }
else if (a->dataType == X_FLOAT16 && b->dataType == X_FLOAT16) {
int cudaGridSize[3];
int cudaBlockSize[3];
half alpha1 = __float2half(alpha);
if (a->unitNum == c->unitNum && b->unitNum == c->unitNum) {
GDevs.GetCudaThread(a->devID, c->unitNum, cudaGridSize, cudaBlockSize);
dim3 blocks(cudaGridSize[0]), threads(cudaBlockSize[0]);
if (alpha == 0)
KernelDivElementWise << <blocks, threads >> > ((__half *)a->data, (__half *)b->data, (__half *)c->data, c->unitNum);
else
KernelDivElementWiseV2 << <blocks, threads >> > ((__half *)a->data, (__half *)b->data, (__half *)c->data, c->unitNum, alpha1);
}
else {
GDevs.GetCudaThread2D(c->devID, stride * blockNum, dimensionSizeC, MAX_INT, cudaGridSize, cudaBlockSize);
dim3 blocks(cudaGridSize[0], cudaGridSize[1]), threads(cudaBlockSize[0], cudaBlockSize[1]);
if (alpha == 0) {
KernelDivElementWiseTensorDynamic<__half, 0> << <blocks, threads >> >
((__half *)a->data, (__half *)b->data, (__half *)c->data, 0,
stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
}
else {
KernelDivElementWiseTensorDynamic<__half, 1> << <blocks, threads >> >
((__half *)a->data, (__half *)b->data, (__half *)c->data, alpha1,
stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
}
}
}
else { else {
// TODO!! // TODO!!
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
...@@ -195,4 +229,4 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in ...@@ -195,4 +229,4 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -29,16 +29,16 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,16 +29,16 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* division of two tensors in a element-wise manner c(i) = a(i)/b(i) */ /* division of two tensors in a element-wise manner c(i) = a(i)/b(i) */
__global__ template<class T> __global__
void KernelDivElementWise(DTYPE * a, DTYPE * b, DTYPE * c, int size); void KernelDivElementWise(T * a, T * b, T * c, int size);
/* division of two tensors in a element-wise manner c(i) = a(i)/b(i) + \alpha*c(i) */ /* division of two tensors in a element-wise manner c(i) = a(i)/b(i) + \alpha*c(i) */
__global__ template<class T> __global__
void KernelDivElementWiseV2(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE alpha); void KernelDivElementWiseV2(T * a, T * b, T * c, int size, T alpha);
/* division of two tensors in a element-wise manner c(i) = a(i)/b(i)+ \alpha*c(i) */ /* division of two tensors in a element-wise manner c(i) = a(i)/b(i)+ \alpha*c(i) */
template<int nonZeroAlpha>__global__ template<class T, int nonZeroAlpha>__global__
void KernelDivElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE alpha, int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum); void KernelDivElementWiseTensorDynamic(T * a, T * b, T * c, T alpha, int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum);
/* element-wise division of two tensors */ /* element-wise division of two tensors */
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha = 0, int leadingDim = 0); void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha = 0, int leadingDim = 0);
......
...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise division of two tensors (return an XTensor structure) element-wise division of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise division of two tensors: ...@@ -54,7 +55,7 @@ element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i) c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -19,10 +19,12 @@ ...@@ -19,10 +19,12 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15
*/ */
#include <math.h>
#include "Div.h" #include "Div.h"
#include "DivDim.h" #include "DivDim.h"
#include "DivDim.cuh" #include "DivDim.cuh"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -42,6 +44,8 @@ i.e., a is divided with b by broadcasting ...@@ -42,6 +44,8 @@ i.e., a is divided with b by broadcasting
*/ */
void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha) void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha)
{ {
n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in division!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in division!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
...@@ -50,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp ...@@ -50,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(XTensor::IsSameShaped(a, b)){
_Div(a, b, c, alpha); _Div(a, b, c, alpha);
return; return;
...@@ -151,6 +157,8 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha) ...@@ -151,6 +157,8 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
n = MODX(n, a.order);
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
...@@ -175,9 +183,8 @@ i.e., a is divided with b by broadcasting ...@@ -175,9 +183,8 @@ i.e., a is divided with b by broadcasting
>> c - where we put result. we save it in a if c is NULL >> c - where we put result. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> alpha - the scaling factor >> alpha - the scaling factor
>> requireLink - if add operation to network
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink) void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -186,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, ...@@ -186,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha,
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad == true) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-15 float16 added
*/ */
#include "DivDim.cuh" #include "DivDim.cuh"
...@@ -168,6 +169,34 @@ void _CudaDivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE ...@@ -168,6 +169,34 @@ void _CudaDivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else if (a->dataType == X_FLOAT16) {
half alpha1 = __float2half(alpha);
if (stride > 1){
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
if (alpha == (DTYPE)0.0F)
KernelDivWithCol<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockSize, stride, blockSize * stride, blockNum, alpha1);
else
KernelDivWithCol<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockSize, stride, blockSize * stride, blockNum, alpha1);
}
else if (stride == 1){
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (alpha == (DTYPE)0.0F)
KernelDivWithRow<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockNum, blockSize, alpha1);
else
KernelDivWithRow<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockNum, blockSize, alpha1);
}
else {
ShowNTErrors("Something is wrong!");
}
}
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
......
...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c ...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a, where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting i.e., a is divided with b by broadcasting
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false); void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha) ...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
} }
/* /*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
{
_Mask(&a, &mask, &a, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
...@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha) ...@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
/* call _Sum function */ /* call _Mask function */
_Mask(&a, &mask, &c, alpha); _Mask(&a, &mask, &c, alpha);
/* tensor connections */ /* tensor connections */
//XLink::MakeLink(&a, &mask, &c, MATH_SUM); XLink::MakeLink(&a, &mask, &c, MATH_MASK);
//XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
// TODO!!
ShowNTErrors("TODO!");
return c; return c;
} }
/*
mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _Mask function */
_Mask(&a, &mask, &c, alpha);
if (c.enableGrad) {
XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha);
}
}
} }
\ No newline at end of file
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks - * I'll attend several conferences and workshops in the following weeks -
* busy days :( * busy days :(
*/ */
#ifndef __MASK_H__ #ifndef __MASK_H__
#define __MASK_H__ #define __MASK_H__
...@@ -28,21 +28,22 @@ ...@@ -28,21 +28,22 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
mask entries of a given tensor: mask entries of a given tensor:
c(i) = a(i) if mask(i) is non-zero c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0 c(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha); void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha = 0.0);
/* /*
mask entries of a given tensor (on site): mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha); void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha = 0.0);
void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha = 0.0);
/* /*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
...@@ -52,7 +53,14 @@ where i is the index of the element ...@@ -52,7 +53,14 @@ where i is the index of the element
*/ */
XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0); XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0);
/*
mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha = 0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MASK_H__ #endif // __MASK_H__
...@@ -54,8 +54,6 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -54,8 +54,6 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner) XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{ {
CheckNTErrors(a && b && c, "Empty input tensors!"); CheckNTErrors(a && b && c, "Empty input tensors!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Input tensors should have the same data type!");
CheckNTErrors(a->order >= 2 && b->order >= 2 && c->order >= 2, CheckNTErrors(a->order >= 2 && b->order >= 2 && c->order >= 2,
"Input tensors must have a order >= 2!"); "Input tensors must have a order >= 2!");
CheckNTErrors(c->order == a->order + b->order - 2, "wrong tensor order") CheckNTErrors(c->order == a->order + b->order - 2, "wrong tensor order")
...@@ -108,9 +106,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -108,9 +106,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
cBlockNum *= b->dimSizeRDI[i]; cBlockNum *= b->dimSizeRDI[i];
} }
XList * aList = new XList(10); TensorList * aList = new TensorList(10);
XList * bList = new XList(10); TensorList * bList = new TensorList(10);
XList * cList = new XList(10); TensorList * cList = new TensorList(10);
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] }; int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] }; int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] }; int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
...@@ -202,7 +200,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -202,7 +200,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList; delete cList;
} }
bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c) bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c)
{ {
if (!(a && b && c)) if (!(a && b && c))
return false; return false;
...@@ -231,10 +231,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen ...@@ -231,10 +231,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen
dimSize[sub++] = bm; dimSize[sub++] = bm;
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
if (dimSize[i] != c->dimSize[i]) if (dimSize[i] != c->dimSize[i]) {
delete[] dimSize;
return false; return false;
}
} }
delete[] dimSize;
return true; return true;
} }
...@@ -302,9 +305,64 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -302,9 +305,64 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
return c; return c;
} }
/*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
>> transposedB - indicates whether teh matrices in b are transposed
>> dataType - indicates what datatype is needed
>> alpha - a coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication
*/
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB,
TENSOR_DATA_TYPE dataType, DTYPE alpha, XPRunner * parallelRunner)
{
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a.order + b.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, dataType, dr, a.devID, a.mem);
c.SetTMPFlag();
/* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
/* destroy variables */
delete[] dimSize;
return c;
}
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -337,9 +395,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -337,9 +395,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
} }
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
...@@ -400,7 +458,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -400,7 +458,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
} }
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -435,7 +493,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, ...@@ -435,7 +493,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
......
...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* /*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
...@@ -56,19 +59,28 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -56,19 +59,28 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0,
XPRunner * parallelRunner = NULL);
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB,
TENSOR_DATA_TYPE dataType, DTYPE alpha = (DTYPE)1.0,
XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* matrix multiplication with no transposition c = a * b * alpha*/ /* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b, XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!"); "Input tensors must have a order = 2!");
int an = a->dimSize[0], am = a->dimSize[1]; int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1]; int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1]; int cn = c->dimSize[0], cm = c->dimSize[1];
int am2 = transposedA == X_TRANS ? an : am; int am2 = transposedA == X_TRANS ? an : am;
int an2 = transposedA == X_TRANS ? am : an; int an2 = transposedA == X_TRANS ? am : an;
int bm2 = transposedB == X_TRANS ? bn : bm; int bm2 = transposedB == X_TRANS ? bn : bm;
int bn2 = transposedB == X_TRANS ? bm : bn; int bn2 = transposedB == X_TRANS ? bm : bn;
int cm2 = cm; int cm2 = cm;
int cn2 = cn; int cn2 = cn;
CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2), CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
......
...@@ -38,17 +38,23 @@ argument5: matrix a ...@@ -38,17 +38,23 @@ argument5: matrix a
argument6: matrix b argument6: matrix b
argument7: matrix c (c=a*b*\alpha + c*beta) argument7: matrix c (c=a*b*\alpha + c*beta)
*/ */
void _MatrixMul2DMultiTheading(XList * args) void _MatrixMul2DMultiTheading(TensorList * args)
{ {
int x1 = *(int*)args->GetItem(0); CheckNTErrors(args->count == 2, "invalid argument number!");
int y1 = *(int*)args->GetItem(1); IntList * indexArgs = (IntList*)args->GetItem(0);
int x2 = *(int*)args->GetItem(2); TensorList * matrixArgs = (TensorList*)args->GetItem(1);
int y2 = *(int*)args->GetItem(3); CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
XTensor * a = (XTensor*)args->GetItem(4); CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * b = (XTensor*)args->GetItem(5);
XTensor * c = (XTensor*)args->GetItem(6); XTensor * a = matrixArgs->GetItem(0);
DTYPE alpha = *(DTYPE*)args->GetItem(7); XTensor * b = matrixArgs->GetItem(1);
DTYPE beta = *(DTYPE*)args->GetItem(8); XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX #ifdef FAST_MATRIX
int am = a->dimSize[1]; int am = a->dimSize[1];
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论