Commit 29d2352b by ltb

merge float16/int8 implementation into liyinqiao's branch

parent 2c4061e9
@@ -24,6 +24,7 @@
 #include "../tensor/XUtility.h"
 #include "../tensor/function/FHeader.h"
 #include "../tensor/core/CHeader.h"
+#include "../tensor/test/Test.h"
 #include "../sample/fnnlm/FNNLM.h"
 #include "../sample/transformer/Transformer.h"
@@ -44,7 +45,9 @@ int main( int argc, const char ** argv )
 //_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
 //_CrtSetBreakAlloc(2708);
-if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
+if(argc > 1 && !strcmp(argv[1], "-test"))
+    Test();
+else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
 FNNLMMain(argc - 1, argv + 1);
 else if(argc > 1 && !strcmp(argv[1], "-t2t"))
 TransformerMain(argc - 1, argv + 1);
@@ -53,6 +56,7 @@ int main( int argc, const char ** argv )
 fprintf(stderr, "neural networks in an easy way. \n\n");
 fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
 fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
+fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
 }
 //_CrtDumpMemoryLeaks();
@@ -67,6 +71,9 @@ void BackwardTest()
 XTensor a;
 XTensor b;
 XTensor c;
+a.enableGrad = true;
+b.enableGrad = false;
+c.enableGrad = false;
 XTensor mean;
 XTensor origin;
 InitTensor2D(&a, 2, 3);
@@ -84,14 +91,15 @@ void BackwardTest()
 b.Set1D(2.0F, 0);
 b.Set1D(1.0F, 1);
-c = DivDim(a, b, 0);
+DivDim(a, b, c, 0);
 c.Dump(stderr, "c:");
+auto loss = CrossEntropy(c, a);
 //XLink::ShowNetwork(stderr, &c);
-net.Backward(c);
-net.Dump(stderr);
+net.Backward(loss);
+a.grad->Dump(stderr);
 }
......
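For orientation, the revised test drives backpropagation from an explicit loss node: only tensors with enableGrad set take part in gradient bookkeeping, DivDim now writes into a preallocated c, and Backward starts from the CrossEntropy output rather than from c itself. A minimal sketch of the same flow, assuming the V2 initializers introduced later in this commit and a CPU device id of -1 (the real test's init code is elided between the hunks, so the shapes here are inferred from the Set1D calls):

    XNet net;
    XTensor a, b, c;
    a.enableGrad = true;                    /* a accumulates dE/da            */
    b.enableGrad = false;                   /* b and c are constants as far   */
    c.enableGrad = false;                   /* as the autodiff graph goes     */
    InitTensor2DV2(&a, 2, 3, X_FLOAT, -1);
    InitTensor1DV2(&b, 2, X_FLOAT, -1);
    InitTensor2DV2(&c, 2, 3, X_FLOAT, -1);
    a.SetDataRand(-1.0F, 1.0F);
    b.Set1D(2.0F, 0);
    b.Set1D(1.0F, 1);
    DivDim(a, b, c, 0);                     /* c = a / b along dimension 0    */
    XTensor loss = CrossEntropy(c, a);      /* close the graph with a loss    */
    net.Backward(loss);                     /* backprop from the loss node    */
    a.grad->Dump(stderr);                   /* inspect dE/da                  */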
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for data operation
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-26
*/
#include "XNoder.h"
#include "XBackwardData.h"
#include "../tensor/XName.h"
#include "../tensor/XUtility.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/core/getandset/SetData.h"
namespace nts{
/* compute dE/dx of a node */
void XDataGrad::MakeGrad(XTensor * node, bool isEfficent)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if(operID == GETANDSET_CONVERTDATATYPE)
GradConvertDataType(node, isEfficent);
else if(operID == GETANDSET_INDEXTOONEHOT)
GradIndexToOnehot(node, isEfficent);
else if(operID == GETANDSET_ONEHOTTOINDEX)
GradOnehotToIndex(node, isEfficent);
else{
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a data operation */
bool XDataGrad::IsDataOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & DATA_BASE) != 0;
}
/*
gradient computation for ConvertDataType
for
b = convertdatatype(a)
we have
dE/da = convertdatatype(dE/db)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradConvertDataType(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for ConvertDataType!");
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
XTensor * tmp = NewTensorBuf(input->grad, input->devID, input->mem);
_ConvertDataType(node->grad, tmp);
_SumMe(input->grad, tmp);
DelTensorBuf(tmp);
node->visitMark = NODE_FINISHED;
}
/*
gradient computation for OnehotToIndex
for
b = OnehotToIndex(a)
we have
dE/da = 0 (OnehotToIndex is not differentiable, so only an empty gradient is created)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradOnehotToIndex(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for OnehotToIndex!");
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
node->visitMark = NODE_FINISHED;
}
/*
gradient computation for IndexToOnehot
for
b = IndexToOnehot(a)
we have
dE/da = 0 (IndexToOnehot is not differentiable, so only an empty gradient is created)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradIndexToOnehot(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for IndexToOnehot!");
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
node->visitMark = NODE_FINISHED;
}
} // namespace nts(NiuTrans.Tensor)
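A note on GradConvertDataType above: an element-wise cast has (up to rounding) an identity Jacobian, so dE/da is simply dE/db cast back to a's data type; for example, if a is float16 and dE/db is the float32 pair {0.5, 1.25}, dE/da is the same pair in float16. The buffer exists because _ConvertDataType writes across types rather than in place, and _SumMe accumulates rather than overwrites, so a node feeding several children keeps collecting gradient. The same body, annotated (a restatement of the function above, not new API):

    /* inside MakeGrad for b = convertdatatype(a); node->grad holds dE/db */
    XTensor * input = income.tails[0];                          /* a                        */
    XNoder::MakeGrad(input);                                    /* ensure a->grad exists    */
    XTensor * tmp = NewTensorBuf(input->grad, input->devID, input->mem);
    _ConvertDataType(node->grad, tmp);                          /* tmp = cast(dE/db)        */
    _SumMe(input->grad, tmp);                                   /* dE/da += tmp, accumulate */
    DelTensorBuf(tmp);                                          /* return the buffer        */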
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for data operation
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-26
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDDATA_H__
#define __XBACKWARDDATA_H__
namespace nts{
/* this class computes the gradient for tensor data operation given a node */
class XDataGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node, bool isEfficent);
/* indicates whether the node is for a data operation */
static
bool IsDataOP(XTensor * node);
private:
/* gradient computation for ConvertDataType: b = convertdatatype(a, datatype) */
static
void GradConvertDataType(XTensor * node, bool isEfficent);
/* gradient computation for IndexToOnehot: b = indextoonehot(a, num) */
static
void GradIndexToOnehot(XTensor * node, bool isEfficent);
/* gradient computation for OnehotToIndex: b = onehottoindex(a, num) */
static
void GradOnehotToIndex(XTensor * node, bool isEfficent);
};
} // namespace nts(NiuTrans.Tensor)
#endif
\ No newline at end of file
@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
 XNoder::MakeGrad(input);
 if(operID == FUNC_HARDTANH)
-_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+_HardTanHBackward(output, input, output->grad, input->grad);
 else if(operID == FUNC_IDENTITY)
-_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+_IdentityBackward(output, input, output->grad, input->grad);
 else if(operID == FUNC_LOGSOFTMAX){
 int leadDim = income.GetParamInt(0);
 CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
 _LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS);
 }
 else if(operID == FUNC_RECTIFY)
-_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+_RectifyBackward(output, input, output->grad, input->grad);
 else if(operID == FUNC_SIGMOID)
-_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
+_SigmoidBackward(output, input, output->grad, input->grad);
 else if(operID == FUNC_SOFTMAX){
 int leadDim = income.GetParamInt(0);
 CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
......
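The signature change above reflects the new split of responsibilities: element-wise activations now implement the bare chain rule dE/dx = dE/dy * f'(x), while loss handling moves to dedicated loss nodes (see XBackwardLoss below); softmax and logsoftmax keep the long form because their backward still fuses gold/padding handling. Side by side, for one call site taken verbatim from this hunk:

    // before: loss folded into the activation backward
    _SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
    // after: pure chain rule over the recorded graph
    _SigmoidBackward(output, input, output->grad, input->grad);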
@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
 XTensor * dedy = output->grad;
 if (income.tailNum == 1) {
-if(dedy->dataType == X_FLOAT)
 _SetDataFixed(dedy, 1.0F);
-else if(dedy->dataType == X_DOUBLE)
-_SetDataFixed(dedy, 1.0);
-else if(dedy->dataType == X_INT)
-_SetDataFixed(dedy, 1);
-else
-ShowNTErrors("TODO");
 return;
 }
@@ -98,39 +90,39 @@ compute dE/dx for a given function y = f(x)
 >> params - parameters of the function
 >> lossName - name of the loss, e.g., cross entropy
 */
-void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
-XTensor * dedy, XTensor * dedx, XTensor * padding,
-int funcID, void * params,
-LOSS_FUNCTION_NAME lossName)
-{
-CheckNTErrors(gold && y && x, "Empty input tensors!");
-CheckNTErrors(dedx, "Empty gradient tensors!");
-CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
-
-if(funcID == FUNC_HARDTANH){
-_HardTanHBackward(gold, y, x, dedy, dedx, lossName);
-}
-else if(funcID == FUNC_IDENTITY){
-_IdentityBackward(gold, y, x, dedy, dedx, lossName);
-}
-else if(funcID == FUNC_LOGSOFTMAX){
-int leadDim = *(int*)params;
-_LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
-}
-else if(funcID == FUNC_RECTIFY){
-_RectifyBackward(gold, y, x, dedy, dedx, lossName);
-}
-else if(funcID == FUNC_SIGMOID){
-_SigmoidBackward(gold, y, x, dedy, dedx, lossName);
-}else if(funcID == FUNC_SOFTMAX){
-int leadDim = *(int*)params;
-_SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
-}
-else{
-ShowNTErrors("wrong function found when call the backward process!");
-}
-
-}
+//void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
+//                        XTensor * dedy, XTensor * dedx, XTensor * padding,
+//                        int funcID, void * params,
+//                        LOSS_FUNCTION_NAME lossName)
+//{
+//    CheckNTErrors(gold && y && x, "Empty input tensors!");
+//    CheckNTErrors(dedx, "Empty gradient tensors!");
+//    CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
+//
+//    if(funcID == FUNC_HARDTANH){
+//        _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
+//    }
+//    else if(funcID == FUNC_IDENTITY){
+//        _IdentityBackward(gold, y, x, dedy, dedx, lossName);
+//    }
+//    else if(funcID == FUNC_LOGSOFTMAX){
+//        int leadDim = *(int*)params;
+//        _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
+//    }
+//    else if(funcID == FUNC_RECTIFY){
+//        _RectifyBackward(gold, y, x, dedy, dedx, lossName);
+//    }
+//    else if(funcID == FUNC_SIGMOID){
+//        _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
+//    }else if(funcID == FUNC_SOFTMAX){
+//        int leadDim = *(int*)params;
+//        _SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
+//    }
+//    else{
+//        ShowNTErrors("wrong function found when call the backward process!");
+//    }
+//
+//}
 /*
 compute dE/dy for variable y and error(loss) function E
@@ -139,27 +131,27 @@ compute dE/dy for variable y and error(loss) function E
 >> dedy - dE/dy
 >> lossName - name of the loss, e.g., cross entropy
 */
-void XLossGrad::Compute(XTensor * gold, XTensor * y,
-XTensor * dedy, XTensor * padding,
-LOSS_FUNCTION_NAME lossName)
-{
-if(gold == NULL){
-if(dedy->dataType == X_FLOAT)
-_SetDataFixed(dedy, 1.0F);
-else if(dedy->dataType == X_DOUBLE)
-_SetDataFixed(dedy, 1.0);
-else if(dedy->dataType == X_INT)
-_SetDataFixed(dedy, 1);
-else{
-ShowNTErrors("TODO");
-}
-return;
-}
-
-//_LossBackward(dedy, gold, y, lossName);
-if(lossName == CROSSENTROPY)
-_CrossEntropyBackward(dedy, y, gold, NULL, padding);
-
-}
+//void XLossGrad::Compute(XTensor * gold, XTensor * y,
+//                        XTensor * dedy, XTensor * padding,
+//                        LOSS_FUNCTION_NAME lossName)
+//{
+//    if(gold == NULL){
+//        if(dedy->dataType == X_FLOAT)
+//            _SetDataFixedFloat(dedy, 1.0F);
+//        else if(dedy->dataType == X_DOUBLE)
+//            _SetDataFixedDouble(dedy, 1.0);
+//        else if(dedy->dataType == X_INT)
+//            _SetDataFixedInt(dedy, 1);
+//        else{
+//            ShowNTErrors("TODO");
+//        }
+//        return;
+//    }
+//
+//    //_LossBackward(dedy, gold, y, lossName);
+//    if(lossName == CROSSENTROPY)
+//        _CrossEntropyBackward(dedy, y, gold, NULL, padding);
+//
+//}
} }
\ No newline at end of file
@@ -43,11 +43,11 @@ public:
 static
 bool IsLossOP(XTensor * node);
-/* compute dE/dx for a given function y = f(x) */
-void Compute(XTensor * gold, XTensor * y, XTensor * x,
-XTensor * dedy, XTensor * dedx, XTensor * padding,
-int funcID, void * params,
-LOSS_FUNCTION_NAME lossName);
+///* compute dE/dx for a given function y = f(x) */
+//void Compute(XTensor * gold, XTensor * y, XTensor * x,
+//             XTensor * dedy, XTensor * dedx, XTensor * padding,
+//             int funcID, void * params,
+//             LOSS_FUNCTION_NAME lossName);
 /* compute dE/dy for variable y and error(loss) function E */
 void Compute(XTensor * gold, XTensor * y,
......
@@ -68,7 +68,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
 bool XShapeGrad::IsShapeOP(XTensor * node)
 {
 XLink &income = node->income;
-return (income.typeID & DATA_BASE) != 0;
+return (income.typeID & SHAPE_BASE) != 0;
 }
 /* post processing of a node */
......
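Context for the one-line fix above: operator type ids are allocated in blocks (DATA_BASE, SHAPE_BASE, FUNCTION_BASE, and so on in XName.h), and the Is*OP predicates test the block bit. With XDataGrad entering XNet's dispatch chain in the next hunk, IsShapeOP matching on DATA_BASE would have let nodes be claimed by the wrong gradient class. An illustrative sketch with made-up base values (not the real XName.h constants):

    /* illustrative values only; the real constants live in XName.h */
    const int DATA_BASE  = 0x00010000;
    const int SHAPE_BASE = 0x00020000;
    int opReshape = SHAPE_BASE + 3;                    /* some shape operator */
    bool isShape  = (opReshape & SHAPE_BASE) != 0;     /* true                */
    bool isData   = (opReshape & DATA_BASE)  != 0;     /* false, as intended  */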
@@ -24,6 +24,7 @@
 #include "XBackwardLoss.h"
 #include "XBackwardMath.h"
 #include "XBackwardFunc.h"
+#include "XBackwardData.h"
 #include "XBackwardShape.h"
 #include "../tensor/XName.h"
@@ -264,6 +265,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
 XMathGrad::MakeGrad(node, isEfficent);
 else if(XFuncGrad::IsFunc(node))
 XFuncGrad::MakeGrad(node, isEfficent);
+else if (XDataGrad::IsDataOP(node))
+XDataGrad::MakeGrad(node, isEfficent);
 else if(XShapeGrad::IsShapeOP(node))
 XShapeGrad::MakeGrad(node, isEfficent);
 else if(XLossGrad::IsLossOP(node))
......
@@ -51,14 +51,12 @@ initialize the model
 >> myIgnored - number of position ignored in attention (from the begining)
 >> myIsMasked - indicates whether the attention is with a mask
 >> myDevID - device id
->> myMem - the memory pool
 */
 void T2TAttention::InitModel(int argc, char ** argv,
 bool myIsMasked, int myIgnored,
-int myDevID, XMem * myMem)
+int myDevID)
 {
 devID = myDevID;
-mem = myMem;
 isMasked = myIsMasked;
 ignored = myIgnored;
@@ -71,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
 LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
 LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
-InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
-InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
-InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem);
-InitTensor2D(&wa, d, d, X_FLOAT, devID, mem);
-InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID, mem);
+InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
+InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
+InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
+InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
+InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
 float scale = 1.0F;
 float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
@@ -135,9 +133,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
 int d2 = kqv2.GetDim(1);
 int d3 = kqv2.GetDim(2) / 3;
-InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID, mem);
-InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID, mem);
-InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID, mem);
+InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
+InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
+InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
 split.Add(&q2);
 split.Add(&k2);
......
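This hunk sets the pattern for the rest of the commit: every InitTensorND(&t, ..., devID, mem) becomes InitTensorNDV2(&t, ..., devID), the XMem members and parameters disappear, and tensors allocate directly on their device rather than through a caller-supplied pool. For one weight above, taken verbatim from the hunk:

    // before: allocation routed through the XMem pool
    InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
    // after: V2 initializer, no pool argument
    InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);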
@@ -42,9 +42,6 @@ public:
 /* device id */
 int devID;
-/* memory pool */
-XMem * mem;
 /* head number */
 int nhead;
@@ -94,7 +91,7 @@ public:
 /* initialize the model */
 void InitModel(int argc, char ** argv,
 bool myIsMasked, int myIgnored,
-int myDevID = -1, XMem * myMem = NULL);
+int myDevID = -1);
 /* make the network */
 XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
......
@@ -62,7 +62,7 @@ initialization
 */
 void T2TBatchLoader::Init(int argc, char ** argv)
 {
-LoadParamInt(argc, argv, "bufsize", &bufSize, 65000);
+LoadParamInt(argc, argv, "bufsize", &bufSize, 50000);
 LoadParamBool(argc, argv, "doubledend", &isDoubledEnd, false);
 LoadParamBool(argc, argv, "smallbatch", &isSmallBatch, true);
 LoadParamBool(argc, argv, "bigbatch", &isBigBatch, false);
@@ -167,8 +167,7 @@ int T2TBatchLoader::LoadBuf(FILE * file, bool isSorted, int step)
 if(wordCount >= bufSize - MAX_SEQUENCE_LENGTH)
 break;
-CheckNTErrors(seqCount % step == 0,
-"Wrong number of sequences! This line might be too long!");
+CheckNTErrors(seqCount % step == 0, "Wrong number of sequences!");
 }
 nseqBuf = seqCount;
@@ -176,8 +175,7 @@ int T2TBatchLoader::LoadBuf(FILE * file, bool isSorted, int step)
 /* sort the sequences by length */
 if (isSorted) {
-CheckNTErrors(seqCount % step == 0,
-"Wrong number of sequences! This line might be too long!");
+CheckNTErrors(seqCount % step == 0, "Wrong number of sequences!");
 SampleNode * nodes = new SampleNode[seqCount];
 int count = 0;
 int offset = 0;
@@ -282,7 +280,6 @@ load a batch of sequences
 >> isSorted - indicates whether the sequences are sorted by length
 >> wCount - word count
 >> devID - device id
->> mem - memory pool
 >> isTraining - indicates whether we are training the model
 */
 int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
@@ -292,18 +289,17 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
 int * seqs,
 int vsEnc, int vsDec, int sBatch, int wBatch,
 bool isSorted, int &ws, int &wCount,
-int devID, XMem * mem,
-bool isTraining)
+int devID, bool isTraining)
 {
 if(isLM){
 return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,
 seqs, vsEnc, sBatch, wBatch,
-isSorted, wCount, devID, mem, isTraining);
+isSorted, wCount, devID, isTraining);
 }
 else{
 return LoadBatchMT(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,
 seqs, vsEnc, vsDec, sBatch, wBatch,
-isSorted, ws, wCount, devID, mem, isTraining);
+isSorted, ws, wCount, devID, isTraining);
 }
 }
@@ -324,7 +320,6 @@ load a batch of sequences (for LM)
 >> isSorted - indicates whether the sequences are sorted by length
 >> wCount - word count
 >> devID - device id
->> mem - memory pool
 >> isTraining - indicates whether we are training the model
 */
 int T2TBatchLoader::LoadBatchLM(FILE * file,
@@ -334,8 +329,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
 int * seqs,
 int vSize, int sBatch, int wBatch,
 bool isSorted, int &wCount,
-int devID, XMem * mem,
-bool isTraining)
+int devID, bool isTraining)
 {
 if(nextSeq < 0 || nextSeq >= nseqBuf)
 LoadBuf(file, isSorted, 1);
@@ -371,11 +365,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
 dims[1] = max;
 dims[2] = vSize;
-InitTensor2D(batchEnc, sc, max, X_INT, devID, mem);
-InitTensor2D(label, sc, max, X_INT, devID, mem);
-InitTensor(gold, 3, dims, X_FLOAT, 1.0F, devID, mem);
-InitTensor2D(paddingEnc, sc, max, X_FLOAT, devID, mem);
-InitTensor2D(paddingDec, sc, max, X_FLOAT, devID, mem);
+InitTensor2DV2(batchEnc, sc, max, X_INT, devID);
+InitTensor2DV2(label, sc, max, X_INT, devID);
+InitTensorV2(gold, 3, dims, X_FLOAT, devID);
+InitTensor2DV2(paddingEnc, sc, max, X_FLOAT, devID);
+InitTensor2DV2(paddingDec, sc, max, X_FLOAT, devID);
 batchEnc->SetZeroAll();
 label->SetZeroAll();
@@ -439,12 +433,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
 paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount);
 paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount);
-/*XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem);
+/*XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
 _ConvertDataType(batchEnc, tmp);
 _NotEqual(tmp, paddingEnc, 0);
 DelTensorBuf(tmp);
-XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem);
+XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
 _ConvertDataType(batchEnc, tmp2);
 _NotEqual(tmp2, paddingDec, 0);
 DelTensorBuf(tmp2);*/
@@ -483,7 +477,6 @@ load a batch of sequences (for MT)
 >> isSorted - indicates whether the sequences are sorted by length
 >> wCount - word count
 >> devID - device id
->> mem - memory pool
 >> isTraining - indicates whether we are training the model
 */
 int T2TBatchLoader::LoadBatchMT(FILE * file,
@@ -493,8 +486,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
 int * seqs,
 int vSizeEnc, int vSizeDec, int sBatch, int wBatch,
 bool isSorted, int &ws, int &wCount,
-int devID, XMem * mem,
-bool isTraining)
+int devID, bool isTraining)
 {
 if (nextBatch < 0 || nextBatch >= bufBatchSize) {
 LoadBuf(file, isSorted, 2);
@@ -571,12 +563,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
 int sCount = sc/2;
 int seqSize = 0;
-InitTensor2D(batchEnc, sCount, maxEnc, X_INT, devID, mem);
-InitTensor2D(paddingEnc, sCount, maxEnc, X_FLOAT, devID, mem);
-InitTensor2D(batchDec, sCount, maxDec, X_INT, devID, mem);
-InitTensor2D(paddingDec, sCount, maxDec, X_FLOAT, devID, mem);
-InitTensor2D(label, sCount, maxDec, X_INT, devID, mem);
-//InitTensor(gold, 3, dimsDec, X_FLOAT, 1.0F, devID, mem);
+InitTensor2DV2(batchEnc, sCount, maxEnc, X_INT, devID);
+InitTensor2DV2(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
+InitTensor2DV2(batchDec, sCount, maxDec, X_INT, devID);
+InitTensor2DV2(paddingDec, sCount, maxDec, X_FLOAT, devID);
+InitTensor2DV2(label, sCount, maxDec, X_INT, devID);
+//InitTensorV2(gold, 3, dimsDec, X_FLOAT, devID);
 batchEnc->SetZeroAll();
 paddingEnc->SetZeroAll();
@@ -615,7 +607,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
 ws = wCountEnc;
 batchEnc->SetData(batchEncValues, batchEnc->unitNum);
 paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc);
-//XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem);
+//XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
 //_ConvertDataType(batchEnc, tmp);
 //tmp->Dump(stderr, "tmp:");
 //_NotEqual(tmp, paddingEnc, 0);
@@ -664,7 +656,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
 label->SetData(labelValues, label->unitNum);
 paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad);
-//XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem);
+//XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
 //_ConvertDataType(batchDec, tmp2);
 //_NotEqual(tmp2, paddingDec, 0);
 //DelTensorBuf(tmp2);
......
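Callers shorten accordingly. A hypothetical call site after this change (the tensor arguments are elided and the variable names are illustrative, since the real call sites fall outside the hunks shown; the parameter order follows the declarations in the header below):

    batchLoader.LoadBatch(file, isLM,
                          /* ...batch/padding/gold/label tensor pointers... */
                          seqs, vsEnc, vsDec, sBatch, wBatch,
                          isSorted, ws, wCount,
                          devID, isTraining);    /* the XMem* argument is gone */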
@@ -30,7 +30,7 @@ using namespace nts;
 namespace transformer
 {
-#define MAX_SEQUENCE_LENGTH 1024 * 16
+#define MAX_SEQUENCE_LENGTH 1024 * 4
 /* node to keep batch information */
 struct BatchNode
@@ -131,8 +131,7 @@ public:
 int * seqs,
 int vsEnc, int vsDec, int sBatch, int wBatch,
 bool isSorted, int &ws, int &wCount,
-int devID, XMem * mem,
-bool isTraining);
+int devID, bool isTraining);
 /* load a batch of sequences (for language modeling) */
 int LoadBatchLM(FILE * file,
@@ -141,8 +140,7 @@ public:
 XTensor * gold, XTensor * label,
 int * seqs, int vs, int sBatch, int wBatch,
 bool isSorted, int &wCount,
-int devID, XMem * mem,
-bool isTraining);
+int devID, bool isTraining);
 /* load a batch of sequences (for machine translation) */
 int LoadBatchMT(FILE * file,
@@ -151,8 +149,7 @@ public:
 XTensor * gold, XTensor * label,
 int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
 bool isSorted, int &ws, int &wCount,
-int devID, XMem * mem,
-bool isTraining);
+int devID, bool isTraining);
 /* shuffle the data file */
 void Shuffle(const char * srcFile, const char * tgtFile);
......
@@ -57,16 +57,14 @@ initialize the model
 >> myIsMasked - indicates whether the masked attention is employed
 >> myIgnored - number of positions ignored in attention (from the start)
 >> myDevID - device id
->> myMem - the memory pool
 */
 void AttDecoder::InitModel(int argc, char ** argv,
 bool myIsMasked, int myIgnored,
-int myDevID, XMem * myMem)
+int myDevID)
 {
-//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
+//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
 devID = myDevID;
-mem = myMem;
 ignored = myIgnored;
 LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
@@ -79,7 +77,7 @@ void AttDecoder::InitModel(int argc, char ** argv,
 CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
 /* embedding model */
-embedder.InitModel(argc, argv, devID, mem, false);
+embedder.InitModel(argc, argv, devID, false);
 attentions = new T2TAttention[nlayer];
 fnns = new T2TFNN[nlayer];
@@ -90,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv,
 /* initialize the stacked layers */
 for (int i = 0; i < nlayer; i++) {
-attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
-fnns[i].InitModel(argc, argv, myDevID, myMem);
-attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
-fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
-attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID, myMem);
-attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
+attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
+fnns[i].InitModel(argc, argv, myDevID);
+attLayerNorms[i].InitModel(argc, argv, myDevID);
+fnnLayerNorms[i].InitModel(argc, argv, myDevID);
+attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
+attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
 }
 }
......
@@ -37,9 +37,6 @@ public:
 /* device id */
 int devID;
-/* memory pool */
-XMem * mem;
 /* layer number */
 int nlayer;
@@ -95,7 +92,7 @@ public:
 /* initialize the model */
 void InitModel(int argc, char ** argv,
 bool myIsMasked, int myIgnored,
-int myDevID = -1, XMem * myMem = NULL);
+int myDevID = -1);
 /* make the decoding network */
 XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
@@ -31,7 +31,6 @@ namespace transformer
 T2TEmbedder::T2TEmbedder()
 {
 devID = -1;
-mem = NULL;
 vSize = -1;
 maxLength = -1;
 }
@@ -46,12 +45,10 @@ initialize the model
 >> argc - number of arguments
 >> argv - list of pointers to the arguments
 >> myDevID - device id
->> myMem - the memory pool
 */
-void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc)
+void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
 {
 devID = myDevID;
-mem = myMem;
 if(isEnc){
 LoadParamInt(argc, argv, "vsize", &vSize, -1);
@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc)
 LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
 LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
-InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);
+InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
 DTYPE v = 1.0F/(float)sqrt((float)eSize);
 w.SetDataRandn(0, v);
@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length)
 */
 void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
 {
-InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem);
+InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
 float * data = new float[posEmbeddingBase.unitNum];
@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
 /* we make positional embeddings first */
 //if(!match){
 if(true){
-InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem);
-XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem);
+InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
+XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
 _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
 _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
@@ -41,9 +41,6 @@ public:
 /* device id */
 int devID;
-/* memory pool */
-XMem * mem;
 /* vocabulary size */
 int vSize;
@@ -71,7 +68,7 @@ public:
 ~T2TEmbedder();
 /* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL, bool isEnc = true);
+void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
 /* make positional embeddings */
 void MakePosEmbedding(int eSize, int d, int length);
......
@@ -52,15 +52,12 @@ initialize the model
 >> argv - list of pointers to the arguments
 >> myIsMasked - indicates whether the masked attention is employed
 >> myIgnored - number of positions ignored in attention (from the start)
->> myDevID - device id
->> myMem - the memory pool
-*/
+>> myDevID - device id*/
 void AttEncoder::InitModel(int argc, char ** argv,
 bool myIsMasked, int myIgnored,
-int myDevID, XMem * myMem)
+int myDevID)
 {
 devID = myDevID;
-mem = myMem;
 ignored = myIgnored;
 LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
 CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");
 /* embedding model */
-embedder.InitModel(argc, argv, devID, mem);
+embedder.InitModel(argc, argv, devID);
 attentions = new T2TAttention[nlayer];
 fnns = new T2TFNN[nlayer];
@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv,
 /* initialize the stacked layers */
 for(int i = 0; i < nlayer; i++){
-attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
-fnns[i].InitModel(argc, argv, myDevID, myMem);
-attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
-fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
+attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
+fnns[i].InitModel(argc, argv, myDevID);
+attLayerNorms[i].InitModel(argc, argv, myDevID);
+fnnLayerNorms[i].InitModel(argc, argv, myDevID);
 }
 }
......
@@ -65,9 +65,6 @@ public:
 /* device id */
 int devID;
-/* memory pool */
-XMem * mem;
 /* layer number */
 int nlayer;
@@ -118,7 +115,7 @@ public:
 /* initialize the model */
 void InitModel(int argc, char ** argv,
 bool myIsMasked, int myIgnored,
-int myDevID = -1, XMem * myMem = NULL);
+int myDevID = -1);
 /* make the encoding network */
 XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
@@ -47,12 +47,10 @@ initialize the model
 >> argc - number of arguments
 >> argv - list of pointers to the arguments
 >> myDevID - device id
->> myMem - the memory pool
 */
-void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
 {
 devID = myDevID;
-mem = myMem;
 float minmax = 0;
@@ -62,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
 LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
 LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
-InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem);
-InitTensor1D(&b1, hSize, X_FLOAT, devID, mem);
-InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem);
-InitTensor1D(&b2, outSize, X_FLOAT, devID, mem);
+InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
+InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
+InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
+InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
 float scale = 1.0F;
 float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize));
......
@@ -36,9 +36,6 @@ public:
 /* device id */
 int devID;
-/* memory pool */
-XMem * mem;
 /* size of input vector */
 int inSize;
@@ -72,7 +69,7 @@ public:
 ~T2TFNN();
 /* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
+void InitModel(int argc, char ** argv, int myDevID = -1);
 /* make the network */
 XTensor Make(XTensor &input, bool isTraining);
......
@@ -32,7 +32,6 @@ namespace transformer
 T2TLN::T2TLN()
 {
 devID = -1;
-mem = NULL;
 d = 0;
 }
@@ -46,18 +45,16 @@ initialize the model
 >> argc - number of arguments
 >> argv - list of pointers to the arguments
 >> myDevID - device id
->> myMem - the memory pool
 */
-void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TLN::InitModel(int argc, char ** argv, int myDevID)
 {
 devID = myDevID;
-mem = myMem;
 d = 0;
 LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
-InitTensor1D(&w, d, X_FLOAT, devID, mem);
-InitTensor1D(&b, d, X_FLOAT, devID, mem);
+InitTensor1DV2(&w, d, X_FLOAT, devID);
+InitTensor1DV2(&b, d, X_FLOAT, devID);
 w.SetDataRand(1.0F, 1.0F);
 b.SetZeroAll();
......
@@ -37,9 +37,6 @@ public:
 /* device id */
 int devID;
-/* memory pool */
-XMem * mem;
 /* the transformation matrix w */
 XTensor w;
@@ -57,7 +54,7 @@ public:
 ~T2TLN();
 /* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
+void InitModel(int argc, char ** argv, int myDevID = -1);
 /* make the network */
 XTensor Make(XTensor &input);
......
@@ -35,7 +35,9 @@ XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
 XTensor base;
 XTensor lp;
+//base = ScaleAndShift(ScaleAndShift(length, 0, 5.0F), 1.0F/(5 + 1));
 base = (length + 5)/(1 + 5);
 lp = Power(base, alpha);
 return lp;
......
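For reference, this is the GNMT length penalty, lp = ((5 + len)/(1 + 5))^alpha, written here with overloaded tensor operators; the commented ScaleAndShift line appears to be an earlier attempt at the same affine map with explicit ops. A worked value: for len = 7 and alpha = 0.6, lp = (12/6)^0.6 = 2^0.6 ≈ 1.52, so longer hypotheses are penalized when beam scores are divided by lp.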
@@ -32,7 +32,6 @@ namespace transformer
 T2TModel::T2TModel()
 {
 devID = -1;
-mem = NULL;
 isLM = false;
 isMT = false;
 nhead = 1;
@@ -48,10 +47,6 @@ T2TModel::~T2TModel()
 delete encoder;
 delete decoder;
 delete outputLayer;
-/* we delete "mem" at the end because other members are using it and we must
-remove the memory space before all tensors are destroyed. */
-delete mem;
 }
 /*
@@ -61,29 +56,16 @@ initialize the model
 */
 void T2TModel::InitModel(int argc, char ** argv)
 {
-bool useMem = false;
-int memSize = 0;
-bool isMemFreeOTF = false;
 LoadParamInt(argc, argv, "dev", &devID, -1);
-LoadParamBool(argc, argv, "mem", &useMem, useMem);
-LoadParamInt(argc, argv, "memsize", &memSize, 1024);
 LoadParamBool(argc, argv, "mt", &isMT, false);
 LoadParamBool(argc, argv, "lm", &isLM, !isMT);
 LoadParamInt(argc, argv, "nhead", &nhead, 8);
-LoadParamBool(argc, argv, "freeotf", &isMemFreeOTF, false);
-if(useMem){
-delete mem;
-mem = new XMem(devID, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
-mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
-}
-encoder->InitModel(argc, argv, true, 0, devID, mem);
-outputLayer->InitModel(argc, argv, devID, mem);
+encoder->InitModel(argc, argv, true, 0, devID);
+outputLayer->InitModel(argc, argv, devID);
 if(isMT)
-decoder->InitModel(argc, argv, true, 0, devID, mem);
+decoder->InitModel(argc, argv, true, 0, devID);
 TensorList params(10);
 GetParams(params);
@@ -149,7 +131,8 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining)
 dims[i + 1] = input.GetDim(i);
 dims[0] = nhead;
 dims[input.order + 1] = len;
-XTensor mask(input.order + 2, dims, X_FLOAT, 1.0F, padding.devID, padding.mem);
+XTensor mask;
+InitTensorV2(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
 /* a upper triangular matrix where the cells of the upper triangular are set to -1e-9.
 this matrix can be used to prevent the attention to current or following words in
@@ -163,15 +146,15 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining)
 dimsPadding[padding.order - 1] = padding.GetDim(-1);
 dimsPadding[padding.order] = padding.GetDim(-1);
-XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
-padding.denseRatio, padding.devID, padding.mem);
+XTensor * padding2 = NewTensorBufV2(padding.order + 1, dimsPadding, padding.dataType,
+padding.devID);
 for(int i = 0; i < padding2->order; i++)
 dimsPadding[i + 1] = padding2->GetDim(i);
 dimsPadding[0] = nhead;
-//XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType,
-//padding.denseRatio, padding.devID, padding.mem);
+//XTensor * padding3 = NewTensorBufV2(padding.order + 2, dimsPadding, padding.dataType,
+//padding.devID);
 //
 ///* mask of the padding */
 //_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1));
@@ -241,7 +224,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
 dims[i + 1] = inputDec.GetDim(i);
 dims[0] = nhead;
 dims[inputDec.order + 1] = len;
-InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem);
+InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
 /* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
 this matrix can be used to prevent the attention to current or following words in
@@ -251,11 +234,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
 /* encoder-decoder mask that prevents the attention to padding dummy words */
 dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
-InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem);
+InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
-XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
-paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
-XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
+XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
+paddingEnc.devID);
+XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
 _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
 _ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
@@ -271,15 +254,15 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
 dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
 dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
-XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
-paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
+XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
+paddingEnc.devID);
 for (int i = 0; i < padding2->order; i++)
 dimsPadding[i + 1] = padding2->GetDim(i);
 dimsPadding[0] = nhead;
-XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
-paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
+XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
+paddingEnc.devID);
 /* mask of the padding */
 _Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
@@ -287,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
 _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
-InitTensor(&maskEnc, padding3);
+InitTensorV2(&maskEnc, padding3);
 maskEnc.SetZeroAll();
 /* generate the mask on the source language side (for padding) */
@@ -315,15 +298,15 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &maskEnc)
 dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
 dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
-XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
-paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
+XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
+paddingEnc.devID);
 for (int i = 0; i < padding2->order; i++)
 dimsPadding[i + 1] = padding2->GetDim(i);
 dimsPadding[0] = nhead;
-XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
-paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
+XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
+paddingEnc.devID);
 /* mask of the padding */
 _Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
@@ -331,7 +314,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &maskEnc)
 _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
-InitTensor(&maskEnc, padding3);
+InitTensorV2(&maskEnc, padding3);
 maskEnc.SetZeroAll();
 /* generate the mask on the source language side (for padding) */
@@ -361,7 +344,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
 dims[i + 1] = inputDec.GetDim(i);
 dims[0] = nhead;
 dims[inputDec.order + 1] = len;
-InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem);
+InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
 /* An upper triangular matrix where the cells of the upper triangular are set to -1e-9.
 This matrix can be used to block the attention to current or following words in
@@ -376,11 +359,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
 /* encoder-decoder mask that prevents the attention to padding dummy words */
 dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
-InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem);
+InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
-XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
-paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
-XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
+XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
+paddingEnc.devID);
+XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
 _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
......
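The hunks above show this commit's central mechanical change: the V2 initializers and buffer constructors drop the explicit memory-pool argument (XMem *) and take only a device id, resolving the pool internally via GMems. A minimal sketch of the new buffer lifecycle, assuming the repository's XTensor.h; the shape and CPU device id are illustrative:

    /* allocate a temporary 4 x 8 float tensor from the (implicit) CPU pool */
    int dims[2] = {4, 8};
    XTensor * tmp = NewTensorBufV2(2, dims, X_FLOAT, -1 /* devID: -1 = CPU */);
    tmp->SetZeroAll();
    /* ... use tmp as scratch space ... */
    DelTensorBuf(tmp);   /* return the buffer, as call sites in this commit do */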
@@ -40,9 +40,6 @@ public:
    /* device id */
    int devID;

-   /* memory pool */
-   XMem * mem;
-
    /* the encoder */
    AttEncoder * encoder;
...
@@ -31,7 +31,6 @@ namespace transformer
T2TOutput::T2TOutput()
{
    devID = -1;
-   mem = NULL;
    vSize = -1;
    inSize = -1;
    hSize = -1;
@@ -47,12 +46,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
->> myMem - the memory pool
*/
-void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{
    devID = myDevID;
-   mem = myMem;

    float minmax = 0;
@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
    LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
    LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);

-   InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem);
+   InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);

    float scale = 1.0F;
    float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
...
@@ -38,9 +38,6 @@ public:
    /* device id */
    int devID;

-   /* memory pool */
-   XMem * mem;
-
    /* vocabulary size */
    int vSize;
@@ -61,7 +58,7 @@ public:
    ~T2TOutput();

    /* initialize the model */
-   void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
+   void InitModel(int argc, char ** argv, int myDevID = -1);

    /* make the network */
    XTensor Make(XTensor &input);
...
@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
        dims[i] = input->GetDim(i);
    dims[input->order - 1] = beamSize;

-   InitTensor(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
-   InitTensor(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
-   InitTensor(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID, input->mem);
+   InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
+   InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
+   InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);

    state->probPath.SetZeroAll();
    state->nstep.SetZeroAll();
@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
            dims[i] = inputEnc->GetDim(i);
        dims[inputEnc->order - 1] = 1;

-       InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem);
+       InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
        _SetDataFixed(&first, startSymbol);

        /* add a new word into the input sequence of the decoder side */
@@ -179,7 +179,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
        }
        else{
            inputDec = GeneratePaths(s);
-           inputDec.SetDevice(inputEnc->devID, inputEnc->mem);
+           inputDec.SetDevice(inputEnc->devID);
            inputDec = Concatenate(first, inputDec, inputDec.order - 1);
        }
@@ -194,8 +194,8 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
    dims[inputDec.order - 1] = inputDec.GetDim(-1);

    XTensor paddingDec;
-   InitTensor(&paddingDec, inputDec.order, dims, X_INT, 1.0F, paddingEnc->devID, paddingEnc->mem);
-   SetDataFixedInt(paddingDec, 1);
+   InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
+   SetDataFixed(paddingDec, 1);

    XTensor maskDec;
    XTensor maskEncDec;
@@ -213,14 +213,14 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
    int stride = decoding.GetDim(decoding.order - 2);

-   InitTensor1D(&selectSrc, 1, X_INT);
-   InitTensor1D(&selectTgt, 1, X_INT);
+   InitTensor1DV2(&selectSrc, 1, X_INT);
+   InitTensor1DV2(&selectTgt, 1, X_INT);

    selectSrc.SetInt(stride - 1, 0);
    selectTgt.SetInt(0, 0);

-   selectSrc.SetDevice(decoding.devID, decoding.mem);
-   selectTgt.SetDevice(decoding.devID, decoding.mem);
+   selectSrc.SetDevice(decoding.devID);
+   selectTgt.SetDevice(decoding.devID);

    /* the decoder output of the last position */
    decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt);
@@ -228,8 +228,6 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
    /* generate the output probabilities */
    m->outputLayer->Make(decodingStep, output);

-   _LogMe(&output);
-
    next->layersEnc.AddList(&s->layersEnc);
    next->layersDec.Add(&inputDec);
    next->layersDec.Add(&output);
@@ -259,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
    }

    XTensor path;
-   InitTensor2D(&path, state->stateNum, distance, X_INT);
+   InitTensor2DV2(&path, state->stateNum, distance, X_INT);
    path.SetZeroAll();

    for(int i = 0; i < state->stateNum; i++){
...
@@ -73,8 +73,7 @@ public:
    void Init(int argc, char ** argv);

    /* search for the most promising states */
-   void Search(T2TModel * model, XTensor * input, XTensor * padding,
-               XTensor * output, XTensor * score);
+   void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);

    /* preparation */
    void Prepare(int myBatchSize,int myBeamSize);
@@ -95,7 +94,7 @@ public:
    void FillHeap(T2TStateBundle * beam);

    /* save the output sequences in a tensor */
-   void Dump(XTensor * output, XTensor * score);
+   void Dump(XTensor * output);

    /* check if the token is an end symbol */
    bool IsEnd(int token);
@@ -103,9 +102,6 @@ public:
    /* set end symbols for search */
    void SetEnd(const int * tokens, const int tokenNum);

-   /* check whether all hypotheses are completed */
-   bool IsAllCompleted(T2TStateBundle * beam);
-
    /* make a mask to prevent duplicated entries in beam expansion for the first position */
    XTensor MakeFirstMask(T2TStateBundle * beam);
};
...
@@ -75,7 +75,6 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
    CheckNTErrors(ofile, "Cannot open the output file");

    int devID = model->devID;
-   XMem * mem = model->mem;

    XNet net;
@@ -106,15 +105,14 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
    while(batchLoader.LoadBatch(file, model->isLM,
                                &batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label,
                                seqs, vSize, vSizeTgt,
-                               1, 1, false, ws, wc, devID, mem, false))
+                               1, 1, false, ws, wc, devID, false))
    {
        CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!");
        CheckNTErrors(!model->isLM, "Only MT model is supported!");

        XTensor output;
-       XTensor score;

-       seacher.Search(model, &batchEnc, &paddingEnc, &output, &score);
+       seacher.Search(model, &batchEnc, &paddingEnc, &output);

        Dump(ofile, &output);
@@ -130,7 +128,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
        if (batchCount % 1 == 0) {
            double elapsed = GetClockSec() - startT;
            XPRINT3(0, stderr,
-                   "[INFO] elapsed=%.1fs, sent=%d, sword=%d\n",
+                   "[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
                    elapsed, sentCount, wordCount);
        }
    }
@@ -142,8 +140,8 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
    double elapsed = GetClockSec() - startT;

-   XPRINT4(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d, and ppl=%.3f)\n",
-           elapsed,wordCountTotal, sentCount, exp(loss/wordCount));
+   XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
+           elapsed,wordCountTotal, exp(loss/wordCount));
}

/*
...
@@ -75,9 +75,6 @@ void T2TTrainer::Init(int argc, char ** argv)
        strcpy(argArray[i], argv[i]);
    }

-   bool useMem = false;
-   LoadParamBool(argc, argv, "mem", &useMem, useMem);
-
    LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F);
    LoadParamFloat(argc, argv, "lrbias", &lrbias, 0);
    LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
@@ -142,7 +139,6 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
#endif

    int devID = model->devID;
-   XMem * mem = model->mem;
    XNet net;

    if(isDebugged)
@@ -184,7 +180,7 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
    while (batchLoader.LoadBatch(file, model->isLM,
                                 &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
                                 NULL, vSize, vSizeTgt,
-                                sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, mem, true))
+                                sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
    {
        CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");
@@ -321,7 +317,6 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
    CheckNTErrors(ofile, "Cannot open the output file");

    int devID = model->devID;
-   XMem * mem = model->mem;
    XNet net;
@@ -351,7 +346,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
    while(batchLoader.LoadBatch(file, model->isLM,
                                &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
                                seqs, vSize, vSizeTgt,
-                               1, 1, false, ws, wc, devID, mem, false))
+                               1, 1, false, ws, wc, devID, false))
    {
        CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");
@@ -372,7 +367,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
        /* prediction probabilities */
        XTensor probs;
-       InitTensor1D(&probs, bSize * length);
+       InitTensor1DV2(&probs, bSize * length);

        XTensor labelOnehot;
@@ -463,7 +458,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
    /* probability of each word */
    XTensor wprobs;
-   InitTensor1D(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID, output->mem);
+   InitTensor1DV2(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);

    int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)};
    probs.Reshape(2, dims);
@@ -480,7 +475,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
    /* probability for the batch */
    XTensor result;
-   InitTensor1D(&result, 1, X_FLOAT, output->devID, output->mem);
+   InitTensor1DV2(&result, 1, X_FLOAT, output->devID);
    _ReduceSum(&probs, &result, 1);

    return result.Get1D(0);
@@ -527,7 +522,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
            _ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);

            /* v2 = m / (sqrt(v) + delta) */
-           XTensor * v2 = NewTensorBuf(v, v->devID, v->mem);
+           XTensor * v2 = NewTensorBufV2(v, v->devID);
            _Power(v, v2, 0.5F);
            _ScaleAndShiftMe(v2, 1.0F, d);
            _Div(m, v2, v2);
@@ -598,7 +593,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
    output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);

-   XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem);
+   XTensor * padding2 = NewTensorBufV2(1, &padding->unitNum, X_FLOAT, padding->devID);

    _CopyValues(padding, padding2);
    _MultiplyDim(output, padding2, output, 0);
@@ -652,7 +647,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
    DTYPE q = 1.0F - p;
    DTYPE gift = p / n;

-   InitTensor(smoothed, gold);
+   InitTensorV2(smoothed, gold);
    _CopyValues(gold, smoothed);

    if(p == 0)
...
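For reference, the Update hunk above computes the Adam step in pieces: the second moment is rescaled by (1 - adamBeta2), and the final quotient is v2 = m / (sqrt(v) + delta). With the usual Adam notation (learning rate $\eta$, stability constant $\delta$ standing in for d in the code), the intended update is

$$ m_t = \beta_1 m_{t-1} + (1-\beta_1) g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2) g_t^2, \qquad \theta_t = \theta_{t-1} - \eta \, \frac{m_t}{\sqrt{v_t} + \delta} $$

Any bias-correction terms sit outside this hunk and are not shown.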
@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName)
    }
}

-/****************************************************
+/*
Below is for calling CPU BLAS for fast matrix operations
I'm not sure how fast it is. But it seems that other
guys are crazy about this. So I decided to have a try.
@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
    return f;
}

-/*
-data type conversion
->> devID - device id
->> s - source data array
->> typeS - source data type
->> t - target data array
->> typeT - target data type
->> size - number of the items in s (and t)
-*/
-void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
-{
-    CheckNTErrors((devID < 0), "This code must be run on CPUs!");
-
-    if(typeS == typeT)
-        return;
-
-    if(typeS == X_FLOAT && typeT == X_FLOAT16){
-        for(int i = 0; i < size; i++){
-            ((unsigned short*)t)[i] = FloatToFloat16(((float*)s)[i]);
-        }
-    }
-    else if(typeS == X_FLOAT16 && typeT == X_FLOAT){
-        for(int i = 0; i < size; i++){
-            ((float*)t)[i] = Float16ToFloat(((unsigned short*)s)[i]);
-        }
-    }
-    else{
-        ShowNTErrors("Unsupported data types for conversion!");
-    }
-}
-
} /* end of the nts (NiuTrans.Tensor) namespace */
@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
/* data conversion (for lower precision computation) */
unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h);
-void ConvertDataType(int devID,
-                     void * s, TENSOR_DATA_TYPE typeS,
-                     void * t, TENSOR_DATA_TYPE typeT, int size);
-
-#ifdef USE_CUDA
-void CudaConvertDataType(int devID,
-                         void * s, TENSOR_DATA_TYPE typeS,
-                         void * t, TENSOR_DATA_TYPE typeT, int size);
-#endif

} /* end of the nts (NiuTrans.Tensor) namespace */
...
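FloatToFloat16 and Float16ToFloat survive the header cleanup above; only the element-wise ConvertDataType loop is removed, presumably superseded by the GETANDSET_CONVERTDATATYPE operator this commit registers in XName.h below. A small round-trip sketch, assuming only the two declarations kept in XDataType.h:

    #include <cstdio>
    /* assumed available from XDataType.h:
       unsigned short FloatToFloat16(float f);
       float Float16ToFloat(unsigned short h); */
    int main()
    {
        float x = 0.333333F;
        unsigned short h = FloatToFloat16(x);    /* pack into the 16-bit encoding */
        float y = Float16ToFloat(h);             /* unpack back to 32-bit */
        printf("%f -> 0x%04x -> %f\n", x, h, y); /* y matches x to ~3 decimal digits */
        return 0;
    }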
@@ -51,7 +51,13 @@ bool CONST_TRUE = true;
int verboseLevel = 0;
bool useBLAS = false;
-bool useCUDA = false;
+
+#ifdef USE_CUDA
+bool useCUDA = true;
+#else
+bool useCUDA = false;
+#endif

FILE * tmpLog = NULL;
double myTime = 0;
...
@@ -45,10 +45,6 @@ typedef int8_t __int8

/* the nts (NiuTrans.Tensor) namespace */
namespace nts {

-#if (__cplusplus >= 201103L || _MSC_VER >= 1700)
-#define USE_CPP11
-#endif
-
#define _XINLINE_

//#define DOUBELPRICSION
...
@@ -101,7 +101,6 @@ void TensorListBase<T>::Add(T&& item)
        maxNum = maxNum * 2 + 1;
    }
    items[count++] = item;
-
}

/*
...
@@ -1596,9 +1596,9 @@ void XMemManager::Initialize()

/* free it */
void XMemManager::Free()
{
-   for (int i = 0; i < MAX_CPU_NUM; i++)
+   for (int i = 0; i < MAX_CPU_MEM_NUM; i++)
        CPUMems[i].Free();
-   for (int i = 0; i < MAX_GPU_NUM; i++)
+   for (int i = 0; i < MAX_GPU_MEM_NUM; i++)
        GPUMems[i].Free();
}
...
@@ -60,10 +60,10 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256
-#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024
+#define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
-#define MAX_CPU_NUM 16
-#define MAX_GPU_NUM 16
+#define MAX_CPU_MEM_NUM 16
+#define MAX_GPU_MEM_NUM 16

/*
mode of running a memory pool
@@ -434,13 +434,13 @@ class XMemManager
{
private:
    /* cpu memory pool information */
-   XMem CPUMems[MAX_CPU_NUM];
+   XMem CPUMems[MAX_CPU_MEM_NUM];

    /* number of cpu memory pools */
    int nCPUMem;

    /* gpu memory pool information */
-   XMem GPUMems[MAX_GPU_NUM];
+   XMem GPUMems[MAX_GPU_MEM_NUM];

    /* number of gpu memory pools */
    int nGPUMem;
...
@@ -15,7 +15,7 @@
 * limitations under the License.
 */

/*
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
 */
@@ -24,9 +24,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor)

/* get operator name */
const char * GetOPName(int type)
{
-   if ((type & MATH_BASE) != 0) {
+   if ((type & MATH_BASE) != 0){
        if (type == MATH_ABSOLUTE)
            return "M_ABSOLUTE";
        else if (type == MATH_CEIL)
@@ -59,6 +59,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
            return "M_DIV";
        else if (type == MATH_DIVDIM)
            return "M_DIVDIM";
+       else if (type == MATH_MASK)
+           return "M_MASK";
        else if (type == MATH_MATRIXMUL)
            return "M_MATRIXMUL";
        else if (type == MATH_MATRIXMULBATCHED)
@@ -109,6 +111,16 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
            return "R_REDUCEVARIANCE";
    }
    else if ((type & DATA_BASE) != 0) {
+       if (type == GETANDSET_CONVERTDATATYPE)
+           return "G_CONVERTDATATYPE";
+       else if (type == GETANDSET_INDEXTOONEHOT)
+           return "G_INDEXTOONEHOT";
+       else if (type == GETANDSET_ONEHOTTOINDEX)
+           return "G_ONEHOTTOINDEX";
+       else if (type == GETANDSET_SELECT)
+           return "G_SELECT";
+   }
+   else if ((type & SHAPE_BASE) != 0){
        if (type == GETANDSET_SELECT)
            return "G_SELECT";
        else if (type == MOVEMENT_COPYINDEXED)
@@ -144,7 +156,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
        else if (type == SORT_TOPK)
            return "S_TOPK";
    }
-   else if ((type & FUNCTION_BASE) != 0) {
+   else if ((type & FUNCTION_BASE) != 0){
        if (type == FUNC_DROPOUT)
            return "F_DROPOUT";
        else if (type == FUNC_HARDTANH)
@@ -166,7 +178,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
    }

    return "NULL";
}

} // namespace nts(NiuTrans.Tensor)
@@ -15,13 +15,13 @@
 * limitations under the License.
 */

/*
 *
 * We define various names here
 *
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
 * It was really HOT these days. I can't imagine it is SO hot here in Shenyang!
 */

#ifndef __XNAME_H__
#define __XNAME_H__
@@ -31,7 +31,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)

/* math operations */
#define MATH_BASE 0x00001000

-#define GETANDSET_CONVERTDATATYPE MATH_BASE * 8

#define MATH_ABSOLUTE MATH_BASE + 1
#define MATH_CEIL MATH_ABSOLUTE + 1
#define MATH_EXP MATH_CEIL + 1
@@ -49,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CLIP MATH_ROUND + 1
#define MATH_DIV MATH_CLIP + 1
#define MATH_DIVDIM MATH_DIV + 1
-#define MATH_MATRIXMUL MATH_DIVDIM + 1
+#define MATH_MASK MATH_DIVDIM + 1
+#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
@@ -80,9 +80,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor)

/* data and shape related operations */
#define DATA_BASE MATH_BASE * 2
#define GETANDSET DATA_BASE + 1
-#define GETANDSET_SELECT GETANDSET + 1
+#define GETANDSET_CONVERTDATATYPE GETANDSET + 1
+#define GETANDSET_INDEXTOONEHOT GETANDSET_CONVERTDATATYPE + 1
+#define GETANDSET_ONEHOTTOINDEX GETANDSET_INDEXTOONEHOT + 1
+#define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1

+#define SHAPE_BASE DATA_BASE * 2
-#define MOVEMENT GETANDSET_SELECT + 1
+#define MOVEMENT SHAPE_BASE + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
@@ -105,7 +110,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SORT_TOPK SORT_SORT + 1

/* activation functions */
-#define FUNCTION_BASE DATA_BASE * 2
+#define FUNCTION_BASE SHAPE_BASE * 2
#define FUNC_DROPOUT FUNCTION_BASE + 1
#define FUNC_HARDTANH FUNC_DROPOUT + 1
#define FUNC_IDENTITY FUNC_HARDTANH + 1
@@ -118,7 +123,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define LOSS_CROSSENTROPY LOSS_BASE + 1

/* get operator name */
const char * GetOPName(int type);

} // namespace nts(NiuTrans.Tensor)
...
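The id scheme keeps each operator family in a disjoint power-of-two range: MATH_BASE = 0x1000, DATA_BASE = 0x2000, the new SHAPE_BASE = DATA_BASE * 2 = 0x4000, and FUNCTION_BASE moves to SHAPE_BASE * 2 = 0x8000, so the bit tests in GetOPName select exactly one branch. A tiny check, assuming XName.h and GetOPName from this commit (inside the nts namespace):

    #include <cstdio>
    /* assumes the macros from XName.h and using namespace nts; */
    int main()
    {
        /* MATH_MASK sits in 0x1000..0x1fff, so (MATH_MASK & MATH_BASE) != 0 */
        printf("%s\n", GetOPName(MATH_MASK));        /* prints M_MASK */
        /* GETANDSET_SELECT sits in the DATA range starting at 0x2000 */
        printf("%s\n", GetOPName(GETANDSET_SELECT)); /* prints G_SELECT */
        return 0;
    }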
@@ -48,6 +48,7 @@
#include "core/math/ScaleAndShift.h"
#include "core/getandset/SetData.h"
#include "function/Identity.h"
+#include "core/CHeader.h"

#ifdef USE_CUDA
@@ -279,6 +280,7 @@ void XTensor::Init()
    isTmp = false;
    isGrad = false;
    isVar = false;
+   enableGrad = false;
    visitMark = 0;
    grad = NULL;
}
@@ -309,6 +311,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{
    strcpy(name, tensor.name);
    order = tensor.order;
+   enableGrad = tensor.enableGrad;
    memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM);
    memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM);
    dataType = tensor.dataType;
@@ -483,6 +486,12 @@ XTensor XTensor::operator- (const DTYPE shift) const
    return ScaleAndShift(*this, 1, -shift);
}

+/* overloading of the minus-sign */
+XTensor XTensor::operator- () const
+{
+    return Negate(*this);
+}

/* overloading of the division-sign */
XTensor XTensor::operator/ (const XTensor& tensor) const
{
@@ -517,7 +526,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
        isInGlobalMem = false;
    }
    else {
-       ShowNTErrors("TODO!");
+       myMem = GMems.GetMem(myDevId);
    }
}
@@ -835,6 +844,12 @@ void XTensor::SetData(const void * d, int num, int beg)
    XMemCopy((char*)data + beg * unitSize, devID, d, -1, num * unitSize);
}

+/* generate data items with a uniform distribution in [0, 1] */
+void XTensor::Rand(int rNum, int cNum)
+{
+    _SetDataRand(this, rNum, cNum);
+}

/*
set the tensor items by a uniform distribution in range [lower, upper]
>> lower - lower value of the range
@@ -2168,6 +2183,11 @@ void InitTensorV2(XTensor * tensor,
                  const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
                  const int myDevID)
{
+   if (tensor->mem == NULL) {
+       XMem * myMem = GMems.GetMem(myDevID);
+       tensor->mem = myMem;
+       tensor->devID = myMem->devID;
+   }

    if(tensor->mem != NULL){
        tensor->Resize(myOrder, myDimSize, myDataType, 1.0F);
    }
@@ -2440,6 +2460,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
    if(reference->order < 0)
        return;

+   tensor->enableGrad = reference->enableGrad;
    InitTensor(tensor, reference->order, reference->dimSize,
               reference->dataType, reference->denseRatio,
               reference->devID, reference->mem);
@@ -2455,6 +2476,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference)
    if(reference->order < 0)
        return;

+   tensor->enableGrad = reference->enableGrad;
    InitTensorV2(tensor, reference->order, reference->dimSize,
                 reference->dataType, reference->devID);
}
@@ -2469,9 +2491,9 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
    if(reference->order < 0)
        return;

-   InitTensor(tensor, reference->order, reference->dimSize,
-              reference->dataType, reference->denseRatio,
-              -1);
+   tensor->enableGrad = reference->enableGrad;
+   InitTensorV2(tensor, reference->order, reference->dimSize,
+                reference->dataType, -1);
}

/* generate a XTensor with no initialization */
@@ -2574,7 +2596,7 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
    dims[0] = -abs(dims[0]);

-   XTensor * tensor = NewTensor(myOrder, dims, myDataType, 1.0F, devID);
+   XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID);

    if (tensor->unitNum * tensor->unitSize == 176657664) {
        tensor->Dump(stderr, "", 200);
...
@@ -151,6 +151,9 @@ public:
    /* indicates whether the tensor keeps the gradient when used as model parameters */
    bool isGrad;

+   /* indicates whether the gradient of the tensor should be computed */
+   bool enableGrad;

    /* indicates whether the tensor is used as parameters (or variables) */
    bool isVar;
@@ -235,6 +238,9 @@ public:
    /* overloading of the minus-sign */
    XTensor operator- (const DTYPE shift) const;

+   /* overloading of the minus-sign */
+   XTensor operator- () const;

    /* overloading of the division-sign */
    XTensor operator/ (const XTensor &tensor) const;
@@ -298,6 +304,9 @@ public:
    /* set the tensor with a data array */
    void SetData(const void * d, int num, int beg = 0);

+   /* generate data items with a uniform distribution in [0, 1] */
+   void Rand(int rNum, int cNum);

    /* set tensor items by a uniform distribution */
    void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F);
...
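The three header additions work together: enableGrad gates whether the function-style operators record XLink connections, operator- gives Negate a natural spelling, and Rand fills a tensor uniformly in [0, 1]. A usage sketch, assuming this XTensor.h and the V2 initializers (CPU device and shapes are illustrative):

    XTensor a, b, c;
    InitTensor2DV2(&a, 2, 3, X_FLOAT, -1);
    InitTensor2DV2(&b, 2, 3, X_FLOAT, -1);
    a.enableGrad = true;           /* c inherits this when Div initializes it */
    a.Rand(2, 3);                  /* uniform values in [0, 1] */
    b.SetDataRand(1.0F, 2.0F);     /* keep divisors away from zero */
    XTensor d = -a;                /* the new unary minus, i.e. Negate(a) */
    Div(a, b, c);                  /* records MATH_DIV because c.enableGrad is inherited */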
@@ -36,13 +36,9 @@
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/MultiplyDim.h"
-#include "arithmetic/Negate.h"
-#include "arithmetic/Sign.h"
#include "arithmetic/Sub.h"
#include "arithmetic/SubDim.h"
#include "arithmetic/Sum.h"
-#include "arithmetic/SumByColumnTV.h"
-#include "arithmetic/SumByColumnVT.h"
#include "arithmetic/SumDim.h"
#include "arithmetic/XTensorBLAS.h"
#include "arithmetic/MulAndShift.h"
@@ -56,7 +52,6 @@
#include "math/Clip.h"
#include "math/Compare.h"
#include "math/Normalize.h"
-#include "math/Power.h"
#include "math/ScaleAndShift.h"
#include "math/Unary.h"
@@ -97,5 +92,4 @@
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"

-#include "../function/DropoutWithIndex.h"

#endif // __CHEADER_H__
@@ -143,6 +143,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
}

/*
+element-wise division of two tensors (do it on site)
+keep the result in the input tensor a and return nothing
+a(i) = a(i)/b(i) + \alpha * a(i)
+where i is the index of the item
+>> a - tensor a (where we keep the result)
+>> b - tensor b
+>> alpha - the coefficient
+>> leadingDim - the dimension along which we perform broadcasting
+*/
+void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
+{
+    _Div(&a, &b, &a, alpha, leadingDim);
+}
+
+/*
return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor
>> b - another tensor for division
@@ -229,9 +246,8 @@ where i is the index of the item
>> c - result tensor
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
->> requireLink - if add operation to network
*/
-void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink)
+void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
@@ -245,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
        /* call _Div function */
        _Div(&a, &b, &c, 0, leadingDim);

-       if (requireLink) {
+       if (c.enableGrad) {
            /* tensor connections */
            XLink::MakeLink(&a, &b, &c, MATH_DIV);
            XLink::AddParamToHead(&c, alpha);
@@ -256,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
        /* call _DivDim function */
        _DivDim(&a, &b, &c, n, alpha);

-       if (requireLink) {
+       if (c.enableGrad) {
            /* tensor connections */
            XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
            XLink::AddParamToHeadInt(&c, n);
...
@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element
*/
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
+void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);

/*
element-wise division of two tensors (return an XTensor structure)
@@ -54,7 +55,7 @@ element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element
*/
-void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false);
+void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);

} // namespace nts(NiuTrans.Tensor)
...
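With requireLink gone, graph construction for Div is driven entirely by c.enableGrad (inherited from a when Div initializes c), and the new DivMe covers the in-place case. A short sketch under the same assumptions as the earlier examples:

    XTensor a, b, c;
    InitTensor2DV2(&a, 2, 2, X_FLOAT, -1);
    InitTensor2DV2(&b, 2, 2, X_FLOAT, -1);
    a.SetDataRand(1.0F, 2.0F);
    b.SetDataRand(1.0F, 2.0F);
    a.enableGrad = true;
    Div(a, b, c);   /* c(i) = a(i)/b(i); MATH_DIV link recorded via c.enableGrad */
    DivMe(a, b);    /* in place: a(i) = a(i)/b(i), default alpha = 0.0 */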
@@ -183,9 +183,8 @@ i.e., a is divided with b by broadcasting
>> c - where we put result. we save it in a if c is NULL
>> n - the dimension index
>> alpha - the scaling factor
->> requireLink - if add operation to network
*/
-void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink)
+void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
@@ -194,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha,
    /* call _DivDim function */
    _DivDim(&a, &b, &c, n, alpha);

-   if (requireLink) {
+   if (c.enableGrad == true) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
        XLink::AddParamToHeadInt(&c, n);
...
@@ -17,6 +17,7 @@

/*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-15 float16 added
 */

#include "DivDim.cuh"
@@ -168,6 +169,34 @@ void _CudaDivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE
            ShowNTErrors("Something is wrong!");
        }
    }
+   else if (a->dataType == X_FLOAT16) {
+       half alpha1 = __float2half(alpha);
+       if (stride > 1){
+           GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
+           if (alpha == (DTYPE)0.0F)
+               KernelDivWithCol<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                               ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                                blockSize, stride, blockSize * stride, blockNum, alpha1);
+           else
+               KernelDivWithCol<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                              ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                               blockSize, stride, blockSize * stride, blockNum, alpha1);
+       }
+       else if (stride == 1){
+           GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
+           if (alpha == (DTYPE)0.0F)
+               KernelDivWithRow<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                               ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                                blockNum, blockSize, alpha1);
+           else
+               KernelDivWithRow<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                              ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                               blockNum, blockSize, alpha1);
+       }
+       else {
+           ShowNTErrors("Something is wrong!");
+       }
+   }
    else {
        ShowNTErrors("TODO!");
    }
...
@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting
*/
-void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false);
+void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0);

} // namespace nts(NiuTrans.Tensor)
...
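DivDim divides a large tensor by a smaller one broadcast along dimension n, exactly the pattern BackwardTest uses earlier in this commit. A minimal sketch, assuming DivDim.h (illustrative shapes):

    /* divide each row i of the 2 x 3 tensor a by b[i] */
    XTensor a, b, c;
    InitTensor2DV2(&a, 2, 3, X_FLOAT, -1);
    InitTensor1DV2(&b, 2, X_FLOAT, -1);
    a.SetDataRand(0.0F, 1.0F);
    b.Set1D(2.0F, 0);
    b.Set1D(1.0F, 1);
    DivDim(a, b, c, 0);   /* c[i][j] = a[i][j] / b[i]; link gated by c.enableGrad */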
@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
}

/*
+mask entries of a given tensor (on site):
+a(i) = a(i) if mask(i) is non-zero
+a(i) = alpha if mask(i) = 0
+where i is the index of the element
+*/
+void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
+{
+    _Mask(&a, &mask, &a, alpha);
+}
+
+/*
mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
    XTensor c(&a);
    c.SetTMPFlag();

-   /* call _Sum function */
+   /* call _Mask function */
    _Mask(&a, &mask, &c, alpha);

    /* tensor connections */
-   //XLink::MakeLink(&a, &mask, &c, MATH_SUM);
-   //XLink::AddParamToHead(&c, alpha);
+   XLink::MakeLink(&a, &mask, &c, MATH_MASK);
+   XLink::AddParamToHead(&c, alpha);

-   // TODO!!
-   ShowNTErrors("TODO!");

    return c;
}

+/*
+mask entries of a given tensor (put the result into the output tensor c):
+a(i) = a(i) if mask(i) is non-zero
+a(i) = alpha if mask(i) = 0
+where i is the index of the element
+*/
+void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
+{
+    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
+        InitTensor(&c, &a);
+    }
+
+    /* call _Mask function */
+    _Mask(&a, &mask, &c, alpha);
+
+    if (c.enableGrad) {
+        XLink::MakeLink(&a, &mask, &c, MATH_MASK);
+        XLink::AddParamToHead(&c, alpha);
+    }
+}

}
\ No newline at end of file
@@ -34,7 +34,7 @@ c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element
*/
-void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha);
+void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha = 0.0);

/*
mask entries of a given tensor (on site):
@@ -42,7 +42,8 @@ a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
-void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha);
+void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha = 0.0);
+void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha = 0.0);

/*
mask entries of a given tensor (return an XTensor structure):
@@ -52,6 +53,14 @@ where i is the index of the element
*/
XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0);

+/*
+mask entries of a given tensor (put the result into the output tensor c):
+a(i) = a(i) if mask(i) is non-zero
+a(i) = alpha if mask(i) = 0
+where i is the index of the element
+*/
+void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha = 0.0);

} // namespace nts(NiuTrans.Tensor)

#endif // __MASK_H__
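Mask is now a first-class operator: the XTensor-returning form records MATH_MASK instead of aborting on the old TODO path, and the void form follows the same enableGrad gating as Div. A usage sketch in the spirit of the attention padding masks above, assuming Mask.h (illustrative shapes; per-position edits to the mask are omitted for brevity):

    XTensor x, mask, y;
    InitTensor2DV2(&x, 2, 4, X_FLOAT, -1);
    InitTensor2DV2(&mask, 2, 4, X_INT, -1);
    x.SetDataRand(0.0F, 1.0F);
    SetDataFixed(mask, 1);      /* keep every position: mask(i) = 1 */
    y = Mask(x, mask, -1e9F);   /* y(i) = x(i) where mask(i) != 0, else -1e9 */
    MaskMe(x, mask);            /* in-place variant, default alpha = 0.0 */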
@@ -54,8 +54,6 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
                XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
    CheckNTErrors(a && b && c, "Empty input tensors!");
-   CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
-                 "Input tensors should have the same data type!");
    CheckNTErrors(a->order >= 2 && b->order >= 2 && c->order >= 2,
                  "Input tensors must have a order >= 2!");
    CheckNTErrors(c->order == a->order + b->order - 2, "wrong tensor order")
@@ -202,7 +200,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
    delete cList;
}

-bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c)
+bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
+                    const XTensor * b, MATRIX_TRANS_TYPE transposedB,
+                    XTensor * c)
{
    if (!(a && b && c))
        return false;
@@ -231,10 +231,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen
    dimSize[sub++] = bm;

    for (int i = 0; i < order; i++) {
-       if (dimSize[i] != c->dimSize[i])
+       if (dimSize[i] != c->dimSize[i]) {
+           delete[] dimSize;
            return false;
+       }
    }
+
+   delete[] dimSize;
    return true;
}
@@ -357,11 +360,9 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
    return c;
}

void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
               const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
-              DTYPE alpha, XPRunner * parallelRunner, bool requireLink)
+              DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
    CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
    CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
@@ -394,9 +395,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
    }

    /* call _MatrixMul function */
-   _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
+   _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);

-   if (requireLink) {
+   if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
        XLink::AddParamToHeadTrans(&c, transposedA);
@@ -457,7 +458,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
}

void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
-              DTYPE alpha, XPRunner * parallelRunner, bool requireLink)
+              DTYPE alpha, XPRunner * parallelRunner)
{
    CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
    CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
@@ -492,7 +493,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
    /* call _MatrixMul function */
    _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);

-   if (requireLink) {
+   if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
        XLink::AddParamToHeadTrans(&c, X_NOTRANS);
...
...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* /*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
...@@ -56,22 +59,28 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) element tensor of th
C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/
-XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
-                  DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
+XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
+                  const XTensor &b, MATRIX_TRANS_TYPE transposedB,
+                  DTYPE alpha = (DTYPE)1.0,
+                  XPRunner * parallelRunner = NULL);

-XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
-                  TENSOR_DATA_TYPE dataType, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
+XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
+                  const XTensor &b, MATRIX_TRANS_TYPE transposedB,
+                  TENSOR_DATA_TYPE dataType, DTYPE alpha = (DTYPE)1.0,
+                  XPRunner * parallelRunner = NULL);

-void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
-               XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false);
+void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
+               const XTensor &b, MATRIX_TRANS_TYPE transposedB,
+               XTensor &c,
+               DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
+               XPRunner * parallelRunner = NULL);

/* matrix multiplication with no transposition c = a * b * alpha */
XTensor MatrixMul(const XTensor &a, const XTensor &b,
                  DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);

-void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
-               DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false);
+void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
+               DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);

} // namespace nts(NiuTrans.Tensor)
......
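To make the reorganized overloads above concrete, here is a minimal usage sketch in C++. The shapes are hypothetical and the setup is abbreviated; InitTensor2D, X_TRANS and X_NOTRANS are the library's own names, everything else is illustrative:

    XTensor a, b, c;
    InitTensor2D(&a, 2, 3);                    /* a: 2 x 3 */
    InitTensor2D(&b, 2, 4);                    /* b: 2 x 4 */
    /* ... fill a and b ... */
    c = MatrixMul(a, X_TRANS, b, X_NOTRANS);   /* returns c = a^T * b, a 3 x 4 tensor */
    MatrixMul(a, X_TRANS, b, X_NOTRANS, c);    /* same product written into a caller-owned c */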
...@@ -154,7 +154,7 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
                          const XTensor * b, MATRIX_TRANS_TYPE transposedB,
                          XTensor * c, DTYPE alpha, DTYPE beta)
{
-    CheckNTErrors((a && b && c), "Empty input tensors!");
+    CheckNTErrors(a && b && c, "Empty input tensors!");
    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
                  "Input tensors should have the same data type!");
    CheckNTErrors(a->order >= 2 && b->order >= 2 && c->order >= 2,
......
...@@ -129,9 +129,6 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
    DelTensorBuf(tmp);

    return c;
}
}
\ No newline at end of file
...@@ -144,6 +144,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
}

/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where we keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Multiply(&a, &b, &a, alpha, leadingDim);
}
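A quick sketch of the accumulation semantics of this new in-place wrapper (tensors assumed already initialized with matching shapes):

    MultiplyMe(a, b);          /* alpha defaults to 0: a(i) = a(i) * b(i) */
    MultiplyMe(a, b, 0.5F);    /* a(i) = a(i) * b(i) + 0.5 * a(i) */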
/*
return a dimension if the multiplication is performed as MultiplyDim (see MultiplyDim.h for more details)
>> a - a tensor
>> b - another tensor for multiplication
...@@ -230,9 +247,8 @@ where i is the index of the item
>> c - result tensor
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
->> requireLink - if add operation to network
*/
-void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink)
+void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -246,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
    /* call _Multiply function */
    _Multiply(&a, &b, &c, 0, leadingDim);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
        XLink::AddParamToHead(&c, alpha);
...@@ -257,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
    /* call _MultiplyDim function */
    _MultiplyDim(&a, &b, &c, n, alpha);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
        XLink::AddParamToHeadInt(&c, n);
......
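Since the requireLink flag is gone, whether a backward link is recorded now follows the output tensor itself. A rough sketch of the new calling pattern (initialization elided; enableGrad is the XTensor field this commit introduces):

    XTensor a, b, c;
    /* ... init and fill a and b ... */
    c.enableGrad = true;     /* Multiply records a MATH_MULTIPLY link for backward */
    Multiply(a, b, c);
    c.enableGrad = false;    /* purely numeric call: no tensor connections are built */
    Multiply(a, b, c);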
...@@ -123,9 +123,9 @@ where i is the item index
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
    int leadingDimRDI = a->order - leadingDim - 1;
-    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
-                  "Unmatched tensors in multiplication!");
-    CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
+    CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
+                  "Unmatched tensors in multiplication!");
+    CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!");

    int stride = 1;
    int blockSizeA = 1;
......
...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element
*/
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise product of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise product of two tensors:
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element
*/
-void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false);
+void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);

} // namespace nts(NiuTrans.Tensor)
......
...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
}

/*
tensor multiplication (do it on site)
keep the result in the input tensor a and return nothing
a = a * b + \alpha * a
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> alpha - the scaling factor
*/
void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
{
_MultiplyDim(&a, &b, &a, n, alpha);
}
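A small sketch of the broadcasting behavior of MultiplyDimMe (shapes are hypothetical; InitTensor1D is assumed to be the usual 1-D counterpart of InitTensor2D):

    XTensor x, v;
    InitTensor2D(&x, 4, 8);     /* x: 4 x 8 */
    InitTensor1D(&v, 8);        /* v matches dimension 1 of x */
    /* ... fill x and v ... */
    MultiplyDimMe(x, v, 1);     /* each row of x is scaled element-wise by v */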
/*
tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it
...@@ -180,9 +198,8 @@ i.e., a is multiplied with b by broadcasting
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a * b + \alpha * c. we save it in a if c is NULL
>> n - the dimension index
->> requireLink - if add operation to network
*/
-void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink)
+void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -191,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req
    /* call _MultiplyDim function */
    _MultiplyDim(&a, &b, &c, n, 0);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
        XLink::AddParamToHeadInt(&c, n);
...@@ -347,9 +364,8 @@ where some of dimensions of b can be of size 1
>> a - a tensor
>> b - another tensor that would be broadcasted
>> c - the resulting tensor
->> requireLink - if add operation to network
*/
-void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink)
+void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -358,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ
    /* call _MultiplyBroadcast function */
    _MultiplyBroadcast(&a, &b, &c, 0);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
        XLink::AddParamToHead(&c, 0);
......
...@@ -17,6 +17,7 @@
/*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2018-08-14
* $Updated by: LinYe (email: linye2015@outlook.com) 2019-07-30 float16 added
*/

#include "../../XDevice.h"
......
...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * a where the size of b is equal to the n-th dimension of a,
   i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);

/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
   i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
/* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
   i.e., a is multiplied with b by broadcasting */
-void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink = false);
+void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);

/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
-void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink = false);
+void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c);

} // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Negate(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaNegate(a, b);
return;
}
#endif
CheckNTErrors(XTensor::IsSameShaped(a, b), "Input tensors should have the same shape!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++)
db[i] = -d[i];
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
_Negate(a, a);
}
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of input tensor
*/
XTensor Negate(const XTensor & a)
{
XTensor b(&a);
b.SetTMPFlag();
/* call _Negate function */
_Negate(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
return b;
}
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - whether to add the operation to the network
*/
void Negate(const XTensor & a, XTensor & b, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Negate function */
_Negate(&a, &b);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
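The three Negate entry points follow the library's usual triple of in-place, output-parameter and returning forms. A brief usage sketch (values and shapes illustrative):

    XTensor a, b;
    InitTensor2D(&a, 2, 2);
    /* ... fill a ... */
    XTensor n = Negate(a);    /* new tensor; links MATH_NEGATE for backward */
    Negate(a, b);             /* writes into caller-owned b, reshaping it if needed */
    _NegateMe(&a);            /* in place: a(i) = -a(i) */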
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its negative value (CUDA Kernel)
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
template <class T>
__global__
void KernelNegate(T * a, T * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
b[i] = -a[i];
}
/*
set each entry to its negative value
>> a - input tensor
>> b - output tensor
*/
void _CudaNegate(const XTensor * a, XTensor * b)
{
CheckNTErrors(XTensor::IsSameShaped(a, b), "Input tensors should have the same shape!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelNegate<<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelNegate<<<blocks, threads>>>((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_CUH__
#define __NEGATE_CUH__
#include "Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its negative value (CUDA Kernel) */
template <class T>
__global__
void KernelNegate(T * a, T * b, int size);
/* set each entry to its negative value */
void _CudaNegate(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */
void _Negate(const XTensor * a, XTensor * b);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
/* set every entry to its minus value */
void Negate(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Sign(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaSign(a, b);
return;
}
#endif
CheckNTErrors(XTensor::IsSameShaped(a, b), "Input tensors should have the same shape!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0)
db[i] = 1.0F;
else if (d[i] == 0)
db[i] = 0.0F;
else
db[i] = -1.0F;
}
}
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
_Sign(a, a);
}
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
XTensor b(&a);
b.SetTMPFlag();
/* call _Sign function */
_Sign(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
return b;
}
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - whether to add the operation to the network
*/
void Sign(const XTensor & a, XTensor & b, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Sign function */
_Sign(&a, &b);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
* $Updated by: Lin Ye (email: linye2015@outlook.com) 2019-07-24 float16 added
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its sign value (CUDA Kernel)
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
template<class T>
__global__
void KernelSign(T * a, T * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
if (a[i] > (T)0)
b[i] = 1.0F;
else if (a[i] == (T)0)
b[i] = 0.0F;
else
b[i] = -1.0F;
}
}
/*
set each entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _CudaSign(const XTensor * a, XTensor * b)
{
CheckNTErrors(XTensor::IsSameShaped(a, b), "Input tensors should have the same shape!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelSign<<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelSign<<<blocks, threads>>>((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_CUH__
#define __SIGN_CUH__
#include "Sign.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its sign value (CUDA Kernel) */
template<class T>
__global__
void KernelSign(T * a, T * b, int size);
/* set each entry to its sign value */
void _CudaSign(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_H__
#define __SIGN_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
void _Sign(const XTensor * a, XTensor * b);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _SignMe(XTensor * a);
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Sign(const XTensor & a);
/* set every entry to its sign value */
void Sign(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
...@@ -128,6 +128,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta)
}

/*
tensor subtraction a = a - b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SubMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sub(&a, &b, &a, beta);
}
/*
return a dimension if the subtraction is performed as SubDim (see SubDim.h for more details)
>> a - a tensor
>> b - another tensor for subtraction
...@@ -203,9 +216,8 @@ tensor subtraction c = a - b * \beta
>> b - another tensor
>> c - where we put a-b*\beta. we save it in a if c is NULL
>> beta - the scaling factor
->> requireLink - if add operation to network
*/
-void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
+void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -217,7 +229,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
    /* call _Sub function */
    _Sub(&a, &b, &c, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUB);
        XLink::AddParamToHead(&c, beta);
...@@ -227,7 +239,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
    /* call _SubDim function */
    _SubDim(&a, &b, &c, n, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
        XLink::AddParamToHeadInt(&c, n);
......
...@@ -47,6 +47,7 @@ void KernelSUB(T * a, T * b, T * c, int size, T beta)
    c[i] = a[i] - b[i] * beta;
}

/*
tensor subtraction c = a - b * \beta (cuda version)
>> a - a tensor
...@@ -79,7 +80,7 @@ void _CudaSub(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
    GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
    dim3 blocks(gridSize[0]);
    dim3 threads(blockSize[0]);

-    KernelSUB<<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, a->unitNum, beta);
+    KernelSUB << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, a->unitNum, beta);
}
else if (a->dataType == X_FLOAT16 &&
         b->dataType == X_FLOAT16 &&
......
...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta
keep the result in the input tensor a and return nothing
*/
void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/*
tensor subtraction c = a - b * \beta
...@@ -43,7 +44,7 @@ make a new tensor c to keep the result and return it
XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);

/* tensor subtraction c = a - b * \beta */
-void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
+void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);

} // namespace nts(NiuTrans.Tensor)
......
...@@ -183,9 +183,8 @@ i.e., a is subtracted with b by broadcasting
>> c - where we put a-b*\beta. we save it in a if c is NULL
>> n - the dimension index
>> beta - the scaling factor
->> requireLink - if add operation to network
*/
-void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink)
+void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -194,7 +193,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b
    /* call _SubDim function */
    _SubDim(&a, &b, &c, n, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
        XLink::AddParamToHeadInt(&c, n);
......
...@@ -40,7 +40,7 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.
/* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a,
   i.e., a is subtracted with b by broadcasting */
-void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
+void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0);

} // namespace nts(NiuTrans.Tensor)
......
...@@ -133,6 +133,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
}

/*
tensor summation a = a + b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sum(&a, &b, &a, beta);
}
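With a negative beta the new in-place wrappers double as an axpy-style update, for example a plain SGD step (p and g stand for an already initialized parameter tensor and its gradient; lr is an illustrative learning rate):

    DTYPE lr = 0.01F;
    SumMe(p, g, -lr);    /* p = p - lr * g */
    SubMe(p, g, lr);     /* the equivalent form via subtraction */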
/*
return a dimension if the sum is performed as SumDim (see SumDim.h for more details)
>> a - a tensor
>> b - another tensor for sum
...@@ -207,9 +220,8 @@ tensor summation c = a + b * \beta
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
->> requireLink - if add operation to network
*/
-void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
+void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -221,7 +233,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
    /* call _Sum function */
    _Sum(&a, &b, &c, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUM);
        XLink::AddParamToHead(&c, beta);
...@@ -231,7 +243,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
    /* call _SumDim function */
    _SumDim(&a, &b, &c, n, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
        XLink::AddParamToHeadInt(&c, n);
......
...@@ -48,15 +48,6 @@ void KernelADD(T * a, T * b, T * c, int size, T beta)
}

-__global__
-void KernelADDInt(int * a, int * b, int * c, int size, DTYPE beta)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < size)
-        c[i] = a[i] + b[i] * (int)beta;
-}

/*
tensor summation c = a + b * \beta (cuda version)
......
...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing
*/
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/*
tensor summation c = a + b * \beta
...@@ -42,7 +43,7 @@ make a new tensor c to keep the result and return it
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);

/* tensor summation c = a + b * \beta */
-void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
+void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);

} // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
sum of a tensor and a vector (column vector) in a column by column manner
for each column a_col (in a block), we have
c_col = a_col + b * \beta
where b is a vector.
>> a - a tensor
>> b - a vector with the same column size with a
>> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor
*/
void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((b->order == 2 && b->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
"Illegal input vector size!");
int rowNum = a->dimSize[0];
int colNum = a->dimSize[1];
int blockNum = 1;
for (int i = 2; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int blockSize = colNum * rowNum;
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
_CudaSumByColumnTV(a, b, c, beta);
#endif
}
else {
if (!a->isSparse && !b->isSparse) {
CheckNTErrors(!c->isSparse, "TODO!");
if (a->dataType == DEFAULT_DTYPE &&
b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE)
{
for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < rowNum; i++) {
DTYPE * ap = (DTYPE*)a->data + k * blockSize + i * colNum;
DTYPE * bp = (DTYPE*)b->data;
DTYPE * cp = (DTYPE*)c->data + k * blockSize + i * colNum;
DTYPE v = bp[i];
for (int j = 0; j < colNum; j++)
cp[j] = ap[j] + v * beta;
}
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
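A worked 2 x 3 example of the column-by-column sum above (values illustrative; b is a 2 x 1 column vector, so b[i] is added to every element of row i of a):

    /*  a = | 1 2 3 |   b = | 10 |   =>   c = | 11 12 13 |  */
    /*      | 4 5 6 |       | 20 |            | 24 25 26 |  */
    _SumByColumnTV(&a, &b, &c, (DTYPE)1.0);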
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
summation of a tensor and a vector (column vector)
c_col = a_col + b * \beta
>> a - a tensor
>> b - a vector with the same column size with a
>> c - where we put a+b. we save it in a
>> colNum - column number (of a block)
>> blockSize - size of a block
>> size - size of the entire data array
>> beta - the scaling factor
*/
__global__
void KernelADDByColumnTV(DTYPE * a, DTYPE * b, DTYPE * c, int colNum, int blockSize, int size, DTYPE beta)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i >= size)
return;
int offset = i % blockSize;
int row = offset / colNum;
c[i] = a[i] + b[row] * beta;
}
/*
summation of a tensor and a vector (column vector)
for each column a_col (in a block), we have
c_col = a_col + b * \beta
where b is a vector.
>> a - a tensor
>> b - a vector with the same column size with a
>> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor
*/
void _CudaSumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((b->order == 2 && b->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
"Illegal input vector size!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE), "TODO");
int rowNum = a->dimSize[0];
int colNum = a->dimSize[1];
int blockNum = 1;
for (int i = 2; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int cudaGridSize[3];
int cudaBlockSize[3];
GDevs.GetCudaThread(c->devID, a->unitNum, cudaGridSize, cudaBlockSize);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
KernelADDByColumnTV<<<dim3(cudaGridSize[0]), dim3(cudaBlockSize[0])>>>
    ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, colNum, rowNum * colNum, a->unitNum, beta);
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __SUMBYCOLUMNTV_CUH__
#define __SUMBYCOLUMNTV_CUH__

#include "SumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* summation of a tensor and a vector (column vector) */
void _CudaSumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __SUMBYCOLUMNTV_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __SUMBYCOLUMNTV_H__
#define __SUMBYCOLUMNTV_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* sum of a tensor and a (column) vector */
void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUMBYCOLUMNTV_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
sum of a vector (column vector) and a tensor in a column by column manner
for each column b_col, we have
c = a + \sum{col} b_col * \beta
where c and a are vectors, and b_col is a column in b.
>> a - a tensor
>> b - a vector with the same column size with a
>> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor
*/
void _SumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((a->order == 2 && a->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
"Illegal input vector size!");
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
_CudaSumByColumnVT(a, b, c, beta);
#endif
}
else {
int rowNum = b->dimSize[0];
int colNum = b->dimSize[1];
int blockNum = 1;
for (int i = 2; i < b->order; i++)
blockNum *= b->dimSizeRDI[i];
int blockSize = colNum * rowNum;
if (!a->isSparse && !b->isSparse) {
CheckNTErrors(!c->isSparse, "TODO!");
if (a->dataType == DEFAULT_DTYPE &&
b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE)
{
for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < rowNum; i++) {
DTYPE * ap = (DTYPE*)a->data;
DTYPE * bp = (DTYPE*)b->data + k * blockSize + i * colNum;
DTYPE * cp = (DTYPE*)c->data;
DTYPE sum = 0;
for (int j = 0; j < colNum; j++)
sum += bp[j];
cp[i] = ap[i] + sum * beta;
}
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
summation of a vector (column vector) and a tensor
c = a + \sum{col} b_col * \beta
>> a - a vector with the same column size with b
>> b - a tensor
>> c - where we put a+b. we save it in a
>> colNum - column number (of a block)
>> rowNum - row number (of a block)
>> blockNum - number of blocks
>> beta - the scaling factor
*/
__global__
void KernelADDByColumnVT(DTYPE * a, DTYPE * b, DTYPE * c, int colNum, int rowNum, int blockNum, DTYPE beta)
{
int row = blockDim.x * blockIdx.x + threadIdx.x;
if (row >= rowNum)
return;
DTYPE sum = 0;
for (int k = 0; k < blockNum; k++) {
DTYPE * bp = b + (rowNum * k + row) * colNum;
if (colNum % 4 == 0) {
for (int i = 0; i < colNum; i += 4)
sum += bp[i] + bp[i + 1] + bp[i + 2] + bp[i + 3];
}
else if (colNum % 2 == 0) {
for (int i = 0; i < colNum; i += 2)
sum += bp[i] + bp[i + 1];
}
else {
for (int i = 0; i < colNum; i++)
sum += bp[i];
}
__syncthreads();
}
c[row] = a[row] + beta * sum;
}
/*
summation of a vector (column vector) and a tensor
for each column b_col, we have
c = a + \sum{col} b_col * \beta
where c and a are vectors, and b_col is a column in b.
>> a - a vector with the same column size with b
>> b - a tensor
>> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor
*/
void _CudaSumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((a->order == 2 && a->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
"Illegal input vector size!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE), "TODO");
int rowNum = b->dimSize[0];
int colNum = b->dimSize[1];
int blockNum = 1;
for (int i = 2; i < b->order; i++)
blockNum *= b->dimSizeRDI[i];
int cudaGridSize[3];
int cudaBlockSize[3];
GDevs.GetCudaThread(c->devID, a->dimSizeRDI[1], cudaGridSize, cudaBlockSize);
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
KernelADDByColumnVT<<<dim3(cudaGridSize[0]), dim3(cudaBlockSize[0])>>>
    ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, colNum, rowNum, blockNum, beta);
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
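For contrast with the TV variant above, a worked 2 x 3 example of the VT direction (values illustrative): each row of b is reduced to a scalar and added to the matching entry of the column vector a:

    /*  b = | 1 2 3 |   a = | 10 |   =>   c = | 10 + 6  |   | 16 |  */
    /*      | 4 5 6 |       | 20 |            | 20 + 15 | = | 35 |  */
    _SumByColumnVT(&a, &b, &c, (DTYPE)1.0);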
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __SUMBYCOLUMNVT_CUH__
#define __SUMBYCOLUMNVT_CUH__
#include "SumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* summation of a vector (column vector) and a tensor */
void _CudaSumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __SUMBYCOLUMNVT_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __SUMBYCOLUMNVT_H__
#define __SUMBYCOLUMNVT_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* sum of a (column) vector and a tensor */
void _SumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUMBYCOLUMNVT_H__
...@@ -200,9 +200,8 @@ i.e., a is summed with b by broadcasting
>> c - where we put a+b*\beta. we save it in a if c is NULL
>> n - the dimension index
>> beta - the scaling factor
->> requireLink - if add operation to network
*/
-void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink)
+void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -211,7 +210,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b
    /* call _SumDim function */
    _SumDim(&a, &b, &c, n, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
        XLink::AddParamToHeadInt(&c, n);
...@@ -368,9 +367,8 @@ c = a + b * \beta
>> b - another tensor that would be broadcasted
>> c - the resulting tensor
>> beta - the scaling factor
->> requireLink - if add operation to network
*/
-void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
+void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
...@@ -379,7 +377,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bo
    /* call _SumBroadcast function */
    _SumBroadcast(&a, &b, &c, beta);

-    if (requireLink) {
+    if (c.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
        XLink::AddParamToHead(&c, beta);
......
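A typical use of SumDim after this change is a bias add over a chosen axis; a minimal sketch, assuming the usual InitTensor1D helper (whether the backward link is recorded now depends on y.enableGrad):

    XTensor x, bias, y;
    InitTensor2D(&x, 16, 512);    /* a batch of 16 rows */
    InitTensor1D(&bias, 512);     /* bias matches dimension 1 of x */
    /* ... fill x and bias ... */
    SumDim(x, bias, y, 1);        /* y = x + bias, broadcast along dimension 1 */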
...@@ -88,17 +88,17 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize
    int col = colIndex % colNum;
    int block = colIndex / colNum;

-    if(row >= rowNum || block >= blockNum)
+    if (row >= rowNum || block >= blockNum)
        return;

-    if(threadIdx.x == 0)
+    if (threadIdx.x == 0)
        bv[threadIdx.y] = b[row];

    __syncthreads();

    int offset = block * blockSize + row * colNum + col;

-    if(betaFired)
+    if (betaFired)
        c[offset] = a[offset] + bv[threadIdx.y] * beta;
    else
        c[offset] = a[offset] + bv[threadIdx.y];
......
...@@ -44,7 +44,7 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
   i.e., a is summed with b by broadcasting */
-void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
+void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0);

/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -54,7 +54,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);

/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
-void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
+void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);

} // namespace nts(NiuTrans.Tensor)
......