Commit d3a0b984 by xuchen

1. remove old interface of XNet

2. fix the bug of onehotandindex
3. modify the data initilization of transformer
4. add the DumpFormat (need reimplement)
parent a0aa3d49
...@@ -55,7 +55,7 @@ void XNetClearAll() ...@@ -55,7 +55,7 @@ void XNetClearAll()
XNet::XNet() XNet::XNet()
{ {
nodes.Clear(); nodes.Clear();
isGradEfficient = false; isGradEfficient = true;
} }
/* de-constructor */ /* de-constructor */
...@@ -77,104 +77,20 @@ backward propagation to obtain gradient ...@@ -77,104 +77,20 @@ backward propagation to obtain gradient
>> root - root node (output) of the network >> root - root node (output) of the network
>> loss - name of loss function >> loss - name of loss function
*/ */
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss) void XNet::Backward(XTensor &root)
{ {
TensorList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
TensorList golds(1); Backward(roots);
golds.Add(NULL);
TensorList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
TensorList roots(1);
roots.Add(&root);
TensorList golds(1);
golds.Add(&gold);
TensorList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> padding - specify a target value that is ignored and does not contribute to the gradient computation
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{
TensorList roots(1);
roots.Add(&root);
TensorList golds(1);
golds.Add(&gold);
TensorList paddings(1);
paddings.Add(&padding);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
{
TensorList golds(roots.count);
TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++) {
golds.Add(NULL);
paddings.Add(NULL);
}
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
{
TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++)
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
} }
/* /*
backward propagation to obtain gradient wrt. the loss/error function backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes with a number of root nodes
>> roots - a list of root nodes (output) of the network >> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> paddings - specify a target value that is ignored
>> loss - name of loss function
*/ */
void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss) void XNet::Backward(TensorList &roots)
{ {
Traverse(roots); Traverse(roots);
...@@ -187,39 +103,6 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, ...@@ -187,39 +103,6 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings,
node->visitMark = NODE_UNFINISHED; node->visitMark = NODE_UNFINISHED;
} }
//XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
/*for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XTensor * padding = (XTensor*)paddings.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;*/
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is no use in the
folloing process of back-propagation */
/*if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, padding, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
else {
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, padding, loss);
}
}*/
/* we compuate dE/dy (y is the output) if no predefined activation function is used */
/*else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, NULL, loss);
}
}*/
/* back-propagation from output to input */ /* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){ for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i); XTensor * node = (XTensor*)nodes.Get(i);
...@@ -267,15 +150,10 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent) ...@@ -267,15 +150,10 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent); XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node)) else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent); XLossGrad::MakeGrad(node, isEfficent);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
//FILE *f = fopen("debug", "a");
//node->Dump(f, "node", 10);
//if (node->grad != NULL)
// node->grad->Dump(f, "node->grad", 10);
} }
else{ else{
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
...@@ -465,7 +343,6 @@ void XNet::ShowNetwork(FILE * file, XTensor * node) ...@@ -465,7 +343,6 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
} }
} }
/* /*
search for a node in a top-down manner by its name search for a node in a top-down manner by its name
>> top - the top most node >> top - the top most node
...@@ -473,7 +350,7 @@ search for a node in a top-down manner by its name ...@@ -473,7 +350,7 @@ search for a node in a top-down manner by its name
*/ */
//XTensor * XNet::SearchNode(XTensor * top, const char * name) //XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{ //{
//return XLink::SearchNode(top, name); //return XLink::SearchNode(top, name);
//} //}
} }
...@@ -61,25 +61,11 @@ struct XNet ...@@ -61,25 +61,11 @@ struct XNet
void Clear(); void Clear();
/* backward propagation to obtain gradient */ /* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(XTensor &root);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false); void BackwardNode(XTensor * node, bool isEfficent = false);
......
...@@ -68,8 +68,8 @@ void Read(const char * fn, FNNModel &model); ...@@ -68,8 +68,8 @@ void Read(const char * fn, FNNModel &model);
void Test(const char * test, const char * result, FNNModel &model); void Test(const char * test, const char * result, FNNModel &model);
int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum); int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum);
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols, void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID, XMem * mem); int itemNum, int devID);
void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize, int devID, XMem * mem); void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize, int devID);
void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net); void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss, void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
FNNModel &model, FNNModel &grad, FNNNet &net); FNNModel &model, FNNModel &grad, FNNNet &net);
...@@ -229,11 +229,6 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model) ...@@ -229,11 +229,6 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model)
fprintf(stderr, " -dev=%d\n", model.devID); fprintf(stderr, " -dev=%d\n", model.devID);
} }
} }
for(int i = 0; i < argc; i++){
if (!strcmp(argv[i], "-mem"))
model.mem = new XMem(model.devID, FREE_ON_THE_FLY, 256 * MILLION, 512, 256 * MILLION);
}
} }
/* check model settings */ /* check model settings */
...@@ -262,11 +257,6 @@ void Copy(FNNModel &tgt, FNNModel &src) ...@@ -262,11 +257,6 @@ void Copy(FNNModel &tgt, FNNModel &src)
tgt.vSize = src.vSize; tgt.vSize = src.vSize;
tgt.devID = src.devID; tgt.devID = src.devID;
tgt.useMemPool = src.useMemPool; tgt.useMemPool = src.useMemPool;
if(src.mem != NULL){
tgt.mem = new XMem(src.mem->devID, src.mem->mode,
src.mem->maxBlockSize, src.mem->blockNum,
src.mem->bufSize);
}
} }
/* /*
...@@ -332,7 +322,7 @@ void Init(FNNModel &model) ...@@ -332,7 +322,7 @@ void Init(FNNModel &model)
/* create embedding parameter matrix: vSize * eSize */ /* create embedding parameter matrix: vSize * eSize */
InitModelTensor2D(model.embeddingW, model.vSize, model.eSize, model); InitModelTensor2D(model.embeddingW, model.vSize, model.eSize, model);
model.embeddingW.SetVarFlag(); model.embeddingW.SetVarFlag();
/* create hidden layer parameter matrics */ /* create hidden layer parameter matrics */
for(int i = 0; i < model.hDepth; i++){ for(int i = 0; i < model.hDepth; i++){
/* hidden layer parameter matrix: (n-1)eSize * hsize if it is the first layer /* hidden layer parameter matrix: (n-1)eSize * hsize if it is the first layer
...@@ -351,9 +341,8 @@ void Init(FNNModel &model) ...@@ -351,9 +341,8 @@ void Init(FNNModel &model)
/* create the output layer parameter matrix and bias term */ /* create the output layer parameter matrix and bias term */
int iSize = model.hDepth == 0 ? (model.n - 1) * model.eSize : model.hSize; int iSize = model.hDepth == 0 ? (model.n - 1) * model.eSize : model.hSize;
InitModelTensor2D(model.outputW, iSize, model.vSize, model); InitModelTensor2D(model.outputW, iSize, model.vSize, model);
model.outputW.SetVarFlag();
InitModelTensor1D(model.outputB, model.vSize, model); InitModelTensor1D(model.outputB, model.vSize, model);
model.outputW.SetVarFlag();
model.outputB.SetVarFlag(); model.outputB.SetVarFlag();
/* then, we initialize model parameters using a uniform distribution in range /* then, we initialize model parameters using a uniform distribution in range
...@@ -460,10 +449,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -460,10 +449,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* make the input tensor for position i */ /* make the input tensor for position i */
for(int i = 0; i < model.n - 1; i++) for(int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem); MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID);
/* make the gold tensor */ /* make the gold tensor */
MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID, model.mem); MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID);
if(!autoDiff){ if(!autoDiff){
/* prepare an empty network for building the fnn */ /* prepare an empty network for building the fnn */
...@@ -490,7 +479,11 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -490,7 +479,11 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Clear(model, true); Clear(model, true);
/* forward + backward process */ /* forward + backward process */
/* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model); ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */
lossTensor = CrossEntropy(output, gold); lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */ /* automatic differentiation */
...@@ -719,10 +712,9 @@ The indexed cell is set to 1, and 0 otherwise. ...@@ -719,10 +712,9 @@ The indexed cell is set to 1, and 0 otherwise.
>> cols - column index >> cols - column index
>> itemNum - number of non-zero items >> itemNum - number of non-zero items
>> devID - device id >> devID - device id
>> mem - memory pool
*/ */
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols, void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID, XMem * mem) int itemNum, int devID)
{ {
InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, devID); InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, devID);
...@@ -741,9 +733,8 @@ make a tensor that encodes a batch of words ...@@ -741,9 +733,8 @@ make a tensor that encodes a batch of words
>> n - indicate which word is encode for each ngram >> n - indicate which word is encode for each ngram
>> vSize - vocabulary size >> vSize - vocabulary size
>> devID - device id >> devID - device id
>> mem - memory pool
*/ */
void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize, int devID, XMem * mem) void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize, int devID)
{ {
int * rows = new int[ngramNum]; int * rows = new int[ngramNum];
int * cols = new int[ngramNum]; int * cols = new int[ngramNum];
...@@ -753,7 +744,7 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz ...@@ -753,7 +744,7 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz
cols[i] = ngrams[i].words[n]; cols[i] = ngrams[i].words[n];
} }
InitZeroOneTensor2D(batch, ngramNum, vSize, rows, cols, ngramNum, devID, mem); InitZeroOneTensor2D(batch, ngramNum, vSize, rows, cols, ngramNum, devID);
delete[] rows; delete[] rows;
delete[] cols; delete[] cols;
...@@ -1162,10 +1153,10 @@ void Test(const char * test, const char * result, FNNModel &model) ...@@ -1162,10 +1153,10 @@ void Test(const char * test, const char * result, FNNModel &model)
/* make the input tensor for position i */ /* make the input tensor for position i */
for (int i = 0; i < model.n - 1; i++) for (int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem); MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID);
/* make the gold tensor */ /* make the gold tensor */
MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID, model.mem); MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID);
if (!autoDiff) { if (!autoDiff) {
/* prepare an empty network for building the fnn */ /* prepare an empty network for building the fnn */
...@@ -1174,8 +1165,8 @@ void Test(const char * test, const char * result, FNNModel &model) ...@@ -1174,8 +1165,8 @@ void Test(const char * test, const char * result, FNNModel &model)
/* forward computation */ /* forward computation */
Forward(inputs, output, model, net); Forward(inputs, output, model, net);
} }
else { else {
/* this is implemented by gather function */ /* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model); ForwardAutoDiff(ngrams, ngramNum, output, model);
output = Log(output); output = Log(output);
......
...@@ -51,14 +51,12 @@ initialize the model ...@@ -51,14 +51,12 @@ initialize the model
>> myIgnored - number of position ignored in attention (from the begining) >> myIgnored - number of position ignored in attention (from the begining)
>> myIsMasked - indicates whether the attention is with a mask >> myIsMasked - indicates whether the attention is with a mask
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TAttention::InitModel(int argc, char ** argv, void T2TAttention::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
isMasked = myIsMasked; isMasked = myIsMasked;
ignored = myIgnored; ignored = myIgnored;
...@@ -71,23 +69,18 @@ void T2TAttention::InitModel(int argc, char ** argv, ...@@ -71,23 +69,18 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem); InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem); InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID, mem); InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk)); _SetDataFanInOut(&wk, scale);
float finfoutv = (float)sqrt(6.0F * scale/(d + dv)); _SetDataFanInOut(&wq, scale);
float finfouta = (float)sqrt(6.0F * scale / (d + d)); _SetDataFanInOut(&wv, scale);
float finfoutbig = (float)sqrt(6.0F * scale / (d + 3*d)); _SetDataFanInOut(&wa, scale);
_SetDataFanInOut(&wbig, scale);
wk.SetDataRand(-finfoutk, finfoutk);
wq.SetDataRand(-finfoutk, finfoutk);
wv.SetDataRand(-finfoutv, finfoutv);
wa.SetDataRand(-finfouta, finfouta);
wbig.SetDataRand(-finfoutbig, finfoutbig);
} }
/* /*
...@@ -135,9 +128,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -135,9 +128,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1); int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3; int d3 = kqv2.GetDim(2) / 3;
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2); split.Add(&q2);
split.Add(&k2); split.Add(&k2);
......
...@@ -42,9 +42,6 @@ public: ...@@ -42,9 +42,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* head number */ /* head number */
int nhead; int nhead;
...@@ -61,7 +58,7 @@ public: ...@@ -61,7 +58,7 @@ public:
XTensor wa; XTensor wa;
XTensor wbig; XTensor wbig;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
...@@ -94,7 +91,7 @@ public: ...@@ -94,7 +91,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining); XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
......
...@@ -86,7 +86,7 @@ struct SampleNode ...@@ -86,7 +86,7 @@ struct SampleNode
int * p; int * p;
int size; int size;
int value; int value;
int key; int key;
}; };
int CompareSampleNode(const void * a, const void * b) int CompareSampleNode(const void * a, const void * b)
...@@ -280,7 +280,6 @@ load a batch of sequences ...@@ -280,7 +280,6 @@ load a batch of sequences
>> isSorted - indicates whether the sequences are sorted by length >> isSorted - indicates whether the sequences are sorted by length
>> wCount - word count >> wCount - word count
>> devID - device id >> devID - device id
>> mem - memory pool
>> isTraining - indicates whether we are training the model >> isTraining - indicates whether we are training the model
*/ */
int T2TBatchLoader::LoadBatch(FILE * file, bool isLM, int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
...@@ -290,18 +289,17 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM, ...@@ -290,18 +289,17 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
int * seqs, int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, bool isTraining)
bool isTraining)
{ {
if(isLM){ if(isLM){
return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label, return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,
seqs, vsEnc, sBatch, wBatch, seqs, vsEnc, sBatch, wBatch,
isSorted, wCount, devID, mem, isTraining); isSorted, wCount, devID, isTraining);
} }
else{ else{
return LoadBatchMT(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label, return LoadBatchMT(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,
seqs, vsEnc, vsDec, sBatch, wBatch, seqs, vsEnc, vsDec, sBatch, wBatch,
isSorted, ws, wCount, devID, mem, isTraining); isSorted, ws, wCount, devID, isTraining);
} }
} }
...@@ -322,7 +320,6 @@ load a batch of sequences (for LM) ...@@ -322,7 +320,6 @@ load a batch of sequences (for LM)
>> isSorted - indicates whether the sequences are sorted by length >> isSorted - indicates whether the sequences are sorted by length
>> wCount - word count >> wCount - word count
>> devID - device id >> devID - device id
>> mem - memory pool
>> isTraining - indicates whether we are training the model >> isTraining - indicates whether we are training the model
*/ */
int T2TBatchLoader::LoadBatchLM(FILE * file, int T2TBatchLoader::LoadBatchLM(FILE * file,
...@@ -332,8 +329,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file, ...@@ -332,8 +329,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
int * seqs, int * seqs,
int vSize, int sBatch, int wBatch, int vSize, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem, int devID, bool isTraining)
bool isTraining)
{ {
if(nextSeq < 0 || nextSeq >= nseqBuf) if(nextSeq < 0 || nextSeq >= nseqBuf)
LoadBuf(file, isSorted, 1); LoadBuf(file, isSorted, 1);
...@@ -369,11 +365,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file, ...@@ -369,11 +365,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
dims[1] = max; dims[1] = max;
dims[2] = vSize; dims[2] = vSize;
InitTensor2D(batchEnc, sc, max, X_INT, devID, mem); InitTensor2DV2(batchEnc, sc, max, X_INT, devID);
InitTensor2D(label, sc, max, X_INT, devID, mem); InitTensor2DV2(label, sc, max, X_INT, devID);
InitTensor(gold, 3, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(gold, 3, dims, X_FLOAT, devID);
InitTensor2D(paddingEnc, sc, max, X_FLOAT, devID, mem); InitTensor2DV2(paddingEnc, sc, max, X_FLOAT, devID);
InitTensor2D(paddingDec, sc, max, X_FLOAT, devID, mem); InitTensor2DV2(paddingDec, sc, max, X_FLOAT, devID);
batchEnc->SetZeroAll(); batchEnc->SetZeroAll();
label->SetZeroAll(); label->SetZeroAll();
...@@ -437,12 +433,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file, ...@@ -437,12 +433,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount); paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount); paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount);
/*XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem); /*XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
_ConvertDataType(batchEnc, tmp); _ConvertDataType(batchEnc, tmp);
_NotEqual(tmp, paddingEnc, 0); _NotEqual(tmp, paddingEnc, 0);
DelTensorBuf(tmp); DelTensorBuf(tmp);
XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem); XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
_ConvertDataType(batchEnc, tmp2); _ConvertDataType(batchEnc, tmp2);
_NotEqual(tmp2, paddingDec, 0); _NotEqual(tmp2, paddingDec, 0);
DelTensorBuf(tmp2);*/ DelTensorBuf(tmp2);*/
...@@ -481,7 +477,6 @@ load a batch of sequences (for MT) ...@@ -481,7 +477,6 @@ load a batch of sequences (for MT)
>> isSorted - indicates whether the sequences are sorted by length >> isSorted - indicates whether the sequences are sorted by length
>> wCount - word count >> wCount - word count
>> devID - device id >> devID - device id
>> mem - memory pool
>> isTraining - indicates whether we are training the model >> isTraining - indicates whether we are training the model
*/ */
int T2TBatchLoader::LoadBatchMT(FILE * file, int T2TBatchLoader::LoadBatchMT(FILE * file,
...@@ -491,8 +486,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -491,8 +486,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int * seqs, int * seqs,
int vSizeEnc, int vSizeDec, int sBatch, int wBatch, int vSizeEnc, int vSizeDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, bool isTraining)
bool isTraining)
{ {
if (nextBatch < 0 || nextBatch >= bufBatchSize) { if (nextBatch < 0 || nextBatch >= bufBatchSize) {
LoadBuf(file, isSorted, 2); LoadBuf(file, isSorted, 2);
...@@ -569,12 +563,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -569,12 +563,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int sCount = sc/2; int sCount = sc/2;
int seqSize = 0; int seqSize = 0;
InitTensor2D(batchEnc, sCount, maxEnc, X_INT, devID, mem); InitTensor2DV2(batchEnc, sCount, maxEnc, X_INT, devID);
InitTensor2D(paddingEnc, sCount, maxEnc, X_FLOAT, devID, mem); InitTensor2DV2(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
InitTensor2D(batchDec, sCount, maxDec, X_INT, devID, mem); InitTensor2DV2(batchDec, sCount, maxDec, X_INT, devID);
InitTensor2D(paddingDec, sCount, maxDec, X_FLOAT, devID, mem); InitTensor2DV2(paddingDec, sCount, maxDec, X_FLOAT, devID);
InitTensor2D(label, sCount, maxDec, X_INT, devID, mem); InitTensor2DV2(label, sCount, maxDec, X_INT, devID);
//InitTensor(gold, 3, dimsDec, X_FLOAT, 1.0F, devID, mem); //InitTensorV2(gold, 3, dimsDec, X_FLOAT, devID);
batchEnc->SetZeroAll(); batchEnc->SetZeroAll();
paddingEnc->SetZeroAll(); paddingEnc->SetZeroAll();
...@@ -613,7 +607,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -613,7 +607,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
ws = wCountEnc; ws = wCountEnc;
batchEnc->SetData(batchEncValues, batchEnc->unitNum); batchEnc->SetData(batchEncValues, batchEnc->unitNum);
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc); paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc);
//XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem); //XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
//_ConvertDataType(batchEnc, tmp); //_ConvertDataType(batchEnc, tmp);
//tmp->Dump(stderr, "tmp:"); //tmp->Dump(stderr, "tmp:");
//_NotEqual(tmp, paddingEnc, 0); //_NotEqual(tmp, paddingEnc, 0);
...@@ -662,7 +656,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -662,7 +656,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
label->SetData(labelValues, label->unitNum); label->SetData(labelValues, label->unitNum);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad); paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad);
//XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem); //XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
//_ConvertDataType(batchDec, tmp2); //_ConvertDataType(batchDec, tmp2);
//_NotEqual(tmp2, paddingDec, 0); //_NotEqual(tmp2, paddingDec, 0);
//DelTensorBuf(tmp2); //DelTensorBuf(tmp2);
......
...@@ -131,8 +131,7 @@ public: ...@@ -131,8 +131,7 @@ public:
int * seqs, int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, bool isTraining);
bool isTraining);
/* load a batch of sequences (for language modeling) */ /* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file, int LoadBatchLM(FILE * file,
...@@ -141,8 +140,7 @@ public: ...@@ -141,8 +140,7 @@ public:
XTensor * gold, XTensor * label, XTensor * gold, XTensor * label,
int * seqs, int vs, int sBatch, int wBatch, int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem, int devID, bool isTraining);
bool isTraining);
/* load a batch of sequences (for machine translation) */ /* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file, int LoadBatchMT(FILE * file,
...@@ -151,8 +149,7 @@ public: ...@@ -151,8 +149,7 @@ public:
XTensor * gold, XTensor * label, XTensor * gold, XTensor * label,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch, int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, bool isTraining);
bool isTraining);
/* shuffle the data file */ /* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile); void Shuffle(const char * srcFile, const char * tgtFile);
......
...@@ -57,16 +57,14 @@ initialize the model ...@@ -57,16 +57,14 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void AttDecoder::InitModel(int argc, char ** argv, void AttDecoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); //AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID; devID = myDevID;
mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -79,7 +77,7 @@ void AttDecoder::InitModel(int argc, char ** argv, ...@@ -79,7 +77,7 @@ void AttDecoder::InitModel(int argc, char ** argv,
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem, false); embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -90,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv, ...@@ -90,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) { for (int i = 0; i < nlayer; i++) {
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID, myMem); attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
......
...@@ -37,9 +37,6 @@ public: ...@@ -37,9 +37,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -95,7 +92,7 @@ public: ...@@ -95,7 +92,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the decoding network */ /* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder() T2TEmbedder::T2TEmbedder()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
maxLength = -1; maxLength = -1;
} }
...@@ -46,12 +45,10 @@ initialize the model ...@@ -46,12 +45,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc) void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
if(isEnc){ if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1); LoadParamInt(argc, argv, "vsize", &vSize, -1);
...@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b ...@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize); DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v); w.SetDataRandn(0, v);
...@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length) ...@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/ */
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{ {
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum]; float * data = new float[posEmbeddingBase.unitNum];
...@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input) ...@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */ /* we make positional embeddings first */
//if(!match){ //if(!match){
if(true){ if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem); XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0); _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]); _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
...@@ -41,9 +41,6 @@ public: ...@@ -41,9 +41,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -71,7 +68,7 @@ public: ...@@ -71,7 +68,7 @@ public:
~T2TEmbedder(); ~T2TEmbedder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL, bool isEnc = true); void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
/* make positional embeddings */ /* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length); void MakePosEmbedding(int eSize, int d, int length);
......
...@@ -52,15 +52,12 @@ initialize the model ...@@ -52,15 +52,12 @@ initialize the model
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id*/
>> myMem - the memory pool
*/
void AttEncoder::InitModel(int argc, char ** argv, void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem); embedder.InitModel(argc, argv, devID);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){ for(int i = 0; i < nlayer; i++){
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
......
...@@ -65,9 +65,6 @@ public: ...@@ -65,9 +65,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -118,7 +115,7 @@ public: ...@@ -118,7 +115,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the encoding network */ /* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -47,12 +47,10 @@ initialize the model ...@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -62,19 +60,17 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -62,19 +60,17 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem); InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem); InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize)); _SetDataFanInOut(&w1, scale);
float finfout2 = (float)sqrt(6.0F * scale/(hSize + outSize)); _SetDataFanInOut(&w2, scale);
w1.SetDataRand(-finfout1, finfout1);
b1.SetZeroAll(); b1.SetZeroAll();
w2.SetDataRand(-finfout2, finfout2);
b2.SetZeroAll(); b2.SetZeroAll();
} }
......
...@@ -36,9 +36,6 @@ public: ...@@ -36,9 +36,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* size of input vector */ /* size of input vector */
int inSize; int inSize;
...@@ -72,7 +69,7 @@ public: ...@@ -72,7 +69,7 @@ public:
~T2TFNN(); ~T2TFNN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input, bool isTraining); XTensor Make(XTensor &input, bool isTraining);
......
...@@ -32,7 +32,6 @@ namespace transformer ...@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN() T2TLN::T2TLN()
{ {
devID = -1; devID = -1;
mem = NULL;
d = 0; d = 0;
} }
...@@ -46,18 +45,16 @@ initialize the model ...@@ -46,18 +45,16 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
d = 0; d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem); InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID, mem); InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F); w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll(); b.SetZeroAll();
......
...@@ -36,9 +36,6 @@ class T2TLN ...@@ -36,9 +36,6 @@ class T2TLN
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the transformation matrix w */ /* the transformation matrix w */
XTensor w; XTensor w;
...@@ -57,7 +54,7 @@ public: ...@@ -57,7 +54,7 @@ public:
~T2TLN(); ~T2TLN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
...@@ -32,7 +32,6 @@ namespace transformer ...@@ -32,7 +32,6 @@ namespace transformer
T2TModel::T2TModel() T2TModel::T2TModel()
{ {
devID = -1; devID = -1;
mem = NULL;
isLM = false; isLM = false;
isMT = false; isMT = false;
nhead = 1; nhead = 1;
...@@ -48,10 +47,6 @@ T2TModel::~T2TModel() ...@@ -48,10 +47,6 @@ T2TModel::~T2TModel()
delete encoder; delete encoder;
delete decoder; delete decoder;
delete outputLayer; delete outputLayer;
/* we delete "mem" at the end because other members are using it and we must
remove the memory space before all tensors are destroyed. */
delete mem;
} }
/* /*
...@@ -61,29 +56,16 @@ initialize the model ...@@ -61,29 +56,16 @@ initialize the model
*/ */
void T2TModel::InitModel(int argc, char ** argv) void T2TModel::InitModel(int argc, char ** argv)
{ {
bool useMem = false;
int memSize = 0;
bool isMemFreeOTF = false;
LoadParamInt(argc, argv, "dev", &devID, -1); LoadParamInt(argc, argv, "dev", &devID, -1);
LoadParamBool(argc, argv, "mem", &useMem, useMem);
LoadParamInt(argc, argv, "memsize", &memSize, 1024);
LoadParamBool(argc, argv, "mt", &isMT, false); LoadParamBool(argc, argv, "mt", &isMT, false);
LoadParamBool(argc, argv, "lm", &isLM, !isMT); LoadParamBool(argc, argv, "lm", &isLM, !isMT);
LoadParamInt(argc, argv, "nhead", &nhead, 8); LoadParamInt(argc, argv, "nhead", &nhead, 8);
LoadParamBool(argc, argv, "freeotf", &isMemFreeOTF, false);
if(useMem){
delete mem;
mem = new XMem(devID, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
}
encoder->InitModel(argc, argv, true, 0, devID, mem); encoder->InitModel(argc, argv, true, 0, devID);
outputLayer->InitModel(argc, argv, devID, mem); outputLayer->InitModel(argc, argv, devID);
if(isMT) if(isMT)
decoder->InitModel(argc, argv, true, 0, devID, mem); decoder->InitModel(argc, argv, true, 0, devID);
TensorList params(10); TensorList params(10);
GetParams(params); GetParams(params);
...@@ -149,7 +131,8 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is ...@@ -149,7 +131,8 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dims[i + 1] = input.GetDim(i); dims[i + 1] = input.GetDim(i);
dims[0] = nhead; dims[0] = nhead;
dims[input.order + 1] = len; dims[input.order + 1] = len;
XTensor mask(input.order + 2, dims, X_FLOAT, 1.0F, padding.devID, padding.mem); XTensor mask;
InitTensorV2(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
/* a upper triangular matrix where the cells of the upper triangular are set to -1e-9. /* a upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in this matrix can be used to prevent the attention to current or following words in
...@@ -163,15 +146,15 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is ...@@ -163,15 +146,15 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dimsPadding[padding.order - 1] = padding.GetDim(-1); dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1); dimsPadding[padding.order] = padding.GetDim(-1);
XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType, XTensor * padding2 = NewTensorBufV2(padding.order + 1, dimsPadding, padding.dataType,
padding.denseRatio, padding.devID, padding.mem); padding.devID);
for(int i = 0; i < padding2->order; i++) for(int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i); dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead; dimsPadding[0] = nhead;
//XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType, //XTensor * padding3 = NewTensorBufV2(padding.order + 2, dimsPadding, padding.dataType,
// padding.denseRatio, padding.devID, padding.mem); // padding.devID);
// //
///* mask of the padding */ ///* mask of the padding */
//_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1)); //_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1));
...@@ -241,7 +224,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -241,7 +224,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i); dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead; dims[0] = nhead;
dims[inputDec.order + 1] = len; dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem); InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e-9. /* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in this matrix can be used to prevent the attention to current or following words in
...@@ -251,11 +234,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -251,11 +234,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */ /* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1); dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem); InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem); XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1)); _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F); _ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
...@@ -271,15 +254,15 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -271,15 +254,15 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
for (int i = 0; i < padding2->order; i++) for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i); dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead; dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType, XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
/* mask of the padding */ /* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1)); _Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
...@@ -287,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -287,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F); _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3); InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll(); maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */ /* generate the mask on the source language side (for padding) */
...@@ -315,15 +298,15 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma ...@@ -315,15 +298,15 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
for (int i = 0; i < padding2->order; i++) for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i); dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead; dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType, XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
/* mask of the padding */ /* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1)); _Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
...@@ -331,7 +314,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma ...@@ -331,7 +314,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
_ScaleAndShiftMe(padding3, 1e9F, -1e9F); _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3); InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll(); maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */ /* generate the mask on the source language side (for padding) */
...@@ -361,7 +344,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -361,7 +344,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i); dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead; dims[0] = nhead;
dims[inputDec.order + 1] = len; dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem); InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* An upper triangular matrix where the cells of the upper triangular are set to -1e-9. /* An upper triangular matrix where the cells of the upper triangular are set to -1e-9.
This matrix can be used to block the attention to current or following words in This matrix can be used to block the attention to current or following words in
...@@ -376,11 +359,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -376,11 +359,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */ /* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1); dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem); InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem); XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1)); _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
......
...@@ -40,9 +40,6 @@ public: ...@@ -40,9 +40,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the encoder */ /* the encoder */
AttEncoder * encoder; AttEncoder * encoder;
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TOutput::T2TOutput() T2TOutput::T2TOutput()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
inSize = -1; inSize = -1;
hSize = -1; hSize = -1;
...@@ -47,12 +46,10 @@ initialize the model ...@@ -47,12 +46,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F); LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize)); float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
......
...@@ -38,9 +38,6 @@ public: ...@@ -38,9 +38,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -61,7 +58,7 @@ public: ...@@ -61,7 +58,7 @@ public:
~T2TOutput(); ~T2TOutput();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
...@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input ...@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
dims[i] = input->GetDim(i); dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize; dims[input->order - 1] = beamSize;
InitTensor(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem); InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem); InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID, input->mem); InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);
state->probPath.SetZeroAll(); state->probPath.SetZeroAll();
state->nstep.SetZeroAll(); state->nstep.SetZeroAll();
...@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[i] = inputEnc->GetDim(i); dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1; dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem); InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol); _SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */ /* add a new word into the input sequence of the decoder side */
...@@ -179,7 +179,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -179,7 +179,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
} }
else{ else{
inputDec = GeneratePaths(s); inputDec = GeneratePaths(s);
inputDec.SetDevice(inputEnc->devID, inputEnc->mem); inputDec.SetDevice(inputEnc->devID);
inputDec = Concatenate(first, inputDec, inputDec.order - 1); inputDec = Concatenate(first, inputDec, inputDec.order - 1);
} }
...@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputDec.order - 1] = inputDec.GetDim(-1); dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec; XTensor paddingDec;
InitTensor(&paddingDec, inputDec.order, dims, X_INT, 1.0F, paddingEnc->devID, paddingEnc->mem); InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1); SetDataFixedInt(paddingDec, 1);
XTensor maskDec; XTensor maskDec;
...@@ -213,14 +213,14 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -213,14 +213,14 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
int stride = decoding.GetDim(decoding.order - 2); int stride = decoding.GetDim(decoding.order - 2);
InitTensor1D(&selectSrc, 1, X_INT); InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1D(&selectTgt, 1, X_INT); InitTensor1DV2(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0); selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0); selectTgt.SetInt(0, 0);
selectSrc.SetDevice(decoding.devID, decoding.mem); selectSrc.SetDevice(decoding.devID);
selectTgt.SetDevice(decoding.devID, decoding.mem); selectTgt.SetDevice(decoding.devID);
/* the decoder output of the last position */ /* the decoder output of the last position */
decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt); decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt);
...@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state) ...@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
} }
XTensor path; XTensor path;
InitTensor2D(&path, state->stateNum, distance, X_INT); InitTensor2DV2(&path, state->stateNum, distance, X_INT);
path.SetZeroAll(); path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){ for(int i = 0; i < state->stateNum; i++){
......
...@@ -141,10 +141,6 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -141,10 +141,6 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
/* push complete hypotheses into the heap */ /* push complete hypotheses into the heap */
Collect(next); Collect(next);
/* stop searching when all hypotheses are completed */
if(IsAllCompleted(next))
break;
} }
/* fill the heap with imcomplete hypotheses if neccesary */ /* fill the heap with imcomplete hypotheses if neccesary */
...@@ -196,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -196,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
for(int i = 0; i < order; i++) for(int i = 0; i < order; i++)
dims[i] = prob.GetDim(i); dims[i] = prob.GetDim(i);
InitTensor(&score, &prob); InitTensorV2(&score, &prob);
InitTensor(&probPath, &prob); InitTensorV2(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize); prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize); score.Reshape(score.unitNum/outputSize, outputSize);
...@@ -208,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -208,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&prob, &probPathPrev, &probPath, 0); _SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev); InitTensorV2(&len, &lenPrev);
InitTensor(&lp, &lenPrev); InitTensorV2(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F); _ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
...@@ -229,9 +225,9 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -229,9 +225,9 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&score, &firstMask, &score, 0); _SumDim(&score, &firstMask, &score, 0);
} }
InitTensor(&mask, InitTensorV2(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT, 1.0F, prev->endMark.order, prev->endMark.dimSize, X_FLOAT,
prev->endMark.devID, prev->endMark.mem); prev->endMark.devID);
_SetDataFixedCond(&mask, &prev->endMark, -1e9F); _SetDataFixedCond(&mask, &prev->endMark, -1e9F);
mask.Reshape(mask.unitNum); mask.Reshape(mask.unitNum);
...@@ -266,15 +262,15 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -266,15 +262,15 @@ void T2TSearch::Generate(T2TStateBundle * beam)
XTensor &prob = beam->prob; XTensor &prob = beam->prob;
int order = score.order; int order = score.order;
CheckNTErrors(order >= 3, "The tensor must be of order 2 or larger.");
CheckNTErrors(dimsBeam[order - 3] % beamSize == 0, "Wrong dimension size!");
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
dims[i] = score.GetDim(i); dims[i] = score.GetDim(i);
dimsBeam[i] = score.GetDim(i); dimsBeam[i] = score.GetDim(i);
dimsTopK[i] = score.GetDim(i); dimsTopK[i] = score.GetDim(i);
} }
CheckNTErrors(order >= 3, "The tensor must be of order 2 or larger.");
CheckNTErrors(dimsBeam[order - 3] % beamSize == 0, "Wrong dimension size!");
int sizeVocab = score.GetDim(-1); int sizeVocab = score.GetDim(-1);
int stride = score.GetDim(-1); int stride = score.GetDim(-1);
...@@ -283,12 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -283,12 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsTopK[order - 3] = dimsBeam[order - 3]; dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize; dimsTopK[order - 1] = beamSize;
InitTensor(&scoreTopK, order, dimsTopK, score.dataType, InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType,
1.0F, score.devID, score.mem); score.devID);
InitTensor(&index, order, dimsTopK, X_INT, InitTensorV2(&index, order, dimsTopK, X_INT,
1.0F, score.devID, score.mem); score.devID);
InitTensor(&preID, order, dimsTopK, X_INT, InitTensorV2(&preID, order, dimsTopK, X_INT, -1);
1.0F, -1);
score.Reshape(order, dimsBeam); score.Reshape(order, dimsBeam);
...@@ -312,7 +307,7 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -312,7 +307,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims); score.Reshape(order, dims);
/* we keep the top-k scores */ /* we keep the top-k scores */
InitTensor(&score, &scoreTopK); InitTensorV2(&score, &scoreTopK);
CopyValues(scoreTopK, score); CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */ /* CPU data (TODO: remove GPU->CPU data copy!!!) */
...@@ -328,9 +323,9 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -328,9 +323,9 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* sequence probability of top-k candidates */ /* sequence probability of top-k candidates */
XTensor probPathTopK; XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK); InitTensorV2(&probPathTopK, &scoreTopK);
XTensor probTopK; XTensor probTopK;
InitTensor(&probTopK, &scoreTopK); InitTensorV2(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) { for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i); dims[i] = probPath.GetDim(i);
...@@ -386,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -386,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
InitTensorOnCPU(&probPath, &probPathRef); InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef); InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef); InitTensorOnCPU(&endMarkCPU, &predictionRef);
InitTensor(&endMark, &predictionRef); InitTensorV2(&endMark, &predictionRef);
/* we copy the data to CPU because the frequent access to GPU is slow /* we copy the data to CPU because the frequent access to GPU is slow
and we can speed-up the process by doing the job on CPU. */ and we can speed-up the process by doing the job on CPU. */
...@@ -507,7 +502,7 @@ void T2TSearch::Dump(XTensor * output) ...@@ -507,7 +502,7 @@ void T2TSearch::Dump(XTensor * output)
int dims[3] = {batchSize, beamSize, maxLength}; int dims[3] = {batchSize, beamSize, maxLength};
int * words = new int[maxLength]; int * words = new int[maxLength];
InitTensor(output, 3, dims, X_INT); InitTensorV2(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1); SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */ /* heap for an input sentence in the batch */
...@@ -578,23 +573,6 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum) ...@@ -578,23 +573,6 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum)
endSymbolNum = tokenNum; endSymbolNum = tokenNum;
} }
/*
check whether all hypotheses are completed
>> beam - the beam that keeps the searching states
*/
bool T2TSearch::IsAllCompleted(T2TStateBundle * beam)
{
T2TState * states = beam->states;
for (int i = 0; i < beam->stateNum; i++) {
T2TState & state = states[i];
if(!state.isCompleted)
return false;
}
return true;
}
/* /*
make a mask to prevent duplicated entries in beam expansion for the first position make a mask to prevent duplicated entries in beam expansion for the first position
>> beam - the beam that keeps the searching states >> beam - the beam that keeps the searching states
...@@ -609,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam) ...@@ -609,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++) for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i); dims[i] = prob.GetDim(i);
InitTensor(&mask, order - 1, dims, X_FLOAT); InitTensorV2(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll(); mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) { for (int i = 0; i < mask.unitNum; i++) {
...@@ -617,7 +595,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam) ...@@ -617,7 +595,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
mask.Set(-1e9, i); mask.Set(-1e9, i);
} }
mask.SetDevice(prob.devID, prob.mem); mask.SetDevice(prob.devID);
return mask; return mask;
} }
......
...@@ -102,9 +102,6 @@ public: ...@@ -102,9 +102,6 @@ public:
/* set end symbols for search */ /* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum); void SetEnd(const int * tokens, const int tokenNum);
/* check whether all hypotheses are completed */
bool IsAllCompleted(T2TStateBundle * beam);
/* make a mask to prevent duplicated entries in beam expansion for the first position */ /* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam); XTensor MakeFirstMask(T2TStateBundle * beam);
}; };
......
...@@ -75,7 +75,6 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -75,7 +75,6 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
CheckNTErrors(ofile, "Cannot open the output file"); CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID; int devID = model->devID;
XMem * mem = model->mem;
XNet net; XNet net;
...@@ -106,7 +105,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -106,7 +105,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
while(batchLoader.LoadBatch(file, model->isLM, while(batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label, &batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label,
seqs, vSize, vSizeTgt, seqs, vSize, vSizeTgt,
1, 1, false, ws, wc, devID, mem, false)) 1, 1, false, ws, wc, devID, false))
{ {
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!"); CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!");
CheckNTErrors(!model->isLM, "Only MT model is supported!"); CheckNTErrors(!model->isLM, "Only MT model is supported!");
...@@ -129,7 +128,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -129,7 +128,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
if (batchCount % 1 == 0) { if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sent=%d, sword=%d\n", "[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount); elapsed, sentCount, wordCount);
} }
} }
...@@ -141,8 +140,8 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -141,8 +140,8 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT4(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d, and ppl=%.3f)\n", XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, sentCount, exp(loss/wordCount)); elapsed,wordCountTotal, exp(loss/wordCount));
} }
/* /*
......
...@@ -75,9 +75,6 @@ void T2TTrainer::Init(int argc, char ** argv) ...@@ -75,9 +75,6 @@ void T2TTrainer::Init(int argc, char ** argv)
strcpy(argArray[i], argv[i]); strcpy(argArray[i], argv[i]);
} }
bool useMem = false;
LoadParamBool(argc, argv, "mem", &useMem, useMem);
LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F); LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F);
LoadParamFloat(argc, argv, "lrbias", &lrbias, 0); LoadParamFloat(argc, argv, "lrbias", &lrbias, 0);
LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1); LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
...@@ -142,7 +139,6 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model ...@@ -142,7 +139,6 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
#endif #endif
int devID = model->devID; int devID = model->devID;
XMem * mem = model->mem;
XNet net; XNet net;
if(isDebugged) if(isDebugged)
...@@ -184,7 +180,7 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model ...@@ -184,7 +180,7 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
while (batchLoader.LoadBatch(file, model->isLM, while (batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label, &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
NULL, vSize, vSizeTgt, NULL, vSize, vSizeTgt,
sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, mem, true)) sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
{ {
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch"); CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");
...@@ -321,7 +317,6 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -321,7 +317,6 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
CheckNTErrors(ofile, "Cannot open the output file"); CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID; int devID = model->devID;
XMem * mem = model->mem;
XNet net; XNet net;
...@@ -351,7 +346,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -351,7 +346,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
while(batchLoader.LoadBatch(file, model->isLM, while(batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label, &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
seqs, vSize, vSizeTgt, seqs, vSize, vSizeTgt,
1, 1, false, ws, wc, devID, mem, false)) 1, 1, false, ws, wc, devID, false))
{ {
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch"); CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");
...@@ -372,7 +367,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -372,7 +367,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
/* prediction probabilities */ /* prediction probabilities */
XTensor probs; XTensor probs;
InitTensor1D(&probs, bSize * length); InitTensor1DV2(&probs, bSize * length);
XTensor labelOnehot; XTensor labelOnehot;
...@@ -463,7 +458,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs) ...@@ -463,7 +458,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability of each word */ /* probability of each word */
XTensor wprobs; XTensor wprobs;
InitTensor1D(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID, output->mem); InitTensor1DV2(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)}; int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)};
probs.Reshape(2, dims); probs.Reshape(2, dims);
...@@ -480,7 +475,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs) ...@@ -480,7 +475,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability for the batch */ /* probability for the batch */
XTensor result; XTensor result;
InitTensor1D(&result, 1, X_FLOAT, output->devID, output->mem); InitTensor1DV2(&result, 1, X_FLOAT, output->devID);
_ReduceSum(&probs, &result, 1); _ReduceSum(&probs, &result, 1);
return result.Get1D(0); return result.Get1D(0);
...@@ -527,7 +522,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr) ...@@ -527,7 +522,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0); _ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
/* v2 = m / (sqrt(v) + delta) */ /* v2 = m / (sqrt(v) + delta) */
XTensor * v2 = NewTensorBuf(v, v->devID, v->mem); XTensor * v2 = NewTensorBufV2(v, v->devID);
_Power(v, v2, 0.5F); _Power(v, v2, 0.5F);
_ScaleAndShiftMe(v2, 1.0F, d); _ScaleAndShiftMe(v2, 1.0F, d);
_Div(m, v2, v2); _Div(m, v2, v2);
...@@ -598,7 +593,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding) ...@@ -598,7 +593,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]); output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);
XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem); XTensor * padding2 = NewTensorBufV2(1, &padding->unitNum, X_FLOAT, padding->devID);
_CopyValues(padding, padding2); _CopyValues(padding, padding2);
_MultiplyDim(output, padding2, output, 0); _MultiplyDim(output, padding2, output, 0);
...@@ -652,7 +647,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p) ...@@ -652,7 +647,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
DTYPE q = 1.0F - p; DTYPE q = 1.0F - p;
DTYPE gift = p / n; DTYPE gift = p / n;
InitTensor(smoothed, gold); InitTensorV2(smoothed, gold);
_CopyValues(gold, smoothed); _CopyValues(gold, smoothed);
if(p == 0) if(p == 0)
......
...@@ -528,8 +528,6 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne) ...@@ -528,8 +528,6 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
CheckNTErrors(hit, "No proper node found in parent.income edge!"); CheckNTErrors(hit, "No proper node found in parent.income edge!");
} }
} }
strcpy(newOne->name, oldOne->name);
} }
...@@ -737,11 +735,6 @@ void XLink::ShowNode(FILE * file, XTensor * node) ...@@ -737,11 +735,6 @@ void XLink::ShowNode(FILE * file, XTensor * node)
} }
} }
fprintf(file, "shape[%d] ", node->order);
for (int i = 0; i < node->order; i++)
fprintf(file, "%d ", node->GetDim(i));
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
......
...@@ -53,6 +53,7 @@ XMem::XMem() ...@@ -53,6 +53,7 @@ XMem::XMem()
strcpy(name, "xmem"); strcpy(name, "xmem");
signature = 0; signature = 0;
mergeFreeOTF = true; mergeFreeOTF = true;
isInitialized = false;
} }
/* /*
...@@ -169,6 +170,7 @@ void XMem::Initialize(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int m ...@@ -169,6 +170,7 @@ void XMem::Initialize(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int m
#endif #endif
signature++; signature++;
isInitialized = true;
} }
/* free memory */ /* free memory */
...@@ -305,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation) ...@@ -305,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
cublasDestroy(cublasHandle); cublasDestroy(cublasHandle);
if(myIsForComputation) if(myIsForComputation)
CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS, CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
"Cannot create the cublas handle."); "Cannot create the cublas handle.");
SetDevice(devIDBackup); SetDevice(devIDBackup);
#endif #endif
...@@ -321,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -321,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{ {
delete[] memIndex; delete[] memIndex;
delete[] memIndex2; delete[] memIndex2;
delete[] minSizeIndex; delete[] minSizeIndex;
nodeNum = indexSize; nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2; nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum; indexEntryNum = minSizeNum;
memIndex = new MPieceNode[nodeNum]; memIndex = new MPieceNode[nodeNum];
memset(memIndex, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);
...@@ -333,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -333,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
memIndex2 = new MPieceNode[nodeNum]; memIndex2 = new MPieceNode[nodeNum];
memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);
minSizeIndex = new MTYPE[indexEntryNum]; minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum); memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex[0] = minSizeFirst; minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++) for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2; minSizeIndex[i] = minSizeIndex[i - 1] * 2;
indexOffset = GetMSB(minSizeFirst); indexOffset = GetMSB(minSizeFirst);
} }
...@@ -757,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -757,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
/* if all index nodes are used, we rebuild the index to release the nodes that are free */ /* if all index nodes are used, we rebuild the index to release the nodes that are free */
if(nodeNumUsed == nodeNum){ if(nodeNumUsed == nodeNum){
RebuildIndex(); RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!"); CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
} }
/*if(testxmemid == 30){ /*if(testxmemid == 30){
...@@ -961,8 +963,8 @@ release a piece of memory as "free" ...@@ -961,8 +963,8 @@ release a piece of memory as "free"
*/ */
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size) void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{ {
if(p == NULL) if(p == NULL)
return; return;
if(size <= minSizeIndex[0]) if(size <= minSizeIndex[0])
size = minSizeIndex[0]; size = minSizeIndex[0];
...@@ -1092,7 +1094,7 @@ void XMem::RebuildIndex() ...@@ -1092,7 +1094,7 @@ void XMem::RebuildIndex()
block->mem = NULL; block->mem = NULL;
} }
else{ else{
/* if the block is in use, we build the index */ /* if the block is in use, we build the index */
int pieceCount = 0; int pieceCount = 0;
MTYPE size = 0; MTYPE size = 0;
MHeader * newLast = NULL; MHeader * newLast = NULL;
...@@ -1579,11 +1581,6 @@ void XMemManager::Initialize() ...@@ -1579,11 +1581,6 @@ void XMemManager::Initialize()
/* CPUs (we actually do not care about how many CPUs are using) */ /* CPUs (we actually do not care about how many CPUs are using) */
nCPUMem = 1; nCPUMem = 1;
MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
/* GPUs */ /* GPUs */
nGPUMem = 0; nGPUMem = 0;
...@@ -1592,23 +1589,16 @@ void XMemManager::Initialize() ...@@ -1592,23 +1589,16 @@ void XMemManager::Initialize()
XPRINT(0, stderr, "cannot get GPU information."); XPRINT(0, stderr, "cannot get GPU information.");
exit(1); exit(1);
} }
for (int i = 0; i < nGPUMem; i++) {
MTYPE freeMem = GetAvailableGPUMemory(i);
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
GPUMems[i].Initialize(i, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
}
#endif #endif
} }
/* free it */ /* free it */
void XMemManager::Free() void XMemManager::Free()
{ {
for (int i = 0; i < MAX_CPU_NUM; i++) for (int i = 0; i < MAX_CPU_MEM_NUM; i++)
CPUMems[i].Free(); CPUMems[i].Free();
for (int i = 0; i < MAX_GPU_NUM; i++) for (int i = 0; i < MAX_GPU_MEM_NUM; i++)
GPUMems[i].Free(); GPUMems[i].Free();
} }
...@@ -1616,13 +1606,34 @@ void XMemManager::Free() ...@@ -1616,13 +1606,34 @@ void XMemManager::Free()
XMem * XMemManager::GetMem(const int devID) XMem * XMemManager::GetMem(const int devID)
{ {
XMem * mem = NULL; XMem * mem = NULL;
if (devID < 0) if (devID < 0){
if(!CPUMems[0].isInitialized){
MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, FREE_ON_THE_FLY,
MIN_BLOCK_SIZE_FOR_MEMPOOL,
MIN_BLOCK_NUM_FOR_MEMPOOL,
myBufSize);
}
mem = CPUMems; mem = CPUMems;
}
else{ else{
if (devID < nGPUMem) if (devID < nGPUMem){
if(!GPUMems[devID].isInitialized){
MTYPE freeMem = GetAvailableGPUMemory(devID);
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
GPUMems[devID].Initialize(devID, FREE_ON_THE_FLY,
MIN_BLOCK_SIZE_FOR_MEMPOOL,
MIN_BLOCK_NUM_FOR_MEMPOOL,
myBufSize);
}
mem = GPUMems + devID; mem = GPUMems + devID;
else }
else{
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID); XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
}
} }
return mem; return mem;
......
...@@ -60,10 +60,10 @@ typedef long long INT_64; ...@@ -60,10 +60,10 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1 #define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH #define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256 #define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024 #define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024 #define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_NUM 16 #define MAX_CPU_MEM_NUM 16
#define MAX_GPU_NUM 16 #define MAX_GPU_MEM_NUM 16
/* /*
mode of runnig a memory pool mode of runnig a memory pool
...@@ -213,6 +213,9 @@ public: ...@@ -213,6 +213,9 @@ public:
MTYPE curUsedPin; MTYPE curUsedPin;
MTYPE bufUsedPin; MTYPE bufUsedPin;
/* indicates whether the memory pool is initialized */
bool isInitialized;
#ifdef USE_CUDA #ifdef USE_CUDA
/* handle used for cublas */ /* handle used for cublas */
cublasHandle_t cublasHandle; cublasHandle_t cublasHandle;
...@@ -429,15 +432,15 @@ a class for the management of memory ...@@ -429,15 +432,15 @@ a class for the management of memory
*/ */
class XMemManager class XMemManager
{ {
public: private:
/* cpu memory pool information */ /* cpu memory pool information */
XMem CPUMems[MAX_CPU_NUM]; XMem CPUMems[MAX_CPU_MEM_NUM];
/* number of cpu memory pools */ /* number of cpu memory pools */
int nCPUMem; int nCPUMem;
/* gpu memory pool information */ /* gpu memory pool information */
XMem GPUMems[MAX_GPU_NUM]; XMem GPUMems[MAX_GPU_MEM_NUM];
/* number of gpu memory pools */ /* number of gpu memory pools */
int nGPUMem; int nGPUMem;
......
...@@ -526,7 +526,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem) ...@@ -526,7 +526,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
isInGlobalMem = false; isInGlobalMem = false;
} }
else { else {
ShowNTErrors("TODO!"); myMem = GMems.GetMem(myDevId);
} }
} }
...@@ -923,11 +923,9 @@ set the tensor items by a normal distribution ...@@ -923,11 +923,9 @@ set the tensor items by a normal distribution
void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation) void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
{ {
// TODO: cuda code!!!!!!! // TODO: cuda code!!!!!!!
if (data == NULL) if (data == NULL)
return; return;
// srand((unsigned)time(0));
void * d = NULL; void * d = NULL;
if (dataType == X_FLOAT) { if (dataType == X_FLOAT) {
d = new float[unitNum]; d = new float[unitNum];
...@@ -1330,7 +1328,7 @@ set the value of a cell ...@@ -1330,7 +1328,7 @@ set the value of a cell
*/ */
bool XTensor::Set(DTYPE value, int index[], int size) bool XTensor::Set(DTYPE value, int index[], int size)
{ {
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
return SetToDevice(devID, GetCell(index, size), value); return SetToDevice(devID, GetCell(index, size), value);
} }
...@@ -1826,7 +1824,6 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1826,7 +1824,6 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, "%s ", label); fprintf(file, "%s ", label);
if(isInit){ if(isInit){
fprintf(file, "id=%d ", id);
fprintf(file, "order=%d dimsize=", order); fprintf(file, "order=%d dimsize=", order);
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]); fprintf(file, "%d", dimSize[i]);
...@@ -1879,7 +1876,149 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1879,7 +1876,149 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, "[%d]%e ", key, value); fprintf(file, "[%d]%e ", key, value);
} }
} }
fprintf(file, "\n\n"); fprintf(file, "\n");
if (isNewData) {
delete[](char*)d;
#ifdef USE_CUDA
if (devID >= 0)
dataHost = NULL;
#endif
}
}
void * RecursionData(XTensor * s, int dim, int * index, void * d, FILE * file)
{
if (dim == s->order - 2) {
/* print index */
printf("Index: ");
for (int i = 0; i < s->order-2; i++)
printf("[%d]", index[i]);
int dimSize1 = s->dimSize[dim];
int dimSize2 = s->dimSize[dim+1];
printf(" %d * %d\n", dimSize1, dimSize2);
/* print 2D data */
if (s->dataType == X_FLOAT) {
float * data = (float*)d;
for (int i = 0; i < dimSize1; i++) {
printf("\t");
for (int j = 0; j < dimSize2; j++)
fprintf(file, "%e ", *data++);
fprintf(file, "\n");
}
d = (float*)d + dimSize1 *dimSize2;
}
else if (s->dataType == X_INT) {
int * data = (int*)d;
for (int i = 0; i < dimSize1; i++) {
printf("\t");
for (int j = 0; j < dimSize2; j++)
fprintf(file, "%d ", *data++);
fprintf(file, "\n");
}
d = (int*)d + dimSize1 *dimSize2;
}
else
ShowNTErrors("TODO!");
return d;
}
/* recursion for deeper dimsion */
int levelSize = s->dimSize[dim];
for (int k = 0; k < levelSize; k++) {
index[dim] = k;
d = RecursionData(s, dim+1, index, d, file);
}
return d;
}
/*
dump data to a file
>> file - where to domp the data
>> label - label of the tensor
>> n - number of items to dump
>> beg - the first item id
>> verbose - verbose level
*/
void XTensor::DumpFormat(FILE * file, const char * label, const int n, const int beg, const int verbose)
{
if (verbose > verboseLevel)
return;
void * d = data;
bool isNewData = false;
#ifdef USE_CUDA
if (devID >= 0) {
CudaGPUToCPUFlush(this);
d = dataHost;
isNewData = true;
}
#endif
if (d == NULL) {
if (isSparse) {
int num = 0;
for (int i = 0; i < order; i++)
num *= dimSizeRDI[i];
num = int(num * denseRatio + 1);
int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num);
d = new char[size];
memset(d, 0, size);
}
else {
d = new char[unitNum * unitSize];
memset(d, 0, unitNum * unitSize);
}
isNewData = true;
}
if (label != NULL)
fprintf(file, "%s ", label);
if(isInit){
fprintf(file, "id=%d ", id);
fprintf(file, "order=%d dimsize=", order);
for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]);
if (i < order - 1)
fprintf(file, ",");
}
}
else{
fprintf(file, "order=-1 dimsize=-1");
}
fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio);
if(!isInit){
fprintf(file, "NULL");
}
if (order == 1) {
for (int i = 0; i < unitNum; i++) {
if (dataType == X_FLOAT)
fprintf(file, "%e ", ((float*)d)[i]);
else if (dataType == X_INT)
fprintf(file, "%d ", ((int*)d)[i]);
else
ShowNTErrors("TODO!");
}
printf("\n");
}
/* print multi-dimensional tensor */
else {
int * index = new int[order];
RecursionData(this, 0, index, d, file);
delete[] index;
}
fprintf(file, "\n");
if (isNewData) { if (isNewData) {
delete[](char*)d; delete[](char*)d;
...@@ -2184,6 +2323,11 @@ void InitTensorV2(XTensor * tensor, ...@@ -2184,6 +2323,11 @@ void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID) const int myDevID)
{ {
if (tensor->mem == NULL) {
XMem * myMem = GMems.GetMem(myDevID);
tensor->mem = myMem;
tensor->devID = myMem->devID;
}
if(tensor->mem != NULL){ if(tensor->mem != NULL){
tensor->Resize(myOrder, myDimSize, myDataType, 1.0F); tensor->Resize(myOrder, myDimSize, myDataType, 1.0F);
} }
...@@ -2488,9 +2632,8 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference) ...@@ -2488,9 +2632,8 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
return; return;
tensor->enableGrad = reference->enableGrad; tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize, InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio, reference->dataType, -1);
-1);
} }
/* generate a XTensor with no initialization */ /* generate a XTensor with no initialization */
...@@ -2565,9 +2708,6 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, ...@@ -2565,9 +2708,6 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem); XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem);
if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200);
}
if(myMem != NULL) if(myMem != NULL)
tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize); tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize);
else else
...@@ -2593,7 +2733,7 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize, ...@@ -2593,7 +2733,7 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
dims[0] = -abs(dims[0]); dims[0] = -abs(dims[0]);
XTensor * tensor = NewTensor(myOrder, dims, myDataType, 1.0F, devID); XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID);
if (tensor->unitNum * tensor->unitSize == 176657664) { if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200); tensor->Dump(stderr, "", 200);
......
...@@ -425,6 +425,9 @@ public: ...@@ -425,6 +425,9 @@ public:
/* dump data to a file */ /* dump data to a file */
void Dump(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0); void Dump(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a file */
void DumpFormat(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a file */ /* dump data to a file */
static static
...@@ -459,7 +462,7 @@ extern int MakeTensorID(); ...@@ -459,7 +462,7 @@ extern int MakeTensorID();
void InitTensor(XTensor * tensor, void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL); const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */ /* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor, void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
......
...@@ -125,7 +125,7 @@ void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alph ...@@ -125,7 +125,7 @@ void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alph
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum, CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!");
int stride = 1; int stride = 1;
int blockSizeA = 1; int blockSizeA = 1;
......
...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */ i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0); void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);
/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n); ...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
/* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting */ i.e., a is multiplied with b by broadcasting */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink = false); void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE ...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b); XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink = false); void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
{ {
n = MODX(n, a->order); n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!"); "Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!"); CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
if (beta == 0) { if (beta == 0) {
_CopyValues(a, c); _CopyValues(a, c);
return; return;
} }
if (XTensor::IsSameShaped(a, b)) { if (XTensor::IsSameShaped(a, b)) {
_Sub(a, b, c, beta); _Sub(a, b, c, beta);
return; return;
} }
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSubDim(a, b, c, n, beta); _CudaSubDim(a, b, c, n, beta);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
} }
else { else {
int stride = 1; int stride = 1;
int blockSize = a->dimSize[n]; int blockSize = a->dimSize[n];
int blockNum = 1; int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) { for (int i = a->order - 1; i >= 0; i--) {
if (i > n) if (i > n)
stride *= a->dimSize[i]; stride *= a->dimSize[i];
else if (i < n) else if (i < n)
blockNum *= a->dimSize[i]; blockNum *= a->dimSize[i];
} }
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
int num = a->unitNum; int num = a->unitNum;
if (stride > 1) { if (stride > 1) {
for (int i = 0, j = 0; i < num; i += stride, j++) { for (int i = 0, j = 0; i < num; i += stride, j++) {
DTYPE * ap = (DTYPE*)a->data + i; DTYPE * ap = (DTYPE*)a->data + i;
DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta; DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta;
DTYPE * cp = (DTYPE*)c->data + i; DTYPE * cp = (DTYPE*)c->data + i;
for (int k = 0; k < stride; k++) for (int k = 0; k < stride; k++)
cp[k] = ap[k] - bv; cp[k] = ap[k] - bv;
} }
} }
else if (stride == 1) { else if (stride == 1) {
DTYPE * bp = (DTYPE*)b->data; DTYPE * bp = (DTYPE*)b->data;
for (int i = 0; i < num; i += blockSize) { for (int i = 0; i < num; i += blockSize) {
DTYPE * ap = (DTYPE*)a->data + i; DTYPE * ap = (DTYPE*)a->data + i;
DTYPE * cp = (DTYPE*)c->data + i; DTYPE * cp = (DTYPE*)c->data + i;
if (beta == 1.0F) { if (beta == 1.0F) {
for (int j = 0; j < blockSize; j++) for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j]; cp[j] = ap[j] - bp[j];
} }
else { else {
for (int j = 0; j < blockSize; j++) for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j] * beta; cp[j] = ap[j] - bp[j] * beta;
} }
} }
} }
else { else {
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
} }
} }
/* /*
...@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting ...@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting
*/ */
void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta) void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta)
{ {
_SubDim(a, b, a, n, beta); _SubDim(a, b, a, n, beta);
} }
/* /*
...@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting ...@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting
*/ */
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
n = MODX(n, a.order); n = MODX(n, a.order);
/* call _Sub function */ /* call _Sub function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
return c; return c;
} }
/* /*
...@@ -183,9 +183,8 @@ i.e., a is subtracted with b by broadcasting ...@@ -183,9 +183,8 @@ i.e., a is subtracted with b by broadcasting
>> c - where we put a-b*\beta. we save it in a if c is NULL >> c - where we put a-b*\beta. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink) void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -194,7 +193,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b ...@@ -194,7 +193,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b
/* call _Sub function */ /* call _Sub function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector ...@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector
*/ */
template <class T, bool betaFired> template <class T, bool betaFired>
__global__ __global__
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta) void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
{ {
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int col = blockDim.x * blockIdx.x + threadIdx.x; int col = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
if (col >= colNum || row >= rowNum) if (col >= colNum || row >= rowNum)
return; return;
if (threadIdx.y == 0) if (threadIdx.y == 0)
bv[threadIdx.x] = b[col]; bv[threadIdx.x] = b[col];
__syncthreads(); __syncthreads();
int offset = colNum * row + col; int offset = colNum * row + col;
if (betaFired) if (betaFired)
c[offset] = a[offset] - bv[threadIdx.x] * beta; c[offset] = a[offset] - bv[threadIdx.x] * beta;
else else
c[offset] = a[offset] - bv[threadIdx.x]; c[offset] = a[offset] - bv[threadIdx.x];
} }
/* /*
...@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector ...@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector
*/ */
template <class T, bool betaFired> template <class T, bool betaFired>
__global__ __global__
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta) void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
{ {
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int colIndex = blockDim.x * blockIdx.x + threadIdx.x; int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
int col = colIndex % colNum; int col = colIndex % colNum;
int block = colIndex / colNum; int block = colIndex / colNum;
if (row >= rowNum || block >= blockNum) if (row >= rowNum || block >= blockNum)
return; return;
if (threadIdx.x == 0) if (threadIdx.x == 0)
bv[threadIdx.y] = b[row]; bv[threadIdx.y] = b[row];
__syncthreads(); __syncthreads();
int offset = block * blockSize + row * colNum + col; int offset = block * blockSize + row * colNum + col;
if (betaFired) if (betaFired)
c[offset] = a[offset] - bv[threadIdx.y] * beta; c[offset] = a[offset] - bv[threadIdx.y] * beta;
else else
c[offset] = a[offset] - bv[threadIdx.y]; c[offset] = a[offset] - bv[threadIdx.y];
} }
/* /*
...@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting ...@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting
*/ */
void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta) void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
{ {
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!"); "Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!"); CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
int stride = 1; int stride = 1;
int blockSize = a->dimSize[n]; int blockSize = a->dimSize[n];
int blockNum = 1; int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) { for (int i = a->order - 1; i >= 0; i--) {
if (i > n) if (i > n)
stride *= a->dimSize[i]; stride *= a->dimSize[i];
else if (i < n) else if (i < n)
blockNum *= a->dimSize[i]; blockNum *= a->dimSize[i];
} }
int cudaGrids[3]; int cudaGrids[3];
int cudaBlocks[3]; int cudaBlocks[3];
int devIDBackup = 0; int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
if (stride > 1) { if (stride > 1) {
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F) if (beta == (DTYPE)1.0F)
KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>> KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta); blockSize, stride, blockSize * stride, blockNum, beta);
else else
KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>> KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta); blockSize, stride, blockSize * stride, blockNum, beta);
} }
else if (stride == 1) { else if (stride == 1) {
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F) if (beta == (DTYPE)1.0F)
KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta); blockNum, blockSize, beta);
else else
KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta); blockNum, blockSize, beta);
} }
else { else {
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
} }
#endif #endif
......
...@@ -134,12 +134,12 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot, ...@@ -134,12 +134,12 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot,
for (int i = 0; i < index->order; i++) for (int i = 0; i < index->order; i++)
CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!"); CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");
onehot->SetZeroAll(); //onehot->SetZeroAll();
float confidence = 1 - labelSmoothingP; float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size; float lowconfidence = labelSmoothingP / size;
//_SetDataFixedFloat(onehot, lowconfidence); _SetDataFixedFloat(onehot, lowconfidence);
#ifdef USE_CUDA #ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) { if(onehot->devID >= 0 && index->devID >= 0) {
......
...@@ -110,13 +110,11 @@ void KernelIndexToOnehot(DTYPE * onehotData, int * indexData, int blockNum, int ...@@ -110,13 +110,11 @@ void KernelIndexToOnehot(DTYPE * onehotData, int * indexData, int blockNum, int
DTYPE * od = onehotData + i * stride; DTYPE * od = onehotData + i * stride;
int id = indexData[i]; int id = indexData[i];
//od[id] = confidence;
if (offset == id) if (offset == id)
od[offset] = confidence; od[offset] = confidence;
else{ //else
od[offset] = lowconfidence; // od[offset] = lowconfidence;
}
} }
/* /*
...@@ -126,7 +124,8 @@ convert index tensor to onehot tensor (cuda version) ...@@ -126,7 +124,8 @@ convert index tensor to onehot tensor (cuda version)
>> onehot - onehot tensor, which value is 0 or 1 >> onehot - onehot tensor, which value is 0 or 1
>> size - the last dimension size of the onehot tensor >> size - the last dimension size of the onehot tensor
*/ */
void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence) void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float confidence, float lowconfidence)
{ {
int devID = onehot->devID; int devID = onehot->devID;
......
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* /*
set an integer data array with a fixed value p (in int) set an integer data array with a fixed value p (in int)
>> d - pointer to the data array >> d - pointer to the data array
...@@ -740,4 +742,5 @@ void _CudaSetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * va ...@@ -740,4 +742,5 @@ void _CudaSetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * va
BacktoCudaDev(tensor->devID, devIDBackup); BacktoCudaDev(tensor->devID, devIDBackup);
} }
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel) ...@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size) void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) { if (i < size) {
if (a[i] > upper) if (a[i] > upper)
b[i] = upper; b[i] = upper;
else if (a[i] < lower) else if (a[i] < lower)
b[i] = lower; b[i] = lower;
else else
b[i] = a[i]; b[i] = a[i];
} }
} }
/* /*
...@@ -62,7 +62,7 @@ This is for float16 computation ...@@ -62,7 +62,7 @@ This is for float16 computation
__global__ __global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size) void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{ {
return; return;
} }
/* /*
...@@ -74,31 +74,31 @@ set each entry to its clip value ...@@ -74,31 +74,31 @@ set each entry to its clip value
*/ */
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{ {
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]); dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]); dim3 threads(blockSize[0]);
int devIDBackup; int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum); KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum); KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
} }
/* /*
......
...@@ -41,19 +41,19 @@ float shflDownReduceMax(float input) ...@@ -41,19 +41,19 @@ float shflDownReduceMax(float input)
"{" "{"
".reg .f32 r0;" ".reg .f32 r0;"
".reg .pred p;" ".reg .pred p;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;" "shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"setp.lt.f32 p,%1,r0;" "setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;" "@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x8, 0xf;" "shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"setp.lt.f32 p,%1,r0;" "setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;" "@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x4, 0x7;" "shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"setp.lt.f32 p,%1,r0;" "setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;" "@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x2, 0x3;" "shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"setp.lt.f32 p,%1,r0;" "setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;" "@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x1, 0x1;" "shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"setp.lt.f32 p, %1, r0; " "setp.lt.f32 p, %1, r0; "
"@p mov.f32 %1,r0;" "@p mov.f32 %1,r0;"
"mov.f32 %0,%1;" "mov.f32 %0,%1;"
...@@ -73,19 +73,19 @@ int shflDownReduceMax(int input) ...@@ -73,19 +73,19 @@ int shflDownReduceMax(int input)
"{" "{"
".reg .s32 r0;" ".reg .s32 r0;"
".reg .pred p;" ".reg .pred p;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;" "shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"setp.lt.s32 p,%1,r0;" "setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;" "@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x8, 0xf;" "shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"setp.lt.s32 p,%1,r0;" "setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;" "@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x4, 0x7;" "shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"setp.lt.s32 p,%1,r0;" "setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;" "@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x2, 0x3;" "shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"setp.lt.s32 p,%1,r0;" "setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;" "@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x1, 0x1;" "shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"setp.lt.s32 p, %1, r0; " "setp.lt.s32 p, %1, r0; "
"@p mov.s32 %1,r0;" "@p mov.s32 %1,r0;"
"mov.s32 %0,%1;" "mov.s32 %0,%1;"
......
...@@ -37,15 +37,15 @@ float shflDownReduceSum(float input) ...@@ -37,15 +37,15 @@ float shflDownReduceSum(float input)
asm volatile( asm volatile(
"{" "{"
".reg .f32 r0;" ".reg .f32 r0;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;" "shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"add.f32 %1, r0, %1;" "add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x8, 0xf;" "shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"add.f32 %1, r0, %1;" "add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x4, 0x7;" "shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"add.f32 %1, r0, %1;" "add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x2, 0x3;" "shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"add.f32 %1, r0, %1;" "add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x1, 0x1;" "shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"add.f32 %0, r0, %1;" "add.f32 %0, r0, %1;"
"}" "}"
: "=f"(output) : "f"(input)); : "=f"(output) : "f"(input));
...@@ -62,15 +62,15 @@ int shflDownReduceSum(int input) ...@@ -62,15 +62,15 @@ int shflDownReduceSum(int input)
asm volatile( asm volatile(
"{" "{"
".reg .s32 r0;" ".reg .s32 r0;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;" "shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"add.s32 %1, r0, %1;" "add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x8, 0xf;" "shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"add.s32 %1, r0, %1;" "add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x4, 0x7;" "shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"add.s32 %1, r0, %1;" "add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x2, 0x3;" "shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"add.s32 %1, r0, %1;" "add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x1, 0x1;" "shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"add.s32 %0, r0, %1;" "add.s32 %0, r0, %1;"
"}" "}"
: "=r"(output) : "r"(input)); : "=r"(output) : "r"(input));
......
...@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing ...@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing
*/ */
void _SortMe(XTensor * a, XTensor * index, int dim); void _SortMe(XTensor * a, XTensor * index, int dim);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void SortMe(XTensor & a, XTensor & index, int dim);
/* /*
sort the data along a given dimension (return an XTensor structure) sort the data along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -171,7 +171,7 @@ float broadcast(float input) ...@@ -171,7 +171,7 @@ float broadcast(float input)
float output; float output;
asm( asm(
"{" "{"
"shfl.idx.b32 %0,%1,0x0,0x1f;" "shfl.sync.idx.b32 %0,%1,0x0,0x1f,0xffffffff;"
"}" "}"
:"=f"(output) : "f"(input) :"=f"(output) : "f"(input)
); );
......
...@@ -35,7 +35,7 @@ bool Test() ...@@ -35,7 +35,7 @@ bool Test()
wrong = !TestConcatenate() || wrong; wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong; wrong = !TestConcatenateSolely() || wrong;
wrong = !TestCos() || wrong; wrong = !TestCos() || wrong;
wrong = !TestConvertDataType() || wrong; //wrong = !TestConvertDataType() || wrong;
wrong = !TestCopyIndexed() || wrong; wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong; wrong = !TestCopyValues() || wrong;
wrong = !TestDiv() || wrong; wrong = !TestDiv() || wrong;
...@@ -79,7 +79,7 @@ bool Test() ...@@ -79,7 +79,7 @@ bool Test()
wrong = !TestXMem() || wrong; wrong = !TestXMem() || wrong;
wrong = !TestCrossEntropy() || wrong; wrong = !TestCrossEntropy() || wrong;
wrong = !TestDropout() || wrong; wrong = !TestDropout() || wrong;
wrong = !TestHardTanH() || wrong; wrong = !TestHardTanH() || wrong;
wrong = !TestIdentity() || wrong; wrong = !TestIdentity() || wrong;
wrong = !TestLogSoftmax() || wrong; wrong = !TestLogSoftmax() || wrong;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论