Commit 7786f2b7 by 张裕浩

Test and merge.

parent 467c2ed7
......@@ -27,7 +27,6 @@
#include "../tensor/test/Test.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
//#include "../tensor/timer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
......@@ -36,37 +35,19 @@
void BackwardTest();
void TransposeTest();
void SumDimTest();
void SplitBackwardTest();
void MemTest();
using namespace nts;
using namespace fnnlm;
using namespace transformer;
void test()
{
XTensor a;
InitTensor2D(&a, 100, 100, X_FLOAT, 0);
XTensor b;
InitTensor2D(&b, 100, 100, X_FLOAT16, 0);
_ConvertDataType(&a, &b);
return;
}
int main( int argc, const char ** argv )
{
//timer_c asd;
test();
//MemTest();
//return 0;
//SplitBackwardTest();
//return 0;
//_CrtSetBreakAlloc(896);
//BackwardTest();
//return 0;
/*if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
//_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
//_CrtSetBreakAlloc(2708);
if(argc > 1 && !strcmp(argv[1], "-test"))
Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-t2t"))
TransformerMain(argc - 1, argv + 1);
......@@ -75,7 +56,8 @@ int main( int argc, const char ** argv )
fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}*/
fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
}
//_CrtDumpMemoryLeaks();
......@@ -89,6 +71,9 @@ void BackwardTest()
XTensor a;
XTensor b;
XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean;
XTensor origin;
InitTensor2D(&a, 2, 3);
......@@ -106,14 +91,15 @@ void BackwardTest()
b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1);
c = DivDim(a, b, 0);
DivDim(a, b, c, 0);
c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c);
net.Backward(c);
net.Backward(loss);
net.Dump(stderr);
a.grad->Dump(stderr);
}
......@@ -229,67 +215,3 @@ void SumDimTest()
delete[] data;
}
/*
scratch test for back-propagation through Split:
sum two random 2 x 4 tensors, split the result into two 2 x 2 tensors,
sum the two pieces and run the backward pass on that sum.

The network state is dumped to stderr and "Done!" is printed to stdout.
*/
void SplitBackwardTest()
{
    /* an int[2] array named "dimSize" used to be allocated here; it was never
       read and never freed, so it has been removed */

    XTensor t1;
    InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
    XTensor t2;
    InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);

    XTensor tensor;

    //_SetDataFixedFloat(&t1, 1.0F);
    //_SetDataFixedFloat(&t2, 2.0F);
    t1.SetDataRand();
    t2.SetDataRand();

    tensor = t1 + t2;

    /* the two output tensors of the split */
    XList smalls;
    XTensor first;
    XTensor second;
    InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
    InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
    smalls.Add(&first);
    smalls.Add(&second);

    Split(tensor, smalls, 1, 2);

    XTensor mul;
    mul = Sum(first, second);

    /* back-propagation from the sum of the two pieces */
    XNet net;
    net.Backward(mul);
    net.Dump(stderr);

    printf("Done!");
}
/*
scratch test for the memory pool:
create a pool, initialize a 2 x 4 tensor with it, delete the pool,
and then inspect the tensor (its "mem" pointer and its content)
*/
void MemTest()
{
XMem * mem;
mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION, 1024, MILLION);
XTensor tensor;
/* the pool is handed to the tensor as the last argument */
InitTensor2D(&tensor, 2, 4, X_FLOAT, 0, mem);
tensor.SetZeroAll();
tensor.Dump(stderr);
/* NOTE(review): the pool is deleted while "tensor" is still alive. If tensor.mem
   still points to it, the "signature" read and the second Dump below touch freed
   memory. This looks intentional (probing what happens after the pool is gone),
   but confirm before reusing this code */
delete mem;
if (tensor.mem != NULL) {
printf("It isn't null!\n");
printf("%d\n", (int)tensor.mem->signature);
}
else {
printf("It's null\n");
}
/* dump again after the pool has been deleted */
tensor.Dump(stderr);
}
\ No newline at end of file
......@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
_HardTanHBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
_IdentityBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS);
}
else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
_RectifyBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
_SigmoidBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
......
......@@ -20,7 +20,9 @@
*/
#include "XBackwardLoss.h"
#include "XNoder.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/getandset/SetData.h"
#include "../tensor/function/HardTanH.h"
#include "../tensor/function/Identity.h"
......@@ -31,6 +33,60 @@
namespace nts{
/*
compute dE/dx of a node that is generated by a loss operation
>> node - the node whose incoming link describes the loss computation:
          tails[0] is the output, tails[1] (if any) is the gold standard,
          and tails[2] (if any, cross entropy only) is the padding
>> isEfficient - not used by this function
*/
void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    int operID = income.typeID;

    CheckNTErrors(income.tailNum >= 1, "Wrong number of tensors for loss computation!");

    XTensor * output = income.tails[0];
    XTensor * gold = NULL;
    XTensor * weight = NULL;
    XTensor * padding = NULL;

    XNoder::MakeGrad(output);
    XTensor * dedy = output->grad;

    /* no gold standard: the gradient of the output is simply filled with ones.
       NOTE(review): this path returns without setting node->visitMark, unlike
       the path below - confirm that this is intended */
    if (income.tailNum == 1) {
        if(dedy->dataType == X_FLOAT)
            _SetDataFixedFloat(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
            _SetDataFixedDouble(dedy, 1.0);
        else if(dedy->dataType == X_INT)
            _SetDataFixedInt(dedy, 1);
        else
            ShowNTErrors("TODO");

        return;
    }

    gold = income.tails[1];

    if(operID == LOSS_CROSSENTROPY) {
        /* an optional third tensor is the padding */
        if (income.tailNum == 3)
            padding = income.tails[2];

        int leadingDim = income.GetParamInt(0);
        CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in cross entropy!");

        /* "weight" is always NULL here */
        _CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
    }
    else{
        ShowNTErrors("Wrong loss function type!");
    }

    node->visitMark = NODE_FINISHED;
}
/*
check whether a node is generated by a loss operation,
i.e., whether the type id of its incoming link shares a bit with LOSS_BASE
>> node - the node to check
<< return - true if (typeID & LOSS_BASE) is non-zero
*/
bool XLossGrad::IsLossOP(XTensor * node)
{
    const int typeID = node->income.typeID;

    if((typeID & LOSS_BASE) == 0)
        return false;

    return true;
}
/*
compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss)
......@@ -42,39 +98,39 @@ compute dE/dx for a given function y = f(x)
>> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy
*/
/*
dispatch the backward computation to the routine that matches funcID.
For FUNC_LOGSOFTMAX and FUNC_SOFTMAX, "params" is read as a pointer to
an int that holds the leading dimension.
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
                        XTensor * dedy, XTensor * dedx, XTensor * padding,
                        int funcID, void * params,
                        LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors(gold && y && x, "Empty input tensors!");
    CheckNTErrors(dedx, "Empty gradient tensors!");
    CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");

    /* functions without extra parameters */
    if(funcID == FUNC_HARDTANH){
        _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
        return;
    }

    if(funcID == FUNC_IDENTITY){
        _IdentityBackward(gold, y, x, dedy, dedx, lossName);
        return;
    }

    if(funcID == FUNC_RECTIFY){
        _RectifyBackward(gold, y, x, dedy, dedx, lossName);
        return;
    }

    if(funcID == FUNC_SIGMOID){
        _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
        return;
    }

    /* functions that need the leading dimension and the padding */
    if(funcID == FUNC_LOGSOFTMAX){
        int dim = *(int*)params;
        _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, dim, lossName);
        return;
    }

    if(funcID == FUNC_SOFTMAX){
        int dim = *(int*)params;
        _SoftmaxBackward(gold, y, x, dedy, dedx, padding, dim, lossName);
        return;
    }

    /* none of the known functions matched */
    ShowNTErrors("wrong function found when call the backward process!");
}
//void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
// XTensor * dedy, XTensor * dedx, XTensor * padding,
// int funcID, void * params,
// LOSS_FUNCTION_NAME lossName)
//{
// CheckNTErrors(gold && y && x, "Empty input tensors!");
// CheckNTErrors(dedx, "Empty gradient tensors!");
// CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
//
// if(funcID == FUNC_HARDTANH){
// _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
// }
// else if(funcID == FUNC_IDENTITY){
// _IdentityBackward(gold, y, x, dedy, dedx, lossName);
// }
// else if(funcID == FUNC_LOGSOFTMAX){
// int leadDim = *(int*)params;
// _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
// }
// else if(funcID == FUNC_RECTIFY){
// _RectifyBackward(gold, y, x, dedy, dedx, lossName);
// }
// else if(funcID == FUNC_SIGMOID){
// _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
// }else if(funcID == FUNC_SOFTMAX){
// int leadDim = *(int*)params;
// _SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
// }
// else{
// ShowNTErrors("wrong function found when call the backward process!");
// }
//
//}
/*
compute dE/dy for variable y and error(loss) function E
......@@ -83,27 +139,27 @@ compute dE/dy for variable y and error(loss) function E
>> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy
*/
/*
fill dE/dy for the output y.
With a gold standard, the cross entropy backward routine is called
(only when lossName is CROSSENTROPY; other names leave dedy untouched).
Without a gold standard, dedy is filled with ones.
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y,
                        XTensor * dedy, XTensor * padding,
                        LOSS_FUNCTION_NAME lossName)
{
    if(gold != NULL){
        if(lossName == CROSSENTROPY)
            _CrossEntropyBackward(dedy, y, gold, NULL, padding);
        return;
    }

    /* no gold standard: set every entry of dE/dy to 1 */
    if(dedy->dataType == X_FLOAT){
        _SetDataFixedFloat(dedy, 1.0F);
    }
    else if(dedy->dataType == X_DOUBLE){
        _SetDataFixedDouble(dedy, 1.0);
    }
    else if(dedy->dataType == X_INT){
        _SetDataFixedInt(dedy, 1);
    }
    else{
        ShowNTErrors("TODO");
    }
}
//void XLossGrad::Compute(XTensor * gold, XTensor * y,
// XTensor * dedy, XTensor * padding,
// LOSS_FUNCTION_NAME lossName)
//{
// if(gold == NULL){
// if(dedy->dataType == X_FLOAT)
// _SetDataFixedFloat(dedy, 1.0F);
// else if(dedy->dataType == X_DOUBLE)
// _SetDataFixedDouble(dedy, 1.0);
// else if(dedy->dataType == X_INT)
// _SetDataFixedInt(dedy, 1);
// else{
// ShowNTErrors("TODO");
// }
// return;
// }
//
// //_LossBackward(dedy, gold, y, lossName);
// if(lossName == CROSSENTROPY)
// _CrossEntropyBackward(dedy, y, gold, NULL, padding);
//
//}
}
\ No newline at end of file
......@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__
......@@ -34,11 +35,19 @@ namespace nts{
class XLossGrad
{
public:
/* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName);
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a Loss computation */
static
bool IsLossOP(XTensor * node);
///* compute dE/dx for a given function y = f(x) */
//void Compute(XTensor * gold, XTensor * y, XTensor * x,
// XTensor * dedy, XTensor * dedx, XTensor * padding,
// int funcID, void * params,
// LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y,
......
......@@ -109,6 +109,11 @@ private:
static
void GradMultiplyDim(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b
where some dimensions of b are of size 1 */
static
void GradMultiplyBroadcast(XTensor * node, bool isEfficient);
/* gradient for negate */
static
void GradNegate(XTensor * node, bool isEfficient);
......@@ -125,14 +130,26 @@ private:
static
void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Scale */
static
void GradScale(XTensor * node, bool isEfficient);
/* gradient for Shift */
static
void GradShift(XTensor * node, bool isEfficient);
/* gradient for Descale */
static
void GradDescale(XTensor * node, bool isEfficient);
/* gradient for Minus */
static
void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */
static
......@@ -143,6 +160,11 @@ private:
static
void GradSumDim(XTensor * node, bool isEfficient);
/* gradient for sum by broadcasting: c = a + b * \beta
where some dimensions of b are of size 1 */
static
void GradSumBroadcast(XTensor * node, bool isEfficient);
/* gradient for reduceMean */
static
void GradReduceMean(XTensor * node, bool isEfficient);
......@@ -158,6 +180,10 @@ private:
/* gradient for reduceVariance */
static
void GradReduceVariance(XTensor * node, bool isEfficient);
/* gradient for operation */
static
void GradMulAndShift(XTensor * node, bool isEfficient);
};
}
......
......@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST)
......@@ -62,7 +64,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
}
}
/* indicates whether the node is for a shape operation */
/* indicates whether the node is for a shape operation */
bool XShapeGrad::IsShapeOP(XTensor * node)
{
XLink &income = node->income;
......@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!");
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
......@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
node->visitMark = NODE_FINISHED;
}
/*
gradient computation for DropoutWithIndex function
>> node - the node whose incoming link holds the input (tails[0]),
          the index tensor (tails[1]) and the scale (parameter 0)
>> isEfficent - not used by this function
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for DropoutWithIndex!");

    /* NOTE(review): tails[1] is read although the check above only
       guarantees one tail - confirm that two tails are always present */
    XTensor * input = income.tails[0];
    XTensor * index = income.tails[1];
    DTYPE scale = income.GetParam(0);

    XNoder::MakeGrad(input);

    //_Identity(node->grad, input->grad);
    _CopyValues(node->grad, input->grad);

    /* remember the shape of node->grad; it is put back on input->grad below */
    int order = node->grad->order;
    int * dimSize = new int[order];

    for (int i = 0; i < order; i++) {
        dimSize[i] = node->grad->dimSize[i];
    }

    /* view input->grad as a 1D tensor of unitNum items */
    int flatSize = input->grad->unitNum;
    input->grad->Reshape(1, &flatSize);

    _DropoutWithIndex(node->grad, index, input->grad);
    _ScaleAndShiftMe(input->grad, scale);

    input->grad->Reshape(order, dimSize);

    /* the shape buffer used to be leaked on every call; release it here
       (assumes Reshape copies the sizes and does not keep the pointer) */
    delete[] dimSize;

    node->visitMark = NODE_FINISHED;
}
/*
gradient for merge
for
......@@ -232,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL;
XList smalls(income.tailNum);
XList smallsGrad(income.tailNum);
TensorList smalls(income.tailNum);
TensorList smallsGrad(income.tailNum);
bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i];
......@@ -242,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
smallsGrad.Add(tail->grad);
if(i > 1){
CheckNTErrors(XTensor::IsSameShaped(last, tail),
CheckNTErrors(_IsSameShaped(last, tail),
"Input tensors must be of the same size!");
}
......@@ -401,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
/* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */
XLink &outgo = node->outgo;
XList splits(outgo.tailNum);
TensorList splits(outgo.tailNum);
int whereToSplit = -1;
int splitNum = 0;
......@@ -411,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1);
if(whereToSplit < 0)
whereToSplit = w;
splitNum++;
......
......@@ -54,6 +54,10 @@ private:
static
void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node, bool isEfficent);
......
......@@ -55,7 +55,7 @@ void XNetClearAll()
/* constructor: start with an empty node list and isGradEfficient switched on */
XNet::XNet()
{
    nodes.Clear();

    /* the flag used to be set to false and then immediately overwritten
       with true; only the effective assignment is kept */
    isGradEfficient = true;
}
/* de-constructor */
......@@ -77,104 +77,20 @@ backward propagation to obtain gradient
>> root - root node (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
void XNet::Backward(XTensor &root)
{
XList roots(1);
TensorList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(NULL);
XList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
(a single root with a gold standard and no padding)
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
    /* wrap the arguments into one-item lists and call the list version */
    XList rootList(1);
    XList goldList(1);
    XList paddingList(1);

    rootList.Add(&root);
    goldList.Add(&gold);
    paddingList.Add(NULL);

    Backward(rootList, goldList, paddingList, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
(a single root with a gold standard and a padding)
>> root - root node (output) of the network
>> gold - gold standard for the output
>> padding - specify a target value that is ignored and does not contribute to the gradient computation
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{
    /* wrap the arguments into one-item lists and call the list version */
    XList rootList(1);
    XList goldList(1);
    XList paddingList(1);

    rootList.Add(&root);
    goldList.Add(&gold);
    paddingList.Add(&padding);

    Backward(rootList, goldList, paddingList, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes (no gold standard and no padding)
>> roots - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
{
    const int rootNum = roots.count;

    /* one NULL gold standard and one NULL padding for every root */
    XList goldList(rootNum);
    XList paddingList(rootNum);

    for(int k = 0; k < rootNum; k++){
        goldList.Add(NULL);
        paddingList.Add(NULL);
    }

    Backward(roots, goldList, paddingList, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
XList paddings(roots.count);
for (int i = 0; i < roots.count; i++)
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
Backward(roots);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> paddings - specify a target value that is ignored
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss)
void XNet::Backward(TensorList &roots)
{
Traverse(roots);
......@@ -187,39 +103,6 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N
node->visitMark = NODE_UNFINISHED;
}
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XTensor * padding = (XTensor*)paddings.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is of no use in the
following process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, padding, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
else {
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, padding, loss);
}
}
/* we compute dE/dy (y is the output) if no predefined activation function is used */
else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, NULL, loss);
}
}
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
......@@ -266,6 +149,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
XFuncGrad::MakeGrad(node, isEfficent);
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent);
else{
ShowNTErrors("Wrong node type!");
}
......@@ -300,7 +185,7 @@ depth-first search (Tarjan's algorithm)
*/
void XNet::Traverse(XTensor &root)
{
XList roots(1);
TensorList roots(1);
roots.Add(&root);
Traverse(roots);
......@@ -311,7 +196,7 @@ traverse the net and find the topological order by
depth-first search (Tarjan's algorithm)
>> roots - a list of roots (or output nodes)
*/
void XNet::Traverse(XList &roots)
void XNet::Traverse(TensorList &roots)
{
id = MakeNetID();
nodes.Clear();
......@@ -336,7 +221,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering)
>> orders - topological order of the nodes
>> code - code of the network
*/
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
void XNet::TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code)
{
if(node == NULL)
return;
......@@ -444,7 +329,7 @@ show network topology
*/
void XNet::ShowNetwork(FILE * file, XTensor * node)
{
XList roots(1);
TensorList roots(1);
roots.Add(node);
Traverse(roots);
......@@ -458,4 +343,14 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
}
}
}
\ No newline at end of file
/*
search for a node in a top-down manner by its name
>> top - the top most node
<< return - the node we found
*/
//XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{
//return XLink::SearchNode(top, name);
//}
}
......@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XNET_H__
#define __XNET_H__
......@@ -36,16 +37,16 @@ struct XNet
unsigned int id;
/* tensor nodes of the network (in order) */
XList nodes;
TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes;
TensorList gradNodes;
/* output nodes of the network */
XList outputs;
TensorList outputs;
/* input nodes of the network */
XList inputs;
TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient;
......@@ -60,25 +61,11 @@ struct XNet
void Clear();
/* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(XTensor &root);
/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(TensorList &roots);
/* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false);
......@@ -92,10 +79,10 @@ struct XNet
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots);
void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code);
void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */
void Dump(FILE * file);
......@@ -111,6 +98,10 @@ struct XNet
/* show network topology */
void ShowNetwork(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */
//static
//XTensor * SearchNode(XTensor * top, const char * name);
};
/* we make a unique id for every tensor */
......
......@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL)
return;
if(!XTensor::IsSameShaped(node, node->grad)){
if(!_IsSameShaped(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
......
......@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#include "../tensor/core/CHeader.h"
#ifndef __XNODER_H__
#define __XNODER_H__
......
......@@ -51,14 +51,12 @@ initialize the model
>> myIgnored - number of position ignored in attention (from the begining)
>> myIsMasked - indicates whether the attention is with a mask
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TAttention::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem)
int myDevID)
{
devID = myDevID;
mem = myMem;
isMasked = myIsMasked;
ignored = myIgnored;
......@@ -71,20 +69,18 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem);
float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
float finfoutv = (float)sqrt(6.0F * scale/(d + dv));
float finfouta = (float)sqrt(6.0F * scale / (d + d));
InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
wk.SetDataRand(-finfoutk, finfoutk);
wq.SetDataRand(-finfoutk, finfoutk);
wv.SetDataRand(-finfoutv, finfoutv);
wa.SetDataRand(-finfouta, finfouta);
float scale = 1.0F;
_SetDataFanInOut(&wk, scale);
_SetDataFanInOut(&wq, scale);
_SetDataFanInOut(&wv, scale);
_SetDataFanInOut(&wa, scale);
_SetDataFanInOut(&wbig, scale);
}
/*
......@@ -103,40 +99,88 @@ XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bo
XTensor k2;
XTensor q2;
XTensor v2;
/* linear transofmration before self-attention */
/* linear transformation before self-attention */
k2 = MMul(k, wk);
q2 = MMul(q, wq);
v2 = MMul(v, wv);
return MakeAttention(k2, q2, v2, mask, isTraining);
}
/*
make the network given a big tensor that keeps keys, queries and values
>> kqv - the big tensor. It is multiplied by wbig, and the result is split
         into three equal parts along dimension 2
>> mask - as it is
>> isTraining - indicates whether the model is used for training
<< return - the output of MakeAttention on the three parts
*/
XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
{
    XTensor keys;
    XTensor queries;
    XTensor values;
    XTensor projected;
    TensorList parts;

    /* one linear transformation for the whole big tensor */
    projected = MMul(kqv, wbig);

    /* every part keeps dimensions 0 and 1 and takes one third of dimension 2 */
    const int dim0 = projected.GetDim(0);
    const int dim1 = projected.GetDim(1);
    const int dimPerPart = projected.GetDim(2) / 3;

    InitTensor3DV2(&keys, dim0, dim1, dimPerPart, X_FLOAT, devID);
    InitTensor3DV2(&queries, dim0, dim1, dimPerPart, X_FLOAT, devID);
    InitTensor3DV2(&values, dim0, dim1, dimPerPart, X_FLOAT, devID);

    /* the parts are listed in the order: queries, keys, values */
    parts.Add(&queries);
    parts.Add(&keys);
    parts.Add(&values);

    Split(projected, parts, 2, 3);

    return MakeAttention(keys, queries, values, mask, isTraining);
}
/*
make the attention network given keys, queries and values (after linear transformation)
>> k - keys. It might be of size B * L * H
       where B = batch size, L = sequence length,
       and H = vector size of each position
>> q - queries
>> v - values
>> mask - as it is (added to the dot-product only when isMasked is set)
>> isTraining - indicates whether the model is used for training
                (dropout is applied only in training and when dropoutP > 0)
<< return - the merged heads multiplied by wa
*/
XTensor T2TAttention::MakeAttention(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining)
{
    XTensor kheads;
    XTensor qheads;
    XTensor vheads;

    /* multi head: split the parameters k, q and v.
       Stale lines that split k2, q2 and v2 (names that are not declared
       in this function) have been removed */
    kheads = Split(k, k.order - 1, nhead);
    qheads = Split(q, q.order - 1, nhead);
    vheads = Split(v, v.order - 1, nhead);

    XTensor att;
    XTensor dot;
    XTensor scalar;

    /* scalar = softmax(Q * K^T / sqrt(dk)) * V */
    dot = BMMul(qheads, X_NOTRANS, kheads, X_TRANS);

    if(isMasked)
        dot = dot + mask;

    dot = Linear(dot, 1.0F/(float)sqrt((float)dk/nhead));

    /* the softmax used to be written twice in a row; one call is enough */
    scalar = Softmax(dot, -1);

    if(isTraining && dropoutP > 0)
        scalar = Dropout(scalar, dropoutP);

    att = BMMul(scalar, vheads);

    /* concatenate the heads */
    return MMul(Merge(att, att.order - 1), wa);
}
......
......@@ -42,9 +42,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* head number */
int nhead;
......@@ -59,7 +56,9 @@ public:
/* transformation after dot-product attention */
XTensor wa;
XTensor wbig;
/* size of transformed Q and K */
int dk;
......@@ -92,10 +91,16 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
int myDevID = -1);
/* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
/* make the network given a big tensor that keeps keys, queries and values */
XTensor MakeBig(XTensor &kqv, XTensor &mask, bool isTraining);
/* make the attention network given keys, queries and values (after linear transformation) */
XTensor MakeAttention(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
};
}
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-25
* it is cold today but i'll move to a warm place tomorrow :)
*/
#ifndef __T2TBATCHLOADER_H__
#define __T2TBATCHLOADER_H__
#include "../../network/XNet.h"
using namespace nts;
namespace transformer
{
/* maximum sequence length (4096). The body is parenthesized so that the macro
   expands as a single value inside larger expressions such as x % MAX_SEQUENCE_LENGTH */
#define MAX_SEQUENCE_LENGTH (1024 * 4)
/* node to keep batch information: a [beg, end] position pair, the maximum word
   numbers on both sides, and a sorting key */
struct BatchNode
{
/* beginning position */
int beg;
/* end position */
int end;
/* maximum word number on the encoder side */
int maxEnc;
/* maximum word number on the decoder side */
int maxDec;
/* a key for sorting */
int key;
};
/* batch loader: keeps word/sequence buffers and the batching options, and
   declares the routines that load buffers and batches of sequences from a file */
class T2TBatchLoader
{
public:
/* buffer for loading words */
int * buf;
/* a second word buffer (its use is not visible in this header) */
int * buf2;
/* buffer of batch nodes */
BatchNode * bufBatch;
/* buffer size */
int bufSize;
/* size of batch buffer */
int bufBatchSize;
/* length of each sequence */
int * seqLen;
/* a second sequence-length array (its use is not visible in this header) */
int * seqLen2;
/* offset of the first word for each sequence */
int * seqOffset;
/* number of sequences in the buffer */
int nseqBuf;
/* offset for next sequence in the buffer */
int nextSeq;
/* offset for next batch */
int nextBatch;
/* indicates whether we double the </s> symbol for the output of LMs */
bool isDoubledEnd;
/* indicates whether we use batchsize = max * sc
rather than batchsize = word-number, where max is the maximum
length and sc is the sentence number */
bool isSmallBatch;
/* counterpart of "isSmallBatch" */
bool isBigBatch;
/* indicates whether the batches are randomized */
bool isRandomBatch;
/* bucket size */
int bucketSize;
public:
/* constructor */
T2TBatchLoader();
/* destructor */
~T2TBatchLoader();
/* initialization (takes the command-line style arguments argc/argv) */
void Init(int argc, char ** argv);
/* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step);
/* clear data buffer */
void ClearBuf();
/* set the random batch flag (defaults to true) */
void SetRandomBatch(bool flag = true);
/* load a batch of sequences
   (isLM presumably selects between the LM and MT loaders below - TODO confirm) */
int LoadBatch(FILE * file, bool isLM,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, bool isTraining);
/* shuffle the data file (takes a source file name and a target file name) */
void Shuffle(const char * srcFile, const char * tgtFile);
};
}
#endif
\ No newline at end of file
......@@ -21,6 +21,8 @@
#include <math.h>
#include "T2TDecoder.h"
#include "T2TUtility.h"
#include "T2TLayerNormal.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
......@@ -29,6 +31,10 @@ namespace transformer
/* constructor: no layer is allocated yet, so every per-layer array is NULL */
AttDecoder::AttDecoder()
{
    /* attention models (self attention and encoder-decoder attention) */
    attentions = attentionsEnde = NULL;

    /* layer normalization for attention and for fnn */
    attLayerNorms = fnnLayerNorms = NULL;

    /* fnn models and layer normalization for encoder-decoder attention */
    fnns = NULL;
    attEndeLayerNorms = NULL;
}
......@@ -36,6 +42,10 @@ AttDecoder::AttDecoder()
/* destructor: releases the per-layer arrays that InitModel allocates with new[].
   The constructor sets every pointer to NULL and delete[] on a null pointer
   does nothing, so this is safe even if InitModel was never called. */
AttDecoder::~AttDecoder()
{
delete[] attentions;
delete[] fnns;
delete[] attLayerNorms;
delete[] fnnLayerNorms;
delete[] attentionsEnde;
delete[] attEndeLayerNorms;
}
......@@ -47,21 +57,43 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
>> myMem - the memory pool
*/
void AttDecoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem)
int myDevID)
{
AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);
CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
/* embedding model */
embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
attLayerNorms = new T2TLN[nlayer];
fnnLayerNorms = new T2TLN[nlayer];
attentionsEnde = new T2TAttention[nlayer];
attEndeLayerNorms = new T2TLN[nlayer];
/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
attentionsEnde[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
for (int i = 0; i < nlayer; i++) {
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
}
}
......@@ -93,7 +125,7 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/******************/
/* self attention */
att = attentions[i].Make(x, x, x, mask, isTraining);
att = attentions[i].MakeBig(x, mask, isTraining);
/* dropout */
if(isTraining && dropoutP > 0)
......@@ -133,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/* layer normalization */
x = fnnLayerNorms[i].Make(res);
}
x.SetName(DECODING_NAME);
return x;
}
......
......@@ -26,10 +26,57 @@
namespace transformer
{
#define DECODING_NAME "decoding"
#define DECODING_INPUT_NAME "decoding_input"
class AttDecoder : public AttEncoder
class AttDecoder
{
public:
/* device id */
int devID;
/* layer number */
int nlayer;
/* hidden layer size of the FNN layer */
int hSize;
/* embedding size */
int eSize;
/* vocabulary size */
int vSize;
/* dropout probability */
DTYPE dropoutP;
/* some positions can be ignored in attention. this is useful in lm where the first position needs
* special design for the attention model. */
int ignored;
/* embedding of word at each position */
T2TEmbedder embedder;
/* FNN model of each layer */
T2TFNN * fnns;
/* attention model of each layer */
T2TAttention * attentions;
/* layer normalization for fnn */
T2TLN * fnnLayerNorms;
/* layer normalization for attention */
T2TLN * attLayerNorms;
/* input tensor of the encoder */
XTensor * input;
/* output tensor of the encoder */
XTensor * output;
/* encoder-decoder attention model of each layer */
T2TAttention * attentionsEnde;
......@@ -45,7 +92,7 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
int myDevID = -1);
/* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
......@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder()
{
devID = -1;
mem = NULL;
vSize = -1;
maxLength = -1;
}
......@@ -46,19 +45,23 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{
devID = myDevID;
mem = myMem;
LoadParamInt(argc, argv, "vsize", &vSize, -1);
if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1);
}
else{
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
}
//LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt(argc, argv, "maxlen", &maxLength, 512);
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v);
......@@ -75,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem);
InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
......@@ -139,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */
//if(!match){
if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem);
InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem);
XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
......@@ -41,9 +41,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */
int vSize;
......@@ -71,7 +68,7 @@ public:
~T2TEmbedder();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
/* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length);
......
......@@ -52,15 +52,12 @@ initialize the model
>> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
>> myMem - the memory pool
*/
>> myDevID - device id*/
void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem)
int myDevID)
{
devID = myDevID;
mem = myMem;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
......@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");
/* embedding model */
embedder.InitModel(argc, argv, devID, mem);
embedder.InitModel(argc, argv, devID);
attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
......@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
fnns[i].InitModel(argc, argv, myDevID, myMem);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID);
}
}
......@@ -103,8 +100,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
x = embedder.Make(input);
//x.Dump(tmpFILE, "embedding: ");
/* dropout */
if(isTraining && dropoutP > 0)
x = Dropout(x, dropoutP);
......@@ -116,8 +111,8 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
XTensor res;
/* self attention */
att = attentions[i].Make(x, x, x, mask, isTraining);
att = attentions[i].MakeBig(x, mask, isTraining);
/* dropout */
if(isTraining && dropoutP > 0)
att = Dropout(att, dropoutP);
......@@ -141,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
/* layer normalization */
x = fnnLayerNorms[i].Make(res);
}
x.SetName(ENCODING_NAME);
input.SetName(ENCODING_INPUT_NAME);
return x;
}
......
......@@ -32,6 +32,9 @@ using namespace nts;
namespace transformer
{
#define ENCODING_NAME "encoding"
#define ENCODING_INPUT_NAME "encoding_input"
/*
base class of the encoder
......@@ -62,9 +65,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* layer number */
int nlayer;
......@@ -115,7 +115,7 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
int myDevID = -1);
/* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
......@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
mem = myMem;
float minmax = 0;
......@@ -62,19 +60,17 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F;
float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize));
float finfout2 = (float)sqrt(6.0F * scale/(hSize + outSize));
w1.SetDataRand(-finfout1, finfout1);
_SetDataFanInOut(&w1, scale);
_SetDataFanInOut(&w2, scale);
b1.SetZeroAll();
w2.SetDataRand(-finfout2, finfout2);
b2.SetZeroAll();
}
......@@ -89,13 +85,15 @@ XTensor T2TFNN::Make(XTensor &input, bool isTraining)
XTensor t1;
/* t1 = max(0, x * w1 + b1) */
t1 = Rectify(MMul(input, w1) + b1);
//t1 = Rectify(MMul(input, w1) + b1);
t1 = Rectify(MulAndShift(input, w1, b1));
if(isTraining && dropoutP > 0)
t1 = Dropout(t1, dropoutP);
/* result = t1 * w2 + b2 */
return MMul(t1, w2) + b2;
//return MMul(t1, w2) + b2;
return MulAndShift(t1, w2, b2);
}
......
......@@ -36,9 +36,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* size of input vector */
int inSize;
......@@ -72,7 +69,7 @@ public:
~T2TFNN();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */
XTensor Make(XTensor &input, bool isTraining);
......
......@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN()
{
devID = -1;
mem = NULL;
d = 0;
}
......@@ -46,18 +45,16 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
mem = myMem;
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem);
InitTensor1D(&b, d, X_FLOAT, devID, mem);
InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll();
......
......@@ -36,9 +36,6 @@ class T2TLN
public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* the transformation matrix w */
XTensor w;
......@@ -57,7 +54,7 @@ public:
~T2TLN();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */
XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "../../tensor/core/CHeader.h"
#include "T2TLengthPenalty.h"
using namespace nts;
namespace transformer
{
/*
length penalty in the style of GNMT: lp = ((5 + n)/(5 + 1))^\alpha,
n being the length of the sequence
>> length - length of the sequence (for each entry)
>> alpha - the parameter that controls the length preference
<< return - length penalty of the sequence (for each entry)
*/
XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
{
    /* the two constants of the formula */
    const int offset = 5;
    const int normalizer = 1 + 5;

    XTensor ratio;
    XTensor penalty;

    /* (n + 5)/(1 + 5), then raised to the power of alpha */
    ratio = (length + offset)/normalizer;
    penalty = Power(ratio, alpha);

    return penalty;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-08
* Start of a new week - I just finished several documents.
* Writing document is harder than writing code :)
*/
#ifndef __T2TLENGTHPENALTY_H__
#define __T2TLENGTHPENALTY_H__
#include "../../tensor/XTensor.h"
using namespace nts;
namespace transformer
{
/* We intend to penalize short sequences because they have a higher score
in the product of a sequence of probability-like terms and so have more
chances to beat others in search. The class only groups static functions. */
class T2TLengthPenalizer
{
public:
/* GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence
>> length - length of the sequence (for each entry)
>> alpha - the parameter that controls the length preference
<< return - length penalty of the sequence (for each entry) */
static
XTensor GNMT(const XTensor & length, float alpha);
};
}
#endif
......@@ -31,15 +31,15 @@
namespace transformer
{
/* a transformer model that keeps parameters of the encoder,
the decoder and the output layer (softmax). Also, it creates
the network used in transformer. */
class T2TModel
{
public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* the encoder */
AttEncoder * encoder;
......@@ -78,10 +78,24 @@ public:
void MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining);
/* make the network for machine translation (with the output softmax layer) */
void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, XTensor &paddingDec, bool isTraining);
void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output,
XTensor &paddingEnc, XTensor &paddingDec, bool isTraining);
/* make the mask for training MT models */
void MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
XTensor &paddingEnc, XTensor &paddingDec,
XTensor &maskEnc, XTensor &maskDec, XTensor &maskEncDec);
/* make the mask of the encoder */
void MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &maskEnc);
/* make the mask of the decoder */
void MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
XTensor &paddingEnc, XTensor &paddingDec,
XTensor &maskDec, XTensor &maskEncDec);
/* get parameter matrics */
void GetParams(XList &list);
void GetParams(TensorList &list);
/* dump the parameters */
void Dump(const char * fn);
......
......@@ -31,7 +31,6 @@ namespace transformer
T2TOutput::T2TOutput()
{
devID = -1;
mem = NULL;
vSize = -1;
inSize = -1;
hSize = -1;
......@@ -47,21 +46,19 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
mem = myMem;
float minmax = 0;
LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
......@@ -93,8 +90,9 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{
XTensor &x = input;
output = LogSoftmax(MMul(x, w), -1);
//output = Softmax(MMul(x, w), -1);
//output = LogSoftmax(MMul(x, w), -1);
output = Softmax(MMul(x, w), -1);
output.SetName(OUTPUT_NAME);
}
}
......@@ -28,6 +28,8 @@ using namespace nts;
namespace transformer
{
#define OUTPUT_NAME "output"
/* output layer */
class T2TOutput
......@@ -36,9 +38,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */
int vSize;
......@@ -59,7 +58,7 @@ public:
~T2TOutput();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */
XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/
#include "T2TPredictor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
namespace transformer
{
/* constructor: an empty bundle with no state allocated */
T2TStateBundle::T2TStateBundle()
{
    states = NULL;

    /* stateNum is read by T2TPredictor::GeneratePaths, so it must not hold an
       indeterminate value on a bundle that has not yet been passed through
       MakeStates() or T2TPredictor::Create() */
    stateNum = 0;

    isStart = false;
}
/* destructor: frees the state array (delete[] accepts a null pointer,
   so no explicit test is needed) */
T2TStateBundle::~T2TStateBundle()
{
    delete[] states;
}
/*
create states
>> num - number of states
*/
void T2TStateBundle::MakeStates(int num)
{
CheckNTErrors(num > 0, "invalid number");
if(states != NULL)
delete[] states;
states = new T2TState[num];
for(int i = 0; i < num; i++){
states[i].prediction = -1;
states[i].pid = T2T_PID_EMPTY;
states[i].isEnd = false;
states[i].isStart = false;
states[i].isCompleted = false;
states[i].prob = 0;
states[i].probPath = 0;
states[i].modelScore = 0;
states[i].nstep = 0;
states[i].last = NULL;
}
stateNum = num;
}
/* constructor: no model or state is attached and no start symbol is set */
T2TPredictor::T2TPredictor()
{
    /* Read() attaches these two pointers and Predict() dereferences them.
       Starting from NULL means a Predict() call issued before Read() hits a
       null pointer rather than an indeterminate one. */
    m = NULL;
    s = NULL;

    startSymbol = -1;
}
/* destructor: the body is empty, i.e., the model and state pointers kept by
   the predictor are not deleted here */
T2TPredictor::~T2TPredictor()
{
}
/*
set up the initial state bundle of a search
>> model - the t2t model (not referenced in the body)
>> top - the top-most layer of the network
>> input - input of the network
>> beamSize - beam size
>> state - the state to be initialized
*/
void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state)
{
    state->layersEnc.Clear();
    state->layersDec.Clear();

    /* search from "top" for the node named ENCODING_NAME */
    XTensor * encoding = XLink::SearchNode(top, ENCODING_NAME);
    CheckNTErrors(encoding != NULL, "No encoding layers found!");

    state->layersEnc.Add(encoding);
    state->layersDec.Add(NULL);

    /* the shape of the input, with the last dimension replaced by the beam size */
    const int order = input->order;
    const int lastDim = order - 1;
    int shape[MAX_TENSOR_DIM_NUM];

    for (int d = 0; d < lastDim; d++)
        shape[d] = input->GetDim(d);
    shape[lastDim] = beamSize;

    /* path probability, step number and end mark all start from zero */
    InitTensorV2(&state->probPath, order, shape, X_FLOAT, input->devID);
    InitTensorV2(&state->nstep, order, shape, X_FLOAT, input->devID);
    InitTensorV2(&state->endMark, order, shape, X_INT, input->devID);

    state->probPath.SetZeroAll();
    state->nstep.SetZeroAll();
    state->endMark.SetZeroAll();

    state->stateNum = 0;
}
/*
set start symbol
>> symbol - the symbol (in integer)
*/
void T2TPredictor::SetStartSymbol(int symbol)
{
startSymbol = symbol;
}
/*
attach a model and a state bundle; Predict() works on the pointers kept here
>> model - the t2t model that keeps the network created so far
>> state - a set of states. It keeps
           1) hypotheses (states)
           2) probabilities of hypotheses
           3) parts of the network for expanding toward the next state
*/
void T2TPredictor::Read(T2TModel * model, T2TStateBundle * state)
{
    this->m = model;
    this->s = state;
}
/*
predict the next state.
The model and the current state bundle are taken from the members m and s,
so Read() must have been called before this function.
>> next - next states (assuming that the current state has been read)
>> encoding - encoder output
>> inputEnc - input of the encoder
>> paddingEnc - padding of the encoder
*/
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
XTensor * inputEnc, XTensor * paddingEnc)
{
int dims[MAX_TENSOR_DIM_NUM];
next->layersEnc.Clear();
next->layersDec.Clear();
AttDecoder &decoder = *m->decoder;
/* word indices of previous positions. In this function the pointer is only
compared with NULL (it is NULL right after Create()) and never dereferenced */
XTensor * inputLast = (XTensor*)s->layersDec.GetItem(0);
/* word indices of positions up to next state */
XTensor inputDec;
/* the first token */
XTensor first;
CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!");
/* "first" has the shape of the encoder input with the last dimension set to 1
and is filled with the start symbol */
for(int i = 0; i < inputEnc->order - 1; i++)
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
if (inputLast == NULL) {
/* first step: the decoder input is the start symbol only */
inputDec = Identity(first);
}
else{
/* later steps: rebuild the paths from the back-pointers of the current
states and put the start symbol in front of them */
inputDec = GeneratePaths(s);
inputDec.SetDevice(inputEnc->devID);
inputDec = Concatenate(first, inputDec, inputDec.order - 1);
}
/* prediction probabilities */
XTensor &output = next->prob;
XTensor decoding;
XTensor decodingStep;
/* the decoder padding has the shape of inputDec and is set to 1 everywhere */
for(int i = 0; i < inputDec.order - 1; i++)
dims[i] = inputDec.GetDim(i);
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1);
XTensor maskDec;
XTensor maskEncDec;
/* decoder mask */
m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec);
/* make the decoding network over the whole decoder input
(the last argument is the isTraining flag) */
decoding = decoder.Make(inputDec, *encoding, maskDec, maskEncDec, false);
XTensor selectSrc;
XTensor selectTgt;
CheckNTErrors(decoding.order >= 2, "The tensor must be of order 2 or larger!");
/* size of dimension (order - 2); index stride - 1 is its last entry */
int stride = decoding.GetDim(decoding.order - 2);
InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1DV2(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0);
selectSrc.SetDevice(decoding.devID);
selectTgt.SetDevice(decoding.devID);
/* the decoder output of the last position */
decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt);
/* generate the output probabilities */
m->outputLayer->Make(decodingStep, output);
next->layersEnc.AddList(&s->layersEnc);
/* NOTE(review): inputDec is a local variable, so the pointer stored here
dangles once this function returns. Within this function item 0 of layersDec
is only tested against NULL; confirm that no other code dereferences it. */
next->layersDec.Add(&inputDec);
next->layersDec.Add(&output);
}
/*
collect the predictions along the back-pointers of every state of the
current step. Row i of the result holds the path of state i, right-aligned:
the newest prediction is in the last column and unused leading entries are 0.
>> state - state bundle of the current step
<< return - a (stateNum x longest-path) integer tensor of predictions
*/
XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
{
    CheckNTErrors(state->stateNum >= 0, "Illegal state!");

    const int stateNum = state->stateNum;

    /* pass 1: the longest chain of back-pointers gives the number of columns */
    int distance = -1;
    for(int i = 0; i < stateNum; i++){
        int nsteps = 0;
        for(T2TState * cur = state->states + i; cur != NULL; cur = cur->last)
            nsteps++;

        if(distance < nsteps)
            distance = nsteps;
    }

    XTensor path;
    InitTensor2DV2(&path, stateNum, distance, X_INT);
    path.SetZeroAll();

    /* pass 2: walk every chain again, filling the row from right to left */
    for(int i = 0; i < stateNum; i++){
        int col = distance;
        for(T2TState * cur = state->states + i; cur != NULL; cur = cur->last)
            path.Set2DInt(cur->prediction, i, --col);
    }

    return path;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start!
*/
#ifndef __T2TPREDICTOR_H__
#define __T2TPREDICTOR_H__
#include "T2TModel.h"
#include "T2TLengthPenalty.h"
namespace transformer
{
#define T2T_PID_EMPTY -1
/* a state of the search, i.e., one hypothesis in translation. It keeps the
   back-pointer that forms the path, the prediction of this step, the scores
   and a few flags. */
class T2TState
{
public:
    /* the prediction (assumed to be an integer) */
    int prediction;

    /* id of the problem, e.g., the sentence id when a number of sentences
       are translated in the batched manner. The hypothesis is empty if
       id = -1 */
    int pid;

    /* whether the state is an end */
    bool isEnd;

    /* whether the state is the start */
    bool isStart;

    /* whether the state is completed */
    bool isCompleted;

    /* probability of the prediction (last state of the path) */
    float prob;

    /* probability of the path */
    float probPath;

    /* model score of the path.
       A model score = path probability + some other stuff */
    float modelScore;

    /* number of steps we have gone over so far */
    int nstep;

    /* pointer to the previous state */
    T2TState * last;
};
/* a bundle of states: the tensors describing all hypotheses of one search
step, plus the array of per-hypothesis T2TState records.
NOTE(review): "states" is a raw array that the destructor releases with
delete[], and no copy constructor or assignment operator is declared here,
so copying a bundle would leave two objects deleting the same array -
confirm that bundles are never copied. */
class T2TStateBundle
{
public:
/* predictions */
XTensor prediction;
/* id of the previous state that generates the current one */
XTensor preID;
/* mark that indicates whether each hypothesis is completed */
XTensor endMark;
/* probability of every prediction (last state of the path) */
XTensor prob;
/* probability of every path */
XTensor probPath;
/* model score of every path */
XTensor modelScore;
/* step number of each hypothesis */
XTensor nstep;
/* layers on the encoder side. We actually use the encoder output instead
of all hidden layers. */
TensorList layersEnc;
/* layers on the decoder side */
TensorList layersDec;
/* list of states (allocated by MakeStates) */
T2TState * states;
/* number of states */
int stateNum;
/* indicates whether it is the first state */
bool isStart;
public:
/* constructor */
T2TStateBundle();
/* destructor */
~T2TStateBundle();
/* create states
>> num - number of states */
void MakeStates(int num);
};
/* The predictor reads the current state and then predicts the next.
It is exactly the same procedure as MT inference -
we get the state of previous words and then generate the next word.
Here, a state can be regarded as the representation of words (word
indices, hidden states, embeddings, etc.). */
class T2TPredictor
{
private:
/* pointer to the transformer model (set by Read) */
T2TModel * m;
/* current state (set by Read) */
T2TStateBundle * s;
/* start symbol (set by SetStartSymbol) */
int startSymbol;
public:
/* constructor */
T2TPredictor();
/* destructor */
~T2TPredictor();
/* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);
/* set the start symbol */
void SetStartSymbol(int symbol);
/* read a state: keep the model and the state bundle for later Predict calls */
void Read(T2TModel * model, T2TStateBundle * state);
/* predict the next state (uses the model and state given to Read) */
void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc);
/* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#ifndef __T2TSEARCH_H__
#define __T2TSEARCH_H__
#include "T2TModel.h"
#include "T2TPredictor.h"
namespace transformer
{
/* The class organizes the search process. It calls "predictors" to generate
distributions of the predictions and prunes the search space by beam pruning.
This makes a graph where each path represents a translation hypothesis.
The output can be the path with the highest model score. */
class T2TSearch
{
private:
/* the alpha parameter controls the length preference */
float alpha;
/* predictor */
T2TPredictor predictor;
/* max length of the generated sequence */
int maxLength;
/* beam size */
int beamSize;
/* batch size */
int batchSize;
/* we keep the final hypotheses in a heap for each sentence in the batch. */
XHeap<MIN_HEAP, float> * fullHypos;
/* array of the end symbols */
int * endSymbols;
/* number of the end symbols */
int endSymbolNum;
/* start symbol */
int startSymbol;
public:
/* constructor */
T2TSearch();
/* destructor */
~T2TSearch();
/* initialize the model */
void Init(int argc, char ** argv);
/* search for the most promising states */
void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);
/* preparation */
void Prepare(int myBatchSize,int myBeamSize);
/* compute the model score for each hypothesis */
void Score(T2TStateBundle * prev, T2TStateBundle * beam);
/* generate token indices via beam pruning */
void Generate(T2TStateBundle * beam);
/* expand the search graph */
void Expand(T2TStateBundle * prev, T2TStateBundle * beam);
/* collect hypotheses with ending symbol */
void Collect(T2TStateBundle * beam);
/* fill the hypothesis heap with incomplete hypotheses */
void FillHeap(T2TStateBundle * beam);
/* save the output sequences in a tensor */
void Dump(XTensor * output);
/* check if the token is an end symbol */
bool IsEnd(int token);
/* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum);
/* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include <math.h>
#include "T2TUtility.h"
#include "T2TTester.h"
#include "T2TSearch.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h"
using namespace nts;
namespace transformer
{
/* constructor (does nothing by itself; the members are configured in Init) */
T2TTester::T2TTester()
{
}
/* de-constructor (nothing is released explicitly here) */
T2TTester::~T2TTester()
{
}
/*
initialize the tester: read the vocabulary sizes and set up the
batch loader and the searcher with the same arguments
>> argc - number of arguments
>> argv - list of arguments
*/
void T2TTester::Init(int argc, char ** argv)
{
/* NOTE(review): the last argument of LoadParamInt is presumably the default value,
   i.e., "vsizetgt" falls back to the value just loaded for "vsize" - confirm */
LoadParamInt(argc, argv, "vsize", &vSize, 1);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
batchLoader.Init(argc, argv);
seacher.Init(argc, argv);
}
/*
test the model
>> fn - test data file
>> ofn - output data file
>> model - model that is trained
*/
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
{
int wc = 0;
int ws = 0;
int wordCount = 0;
int wordCountTotal = 0;
int sentCount = 0;
int batchCount = 0;
float loss = 0;
/* data files */
FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot read the test file");
FILE * ofile = fopen(ofn, "wb");
CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID;
XNet net;
double startT = GetClockSec();
wordCount = 0;
/* batch of input sequences */
XTensor batchEnc;
XTensor batchDec;
/* label */
XTensor label;
/* padding */
XTensor paddingEnc;
XTensor paddingDec;
/* gold standard */
XTensor gold;
/* an array that keeps the sequences */
int * seqs = new int[MILLION];
batchLoader.SetRandomBatch(false);
batchLoader.ClearBuf();
while(batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label,
seqs, vSize, vSizeTgt,
1, 1, false, ws, wc, devID, false))
{
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!");
CheckNTErrors(!model->isLM, "Only MT model is supported!");
XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output);
Dump(ofile, &output);
float prob = 0;
loss += -prob;
wc = batchEnc.GetDim(-1);
wordCount += wc;
wordCountTotal += wc;
sentCount += batchEnc.GetDim(-2);
batchCount += 1;
if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
}
}
fclose(file);
fclose(ofile);
delete[] seqs;
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, exp(loss/wordCount));
}
/*
write the output sequences to the file, one sequence per line,
as space-separated token ids
>> file - data file
>> output - output tensor
*/
void T2TTester::Dump(FILE * file, XTensor * output)
{
    const int seqLength = output->GetDim(-1);

    for (int offset = 0; offset < output->unitNum; offset += seqLength) {
        int pos = 0;
        while (pos < seqLength) {
            const int token = output->GetInt(offset + pos);
            fprintf(file, "%d ", token);

            /* a negative id closes the sequence (the id itself is still written) */
            if (token < 0)
                break;

            pos++;
        }
        fprintf(file, "\n");
    }
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :)
*/
#ifndef __T2TTESTER_H__
#define __T2TTESTER_H__
#include "T2TSearch.h"
#include "T2TBatchLoader.h"
namespace transformer
{
/* This class translates test sentences with a trained model.
   It loads the test data in batches, runs the search on each batch
   and dumps the resulting sequences to a file. */
class T2TTester
{
public:
/* vocabulary size of the source side */
int vSize;
/* vocabulary size of the target side */
int vSizeTgt;
/* for batching */
T2TBatchLoader batchLoader;
/* decoder for inference (the searcher) */
T2TSearch seacher;
public:
/* constructor */
T2TTester();
/* de-constructor */
~T2TTester();
/* initialize the tester with the given arguments */
void Init(int argc, char ** argv);
/* test the model: translate the file fn and write the result to ofn */
void Test(const char * fn, const char * ofn, T2TModel * model);
/* dump the result into the file */
void Dump(FILE * file, XTensor * output);
};
}
#endif
\ No newline at end of file
......@@ -23,11 +23,9 @@
#define __T2TTRAINER_H__
#include "T2TModel.h"
#include "T2TBatchLoader.h"
#include "../../tensor/function/FHeader.h"
#define MAX_SEQUENCE_LENGTH 1024 * 4
using namespace nts;
namespace transformer
......@@ -42,33 +40,6 @@ public:
/* parameter array */
char ** argArray;
/* buffer for loading words */
int * buf;
/* another buffer */
int * buf2;
/* buffer size */
int bufSize;
/* length of each sequence */
int * seqLen;
/* another array */
int * seqLen2;
/* offset of the first word for each sequence */
int * seqOffset;
/* number of sequences in the buffer */
int nseqBuf;
/* offset for next sequence in the buffer */
int nextSeq;
/* indicates whether the sequence is sorted by length */
bool isLenSorted;
/* dimension size of each inner layer */
int d;
......@@ -111,10 +82,10 @@ public:
float adamBeta2T;
/* list of the moments of the parameter matrices */
XList moments;
TensorList moments;
/* list of the 2nd order moments of the parameter matrices */
XList moments2nd;
TensorList moments2nd;
/* indicates whether the data file is shuffled for training */
bool isShuffled;
......@@ -130,20 +101,15 @@ public:
/* number of batches on which we do model update */
int updateStep;
/* indicates whether we double the </s> symbol for the output of lms */
bool isDoubledEnd;
/* indicates whether we use batchsize = max * sc
rather than batchsize = word-number, where max is the maximum
length and sc is the sentence number */
bool isSmallBatch;
/* counterpart of "isSmallBatch" */
bool isBigBatch;
/* indicates whether we intend to debug the net */
bool isDebugged;
/* indicates whether we use small memory footprint for backward process */
bool isSmallFootprint;
/* indicates whether the sequence is sorted by length */
bool isLenSorted;
/* for batching */
T2TBatchLoader batchLoader;
public:
/* constructor */
......@@ -163,46 +129,6 @@ public:
/* make a checkpoint */
void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id);
/* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step);
/* clear data buffer */
void ClearBuf();
/* load a batch of sequences */
int LoadBatch(FILE * file, bool isLM,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold,
int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold,
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
/* get word probabilities for a batch of sequences */
float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);
......
......@@ -25,6 +25,8 @@
#include "T2TModel.h"
#include "T2TUtility.h"
#include "T2TTrainer.h"
#include "T2TPredictor.h"
#include "T2TTester.h"
#include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h"
......@@ -36,8 +38,6 @@ int TransformerMain(int argc, const char ** argv)
{
if(argc == 0)
return 1;
fprintf(stderr, "%e\n", log(1e-8F));
char ** args = new char*[argc];
for(int i = 0; i < argc; i++){
......@@ -49,6 +49,7 @@ int TransformerMain(int argc, const char ** argv)
ShowParams(argc, args);
bool isBeamSearch = false;
char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH];
......@@ -58,8 +59,10 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString(argc, args, "model", modelFN, "");
LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, args, "output", outputFN, "");
LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
srand((unsigned int)time(NULL));
T2TTrainer trainer;
trainer.Init(argc, args);
......@@ -78,12 +81,22 @@ int TransformerMain(int argc, const char ** argv)
if(strcmp(modelFN, ""))
model.Read(modelFN);
T2TTrainer tester;
tester.Init(argc, args);
/* test the model on the new data */
if(strcmp(testFN, "") && strcmp(outputFN, ""))
tester.Test(testFN, outputFN, &model);
if(strcmp(testFN, "") && strcmp(outputFN, "")){
/* beam search */
if(isBeamSearch){
T2TTester searcher;
searcher.Init(argc, args);
searcher.Test(testFN, outputFN, &model);
}
/* forced decoding */
else{
T2TTrainer tester;
tester.Init(argc, args);
tester.Test(testFN, outputFN, &model);
}
}
delete[] trainFN;
delete[] modelFN;
......
......@@ -30,7 +30,9 @@
#include "XDevice.h"
#include "./test/Test.h"
#include "./core/CHeader.h"
#include "./XBLAS.h"
#include "./core/sort/TopK.h"
#include "./core/movement/Gather.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
......@@ -39,9 +41,6 @@ using namespace nts;
void SmallTest();
void TransposeTest();
void LittleTest();
void T2TTest();
void T2TTest2();
void PowerTest();
int main( int argc, const char ** argv )
......@@ -166,127 +165,5 @@ void TransposeTest()
delete[] data;
}
/* print the product of two int constants and terminate the program with status 1 */
void LittleTest()
{
    const int lhs = 5000;
    const int rhs = 100000;

    printf("%d\n", lhs * rhs);

    exit(1);
}
/*
scratch test of an output layer: the input is multiplied by the weight,
passed through softmax and scored with cross entropy; the backward steps
are then run and the intermediate gradients are dumped to text files.
The tensors are filled from x.txt, w.txt, gold.txt and padding.txt.
*/
void T2TTest()
{
    DTYPE loss;

    /* the shapes live on the stack (as in T2TTest2), so there is no
       heap array that has to be released afterwards */
    int dimSize[2] = {256, 10001};
    int dimSize2[3] = {2, 31, 256};
    int dimSize3[3] = {2, 31, 10001};
    int dimSize4[2] = {2, 31};

    /* NOTE(review): the tensors created here are never released; this is
       unchanged from the original code */
    XTensor * input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
    XTensor * weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
    XTensor * output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
    XTensor * dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
    XTensor * dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
    XTensor * dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
    XTensor * padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);

    myRead(input, "x.txt", "x");
    myRead(weight, "w.txt", "w");
    myRead(gold, "gold.txt", "gold");
    myRead(padding, "padding.txt", "padding");

    /* forward: softmax(input * weight) along dimension 2 */
    XTensor inter;
    inter = MMul(*input, *weight);

    _Softmax(&inter, output, 2);

    loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
    printf("loss: %f\n", loss);

    /* backward through the loss (the padding is not passed here) */
    _CrossEntropyBackward(dedy, output, gold, NULL);

    myDump(dedy, "dedy.txt", "dedy");

    /* backward through softmax; output - gold is computed alongside
       and both results are printed to stderr */
    _SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
    _Sub(output, gold, dedxTmp);

    myDump(dedx, "dedx.txt", "dedx");
    dedx->Dump(stderr, "dedx", 200);
    dedxTmp->Dump(stderr, "dedxTmp", 200);

    /* flatten both tensors to matrices before computing the weight gradient */
    input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
    dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));

    _MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);

    myDump(dedw, "dedw.txt", "dedw");
}
void T2TTest2()
{
int dimSize[3];
dimSize[0] = 161;
dimSize[1] = 47;
dimSize[2] = 10001;
XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
//XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//myRead(probs, "probs.txt", " ");
_SetDataFixedFloat(probs, 1.0F);
probs->Reshape(1, probs->unitNum);
DTYPE sum = _ReduceSumAll(probs);
printf("%e\n", sum);
//XTensor tmp;
//tmp = IsNonZero(*probs);
//DTYPE nonZeroNum = ReduceSumAll(tmp);
//printf("%f\n", nonZeroNum);
//
//DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
//printf("%e\n", gpu);
}
......@@ -28,7 +28,6 @@
#ifndef __XBLAS_H__
#define __XBLAS_H__
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
......@@ -36,7 +35,7 @@ namespace nts{
#define OPENBLAS_CONST const
typedef int BLASINT;
typedef int BLASINT;
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-21
*/
#ifndef __XCALL_H__
#define __XCALL_H__
#include "XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
* we define the "new and delete" functions below.
* The "Init" functions set up a tensor object supplied by the caller and the
* "New" functions return a pointer to a tensor. The "V2" variants take an
* "isEnableGrad" flag in place of the dense-ratio and/or memory-pool
* arguments of their counterparts.
*/
/* initialize an XTensor */
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor V2 */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
/* generate an XTensor with no initialization */
XTensor * NewTensor();
/* generate an XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate an XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate an XTensor which allocates data on the buffer (with a reference tensor) */
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
/* generate an XTensor which allocates data on the buffer V2 (with a reference tensor) */
XTensor * NewTensorBufV2(const XTensor * reference, int devID, const bool isEnableGrad = true);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense vector by range (the data type defaults to X_INT here) */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
} // namespace nts(NiuTrans.Tensor)
#endif // __XCALL_H__
\ No newline at end of file
......@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName)
}
}
/****************************************************
/*
Below is for calling CPU BLAS for fast matrix operations
I'm not sure how fast it is. But it seems that other
guys are crazy about this. So I decided to have a try.
......@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
return f;
}
/*
convert a data array between X_FLOAT and X_FLOAT16 (on CPUs only)
>> devID - device id (must be negative, as the code only runs on CPUs)
>> s - source data array
>> typeS - source data type
>> t - target data array
>> typeT - target data type
>> size - number of the items in s (and t)
*/
void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{
    CheckNTErrors((devID < 0), "This code must be run on CPUs!");

    /* identical types: nothing is converted (and nothing is copied to t) */
    if(typeS == typeT)
        return;

    const bool toHalf = (typeS == X_FLOAT && typeT == X_FLOAT16);
    const bool toFloat = (typeS == X_FLOAT16 && typeT == X_FLOAT);

    if(toHalf){
        float * src = (float*)s;
        unsigned short * tgt = (unsigned short*)t;
        for(int k = 0; k < size; k++)
            tgt[k] = FloatToFloat16(src[k]);
    }
    else if(toFloat){
        unsigned short * src = (unsigned short*)s;
        float * tgt = (float*)t;
        for(int k = 0; k < size; k++)
            tgt[k] = Float16ToFloat(src[k]);
    }
    else{
        ShowNTErrors("Unsupported data types for conversion!");
    }
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
/* data conversion (for lower precision computation) */
unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h);
void ConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#ifdef USE_CUDA
void CudaConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#endif
} /* end of the nts (NiuTrans.Tensor) namespace */
......
......@@ -24,6 +24,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "XDevice.h"
#include "XGlobal.h"
#include "XThread.h"
......@@ -59,6 +60,7 @@ XDevice::~XDevice()
cublasDestroy(cublasHandle);
if(stream != NULL)
delete stream;
curandDestroyGenerator(gen);
#endif
}
......@@ -68,6 +70,7 @@ void XDevice::Init(int myDevID)
Clear();
devID = myDevID;
seed = rand();
/* CPU information */
if(devID < 0){
......@@ -80,6 +83,10 @@ void XDevice::Init(int myDevID)
cudaDeviceProp prop;
cudaSetDevice(myDevID);
curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen, seed);
if(cudaGetDeviceProperties(&prop, devID) != cudaSuccess){
XPRINT1(0, stderr, "cannot get GPU(%d) information.", devID);
exit(1);
......@@ -194,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error));
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1);
}
#else
......@@ -209,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
SetFastFlags();
}
/* switch to a get current dev */
/* get the id of the current GPU device */
int XDevice::GetGPUDevice()
{
#ifdef USE_CUDA
......@@ -217,7 +225,8 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error));
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1);
}
......@@ -241,7 +250,7 @@ void XDevice::SetFastFlags()
#endif
}
/* reset cuda flag for more efficient cuda execution (all devices) */
/* reset the cuda flag for more efficient cuda execution (all devices) */
void XDevice::SetFastFlagsAllDevices()
{
#ifdef USE_CUDA
......@@ -267,9 +276,11 @@ XDevManager::~XDevManager()
}
/* initialize it and get the CPU and GPU information */
/* initialization */
void XDevManager::Init()
{
srand((unsigned int)time(NULL));
Clear();
/* CPUs (we actually do not care about how many CPUs are using) */
......@@ -309,7 +320,7 @@ void XDevManager::Clear()
#ifdef USE_CUDA
/* get the handle of GPU */
/* get the handle of a given GPU */
cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
{
CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
......@@ -317,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
return GPUs[devID].GetCublasHandle();
}
/* get the stream of cuda */
/* get the stream of a given GPU */
cudaStream_t * XDevManager::GetCudaStream(const int devID)
{
CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
......@@ -465,7 +476,7 @@ split a string
>> items - splitting result
<< return - how many items are there
*/
int SplitALine(char * inputString, const char * seperator, XList * items)
int SplitALine(char * inputString, const char * seperator, StrList* items)
{
items->Clear();
......@@ -514,12 +525,12 @@ get device ids for the given device information
devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
means that the first device is CPU, the second device
is GPU-0, the third device is CPU.
>> devIDs - device sequence specified by devInfo
>> devIDs - device IDs specified by devInfo
<< return - number of devices
*/
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{
XList * terms = new XList(1);
StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){
......@@ -556,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
return devCount;
}
/* show id sequence */
/* show device IDs */
void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
{
msg[0] = 0;
......
......@@ -99,6 +99,9 @@ public:
/* default stream for the device */
XStream * stream;
/* seed for random number generation */
int seed;
#ifdef USE_CUDA
/* mutex for handle (GPU cublas) */
......@@ -109,6 +112,9 @@ public:
/* specify if the handle is initialized */
bool isHandleReady;
/* generator of random numbers */
curandGenerator_t gen;
#endif
......@@ -230,6 +236,18 @@ extern XDevManager GDevs;
cudaSetDevice(devIDBackup); \
} \
/* check that two device ids refer to the same device: two negative ids are
   always accepted, otherwise the ids have to be equal. On a mismatch the
   location and both ids are reported and the program is terminated.
   Each argument is evaluated exactly once, so arbitrary int expressions
   can be passed. */
#define CheckDev(a, b) \
{ \
    const int checkDevA_ = (a); \
    const int checkDevB_ = (b); \
    if((checkDevA_ >= 0 || checkDevB_ >= 0) && checkDevA_ != checkDevB_){ \
        fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, checkDevA_, checkDevB_); \
        exit(1); \
    } \
}
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
......@@ -49,7 +49,7 @@ namespace nts {
#ifdef DOUBELPRICSION
#define DTYPE double
#define DTYPE_MIN (DTYPE)1.79E+308
#define DTYPE_MIN (DTYPE)-1.79E+308
#else
#define DTYPE float
#define DTYPE_MIN (DTYPE)-3.40E+38
......@@ -151,7 +151,9 @@ extern int verboseLevel;
#define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}}
#define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}}
/* true if V is non-zero and false otherwise; fully parenthesized so that
   the macro can be used inside larger expressions */
#define B2I(V) ((V) != 0)
/* floored modulo: a - floor(a / b) * b, converted to int (a itself if b is 0);
   the arguments are parenthesized so that expressions can be passed */
#define MODX(a, b) int((b) == 0 ? (a) : (a) - floor(double(a)/(b)) * (b))
/* BLAS interfaces */
#ifdef DOUBELPRICSION
......
......@@ -31,15 +31,15 @@ namespace nts{
/* default constructor: creates an empty heap that owns no storage yet.
   The members are given defined values here; Init() sets the real
   capacity and allocates the node array. */
template<HeapType hType, typename T>
XHeap<hType, T>::XHeap()
{
    mem = NULL;
    size = 0;
    count = 0;
    items = NULL;
}
/*
constructor
>> mySize - capacity of the heap, i.e., the number of nodes it can keep
>> myMem - memory pool handed over to Init (can be NULL)
*/
template<HeapType hType, typename T>
XHeap<hType, T>::XHeap(int mySize, XMem * myMem)
{
    /* all the setup (including the allocation) is done once, in Init */
    Init(mySize, myMem);
}
/* deconstructor */
......@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap()
}
/*
initialization: set the capacity, empty the heap and allocate the node array
>> mySize - capacity of the heap, i.e., the number of nodes it can keep
>> myMem - memory pool used for the node array (NULL: the array is
           allocated with new[])
*/
template<HeapType hType, typename T>
void XHeap<hType, T>::Init(int mySize, XMem * myMem)
{
    mem = myMem;
    size = mySize;
    count = 0;

    if (mem == NULL)
        items = new HeapNode<T>[mySize];
    else {
        /* keep the block returned by the pool (it used to be dropped, which
           left "items" unset) and size it for nodes rather than for bare values.
           NOTE(review): memory taken from the pool must not be released with
           delete[] - confirm that the de-constructor distinguishes the two cases */
        items = (HeapNode<T>*)mem->Alloc(mem->devID, mySize * sizeof(HeapNode<T>));
    }
}
template<HeapType hType, typename T>
void XHeap<hType, T>::Clear(T initValue)
{
count = 0;
......@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End()
/*
insert a node into the heap.
While there is room the node is appended and moved up to its place.
When the heap is full the node is compared with the top-most item: for a
MIN_HEAP it is dropped if the top value is >= the node value, for a
MAX_HEAP if the top value is <= the node value; otherwise it replaces
the top-most item and is moved down to its place.
>> node - the node to insert
*/
template<HeapType hType, typename T>
_XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node)
{
    if (count < size) {
        items[count] = node;
        Up(count);
        count++;
    }
    else if (count == size) {
        HeapNode<T> & item0 = items[0];

        if (hType == MIN_HEAP && item0.value >= node.value)
            return;
        else if (hType == MAX_HEAP && item0.value <= node.value)
            return;

        items[0] = node;
        Down(0);
    }
    else {
        /* count can never legally exceed the capacity */
        ShowNTErrors("Overflow of the heap!");
    }
}
/* replace the top-most item and update the heap */
......@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node)
template<HeapType hType, typename T>
_XINLINE_ HeapNode<T> XHeap<hType, T>::Pop()
{
//CheckNTErrors((size > 0), "Empty heap!");
CheckNTErrors(count > 0, "Empty heap!");
HeapNode<T> node = items[0];
items[0] = items[count - 1];
count--;
......
......@@ -39,7 +39,7 @@ template <typename T>
struct HeapNode
{
/* node index */
int index;
long long index;
/* value of the node */
T value;
......@@ -52,9 +52,16 @@ struct HeapNode
/* build a node from an integer index and a value
>> i - index of the node (widened to long long)
>> v - value of the node */
HeapNode(int i, T v)
{
    index = (long long)i;
    value = v;
}
/* build a node whose index field carries a pointer
>> i - pointer stored (as an integer) in the index
>> v - value of the node */
HeapNode(void * i, T v)
{
    value = v;
    index = reinterpret_cast<long long>(i);
}
};
/* a heap that keeps a data array of T */
......@@ -76,11 +83,17 @@ public:
public:
/* constructor */
XHeap();
/* constructor */
XHeap(int mySize, XMem * myMem = NULL);
/* deconstructor */
~XHeap();
/* initialization */
void Init(int mySize, XMem * myMem = NULL);
/* clear the data */
void Clear(T initValue);
......@@ -107,6 +120,9 @@ public:
/* move item k up the tree */
void Up(int k);
/* how many items are kept in the heap
   << return - the current value of the "count" member */
inline int Count() { return count; };
};
} /* end of the nts (NiuTrans.Tensor) namespace */
......
......@@ -300,9 +300,36 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL)
return;
XList list(2);
list.Add(t1);
list.Add(t2);
if (!t1->enableGrad)
return;
TensorList list(2);
list.Add((XTensor*)t1);
list.Add((XTensor*)t2);
MakeLink(&list, h, id);
}
/*
create a hyperedge with two input tensors and a output tensor
>> t1 - a tail tensor
>> t2 - the second tail tensor
>> t3 - the third tail tensor
>> h - head tensor
>> id - id of the edge type
*/
void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,XTensor * h, int id)
{
if (h == NULL)
return;
if (!t1->enableGrad || !t2->enableGrad)
return;
TensorList list(3);
list.Add((XTensor*)t1);
list.Add((XTensor*)t2);
list.Add((XTensor*)t3);
MakeLink(&list, h, id);
}
......@@ -313,7 +340,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor
>> id - id of the edge type
*/
void XLink::MakeLink(const XList * list, XTensor * h, int id)
void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
{
/* forward */
XLink &income = h->income;
......@@ -347,8 +374,11 @@ create a hyper edge with a input tensors and a list of output tensors
>> list - a list of output tensors
>> id - id of the edge type
*/
void XLink::MakeLink(XTensor * t, XList * list, int id)
void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{
if (!t->enableGrad)
return;
/* forward */
for(int i = 0; i < list->count; i++){
XTensor * h = (XTensor*)list->GetItem(i);
......@@ -509,6 +539,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
}
}
/*
copy a node with another, i.e., we add the links to the new node.
The existing links of the target are cleared first. The links of the
reference itself are only read; the nodes linked to the reference get the
target added as one more tail.
>> reference - the node to be copied
>> target - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
/* nothing to do without both nodes */
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
/* drop whatever links the target currently has */
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes (copied only if the reference has an incoming edge type) */
if (reference->income.typeID != 0) {
/* enlarge the tail array if it cannot hold all tails of the reference */
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
/* duplicate the parameter buffer of the reference.
   NOTE(review): the previous newIncome.params pointer is overwritten here -
   confirm that ClearIncoming has already released it.
   NOTE(review): paramNum is copied below but paramSize is not - presumably
   it is the same for every XLink; confirm. */
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
/* the target is added next to the reference (the reference is kept);
   only the first occurrence of the reference is handled */
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
/* a child that has outgoing tails must list the reference among them */
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
/* enlarge the tail array if it cannot hold all outgoing tails of the reference */
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
/* outgoing nodes */
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
/* unlike the child loop above there is no break here: the target is
   added once for every occurrence of the reference */
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
/* a parent that has incoming tails must list the reference among them */
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
/*
copy incoming edges of a given node
>> reference - the node we copy from
......@@ -521,7 +633,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
ClearIncoming(target);
int tailNum = reference->income.tailNum;
XList tails(tailNum);
TensorList tails(tailNum);
for(int i = 0; i < tailNum; i++){
XTensor * tail = (XTensor*)reference->income.tails[i];
tails.Add(tail);
......@@ -634,6 +746,29 @@ void XLink::ShowNode(FILE * file, XTensor * node)
fprintf(stderr, "\n");
}
/*
search for a node in a top-down manner by its name
The search is depth-first: the node itself is tested first, then each tail
of its incoming edge is searched recursively in order.
>> top - the top most node (where the search starts)
>> name - the name to look for
<< return - the node we found (NULL if there is no match, or if
            either argument is NULL)
*/
XTensor * XLink::SearchNode(XTensor * top, const char * name)
{
    /* guard against a missing node (also covers NULL entries met during
       the recursion) and a missing name, which strcmp cannot handle */
    if(top == NULL || name == NULL)
        return NULL;

    if(!strcmp(top->name, name))
        return top;

    XLink &incoming = top->income;

    for(int i = 0; i < incoming.tailNum; i++){
        XTensor * hit = SearchNode(incoming.tails[i], name);
        if(hit != NULL)
            return hit;
    }

    return NULL;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
#define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64
/*
......@@ -138,13 +138,17 @@ struct XLink
static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with three input tensors and an output tensor */
static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
/* create a hyper edge with a list of input tensors and an output tensor */
static
void MakeLink(const XList * list, XTensor * h, int id);
void MakeLink(const TensorList * list, XTensor * h, int id);
/* create a hyper edge with an input tensor and a list of output tensors */
static
void MakeLink(XTensor * h, XList * list, int id);
void MakeLink(XTensor * h, TensorList * list, int id);
/* add a parameter */
static
......@@ -170,6 +174,10 @@ struct XLink
static
void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy a node with another, i.e., we add the links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */
static
void CopyIncoming(const XTensor * reference, XTensor * target);
......@@ -181,6 +189,10 @@ struct XLink
/* show a node */
static
void ShowNode(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */
static
XTensor * SearchNode(XTensor * top, const char * name);
};
} // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,32 +15,31 @@
* limitations under the License.
*/
/*
*
* Implementation of list that keeps data items
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17
* The first coding job this year!
*
*/
#ifndef __XLIST_H__
#define __XLIST_H__
/*
*
* Implementation of template list that keeps data items
*
* $Created by: HU Chi (huchinlp@foxmail.com)
*
*/
#include "XMem.h"
#include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
#ifndef __TensorList_H__
#define __TensorList_H__
typedef int (* ListCompare)(const void * item1, const void * item2);
/* the XList class */
class XList
{
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
public:
/* data items */
void ** items;
T *items;
/* number of items */
int count;
......@@ -49,56 +48,105 @@ public:
int maxNum;
/* the memory pool for data array allocation */
XMem * mem;
/* indicates whether data items are integers */
bool isIntList;
XMem* mem;
public:
/* constructor */
XList();
TensorListBase();
/* constructor */
XList(int myMaxNum, bool isIntListOrNot = false);
TensorListBase(int myMaxNum);
/* constructor */
XList(int myMaxNum, XMem * myMem, bool isIntListOrNot = false);
TensorListBase(int myMaxNum, XMem* myMem);
/* de-constructor */
~XList();
/* utilities */
void Create(int myMaxNum, XMem * myMem);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount);
void AddList(XList * l);
void AddInt(int i);
void Insert(int pos, void * item);
void * GetItem(int i) const;
int GetItemInt(int i);
void SetItem(int i, void * item);
void SetItemInt(int i, int item);
int FindFirst(void * item);
~TensorListBase();
/* add an item into the list */
void Add(T&& item);
/* return number of elements */
size_t Size();
/* add an item into the list */
void Add(const T& item);
/* add a number of items into the list */
void Add(const T* inputItems, int inputItemCount);
/* append a list to the current list */
void AddList(TensorListBase* l);
/* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
void Clear();
void ClearStringList();
void Sort(int itemSize, ListCompare comp);
/* sort the list */
void Sort(int itemSize);
/* reverse the list */
void Reverse();
/* remove the item at position i */
void Remove(int i);
XList * Copy(XMem * myMem);
/* reserve space for data entry */
void Reserve(int n);
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */
_XINLINE_ void * Get(int i) {return GetItem(i);};
_XINLINE_ int GetInt(int i) {return GetItemInt(i);};
_XINLINE_ void Set(int i, void * item) {SetItem(i, item);};
_XINLINE_ void SetInt(int i, int item) {SetItemInt(i, item);};
T& operator[] (int i) { return GetItem(i); };
T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
};
struct XTensor;
typedef TensorListBase<void*> XList;
typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
/* a record made of an integer id and a list of integers
   NOTE(review): presumably one input sample - usage is not visible here, confirm */
struct Example {
/* identifier of the record */
int id;
/* integer payload of the record */
IntList data;
};
/* a record made of an integer id and a list of integers (same layout as Example)
   NOTE(review): presumably one output item - usage is not visible here, confirm */
struct Result {
/* identifier of the record */
int id;
/* integer payload of the record */
IntList data;
};
extern XList NULLList;
typedef TensorListBase<Result> ResultList;
typedef TensorListBase<Example> ExampleList;
typedef TensorListBase<XTensor*> TensorList;
}
/* end of the nts (NiuTrans.Tensor) namespace */
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
#endif // __TensorList_H__
......@@ -24,6 +24,7 @@
#ifndef __XMEM_H__
#define __XMEM_H__
#include <stdio.h>
#include <stdlib.h>
#ifdef CUDA_BLAS
......@@ -38,6 +39,15 @@
#include <curand.h>
#endif
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#elif WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
......@@ -50,8 +60,10 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_MEM_NUM 16
#define MAX_GPU_MEM_NUM 16
/*
mode of running a memory pool
......@@ -201,6 +213,9 @@ public:
MTYPE curUsedPin;
MTYPE bufUsedPin;
/* indicates whether the memory pool is initialized */
bool isInitialized;
#ifdef USE_CUDA
/* handle used for cublas */
cublasHandle_t cublasHandle;
......@@ -402,6 +417,9 @@ public:
/* create a new cublas handle */
void CreateBLASHandle();
/* show profile of the memory pool */
void ShowMemUsage(FILE * file);
#ifdef USE_CUDA
/* get the handle of cublas */
cublasHandle_t * GetCublasHandle();
......@@ -409,6 +427,61 @@ public:
};
/*
a class for the management of memory
It holds two fixed-size arrays of memory pools (XMem), one for CPUs and one
for GPUs, with room for MAX_CPU_MEM_NUM and MAX_GPU_MEM_NUM pools.
*/
class XMemManager
{
private:
/* cpu memory pool information (at most MAX_CPU_MEM_NUM pools) */
XMem CPUMems[MAX_CPU_MEM_NUM];
/* number of cpu memory pools */
int nCPUMem;
/* gpu memory pool information (at most MAX_GPU_MEM_NUM pools) */
XMem GPUMems[MAX_GPU_MEM_NUM];
/* number of gpu memory pools */
int nGPUMem;
public:
/* constructor */
XMemManager();
/* de-constructor */
~XMemManager();
/* get memory size
   NOTE(review): by its name this is the memory still available on the host - confirm */
MTYPE GetAvailableMemory();
/* get GPU memory size
   >> devID - id of the device to query */
MTYPE GetAvailableGPUMemory(int devID);
/* get buffer size
   >> freeMem - amount of free memory
   >> myBufSize - output parameter (pointer)
   NOTE(review): presumably receives the buffer size chosen for freeMem - confirm */
void GetBufferSize(MTYPE freeMem, MTYPE * myBufSize);
/* initialize it and set the global memory information */
void Initialize();
/* free it */
void Free();
/* get global memory pool
   >> devID - id of the device the pool belongs to
   NOTE(review): presumably a negative id selects a CPU pool - confirm */
XMem * GetMem(const int devID);
/* get global memory size
   >> devID - id of the device
   >> myBlockSize - output parameter (pointer) for the block size
   >> myBlockNum - output parameter (pointer) for the block number
   >> myBufSize - output parameter (pointer) for the buffer size
   NOTE(review): the meaning of the returned int is not visible here - confirm */
int GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize);
/* show memory information */
void ShowMemInfo();
};
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem;
extern int testxmemid;
......
......@@ -59,6 +59,8 @@ const char * GetOPName(int type)
return "M_DIV";
else if (type == MATH_DIVDIM)
return "M_DIVDIM";
else if (type == MATH_MASK)
return "M_MASK";
else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED)
......@@ -67,6 +69,8 @@ const char * GetOPName(int type)
return "M_MULTIPLY";
else if (type == MATH_MULTIPLYDIM)
return "M_MULTIPLYDIM";
else if (type == MATH_MULTIPLYBROADCAST)
return "M_MULTIPLYBROADCAST";
else if (type == MATH_NEGATE)
return "M_NEGATE";
else if (type == MATH_NORMALIZE)
......@@ -75,6 +79,14 @@ const char * GetOPName(int type)
return "M_POWER";
else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT";
else if (type == MATH_SCALE)
return "M_SCALE";
else if (type == MATH_DESCALE)
return "M_DESCALE";
else if (type == MATH_SHIFT)
return "M_SHIFT";
else if (type == MATH_MULANDSHIFT)
return "M_OPERATION";
else if (type == MATH_SIGN)
return "M_SIGN";
else if (type == MATH_SUB)
......@@ -85,6 +97,8 @@ const char * GetOPName(int type)
return "M_SUM";
else if (type == MATH_SUMDIM)
return "M_SUMDIM";
else if (type == MATH_SUMBROADCAST)
return "M_SUMBROADCAST";
else if (type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX";
else if (type == REDUCE_REDUCEMEAN)
......@@ -97,13 +111,7 @@ const char * GetOPName(int type)
return "R_REDUCEVARIANCE";
}
else if ((type & DATA_BASE) != 0){
if (type == GETANDSET_CONVERTDATATYPE)
return "G_CONVERTDATATYPE";
else if (type == GETANDSET_INDEXTOONEHOT)
return "G_INDEXTOONEHOT";
else if (type == GETANDSET_ONEHOTTOINDEX)
return "G_ONEHOTTOINDEX";
else if (type == GETANDSET_SELECT)
if (type == GETANDSET_SELECT)
return "G_SELECT";
else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED";
......@@ -111,6 +119,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER)
return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if (type == SHAPE_MERGE)
......@@ -152,6 +162,10 @@ const char * GetOPName(int type)
else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX";
}
else if ((type & LOSS_BASE) != 0) {
if (type == LOSS_CROSSENTROPY)
return "L_CROSSENTROPY";
}
return "NULL";
}
......
......@@ -48,21 +48,29 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CLIP MATH_ROUND + 1
#define MATH_DIV MATH_CLIP + 1
#define MATH_DIVDIM MATH_DIV + 1
#define MATH_MATRIXMUL MATH_DIVDIM + 1
#define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_NEGATE MATH_MULTIPLYDIM + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
#define MATH_NORMALIZE MATH_NEGATE + 1
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_SIGN MATH_SCALEANDSHIFT + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SCALE MATH_MULANDSHIFT + 1
#define MATH_DESCALE MATH_SCALE + 1
#define MATH_SHIFT MATH_DESCALE + 1
#define MATH_MOD MATH_SHIFT + 1
#define MATH_SIGN MATH_MOD + 1
#define MATH_SUB MATH_SIGN + 1
#define MATH_SUBDIM MATH_SUB + 1
#define MATH_SUM MATH_SUBDIM + 1
#define MATH_SUMDIM MATH_SUM + 1
#define MATH_SUMBROADCAST MATH_SUMDIM + 1
#define REDUCE MATH_SUMDIM + 1
#define REDUCE MATH_SUMBROADCAST + 1
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
......@@ -73,16 +81,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define DATA_BASE MATH_BASE * 2
#define GETANDSET DATA_BASE + 1
#define GETANDSET_CONVERTDATATYPE GETANDSET + 1
#define GETANDSET_INDEXTOONEHOT GETANDSET_CONVERTDATATYPE + 1
#define GETANDSET_ONEHOTTOINDEX GETANDSET_INDEXTOONEHOT + 1
#define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1
#define GETANDSET_SELECT GETANDSET_CONVERTDATATYPE + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1
......@@ -108,6 +115,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1
#define LOSS_BASE FUNCTION_BASE * 2
#define LOSS_CROSSENTROPY LOSS_BASE + 1
/* get operator name */
const char * GetOPName(int type);
......
......@@ -146,7 +146,7 @@ run a set of jobs in parallel
>> jobArgs - the list of arguments for each job
>> sleepTime - time to sleep (in ms) for each round
*/
void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
{
if(threadNum <= 0){
XPRINT(1, stderr, "Error! No threads were created!\n");
......@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);
/* the arguments that are passed to the function */
volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c);
volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);
/* thread */
XThread * thread = threads + availableThreads[i];
......
......@@ -106,7 +106,7 @@ public:
void KillThreads();
/* run a set of jobs in parallel */
void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0);
void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);
/* get the number of parallel jobs to run */
int GetJobNum(int size);
......
......@@ -42,7 +42,7 @@ job item used in queues
JobQueueNode::JobQueueNode()
{
    /* no job is attached yet */
    job = NULL;

    /* a single argument list, created with an initial capacity of 1
       (the list used to be allocated twice, leaking the first one) */
    args = new TensorList(1);
}
/* de-constructor */
......@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
head = 0;
tail = 0;
isJobQueue = false;
jobDequeuerArgs = new XList(1);
jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false;
runningJobCount = 0;
jobStream = NULL;
......@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true;
jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);
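// warning: the two items added below are NOT tensors - the queue itself and a pointer to a device id are cast to XTensor* only so that they fit into a TensorList; they must never be used as XTensor objects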
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs;
......@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer()
}
/* add a job item to process */
void XQueue::EnqueueJob(void * job, XList * jobArgs)
void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{
MUTEX_LOCK(jobQueueMutex);
runningJobCount++;
......@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
}
/* job item consumer */
void XQueue::DequeueJobs(XList * args)
void XQueue::DequeueJobs(TensorList * args)
{
CheckNTErrors((args->count == 2), "Illegal arguments!");
......
......@@ -52,7 +52,7 @@ public:
void * job;
/* arguments of the job */
XList * args;
TensorList * args;
public:
/* constructor */
......@@ -102,7 +102,7 @@ private:
XThread jobDequeuer;
/* argument list of jobDequeuer */
XList * jobDequeuerArgs;
TensorList * jobDequeuerArgs;
/* indicates whether jobDequeuer stops */
bool jobDequeuerBreak;
......@@ -141,11 +141,11 @@ public:
void StopJobConsumer();
/* add a job item to process */
void EnqueueJob(void * job, XList * jobArgs);
void EnqueueJob(void * job, TensorList * jobArgs);
/* job item consumer */
static
void DequeueJobs(XList * args);
void DequeueJobs(TensorList * args);
/* get the break flag */
bool GetJobBreak();
......
......@@ -85,7 +85,7 @@ namespace nts{
#endif
typedef void (*TFunction) (volatile XList*);
typedef void (*TFunction) (volatile TensorList*);
/*
This is a class that wraps the standard implementation of threading
......@@ -133,7 +133,7 @@ public:
/* arguments (for the function to run) */
volatile
XList * argv;
TensorList * argv;
/* a flag to break */
volatile
......
......@@ -28,6 +28,7 @@
#include "arithmetic/Div.h"
#include "arithmetic/DivDim.h"
#include "arithmetic/Mask.h"
#include "arithmetic/MatrixMul.h"
#include "arithmetic/MatrixMul2D.h"
#include "arithmetic/MatrixMul2DMultiTheading.h"
......@@ -35,25 +36,22 @@
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/MultiplyDim.h"
#include "arithmetic/Negate.h"
#include "arithmetic/Sign.h"
#include "arithmetic/Sub.h"
#include "arithmetic/SubDim.h"
#include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "arithmetic/SumDim.h"
#include "arithmetic/XTensorBLAS.h"
#include "arithmetic/MulAndShift.h"
#include "getandset/ConvertDataType.h"
#include "getandset/OnehotAndIndex.h"
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "math/Binary.h"
#include "math/Clip.h"
#include "math/Compare.h"
#include "math/Normalize.h"
#include "math/Power.h"
#include "math/ScaleAndShift.h"
#include "math/Unary.h"
......@@ -87,11 +85,14 @@
#include "shape/Squeeze.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "shape/IsSameShaped.h"
#include "sort/Sort.h"
#include "sort/TopK.h"
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "utilities/CheckData.h"
#include "utilities/SetAscendingOrder.h"
#endif // __CHEADER_H__
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论