Commit 3852f15a by huchi

Merge with branch: xiaotong-working

parent 98a9130d
......@@ -27,6 +27,7 @@
#include "./tensor/test/Test.h"
#include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/NMT.h"
#include "./train/TTrain.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
......@@ -38,8 +39,17 @@ using namespace nmt;
int main( int argc, const char ** argv )
{
if(argc > 1 && !strcmp(argv[1], "-test"))
XConfig config;
if(argc > 1){
config.Create(argc - 1, argv + 1);
verboseLevel = config.GetInt("verbose", 1);
}
if (argc > 1 && !strcmp(argv[1], "-test"))
Test();
else if (argc > 1 && !strcmp(argv[1], "-testtrain"))
TestTrain();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-t2t"))
......@@ -47,7 +57,8 @@ int main( int argc, const char ** argv )
else{
fprintf(stderr, "Thanks for using NiuTensor! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, " Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-testtrain\" for test of the trainer!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
}
......
......@@ -93,6 +93,7 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/* indicates whether the node is for an activation function */
......
......@@ -89,6 +89,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/* indicates whether the node is for a loss computation */
......
......@@ -105,12 +105,19 @@ void XShapeGrad::GradConvertDataType(XTensor* node, bool isEfficient)
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
if (a->mem != NULL)
a->mem->LockBuf();
XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
_ConvertDataType(node->grad, tmp);
_SumMe(a->grad, tmp);
DelTensorBuf(tmp);
if (a->mem != NULL)
a->mem->UnlockBuf();
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -138,12 +145,19 @@ void XShapeGrad::GradCopyIndexed(XTensor * node, bool isEfficient)
if (!isEfficient || input->isGrad) {
XNoder::MakeGrad(input);
if (input->mem != NULL)
input->mem->LockBuf();
XTensor * tmp = NewTensorBufV2(input, input->devID, input->mem);
_SpreadForCopyIndexed(tmp, node->grad, dim, srcIndex, tgtIndex, copyNum);
_SumMe(input->grad, tmp);
DelTensorBuf(tmp);
if (input->mem != NULL)
input->mem->UnlockBuf();
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -167,15 +181,20 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficient)
if (!isEfficient || input->isGrad) {
XNoder::MakeGrad(input);
if (input->mem != NULL)
input->mem->LockBuf();
XTensor * tmp = NewTensorBufV2(input, input->devID, input->mem);
tmp->SetZeroAll();
_SpreadForGather(tmp, node->grad, index);
_SumMe(input->grad, tmp);
DelTensorBuf(tmp);
if (input->mem != NULL)
input->mem->UnlockBuf();
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -193,6 +212,8 @@ void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficient)
if (!isEfficient || input->isGrad) {
XNoder::MakeGrad(input);
if (input->mem != NULL)
input->mem->LockBuf();
XTensor * tmp = NewTensorBufV2(input, input->devID, input->mem);
_CopyValues(node->grad, tmp);
......@@ -205,9 +226,12 @@ void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficient)
_SumMe(input->grad, tmp);
DelTensorBuf(tmp);
if (input->mem != NULL)
input->mem->UnlockBuf();
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -246,13 +270,16 @@ void XShapeGrad::GradMerge(XTensor * node, bool isEfficient)
dims[j++] = input->dimSize[i];
}
}
dims[0] = -dims[0];
dims[0] = -abs(dims[0]);
XTensor gradInputSmall(input->order - leadDim, dims,
input->dataType, input->denseRatio,
input->devID, input->mem);
dims[whereToMerge - leadDim] *= dims[0];
XTensor gradNodeSmall(node->order - leadDim, dims + leadDim + 1,
dims[whereToMerge - leadDim] *= abs(dims[0]);
int * dimsNode = dims + 1;
dimsNode[0] = -abs(dimsNode[0]);
XTensor gradNodeSmall(node->order - leadDim, dimsNode,
node->dataType, node->denseRatio,
node->devID, node->mem);
......@@ -296,6 +323,7 @@ void XShapeGrad::GradMerge(XTensor * node, bool isEfficient)
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -379,6 +407,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -407,6 +436,7 @@ void XShapeGrad::GradReshape(XTensor * node, bool isEfficient)
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -442,16 +472,21 @@ void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
/* if the tensor is used somewhere else, we need another SUM
for gradient accumulation */
else {
if (input->mem != NULL)
input->mem->LockBuf();
XTensor * inputGradTMP = NewTensorBufV2(input, input->devID, input->mem);
_Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
_Sum(input->grad, inputGradTMP, input->grad);
DelTensorBuf(inputGradTMP);
if (input->mem != NULL)
input->mem->UnlockBuf();
}
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -528,14 +563,21 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
somewhere else, we need another SUM for gradient
accumulation */
else {
if (node->mem != NULL)
node->mem->LockBuf();
XTensor * nodeGradTMP = NewTensorBufV2(node, node->devID, node->mem);
_Merge(&splits, nodeGradTMP, whereToSplit + 1);
_Sum(node->grad, nodeGradTMP, node->grad);
DelTensorBuf(nodeGradTMP);
if (node->mem != NULL)
node->mem->UnlockBuf();
}
}
node->visitMark = NODE_DOING;
node->isGradFinished = true;
}
/*
......@@ -566,14 +608,19 @@ void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
CheckNTErrors(input->order > i && i >= 0, "index of dimension is out of scope!");
CheckNTErrors(input->order > j && j >= 0, "index of dimension is out of scope!");
if (input->mem != NULL)
input->mem->LockBuf();
XTensor * tmp = NewTensorBufV2(input, input->devID, input->mem);
_Transpose(output->grad, tmp, i, j);
_Sum(input->grad, tmp, input->grad);
DelTensorBuf(tmp);
if (input->mem != NULL)
input->mem->UnlockBuf();
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
/*
......@@ -603,15 +650,20 @@ void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
if (!isEfficient || input->isGrad) {
XNoder::MakeGrad(input);
if (input->mem != NULL)
input->mem->LockBuf();
XTensor * tmp = NewTensorBufV2(input->grad, input->devID, input->mem);
_ReduceSum(output->grad, tmp, dim);
_Sum(input->grad, tmp, input->grad);
DelTensorBuf(tmp);
if (input->mem != NULL)
input->mem->UnlockBuf();
}
node->visitMark = NODE_FINISHED;
node->isGradFinished = true;
}
}
\ No newline at end of file
......@@ -101,6 +101,7 @@ void XNet::Backward(TensorList &roots)
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
node->isGradFinished = false;
}
/* back-propagation from output to input */
......@@ -108,7 +109,7 @@ void XNet::Backward(TensorList &roots)
XTensor * node = (XTensor*)nodes.Get(i);
if(node->mem != NULL){
CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
//CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
}
if(node->visitMark != NODE_FINISHED)
......@@ -127,7 +128,20 @@ void XNet::Backward(TensorList &roots)
delete node;
}
}
}
}
for (int i = 0; i < nodes.count; i++) {
XTensor* node = (XTensor*)nodes.Get(i);
if (node->income.tailNum >= 100 || node->outgo.tailNum >= 100) {
XPRINT(1, stderr, "Are you sure that the node should connect so many (100) nodes?\n");
}
if (node->grad != NULL) {
XTensor* grad = node->grad;
if (grad->income.tailNum >= 100 || grad->outgo.tailNum >= 100) {
XPRINT(1, stderr, "Are you sure that the grad node should connect so many (100) nodes?\n");
}
}
}
}
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -224,8 +224,6 @@ void Model::MakeMT(XTensor& inputEnc, XTensor& inputDec, XTensor& output,
XTensor maskDec;
XTensor maskEncDec;
bool debug(false);
/* encoder mask */
MakeMTMaskEnc(paddingEnc, maskEnc);
......@@ -234,25 +232,9 @@ void Model::MakeMT(XTensor& inputEnc, XTensor& inputDec, XTensor& output,
encoding = MakeEncoder(inputEnc, &maskEnc, isTraining);
if (debug) {
LOG("after encoding:");
encoding.mem->ShowMemUsage(stderr);
}
decoding = MakeDecoder(inputDec, encoding, &maskDec, maskEncDec, isTraining);
if (debug) {
LOG("after decoding:");
encoding.mem->ShowMemUsage(stderr);
}
outputLayer->Make(decoding, output, true, true);
if (debug) {
LOG("after outputing:");
encoding.mem->ShowMemUsage(stderr);
exit(0);
}
}
/*
......@@ -287,6 +269,7 @@ void Model::MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
GMems.GetMem(paddingEnc.devID)->LockBuf();
XTensor* maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1,
paddingEnc.dataType, 1.0F, paddingEnc.devID, paddingEnc.mem);
XTensor* maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
......@@ -297,6 +280,7 @@ void Model::MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
DelTensorBuf(maskEncDecTMPDec);
DelTensorBuf(maskEncDecTMPEnc);
GMems.GetMem(paddingEnc.devID)->UnlockBuf();
/* padding on the source side */
int* dimsPadding = new int[paddingEnc.order + 2];
......@@ -305,6 +289,7 @@ void Model::MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
GMems.GetMem(paddingEnc.devID)->LockBuf();
XTensor* padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, 1.0F,
paddingEnc.devID, paddingEnc.mem);
......@@ -331,6 +316,7 @@ void Model::MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
DelTensorBuf(padding3);
DelTensorBuf(padding2);
GMems.GetMem(paddingEnc.devID)->UnlockBuf();
}
/*
......@@ -344,7 +330,6 @@ void Model::MakeMTMaskEnc(XTensor& paddingEnc, XTensor& maskEnc)
/* mask of the padding */
Unsqueeze(paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
Unsqueeze(padding2, maskEnc, 0, nhead);
ScaleAndShiftMe(maskEnc, 1e9F, -1e9F);
}
......@@ -378,7 +363,6 @@ void Model::MakeMTMaskDec(XTensor& paddingEnc, XTensor& paddingDec,
Unsqueeze(paddingEnc, maskEncDecTMP, paddingEnc.order - 1, paddingDec.GetDim(-1));
ScaleAndShiftMe(maskEncDecTMP, 1e9F, -1e9F);
Unsqueeze(maskEncDecTMP, maskEncDec, 0, dims[0]);
delete[] dims;
......@@ -571,4 +555,14 @@ void Model::Read(FILE* file)
LOG("model loaded (took %.1fs)", elapsed);
}
/* clone the model to a given device (overloaded method of XModel).
   >> devID - the device where the cloned model would live
   NOTE(review): stub - cloning is not implemented yet; callers always get nullptr,
   so multi-device training through XModel cannot work until this is filled in. */
XModel* Model::Clone(int devID)
{
return nullptr;
}
/* run the neural network for one step (overloaded method of XModel).
   >> inputs - input tensors
   >> outputs - output tensors
   >> golds - gold-standard tensors
   >> losses - resulting loss tensors
   << always false for now
   NOTE(review): stub - the forward/backward pass is not hooked up here yet,
   so this always reports failure. */
bool Model::RunSimple(XList* inputs, XList* outputs, XList* golds, XList* losses)
{
return false;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -24,17 +24,18 @@
#include "Encoder.h"
#include "Decoder.h"
#include "Utility.h"
#include "submodel/FNN.h"
#include "submodel/Output.h"
#include "Utility.h"
#include "submodel/Attention.h"
#include "../../train/XModel.h"
namespace nmt
{
/* a nmt model that keeps parameters of the encoder,
/* an nmt model that keeps parameters of the encoder,
the decoder and the output layer (softmax). */
class Model
class Model : public XModel
{
public:
/* device id */
......@@ -85,26 +86,26 @@ public:
/* make the encoding network */
XTensor MakeDecoder(XTensor& inputEnc, XTensor& inputDec, XTensor* mask,
XTensor& MaskEncDec, bool isTraining);
XTensor& MaskEncDec, bool isTraining);
/* make the network for language modeling (with the output softmax layer) */
void MakeLM(XTensor& input, XTensor& output, XTensor& padding, bool isTraining);
/* make the network for machine translation (with the output softmax layer) */
void MakeMT(XTensor& inputEnc, XTensor& inputDec, XTensor& output,
XTensor& paddingEnc, XTensor& paddingDec, bool isTraining);
XTensor& paddingEnc, XTensor& paddingDec, bool isTraining);
/* make the mask for training MT models */
void MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
XTensor& paddingEnc, XTensor& paddingDec,
XTensor& maskEnc, XTensor& maskDec, XTensor& maskEncDec);
XTensor& paddingEnc, XTensor& paddingDec,
XTensor& maskEnc, XTensor& maskDec, XTensor& maskEncDec);
/* make the mask of the encoder */
void MakeMTMaskEnc(XTensor& paddingEnc, XTensor& maskEnc);
/* make the mask of the decoder */
void MakeMTMaskDec(XTensor& paddingEnc, XTensor& paddingDec,
XTensor& maskDec, XTensor& maskEncDec);
XTensor& maskDec, XTensor& maskEncDec);
/* get parameter matrices */
void GetParams(TensorList& list);
......@@ -114,6 +115,13 @@ public:
/* read the parameters */
void Read(FILE* file);
public:
/* clone the model (overloaded method of XModel) */
XModel * Clone(int devID);
/* run the neural network (overloaded method of XModel) */
bool RunSimple(XList * inputs, XList * outputs, XList * golds, XList * losses);
};
}
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -28,6 +28,7 @@
#include "Utility.h"
#include "../../tensor/XGlobal.h"
#include "../../tensor/XConfig.h"
using namespace nts;
using namespace std;
......@@ -165,89 +166,7 @@ int Config::LoadFromFile(const char* configFN, char** args) {
return argsNum;
}
/*
load a string parameter from the command line
>> argc - number of arguments
>> argv - argument list
>> name - parameter name (without the leading '-')
>> p - buffer where the value is written
>> defaultP - default value used when the parameter is absent
*/
void LoadParamString(int argc, char** argv, const char* name, char* p, const char* defaultP)
{
    char vname[128];
    /* build "-name"; snprintf bounds the write so an overly long name
       cannot overflow vname (the old strcpy was unbounded) */
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for (int i = 0; i < argc; i++) {
        /* a string option must be followed by its value */
        if (!strcmp(argv[i], vname) && i + 1 < argc) {
            strcpy(p, argv[i + 1]);
            hit = true;
            break;
        }
    }
    if (!hit)
        strcpy(p, defaultP);
}
/*
load an integer parameter from the command line
>> argc - number of arguments
>> argv - argument list
>> name - parameter name (without the leading '-')
>> p - where the value is written
>> defaultP - default value used when the parameter is absent
*/
void LoadParamInt(int argc, char** argv, const char* name, int* p, int defaultP)
{
    char vname[128];
    /* build "-name"; snprintf bounds the write (the old strcpy was unbounded) */
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for (int i = 0; i < argc; i++) {
        /* an integer option must be followed by its value */
        if (!strcmp(argv[i], vname) && i + 1 < argc) {
            /* p is already int*, so the old *(int*)p cast was redundant */
            *p = atoi(argv[i + 1]);
            hit = true;
            break;
        }
    }
    if (!hit)
        *p = defaultP;
}
/*
load a boolean parameter from the command line; its mere presence means true
>> argc - number of arguments
>> argv - argument list
>> name - parameter name (without the leading '-')
>> p - where the value is written
>> defaultP - default value used when the parameter is absent
*/
void LoadParamBool(int argc, char** argv, const char* name, bool* p, bool defaultP)
{
    char vname[128];
    /* build "-name"; snprintf bounds the write (the old strcpy was unbounded) */
    snprintf(vname, sizeof(vname), "-%s", name);

    bool hit = false;
    for (int i = 0; i < argc; i++) {
        /* a boolean option takes no value - seeing the flag is enough */
        if (!strcmp(argv[i], vname)) {
            /* p is already bool*, so the old *(bool*)p cast was redundant */
            *p = true;
            hit = true;
            break;
        }
    }
    if (!hit)
        *p = defaultP;
}
/*
load a float parameter from the command line
>> argc - number of arguments
>> argv - argument list
>> name - parameter name (without the leading '-')
>> p - where the value is written
>> defaultP - default value used when the parameter is absent
*/
void LoadParamFloat(int argc, char** argv, const char* name, float* p, float defaultP)
{
    char optName[128];
    optName[0] = '-';
    strcpy(optName + 1, name);

    /* start from the default and overwrite it on a match */
    *p = defaultP;

    /* a float option must be followed by its value, so stop one short of the end */
    for (int k = 0; k + 1 < argc; k++) {
        if (strcmp(argv[k], optName) == 0) {
            *p = (float)atof(argv[k + 1]);
            return;
        }
    }
}
/*
print the command-line arguments on stderr (for logging)
>> argc - number of arguments
>> argv - argument list
*/
void ShowParams(int argc, char** argv)
{
    fprintf(stderr, "args:\n");
    for (int i = 0; i < argc; i++) {
        /* check argv[i][0] first: on an empty string the old code read
           argv[i][1], one byte past the terminator (out-of-bounds) */
        if (argv[i][0] == 0 || argv[i][1] == 0)
            continue;
        /* an option starts with '-' but is not a negative number like "-1" */
        if (argv[i][0] == '-' && (argv[i][1] < '1' || argv[i][1] > '9')) {
            if (i + 1 < argc && argv[i + 1][0] != '-')
                fprintf(stderr, " %s=%s\n", argv[i], argv[i + 1]);
            else
                fprintf(stderr, " %s=yes\n", argv[i]);
        }
    }
    fprintf(stderr, "\n");
}
/*
split string by delimiter, this will return indices of all sub-strings
......@@ -281,7 +200,9 @@ IntList SplitInt(const string& s, const string& delimiter)
IntList values;
auto indices = SplitToPos(s, delimiter);
for (int i = 0; i < indices.Size(); i++) {
values.Add(strtol(s.data() + indices[i], nullptr, 10));
/* FIXME: strtol returns a long (64-bit on most platforms); check whether IntList should store 64-bit values instead of truncating to int */
values.Add((int)strtol(s.data() + indices[i], nullptr, 10));
}
return values;
}
......@@ -297,4 +218,4 @@ FloatList SplitFloat(const string& s, const string& delimiter)
return values;
}
}
\ No newline at end of file
}
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -34,16 +34,6 @@ namespace nmt
{
#define MAX_PARAM_NUM 100
/* load arguments */
void LoadParamInt(int argc, char** argv, const char* name, int* p, int defaultP);
void LoadParamBool(int argc, char** argv, const char* name, bool* p, bool defaultP);
void LoadParamFloat(int argc, char** argv, const char* name, float* p, float defaultP);
void LoadParamString(int argc, char** argv, const char* name, char* p, const char* defaultP);
/* show arguments */
void ShowParams(int argc, char** argv);
/* split string */
IntList SplitInt(const string& s, const string& delimiter);
FloatList SplitFloat(const string& s, const string& delimiter);
......@@ -115,10 +105,10 @@ public:
/* the maximum length in positional embedding */
int maxPosition;
/* the maximum length for the source sequence */
/* the maximum length of the source sequence */
int maxSrcLen;
/* the maximum length for the target sequence */
/* the maximum length of the target sequence */
int maxTgtLen;
/* the dimension of fnn hidden layer */
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -259,7 +259,7 @@ XTensor Attention::MakeRPRAttention(XTensor& k, XTensor& q, XTensor& v,
relativeKey = ConvertDataType(relativeKey, X_FLOAT);
}
float scaling = float(sqrt(d / nhead));
float scaling = (float)sqrt(d / nhead);
qheads = ScaleAndShift(qheads, 1.0F / scaling);
dot = RPDotProduct(qheads, kheads, relativeKey, true);
......@@ -373,7 +373,7 @@ XTensor Attention::RPDotProduct(XTensor& x, XTensor& y, XTensor& z, const bool i
xTrans = Transpose(x, 0, 1);
XTensor relative;
relative = BMMul(xTrans, X_NOTRANS, z, transposeFlag);
relative = MatrixMulBatched(xTrans, X_NOTRANS, z, transposeFlag);
XTensor relativeTrans;
relativeTrans = Transpose(relative, 0, 1);
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -67,9 +67,7 @@ void FNN::InitModel(Config& config)
float scale = 1.0F;
_SetDataFanInOut(&w1, scale);
_SetDataFanInOut(&w2, scale);
//w1.SetDataRand(-(DTYPE)sqrt(6.0F / inSize), (DTYPE)sqrt(6.0F / inSize));
//w2.SetDataRand(-(DTYPE)sqrt(6.0F / hSize), (DTYPE)sqrt(6.0F / hSize));
b1.SetZeroAll();
b2.SetZeroAll();
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -29,6 +29,7 @@
#include "../../../tensor/XList.h"
#include "../../../tensor/XTensor.h"
#include "../../../tensor/XGlobal.h"
#include "../../../train/XBaseTemplate.h"
using namespace std;
......@@ -74,8 +75,8 @@ struct ReservedIDs {
};
/* A `TrainDataSet` is associated with a file which contains training data. */
struct TrainDataSet {
struct TrainDataSet : public DataDistributeBase
{
public:
/* the pointer to file stream */
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -97,7 +97,6 @@ initialization
void Trainer::Init(Config& config)
{
cfg = &config;
lrate = config.lrate;
lrbias = config.lrbias;
sBatchSize = config.sBatchSize;
......@@ -242,17 +241,8 @@ void Trainer::Train(const char* fn, const char* validFN,
DTYPE lossLocal = lossBatch / wc;
bool doUpdate = (!IsNAN(lossLocal) && !IsINF(lossLocal) && lossLocal < 1e3F);
net.isGradEfficient = true;
bool debug(false);
if (debug) {
LOG("after forward:");
batchEnc.mem->ShowMemUsage(stderr);
exit(0);
}
if (doUpdate) {
/* back-propagation */
net.Backward(lossTensor);
if (model->encoder->useHistory)
......@@ -502,6 +492,7 @@ void Trainer::Update(Model* model, const float lr)
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
/* v2 = m / (sqrt(v) + delta) */
GMems.GetMem(v->devID)->LockBuf();
XTensor* v2 = NewTensorBufV2(v, v->devID, v->mem);
_Power(v, v2, 0.5F);
_ScaleAndShiftMe(v2, 1.0F, d);
......@@ -511,6 +502,7 @@ void Trainer::Update(Model* model, const float lr)
_Sum(para, v2, para, -e);
DelTensorBuf(v2);
GMems.GetMem(v->devID)->UnlockBuf();
}
else {
/* the delta rule */
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -42,7 +42,7 @@ float LengthPenalizer::GNMT(float length, float alpha)
base = (length + 5.0F) / (1.0F + 5.0F);
lp = float(pow(base, alpha));
lp = (float)pow(base, alpha);
return lp;
}
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -322,7 +322,7 @@ void BeamSearch::Generate(StateBundle* prev, StateBundle* beam)
/* keep the most promising candidates in the beam */
TopK(score, scoreTopK, index, -1, beamSize, true);
float lp = LengthPenalizer::GNMT(beam->nstep, alpha);
//float lp = LengthPenalizer::GNMT(beam->nstep, alpha);
CopyValues(index, indexCPU);
CopyValues(index, preID);
......@@ -493,8 +493,8 @@ void BeamSearch::Collect(StateBundle* beam)
/* check if this is the first end symbol. It is false
if there have been end symbols in previously generated words. */
bool isCompleted = state.isCompleted &&
(state.last == NULL || !state.last->isCompleted);
//bool isCompleted = state.isCompleted &&
// (state.last == NULL || !state.last->isCompleted);
/* we push the hypothesis into the heap when it is completed */
if ((state.isEnd || state.isCompleted)) {
......@@ -557,7 +557,6 @@ void BeamSearch::Dump(IntList* output, XTensor* score)
}
}
int count = 0;
bool isCompleted = true;
/* we track the state from the end to the beginning */
......@@ -874,4 +873,4 @@ void GreedySearch::Search(Model* model, XTensor& input,
delete[] finishedFlags;
}
}
\ No newline at end of file
}
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -161,7 +161,7 @@ void Translator::Translate(const char* ifn, const char* sfn,
batchLoader.outputBuffer.emplace_back(emptyRes);
}
double startDump = GetClockSec();
//double startDump = GetClockSec();
/* reorder the result */
batchLoader.SortOutput();
......@@ -169,10 +169,10 @@ void Translator::Translate(const char* ifn, const char* sfn,
/* print the result to a file */
batchLoader.DumpRes(ofn);
double elapsed = GetClockSec() - startDump;
//double elapsed = GetClockSec() - startDump;
LOG("translation completed (word=%d, sent=%zu)",
wordCountTotal, batchLoader.outputBuffer.size() + batchLoader.emptyLines.size());
wordCountTotal, batchLoader.inputBuffer.size() + batchLoader.emptyLines.size());
}
/*
......@@ -202,4 +202,4 @@ void Translator::Dump(FILE* file, XTensor* output)
}
}
}
\ No newline at end of file
}
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -34,14 +34,14 @@ void Vocab::Load(const string& src)
/* get the vocab size and the start id */
f >> vsz >> sid;
startID = stol(sid);
vocabSize = stol(vsz);
startID = (int)stol(sid);
vocabSize = (int)stol(vsz);
string word, id;
for (int i = 0; i < vocabSize - startID; i++) {
f >> word >> id;
word2id[word] = stol(id);
id2word[stol(id)] = word;
word2id[word] = (int)stol(id);
id2word[(int)stol(id)] = word;
}
f.close();
......@@ -75,4 +75,4 @@ void Vocab::CopyFrom(const Vocab& v)
id2word.insert(i2w);
}
}
\ No newline at end of file
}
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.NMT - an open-source neural machine translation system.
* Copyright (C) 2020 NiuTrans Research. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* this class keeps a batch of paramters.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-02-28
*/
#include "XConfig.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* constructor */
/* constructor: start with an empty argument keeper */
XConfig::XConfig()
{
    args  = NULL;
    n     = 0;
    nReal = 0;
}
/* de-constructor: free every stored argument string, then the list itself */
XConfig::~XConfig()
{
    for (int i = 0; i < n; i++)
        delete[] args[i];
    delete[] args;
}
/* clear it: release all arguments and return to the freshly-constructed state */
void XConfig::Clear()
{
    for (int i = 0; i < n; i++)
        delete[] args[i];
    delete[] args;

    args  = NULL;
    n     = 0;
    nReal = 0;
}
/*
create a config from a command-line style argument list
>> myN - number of the input arguments
>> myArgs - the input arguments (each is deep-copied)
*/
void XConfig::Create(const int myN, const char ** myArgs)
{
    CheckNTErrors(myN > 0, "No input parameters to XConfig!");

    /* drop whatever we kept before */
    for (int i = 0; i < n; i++)
        delete[] args[i];
    delete[] args;
    args = NULL;

    n = myN;

    /* allocate twice the needed slots so later Add() calls are cheap */
    nReal = n * 2;
    args = new char*[nReal];
    for (int i = 0; i < nReal; i++)
        args[i] = NULL;

    /* deep-copy every input string */
    for (int i = 0; i < n; i++) {
        CheckNTErrors(myArgs[i] != NULL, "Illegal parameter input!");
        args[i] = new char[strlen(myArgs[i]) + 1];
        strcpy(args[i], myArgs[i]);
    }
}
/*
add an argument. The name is stored with a leading '-' so that the list
matches the command-line form expected by the LoadParamXXX() parsers.
>> myArg - the argument (without the leading '-')
>> myValue - the value of the argument (NULL means a flag with no value)
*/
void XConfig::Add(const char * myArg, const char * myValue)
{
    CheckNTErrors(myArg != NULL, "No argument!");

    /* grow the argument array if there is no room for two more entries */
    if (n + 2 > nReal) {
        nReal = MAX(n * 2 + 1, 128);
        char ** newArgs = new char*[nReal];

        /* BUGFIX: zero the WHOLE new array. The original cleared only the
           first n slots, leaving the tail [n, nReal) uninitialized. */
        memset(newArgs, 0, sizeof(char*) * nReal);
        memcpy(newArgs, args, sizeof(char*) * n);
        delete[] args;
        args = newArgs;
    }

    /* store the argument name with a leading '-' */
    args[n] = new char[strlen(myArg) + 2];
    args[n][0] = '-';
    strcpy(args[n] + 1, myArg);
    n++;

    /* store the value (if any) right after the name */
    if (myValue != NULL) {
        args[n] = new char[strlen(myValue) + 1];
        strcpy(args[n], myValue);
        n++;
    }
}
/*
add an argument (in integer)
>> myArg - the argument
>> myValue - the value of the argument
*/
void XConfig::Add(const char * myArg, int myValue)
{
char value[MAX_WORD_LENGTH_IN_CONFIG];
sprintf(value, "%d", myValue);
Add(myArg, value);
}
/*
add an argument (in bool)
>> myArg - the argument (without the leading '-')
>> myValue - the value of the argument; booleans are stored as "1" or "0"
*/
void XConfig::Add(const char * myArg, bool myValue)
{
    char text[2] = { myValue ? '1' : '0', '\0' };
    Add(myArg, text);
}
/*
add an argument (in float)
>> myArg - the argument
>> myValue - the value of the argument
*/
void XConfig::Add(const char * myArg, float myValue)
{
char value[MAX_WORD_LENGTH_IN_CONFIG];
sprintf(value, "%f", myValue);
Add(myArg, value);
}
/*
load the value of an argument (in integer)
>> name - the name of the argument (without the leading '-')
>> p - where we place the loaded value
>> defaultP - the default value (used only if no argument is hit in the list)
*/
void XConfig::LoadInt(const char * name, int * p, int defaultP)
{
    /* delegate to the free-function parser over our stored argument list */
    LoadParamInt(n, args, name, p, defaultP);
}
/*
load the value of an argument (in boolean). The flag is true if present.
>> name - the name of the argument (without the leading '-')
>> p - where we place the loaded value
>> defaultP - the default value (used only if no argument is hit in the list)
*/
void XConfig::LoadBool(const char * name, bool * p, bool defaultP)
{
    /* delegate to the free-function parser over our stored argument list */
    LoadParamBool(n, args, name, p, defaultP);
}
/*
load the value of an argument (in float)
>> name - the name of the argument (without the leading '-')
>> p - where we place the loaded value
>> defaultP - the default value (used only if no argument is hit in the list)
*/
void XConfig::LoadFloat(const char * name, float * p, float defaultP)
{
    /* delegate to the free-function parser over our stored argument list */
    LoadParamFloat(n, args, name, p, defaultP);
}
/*
load the value of an argument (in char string)
>> name - the name of the argument (without the leading '-')
>> p - where we place the loaded value (caller-supplied buffer, assumed
       large enough — TODO confirm against callers)
>> defaultP - the default value (used only if no argument is hit in the list)
*/
void XConfig::LoadString(const char * name, char * p, const char* defaultP)
{
    /* delegate to the free-function parser over our stored argument list */
    LoadParamString(n, args, name, p, defaultP);
}
/*
get the value of an argument (in integer)
>> name - the name of the argument (without the leading '-')
>> defaultP - the default value (used only if no argument is hit in the list)
<< return - the loaded (or default) value
*/
int XConfig::GetInt(const char * name, int defaultP)
{
    int value;
    LoadInt(name, &value, defaultP);
    return value;
}
/*
get the value of an argument (in bool)
>> name - the name of the argument (without the leading '-')
>> defaultP - the default value (used only if no argument is hit in the list)
<< return - the loaded (or default) value
*/
bool XConfig::GetBool(const char * name, bool defaultP)
{
    bool value;
    LoadBool(name, &value, defaultP);
    return value;
}
/*
get the value of an argument (in float)
>> name - the name of the argument (without the leading '-')
>> defaultP - the default value (used only if no argument is hit in the list)
<< return - the loaded (or default) value
*/
float XConfig::GetFloat(const char * name, float defaultP)
{
    float value;
    LoadFloat(name, &value, defaultP);
    return value;
}
/* get item number, i.e., how many strings (names + values) are stored */
int XConfig::GetItemNum()
{
    return n;
}
/*
get the item with offset i
>> i - offset
<< return - the stored string, or NULL if i is out of range
*/
char * XConfig::GetItem(int i)
{
    /* out-of-range offsets yield NULL rather than an error */
    if (i < 0 || i >= n)
        return NULL;
    return args[i];
}
/*
initialize with another config model (deep copy)
>> myConfig - the configure model that we want to copy
*/
void XConfig::CreateFromMe(XConfig & myConfig)
{
    Clear();

    /* BUGFIX: the original called Add(GetItem(i), i), which prefixed every
       copied item with an extra '-' and appended the loop index as a bogus
       value, so the copy did not reproduce the source config. Copy the raw
       argument array instead (Create() deep-copies every string). */
    if (myConfig.GetItemNum() > 0)
        Create(myConfig.GetItemNum(), (const char **)myConfig.args);
}
/*
load the value of an argument (in integer)
>> argc - number of arguments
>> argv - arguments
>> name - the argument we search for (without the leading '-')
>> p - the pointer to the target variable where we want to place the value
>> defaultP - the default value we use if no argument is found
*/
void LoadParamInt(int argc, char** argv, const char* name, int* p, int defaultP)
{
    /* the argument appears on the command line with a leading '-' */
    char key[128];
    key[0] = '-';
    strcpy(key + 1, name);

    /* a hit needs the name AND a following value token */
    for (int i = 0; i + 1 < argc; i++) {
        if (strcmp(argv[i], key) == 0) {
            *p = atoi(argv[i + 1]);
            return;
        }
    }

    /* no hit: fall back to the default */
    *p = defaultP;
}
/*
load the value of an argument (in boolean)
>> argc - number of arguments
>> argv - arguments
>> name - the argument we search for (without the leading '-')
>> p - the pointer to the target variable where we want to place the value
>> defaultP - the default value we use if no argument is found
*/
void LoadParamBool(int argc, char** argv, const char* name, bool* p, bool defaultP)
{
    /* the argument appears on the command line with a leading '-' */
    char key[128];
    key[0] = '-';
    strcpy(key + 1, name);

    /* a boolean flag is true as soon as it appears; it carries no value */
    for (int i = 0; i < argc; i++) {
        if (strcmp(argv[i], key) == 0) {
            *p = true;
            return;
        }
    }

    /* no hit: fall back to the default */
    *p = defaultP;
}
/*
load the value of an argument (in float)
>> argc - number of arguments
>> argv - arguments
>> name - the argument we search for (without the leading '-')
>> p - the pointer to the target variable where we want to place the value
>> defaultP - the default value we use if no argument is found
*/
void LoadParamFloat(int argc, char** argv, const char* name, float* p, float defaultP)
{
    /* the argument appears on the command line with a leading '-' */
    char key[128];
    key[0] = '-';
    strcpy(key + 1, name);

    /* a hit needs the name AND a following value token */
    for (int i = 0; i + 1 < argc; i++) {
        if (strcmp(argv[i], key) == 0) {
            *p = (float)atof(argv[i + 1]);
            return;
        }
    }

    /* no hit: fall back to the default */
    *p = defaultP;
}
/*
load the value of an argument (in char string)
>> argc - number of arguments
>> argv - arguments
>> name - the argument we search for (without the leading '-')
>> p - the pointer to the target buffer where we want to place the value
       (assumed large enough — TODO confirm against callers)
>> defaultP - the default value we use if no argument is found (may be NULL)
*/
void LoadParamString(int argc, char** argv, const char* name, char* p, const char* defaultP)
{
    /* the argument appears on the command line with a leading '-' */
    char key[128];
    key[0] = '-';
    strcpy(key + 1, name);

    /* a hit needs the name AND a following value token */
    for (int i = 0; i + 1 < argc; i++) {
        if (strcmp(argv[i], key) == 0) {
            strcpy(p, argv[i + 1]);
            return;
        }
    }

    /* BUGFIX: the original strcpy'd from defaultP unconditionally, crashing
       when the caller passed NULL as the default. Yield "" in that case. */
    if (defaultP != NULL)
        strcpy(p, defaultP);
    else
        p[0] = '\0';
}
/*
show the argument list on stderr as "name=value" (or "name=yes" for flags)
>> argc - number of arguments
>> argv - arguments
*/
void ShowParams(int argc, char** argv)
{
    fprintf(stderr, "args:\n");
    for (int i = 0; i < argc; i++) {
        /* BUGFIX: test argv[i][0] before argv[i][1] - the original read
           argv[i][1] first, an out-of-bounds read for an empty string "" */
        if (argv[i][0] != '-' || argv[i][1] == 0)
            continue;

        /* skip tokens like "-3" which are negative-number values, not names */
        if (argv[i][1] >= '1' && argv[i][1] <= '9')
            continue;

        if (i + 1 < argc && argv[i + 1][0] != '-')
            fprintf(stderr, " %s=%s\n", argv[i], argv[i + 1]);
        else
            fprintf(stderr, " %s=yes\n", argv[i]);
    }
    fprintf(stderr, "\n");
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* this class defines a parameter keeper.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-02-28
* A new semester begins today.
*/
#ifndef __XCONFIG_H__
#define __XCONFIG_H__
#include "XGlobal.h"
#include "XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_WORD_LENGTH_IN_CONFIG 256
/* the parameter keeper */
class XConfig
{
private:
/* number of arguments */
int n;
/* argument list (in char*) */
char ** args;
/* number of items we rellocate for these arguments */
int nReal;
public:
/* constructor */
XConfig();
/* de-constructor */
~XConfig();
/* clear it */
void Clear();
/* create a config */
void Create(const int myN, const char ** myArgs);
/* add an argument */
void Add(const char * myArg, const char * myValue);
/* add an argument (in integer) */
void Add(const char * myArg, int myValue);
/* add an argument (in bool) */
void Add(const char * myArg, bool myValue);
/* add an argument (in float) */
void Add(const char * myArg, float myValue);
/* load the value of an argument to a variable (in integer) */
void LoadInt(const char * name, int * p, int defaultP);
/* load the value of an argument to a variable (in boolean) */
void LoadBool(const char * name, bool * p, bool defaultP);
/* load the value of an argument to a variable (in float) */
void LoadFloat(const char * name, float * p, float defaultP);
/* load the value of an argument to a variable (in char string) */
void LoadString(const char * name, char * p, const char* defaultP);
/* get the value of an argument (in integer) */
int GetInt(const char * name, int defaultP);
/* get the value of an argument (in boolean) */
bool GetBool(const char * name, bool defaultP);
/* get the value of an argument (in float) */
float GetFloat(const char * name, float defaultP);
/* get item number */
int GetItemNum();
/* get the item with offset i */
char * GetItem(int i);
/* initialize with another config model */
void CreateFromMe(XConfig &myConfig);
};
#define MAX_PARAM_NUM 100

/* load a single argument of the given type from an argv-style list */
extern void LoadParamInt(int argc, char** argv, const char* name, int* p, int defaultP);
extern void LoadParamBool(int argc, char** argv, const char* name, bool* p, bool defaultP);
extern void LoadParamFloat(int argc, char** argv, const char* name, float* p, float defaultP);
extern void LoadParamString(int argc, char** argv, const char* name, char* p, const char* defaultP);

/* show the argument list on stderr */
extern void ShowParams(int argc, char** argv);
} // namespace nts(NiuTrans.Tensor)
#endif
\ No newline at end of file
......@@ -182,10 +182,11 @@ void XDevice::Reset()
XMem * mem = GMems.GetMem(devID);
mem->Free();
#ifdef USE_CUDA
int devIDReset = devID;
Clear();
#ifdef USE_CUDA
if (devIDReset >= 0) {
int devIDBackup = -1;
cudaGetDevice(&devIDBackup);
......@@ -195,6 +196,8 @@ void XDevice::Reset()
cudaSetDevice(devIDBackup);
}
#else
Clear();
#endif
}
......
......@@ -132,6 +132,36 @@ extern int TRAINING_SAMPLE_BUF_SIZE;
extern int CONST_MINUSONE;
extern bool CONST_TRUE;
//////////////////////////////////////////////////
// mutex
#ifdef WIN32
#define THREAD_HANDLE HANDLE
#define MUTEX_HANDLE CRITICAL_SECTION
#define COND_HANDLE HANDLE
#define MUTEX_INIT( x ) InitializeCriticalSection( &(x) )
#define MUTEX_DELE( x ) DeleteCriticalSection( &(x) )
#define MUTEX_LOCK( x ) EnterCriticalSection( &(x) )
#define MUTEX_UNLOCK( x ) LeaveCriticalSection( &(x) )
#define COND_INIT( x ) ( x = CreateEvent( NULL, false, false, NULL ) )
#define COND_DELE( x ) CloseHandle( (x) )
#define COND_WAIT( x, y ) WaitForSingleObject( (x), INFINITE )
#define COND_SIGNAL( x ) SetEvent( (x) )
#define COND_RESET( x) ResetEvent( (x) )
#else
#define THREAD_HANDLE pthread_t
#define MUTEX_HANDLE pthread_mutex_t
#define COND_HANDLE pthread_cond_t
#define MUTEX_INIT( x ) pthread_mutex_init( &(x), NULL )
#define MUTEX_DELE( x ) pthread_mutex_destroy( &(x) )
#define MUTEX_LOCK( x ) pthread_mutex_lock( &(x) )
#define MUTEX_UNLOCK( x ) pthread_mutex_unlock( &(x) )
#define COND_INIT( x ) pthread_cond_init( &(x), NULL )
#define COND_DELE( x ) pthread_cond_destroy( &(x) )
#define COND_WAIT( x, y ) pthread_cond_wait( &(x), &(y) )
#define COND_SIGNAL( x ) pthread_cond_signal( &(x) )
#define COND_BROADCAST( x ) pthread_cond_broadcast( &(x) )
#endif
//#define USE_CUDA_RESURSION 1
#define NIUTRANSNNDEBUG
......
......@@ -75,6 +75,9 @@ public:
/* de-constructor */
~TensorListBase();
/* reallocate */
void Reallocate(int itemNum);
/* add an item into the list */
void Add(T&& item);
......@@ -84,6 +87,15 @@ public:
/* add an item into the list */
void Add(const T& item);
/* add an item (as an integer) into the list */
void AddInt(const int item);
/* add an item (as a float) into the list */
void AddFloat(const float item);
/* add an item (as a long long) into the list */
void AddLLong(const long long item);
/* add a number of items into the list */
void Add(const T* inputItems, int inputItemCount);
......@@ -99,12 +111,30 @@ public:
/* get the item at position i */
T& GetItem(int i) const;
/* get the item at position i and force it to an integer */
int GetItemInt(int i) const;
/* get the item at position i and force it to a float number */
float GetItemFloat(int i) const;
/* get the item at position i and force it to an long long number */
long long GetItemLLong(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* set the item (as an integer) at position i */
void SetItemInt(int i, const int item);
/* set the item (as a float) at position i */
void SetItemFloat(int i, const float item);
/* set the item (as a long long) at position i */
void SetItemLLong(int i, const long long item);
/* find the position of the first matched item */
int FindFirst(const T& item);
......@@ -135,7 +165,13 @@ public:
/* short */
T& operator[] (int i) const { return GetItem(i); };
T& Get(int i) const { return GetItem(i); };
int GetInt(int i) const { return GetItemInt(i); };
float GetFloat(int i) const { return GetItemFloat(i); };
long long GetLLong(int i) const { return GetItemLLong(i); };
void Set(int i, T item) { SetItem(i, item); };
void SetInt(int i, int item) { SetItemInt(i, item); };
void SetFloat(int i, float item) { SetItemFloat(i, item); };
void SetLLong(int i, long long item) { SetItemLLong(i, item); };
};
struct XTensor;
......
......@@ -54,6 +54,8 @@ XMem::XMem()
signature = 0;
mergeFreeOTF = true;
isInitialized = false;
MUTEX_INIT(allocMutex);
MUTEX_INIT(bufMutex);
}
/*
......@@ -77,6 +79,8 @@ XMem::XMem(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int myBlockNum,
strcpy(name, "xmem");
signature = 0;
mergeFreeOTF = true;
MUTEX_INIT(allocMutex);
MUTEX_INIT(bufMutex);
Initialize(myDevID, myMode, myBlockSize, myBlockNum, myBufSize);
}
......@@ -99,6 +103,8 @@ XMem::~XMem()
delete[] memIndex;
delete[] memIndex2;
delete[] minSizeIndex;
MUTEX_DELE(allocMutex);
MUTEX_DELE(bufMutex);
}
/*
......@@ -379,12 +385,18 @@ require a piece of memory
*/
void * XMem::Alloc(int myDevID, MTYPE mySize)
{
void * p = NULL;
MUTEX_LOCK(allocMutex);
if(mode == FREE_ON_THE_FLY)
return AllocStandard(myDevID, mySize);
p = AllocStandard(myDevID, mySize);
else if(isStatic)
return AllocStatic(myDevID, mySize);
p = AllocStatic(myDevID, mySize);
else
return AllocDynamic(myDevID, mySize);
p = AllocDynamic(myDevID, mySize);
MUTEX_UNLOCK(allocMutex);
return p;
}
/*
......@@ -521,6 +533,11 @@ void * XMem::AllocBuf(int myDevID, MTYPE mySize, int pitch)
{
MTYPE backOffset = 0;
/* NOTE THAT this is tricky because we lock the buffer
but DO NOT unlock it in this function. The unlock would
happans when we call ReleaseBuf() */
//MUTEX_LOCK(bufMutex);
if(pitch > 1){
MTYPE address = (MTYPE)((char*)buf + bufUsed);
int offset = address % pitch;
......@@ -560,8 +577,10 @@ release a piece of memory
*/
void XMem::Release(int myDevID, void * p, MTYPE size)
{
MUTEX_LOCK(allocMutex);
if(mode == FREE_ON_THE_FLY)
ReleaseStandard(myDevID, p, size);
MUTEX_UNLOCK(allocMutex);
}
/*
......@@ -583,6 +602,9 @@ void XMem::ReleaseBuf(int myDevID, MTYPE mySize, int pitch)
}
bufUsed -= (mySize + backOffset);
/* NOTE THAT this is a response to the lock in AllocBuf() */
//MUTEX_UNLOCK(bufMutex);
}
/*
......@@ -825,6 +847,18 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
return result;
}
/* lock the buffer mutex */
void XMem::LockBuf()
{
MUTEX_LOCK(bufMutex);
}
/* unlock the buffer mutex */
void XMem::UnlockBuf()
{
MUTEX_UNLOCK(bufMutex);
}
/*
find the highest set bit (or most significant set bit) in an integer-64
>> mySize - required size
......@@ -1604,6 +1638,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
}
}
}
else {
ShowNTErrors("No enough memory for buffer allocation!");
}
}
/* initialize it and set the global memory information */
......
......@@ -24,6 +24,7 @@
#ifndef __XMEM_H__
#define __XMEM_H__
#include "XGlobal.h"
#include <stdio.h>
#include <stdlib.h>
......@@ -249,6 +250,13 @@ public:
/* indicates whether we merge free memory pieces on the fly */
bool mergeFreeOTF;
private:
/* a mutex for memory allocation and release */
MUTEX_HANDLE allocMutex;
/* a mutex for buffer memory allocation and release */
MUTEX_HANDLE bufMutex;
public:
/* constructor */
......@@ -337,6 +345,12 @@ public:
/* allocate a piece of memory as "malloc" */
void * AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex = false);
/* lock the buffer mutex */
void LockBuf();
/* unlock the buffer mutex */
void UnlockBuf();
/* find the highest set bit (or most significant set bit) in an integer-64 */
int GetMSB(MTYPE mySize);
......
......@@ -215,7 +215,8 @@ void XQueue::DequeueJobs(XList * args)
int devID = *(int*)args->GetItem(1);
int devIDBackup = -1;
XDevice::SetDevice(devID, devIDBackup);
if(devID >= 0)
XDevice::SetDevice(devID, devIDBackup);
while(1){
JobQueueNode * node = (JobQueueNode*)q->Dequeue();
......@@ -236,7 +237,8 @@ void XQueue::DequeueJobs(XList * args)
}
XDevice::SetDevice(devIDBackup);
if(devID >= 0)
XDevice::SetDevice(devIDBackup);
}
/* get the break flag */
......@@ -248,7 +250,11 @@ bool XQueue::GetJobBreak()
/* get the number of jobs */
int XQueue::GetJobNum()
{
return runningJobCount;
MUTEX_LOCK(jobQueueMutex);
int c = runningJobCount;
MUTEX_UNLOCK(jobQueueMutex);
return c;
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -1985,6 +1985,19 @@ void XTensor::FlushToMem(XMem* targetMem)
}
}
/*
flush the data to the target device (with id)
>> myDevID - id of the target device
*/
void XTensor::FlushToDevice(int myDevID)
{
if (myDevID == devID)
return;
XMem * myMem = GMems.GetMem(myDevID);
FlushToMem(myMem);
}
/*
allocate the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process
......
......@@ -457,6 +457,9 @@ public:
/* flush the data to the target device */
void FlushToMem(XMem * targetMem);
/* flush the data to the target device (with id) */
void FlushToDevice(int myDevID);
/* allocate the memory space of the tensor (in the global memory) */
static
void AllocateData(XTensor * tensor, XMem * myMem = NULL, bool useBuf = false);
......
......@@ -54,37 +54,6 @@ namespace nts{
(unsigned)(flag), (unsigned *)(id))
#endif
//////////////////////////////////////////////////
// mutex
#ifdef WIN32
#define THREAD_HANDLE HANDLE
#define MUTEX_HANDLE CRITICAL_SECTION
#define COND_HANDLE HANDLE
#define MUTEX_INIT( x ) InitializeCriticalSection( &(x) )
#define MUTEX_DELE( x ) DeleteCriticalSection( &(x) )
#define MUTEX_LOCK( x ) EnterCriticalSection( &(x) )
#define MUTEX_UNLOCK( x ) LeaveCriticalSection( &(x) )
#define COND_INIT( x ) ( x = CreateEvent( NULL, false, false, NULL ) )
#define COND_DELE( x ) CloseHandle( (x) )
#define COND_WAIT( x, y ) WaitForSingleObject( (x), INFINITE )
#define COND_SIGNAL( x ) SetEvent( (x) )
#define COND_RESET( x) ResetEvent( (x) )
#else
#define THREAD_HANDLE pthread_t
#define MUTEX_HANDLE pthread_mutex_t
#define COND_HANDLE pthread_cond_t
#define MUTEX_INIT( x ) pthread_mutex_init( &(x), NULL )
#define MUTEX_DELE( x ) pthread_mutex_destroy( &(x) )
#define MUTEX_LOCK( x ) pthread_mutex_lock( &(x) )
#define MUTEX_UNLOCK( x ) pthread_mutex_unlock( &(x) )
#define COND_INIT( x ) pthread_cond_init( &(x), NULL )
#define COND_DELE( x ) pthread_cond_destroy( &(x) )
#define COND_WAIT( x, y ) pthread_cond_wait( &(x), &(y) )
#define COND_SIGNAL( x ) pthread_cond_signal( &(x) )
#define COND_BROADCAST( x ) pthread_cond_broadcast( &(x) )
#endif
typedef void (*TFunction) (volatile XList*);
/*
......
......@@ -155,13 +155,13 @@ void XMemSet(int devID, void * p, int value, size_t size)
cudaMemcpyKind GetMemcpyKind(int devIDFrom, int devIDTo)
{
if(devIDFrom < 0 && devIDTo < 0)
return cudaMemcpyHostToHost;
return cudaMemcpyKind::cudaMemcpyHostToHost;
else if(devIDFrom < 0 && devIDTo >= 0)
return cudaMemcpyHostToDevice;
return cudaMemcpyKind::cudaMemcpyHostToDevice;
else if(devIDFrom >= 0 && devIDTo < 0)
return cudaMemcpyDeviceToHost;
return cudaMemcpyKind::cudaMemcpyDeviceToHost;
else
return cudaMemcpyDeviceToDevice;
return cudaMemcpyKind::cudaMemcpyDeviceToDevice;
}
#endif
......@@ -485,6 +485,9 @@ unsigned int GetNextPower2(unsigned int n)
/* sleep for a while */
void XSleep(int sleepTime)
{
if (sleepTime <= 0)
return;
#ifdef _WIN32
Sleep((DWORD)sleepTime);
#else
......@@ -553,9 +556,9 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
stackptr = 0;
lo = (char*)data;
hi = (char*)data + realStride * (num - 1);
hi = (char*)data + (long)realStride * (num - 1);
indexlo = (int*)index;
indexhi = index != NULL ? (int*)index + stride * (num - 1) : NULL;
indexhi = index != NULL ? (int*)index + (long)stride * (num - 1) : NULL;
recurse:
......@@ -565,8 +568,8 @@ recurse:
if(size <= MIN_QSORT_NUM)
XShortSort(lo, hi, indexlo, indexhi, width, stride, comp);
else {
mid = lo + (size/2) * realStride;
indexmid = indexlo + (size/2) * stride;
mid = lo + (long)(size/2) * realStride;
indexmid = indexlo + (long)(size/2) * stride;
/* sort the first, last and middle elements into order */
if(comp(lo, mid) > 0)
......@@ -834,8 +837,7 @@ int SplitALine(char* inputString, const char* seperator, StrList* items)
return 0;
if (sepLen == 0) {
char* item = new char[inputLen + 1];
char* item = new char[(long)inputLen + 1];
strcpy(item, inputString);
items->Add(item);
}
......
......@@ -253,15 +253,25 @@ void Div(const XTensor & a, const XTensor & b, XTensor & c, DTYPE alpha, int lea
if (b.order == 0){
DTYPE scale = 1.0F / b.Get0D();
if (a.mem != NULL)
a.mem->LockBuf();
XTensor * tmp1 = NewTensorBufV2(&a, a.devID, a.mem);
if ((c.mem != NULL) && (c.mem != a.mem)) {
c.mem->LockBuf();
}
XTensor * tmp2 = NewTensorBufV2(&c, c.devID, c.mem);
ScaleAndShift(a, *tmp1, scale, 0.0F);
ScaleAndShift(c, *tmp2, alpha, 0.0F);
Sum(*tmp2, *tmp1, c);
DelTensorBuf(tmp1);
DelTensorBuf(tmp2);
if ((c.mem != NULL) && (c.mem != a.mem)) {
c.mem->UnlockBuf();
}
DelTensorBuf(tmp1);
if (a.mem != NULL)
a.mem->UnlockBuf();
}
else {
int n = GetBroadcastDimIndex(a, b);
......
......@@ -61,6 +61,8 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
if (x.mem != NULL)
x.mem->LockBuf();
XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
......@@ -101,6 +103,8 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
/* destroy variables */
delete[] dimSize;
DelTensorBuf(tmp);
if (x.mem != NULL)
x.mem->UnlockBuf();
return c;
}
......@@ -121,8 +125,8 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedX,
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
int xn = transposedX == X_TRANS ? x.dimSize[x.order - 1] : x.dimSize[x.order - 2];
int xm = transposedX == X_TRANS ? x.dimSize[x.order - 2] : x.dimSize[x.order - 1];
int wn = transposedW == X_TRANS ? w.dimSize[w.order - 1] : w.dimSize[w.order - 2];
//int xm = transposedX == X_TRANS ? x.dimSize[x.order - 2] : x.dimSize[x.order - 1];
//int wn = transposedW == X_TRANS ? w.dimSize[w.order - 1] : w.dimSize[w.order - 2];
int wm = transposedW == X_TRANS ? w.dimSize[w.order - 2] : w.dimSize[w.order - 1];
int order = x.order + w.order - 2;
......@@ -137,6 +141,8 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedX,
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
if (x.mem != NULL)
x.mem->LockBuf();
XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
......@@ -175,8 +181,10 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedX,
/* destroy variables */
delete[] dimSize;
DelTensorBuf(tmp);
if (x.mem != NULL)
x.mem->UnlockBuf();
return c;
}
}
\ No newline at end of file
}
......@@ -277,15 +277,25 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
if (b.order == 0){
DTYPE scale = b.Get0D();
if (a.mem != NULL)
a.mem->LockBuf();
XTensor * tmp1 = NewTensorBufV2(&a, a.devID, a.mem);
if ((c.mem != NULL) && (c.mem != a.mem)) {
c.mem->LockBuf();
}
XTensor * tmp2 = NewTensorBufV2(&c, c.devID, c.mem);
ScaleAndShift(a, *tmp1, scale, 0.0F);
ScaleAndShift(c, *tmp2, alpha, 0.0F);
Sum(*tmp2, *tmp1, c);
DelTensorBuf(tmp1);
DelTensorBuf(tmp2);
if ((c.mem != NULL) && (c.mem != a.mem)) {
c.mem->UnlockBuf();
}
DelTensorBuf(tmp1);
if (a.mem != NULL)
a.mem->UnlockBuf();
}
else {
int n = GetBroadcastDimIndex(a, b);
......
......@@ -290,9 +290,16 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
source = target;
}
target = t->mem != NULL ?
/*target = t->mem != NULL ?
t->mem->AllocBuf(t->devID, t->unitNum * t->unitSize):
XMemAlloc(t->devID, t->unitNum * t->unitSize);
XMemAlloc(t->devID, t->unitNum * t->unitSize);*/
if (t->mem != NULL) {
t->mem->LockBuf();
target = t->mem->AllocBuf(t->devID, t->unitNum * t->unitSize);
}
else {
target = XMemAlloc(t->devID, t->unitNum * t->unitSize);
}
s->data = source;
t->data = target;
......@@ -302,8 +309,9 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
/* free the memory space of the one before the last allocation */
if(count > 0){
int size = s->unitNum * s->unitSize;
if(t->mem != NULL)
if(t->mem != NULL) {
t->mem->ReleaseBuf(t->devID, size);
}
else
XMemFree(t->devID, source);
}
......@@ -312,8 +320,10 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
if(isLast){
CheckNTErrors(t->unitNum == c->unitNum, "Wrong tensor size!");
_Multiply(a, t, c, beta);
if(t->mem != NULL)
if(t->mem != NULL) {
t->mem->ReleaseBuf(t->devID, t->unitNum * t->unitSize);
t->mem->UnlockBuf();
}
else
XMemFree(t->devID, target);
target = NULL;
......
......@@ -293,10 +293,16 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
source = target;
}
target = t->mem != NULL ?
/*target = t->mem != NULL ?
t->mem->AllocBuf(t->devID, t->unitNum * t->unitSize):
XMemAlloc(t->devID, t->unitNum * t->unitSize);
XMemAlloc(t->devID, t->unitNum * t->unitSize);*/
if (t->mem != NULL) {
t->mem->LockBuf();
target = t->mem->AllocBuf(t->devID, t->unitNum * t->unitSize);
}
else {
target = XMemAlloc(t->devID, t->unitNum * t->unitSize);
}
s->data = source;
t->data = target;
......@@ -315,8 +321,10 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
if(isLast){
CheckNTErrors(t->unitNum == c->unitNum, "Wrong tensor size!");
_Sum(a, t, c, beta);
if(t->mem != NULL)
if(t->mem != NULL) {
t->mem->ReleaseBuf(t->devID, t->unitNum * t->unitSize);
t->mem->UnlockBuf();
}
else
XMemFree(t->devID, target);
target = NULL;
......
......@@ -330,6 +330,7 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
DTYPE ** cpGPU = NULL;
if (mem != NULL) {
mem->LockBuf();
mem->SetPinBuf();
apGPU = (DTYPE**)mem->AllocBuf(mem->devID, sizeof(DTYPE*) * a->count, 256);
bpGPU = (DTYPE**)mem->AllocBuf(mem->devID, sizeof(DTYPE*) * a->count, 256);
......@@ -356,8 +357,10 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
delete[] bp;
delete[] cp;
if(mem != NULL)
if (mem != NULL) {
mem->BackToPinBuf();
mem->UnlockBuf();
}
else {
XMemFree(a0->devID, apGPU);
XMemFree(a0->devID, bpGPU);
......
......@@ -96,9 +96,12 @@ XTensor OnehotToIndex(const XTensor & onehot, int size)
/*
convert index tensor to onehot tensor
>> index - index tensor, which value is an integer num
>> onehot - onehot tensor, which value is 0 or 1
>> size - the last dimension size of the onehot tensor
>> index - index of the output dimension (over the vocabulary)
>> onehot - one-hot representation of the index
>> size - vocabuary size (last dimension size of onehot)
>> labelSmoothingP - the parameter that controls how smooth the output is.
E.g., p = 0 means no smoothing
p = 1 means a uniform distribution (almost)
*/
void _IndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float labelSmoothingP)
......
......@@ -696,13 +696,23 @@ void _SetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYPE nu
#ifdef USE_CUDA
XMem * mem = tensor->mem;
MTYPE size = num * sizeof(MTYPE);
MTYPE * offsetsCuda = mem != NULL ? (MTYPE*)mem->AllocBuf(mem->devID, size) : (MTYPE*)XMemAlloc(tensor->devID, size);
//MTYPE * offsetsCuda = mem != NULL ? (MTYPE*)mem->AllocBuf(mem->devID, size) : (MTYPE*)XMemAlloc(tensor->devID, size);
MTYPE * offsetsCuda;
if (mem != NULL) {
mem->LockBuf();
offsetsCuda = (MTYPE*)mem->AllocBuf(mem->devID, size);
}
else {
offsetsCuda = (MTYPE*)XMemAlloc(tensor->devID, size);
}
XMemCopy(offsetsCuda, tensor->devID, offsets, -1, num * sizeof(MTYPE));
_CudaSetDataWithOffset(tensor, offsetsCuda, value, num);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, size);
mem->UnlockBuf();
}
else
XMemFree(tensor->devID, offsetsCuda);
#else
......
......@@ -636,12 +636,23 @@ void _CudaSetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * va
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
MTYPE * offsetsCuda = mem != NULL ?
/*MTYPE * offsetsCuda = mem != NULL ?
(MTYPE*)mem->AllocBuf(mem->devID, offsetSize) :
(MTYPE*)XMemAlloc(tensor->devID, offsetSize);
void * valuesCuda = mem != NULL ?
mem->AllocBuf(mem->devID, valueSize) :
XMemAlloc(tensor->devID, valueSize);
void * valuesCuda = mem != NULL ?
mem->AllocBuf(mem->devID, valueSize) :
XMemAlloc(tensor->devID, valueSize);*/
MTYPE * offsetsCuda;
void * valuesCuda;
if (mem != NULL) {
mem->LockBuf();
offsetsCuda = (MTYPE*)mem->AllocBuf(mem->devID, offsetSize);
valuesCuda = mem->AllocBuf(mem->devID, valueSize);
}
else {
offsetsCuda = (MTYPE*)XMemAlloc(tensor->devID, offsetSize);
valuesCuda = XMemAlloc(tensor->devID, valueSize);
}
if (mem != NULL) {
XMemCopy(offsetsCuda, mem->devID, offsets, -1, offsetSize);
......@@ -657,6 +668,7 @@ void _CudaSetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * va
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, valueSize);
mem->ReleaseBuf(mem->devID, offsetSize);
mem->UnlockBuf();
}
else {
XMemFree(tensor->devID, valuesCuda);
......
......@@ -45,15 +45,25 @@ void _CopyBlocks(void * source, int unitSize, int blockSize, int blockNum, void
if (devID >= 0) {
#ifdef USE_CUDA
/* copy the index from host to device */
int * targetBlocksTMP = myMem != NULL ?
/*int * targetBlocksTMP = myMem != NULL ?
(int*)myMem->AllocBuf(devID, blockNum * sizeof(int)):
(int*)XMemAlloc(devID, blockNum * sizeof(int));
(int*)XMemAlloc(devID, blockNum * sizeof(int));*/
int * targetBlocksTMP;
if (myMem != NULL) {
myMem->LockBuf();
targetBlocksTMP = (int*)myMem->AllocBuf(devID, blockNum * sizeof(int));
}
else {
targetBlocksTMP = (int*)XMemAlloc(devID, blockNum * sizeof(int));
}
XMemCopy(targetBlocksTMP, devID, targetBlocks, -1, blockNum * sizeof(int));
_CopyBlocksOnSite(source, unitSize, blockSize, blockNum, target, targetBlocksTMP, devID);
if(myMem != NULL)
if (myMem != NULL) {
myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
myMem->UnlockBuf();
}
else
XMemFree(devID, targetBlocksTMP);
#else
......
......@@ -47,14 +47,17 @@ void _CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum,
#ifdef USE_CUDA
int * indexGPU = index;
if (!isIndexOnDev) {
myMem->LockBuf();
indexGPU = (int*)myMem->AllocBuf(myMem->devID, blockNum * gridNum * sizeof(int));
XMemCopy(indexGPU, myMem->devID, index, -1, blockNum * gridNum * sizeof(int));
}
_CudaCopyBlocksInGrid(source, blockSize, blockNum, gridNum, target, indexGPU, unitSize, myMem);
if (!isIndexOnDev)
if (!isIndexOnDev) {
myMem->ReleaseBuf(myMem->devID, blockNum * gridNum * sizeof(int));
myMem->UnlockBuf();
}
#else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif
......
......@@ -80,12 +80,23 @@ void _CudaCopyBlocksSelected(void * source, int unitSize, int blockSize, int * s
ProtectCudaDev(devID, devIDBackup);
/* copy the index to the GPU memory */
int * sourceBlocksTMP = myMem != NULL ?
/*int * sourceBlocksTMP = myMem != NULL ?
(int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) :
(int *)XMemAlloc(devID, blockNum * sizeof(int));
int * targetBlocksTMP = myMem != NULL ?
(int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) :
(int *)XMemAlloc(devID, blockNum * sizeof(int));
(int *)XMemAlloc(devID, blockNum * sizeof(int));*/
int * sourceBlocksTMP;
int * targetBlocksTMP;
if (myMem != NULL) {
myMem->LockBuf();
sourceBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
targetBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
}
else {
sourceBlocksTMP = (int *)XMemAlloc(devID, blockNum * sizeof(int));
targetBlocksTMP = (int *)XMemAlloc(devID, blockNum * sizeof(int));
}
XMemCopy(sourceBlocksTMP, devID, sourceBlocks, -1, blockNum * sizeof(int));
XMemCopy(targetBlocksTMP, devID, targetBlocks, -1, blockNum * sizeof(int));
......@@ -107,6 +118,7 @@ void _CudaCopyBlocksSelected(void * source, int unitSize, int blockSize, int * s
if (myMem != NULL) {
myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
myMem->UnlockBuf();
}
else {
XMemFree(devID, sourceBlocksTMP);
......
......@@ -115,7 +115,7 @@ void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex)
for (int i = 0; i < indexSize; i++) {
int sIndex = sIndexData[i] * stride;
CheckNTErrors(sIndex < s->unitNum, "Wrong index!");
CheckNTErrors(sIndex < s->unitNum && sIndex >= 0, "Wrong index!");
for (int j = 0; j < stride; j++)
tData[i * stride + j] = sData[sIndex + j];
}
......
......@@ -131,9 +131,16 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex)
CheckNTErrors(srcIndexValue < s->unitNum, "Wrong index!");
}
sIndex = mem != NULL ?
/*sIndex = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize);
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize);*/
if (mem != NULL) {
mem->LockBuf();
sIndex = (int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize);
}
else {
sIndex = (int*)XMemAlloc(mem->devID, sizeof(int) * indexSize);
}
XMemCopy(sIndex, devID, srcIndex, -1, sizeof(int) * indexSize);
}
else {
......@@ -169,8 +176,10 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex)
}
if (srcIndex->devID < 0) {
if(mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
mem->UnlockBuf();
}
else
XMemFree(mem->devID, sIndex);
}
......@@ -209,9 +218,16 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
CheckNTErrors(srcIndexValue < s->unitNum, "Wrong index!");
}
sIndex = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize);
/*sIndex = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize);*/
if (mem != NULL) {
mem->LockBuf();
sIndex = (int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize);
}
else {
sIndex = (int*)XMemAlloc(mem->devID, sizeof(int) * indexSize);
}
XMemCopy(sIndex, devID, srcIndex, -1, sizeof(int) * indexSize);
}
else {
......@@ -238,6 +254,15 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
else {
ShowNTErrors("Unsupported dataType!");
}
if (srcIndex->devID < 0) {
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
mem->UnlockBuf();
}
else
XMemFree(mem->devID, sIndex);
}
}
#endif // USE_CUDA
......
......@@ -231,8 +231,8 @@ And this is a special spread function for backward computation of gather functio
*/
void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index)
{
int dim = 0;
int order = source->order;
//int dim = 0;
//int order = source->order;
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(collection->GetDim(-1) == source->GetDim(-1), "Illegal dimension!");
......@@ -272,4 +272,4 @@ void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index)
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
......@@ -177,9 +177,17 @@ void _CudaSpread(XTensor * source, XTensor * collection, int dim,
DTYPE * c = (DTYPE*)collection->data;
XMem * mem = source->mem;
int * si = mem != NULL ?
/*int * si = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize * 2) :
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize * 2);
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize * 2);*/
int * si;
if (mem != NULL) {
mem->LockBuf();
si = (int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize * 2);
}
else {
si = (int*)XMemAlloc(mem->devID, sizeof(int) * indexSize * 2);
}
int * ci = si + indexSize;
XMemCopy(si, mem->devID, srcIndex, -1, sizeof(int) * indexSize);
......@@ -188,8 +196,10 @@ void _CudaSpread(XTensor * source, XTensor * collection, int dim,
KernelSpreadFuzed<<<blocks, threads >>>(s, c, blockNum, blockSizeSrc, blockSizeColl,
stride, indexSize, si, ci);
if(mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize * 2);
mem->UnlockBuf();
}
else
XMemFree(mem->devID, si);
}
......@@ -393,9 +403,16 @@ void _CudaSpreadForGather(XTensor * source, XTensor * collection, XTensor * srcI
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
if (srcIndex->devID < 0) {
sIndex = mem != NULL ?
/*sIndex = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(devID, sizeof(int) * indexSize);
(int*)XMemAlloc(devID, sizeof(int) * indexSize);*/
if (mem != NULL) {
mem->LockBuf();
sIndex = (int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize);
}
else {
sIndex = (int*)XMemAlloc(devID, sizeof(int) * indexSize);
}
XMemCopy(sIndex, devID, srcIndex->data, -1, sizeof(int) * indexSize);
}
else
......@@ -422,8 +439,10 @@ void _CudaSpreadForGather(XTensor * source, XTensor * collection, XTensor * srcI
}
if (srcIndex->devID < 0) {
if(mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
mem->UnlockBuf();
}
else
XMemFree(devID, sIndex);
}
......
......@@ -512,8 +512,8 @@ void funName(DTYPE * input, DTYPE * output,int stride, int strideNum,
KERNELREDUCEFUN1(KernelReduceMaxOp, MAX, shflDownReduceMax, FLOAT_MIN)
KERNELREDUCEFUN1(KernelReduceMinOp, MIN, shflDownReduceMin, MAX_FLOAT)
/*
get the max-valued items along a dimension of the tensor (cuda version).
/*
get the max-valued items along a dimension of the tensor (cuda version).
For a 1-dimensional data array a,
sum_i = max_{0<=j<strideNum} input_{i,j}
>> input - the input tensor
......@@ -574,7 +574,14 @@ void _funcName(const XTensor * input, XTensor * output, int dim)
XMem * mem = input->mem; \
GDevs.GetCudaThread2D(devID, strideNum, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize); \
int bufSize = input->unitSize * cudaGridSize[0] * stride * blockNum * 2; \
DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(devID, bufSize); \
DTYPE * buf; \
if (mem != NULL) { \
mem->LockBuf(); \
buf = (DTYPE*)mem->AllocBuf(mem->devID, bufSize); \
} \
else { \
buf = (DTYPE*)XMemAlloc(devID, bufSize); \
} \
DTYPE * buf1 = buf; \
DTYPE * buf2 = buf + cudaGridSize[0] * stride * blockNum; \
do { \
......@@ -706,8 +713,10 @@ void _funcName(const XTensor * input, XTensor * output, int dim)
\
} while (strideNum > 1); \
\
if (mem != NULL) \
if (mem != NULL) { \
mem->ReleaseBuf(mem->devID, bufSize); \
mem->UnlockBuf(); \
} \
else \
XMemFree(input->devID, buf); \
} \
......
......@@ -757,7 +757,15 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
GDevs.GetCudaThread2D(devID, strideNum, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize);
int bufSize = input->unitSize * cudaGridSize[0] * stride * blockNum * 2;
DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(devID, bufSize);
//DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(devID, bufSize);
DTYPE * buf;
if (mem != NULL) {
mem->LockBuf();
buf = (DTYPE*)mem->AllocBuf(mem->devID, bufSize);
}
else {
buf = (DTYPE*)XMemAlloc(devID, bufSize);
}
DTYPE * buf1 = buf;
DTYPE * buf2 = buf + cudaGridSize[0] * stride * blockNum;
do {
......@@ -907,8 +915,10 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
} while (strideNum > 1);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, bufSize);
mem->UnlockBuf();
}
else
XMemFree(devID, buf);
}
......
......@@ -56,12 +56,16 @@ void _ReduceSumAll(const XTensor * source, XTensor * target)
int dims[1] = {source->unitNum};
if (source->mem != NULL)
source->mem->LockBuf();
XTensor * all = NewTensorBufV2(1, dims, source->dataType, source->denseRatio, source->devID, source->mem);
_CopyValues(source, all);
_ReduceSum(all, target, 0);
DelTensorBuf(all);
if (source->mem != NULL)
source->mem->UnlockBuf();
}
/*
......@@ -72,7 +76,8 @@ sum all the items of the tensor (It should be optimized!)
void _ReduceSumAll(const XTensor * source, DTYPE * value)
{
int * dimSize = new int[MAX_TENSOR_DIM_NUM];
float dr = (!source->isSparse) ? 1.0F : source->denseRatio;
if (source->mem != NULL)
source->mem->LockBuf();
XTensor * target = NewTensorBufV2(0, dimSize, source->dataType, source->denseRatio, source->devID, source->mem);
target->SetTMPFlag();
......@@ -82,6 +87,8 @@ void _ReduceSumAll(const XTensor * source, DTYPE * value)
delete[] dimSize;
DelTensorBuf(target);
if (source->mem != NULL)
source->mem->UnlockBuf();
}
/*
......@@ -122,4 +129,4 @@ DTYPE ReduceSumAllValue(const XTensor & source)
return target.Get0D();
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
......@@ -32,14 +32,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
transform a tensor by merging it along with a dimension.
e.g., (N/3, M, 3) -> (N, M)
e.g., (3, M, N/3) -> (M, N)
>> s - the source tensor
>> t - the target tensor (for return)
>> whereToMerge - the merging operation is along with which dimension
>> leadingDim - the leading dimension of merging, take (N/3, M, 3) -> (N, M)
for example, whereToMerge = 0 (i.e., the dimension for "N/3")
leadingDim = 2 (i.e., the dimension for "3")
>> leadingDim - the leading dimension of merging, take (3, M, N/3) -> (M, N)
for example, whereToMerge = 2 (i.e., the dimension for "N/3")
leadingDim = 0 (i.e., the dimension for "3")
*/
void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
{
......@@ -118,30 +118,54 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
void * dataTMP = t->data;
if (!isOnSameDevice)
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(mem->devID, size);
if (!isOnSameDevice) {
/*dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(mem->devID, size);*/
if (mem != NULL) {
mem->LockBuf();
dataTMP = mem->AllocBuf(mem->devID, size);
}
else {
dataTMP = XMemAlloc(mem->devID, size);
}
}
int blockNumInMerge = s->dimSize[leadingDim];
int splitSizeInGrid = gridSize / blockNumInMerge;
int realBlockSize = blockSize * t->unitSize;
int * blockIndex = (int*)(mem != NULL ?
/*int * blockIndex = (int*)(mem != NULL ?
mem->AllocBuf(mem->devID, blockNum * gridNum * sizeof(int)) :
XMemAlloc(s->devID, blockNum * gridNum * sizeof(int)));
XMemAlloc(s->devID, blockNum * gridNum * sizeof(int)));*/
int * blockIndex;
if (mem != NULL) {
if (isOnSameDevice) {
mem->LockBuf();
}
blockIndex = (int*)mem->AllocBuf(mem->devID, blockNum * gridNum * sizeof(int));
}
else {
blockIndex = (int*)XMemAlloc(s->devID, blockNum * gridNum * sizeof(int));
}
_MakeMergeBlockIndex(blockIndex, blockNum, blockNumInMerge, splitSizeInGrid, gridSize, gridNum, s->devID);
_CopyBlocksOnSite(s->data, s->unitSize, realBlockSize, blockNum * gridNum, dataTMP, blockIndex, s->devID);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, blockNum * gridNum * sizeof(int));
if (isOnSameDevice) {
mem->UnlockBuf();
}
}
else
XMemFree(s->devID, blockIndex);
if (!isOnSameDevice) {
XMemCopy(t->data, t->devID, dataTMP, s->devID, size);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, size);
mem->UnlockBuf();
}
else
XMemFree(s->devID, dataTMP);
}
......@@ -185,13 +209,13 @@ bool CheckMergeSize(const XTensor * s, const XTensor * t, int whereToMerge, int
transform a tensor by merging it along with a dimension (return an XTensor structure)
make a new tensor to keep the result and return it
e.g., (N/3, M, 3) -> (N, M)
e.g., (3, M, N/3) -> (M, N)
>> s - the source tensor
>> whereToMerge - the merging operation is along with which dimension
>> leadingDim - the leading dimension of merging, take (N/3, M, 3) -> (N, M)
for example, whereToMerge = 0 (i.e., the dimension for "N/3")
leadingDim = 2 (i.e., the dimension for "3")
>> leadingDim - the leading dimension of merging, take (3, M, N/3) -> (M, N)
for example, whereToMerge = 2 (i.e., the dimension for "N/3")
leadingDim = 0 (i.e., the dimension for "3")
<< return - the transformed tensor by merging along with a dimension
*/
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
......@@ -358,8 +382,16 @@ void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge)
void * dataTMP = NULL;
if (uniform)
dataTMP = smallsItem0->data;
else
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(t->devID, size);
else {
//dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(t->devID, size);
if (mem != NULL) {
mem->LockBuf();
dataTMP = mem->AllocBuf(mem->devID, size);
}
else {
dataTMP = XMemAlloc(t->devID, size);
}
}
tensorTMP->data = dataTMP;
......@@ -378,8 +410,10 @@ void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge)
tensorTMP->data = NULL;
delete tensorTMP;
if ((!uniform) && (mem != NULL))
if ((!uniform) && (mem != NULL)) {
mem->ReleaseBuf(mem->devID, size);
mem->UnlockBuf();
}
else
XMemFree(t->devID, dataTMP);
}
......
......@@ -117,7 +117,7 @@ void _CudaMergeBlockLists(const StrList* sourceList, int * blockSizes, int block
GDevs.GetCudaThread2D(myMem->devID, realMaxBlockSize, newBlockListSize, MAX_INT,
cudaGridSizes, cudaBlockSizes);
myMem->LockBuf();
myMem->SetPinBuf();
int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256);
......@@ -133,6 +133,7 @@ void _CudaMergeBlockLists(const StrList* sourceList, int * blockSizes, int block
(sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU);
myMem->BackToPinBuf();
myMem->UnlockBuf();
delete[] sourceArrays;
delete[] targetArrays;
......
......@@ -110,22 +110,44 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
void * dataTMP = t->data;
if (!isOnSameDevice)
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(s->devID, size);
if (!isOnSameDevice) {
//dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(s->devID, size);
if (mem != NULL) {
mem->LockBuf();
dataTMP = mem->AllocBuf(mem->devID, size);
}
else {
dataTMP = XMemAlloc(s->devID, size);
}
}
int realBlockSize = blockSize * t->unitSize;
int blockSplitSize = blockNum / splitNum;
int * blockIndex = (int*)(mem != NULL ?
/*int * blockIndex = (int*)(mem != NULL ?
mem->AllocBuf(mem->devID, blockNum * sizeof(int)) :
XMemAlloc(s->devID, blockNum * sizeof(int)));
XMemAlloc(s->devID, blockNum * sizeof(int)));*/
int * blockIndex;
if (mem != NULL) {
if (isOnSameDevice) {
mem->LockBuf();
}
blockIndex = (int*)mem->AllocBuf(mem->devID, blockNum * sizeof(int));
}
else {
blockIndex = (int*)XMemAlloc(s->devID, blockNum * sizeof(int));
}
_MakeSplitBlockIndex(blockIndex, splitNum, blockSplitSize, blockNum, s->devID);
_CopyBlocksOnSite(s->data, s->unitSize, realBlockSize, blockNum, dataTMP, blockIndex, s->devID);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, blockNum * sizeof(int));
if (isOnSameDevice) {
mem->UnlockBuf();
}
}
else
XMemFree(s->devID, blockIndex);
......@@ -133,8 +155,10 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
if (!isOnSameDevice) {
XMemCopy(t->data, t->devID, dataTMP, s->devID, size);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, size);
mem->UnlockBuf();
}
else
XMemFree(s->devID, dataTMP);
}
......@@ -333,7 +357,14 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli
dataTMP = first->data;
}
else {
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(big->devID, size);
//dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(big->devID, size);
if (mem != NULL) {
mem->LockBuf();
dataTMP = mem->AllocBuf(mem->devID, size);
}
else {
dataTMP = XMemAlloc(big->devID, size);
}
}
tensorTMP->data = dataTMP;
......@@ -354,8 +385,10 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli
tensorTMP->data = NULL;
delete tensorTMP;
if ((!uniform) && (mem != NULL))
if ((!uniform) && (mem != NULL)) {
mem->ReleaseBuf(mem->devID, size);
mem->UnlockBuf();
}
else
XMemFree(big->devID, dataTMP);
}
......
......@@ -43,13 +43,11 @@ void _Stack(const TensorList * smalls, XTensor * t, int dim)
int blockSize = 1;
int blockNum = 1;
int gridSize = 1;
int gridNum = 1;
XTensor * smallsItem0 = smalls->GetItem(0);
int unitNum = smallsItem0->unitNum;
//int unitNum = smallsItem0->unitNum;
int unitSize = smallsItem0->unitSize;
int itemSize = unitNum * unitSize;
for (int i = 0; i < smallsItem0->order; i++) {
if (i >= dim)
......@@ -129,7 +127,7 @@ bool CheckStackShape(const TensorList &smalls, XTensor &t, int dim)
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
for (int i = 0; i < tensor->order; i++) {
for (int i = 0; i < order; i++) {
if (i < dim) {
if (t.GetDim(i) != tensor->GetDim(i))
return false;
......
......@@ -234,7 +234,15 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
int m = GetNextPower2(strideNum);
int n = stride * blockNum;
void * buf = mem != NULL ? mem->AllocBuf(a->devID, n * m * a->unitSize) : XMemAlloc(a->devID, n * m * a->unitSize);
//void * buf = mem != NULL ? mem->AllocBuf(a->devID, n * m * a->unitSize) : XMemAlloc(a->devID, n * m * a->unitSize);
void * buf;
if (mem != NULL) {
mem->LockBuf();
buf = mem->AllocBuf(a->devID, n * m * a->unitSize);
}
else {
buf = XMemAlloc(a->devID, n * m * a->unitSize);
}
void * bufIndex = NULL;
if (indexA != NULL && indexB != NULL) {
bufIndex = mem != NULL ? mem->AllocBuf(a->devID, n * m * sizeof(int)) : XMemAlloc(a->devID, n * m * sizeof(int));
......@@ -289,8 +297,10 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
KernelReorganizeBack<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
(bufIndex, indexB->data, m, n, stride, k, blockNum);
if (mem != NULL)
if (mem != NULL) {
mem->ReleaseBuf(a->devID, n * m * a->unitSize);
mem->UnlockBuf();
}
else
XMemFree(a->devID, buf);
if (indexA != NULL && indexB != NULL)
......
......@@ -79,6 +79,8 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
if (mem != NULL)
mem->LockBuf();
max = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
sum = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
......@@ -153,6 +155,8 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
DelTensorBuf(max);
DelTensorBuf(sum);
if (mem != NULL)
mem->UnlockBuf();
if (x->devID >= 0) {
delete blockx;
......
......@@ -54,6 +54,8 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
XTensor * max = NULL;
XTensor * sum = NULL;
if (mem != NULL)
mem->LockBuf();
max = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
sum = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
......@@ -113,6 +115,8 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
DelTensorBuf(sum);
DelTensorBuf(max);
if (mem != NULL)
mem->UnlockBuf();
delete[] dimSize;
}
......
......@@ -354,8 +354,10 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
dimSize[i - 1] = output->dimSize[i];
}
if (output->mem != NULL)
output->mem->LockBuf();
XTensor * lossBuf = NewTensorBufV2(output->order - 1, dimSize, output->dataType, output->denseRatio,
output->devID, output->mem);
output->devID, output->mem);
_CrossEntropy(output, gold, lossBuf, weight, padding, leadingDim);
......@@ -367,10 +369,16 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
nonZeroNum = (DTYPE)lossBuf->unitNum;
}
else {
if ((padding->mem != NULL) && (padding->mem != output->mem)) {
padding->mem->LockBuf();
}
XTensor * tmp = NewTensorBufV2(padding, padding->devID, padding->mem);
_IsNonZero(padding, tmp);
_ReduceSumAll(tmp, &nonZeroNum);
DelTensorBuf(tmp);
if ((padding->mem != NULL) && (padding->mem != output->mem)) {
padding->mem->UnlockBuf();
}
}
loss = loss / nonZeroNum;
......@@ -384,6 +392,8 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
delete[] dimSize;
DelTensorBuf(lossBuf);
if (output->mem != NULL)
output->mem->UnlockBuf();
return loss;
}
......
......@@ -57,6 +57,9 @@ void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
{
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
if (output->mem != NULL) {
output->mem->LockBuf();
}
XTensor * interBuf1 = NewTensorBufV2(output, output->devID, output->mem);
XTensor * interBuf2 = NewTensorBufV2(output, output->devID, output->mem);
......@@ -73,6 +76,9 @@ void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
DelTensorBuf(interBuf2);
DelTensorBuf(interBuf1);
if (output->mem != NULL) {
output->mem->UnlockBuf();
}
}
/*
......@@ -118,6 +124,9 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
dimSize[i - 1] = output->dimSize[i];
}
if (output->mem != NULL) {
output->mem->LockBuf();
}
XTensor * lossBuf = NewTensorBufV2(output->order - 1, dimSize, output->dataType, output->denseRatio,
output->devID, output->mem);
......@@ -131,10 +140,16 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
nonZeroNum = (DTYPE)lossBuf->unitNum;
}
else {
if ((padding->mem != NULL) && (padding->mem != output->mem)) {
padding->mem->LockBuf();
}
XTensor * tmp = NewTensorBufV2(padding, padding->devID, padding->mem);
_IsNonZero(padding, tmp);
_ReduceSumAll(tmp, &nonZeroNum);
DelTensorBuf(tmp);
if ((padding->mem != NULL) && (padding->mem != output->mem)) {
padding->mem->UnlockBuf();
}
}
loss = loss / nonZeroNum;
......@@ -148,6 +163,9 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
delete[] dimSize;
DelTensorBuf(lossBuf);
if (output->mem != NULL) {
output->mem->UnlockBuf();
}
return loss;
}
......
......@@ -215,12 +215,7 @@ bool TestConvertDataType3()
{0.5F, -4.0F},
{0.0F, 6.0F} };
DTYPE data2[2][3] = { {1.0F, 2.0F, 3.0F},
{0.0F, 4.0F, 5.0F} };
DTYPE answer[3][3] = { {1.0F, -6.0F, -7.0F},
{0.5F, -15.0F, -18.5F},
{0.0F, 24.0F, 30.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -241,6 +236,14 @@ bool TestConvertDataType3()
cpuTest = _CheckData(a, data1, unitNum1, 1e-4F);
#ifdef USE_CUDA
DTYPE data2[2][3] = { { 1.0F, 2.0F, 3.0F },
{ 0.0F, 4.0F, 5.0F } };
DTYPE answer[3][3] = { { 1.0F, -6.0F, -7.0F },
{ 0.5F, -15.0F, -18.5F },
{ 0.0F, 24.0F, 30.0F } };
/* GPU test */
bool gpuTest = true;
......
......@@ -67,7 +67,6 @@ bool TestGather1()
DTYPE answer[2][3] = { {0.0F, -1.0F, 2.0F},
{1.0F, 2.0F, 4.0F} };
int dim = 0;
int indexSize = 2;
int srcIndex[2] = {0, 2};
......
......@@ -422,7 +422,7 @@ bool TestSetData6()
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE answer[5] = {5.2F, 3.2F, 1.2F, -0.8F, -2.8F};
//DTYPE answer[5] = {5.2F, 3.2F, 1.2F, -0.8F, -2.8F};
/* CPU test */
bool cpuTest = true;
......
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * We test XTrain here. It is simple: we design a toy task in which the
 * model predicts an integer E (0-100) from four input integers
 * A, B, C and D (0-100). We generate a number of samples with different
 * values of A, B, C and D. The gold standard is
 *
 *          E = (int)(sqrt(A * B) + abs(C - D))/2
*
* Our model is a two-layer feed-forward neural network. It can be treated
* as a classifier rather than a regression model.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-03-03
*/
#include "TTrain.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/function/FHeader.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
XTensor * tmpTT = NULL;
/* generate the training data file */
void GeneateTTrainData(const char * fileName)
{
    /* the output format: first line = sample count, then one sample
       "A B C D E" per line, where E is the gold-standard answer */
    FILE * file = fopen(fileName, "wb");
    CheckNTErrors(file, "Cannot open the file");

    XPRINT(1, stderr, "[INFO] Generating data ... ");

    const int sampleNum = MAX_SAMPLE_NUM_IN_TTRAIN;
    const int range = MAX_INT_IN_TTRAIN;

    fprintf(file, "%d\n", sampleNum);

    /* fixed seed so that the generated corpus is reproducible */
    srand(1);

    /* draw a pseudo-random integer in [0, range] */
    auto draw = [range]() {
        return (int)(((float)rand() / RAND_MAX) * range);
    };

    for (int i = 0; i < sampleNum; i++) {
        int A = draw();
        int B = draw();
        int C = draw();
        int D = draw();
        int E = (int)((sqrt(A * B) + abs(C - D)) / 2);

        fprintf(file, "%d %d %d %d %d\n", A, B, C, D, E);
    }

    XPRINT2(1, stderr, "%d samples in \"%s\" [DONE]\n", sampleNum, fileName);

    fclose(file);
}
/* run the test */
void TestTrain()
{
    /* build the synthetic corpus first */
    GeneateTTrainData("ttrain.txt");

    /* training hyper-parameters */
    XConfig config;
    //config.Add("dev", -1);
    config.Add("lrate", 0.1F);
    config.Add("nstep", 100000);
    config.Add("nepoch", 5);
    config.Add("jobdev0", 0);
    //config.Add("jobdev4", -1);

    const int modelDevID = config.GetInt("jobdev0", -1);

    /* the data source */
    TTDataLoader loader;
    loader.SetFileName("ttrain.txt");
    loader.SetBatchSize(config.GetInt("batchsize", TT_BATCH_SIZE));

    /* the toy model (a two-layer feed-forward classifier) */
    TTModel model;
    model.Init(config, modelDevID);
    tmpTT = model.params[0].param;

    /* the optimizer and the training loop */
    XOptimizer optimizer;
    optimizer.Init(config);

    XTrainer trainer;
    trainer.Run(&config, &loader, &model, &optimizer);
}
/*****************************
* data loader
******************************/
/* constructor */
TTDataLoader::TTDataLoader() :
    file(NULL), batchSize(TT_BATCH_SIZE)
{
    /* the file-name buffer is owned by this object and released in
       the destructor */
    fileName = new char[MAX_FILE_NAME_LENGTH];
}
/* de-constructor */
TTDataLoader::~TTDataLoader()
{
    /* NOTE(review): the file handle is NOT closed here - callers are
       expected to call End() after loading; otherwise the handle leaks */
    delete[] fileName;
}
/* set file name */
void TTDataLoader::SetFileName(const char * myFileName)
{
strcpy(fileName, myFileName);
}
/* set batch size */
/* 
set the size of a mini-batch
>> myBatchSize - number of samples returned by each call of GetBatchSimple
*/
void TTDataLoader::SetBatchSize(int myBatchSize)
{
    batchSize = myBatchSize;
}
/* start the process */
/* 
start the loading process: open the training file and move the read
position past the header line
<< return - true on success (CheckNTErrors aborts on failure)
*/
bool TTDataLoader::Start()
{
    file = fopen(fileName, "rb");
    CheckNTErrors(file != NULL, "Cannot open the file");

    /* skip the first line - it holds the sample count, not a sample */
    char * line = new char[MAX_SAMPLE_LINE_LENGTH];
    char * ret = fgets(line, MAX_SAMPLE_LINE_LENGTH, file);
    delete[] line;

    /* fix: the fgets result was ignored before, so an empty file would
       pass silently here and fail obscurely later in GetBatchSimple */
    CheckNTErrors(ret != NULL, "Cannot read the header line of the file");

    return true;
}
/* end the process */
/* 
end the loading process: close the training file
<< return - true
*/
bool TTDataLoader::End()
{
    /* fix: guard against a NULL handle (End() called twice, or before
       Start()) and reset the pointer so a later close cannot hit a
       stale handle */
    if (file != NULL) {
        fclose(file);
        file = NULL;
    }

    return true;
}
/*
get a batch of samples
>> inputs - inputs of the model
>> golds - gold standards
*/
/* 
get a batch of samples
>> inputs - inputs of the model; item 0 receives a (count, MAX_SAMPLE_SIZE)
            integer tensor of the features A, B, C, D
>> golds - gold standards; item 0 receives a (count, 1) integer tensor of E
<< return - true if at least one sample was read, false at end of file
*/
bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds)
{
    CheckNTErrors(file != NULL, "No input file specificed!");
    CheckNTErrors(inputs != NULL && inputs->count >= 1, "Wrong argument!");
    CheckNTErrors(golds != NULL && golds->count >= 1, "Wrong argument!");

    XTensor * input = (XTensor*)inputs->GetItem(0);
    XTensor * gold = (XTensor*)golds->GetItem(0);

    int count = 0;
    int sampleSize = MAX_SAMPLE_SIZE;
    char * line = new char[MAX_SAMPLE_LINE_LENGTH];
    int * inputBatch = new int[batchSize * sampleSize];
    int * goldBatch = new int[batchSize];
    int A, B, C, D, E;

    /* fix: check the batch-full condition BEFORE reading. The old code
       read a line first and then broke out when the batch was full, so
       one sample per full batch was consumed from the stream and
       silently dropped */
    while (count < batchSize && fgets(line, MAX_SAMPLE_LINE_LENGTH, file)) {
        if (sscanf(line, "%d %d %d %d %d", &A, &B, &C, &D, &E) < sampleSize + 1) {
            ShowNTErrors("Wrong format in the training file!");
        }

        inputBatch[count * sampleSize] = A;
        inputBatch[count * sampleSize + 1] = B;
        inputBatch[count * sampleSize + 2] = C;
        inputBatch[count * sampleSize + 3] = D;
        goldBatch[count] = E;

        count++;
    }

    if (count > 0) {
        /* use sampleSize rather than a magic "4" so the tensor shape
           stays consistent with the batch layout written above */
        InitTensor2D(input, count, sampleSize, X_INT);
        InitTensor2D(gold, count, 1, X_INT);

        input->SetData(inputBatch, count * sampleSize);
        gold->SetData(goldBatch, count);
    }

    delete[] line;
    delete[] inputBatch;
    delete[] goldBatch;

    return count > 0;
}
/*****************************
* the neural model
******************************/
/* constructor: no device chosen yet, all sizes start at zero */
TTModel::TTModel()
{
    devID = -1;
    vSize = eSize = hSize = 0;
}
/* de-constructor: members (tensors, config) clean themselves up */
TTModel::~TTModel()
{
}
/* config it */
void TTModel::SetConfig(XConfig &myConfig)
{
    /* presumably copies myConfig's entries into our own config so later
       changes to myConfig do not affect this model - TODO confirm the
       direction of XConfig::CreateFromMe */
    config.CreateFromMe(myConfig);
}
/*
initialize the model: read sizes from the configuration, create the
three parameter matrices, randomize them and register them as
trainable parameters
>> myConfig - configuration
>> myDevID - device id
*/
void TTModel::Init(XConfig &myConfig, int myDevID)
{
    /* order matters: Clear() wipes the old config before SetConfig()
       installs the new one */
    Clear();
    SetConfig(myConfig);
    devID = myDevID;
    /* vocabulary covers the integers 0..MAX_INT_IN_TTRAIN */
    vSize = MAX_INT_IN_TTRAIN + 1;
    eSize = config.GetInt("esize", TT_EMBEDDING_SIZE);
    hSize = config.GetInt("hsize", TT_HIDDEN_SIZE);
    InitTensor2D(&embeddingW, vSize, eSize, X_FLOAT, devID);
    /* the hidden layer consumes the concatenation of all
       MAX_SAMPLE_SIZE input embeddings (see Forward) */
    InitTensor2D(&hiddenW, MAX_SAMPLE_SIZE * eSize, hSize, X_FLOAT, devID);
    InitTensor2D(&outputW, hSize, vSize, X_FLOAT, devID);
    embeddingW.SetName("embeddingw");
    hiddenW.SetName("hiddenw");
    outputW.SetName("outputw");
    /* small uniform random initialization */
    embeddingW.SetDataRand(-0.1F, 0.1F);
    hiddenW.SetDataRand(-0.1F, 0.1F);
    outputW.SetDataRand(-0.1F, 0.1F);
    /* register the matrices as trainable parameters */
    AddParam(&embeddingW);
    AddParam(&hiddenW);
    AddParam(&outputW);
}
/*
create the model
>> devID - device id
>> input - as it is
>> output - as it is
*/
void TTModel::Forward(int devID, XTensor * input, XTensor * output)
{
XTensor embedding;
XTensor embeddingCat;
XTensor hidden;
/* [e_0, e_1, e_2] = w_e * input(one-hot) */
embedding = Gather(embeddingW, *input);
/* e = merge(e_0, e_1, e_2) */
embeddingCat = Merge(embedding, embedding.order - 1, embedding.order - 2);
/* h = hardtanh(e * w_h) */
hidden = HardTanH(MMul(embeddingCat, hiddenW));
/* output = Softmax(h * w_o) */
*output = Softmax(MMul(hidden, outputW), -1);
}
/* clear the model (only the configuration; parameter tensors are
   left untouched and are re-initialized by Init) */
void TTModel::Clear()
{
    config.Clear();
}
/*
clone the model for a worker
>> devID - device id of the clone
<< return - a new TTModel with the same configuration and a copy of the
            current parameter values
*/
XModel * TTModel::Clone(int devID)
{
    TTModel * model = new TTModel();

    /* Init() already calls SetConfig() internally, so the extra
       SetConfig() call the original made here was redundant */
    model->Init(config, devID);

    /* overwrite the random initialization with our parameter values */
    CopyValues(embeddingW, model->embeddingW);
    CopyValues(hiddenW, model->hiddenW);
    CopyValues(outputW, model->outputW);

    return model;
}
/*
run the neural network: one forward + backward pass on a batch
>> inputs - inputs of the model
>> outputs - outputs of the model
>> golds - gold standards
>> losses - losses of the output with respect to the gold standards
<< return - always true
*/
bool TTModel::RunSimple(XList * inputs, XList * outputs, XList * golds, XList* losses)
{
    //fprintf(stderr, "run simple 0\n");
    CheckNTErrors(inputs != NULL && inputs->count >= 1, "Wrong arguments!");
    CheckNTErrors(outputs != NULL && outputs->count >= 1, "Wrong arguments!");
    CheckNTErrors(golds != NULL && golds->count >= 1, "Wrong arguments!");
    CheckNTErrors(losses != NULL && losses->count >= 1, "Wrong arguments!");
    XTensor * input = (XTensor*)inputs->GetItem(0);
    XTensor * output = (XTensor*)outputs->GetItem(0);
    XTensor * gold = (XTensor*)golds->GetItem(0);
    XTensor * loss = (XTensor*)losses->GetItem(0);
    XTensor goldOneHot;
    /* place all input data on the correct device */
    input->FlushToDevice(devID);
    output->FlushToDevice(devID);
    gold->FlushToDevice(devID);
    XNet net;
    /* create the neural network and run it */
    Forward(devID, input, output);
    /* gold standard in one-hot representation */
    goldOneHot = IndexToOnehot(*gold, vSize, 0.0F);
    /* collapse the last two dimensions of the one-hot tensor into one so
       it matches the output's shape.
       NOTE(review): this assumes gold has shape [count, 1] as produced by
       TTDataLoader::GetBatchSimple - confirm for other loaders */
    int * dims = new int[goldOneHot.order];
    for (int i = 0; i < goldOneHot.order - 2; i++)
        dims[i] = goldOneHot.GetDim(i);
    dims[goldOneHot.order - 2] = goldOneHot.GetDim(goldOneHot.order - 1);
    goldOneHot.Reshape(goldOneHot.order - 1, dims);
    /* loss */
    *loss = CrossEntropy(*output, goldOneHot);
    /* back-propagation */
    net.Backward(*loss);
    delete[] dims;
    //fprintf(stderr, "run simple 1\n");
    return true;
}
}
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* We test XTrain here. It is simple, we design a simple task in that we
* make the model to predict an integer D (0-100) from three input integers
* A, B and C (0-100). We generate a number of samples with different values
* of A, B and C. The gold standard is
*
* D = (int)(sqrt(A * B) + C)/2
*
* Our model is a two-layer feed-forward neural network. It can be treated
* as a classifier rather than a regression model.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-03-03
* The express train was updated this year. It just takes me two hours and
* a half from Shenyang to Beijing.
*/
#ifndef __TTRAIN_H__
#define __TTRAIN_H__
#include <stdio.h>
#include <stdlib.h>
#include "XTrainer.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_SAMPLE_NUM_IN_TTRAIN 200000  /* number of samples generated for training */
#define MAX_INT_IN_TTRAIN 100            /* values are integers in [0, MAX_INT_IN_TTRAIN] */
#define MAX_SAMPLE_LINE_LENGTH 128       /* maximum length of one line in the data file */
#define MAX_SAMPLE_SIZE 4                /* input integers per sample (A, B, C, D) */
#define TT_BATCH_SIZE 256                /* default mini-batch size */
#define TT_EMBEDDING_SIZE 128            /* default embedding size */
#define TT_HIDDEN_SIZE 512               /* default hidden-layer size */
/* first parameter tensor of the server model (assigned in TestTrain),
   exposed for inspection */
extern XTensor * tmpTT;
/* generate the training data file */
void GeneateTTrainData(const char * fileName);
/* run the test */
extern
void TestTrain();
/* data loader: reads "A B C D E" lines from a text file and packs
   them into input/gold tensors, one batch at a time */
class TTDataLoader : public DataDistributeBase
{
protected:
    /* name of the training-data file */
    char * fileName;
    /* handle of the open file (NULL before Start/after End) */
    FILE * file;
    /* number of samples per batch */
    int batchSize;
public:
    /* constructor */
    TTDataLoader();
    /* de-constructor */
    ~TTDataLoader();
    /* set file name */
    void SetFileName(const char * myFileName);
    /* set batch size */
    void SetBatchSize(int myBatchSize);
    /* start the process (open the file, skip the header line) */
    bool Start();
    /* end the process (close the file) */
    bool End();
    /* get a batch of samples; returns false at end of file */
    bool GetBatchSimple(XList * inputs, XList * golds);
};
/* the model: a two-layer feed-forward network
   (embedding -> hardtanh hidden layer -> softmax output) */
class TTModel : public XModel
{
protected:
    /* device id */
    int devID;
    /* configuration (private copy, see SetConfig) */
    XConfig config;
    /* embedding matrix of the input, shape [vSize, eSize] */
    XTensor embeddingW;
    /* parameter matrix of the hidden layer,
       shape [MAX_SAMPLE_SIZE * eSize, hSize] */
    XTensor hiddenW;
    /* parameter matrix of the output layer, shape [hSize, vSize] */
    XTensor outputW;
    /* vocabulary size (MAX_INT_IN_TTRAIN + 1) */
    int vSize;
    /* embedding size */
    int eSize;
    /* hidden layer size */
    int hSize;
public:
    /* constructor */
    TTModel();
    /* de-constructor */
    ~TTModel();
    /* config it */
    void SetConfig(XConfig &myConfig);
    /* initialize the parameters */
    void Init(XConfig &myConfig, int myDevID);
    /* create the model (forward pass) */
    void Forward(int devID, XTensor * input, XTensor * output);
    /* clear the model */
    void Clear();
    /* clone the model */
    XModel * Clone(int devID);
    /* run the neural network (forward + backward on one batch) */
    bool RunSimple(XList * inputs, XList * outputs, XList * golds, XList * losses);
};
/* */
}
#endif
\ No newline at end of file
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* We define various template classes here. They will be overloaded and used
* in applications.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-02-25
*/
#include "XBaseTemplate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*******************************
* data loader template
*******************************/
/* constructor: create the mutex that serializes batch loading */
DataDistributeBase::DataDistributeBase()
{
    MUTEX_INIT(loadMutex);
}
/* de-constructor: release the batch-loading mutex */
DataDistributeBase::~DataDistributeBase()
{
    MUTEX_DELE(loadMutex);
}
/* start the job (e.g., open the file); subclasses must override this -
   the base version always aborts */
bool DataDistributeBase::Start()
{
    ShowNTErrors("DataDistributeBase::Start must be overloaded!");
    return true;
}
/* end the job (e.g., close the file); subclasses must override this -
   the base version always aborts */
bool DataDistributeBase::End()
{
    ShowNTErrors("DataDistributeBase::End must be overloaded!");
    return true;
}
/*
get a batch of samples (default implementation - does nothing)
>> inputs - inputs of the model
>> golds - gold standards
<< return - false, signalling that the subclass did not override it
            (GetBatch() reports an error in that case)
*/
bool DataDistributeBase::GetBatchSimple(XList * inputs, XList * golds)
{
    return false;
}
/* fetch one batch; args = { inputList, goldList } */
bool DataDistributeBase::GetBatch(XList * args)
{
    CheckNTErrors(args->count >= 2, "More input arguments are required!");

    XList * inputList = (XList*)args->GetItem(0);
    XList * goldList = (XList*)args->GetItem(1);

    /* delegate to the (hopefully overloaded) simple loader */
    if (!GetBatchSimple(inputList, goldList)) {
        ShowNTErrors("You must be overload one of these: DataDistributeBase::GetBatchSimple ... !");
        return false;
    }

    return true;
}
/* thread-safe wrapper around GetBatch(): the whole fetch runs
   under the loading mutex */
bool DataDistributeBase::GetBatchSafe(XList * args)
{
    MUTEX_LOCK(loadMutex);
    bool result = GetBatch(args);
    MUTEX_UNLOCK(loadMutex);
    return result;
}
}
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* We define various template classes here. They will be overloaded and used
* in applications.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-02-25
* The meeting at 3:00pm today was canceled. More time for coding.
*/
#ifndef __XNETTEMPLATE_H__
#define __XNETTEMPLATE_H__
#include "../tensor/XTensor.h"
#include "../tensor/XThread.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
data distributor template. It distributes batches of data to workers.
The use of data distributor follows:
Start() -> GetBatch() -> ... -> GetBatch() -> End()
In addition, GetBatch() should be thread-safe, and thus could be
called by different threads simultaneously.
*/
class DataDistributeBase
{
protected:
    /* mutex that serializes batch loading across threads */
    MUTEX_HANDLE loadMutex;
public:
    /* constructor */
    DataDistributeBase();
    /* de-constructor. Declared virtual because this class is a polymorphic
       base (Start/End/GetBatchSimple are virtual and loaders such as
       TTDataLoader derive from it): without a virtual destructor,
       deleting a derived loader through a DataDistributeBase pointer is
       undefined behavior and would skip the derived clean-up. */
    virtual ~DataDistributeBase();
    /* start the job (e.g., open the file).
       NOTE THAT before calling Start() one should initialize
       the distributor if necessary */
    virtual
    bool Start();
    /* end the job (e.g., close the file) */
    virtual
    bool End();
    /* get a batch of samples */
    virtual
    bool GetBatchSimple(XList * inputs, XList * golds);
public:
    /* get a batch of samples */
    bool GetBatch(XList * args);
    /* get a batch of samples (for multi-threading) */
    bool GetBatchSafe(XList * args);
};
}
#endif // __XNETTEMPLATE_H__
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论