Commit f21e1b48 by xiaotong

work on the triaining processing on t2t lm

parent 5cd1be65
......@@ -25,6 +25,7 @@
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
......@@ -35,6 +36,7 @@ void SumDimTest();
using namespace nts;
using namespace fnnlm;
using namespace transformer;
int main( int argc, const char ** argv )
{
......@@ -44,10 +46,11 @@ int main( int argc, const char ** argv )
//SumDimTest();
//return 0;
if(argc > 1 && !strcmp(argv[1], "-test"))
1;//Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-t2t"))
TransformerMain(argc - 1, argv + 1);
else{
fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n");
......
......@@ -614,7 +614,7 @@ void XMathGrad::GradNormalize(XTensor * node)
XTensor * p = NewTensor(a);
XTensor * q = NewTensor(a);
XTensor * r = NewTensor(a);
DTYPE epsilon = income.GetParamInt(0);
DTYPE epsilon = income.GetParam(0);
int dim = income.GetParamInt(0);
int n = a->GetDim(dim);
......@@ -742,7 +742,7 @@ void XMathGrad::GradReduceMean(XTensor * node)
XNoder::MakeGrad(a);
_Unsqueeze(node->grad, b, dim, n);
_ScaleAndShift(b, c, 1 / n);
_ScaleAndShift(b, c, 1.0F/n);
_Sum(a->grad, c, a->grad);
node->visitMark = NODE_FINISHED;
......
......@@ -258,10 +258,11 @@ void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
if(node == NULL)
return;
//fprintf(stderr, "%d\n", node->id);
if(node->visitMark == code + 1){
ShowNTErrors("There is a circle in the network\n");
}
else if(node->visitMark <= code || node->visitMark >= code + 2){
else if(node->visitMark <= code){
node->visitMark = code + 1;
XLink &income = node->income;
for(int i = 0; i < income.tailNum; i++){
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "T2TAttention.h"
#include "T2TUtility.h"
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
......@@ -56,9 +57,9 @@ void T2TAttention::InitModel(int argc, const char ** argv, int myDevID, XMem * m
float minmax = 0;
LoadParamInt(argc, argv, "nhead", &nhead, 8);
LoadParamInt(argc, argv, "dk", &dk, 512);
LoadParamInt(argc, argv, "dv", &dv, 512);
LoadParamInt(argc, argv, "d", &d, 512);
LoadParamInt(argc, argv, "d", &dk, DEFAULT_BEDDING_SIZE);
LoadParamInt(argc, argv, "d", &dv, DEFAULT_BEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_BEDDING_SIZE);
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.08F);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
......@@ -104,12 +105,12 @@ XTensor * T2TAttention::Make(XTensor * k, XTensor * q, XTensor * v)
/* scalar = softmax(Q * K^T / sqrt(dk)) * V */
scalar = Softmax(Linear(BMMul(qheads, X_NOTRANS, kheads, X_TRANS), 1/sqrt((float)dk)), -1);
att = MMul(scalar, vheads);
att = BMMul(scalar, vheads);
XTensor * result = new XTensor();
/* concatenate the heads */
*result = Merge(att, -1);
*result = Merge(att, att.order - 1);
return result;
}
......
......@@ -57,7 +57,8 @@ void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * my
LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt(argc, argv, "maxlen", &maxLength, 256);
LoadParamInt(argc, argv, "d", &d, 256);
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_BEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_BEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);
......@@ -74,9 +75,9 @@ length - length of the sequenc
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{
InitTensor2D(&posEmbedding, length, eSize, X_FLOAT, devID, mem);
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem);
float * data = new float[posEmbedding.unitNum];
float * data = new float[posEmbeddingBase.unitNum];
for(int pos = 0; pos < length; pos++){
float * dp = data + pos * eSize;
......@@ -92,7 +93,7 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
}
}
posEmbedding.SetData(data, posEmbedding.unitNum);
posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);
delete[] data;
}
......@@ -105,11 +106,12 @@ XTensor * T2TEmbedder::Make(XTensor * input)
CheckNTErrors(input->GetDim(-1) == vSize, "Wrong vocabulary size!");
CheckNTErrors(input->order > 1, "Wrong input tensor size!");
CheckNTErrors(input->dimSize[input->order - 2] < maxLength, "The sequence is too long!");
CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, input->dimSize, input->order);
dims[0] = eSize;
memcpy(dims, input->dimSize, input->order * sizeof(int));
dims[input->order - 1] = eSize;
bool match = (posEmbedding.order == input->order);
if(match){
......@@ -122,17 +124,10 @@ XTensor * T2TEmbedder::Make(XTensor * input)
/* we make positional embeddings first */
if(!match){
InitTensor(&posEmbedding, input->order, dims, X_FLOAT, 1.0F, devID, mem);
XTensor * posTMP = NewTensorBuf(2, dims, X_FLOAT, 1.0F, devID, mem);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem);
int dims2[MAX_TENSOR_DIM_NUM];
dims2[0] = dims[0];
dims2[1] = dims[1];
dims2[2] = posEmbedding.unitNum / (dims[0] * dims[1]);
posEmbedding.Reshape(3, dims2);
_Unsqueeze(posTMP, &posEmbedding, 0, dims2[2]);
posEmbedding.Reshape(input->order, dims);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
DelTensorBuf(posTMP);
}
......
......@@ -29,6 +29,8 @@ using namespace nts;
namespace transformer
{
#define DEFAULT_BEDDING_SIZE 512
/*
embedding (of word at position i):
word embedding + positional embedding
......
......@@ -21,6 +21,7 @@
#include "T2TFNN.h"
#include "T2TUtility.h"
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"
#include "../../tensor/function/FHeader.h"
......@@ -54,9 +55,9 @@ void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
float minmax = 0;
LoadParamInt(argc, argv, "d", &inSize, 512);
LoadParamInt(argc, argv, "d", &outSize, 512);
LoadParamInt(argc, argv, "fnnh", &hSize, 512);
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_BEDDING_SIZE);
LoadParamInt(argc, argv, "d", &outSize, DEFAULT_BEDDING_SIZE);
LoadParamInt(argc, argv, "fnnh", &hSize, DEFAULT_BEDDING_SIZE);
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.08F);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem);
......
......@@ -20,6 +20,7 @@
*/
#include "T2TLayerNormal.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
......@@ -58,7 +59,32 @@ y =
*/
XTensor * T2TLN::Make(XTensor * input)
{
return NULL;
XTensor &x = *input;
XTensor mean;
XTensor variance;
XTensor standard;
XTensor meanFilled;
XTensor standardFilled;
XTensor * result = new XTensor();
/* \mu = (sum_i x_i)/m */
mean = ReduceSum(x, x.order - 1);
/* \sigma = (sum_i (x_i - \mu)^2)/m */
variance = ReduceVariance(x, x.order - 1, mean);
/* standard = sqrt(variance) */
standard = Power(variance, 0.5F);
/* unsqueeze mean and standard deviation to fit them into
the same size of x */
meanFilled = Unsqueeze(mean, x.order - 1, x.GetDim(-1));
standardFilled = Unsqueeze(standard, x.order - 1, x.GetDim(-1));
/* x' = (x - \mu)/standard */
*result = (x - meanFilled)/standardFilled;
return result;
}
}
......@@ -21,6 +21,7 @@
#include "T2TOutput.h"
#include "T2TUtility.h"
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
......@@ -53,10 +54,11 @@ void T2TOutput::InitModel(int argc, const char ** argv, int myDevID, XMem * myMe
mem = myMem;
LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt(argc, argv, "hsize", &inSize, 512);
LoadParamInt(argc, argv, "hsize", &hSize, 512);
}
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_BEDDING_SIZE);
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_BEDDING_SIZE);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem);
}
/*
make the network
......
......@@ -31,6 +31,8 @@ namespace transformer
/* constructor */
T2TTrainer::T2TTrainer()
{
devID = -1;
mem = NULL;
seqLen = NULL;
nseqBuf = 0;
nextSeq = -1;
......@@ -50,18 +52,19 @@ initialization
*/
void T2TTrainer::Init(int argc, const char ** argv)
{
LoadParamInt(argc, argv, "dev", &devID, -1);
LoadParamFloat(argc, argv, "lrate", &lrate, 0.001F);
LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
LoadParamInt(argc, argv, "wbatch", &wBatchSize, 1);
LoadParamInt(argc, argv, "nepoch", &nepoch, 1);
LoadParamInt(argc, argv, "nstep", &nstep, 1);
LoadParamInt(argc, argv, "vsize", &vSize, 1);
LoadParamBool(argc, argv, "sorted", &isLenSorted, false);
LoadParamInt(argc, argv, "bufsize", &bufSize, 50000);
int maxUnitInBuf;
LoadParamInt(argc, argv, "bufsize", &maxUnitInBuf, 20000);
buf = new int[maxUnitInBuf];
seqLen = new int[maxUnitInBuf];
seqOffset = new int[maxUnitInBuf];
buf = new int[bufSize];
seqLen = new int[bufSize];
seqOffset = new int[bufSize];
}
/*
......@@ -78,6 +81,8 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
int wordCountTotal = 0;
bool isEnd = false;
float loss = 0;
XNet net;
double startT = GetClockSec();
......@@ -98,6 +103,9 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
/* make the network */
model->Make(&batch, &output);
/* back-propagation for obtaining gradients */
net.Backward(output, batch, CROSSENTROPY);
/* TODO: update the model!!!! */
......@@ -188,7 +196,7 @@ int T2TTrainer::LoadBuf(FILE * file)
wordCount += wNum;
lineCount++;
if(wordCount >= wBatchSize || lineCount >= sBatchSize)
if(wordCount >= bufSize - MAX_SEQUENCE_LENGTH)
break;
}
......@@ -211,24 +219,28 @@ load a batch of sequences
*/
int T2TTrainer::LoadBatch(FILE * file, XTensor * batch, int step, int vs, int sBatch, int wBatch, bool isSorted, int &wCount)
{
if(nextSeq >= nseqBuf)
if(nextSeq < 0 || nextSeq >= nseqBuf)
LoadBuf(file);
int seq = nextSeq;
int seq = MAX(nextSeq, 0);
int wc = 0;
int wn = 0;
int sc = 0;
int max = 0;
while(seq < nseqBuf){
wc += seqLen[seq];
while(seq + sc < nseqBuf){
wn = seqLen[seq + sc];
wc += wn;
sc += 1;
if(max < wc)
max = wc;
if(max < wn)
max = wn;
if(sc >= sBatch && wc >= wBatch)
break;
}
nextSeq = seq + sc;
if(sc > 0){
int dims[MAX_TENSOR_DIM_NUM];
dims[0] = sc;
......
......@@ -26,7 +26,7 @@
#include "../../tensor/function/FHeader.h"
#define MAX_SEQUENCE_LENGTH 1024 * 64
#define MAX_SEQUENCE_LENGTH 1024 * 4
using namespace nts;
......@@ -46,6 +46,9 @@ public:
/* buffer for loading words */
int * buf;
/* buffer size */
int bufSize;
/* length of each sequence */
int * seqLen;
......
......@@ -34,8 +34,8 @@ void LoadParamString(int argc, const char ** argv, const char * name, char * p,
bool hit = false;
for(int i = 0; i < argc; i++){
if(!strcmp(argv[i], vname) && i + 1 < argc){
*(int*)p = atoi(argv[i + 1]);
fprintf(stderr, " %s=%s\n", name, argv[i + 1]);
strcpy(p, argv[i + 1]);
//fprintf(stderr, " %s=%s\n", name, argv[i + 1]);
hit = true;
}
}
......@@ -52,7 +52,7 @@ void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int
for(int i = 0; i < argc; i++){
if(!strcmp(argv[i], vname) && i + 1 < argc){
*(int*)p = atoi(argv[i + 1]);
fprintf(stderr, " %s=%s\n", name, argv[i + 1]);
//fprintf(stderr, " %s=%s\n", name, argv[i + 1]);
hit = true;
}
}
......@@ -69,7 +69,8 @@ void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bo
for(int i = 0; i < argc; i++){
if(!strcmp(argv[i], vname)){
*(bool*)p = true;
fprintf(stderr, " %s=%s\n", name, "true");
//fprintf(stderr, " %s=%s\n", name, "true");
hit = true;
}
}
if(!hit)
......@@ -84,12 +85,27 @@ void LoadParamFloat(int argc, const char ** argv, const char * name, float * p,
bool hit = false;
for(int i = 0; i < argc; i++){
if(!strcmp(argv[i], vname) && i + 1 < argc){
strcpy((char*)p, argv[i + 1]);
fprintf(stderr, " %s=%s\n", name, argv[i + 1]);
*p = (float)atof(argv[i + 1]);
//fprintf(stderr, " %s=%s\n", name, argv[i + 1]);
hit = true;
}
}
if(!hit)
*p = defaultP;
}
void ShowParams(int argc, const char ** argv)
{
fprintf(stderr, "args:\n");
for(int i = 0; i < argc; i++){
if(argv[i][0] == '-'){
if(i + 1 < argc && argv[i + 1][0] != '-')
fprintf(stderr, " %s=%s\n", argv[i], argv[i + 1]);
else
fprintf(stderr, " %s=yes\n", argv[i]);
}
}
fprintf(stderr, "\n");
}
}
\ No newline at end of file
......@@ -27,12 +27,15 @@
namespace transformer
{
/* load model parameters */
/* load arguments */
void LoadParamString(int argc, const char ** argv, const char * name, char * p, char * defaultP);
void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int defaultP);
void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bool defaultP);
void LoadParamFloat(int argc, const char ** argv, const char * name, float * p, float defaultP);
/* show arguments */
void ShowParams(int argc, const char ** argv);
}
#endif
\ No newline at end of file
......@@ -20,12 +20,36 @@
*/
#include "Transformer.h"
#include "T2TModel.h"
#include "T2TUtility.h"
#include "T2TTrainer.h"
namespace transformer
{
int TransformerMain(int argc, const char ** argv)
{
if(argc == 0)
return 1;
ShowParams(argc, argv);
char * trainFN = new char[MAX_LINE_LENGTH];
LoadParamString(argc, argv, "train", trainFN, "");
T2TModel model;
model.InitModel(argc, argv);
if(strcmp(trainFN, "")){
T2TTrainer trainer;
trainer.Init(argc, argv);
trainer.Train(trainFN, &model);
}
delete[] trainFN;
return 0;
}
......
......@@ -38,7 +38,7 @@ namespace transformer
{
/* entrance of the program */
int TransformerMMain(int argc, const char ** argv);
int TransformerMain(int argc, const char ** argv);
}
......
......@@ -42,6 +42,8 @@
#include "core/movement/CopyValues.h"
#include "core/arithmetic/Sum.h"
#include "core/arithmetic/Multiply.h"
#include "core/arithmetic/Sub.h"
#include "core/arithmetic/Div.h"
#include "core/math/ScaleAndShift.h"
#ifdef USE_CUDA
......@@ -354,6 +356,18 @@ XTensor XTensor::operator* (const XTensor& tensor)
return Multiply(*this, tensor);
}
/* overloading of the minus-sign */
XTensor XTensor::operator- (const XTensor& tensor)
{
return Sub(*this, tensor);
}
/* overloading of the division-sign */
XTensor XTensor::operator/ (const XTensor& tensor)
{
return Div(*this, tensor);
}
/*
linear transformation b = a * \scale + \shift
>> scale - the slope
......@@ -610,8 +624,8 @@ double GaussRand()
double pi = 3.141592654;
if (phase == 0){
u = rand() / (RAND_MAX + 1.0);
v = rand() / (RAND_MAX + 1.0);
u = (rand() + 1) / (RAND_MAX + 1.0);
v = (rand() + 1) / (RAND_MAX + 1.0);
z = sqrt(-2.0 * log(u))* sin(2.0 * pi * v);
}
else{
......@@ -1008,8 +1022,8 @@ set the value of a cell in a 3d tensor in default type
bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
{
CheckNTErrors((order == 3), "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors((d0 >= 0 && d1 < dimSize[0]), "dimension 0 is out of range!");
CheckNTErrors((d2 >= 0 && d2 < dimSize[1]), "dimension 1 is out of range!");
CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!");
CheckNTErrors((d2 >= 0 && d1 < dimSize[1]), "dimension 1 is out of range!");
CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 1 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
......
......@@ -203,6 +203,12 @@ public:
/* overloading of the multiply-sign */
XTensor operator* (const XTensor &tensor);
/* overloading of the minus-sign */
XTensor operator- (const XTensor &tensor);
/* overloading of the division-sign */
XTensor operator/ (const XTensor &tensor);
/* linear transformation */
XTensor Lin(DTYPE scale, DTYPE shift = 0);
......
......@@ -251,9 +251,7 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
/*
matrix multiplication with no transposition c = a * b * alpha
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
>> transposedB - indicates whether teh matrices in b are transposed
>> alpha - a coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication
......
......@@ -326,4 +326,60 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
return c;
}
/*
matrix multiplication of the two tensors (do it on site)
c = a * b * alpha
make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = ai * bi * alpha + cm * beta
>> a - tensor a
>> b - tensor b
>> alpha - a coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors
*/
XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
DTYPE alpha, XPRunner * parallelRunner)
{
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = a.dimSizeRDI[1];
int am = a.dimSizeRDI[0];
int bn = b.dimSizeRDI[1];
int bm = b.dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a.order;
int sub = 0;
int * dimSize = new int[order];
for (int i = 0; i < a.order - 2; i++)
dimSize[sub++] = a.dimSize[i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP();
/*call _MatrixMulBatched function */
_MatrixMulBatched(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
/* destroy variables */
delete[] dimSize;
return c;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -73,6 +73,17 @@ where trans() returns the transposed matrix if the flag is fired
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
/*
matrix multiplication of the two tensors (return a XTensor structure) c = a * b * alpha
make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = ai * bi * alpha + cm * beta
*/
XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor)
#endif // __MATRIXMULBATCHED_H__
\ No newline at end of file
......@@ -36,7 +36,7 @@ copy s to t
void _CopyValues(const XTensor * s, XTensor * t, XStream * stream)
{
CheckNTErrors((s != NULL && t != NULL), "The input tensor and output tensor must be nonempty!");
CheckNTErrors((s->data != NULL), "Cannot copy from an empty data array!");
CheckNTErrors((s->data != NULL), "Cannot copy an empty data array!");
CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!");
CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!");
......@@ -82,7 +82,7 @@ copy s to t
void _CopyValues(const XTensor * s, const int sBeg, const int sLen, XTensor * t, const int tBeg, XStream * stream)
{
CheckNTErrors(s != NULL && t != NULL, "The input tensor and output tensor must be nonempty!");
CheckNTErrors(s->data != NULL && t->data != NULL, "Cannot copy from an empty data array!");
CheckNTErrors(s->data != NULL && t->data != NULL, "Cannot copy an empty data array!");
CheckNTErrors(s->unitSize == t->unitSize, "The input tensors must be of the same unit size!");
CheckNTErrors(s->order > sBeg && sBeg >= 0 && sLen <= s->unitNum, "Wrong segment on the source side");
CheckNTErrors(t->order > tBeg && tBeg >= 0, "Wrong segment on the target side");
......
......@@ -168,6 +168,8 @@ make a new tensor to keep the result and return it
XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
{
CheckNTErrors(&s, "Invalid tensors!");
CheckNTErrors(s.dimSize[whereToSplit] % splitNum == 0,
"The dimension cannot be splitted due to the inproper split number");
int order = s.order + 1;
int * dimSize = new int[order];
......
......@@ -282,6 +282,9 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
CheckNTErrors((!dedx->isSparse), "The gradient matrix must be dense!");
CheckNTErrors((gold != NULL), "The gold standard cannot be empty!");
if(leadDim < 0)
leadDim = y->order - 1;
int leadDimRDI = y->order - leadDim - 1;
#ifdef USE_CUDA
if (gold->devID >= 0) {
......
......@@ -188,7 +188,11 @@ void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
CheckNTErrors((dedx->isSparse == false), "The gradient tensor must be dense!");
CheckNTErrors((gold != NULL), "Incorrect x gold standard tensor!");
if(leadDim < 0)
leadDim = y->order - 1;
int leadDimRDI = y->order - leadDim - 1;
#ifdef USE_CUDA
if(y->devID >= 0){
_CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论