Commit 5f933fc6 by xuchen

cumulative update

parent 78954fad
......@@ -35,6 +35,8 @@
void BackwardTest();
void TransposeTest();
void SumDimTest();
void SplitBackwardTest();
void MemTest();
using namespace nts;
using namespace fnnlm;
......@@ -42,6 +44,10 @@ using namespace transformer;
int main( int argc, const char ** argv )
{
//MemTest();
//return 0;
//SplitBackwardTest();
//return 0;
//_CrtSetBreakAlloc(896);
//BackwardTest();
//return 0;
......@@ -89,7 +95,7 @@ void BackwardTest()
c = DivDim(a, b, 0);
c.Dump(stderr, "c:");
XLink::ShowNetwork(stderr, &c);
//XLink::ShowNetwork(stderr, &c);
net.Backward(c);
......@@ -209,3 +215,67 @@ void SumDimTest()
delete[] data;
}
void SplitBackwardTest()
{
int * dimSize = new int[2];
dimSize[0] = 2;
dimSize[1] = 4;
XTensor t1;
InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
XTensor t2;
InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
XTensor tensor;
//_SetDataFixedFloat(&t1, 1.0F);
//_SetDataFixedFloat(&t2, 2.0F);
t1.SetDataRand();
t2.SetDataRand();
tensor = t1 + t2;
XList smalls;
XTensor first;
XTensor second;
InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
smalls.Add(&first);
smalls.Add(&second);
Split(tensor, smalls, 1, 2);
XTensor mul;
mul = Sum(first, second);
XNet net;
net.Backward(mul);
net.Dump(stderr);
printf("Done!");
}
void MemTest()
{
XMem * mem;
mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION, 1024, MILLION);
XTensor tensor;
InitTensor2D(&tensor, 2, 4, X_FLOAT, 0, mem);
tensor.SetZeroAll();
tensor.Dump(stderr);
delete mem;
if (tensor.mem != NULL) {
printf("It isn't null!\n");
printf("%d\n", (int)tensor.mem->signature);
}
else {
printf("It's null\n");
}
tensor.Dump(stderr);
}
\ No newline at end of file
......@@ -25,6 +25,7 @@
#include "XNoder.h"
#include "XBackwardShape.h"
#include "../tensor/XName.h"
#include "../tensor/XUtility.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/core/getandset/SetData.h"
......@@ -40,7 +41,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
if(operID == MOVEMENT_COPYINDEXED)
GradCopyIndexed(node, isEfficent);
if(operID == MOVEMENT_GATHER)
else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent);
else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent);
......@@ -80,7 +81,7 @@ gradient computation for copying indexed sub-tensors
for
b = copyindexed(a)
we have
dE/da = spread(b)
dE/da = spreadforcopyindexed(b)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
......@@ -91,32 +92,14 @@ void XShapeGrad::GradCopyIndexed(XTensor * node, bool isEfficent)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!");
int dim = income.GetParamInt(0);
int * srcIndex = (int *)income.GetParamPointer(1);
int indexSize = income.GetParamInt(2);
int * tgtIndex = (int *)income.GetParamPointer(3);
int copyNum = income.GetParamInt(4);
int realIndexSize = indexSize * copyNum;
int * realSrcIndex = new int[realIndexSize];
int * realTgtIndex = new int[realIndexSize];
for(int i = 0; i < indexSize; i++) {
for(int j = 0; j < copyNum; j++) {
realSrcIndex[i * copyNum + j] = srcIndex[i] + j;
realTgtIndex[i * copyNum + j] = tgtIndex[i] + j;
}
}
int copyNum = income.GetParamInt(1);
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
_Spread(input->grad, node->grad, dim, realSrcIndex, realIndexSize, realTgtIndex);
XTensor * srcIndex = income.tails[1];
XTensor * tgtIndex = income.tails[2];
delete[] realSrcIndex;
delete[] realTgtIndex;
delete[] srcIndex;
delete[] tgtIndex;
node->visitMark = NODE_FINISHED;
XNoder::MakeGrad(input);
_SpreadForCopyIndexed(input->grad, node->grad, dim, srcIndex, tgtIndex, copyNum);
}
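
A minimal usage sketch of the tensor-index CopyIndexed path and its backward pass, written in the style of the tests above (the include paths and the CPU device id -1 are assumptions; the calls are the ones introduced in this commit):

#include "../tensor/XTensor.h"          /* include paths are assumptions */
#include "../tensor/core/CHeader.h"
#include "../network/XNet.h"

using namespace nts;

void CopyIndexedBackwardSketch()
{
    /* a 2 x 4 source tensor on the CPU */
    XTensor a;
    InitTensor2D(&a, 2, 4, X_FLOAT, -1, NULL);
    a.SetDataRand();

    /* copy source columns 0 and 2 to target columns 0 and 1 */
    XTensor srcIndex;
    XTensor tgtIndex;
    InitTensor1D(&srcIndex, 2, X_INT, -1, NULL);
    InitTensor1D(&tgtIndex, 2, X_INT, -1, NULL);
    int src[2] = {0, 2};
    int tgt[2] = {0, 1};
    srcIndex.SetData(src, 2);
    tgtIndex.SetData(tgt, 2);

    XTensor b;
    b = CopyIndexed(a, 1, srcIndex, tgtIndex, 1);

    /* dE/da is filled by _SpreadForCopyIndexed during the backward pass */
    XNet net;
    net.Backward(b);
    net.Dump(stderr);
}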
/*
......@@ -143,7 +126,6 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
node->visitMark = NODE_FINISHED;
}
/*
gradient for merge
for
......@@ -181,6 +163,7 @@ void XShapeGrad::GradMerge(XTensor * node, bool isEfficent)
XNoder::MakeGrad(input);
int * dims = new int[input->order];
memset(dims, 0, sizeof(int) * input->order);
for(int i = 0, j = 0; i < input->order; i++){
if(i >= leadDim){
dims[j++] = input->dimSize[i];
......
......@@ -437,4 +437,25 @@ void XNet::ClearGrad(XTensor * node)
}
}
/*
show network topology
>> file - file to dump information
>> node - pointer to the node
*/
void XNet::ShowNetwork(FILE * file, XTensor * node)
{
XList roots(1);
roots.Add(node);
Traverse(roots);
XLink::ShowNode(file, node);
/* go over nodes in its topological order */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * n = (XTensor*)nodes.Get(i);
XLink::ShowNode(file, n);
}
}
}
\ No newline at end of file
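
A quick sketch of calling the new XNet::ShowNetwork entry point (include paths and the CPU device id are assumptions):

#include "../tensor/XTensor.h"          /* include paths are assumptions */
#include "XNet.h"

using namespace nts;

void ShowNetworkSketch()
{
    XTensor a;
    XTensor b;
    XTensor c;
    InitTensor2D(&a, 2, 2, X_FLOAT, -1, NULL);
    InitTensor2D(&b, 2, 2, X_FLOAT, -1, NULL);
    a.SetDataRand();
    b.SetDataRand();

    /* build a tiny computation graph */
    c = a + b;

    /* traverse the graph from c and print every node in topological order */
    XNet net;
    net.ShowNetwork(stderr, &c);
}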
......@@ -108,6 +108,9 @@ struct XNet
/* clear the gradient information if the node is of no use */
void ClearGrad(XTensor * node);
/* show network topology */
void ShowNetwork(FILE * file, XTensor * node);
};
/* we make a unique id for every tensor */
......
......@@ -231,7 +231,7 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model)
}
for(int i = 0; i < argc; i++){
if(!strcmp(argv[i], "-mempool"))
if (!strcmp(argv[i], "-mempool"))
model.mem = new XMem(model.devID);
}
}
......@@ -715,11 +715,9 @@ The indexed cell is set to 1, and 0 otherwise.
>> devID - device id
>> mem - memory pool
*/
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols, int itemNum, int devID, XMem * mem)
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID, XMem * mem)
{
if(devID >= 0 || (mem != NULL && mem->devID >= 0))
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, -1);
else
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, devID, mem);
tensor.SetZeroAll();
......@@ -727,12 +725,6 @@ void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, in
/* set non-zero cells */
for(int i = 0; i < itemNum; i++)
tensor.Set2D(1.0F, rows[i], cols[i]);
if(devID >= 0 || (mem != NULL && mem->devID >= 0)){
XList list(1);
list.Add(&tensor);
CPUToGPUFlush(&list, devID, mem);
}
}
/*
......@@ -859,8 +851,6 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* y = softmax(s) */
_LogSoftmax(&s, &y, 1);
}
}
/*
......@@ -998,7 +988,6 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
XTensor embeddingBig;
XTensor hidden;
XTensor b;
XTensor srcIndex;
int size = batch * (n-1);
int * index = new int[size];
......@@ -1010,28 +999,25 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
}
}
InitTensor1D(&srcIndex, size, X_INT, model.devID, model.mem);
srcIndex.SetData(index, size);
InitTensor1D(&words, size, X_INT, model.devID, model.mem);
words.SetData(index, size);
XTensor embedding;
embedding = Gather(model.embeddingW, srcIndex);
embeddingBig = Gather(model.embeddingW, words);
delete[] index;
int dimSize[2];
dimSize[0] = embedding.GetDim(0) / (n - 1);
dimSize[1] = embedding.GetDim(1) * (n - 1);
dimSize[0] = embeddingBig.GetDim(0) / (n - 1);
dimSize[1] = embeddingBig.GetDim(1) * (n - 1);
hidden = Reshape(embedding, embedding.order, dimSize);
hidden = Reshape(embeddingBig, embeddingBig.order, dimSize);
/* hidden layers */
for(int i = 0; i < depth; i++)
hidden = MMul(hidden, model.hiddenW[i]) + model.hiddenB[i];
hidden = HardTanH(MMul(hidden, model.hiddenW[i]) + model.hiddenB[i]);
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
//XLink::ShowNetwork(stderr, &output);
}
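
The reshape above packs the n-1 context-word embeddings of each example into a single row; a standalone sketch of the dimension bookkeeping (the sizes below are made up):

#include <cstdio>

int main()
{
    int batch = 3, n = 4, eSize = 8;        /* hypothetical sizes */

    /* Gather returns one embedding row per context word */
    int rowsBefore = batch * (n - 1);       /* 9 rows of width eSize */

    /* the reshape used in ForwardAutoDiff */
    int dim0 = rowsBefore / (n - 1);        /* 3  = batch                  */
    int dim1 = eSize * (n - 1);             /* 24 = n-1 embeddings per row */

    printf("before: %d x %d, after: %d x %d\n", rowsBefore, eSize, dim0, dim1);
    return 0;
}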
/*
......@@ -1071,7 +1057,6 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
//XLink::ShowNetwork(stderr, &output);
}
/*
......
......@@ -60,7 +60,7 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
attentionsEnde[i].InitModel(argc, argv, false, myIgnored, myDevID, myMem);
attentionsEnde[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
}
}
......@@ -69,11 +69,12 @@ void AttDecoder::InitModel(int argc, char ** argv,
make the decoding network
>> inputDec - the input tensor of the decoder
>> outputEnc - the output tensor of the encoder
>> mask - the mask that indicate each position is valid
>> mask - mask that indicates which position is valid
>> maskEncDec - mask for the encoder-decoder attention
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the decoder
*/
XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, bool isTraining)
XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining)
{
XTensor x;
......@@ -89,7 +90,6 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, b
XTensor ln;
XTensor fnn;
XTensor res;
XTensor nothing;
/******************/
/* self attention */
......@@ -107,7 +107,7 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, b
/*****************************/
/* encoder-decoder attention */
ende = attentionsEnde[i].Make(outputEnc, x, outputEnc, nothing, isTraining);
ende = attentionsEnde[i].Make(outputEnc, x, outputEnc, maskEncDec, isTraining);
/* dropout */
if(isTraining && dropoutP > 0)
......
......@@ -48,7 +48,7 @@ public:
int myDevID = -1, XMem * myMem = NULL);
/* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, bool isTraining);
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
};
}
......
......@@ -150,7 +150,6 @@ XTensor T2TEmbedder::Make(XTensor &input)
}
/* then we make word embeddings */
//wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)eSize));
wordEmbedding = Gather(w, input);
wordEmbedding = Linear(wordEmbedding, (float)sqrt((float)eSize));
......
......@@ -93,10 +93,11 @@ void AttEncoder::InitModel(int argc, char ** argv,
make the encoding network
>> input - the input tensor of the encoder
>> mask - the mask that indicates which position is valid
>> maskEncDec - not used
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool isTraining)
XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining)
{
XTensor x;
......@@ -144,4 +145,18 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool isTraining)
return x;
}
/*
make the encoding network (wrapper)
>> input - the input tensor of the encoder
>> mask - the mask that indicates which position is valid
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool isTraining)
{
XTensor nothing;
return Make(input, mask, nothing, isTraining);
}
}
......@@ -40,7 +40,7 @@ class T2TEncoder
{
public:
virtual
XTensor Make(XTensor &input, XTensor &mask, bool isTraining) = 0;
XTensor Make(XTensor &input, XTensor &mask, XTensor &mask2, bool isTraining) = 0;
};
/*
......@@ -49,7 +49,7 @@ the encoder based on RNN
class RNNEncoder : T2TEncoder
{
public:
XTensor Make(XTensor &input, XTensor &mask, bool isTraining);
XTensor Make(XTensor &input, XTensor &mask, XTensor &mask2, bool isTraining);
};
......@@ -118,6 +118,9 @@ public:
int myDevID = -1, XMem * myMem = NULL);
/* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
/* make the encoding network (wrapper) */
XTensor Make(XTensor &input, XTensor &mask, bool isTraining);
};
......
......@@ -75,7 +75,7 @@ void T2TModel::InitModel(int argc, char ** argv)
mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
}
encoder->InitModel(argc, argv, isLM, 0, devID, mem);
encoder->InitModel(argc, argv, true, 0, devID, mem);
outputLayer->InitModel(argc, argv, devID, mem);
if(isMT)
......@@ -99,7 +99,9 @@ make the encoding network
*/
XTensor T2TModel::MakeEncoder(XTensor &input, XTensor &mask, bool isTraining)
{
return encoder->Make(input, mask, isTraining);
XTensor nothing;
return encoder->Make(input, mask, nothing, isTraining);
}
/*
......@@ -107,13 +109,14 @@ make the decoding network
>> inputDec - input tensor of the decoder
>> outputEnc - output tensor of the encoder
>> output - output tensor (distribution)
>> mask - the mask for positions that are/not involved in computation
>> mask - mask for positions that are/not involved in computation
>> maskEncDec - mask for the encoder-decoder attention
>> isTraining - indicates whether we are training the model
<< return - encoding result
*/
XTensor T2TModel::MakeDecoder(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, bool isTraining)
XTensor T2TModel::MakeDecoder(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining)
{
return decoder->Make(inputDec, outputEnc, mask, isTraining);
return decoder->Make(inputDec, outputEnc, mask, maskEncDec, isTraining);
}
/*
......@@ -190,14 +193,16 @@ make the network for machine translation (with the output softmax layer)
>> inputDec - input tensor of the decoder
>> output - output tensor (distribution)
>> paddingEnc - padding of the sequences (on the encoder side)
>> paddingDec - padding of the sequences (on the decoder side)
>> isTraining - indicates whether the model is for training
*/
void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, bool isTraining)
void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, XTensor &paddingDec, bool isTraining)
{
XTensor encoding;
XTensor decoding;
XTensor maskEnc;
XTensor maskDec;
XTensor maskEncDec;
/* generate mask to see "previous" words on the decoder side */
//int len = inputDec.GetDim(inputDec.order - 2);
......@@ -222,6 +227,23 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
_SetDataLowTri(&maskDec, 1e9F, 0);
_ScaleAndShiftMe(&maskDec, 1.0F, -1e9F);
/* encoder-decoder mask that prevents attention to padding (dummy words) */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_Unsqueeze(&paddingDec, maskEncDecTMPDec, paddingEnc.order, paddingEnc.GetDim(-1));
_Multiply(maskEncDecTMPDec, maskEncDecTMPEnc, maskEncDecTMPDec);
_ScaleAndShiftMe(maskEncDecTMPDec, 1e9F, -1e9F);
_Unsqueeze(maskEncDecTMPDec, &maskEncDec, 0, dims[0]);
DelTensorBuf(maskEncDecTMPDec);
DelTensorBuf(maskEncDecTMPEnc);
/* padding on the source side */
int * dimsPadding = new int[paddingEnc.order + 2];
for (int i = 0; i < paddingEnc.order - 1; i++)
......@@ -252,7 +274,7 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
_Sum(&maskEnc, padding3, &maskEnc);
encoding = MakeEncoder(inputEnc, maskEnc, isTraining);
decoding = MakeDecoder(inputDec, encoding, maskDec, isTraining);
decoding = MakeDecoder(inputDec, encoding, maskDec, maskEncDec, isTraining);
outputLayer->Make(decoding, output);
delete[] dims;
......
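
A standalone toy illustration of the encoder-decoder mask built in MakeMT above (sizes and padding values are made up): an entry is 0 where both the decoder position and the encoder position hold real words, and -1e9 where the encoder position is padding, so those attention weights vanish after softmax.

#include <cstdio>

int main()
{
    const int lenDec = 3;
    const int lenEnc = 4;
    float padDec[lenDec] = {1, 1, 0};       /* 1 = real token, 0 = padding */
    float padEnc[lenEnc] = {1, 1, 1, 0};

    for (int i = 0; i < lenDec; i++) {
        for (int j = 0; j < lenEnc; j++) {
            /* the two _Unsqueeze calls followed by _Multiply */
            float m = padDec[i] * padEnc[j];

            /* the _ScaleAndShiftMe(..., 1e9F, -1e9F) step */
            m = m * 1e9F - 1e9F;
            printf("%12.0f", m);
        }
        printf("\n");
    }
    return 0;
}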
......@@ -72,13 +72,13 @@ public:
XTensor MakeEncoder(XTensor &input, XTensor &mask, bool isTraining);
/* make the decoding network */
XTensor MakeDecoder(XTensor &inputEnc, XTensor &inputDec, XTensor &mask, bool isTraining);
XTensor MakeDecoder(XTensor &inputEnc, XTensor &inputDec, XTensor &mask, XTensor &MaskEncDec, bool isTraining);
/* make the network for language modeling (with the output softmax layer) */
void MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining);
/* make the network for machine translation (with the output softmax layer) */
void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, bool isTraining);
void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, XTensor &paddingDec, bool isTraining);
/* get parameter matrices */
void GetParams(XList &list);
......
......@@ -93,8 +93,8 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{
XTensor &x = input;
//output = LogSoftmax(MMul(x, w), -1);
output = Softmax(MMul(x, w), -1);
output = LogSoftmax(MMul(x, w), -1);
//output = Softmax(MMul(x, w), -1);
}
}
......@@ -142,6 +142,9 @@ public:
/* counterpart of "isSmallBatch" */
bool isBigBatch;
/* indicates whether we use a small memory footprint for the backward process */
bool isSmallFootprint;
public:
/* constructor */
T2TTrainer();
......
......@@ -20,6 +20,7 @@
*/
#include <math.h>
#include <time.h>
#include "Transformer.h"
#include "T2TModel.h"
#include "T2TUtility.h"
......@@ -58,6 +59,7 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, args, "output", outputFN, "");
srand((unsigned int)time(NULL));
T2TTrainer trainer;
trainer.Init(argc, args);
......
......@@ -126,7 +126,7 @@ void SmallTest()
d = a + b + c.Lin(0.5F);
XLink::CheckNetwork(&d);
XLink::ShowNetwork(stderr, &d);
//XLink::ShowNetwork(stderr, &d);
a.Dump(stderr, "a:");
b.Dump(stderr, "b:");
......
......@@ -65,10 +65,10 @@ namespace nts {
#endif
#ifndef MIN
#define MIN(a,b) ((a < b) ? a : b)
#define MIN(a,b) ((a) < (b) ? a : b)
#endif
#ifndef MAX
#define MAX(a,b) ((a > b) ? a : b)
#define MAX(a,b) ((a) > (b) ? a : b)
#endif
#define __FILENAME__ ( strrchr(__FILE__, DELIMITER) != NULL ? strrchr(__FILE__, DELIMITER)+1 : __FILE__ )
......
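
The added parentheses matter once the macro arguments are expressions; a standalone check (values chosen for illustration):

#include <cstdio>

#define MIN_OLD(a,b) ((a < b) ? a : b)
#define MIN_NEW(a,b) ((a) < (b) ? a : b)

int main()
{
    int x = 2, y = 1;

    /* "<" binds tighter than "&", so the old macro compares x & (1 < y) */
    printf("old: %d\n", MIN_OLD(x & 1, y));   /* prints 1 (wrong)   */
    printf("new: %d\n", MIN_NEW(x & 1, y));   /* prints 0 (correct) */
    return 0;
}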
......@@ -593,21 +593,6 @@ void XLink::CheckNetwork(XTensor * root)
}
/*
show the network encoded in a root node (tensor)
>> file - file to dump information
>> root - pointer to the root node
*/
void XLink::ShowNetwork(FILE * file, XTensor * root)
{
XLink &income = root->income;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
ShowNetwork(file, child);
}
}
/*
show a node
>> file - file to dump information
>> root - pointer to the node
......
......@@ -178,10 +178,6 @@ struct XLink
static
void CheckNetwork(XTensor * root);
/* show the network encoded in a root node (tensor) */
static
void ShowNetwork(FILE * file, XTensor * root);
/* show a node */
static
void ShowNode(FILE * file, XTensor * node);
......
......@@ -79,7 +79,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define SHAPE MOVEMENT_COPYVALUES + 1
#define SHAPE MOVEMENT_GATHER + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1
......
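
A standalone sketch of the id collision this change fixes (the base value below is a placeholder, not the real one): deriving SHAPE from MOVEMENT_COPYVALUES gave it the same id as MOVEMENT_GATHER.

#include <cstdio>

#define MOVEMENT_COPYVALUES 10                        /* placeholder value */
#define MOVEMENT_GATHER     MOVEMENT_COPYVALUES + 1
#define SHAPE_OLD           MOVEMENT_COPYVALUES + 1   /* collides with GATHER */
#define SHAPE_NEW           MOVEMENT_GATHER + 1

int main()
{
    printf("GATHER = %d, SHAPE_OLD = %d, SHAPE_NEW = %d\n",
           MOVEMENT_GATHER, SHAPE_OLD, SHAPE_NEW);    /* 11, 11, 12 */
    return 0;
}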
......@@ -677,9 +677,9 @@ void XTensor::SetData(const void * d, int num, int beg)
return;
CheckNTErrors(!isSparse, "TODO");
CheckNTErrors(num == unitNum - beg, "Illegal size!");
CheckNTErrors(num <= unitNum - beg, "Illegal size!");
XMemCopy(data, devID, d, -1, num * unitSize);
XMemCopy((char*)data + beg * unitSize, devID, d, -1, num * unitSize);
}
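
A brief sketch of what the corrected SetData allows (CPU tensor; the include path is an assumption): filling a sub-range that starts at a non-zero offset.

#include "../tensor/XTensor.h"          /* include path is an assumption */

using namespace nts;

void SetDataOffsetSketch()
{
    XTensor t;
    InitTensor1D(&t, 8, X_FLOAT, -1, NULL);
    t.SetZeroAll();

    /* write 4 values starting at element 4; the old code always copied
       to the beginning of the data array */
    float v[4] = {1.0F, 2.0F, 3.0F, 4.0F};
    t.SetData(v, 4, 4);

    t.Dump(stderr, "t:");               /* expect 0 0 0 0 1 2 3 4 */
}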
/*
......@@ -804,7 +804,7 @@ set tensor items with an array of values
>> values - value for each data item
>> num - number of the data items
*/
void XTensor::SetDataBatched(MTYPE * offsets, void * values, int num)
void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
{
_SetDataWithOffsetAndValue(this, offsets, values, num);
}
......@@ -1289,7 +1289,7 @@ int XTensor::GetNonzeroSize()
if(dataType == DEFAULT_DTYPE){
int count = 0;
for(int i = 0; i < unitNum; i++){
DTYPE value = *((DTYPE*)(char*)data + i * sizeof(DTYPE));
DTYPE value = *(DTYPE*)((char*)data + i * sizeof(DTYPE));
if(value == 0)
count++;
}
......@@ -2271,6 +2271,8 @@ XTensor * NewTensor(const XTensor * a, bool isFilledData)
CheckNTErrors((a != NULL), "Empty input!");
memset(dims, 0, sizeof(int) * MAX_TENSOR_DIM_NUM);
if(a->order > 0)
memcpy(dims, a->dimSize, sizeof(int) * a->order);
......
......@@ -49,6 +49,8 @@ struct XLink;
#define USE_BATCHED_STRIDED_MAT_MUL
#define MIN_TENSOR_SPLIT_NUM 0
#define MIN_TENSOR_SPLIT_LIST_NUM 1024
#define MIN_TENSOR_MERGE_NUM 0
#define MIN_TENSOR_MERGE_LIST_NUM 1024
#define MIN_TENSOR_CAT_NUM 8
/* computation flags */
......@@ -283,7 +285,7 @@ public:
void SetDataBatched(MTYPE * offsets, DTYPE value, int num);
/* set tensor items with an array of values */
void SetDataBatched(MTYPE * offsets, void * values, int num);
void SetDataBatchedWithValues(MTYPE * offsets, void * values, int num);
/* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, int beg = 0);
......
......@@ -16,8 +16,8 @@
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
/* this is a header to include all functions in the "core" workspace */
......@@ -46,16 +46,17 @@
#include "arithmetic/XTensorBLAS.h"
#include "getandset/ConvertDataType.h"
#include "getandset/OnehotAndIndex.h"
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "math/Clip.h"
#include "math/Compare.h"
#include "math/Normalize.h"
#include "math/Power.h"
#include "math/ScaleAndShift.h"
#include "math/Unary.h"
#include "movement/CopyBlocks.h"
#include "movement/CopyBlocksInGrid.h"
#include "movement/CopyBlocksOnSite.h"
......
......@@ -32,8 +32,6 @@ convert data type
*/
void _ConvertDataType(const XTensor * input, XTensor * output)
{
//CheckNTErrors((input->unitSize == output->unitSize), "Input and Output must be same in size!");
if (input->dataType == output->dataType)
return;
......@@ -61,4 +59,29 @@ void _ConvertDataType(const XTensor * input, XTensor * output)
ShowNTErrors("Unsupported data types for conversion!");
}
/*
convert data type (return an XTensor structure)
make a new tensor to keep the result and return it
>> input - the input tensor
>> dataType - the target data type
<< return - the output tensor with the target data type
*/
XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType)
{
int order = input.order;
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, input.dimSize, dataType, dr, input.devID, input.mem);
output.SetTMPFlag();
_ConvertDataType(&input, &output);

/* note: no tensor connection is built for the data-type conversion here */

return output;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -23,12 +23,16 @@
#define __CONVERTDATATYPE_H__
#include "../../XTensor.h"
#include "../../XDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* convert data type */
void _ConvertDataType(const XTensor * input, XTensor * output);
/* convert data type (return an XTensor structure) */
XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType);
} // namespace nts(NiuTrans.Tensor)
#endif // __CONVERTDATATYPE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-17
*/
#include "OnehotAndIndex.h"
#include "OnehotAndIndex.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
convert onehot tensor to index tensor
>> onehot - onehot tensor, whose values are 0 or 1
>> index - index tensor, whose values are integers
>> size - the last dimension size of the onehot tensor
*/
void _OnehotToIndex(XTensor * onehot, XTensor * index, int size)
{
CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
CheckNTErrors(index->dataType == X_INT, "The index tensor must be in X_INT!")
for (int i = 0; i < index->order; i++)
CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) {
_CudaOnehotToIndex(onehot, index, size);
return;
}
#endif
int blockNum = index->unitNum;
int stride = size;
int * onehotData = (int *)onehot->data;
int * indexData = (int *)index->data;
for (int i = 0; i < blockNum; i++) {
int * od = onehotData + i * stride;
int record = -1;
for (int j = 0; j < stride; j++) {
if (od[j] != 0) {
if (record == -1)
record = j;
else
ShowNTErrors("The value of onehot tensor is illegal!");
}
}
indexData[i] = record;
}
}
/*
convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it
>> onehot - onehot tensor, whose values are 0 or 1
>> size - the last dimension size of the onehot tensor
<< return - the index tensor
*/
XTensor OnehotToIndex(XTensor & onehot, int size)
{
CheckNTErrors(onehot.GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")
XTensor index;
InitTensor(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
index.SetTMPFlag();
_OnehotToIndex(&onehot, &index, size);
return index;
}
/*
convert index tensor to onehot tensor
>> index - index tensor, whose values are integers
>> onehot - onehot tensor, whose values are 0 or 1
>> size - the last dimension size of the onehot tensor
*/
void _IndexToOnehot(XTensor * index, XTensor * onehot, int size)
{
CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
CheckNTErrors(index->dataType == X_INT, "The index tensor must be in X_INT!")
for (int i = 0; i < index->order; i++)
CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");
onehot->SetZeroAll();
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) {
_CudaIndexToOnehot(index, onehot, size);
return;
}
#endif
int blockNum = index->unitNum;
int stride = size;
int * indexData = (int *)index->data;
int * onehotData = (int *)onehot->data;
for (int i = 0; i < blockNum; i++) {
int id = indexData[i];
int * od = onehotData + i * stride;
od[id] = 1;
}
}
/*
convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it
>> index - index tensor, whose values are integers
>> size - the last dimension size of the onehot tensor
<< return - the onehot tensor
*/
XTensor IndexToOnehot(XTensor & index, int size)
{
CheckNTErrors(index.dataType == X_INT, "The index tensor must be in X_INT!")
XTensor onehot;
onehot.SetTMPFlag();
int order = index.order;
int * dim = new int[order + 1];
memcpy(dim, index.dimSize, order * sizeof(int));
dim[order] = size;
InitTensor(&onehot, index.order + 1, dim, X_INT, 1.0F, index.devID, index.mem);
_IndexToOnehot(&index, &onehot, size);
delete[] dim;
return onehot;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
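
A usage sketch for the two conversions above (CPU tensors assumed):

#include "OnehotAndIndex.h"

using namespace nts;

void OnehotSketch()
{
    /* the index tensor [2, 0, 3] */
    XTensor index;
    InitTensor1D(&index, 3, X_INT, -1, NULL);
    int id[3] = {2, 0, 3};
    index.SetData(id, 3);

    /* a 3 x 4 tensor whose rows are one-hot */
    XTensor onehot;
    onehot = IndexToOnehot(index, 4);

    /* convert it back; this recovers [2, 0, 3] */
    XTensor back;
    back = OnehotToIndex(onehot, 4);
    back.Dump(stderr, "index:");
}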
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
#include "OnehotAndIndex.cuh"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
convert onehot tensor to index tensor (kernel version)
>> onehotData - the data pointer of the onehot tensor
>> indexData - the data pointer of the index tensor
>> blockNum - the number of blocks
>> stride - stride of a data block
*/
__global__
void KernelOnehotToIndex(int * onehotData, int * indexData, int blockNum, int stride)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* offset in each block */
int offset = blockDim.y * blockIdx.y + threadIdx.y;
if (i >= blockNum || offset >= stride)
return;
int * od = onehotData + i * stride;
int * id = indexData + i;
if (od[offset] != 0)
*id = offset;
}
/*
convert onehot tensor to index tensor (cuda version)
>> onehot - onehot tensor, whose values are 0 or 1
>> index - index tensor, whose values are integers
>> size - the last dimension size of the onehot tensor
*/
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size)
{
int devID = onehot->devID;
int blockNum = index->unitNum;
int stride = size;
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread2D(devID, blockNum, stride, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int * onehotData = (int *)onehot->data;
int * indexData = (int *)index->data;
KernelOnehotToIndex<<<blocks, threads >>>(onehotData, indexData, blockNum, stride);
BacktoCudaDev(devID, devIDBackup);
}
/*
convert index tensor to onehot tensor (kernel version)
>> onehotData - the data pointer of the onehot tensor
>> indexData - the data pointer of the index tensor
>> blockNum - the number of blocks
>> stride - stride of a data block
*/
__global__
void KernelIndexToOnehot(int * onehotData, int * indexData, int blockNum, int stride)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* offset in each block */
int offset = blockDim.y * blockIdx.y + threadIdx.y;
if (i >= blockNum || offset >= stride)
return;
int * od = onehotData + i * stride;
int id = indexData[i];
od[id] = 1;
}
/*
convert index tensor to onehot tensor (cuda version)
>> index - index tensor, whose values are integers
>> onehot - onehot tensor, whose values are 0 or 1
>> size - the last dimension size of the onehot tensor
*/
void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size)
{
int devID = onehot->devID;
int blockNum = index->unitNum;
int stride = size;
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread2D(devID, blockNum, stride, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int * onehotData = (int *)onehot->data;
int * indexData = (int *)index->data;
KernelIndexToOnehot<<<blocks, threads >>>(onehotData, indexData, blockNum, stride);
BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-17
*/
#ifndef __ONEHOTANDINDEX_CUH__
#define __ONEHOTANDINDEX_CUH__
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor (cuda version) */
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size);
/* convert index tensor to onehot tensor (cuda version) */
void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size);
} // namespace nts(NiuTrans.Tensor)
#endif // __ONEHOTANDINDEX_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-17
*/
#ifndef __ONEHOTANDINDEX_H__
#define __ONEHOTANDINDEX_H__
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor */
void _OnehotToIndex(XTensor * onehot, XTensor * index, int size);
/* convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor OnehotToIndex(XTensor & onehot, int num);
/* convert index tensor to onehot tensor */
void _IndexToOnehot(XTensor * index, XTensor * onehot, int size);
/* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IndexToOnehot(XTensor & index, int num);
} // namespace nts(NiuTrans.Tensor)
#endif // __ONEHOTANDINDEX_H__
\ No newline at end of file
......@@ -70,8 +70,9 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
fanOut = numOutputFmaps * receptiveFieldSize;
}
DTYPE finfout = gain * (float)sqrt(6.0F/(fanIn + fanOut));
tensor->SetDataRand(-finfout, finfout);
DTYPE std = gain * (float)sqrt(2.0 / (fanIn + fanOut));
DTYPE a = (DTYPE)sqrt(3.0F) * std;
tensor->SetDataRand(-a, a);
//_SetDataRand(tensor, -finfout, finfout);
}
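
The new bound equals the old one, since sqrt(3) * gain * sqrt(2 / (fanIn + fanOut)) = gain * sqrt(6 / (fanIn + fanOut)), i.e. the usual Xavier/Glorot uniform range. A quick standalone check with illustrative sizes:

#include <cmath>
#include <cstdio>

int main()
{
    float gain = 1.0F, fanIn = 256.0F, fanOut = 512.0F;

    float oldBound = gain * (float)sqrt(6.0F / (fanIn + fanOut));
    float stdv     = gain * (float)sqrt(2.0F / (fanIn + fanOut));
    float newBound = (float)sqrt(3.0F) * stdv;

    printf("old = %f, new = %f\n", oldBound, newBound);   /* the same bound */
    return 0;
}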
......@@ -499,36 +500,9 @@ void _SetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * values
}
else {
#ifdef USE_CUDA
XMem * mem = tensor->mem;
MTYPE offsetSize = num * sizeof(MTYPE);
MTYPE valueSize;
if (tensor->dataType == X_INT)
valueSize = num * sizeof(int);
else if (tensor->dataType == X_FLOAT)
valueSize = num * sizeof(float);
else
ShowNTErrors("TO DO!!!");
MTYPE * offsetsCuda = mem != NULL ?
(MTYPE*)mem->AllocBuf(mem->devID, offsetSize) :
(MTYPE*)XMemAlloc(tensor->devID, offsetSize);
void * valuesCuda = mem != NULL ?
mem->AllocBuf(mem->devID, valueSize) :
XMemAlloc(tensor->devID, valueSize);
XMemCopy(offsetsCuda, tensor->devID, offsets, -1, offsetSize);
XMemCopy(valuesCuda, tensor->devID, values, -1, valueSize);
_CudaSetDataWithOffsetAndValue(tensor, offsetsCuda, valuesCuda, num);
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, valueSize);
mem->ReleaseBuf(mem->devID, offsetSize);
}
else {
XMemFree(tensor->devID, offsetsCuda);
XMemFree(tensor->devID, valuesCuda);
if(tensor->devID >= 0) {
_CudaSetDataWithOffsetAndValue(tensor, offsets, values, num);
return;
}
#else
ShowNTErrors("Please recompile the code with USE_CUDA");
......
......@@ -26,6 +26,7 @@
#include "SetData.cuh"
#include <curand_kernel.h>
#include "../../XDevice.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -363,7 +364,7 @@ e.g., for a 3* 3 tensor,
2 2 0
*/
__global__
void _KernelSetDataLowTri(DTYPE * d, int l, int blockSize, int blockNum, DTYPE p, int shift)
void KernelSetDataLowTri(DTYPE * d, int l, int blockSize, int blockNum, DTYPE p, int shift)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -425,7 +426,7 @@ void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
_KernelSetDataLowTri<<<blocks, threads >>>((DTYPE*)tensor->data, l, blockSize, blockNum, p, shift);
KernelSetDataLowTri<<<blocks, threads >>>((DTYPE*)tensor->data, l, blockSize, blockNum, p, shift);
BacktoCudaDev(tensor->devID, devIDBackup);
}
......@@ -474,12 +475,12 @@ set the data with an array of offsets (kernel version)
>> num - number of the data items
*/
__global__
void _KernelSetDataWithOffset(DTYPE * data, MTYPE * offsets, DTYPE value, MTYPE num)
void KernelSetDataWithOffset(DTYPE * data, MTYPE * offsets, DTYPE value, MTYPE num)
{
/* index */
int i = blockDim.x * blockIdx.x + threadIdx.x;
if(i < num)
if (i < num)
data[offsets[i]] = value;
}
......@@ -505,7 +506,7 @@ void _CudaSetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYP
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
_KernelSetDataWithOffset << <blocks, threads >> > ((DTYPE*)tensor->data, offsets, value, num);
KernelSetDataWithOffset << <blocks, threads >> > ((DTYPE*)tensor->data, offsets, value, num);
BacktoCudaDev(tensor->devID, devIDBackup);
}
......@@ -519,7 +520,7 @@ set the data with an array of offsets (kernel version)
>> dataType - the data type of the data and values
*/
__global__
void _KernelSetDataWithOffset(void * data, MTYPE * offsets, void * values, MTYPE num, TENSOR_DATA_TYPE dataType)
void KernelSetDataWithOffsetAndValue(void * data, MTYPE * offsets, void * values, MTYPE num, TENSOR_DATA_TYPE dataType)
{
/* index */
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -541,6 +542,18 @@ set the data with an array of values
*/
void _CudaSetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * values, MTYPE num)
{
XMem * mem = tensor->mem;
MTYPE offsetSize = num * sizeof(MTYPE);
MTYPE valueSize;
if (tensor->dataType == X_INT)
valueSize = num * sizeof(int);
else if (tensor->dataType == X_FLOAT)
valueSize = num * sizeof(float);
else
ShowNTErrors("TO DO!!!");
int gridSize[3];
int blockSize[3];
......@@ -552,7 +565,32 @@ void _CudaSetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * va
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
_KernelSetDataWithOffset << <blocks, threads >> > (tensor->data, offsets, values, num, tensor->dataType);
MTYPE * offsetsCuda = mem != NULL ?
(MTYPE*)mem->AllocBuf(mem->devID, offsetSize) :
(MTYPE*)XMemAlloc(tensor->devID, offsetSize);
void * valuesCuda = mem != NULL ?
mem->AllocBuf(mem->devID, valueSize) :
XMemAlloc(tensor->devID, valueSize);
if (mem != NULL) {
XMemCopy(offsetsCuda, mem->devID, offsets, -1, offsetSize);
XMemCopy(valuesCuda, mem->devID, values, -1, valueSize);
}
else {
XMemCopy(offsetsCuda, tensor->devID, offsets, -1, offsetSize);
XMemCopy(valuesCuda, tensor->devID, values, -1, valueSize);
}
KernelSetDataWithOffsetAndValue<<<blocks, threads >>> (tensor->data, offsetsCuda, valuesCuda, num, tensor->dataType);
if (mem != NULL) {
mem->ReleaseBuf(mem->devID, valueSize);
mem->ReleaseBuf(mem->devID, offsetSize);
}
else {
XMemFree(tensor->devID, valuesCuda);
XMemFree(tensor->devID, offsetsCuda);
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
......
......@@ -15,7 +15,6 @@
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-10
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Compare.h"
#include "Compare.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor)
DTYPE myIsEqual(DTYPE a, DTYPE b)
{
return (a == b ? 1.0F : 0.0F);
}
DTYPE myIsNotEqual(DTYPE a, DTYPE b)
{
return (a != b ? 1.0F : 0.0F);
}
#ifdef USE_CUDA
/* define three macros separately, specifying the respective function names (GPU mode) */
#define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \
if (a->devID >= 0) { \
_cudaFuncName(a, b, number); \
return; \
} \
DTYPE * d = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i], number); \
}
#define _SIMPLE_COMPARE_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, DTYPE number) \
{ \
_funcName(a, a, number); \
}
#define SIMPLE_COMPARE_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, DTYPE number) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, number); \
return b; \
}
// I think we needn't make the link here.
// XLink::MakeLink(&a, NULL, &b, operationId);
_SIMPLE_COMPARE_FUNCTION(_Equal, _CudaEqual, myIsEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_EqualMe, _Equal)
SIMPLE_COMPARE_FUNCTION(Equal, _Equal, MATH_EQUAL)
_SIMPLE_COMPARE_FUNCTION(_NotEqual, _CudaNotEqual, myIsNotEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
#else
/* define three macros separately, specifying the respective function names (CPU mode) */
#define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
DTYPE * d = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i], number); \
}
#define _SIMPLE_COMPARE_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, DTYPE number) \
{ \
_funcName(a, a, number); \
}
#define SIMPLE_COMPARE_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, DTYPE number) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, number); \
return b; \
}
// I think we needn't make the link here.
// XLink::MakeLink(&a, NULL, &b, operationId);
_SIMPLE_COMPARE_FUNCTION(_Equal, myIsEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_EqualMe, _Equal)
SIMPLE_COMPARE_FUNCTION(Equal, _Equal, MATH_EQUAL)
_SIMPLE_COMPARE_FUNCTION(_NotEqual, myIsNotEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
#endif
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
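
A usage sketch for the comparison functions defined above (CPU tensor assumed):

#include "Compare.h"

using namespace nts;

void EqualSketch()
{
    XTensor a;
    InitTensor1D(&a, 4, X_FLOAT, -1, NULL);
    float v[4] = {1.0F, 0.5F, 1.0F, 2.0F};
    a.SetData(v, 4);

    /* mark the entries that equal 1.0: b = [1, 0, 1, 0] */
    XTensor b;
    b = Equal(a, 1.0F);
    b.Dump(stderr, "b:");
}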
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
#include <math.h>
#include "../../XDevice.h"
#include "../../XName.h"
#include "Compare.h"
#include "Compare.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
__device__
DTYPE cudaIsEqual(DTYPE a, DTYPE b)
{
return (a == b ? 1.0F : 0.0F);
}
__device__
DTYPE cudaIsNotEqual(DTYPE a, DTYPE b)
{
return (a != b ? 1.0F : 0.0F);
}
#define SIMPLE_COMPARE_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, int size, DTYPE number) \
{ \
int i = blockDim.x * blockIdx.x + threadIdx.x; \
\
if (i < size) \
b[i] = (DTYPE)origFunc(a[i], number); \
} \
__global__ \
void Kernel##funcName(__half * a, __half * b, int size, __half number) \
{ \
return; \
} \
void _Cuda##funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
\
int gridSize[3]; \
int blockSize[3]; \
\
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
\
dim3 blocks(gridSize[0]); \
dim3 threads(blockSize[0]); \
\
int devIDBackup; \
ProtectCudaDev(a->devID, devIDBackup); \
\
if (a->dataType == DEFAULT_DTYPE) { \
Kernel##funcName<<<blocks, threads>>> \
((DTYPE*)a->data, (DTYPE*)b->data, \
a->unitNum, (DTYPE)number); \
} \
else if (a->dataType == X_FLOAT16) { \
Kernel##funcName<<<blocks, threads>>> \
((__half*)a->data, (__half*)b->data, \
a->unitNum, (__half)number); \
} \
else { \
ShowNTErrors("TODO!"); \
} \
\
BacktoCudaDev(a->devID, devIDBackup); \
} \
SIMPLE_COMPARE_FUNCTION_GPU(Equal, cudaIsEqual)
SIMPLE_COMPARE_FUNCTION_GPU(NotEqual, cudaIsNotEqual)
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-10
*/
#ifndef __COMPARE_CUH__
#define __COMPARE_CUH__
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* compare whether every entry is equal to the specified value (cuda kernel) */
__global__
void KernelEqual(DTYPE * a, DTYPE * b, int size, DTYPE number);
/* compare whether every entry is equal to the specified value (cuda version) */
void _CudaEqual(const XTensor * a, XTensor * b, DTYPE number);
/* compare whether every entry is not equal to the specified value (cuda kernel) */
__global__
void KernelNotEqual(DTYPE * a, DTYPE * b, int size, DTYPE number);
/* compare whether every entry is not equal to the specified value (cuda version) */
void _CudaNotEqual(const XTensor * a, XTensor * b, DTYPE number);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif //end __COMPARE_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-10
*/
#ifndef __COMPARE_H__
#define __COMPARE_H__
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compare whether every entry is equal to the specified value */
void _Equal(const XTensor * a, XTensor * b, DTYPE number);
/* compare whether every entry is equal to the specified value (do it on site)
keep the result in the input tensor a and return nothing */
void _EqualMe(XTensor * a, DTYPE number);
/* compare whether every entry is equal to the specified value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Equal(const XTensor & a, DTYPE number);
/* compare whether every entry is not equal to the specified value */
void _NotEqual(const XTensor * a, XTensor * b, DTYPE number);
/* compare whether every entry is not equal to the specified value (do it on site)
keep the result in the input tensor a and return nothing */
void _NotEqualMe(XTensor * a, DTYPE number);
/* compare whether every entry is not equal to the specified value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor NotEqual(const XTensor & a, DTYPE number);
} // namespace nts(NiuTrans.Tensor)
#endif // end __COMPARE_H__
\ No newline at end of file
......@@ -223,4 +223,4 @@ _SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)*/
#endif
}
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -15,7 +15,6 @@
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
......
......@@ -15,7 +15,6 @@
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
......
......@@ -15,7 +15,6 @@
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
......@@ -145,5 +144,6 @@ void _TanMe(XTensor * a);
make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a);
}
#endif //end __UNARY_H__
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
#endif // end __UNARY_H__
\ No newline at end of file
......@@ -79,8 +79,13 @@ void _CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, i
ProtectCudaDev(devID, devIDBackup);
/* copy the index to the GPU memory */
int * sourceBlocksTMP = myMem != NULL ? (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) : (int *)XMemAlloc(devID, blockNum * sizeof(int));
int * targetBlocksTMP = myMem != NULL ? (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) : (int *)XMemAlloc(devID, blockNum * sizeof(int));
int * sourceBlocksTMP = myMem != NULL ?
(int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) :
(int *)XMemAlloc(devID, blockNum * sizeof(int));
int * targetBlocksTMP = myMem != NULL ?
(int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) :
(int *)XMemAlloc(devID, blockNum * sizeof(int));
XMemCopy(sourceBlocksTMP, devID, sourceBlocks, -1, blockNum * sizeof(int));
XMemCopy(targetBlocksTMP, devID, targetBlocks, -1, blockNum * sizeof(int));
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "CopyIndexed.h"
#include "CopyIndexed.cuh"
#include "CopyBlocks.h"
#include "Gather.h"
#include "../../XName.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -40,7 +42,9 @@ copy indexed sub-tensors
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
*/
void _CopyIndexed(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
int * srcIndex, int indexSize, int * tgtIndex,
int copyNum)
{
CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)),
......@@ -99,7 +103,148 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim, int * srcIndex, int i
}
/*
copy indexed sub-tensors (return an XTensor structure)
copy selected sub-tensors where indices are kept in tensors
>> s - the source tensor
>> t - the target tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - the tensor to save the index of the source sub-tensors
>> tgtIndex - the tensor to save the index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
*/
void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
const XTensor * srcIndex, const XTensor * tgtIndex,
int copyNum)
{
int order = s->order;
int indexSize = srcIndex->unitNum;
CheckNTErrors(indexSize != 0, "NULL index!")
CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors((srcIndex && tgtIndex), "Invalid index tensors!");
CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)),
"the data must be kept on the same device!");
CheckNTErrors((srcIndex->devID == tgtIndex->devID || (s->devID < 0 && t->devID < 0)),
"the index must be kept on the same device!");
CheckNTErrors((s->devID == srcIndex->devID || (s->devID < 0 && t->devID < 0)),
"the data and index must be kept on the same device!");
CheckNTErrors((dim >= 0 && dim < order), "A too large dimension specified!");
CheckNTErrors((s->unitSize == t->unitSize), "Unmatched tensors!");
CheckNTErrors((srcIndex->unitNum == tgtIndex->unitNum), "Unmatched index tensors!");
for (int i = 0; i < order; i++) {
if (i != dim) {
CheckNTErrors(s->GetDim(i) == t->GetDim(i), "Unmatched dimensions");
}
else {
CheckNTErrors(t->GetDim(i) == indexSize * copyNum, "Unmatched dimensions");
}
}
#ifdef USE_CUDA
if (s->devID >= 0 && srcIndex->devID >= 0) {
_CudaCopyIndexed(s, t, dim, srcIndex, tgtIndex, copyNum);
return;
}
#endif
int blockNum = 1;
int stride = 1;
int blockSizeSrc = 1;
int blockSizeTgt = 1;
for (int i = 0; i < dim; i++)
blockNum *= s->GetDim(i);
for (int i = dim + 1; i < order; i++)
stride *= s->GetDim(i);
blockSizeSrc = stride * s->GetDim(dim);
blockSizeTgt = stride * t->GetDim(dim);
DTYPE * sData = (DTYPE*)s->data;
DTYPE * tData = (DTYPE*)t->data;
int * sIndex = (int*)srcIndex->data;
int * tIndex = (int*)tgtIndex->data;
for (int i = 0; i < indexSize; i++) {
for (int c = 0; c < copyNum; c++) {
int si = sIndex[i] + c;
int ti = tIndex[i] + c;
for (int j = 0; j < blockNum; j++) {
DTYPE * sd = sData + j * blockSizeSrc + si * stride;
DTYPE * td = tData + j * blockSizeTgt + ti * stride;
for (int k = 0; k < stride; k++)
*(td + k) = *(sd + k);
}
}
}
}
/*
copy selected sub-tensors where indices are kept in tensors (return an XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3,2)
>> srcIndex - the tensor to save the index of the source sub-tensors
>> tgtIndex - the tensor to save the index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
<< return - the result of copying indexed sub-tensors
*/
XTensor CopyIndexed(const XTensor & s, int dim,
const XTensor & srcIndex, const XTensor & tgtIndex,
int copyNum)
{
CheckNTErrors(dim >= 0 && dim < s.order, "A too large dimension specified!");
int order = s.order;
int * dimSize = new int[order];
int indexSize = srcIndex.unitNum;
for (int i = 0; i < s.order; i++) {
if (i == dim)
dimSize[i] = indexSize * copyNum;
else
dimSize[i] = s.dimSize[i];
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMPFlag();
/* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, &srcIndex, &tgtIndex, copyNum);
XList list(3);
list.Add(&s);
list.Add(&srcIndex);
list.Add(&tgtIndex);
/* tensor connection */
XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadInt(&t, copyNum);
/* destroy variables */
delete[] dimSize;
return t;
}
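Because this wrapper records srcIndex and tgtIndex as inputs and stores dim and copyNum on the output node, the copy can sit inside a differentiable graph. A short sketch (an editor's illustration; s, srcIdx and tgtIdx are hypothetical XTensor objects prepared as in the example above):
XTensor c = CopyIndexed(s, 0, srcIdx, tgtIdx, 2);
XNet net;
net.Backward(c);   /* gradients should flow back to s through the recorded MOVEMENT_COPYINDEXED link */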
/*
copy indexed sub-tensors (return an XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-11-30
*/
#include "CopyIndexed.cuh"
#include "../../XDevice.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
copy selected sub-tensors where indices are kept in tensors (kernel version)
>> s - the source tensor
>> t - the target tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - the tensor to save the index of the source sub-tensors
>> tgtIndex - the tensor to save the index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
*/
__global__
void KernelCopyIndexed(DTYPE * sData, DTYPE * tData, int * sIndex, int * tIndex,
int blockNum, int blockSizeSrc, int blockSizeTgt,
int stride, int indexSize, int copyNum)
{
__shared__ DTYPE * sp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * tp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* offset in each block */
int offset = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockNum * indexSize * copyNum || offset >= stride)
return;
int realIndexSize = indexSize * copyNum;
int realBlockNum = i / realIndexSize;
int realIndex = i % realIndexSize;
int realSrcIndex = sIndex[realIndex / copyNum] + realIndex % copyNum;
int realTgtIndex = tIndex[realIndex / copyNum] + realIndex % copyNum;
if(threadIdx.y == 0){
sp[threadIdx.x] = sData + realBlockNum * blockSizeSrc + realSrcIndex * stride;
tp[threadIdx.x] = tData + realBlockNum * blockSizeTgt + realTgtIndex * stride;
}
__syncthreads();
DTYPE * s = sp[threadIdx.x];
DTYPE * t = tp[threadIdx.x];
t[offset] = s[offset];
}
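To make the kernel's index arithmetic concrete (an editor's note, not part of the commit): with blockNum = 3, indexSize = 2, copyNum = 2 and sIndex = {1, 4}, the x-index i = 7 gives realIndexSize = 4, realBlockNum = 7 / 4 = 1 and realIndex = 7 % 4 = 3, so realSrcIndex = sIndex[3 / 2] + 3 % 2 = 4 + 1 = 5; that thread copies source sub-tensor 5 within block 1, at the element offset given by the y-index.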
/*
copy selected sub-tensors where indices are kept in tensors
>> s - the source tensor
>> t - the target tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - the tensor to save the index of the source sub-tensors
>> tgtIndex - the tensor to save the index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
*/
void _CudaCopyIndexed(const XTensor * s, XTensor * t, int dim,
const XTensor * srcIndex, const XTensor * tgtIndex,
int copyNum)
{
int devID = s->devID;
int order = s->order;
int indexSize = srcIndex->unitNum;
int blockNum = 1;
int stride = 1;
int blockSizeSrc = 1;
int blockSizeTgt = 1;
for (int i = 0; i < dim; i++)
blockNum *= s->GetDim(i);
for (int i = dim + 1; i < order; i++)
stride *= s->GetDim(i);
blockSizeSrc = stride * s->GetDim(dim);
blockSizeTgt = stride * t->GetDim(dim);
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread2D(devID, blockNum * indexSize * copyNum, stride, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
DTYPE * sData = (DTYPE*)s->data;
DTYPE * tData = (DTYPE*)t->data;
int * sIndex = (int *)srcIndex->data;
int * tIndex = (int *)tgtIndex->data;
KernelCopyIndexed<<<blocks, threads >>>(sData, tData, sIndex, tIndex,
blockNum, blockSizeSrc, blockSizeTgt,
stride, indexSize, copyNum);
BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-11-30
* Tomorrow is the celebration of the laboratory, I'm so happy!
*/
#ifndef __CopyIndexed_CUH__
#define __CopyIndexed_CUH__
#include "../../XTensor.h"
#include "CopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* copy selected sub-tensors where indices are kept in tensors (cuda version) */
void _CudaCopyIndexed(const XTensor * s, XTensor * t, int dim,
const XTensor * srcIndex, const XTensor * tgtIndex,
int copyNum);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __CopyIndexed_CUH__
\ No newline at end of file
......@@ -27,22 +27,27 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy selected sub-tensors */
void _CopyIndexed(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
int * srcIndex, int indexSize, int * tgtIndex,
int copyNum = 1);
/* copy selected sub-tensors where indices are kept in tensors */
void _CopyIndexed(const XTensor * s, XTensor * t, int dim, const XTensor * srcIndex, const XTensor * tgtIndex);
void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
const XTensor * srcIndex, const XTensor * tgtIndex,
int copyNum = 1);
/*
copy selected sub-tensors (return an XTensor structure)
make a new tensor to keep the result and return it (remove this???)
*/
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
/*
copy selected sub-tensors where indices are kept in tensors (return an XTensor structure)
make a new tensor to keep the result and return it (remove this???)
make a new tensor to keep the result and return it
*/
void CopyIndexed(const XTensor * s, XTensor * t, int dim, const XTensor * srcIndex, const XTensor * tgtIndex);
XTensor CopyIndexed(const XTensor & s, int dim,
const XTensor & srcIndex, const XTensor & tgtIndex,
int copyNum = 1);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -57,15 +57,14 @@ gather indexed sub-tensors
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor
*/
void _Gather(XTensor * s, XTensor * t, XTensor * srcIndex)
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex)
{
CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors((s->devID == t->devID && t->devID == srcIndex->devID),
"the data must be kept on the same device!");
CheckNTErrors(s->devID == t->devID, "the data must be kept on the same device!");
CheckNTErrors((s->unitSize == t->unitSize), "Unmatched tensors!");
#ifdef USE_CUDA
if (s->devID >= 0 && t->devID >= 0 && srcIndex->devID >= 0) {
if (s->devID >= 0 && t->devID >= 0) {
_CudaGather(s, t, srcIndex);
return;
}
......@@ -116,6 +115,8 @@ XTensor Gather(XTensor &s, XTensor &index)
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMPFlag();
delete[] dimSize;
_Gather(&s, &t, &index);
/* tensor connection */
......
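A small Gather sketch (an editor's illustration with assumed shapes, not part of the commit): the tensor-index overload selects whole sub-tensors of s along its first dimension, here rows 3 and 1 of a 4 x 3 matrix.
int sDims[2] = {4, 3};
int iDims[1] = {2};
XTensor * s = NewTensor(2, sDims);
XTensor * index = NewTensor(1, iDims, X_INT);
int rows[2] = {3, 1};
s->SetDataRand();
index->SetData(rows, 2);
XTensor t = Gather(*s, *index);   /* expected to hold rows 3 and 1 of s */
delete s;
delete index;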
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-11-27
*/
#include "Gather.cuh"
#include "CopyBlocksSelected.cuh"
......@@ -41,7 +41,7 @@ __global__
void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int indexSize, int stride)
{
__shared__ DTYPE * sp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * cp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * tp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -54,15 +54,15 @@ void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int indexSize, int
if(threadIdx.y == 0){
sp[threadIdx.x] = sData + sIndex[i] * stride;
cp[threadIdx.x] = tData + i * stride;
tp[threadIdx.x] = tData + i * stride;
}
__syncthreads();
DTYPE * s = sp[threadIdx.x];
DTYPE * c = cp[threadIdx.x];
DTYPE * t = tp[threadIdx.x];
c[offset] = s[offset];
t[offset] = s[offset];
}
/*
......@@ -72,9 +72,10 @@ gather indexed sub-tensors(cuda version)
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor
*/
void _CudaGather(XTensor * s, XTensor * t, XTensor * srcIndex)
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex)
{
int devID = s->devID;
XMem * mem = s->mem;
int stride = s->GetDim(1);
int indexSize = srcIndex->unitNum;
......@@ -93,10 +94,26 @@ void _CudaGather(XTensor * s, XTensor * t, XTensor * srcIndex)
DTYPE * sData = (DTYPE*)s->data;
DTYPE * tData = (DTYPE*)t->data;
int * sIndex = (int *)srcIndex->data;
int * sIndex = NULL;
if (srcIndex->devID < 0) {
sIndex = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(devID, sizeof(int) * indexSize);
XMemCopy(sIndex, devID, srcIndex->data, -1, sizeof(int) * indexSize);
}
else
sIndex = (int *)srcIndex->data;
KernelGather<<<blocks, threads >>>(sData, tData, sIndex, indexSize, stride);
if (srcIndex->devID < 0) {
if(mem != NULL)
mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
else
XMemFree(devID, sIndex);
}
BacktoCudaDev(devID, devIDBackup);
}
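The new branch stages a CPU-resident index tensor into device memory before the launch and releases the staging buffer afterwards. The bare pattern, with the XMem buffer path stripped out (an editor's sketch, not library code; hostIndex and n are placeholders):
int * dIndex = (int*)XMemAlloc(devID, sizeof(int) * n);
XMemCopy(dIndex, devID, hostIndex, -1, sizeof(int) * n);   /* -1 is the CPU device id */
KernelGather<<<blocks, threads>>>(sData, tData, dIndex, n, stride);
XMemFree(devID, dIndex);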
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-11-27
*/
#ifndef __GATHER_CUH__
#define __GATHER_CUH__
......@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* gather indexed sub-tensors(cuda version) */
void _CudaGather(XTensor * s, XTensor * t, XTensor * srcIndex);
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex);
#endif // USE_CUDA
......
......@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
void _Gather(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize);
/* gather selected sub-tensors */
void _Gather(XTensor * s, XTensor * t, XTensor * srcIndex);
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex);
/* gather selected sub-tensors (return an XTensor structure)
make a new tensor to keep the result and return it */
......
......@@ -134,63 +134,92 @@ void _AssignmentForGather(DTYPE * sData, DTYPE * cData, int blockNum,
/*
spread a collection tensor to source tensor.
And this is a special spread function for backward computation of gather function.
And this is a special spread function for backward computation of CopyIndexed function.
>> source - the source tensor whose data would be modified
>> collection - the collection whose data would be spread to source tensor
>> s - the source tensor whose data would be modified
>> c - the collection whose data would be spread to source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and collIndex)
>> srcIndex - the tensor to save the index of the source sub-tensors
>> collIndex - the tensor to save the index of the collection sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
*/
void _SpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize)
void _SpreadForCopyIndexed(XTensor * s, XTensor * c, int dim,
XTensor * srcIndex, XTensor * collIndex,
int copyNum)
{
int order = source->order;
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
int order = s->order;
int indexSize = srcIndex->unitNum;
CheckNTErrors(indexSize != 0, "NULL index!");
CheckNTErrors((s && c), "Invalid tensors!");
CheckNTErrors((srcIndex && collIndex), "Invalid index tensors!");
CheckNTErrors((s->devID == c->devID || (s->devID < 0 && c->devID < 0)),
"the data must be kept on the same device!");
CheckNTErrors((srcIndex->devID == collIndex->devID || (srcIndex->devID < 0 && collIndex->devID < 0)),
"the index must be kept on the same device!");
CheckNTErrors((s->devID == srcIndex->devID || (s->devID < 0 && srcIndex->devID < 0)),
"the data and index must be kept on the same device!");
CheckNTErrors((dim >= 0 && dim < s->order), "A too large dimension specified!");
CheckNTErrors((s->unitSize == c->unitSize), "Unmatched tensors!");
CheckNTErrors((srcIndex->unitNum == collIndex->unitNum), "Unmatched index tensors!");
CheckNTErrors(s->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
for (int i = 0; i < order; i++){
if (i == dim) {
CheckNTErrors(collection->GetDim(i) == indexSize, "Illegal dimension!");
for (int i = 0; i < order; i++) {
if (i != dim) {
CheckNTErrors(s->GetDim(i) == c->GetDim(i), "Unmatched dimensions");
}
else {
CheckNTErrors(collection->GetDim(i) == source->GetDim(i), "Illegal dimension!");
CheckNTErrors(c->GetDim(i) == indexSize * copyNum, "Unmatched dimensions");
}
}
#ifdef USE_CUDA
if(source->devID >= 0 && collection->devID >= 0) {
_CudaSpreadForGather(source, collection, dim, srcIndex, indexSize);
if(s->devID >= 0 && c->devID >= 0) {
_CudaSpreadForCopyIndexed(s, c, dim, srcIndex, collIndex, copyNum);
return;
}
#endif
int blockSizeSrc = 1;
int blockSizeColl = 1;
int blockNum = 1;
int stride = 1;
int blockSizeSrc = 1;
int blockSizeTgt = 1;
for (int i = dim + 1; i < order; i++) {
stride *= source->GetDim(i);
}
for (int i = 0; i < dim; i++)
blockNum *= s->GetDim(i);
blockSizeSrc = stride * source->GetDim(dim);
blockSizeColl = stride * collection->GetDim(dim);
blockNum = source->unitNum / blockSizeSrc;
for (int i = dim + 1; i < order; i++)
stride *= s->GetDim(i);
DTYPE * sData = (DTYPE*)source->data;
DTYPE * cData = (DTYPE*)collection->data;
blockSizeSrc = stride * s->GetDim(dim);
blockSizeTgt = stride * c->GetDim(dim);
for(int i = 0; i < indexSize; i++){
int src = srcIndex[i];
int tgt = i;
DTYPE * s = sData + src * stride;
DTYPE * c = cData + tgt * stride;
_AssignmentForGather(s, c, blockNum, blockSizeSrc, blockSizeColl, stride);
DTYPE * sData = (DTYPE*)s->data;
DTYPE * cData = (DTYPE*)c->data;
int * sIndex = (int*)srcIndex->data;
int * cIndex = (int*)collIndex->data;
for (int i = 0; i < indexSize; i++) {
for (int c = 0; c < copyNum; c++) {
int si = sIndex[i] + c;
int ti = cIndex[i] + c;
for (int j = 0; j < blockNum; j++) {
DTYPE * sd = sData + j * blockSizeSrc + si * stride;
DTYPE * td = cData + j * blockSizeTgt + ti * stride;
for (int k = 0; k < stride; k++)
*(sd + k) += *(td + k);
}
}
}
}
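How this pairs with _CopyIndexed in backward computation (an editor's sketch; sGrad and cGrad are hypothetical, already-allocated gradient tensors shaped like s and c):
/* forward: c holds copies of the selected sub-tensors of s */
_CopyIndexed(s, c, dim, srcIndex, tgtIndex, copyNum);
/* backward: accumulate dE/dc back into dE/ds over the same index pattern */
_SpreadForCopyIndexed(sGrad, cGrad, dim, srcIndex, tgtIndex, copyNum);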
/*
......@@ -218,7 +247,7 @@ void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index)
}
#ifdef USE_CUDA
if(source->devID >= 0 && collection->devID >= 0 && index->devID >= 0) {
if(source->devID >= 0 && collection->devID >= 0) {
_CudaSpreadForGather(source, collection, index);
return;
}
......
......@@ -32,9 +32,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
void _CudaSpread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex);
/* special spread function for backward computation of gather function (cuda version) */
void _CudaSpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize);
/* special spread function for backward computation of CopyIndexed function (cuda version) */
void _CudaSpreadForCopyIndexed(XTensor * s, XTensor * c, int dim,
XTensor * srcIndex, XTensor * collIndex,
int copyNum);
/* special spread function for backward computation of gather function (cuda version) */
void _CudaSpreadForGather(XTensor * source, XTensor * collection, XTensor * srcIndex);
......
......@@ -36,9 +36,10 @@ void Spread(XTensor * source, XTensor * collection,
XTensor * srcIndex, XTensor * collIndex,
int dim);
/* special spread function for backward computation of gather function */
void _SpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize);
/* special spread function for backward computation of CopyIndexed function */
void _SpreadForCopyIndexed(XTensor * source, XTensor * collection, int dim,
XTensor * srcIndex, XTensor * collIndex,
int copyNum);
/* special spread function for backward computation of gather function */
void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index);
......
......@@ -46,6 +46,22 @@ sum all the items of the tensor (It should be optimized!)
*/
DTYPE _ReduceSumAll(const XTensor * source)
{
int dims[2] = {1, source->unitNum};
int one = 1;
XTensor * all = NewTensorBuf(2, dims, source->dataType, source->denseRatio, source->devID, source->mem);
XTensor * result = NewTensorBuf(1, &one, source->dataType, 1.0F, source->devID, source->mem);
_CopyValues(source, all);
_ReduceSum(all, result, 1);
DTYPE r = result->Get1D(0);
DelTensorBuf(result);
DelTensorBuf(all);
return r;
int order = source->order;
DTYPE summation;
......
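The rewritten _ReduceSumAll copies the input into a 1 x unitNum buffer and reduces it along dimension 1. A trivial usage sketch (an editor's illustration with an assumed 3 x 2 input):
int dims[2] = {3, 2};
XTensor * a = NewTensor(2, dims);
a->SetDataRand();
DTYPE total = _ReduceSumAll(a);   /* sum of all six entries of a */
delete a;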
......@@ -94,7 +94,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
gridSize = blockNum;
gridNum = s->unitNum / (blockSize * blockNum);
if (mergedNum * gridNum <= MIN_TENSOR_SPLIT_NUM) {
if (mergedNum * gridNum <= MIN_TENSOR_MERGE_NUM) {
int sPitch = blockSize * s->unitSize;
int tPtich = blockSize * mergedNum * t->unitSize;
int mSize = blockSize * t->unitSize;
......@@ -253,7 +253,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
gridNum = s0->unitNum / (blockSize * blockNum);
/* merging with fewer data copy operations */
if (mergedNum * gridNum <= MIN_TENSOR_SPLIT_LIST_NUM) {
if (mergedNum * gridNum <= MIN_TENSOR_MERGE_LIST_NUM) {
int sPitch = blockSize * s0->unitSize;
int tPtich = blockSize * mergedNum * big->unitSize;
int mSize = blockSize * big->unitSize;
......
......@@ -126,7 +126,7 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
void * dataTMP = t->data;
if (!isOnSameDevice)
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(mem->devID, size);
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(s->devID, size);
int realBlockSize = blockSize * t->unitSize;
int blockSplitSize = blockNum / splitNum;
......
......@@ -153,7 +153,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
int unitNum = x.dimSize[n];
DTYPE * maskArray = new DTYPE[unitNum];
srand((unsigned int)time(NULL));
//srand((unsigned int)time(NULL));
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
......@@ -166,4 +166,33 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
return MultiplyDim(x, mask, n, 0);
}
/*
dropout function without broadcast
>> x - input tensor
>> dropProb - probability to set an element to zero
*/
XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb)
{
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
/* generate a mask tensor with probability p */
int unitNum = x.unitNum;
DTYPE * maskArray = new DTYPE[unitNum];
srand((unsigned int)time(NULL));
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor mask;
InitTensor(&mask, x.order, x.dimSize, x.dataType, x.denseRatio, x.devID, x.mem);
mask.SetData(maskArray, unitNum);
delete[] maskArray;
return Multiply(x, mask);
}
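DropoutWithoutBroadcast draws one Bernoulli mask over the whole input and scales the surviving entries by 1/(1-dropProb) (inverted dropout), so no extra rescaling is needed at inference time. A one-line sketch (an editor's illustration; x is assumed to be an initialized float tensor):
XTensor y = DropoutWithoutBroadcast(x, 0.3F);   /* roughly 30% of entries zeroed, the rest scaled by 1/0.7 */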
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -43,6 +43,9 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
/* dropout function */
XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim = -1);
/* dropout function without broadcast */
XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb);
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUT_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_ABSOLUTE_H__
#define __TEST_ABSOLUTE_H__
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
#include "../XTensor.h"
#include "../core/math/Clip.h"
#include "TClip.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
#ifndef __TEST_CLIP_H__
#define __TEST_CLIP_H__
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "../XTensor.h"
#include "../core/math/Compare.h"
#include "TCompare.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Equal function.
Compare whether every entry is equal to the specified value.
*/
bool TestCompare1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, 1.0F} };
DTYPE answer[3][2] = { {1.0F, 0.0F},
{0.0F, 0.0F},
{0.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(a, b, 1.0);
_EqualMe(aMe, 1.0);
bUser = Equal(*a, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(aGPU, bGPU, 1.0);
_EqualMe(aMeGPU, 1.0);
bUserGPU = Equal(*aGPU, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Compare Function */
bool TestCompare()
{
XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestCompare1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-10
*/
#ifndef __TEST_Compare_H__
#define __TEST_Compare_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Compare Function */
bool TestCompare();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_Compare_H__
......@@ -232,13 +232,12 @@ bool TestConvertDataType3()
/* initialize variables */
a->SetData(data1, unitNum1);
/* call ConvertDataType function */
/* call ConvertDataType function (We have not implemented this yet...) */
//_ConvertDataType(a, b);
//_ConvertDataType(b, c);
/* check results */
cpuTest = a->CheckData(data1, unitNum1, 1e-4F);
c->Dump(stderr, "");
//cpuTest = a->CheckData(data1, unitNum1, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
......
......@@ -311,8 +311,8 @@ bool TestCrossEntropy3()
delete goldGPU;
delete lossGPU;
delete weightGPU;
delete[] dimSize;
delete[] wDimSize;
return cpuTest && gpuTest;
#else
......@@ -322,6 +322,7 @@ bool TestCrossEntropy3()
delete loss;
delete weight;
delete[] dimSize;
delete[] wDimSize;
return cpuTest;
#endif // USE_CUDA
......
......@@ -212,6 +212,8 @@ bool TestDropout2()
/* destroy variables */
delete x;
delete y;
delete dedx;
delete dedy;
delete[] dimSize;
return cpuTest;
......
......@@ -332,6 +332,7 @@ bool TestGather3()
/* destroy variables */
delete s;
delete t;
delete index;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
......
......@@ -150,6 +150,7 @@ bool TestSetData2()
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
......@@ -157,6 +158,7 @@ bool TestSetData2()
delete s;
delete modify;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -242,6 +244,7 @@ bool TestSetData3()
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
......@@ -249,6 +252,7 @@ bool TestSetData3()
delete s;
delete modify;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
......
......@@ -61,7 +61,9 @@ bool TestSort1()
_SortMe(aMe, index, 0);
Sort(*a, bUser, *index, 0);
cpuTest = b->CheckData(answer, unitNum) && aMe->CheckData(answer, unitNum) && bUser.CheckData(answer, unitNum);
cpuTest = b->CheckData(answer, unitNum) &&
aMe->CheckData(answer, unitNum) &&
bUser.CheckData(answer, unitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -85,7 +87,9 @@ bool TestSort1()
Sort(*aGPU, bUserGPU, *indexGPU, 0);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum) && aMeGPU->CheckData(answer, unitNum) && bUserGPU.CheckData(answer, unitNum);
gpuTest = bGPU->CheckData(answer, unitNum) &&
aMeGPU->CheckData(answer, unitNum) &&
bUserGPU.CheckData(answer, unitNum);
/* destroy variables */
delete a;
......@@ -149,7 +153,9 @@ bool TestSort2()
Sort(*a, bUser, *index, 1);
/* check results */
cpuTest = b->CheckData(answer, unitNum) && aMe->CheckData(answer, unitNum) && bUser.CheckData(answer, unitNum);
cpuTest = b->CheckData(answer, unitNum) &&
aMe->CheckData(answer, unitNum) &&
bUser.CheckData(answer, unitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -173,7 +179,9 @@ bool TestSort2()
Sort(*aGPU, bUserGPU, *indexGPU, 1);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum) && aMeGPU->CheckData(answer, unitNum) && bUserGPU.CheckData(answer, unitNum);
gpuTest = bGPU->CheckData(answer, unitNum) &&
aMeGPU->CheckData(answer, unitNum) &&
bUserGPU.CheckData(answer, unitNum);
/* destroy variables */
delete a;
......
......@@ -357,6 +357,7 @@ bool TestSplit3()
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
delete tList;
return cpuTest;
#endif // USE_CUDA
......
......@@ -182,6 +182,7 @@ bool TestSpread2()
int dim = 0;
int indexSize = 2;
int srcIndex[2] = {0, 2};
int tgtIndex[2] = {0, 1};
/* CPU test */
bool cpuTest = true;
......@@ -190,17 +191,19 @@ bool TestSpread2()
XTensor * s1 = NewTensor(sOrder, sDimSize);
XTensor * s2 = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * index = NewTensor(indexOrder, indexDimSize, X_INT);
XTensor * sIndex = NewTensor(indexOrder, indexDimSize, X_INT);
XTensor * cIndex = NewTensor(indexOrder, indexDimSize, X_INT);
/* initialize variables */
s1->SetData(sData, sUnitNum);
s2->SetData(sData, sUnitNum);
t->SetData(tData, tUnitNum);
index->SetData(srcIndex, indexSize);
sIndex->SetData(srcIndex, indexSize);
cIndex->SetData(tgtIndex, indexSize);
/* call _SpreadForGather function */
_SpreadForGather(s1, t, dim, srcIndex, indexSize);
_SpreadForGather(s2, t, index);
_SpreadForCopyIndexed(s1, t, dim, sIndex, cIndex, 1);
_SpreadForGather(s2, t, sIndex);
/* check results */
cpuTest = s1->CheckData(answer, tUnitNum) &&
......@@ -214,17 +217,19 @@ bool TestSpread2()
XTensor * sGPU1 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * indexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor * sIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor * cIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
/* initialize variables */
sGPU1->SetData(sData, sUnitNum);
sGPU2->SetData(sData, sUnitNum);
tGPU->SetData(tData, tUnitNum);
indexGPU->SetData(srcIndex, indexSize);
sIndexGPU->SetData(srcIndex, indexSize);
cIndexGPU->SetData(tgtIndex, indexSize);
/* call _SpreadForGather function */
_SpreadForGather(sGPU1, tGPU, dim, srcIndex, indexSize);
_SpreadForGather(sGPU2, tGPU, indexGPU);
_SpreadForCopyIndexed(sGPU1, tGPU, dim, sIndexGPU, cIndexGPU, 1);
_SpreadForGather(sGPU2, tGPU, sIndexGPU);
/* check results */
gpuTest = sGPU1->CheckData(answer, tUnitNum) &&
......@@ -234,11 +239,13 @@ bool TestSpread2()
delete s1;
delete s2;
delete t;
delete index;
delete sIndex;
delete cIndex;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete indexGPU;
delete sIndexGPU;
delete cIndexGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
......@@ -249,6 +256,8 @@ bool TestSpread2()
delete s1;
delete s2;
delete t;
delete sIndex;
delete cIndex;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
......
......@@ -31,6 +31,7 @@ bool Test()
wrong = !TestAbsolute() || wrong;
wrong = !TestClip() || wrong;
wrong = !TestCompare() || wrong;
wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong;
wrong = !TestCos() || wrong;
......
......@@ -24,6 +24,7 @@
#include "TAbsolute.h"
#include "TClip.h"
#include "TCompare.h"
#include "TConcatenate.h"
#include "TConcatenateSolely.h"
#include "TCos.h"
......