Commit 3a515f68 by liyinqiao

Merge with XU Chen branch (Don't use this! It's an incomplete version)

1. Supporting efficient propagation and gradient accumulation for backward functions.
2. Update the setData functions.
3. Clean the codes.
parent be870567
...@@ -31,43 +31,54 @@ namespace nts{ ...@@ -31,43 +31,54 @@ namespace nts{
/* compute dE/dx of a node */ /* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient) void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
{ {
XLink &income = node->income; if (!isEfficient) {
int operID = income.typeID;
if(!isEfficient){
CheckNTErrors(node->grad != NULL, "No gradient found!"); CheckNTErrors(node->grad != NULL, "No gradient found!");
} }
else{ else {
CheckNTErrors(!node->isGrad || node->grad != NULL, "No gradient found!"); CheckNTErrors(!node->isGrad || node->grad != NULL, "No gradient found!");
} }
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!"); CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");
XTensor * input = income.tails[0]; XTensor * input = income.tails[0];
XTensor * output = node; XTensor * output = node;
XNoder::MakeGrad(input); if (!isEfficient || input->isGrad) {
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH) XTensor * dedx = input->grad;
_HardTanHBackward(output, input, output->grad, input->grad); XTensor * dedy = output->grad;
else if(operID == FUNC_IDENTITY) //XTensor * tmp = NewTensorBufV2(output, output->devID, output->mem);
_IdentityBackward(output, input, output->grad, input->grad); XTensor * tmp = NewTensor(output);
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0); if (operID == FUNC_HARDTANH)
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!"); _HardTanHBackward(output, input, dedy, tmp);
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS); else if (operID == FUNC_IDENTITY)
} _IdentityBackward(output, input, dedy, tmp);
else if(operID == FUNC_RECTIFY) else if (operID == FUNC_LOGSOFTMAX) {
_RectifyBackward(output, input, output->grad, input->grad); int leadDim = income.GetParamInt(0);
else if(operID == FUNC_SIGMOID) CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
_SigmoidBackward(output, input, output->grad, input->grad); _LogSoftmaxBackward(NULL, output, input, dedy, tmp, NULL, leadDim, NOLOSS);
else if(operID == FUNC_SOFTMAX){ }
int leadDim = income.GetParamInt(0); else if (operID == FUNC_RECTIFY)
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!"); _RectifyBackward(output, input, dedy, tmp);
_SoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS); else if (operID == FUNC_SIGMOID)
} _SigmoidBackward(output, input, dedy, tmp);
else{ else if (operID == FUNC_SOFTMAX) {
ShowNTErrors("Wrong activation function type!"); int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
_SoftmaxBackward(NULL, output, input, dedy, tmp, NULL, leadDim, NOLOSS);
}
else {
ShowNTErrors("Wrong activation function type!");
}
_SumMe(dedx, tmp);
//DelTensorBuf(tmp);
DelTensor(tmp);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
namespace nts{ namespace nts{
/* compute dE/dx of a node */ /* compute dE/dx of a node */
void XLossGrad::MakeGrad(XTensor * node, bool isEfficient) void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
{ {
...@@ -48,33 +47,33 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -48,33 +47,33 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * padding = NULL; XTensor * padding = NULL;
int leadingDim; int leadingDim;
XNoder::MakeGrad(output); if (!isEfficient || output->isGrad) {
XTensor * dedy = output->grad; XNoder::MakeGrad(output);
XTensor * dedy = output->grad;
if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT) if (income.tailNum == 1) {
_SetDataFixedFloat(dedy, 1.0F); dedy->SetDataFixed(1);
else if(dedy->dataType == X_DOUBLE) return;
_SetDataFixedDouble(dedy, 1.0); }
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1); gold = income.tails[1];
else
ShowNTErrors("TODO"); //XTensor * tmp = NewTensorBufV2(output, output->devID, output->mem);
XTensor* tmp = NewTensor(output);
return;
} if (operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3)
gold = income.tails[1]; padding = income.tails[2];
leadingDim = income.GetParamInt(0);
if(operID == LOSS_CROSSENTROPY) { CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!");
if (income.tailNum == 3) _CrossEntropyBackward(tmp, output, gold, weight, padding, leadingDim);
padding = income.tails[2]; _SumMe(dedy, tmp);
leadingDim = income.GetParamInt(0); }
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!"); else {
_CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim); ShowNTErrors("Wrong activation function type!");
} }
else{ //DelTensorBuf(tmp);
ShowNTErrors("Wrong activation function type!"); DelTensor(tmp);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
...@@ -87,79 +86,4 @@ bool XLossGrad::IsLossOP(XTensor * node) ...@@ -87,79 +86,4 @@ bool XLossGrad::IsLossOP(XTensor * node)
return (income.typeID & LOSS_BASE) != 0; return (income.typeID & LOSS_BASE) != 0;
} }
/*
compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> x - input of the function
>> dedy - dE/dy
>> dedx - dE/dx
>> funcID - id of the function f
>> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy
*/
//void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
// XTensor * dedy, XTensor * dedx, XTensor * padding,
// int funcID, void * params,
// LOSS_FUNCTION_NAME lossName)
//{
// CheckNTErrors(gold && y && x, "Empty input tensors!");
// CheckNTErrors(dedx, "Empty gradient tensors!");
// CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
//
// if(funcID == FUNC_HARDTANH){
// _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
// }
// else if(funcID == FUNC_IDENTITY){
// _IdentityBackward(gold, y, x, dedy, dedx, lossName);
// }
// else if(funcID == FUNC_LOGSOFTMAX){
// int leadDim = *(int*)params;
// _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
// }
// else if(funcID == FUNC_RECTIFY){
// _RectifyBackward(gold, y, x, dedy, dedx, lossName);
// }
// else if(funcID == FUNC_SIGMOID){
// _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
// }else if(funcID == FUNC_SOFTMAX){
// int leadDim = *(int*)params;
// _SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
// }
// else{
// ShowNTErrors("wrong function found when call the backward process!");
// }
//
//}
/*
compute dE/dy for variable y and error(loss) function E
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy
*/
//void XLossGrad::Compute(XTensor * gold, XTensor * y,
// XTensor * dedy, XTensor * padding,
// LOSS_FUNCTION_NAME lossName)
//{
// if(gold == NULL){
// if(dedy->dataType == X_FLOAT)
// _SetDataFixedFloat(dedy, 1.0F);
// else if(dedy->dataType == X_DOUBLE)
// _SetDataFixedDouble(dedy, 1.0);
// else if(dedy->dataType == X_INT)
// _SetDataFixedInt(dedy, 1);
// else{
// ShowNTErrors("TODO");
// }
// return;
// }
//
// //_LossBackward(dedy, gold, y, lossName);
// if(lossName == CROSSENTROPY)
// _CrossEntropyBackward(dedy, y, gold, NULL, padding);
//
//}
} }
\ No newline at end of file
...@@ -42,55 +42,55 @@ public: ...@@ -42,55 +42,55 @@ public:
/* post processing of a node */ /* post processing of a node */
static static
void PostProcessing(XTensor * node, int typeId, bool isEfficent); void PostProcessing(XTensor * node, int typeId, bool isEfficient);
private: private:
/* gradient computation for copying indexed sub-tensors: b = copyindexed(a, srcIndex, indexSize, tgtIndex, copyNum) */ /* gradient computation for copying indexed sub-tensors: b = copyindexed(a, srcIndex, indexSize, tgtIndex, copyNum) */
static static
void GradCopyIndexed(XTensor * node, bool isEfficent); void GradCopyIndexed(XTensor * node, bool isEfficient);
/* gradient computation for copying indexed sub-tensors: b = gather(a, index) */ /* gradient computation for copying indexed sub-tensors: b = gather(a, index) */
static static
void GradGather(XTensor * node, bool isEfficent); void GradGather(XTensor * node, bool isEfficient);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */ /* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static static
void GradDropoutWithIndex(XTensor * node, bool isEfficent); void GradDropoutWithIndex(XTensor * node, bool isEfficient);
/* gradient computation for merge: c = merge(a, b, ...) */ /* gradient computation for merge: c = merge(a, b, ...) */
static static
void GradMerge(XTensor * node, bool isEfficent); void GradMerge(XTensor * node, bool isEfficient);
/* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */ /* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
static static
void GradMergeList(XTensor * node, bool isEfficent); void GradMergeList(XTensor * node, bool isEfficient);
/* gradient computation for transposing a tensor : b = transpose(a) */ /* gradient computation for transposing a tensor : b = transpose(a) */
static static
void GradTranspose(XTensor * node, bool isEfficent); void GradTranspose(XTensor * node, bool isEfficient);
/* gradient computation for reshaping a tensor: c = reshape(a) */ /* gradient computation for reshaping a tensor: c = reshape(a) */
static static
void GradReshape(XTensor * node, bool isEfficent); void GradReshape(XTensor * node, bool isEfficient);
/* gradient computation for split: c = split(a) */ /* gradient computation for split: c = split(a) */
static static
void GradSplit(XTensor * node, bool isEfficent); void GradSplit(XTensor * node, bool isEfficient);
/* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a) */ /* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a) */
static static
void GradSplitList(XTensor * node, bool isEfficent); void GradSplitList(XTensor * node, bool isEfficient);
/* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a). /* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a).
this method is called only when all nodes of spliting have been processed. We do this in a post-processing this method is called only when all nodes of spliting have been processed. We do this in a post-processing
manner because we can fuze multiple memory copy jobs one time. This is good for system speed up. */ manner because we can fuze multiple memory copy jobs one time. This is good for system speed up. */
static static
void GradSplitListPost(XTensor * node, bool isEfficent); void GradSplitListPost(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */ /* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static static
void GradUnsqueeze(XTensor * node, bool isEfficent); void GradUnsqueeze(XTensor * node, bool isEfficient);
}; };
......
...@@ -316,7 +316,6 @@ void XNet::ClearGrad(XTensor * node) ...@@ -316,7 +316,6 @@ void XNet::ClearGrad(XTensor * node)
} }
if(finished){ if(finished){
//fprintf(stderr, "del %d %ld\n", node->id, node->grad->unitNum);
delete node->grad; delete node->grad;
node->grad = NULL; node->grad = NULL;
} }
......
...@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputEnc->order - 1] = 1; dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, inputEnc->devID); InitTensor(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol); first.SetDataFixed(startSymbol);
/* add a new word into the input sequence of the decoder side */ /* add a new word into the input sequence of the decoder side */
if (inputLast == NULL) { if (inputLast == NULL) {
...@@ -195,7 +195,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -195,7 +195,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
XTensor paddingDec; XTensor paddingDec;
InitTensor(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID); InitTensor(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1); paddingDec.SetDataFixed(1);
XTensor maskDec; XTensor maskDec;
XTensor maskEncDec; XTensor maskEncDec;
......
...@@ -503,7 +503,7 @@ void T2TSearch::Dump(XTensor * output) ...@@ -503,7 +503,7 @@ void T2TSearch::Dump(XTensor * output)
int * words = new int[maxLength]; int * words = new int[maxLength];
InitTensor(output, 3, dims, X_INT); InitTensor(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1); output->SetDataFixed(-1);
/* heap for an input sentence in the batch */ /* heap for an input sentence in the batch */
for(int h = 0; h < batchSize; h++){ for(int h = 0; h < batchSize; h++){
......
...@@ -50,6 +50,15 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName); ...@@ -50,6 +50,15 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
unsigned short FloatToFloat16(float f); unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h); float Float16ToFloat(unsigned short h);
/* abort with an error message if two tensors do not share the same data type.
   NOTE: we compare the data types themselves (a != b) rather than the
   name strings returned by GetDataTypeName — the original code compared
   `GetDataTypeName(a) != GetDataTypeName(a)` (a self-comparison, always
   false, so the check could never fire), and comparing `const char *`
   results with != would be a pointer comparison anyway. */
#define CheckDataType(a, b) \
{ \
    if((a) != (b)){ \
        fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same datatype (%s vs %s)\n", \
                __FILENAME__, __LINE__, GetDataTypeName(a), GetDataTypeName(b)); \
        exit(1); \
    } \
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif
\ No newline at end of file
...@@ -303,6 +303,10 @@ public: ...@@ -303,6 +303,10 @@ public:
/* generate data items with a range by start, end and the step */ /* generate data items with a range by start, end and the step */
void Range(DTYPE lower, DTYPE upper, DTYPE step); void Range(DTYPE lower, DTYPE upper, DTYPE step);
/* generate data items with a fixed value */
template<class T>
void SetDataFixed(T num);
/* set tensor items by a uniform distribution */ /* set tensor items by a uniform distribution */
void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F); void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F);
...@@ -423,11 +427,11 @@ public: ...@@ -423,11 +427,11 @@ public:
bool BinarySearch(int key, DTYPE &value, void * &position) const; bool BinarySearch(int key, DTYPE &value, void * &position) const;
/* dump data to a file */ /* dump data to a file */
void Dump(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0); void Dump(FILE * file = stderr, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a file */ /* dump data to a file */
static static
void Dump(const XTensor * tensor, FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0); void Dump(const XTensor * tensor, FILE * file = stderr, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a binary file */ /* dump data to a binary file */
void BinaryDump(FILE * file); void BinaryDump(FILE * file);
......
...@@ -116,7 +116,7 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot, ...@@ -116,7 +116,7 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot,
float confidence = 1 - labelSmoothingP; float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size; float lowconfidence = labelSmoothingP / size;
_SetDataFixedFloat(onehot, lowconfidence); onehot->SetDataFixed(lowconfidence);
#ifdef USE_CUDA #ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) { if(onehot->devID >= 0 && index->devID >= 0) {
......
...@@ -28,31 +28,24 @@ ...@@ -28,31 +28,24 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */ /* generate data items with a fixed value */
void _CudaSetDataFixedInt(XTensor * tensor, int p); template<class T>
void _CudaSetDataFixed(XTensor * tensor, T value);
/* generate data items with a fixed value p (in float) */ /* generate data items with a fixed value p
void _CudaSetDataFixedFloat(XTensor * tensor, float p); only if the condition entry is non-zero */
template<class T>
/* generate data items with a fixed value p (in double) */ void _CudaSetDataFixedCond(XTensor * tensor, XTensor * condition, T p);
void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p (in float) only
if the condition entry is non-zero */
void _CudaSetDataFixedCondFloat(XTensor * tensor, XTensor * condition, float p);
/* generate data items with a fixed value p (in int) only
if the condition entry is non-zero */
void _CudaSetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */ /* set data items along with a given dimension (and keep the remaining items unchanged) */
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p); template<class T>
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, T p);
/* modify data items along with a given index and dimension (and keep the remaining items unchanged) */ /* modify data items along with a given index and dimension (and keep the remaining items unchanged) */
void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index); void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
/* generate data as lower triangular matrics for last two dimensions (cuda version) */ /* generate data as lower triangular matrics for last two dimensions (cuda version) */
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift); void _CudaSetDataLowTri(XTensor * tensor, DTYPE value, int shift);
/* generate data items with a uniform distribution in [lower, upper] */ /* generate data items with a uniform distribution in [lower, upper] */
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper); void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
......
...@@ -30,32 +30,17 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,32 +30,17 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a xavier initialization */ /* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F); void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
/* generate data items with a fixed value p */ /* generate data items with a fixed value */
void _SetDataFixed(XTensor * tensor, void * valuePointer); template<class T>
void _SetDataFixed(XTensor * tensor, T value);
/* generate data items with a fixed value p (in default type) */ /* generate data items with a fixed value only if the condition entry is non-zero */
void SetDataFixed(XTensor &tensor, DTYPE p); template<class T>
void _SetDataFixedCond(XTensor* tensor, XTensor* condition, T value);
/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);
/* generate data items with a fixed value p (in int) */
void _SetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void _SetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _SetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, DTYPE p);
/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */ /* set data items along with a given dimension (and keep the remaining items unchanged) */
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p); template<class T>
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, T p);
/* modify data items along with a given index and dimension (and keep the remaining items unchanged) */ /* modify data items along with a given index and dimension (and keep the remaining items unchanged) */
void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index); void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
......
...@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale) ...@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
InitTensor1DV2(&c, x.unitNum, x.dataType, x.devID, x.mem); InitTensor1DV2(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F); c.SetDataFixed(1.0);
_DropoutWithIndex(&x, &maskIndex, &c); _DropoutWithIndex(&x, &maskIndex, &c);
......
...@@ -383,15 +383,7 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y, ...@@ -383,15 +383,7 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
int leadDim, int tBeg, int tLen, int yBeg) int leadDim, int tBeg, int tLen, int yBeg)
{ {
if(t == NULL){ if(t == NULL){
if(dedy->dataType == X_FLOAT) dedy->SetDataFixed(1);
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else{
ShowNTErrors("TODO");
}
return; return;
} }
......
...@@ -50,7 +50,7 @@ bool TestDropout1() ...@@ -50,7 +50,7 @@ bool TestDropout1()
XTensor yUser; XTensor yUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(x, 1.0F); x->SetDataFixed(1);
y->SetZeroAll(); y->SetZeroAll();
/* call Dropout function */ /* call Dropout function */
...@@ -88,7 +88,7 @@ bool TestDropout1() ...@@ -88,7 +88,7 @@ bool TestDropout1()
XTensor yUserGPU; XTensor yUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F); xGPU->SetDataFixed(1);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Dropout function */ /* call Dropout function */
...@@ -157,10 +157,10 @@ bool TestDropout2() ...@@ -157,10 +157,10 @@ bool TestDropout2()
XTensor * dedy = NewTensorV2(order, dimSize); XTensor * dedy = NewTensorV2(order, dimSize);
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(x, 1.0F); x->SetDataFixed(1.0);
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
_SetDataFixedFloat(dedy, 1.5F); dedy->SetDataFixed(1.5);
/* call Dropout function */ /* call Dropout function */
float dropProb = 0.5F; float dropProb = 0.5F;
...@@ -183,10 +183,10 @@ bool TestDropout2() ...@@ -183,10 +183,10 @@ bool TestDropout2()
XTensor * dedyGPU = NewTensorV2(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensorV2(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F); xGPU->SetDataFixed(1.0);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
_SetDataFixedFloat(dedyGPU, 1.5F); dedyGPU->SetDataFixed(1.5);
/* call Dropout function */ /* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb); _Dropout(xGPU, yGPU, seed, dropProb);
......
...@@ -195,8 +195,8 @@ bool TestReduceSum2() ...@@ -195,8 +195,8 @@ bool TestReduceSum2()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -215,7 +215,7 @@ bool TestReduceSum2() ...@@ -215,7 +215,7 @@ bool TestReduceSum2()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -284,8 +284,8 @@ bool TestReduceSum3() ...@@ -284,8 +284,8 @@ bool TestReduceSum3()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -304,7 +304,7 @@ bool TestReduceSum3() ...@@ -304,7 +304,7 @@ bool TestReduceSum3()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -373,8 +373,8 @@ bool TestReduceSum4() ...@@ -373,8 +373,8 @@ bool TestReduceSum4()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -393,7 +393,7 @@ bool TestReduceSum4() ...@@ -393,7 +393,7 @@ bool TestReduceSum4()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -464,8 +464,8 @@ bool TestReduceSum5() ...@@ -464,8 +464,8 @@ bool TestReduceSum5()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -484,7 +484,7 @@ bool TestReduceSum5() ...@@ -484,7 +484,7 @@ bool TestReduceSum5()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -556,8 +556,8 @@ bool TestReduceSum6() ...@@ -556,8 +556,8 @@ bool TestReduceSum6()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -576,7 +576,7 @@ bool TestReduceSum6() ...@@ -576,7 +576,7 @@ bool TestReduceSum6()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
......
...@@ -119,7 +119,7 @@ bool TestSetData2() ...@@ -119,7 +119,7 @@ bool TestSetData2()
XTensor * modify = NewTensorV2(dataOrder, dataDimSize); XTensor * modify = NewTensorV2(dataOrder, dataDimSize);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
modify->SetData(data, dataUnitNum); modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
...@@ -137,7 +137,7 @@ bool TestSetData2() ...@@ -137,7 +137,7 @@ bool TestSetData2()
XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0); XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
modifyGPU->SetData(data, dataUnitNum); modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
...@@ -212,11 +212,11 @@ bool TestSetData3() ...@@ -212,11 +212,11 @@ bool TestSetData3()
XTensor * modify = NewTensorV2(dataOrder, dataDimSize); XTensor * modify = NewTensorV2(dataOrder, dataDimSize);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
modify->SetData(data, dataUnitNum); modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
_SetDataFixedFloat(s, 1.0F); s->SetDataFixed(1);
_SetDataIndexed(s, modify, 1, 1); _SetDataIndexed(s, modify, 1, 1);
/* check results */ /* check results */
...@@ -231,7 +231,7 @@ bool TestSetData3() ...@@ -231,7 +231,7 @@ bool TestSetData3()
XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0); XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); sGPU->SetDataFixed(1);
modifyGPU->SetData(data, dataUnitNum); modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
......
...@@ -91,7 +91,7 @@ bool TestSpread1() ...@@ -91,7 +91,7 @@ bool TestSpread1()
XTensor * modify = NewTensorV2(dataOrder, dataDimSize); XTensor * modify = NewTensorV2(dataOrder, dataDimSize);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(s, 0.0F); s->SetZeroAll();
modify->SetData(data, dataUnitNum); modify->SetData(data, dataUnitNum);
/* call _Spread function */ /* call _Spread function */
...@@ -109,7 +109,7 @@ bool TestSpread1() ...@@ -109,7 +109,7 @@ bool TestSpread1()
XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0); XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(sGPU, 0.0F); sGPU->SetZeroAll();
modifyGPU->SetData(data, dataUnitNum); modifyGPU->SetData(data, dataUnitNum);
/* call _Spread function */ /* call _Spread function */
......
...@@ -296,8 +296,8 @@ bool TestSumDim3() ...@@ -296,8 +296,8 @@ bool TestSumDim3()
/* initialize variables */ /* initialize variables */
a->SetZeroAll(); a->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F); b->SetDataFixed(1);
_SetDataFixedFloat(answer, 1.0F); answer->SetDataFixed(1);
/* call SumDim function */ /* call SumDim function */
_SumDim(a, b, c, 1); _SumDim(a, b, c, 1);
...@@ -323,7 +323,7 @@ bool TestSumDim3() ...@@ -323,7 +323,7 @@ bool TestSumDim3()
/* Initialize variables */ /* Initialize variables */
aGPU->SetZeroAll(); aGPU->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F); bGPU->SetDataFixed(1);
/* call sum function */ /* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1); _SumDim(aGPU, bGPU, cGPU, 1);
...@@ -405,8 +405,8 @@ bool TestSumDim4() ...@@ -405,8 +405,8 @@ bool TestSumDim4()
/* initialize variables */ /* initialize variables */
a->SetZeroAll(); a->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F); b->SetDataFixed(1);
_SetDataFixedFloat(answer, 1.0F); answer->SetDataFixed(1);
/* call SumDim function */ /* call SumDim function */
_SumDim(a, b, c, 1); _SumDim(a, b, c, 1);
...@@ -432,7 +432,7 @@ bool TestSumDim4() ...@@ -432,7 +432,7 @@ bool TestSumDim4()
/* Initialize variables */ /* Initialize variables */
aGPU->SetZeroAll(); aGPU->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F); bGPU->SetDataFixed(1);
/* call sum function */ /* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1); _SumDim(aGPU, bGPU, cGPU, 1);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论