Commit 5bfbd041 by liyinqiao

Merge with the branch of huchi and fix bugs.

parent 63eee374
@@ -304,7 +304,7 @@ XTensor Attention::GetRPEmbedding(const int lenQ, const int lenKV,
         XTensor range2DTrans;
         range2D = Unsqueeze(range, 0, lenQ);
         range2DTrans = Transpose(range2D, 0, 1);
-        embMatrix = Sum(range2D, range2DTrans, -1);
+        embMatrix = Sum(range2D, range2DTrans, false, -1);
     }
     else {
         for (int i = 0; i < lenKV; i++)
...
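Note on this hunk: Sum now takes the inplace flag as its third parameter, so the old beta = -1 moves to fourth position and the call still computes range2D - range2DTrans. A stand-alone C++ sketch of the matrix this builds, assuming range holds the key positions 0 .. lenKV-1 (that assumption and the fixed sizes are illustrative, not taken from the diff):

#include <cstdio>

int main() {
    const int lenQ = 3, lenKV = 3;
    int range[lenKV] = {0, 1, 2};        // assumed key/value positions
    int emb[lenQ][lenKV];
    // range2D repeats range along rows; its transpose repeats it along
    // columns; Sum(..., beta = -1) subtracts, giving relative offsets.
    for (int i = 0; i < lenQ; i++)
        for (int j = 0; j < lenKV; j++)
            emb[i][j] = range[j] - range[i];
    for (int i = 0; i < lenQ; i++) {
        for (int j = 0; j < lenKV; j++)
            std::printf("%3d", emb[i][j]);
        std::printf("\n");
    }
    return 0;
}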
@@ -34,7 +34,7 @@ const int unusedOPs[] {
     MATH_SCALE, MATH_SCALEANDSHIFT,

     /* shape operators */
-    MOVEMENT_GATHER, SHAPE_UNSQUEEZE,
+    /*MOVEMENT_GATHER,*/ SHAPE_UNSQUEEZE,
     SHAPE_MERGE, SHAPE_SPLIT,

     /* reduce operators */
...
@@ -196,7 +196,19 @@ where i is the index of the item
 */
 XTensor Multiply(const XTensor &a, const XTensor &b, bool inplace, int leadingDim)
 {
-    XTensor c(&a);
+    XTensor c;
+
+    if (inplace) {
+        /* the result is stored into the input tensor */
+        int dims[MAX_TENSOR_DIM_NUM];
+        memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
+        dims[0] = -dims[0];
+        InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
+        c.data = a.data;
+    }
+    else {
+        InitTensorV2(&c, &a);
+    }
     c.SetTMPFlag();

     if (b.order == 0){
@@ -239,6 +251,9 @@ XTensor Multiply(const XTensor &a, const XTensor &b, bool inplace, int leadingDim)
         }
     }

+    XTensor* p = const_cast<XTensor*>(&a);
+    if (inplace)
+        p->data = NULL;
     return c;
 }
...
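The pattern introduced here repeats in Sub, Sum, SumDim, and ScaleAndShift below: c gets the input's shape with a negated first dimension (which appears to tell InitTensor to set up metadata without allocating storage, a NiuTrans convention), c.data then aliases a.data, and once the kernel has run the input's pointer is nulled so exactly one tensor owns the buffer. A minimal stand-alone sketch of that ownership transfer, using a hypothetical Buffer type rather than the NiuTrans.Tensor API:

#include <cassert>
#include <cstddef>
#include <cstdlib>

struct Buffer {
    float* data = nullptr;
    std::size_t size = 0;
};

// "Inplace" result: alias the input's storage, then null the input's
// pointer, mirroring the diff's `c.data = a.data; ... p->data = NULL;`.
Buffer MakeInplaceResult(Buffer& in) {
    Buffer out;
    out.data = in.data;   // the result reuses the input buffer
    out.size = in.size;
    in.data = nullptr;    // the input gives up ownership
    return out;
}

int main() {
    Buffer a;
    a.size = 4;
    a.data = static_cast<float*>(std::malloc(a.size * sizeof(float)));
    Buffer c = MakeInplaceResult(a);
    assert(a.data == nullptr && c.data != nullptr);
    std::free(c.data);    // only the result frees the storage
    return 0;
}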
@@ -89,12 +89,25 @@ make a new tensor c to keep the result and return it
 >> a - a tensor
 >> b - another tensor
+>> inplace - indicates whether the result will be placed in the input tensor
 >> beta - the scaling factor
 << return - the result of tensor subtraction
 */
-XTensor Sub(const XTensor & a, const XTensor & b, DTYPE beta)
+XTensor Sub(const XTensor & a, const XTensor & b, bool inplace, DTYPE beta)
 {
-    XTensor c(&a);
+    XTensor c;
+
+    if (inplace) {
+        /* the result is stored into the input tensor */
+        int dims[MAX_TENSOR_DIM_NUM];
+        memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
+        dims[0] = -dims[0];
+        InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
+        c.data = a.data;
+    }
+    else {
+        InitTensorV2(&c, &a);
+    }
     c.SetTMPFlag();

     if (b.order == 0){
@@ -129,6 +142,10 @@ XTensor Sub(const XTensor & a, const XTensor & b, DTYPE beta)
             ShowNTErrors("Something is wrong!");
         }
     }

+    XTensor* p = const_cast<XTensor*>(&a);
+    if (inplace)
+        p->data = NULL;
     return c;
 }
...
@@ -41,7 +41,7 @@ void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
 tensor subtraction c = a - b * \beta
 make a new tensor c to keep the result and return it
 */
-XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
+XTensor Sub(const XTensor &a, const XTensor &b, bool inplace = false, DTYPE beta = (DTYPE)1.0);

 /* tensor subtraction c = a - b * \beta */
 void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
...
@@ -262,13 +262,27 @@ make a new tensor c to keep the result and return it
 >> a - a tensor
 >> b - another tensor
+>> inplace - indicates whether the result will be placed in the input tensor
 >> beta - the scaling factor
 << return - the result of tensor summation
 */
-XTensor Sum(const XTensor & a, const XTensor & b, DTYPE beta)
+XTensor Sum(const XTensor &a, const XTensor &b, bool inplace, DTYPE beta)
 {
-    XTensor c(&a);
+    XTensor c;
+
+    if (inplace) {
+        /* the result is stored into the input tensor */
+        int dims[MAX_TENSOR_DIM_NUM];
+        memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
+        dims[0] = -dims[0];
+        InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
+        c.data = a.data;
+    }
+    else {
+        InitTensorV2(&c, &a);
+    }
     c.SetTMPFlag();
+    c.enableGrad = a.enableGrad;

     if (b.order == 0){
         DTYPE shift = b.Get0D() * beta;
@@ -302,6 +316,10 @@ XTensor Sum(const XTensor & a, const XTensor & b, DTYPE beta)
             ShowNTErrors("Something is wrong!");
         }
     }

+    XTensor* p = const_cast<XTensor*>(&a);
+    if (inplace)
+        p->data = NULL;
     return c;
 }
...
@@ -43,7 +43,7 @@ void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
 tensor summation c = a + b * \beta
 make a new tensor c to keep the result and return it
 */
-XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
+XTensor Sum(const XTensor &a, const XTensor &b, bool inplace = false, DTYPE beta = (DTYPE)1.0);

 /* tensor summation c = a + b * \beta */
 void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
...
@@ -154,12 +154,25 @@ i.e., a is summed with b by broadcasting
 >> a - a tensor
 >> b - another tensor whose size is equal to that of dimension n of a
 >> n - the dimension index
+>> inplace - indicates whether the result will be placed in the input tensor
 >> beta - the scaling factor
 << return - the result tensor by tensor summation
 */
-XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
+XTensor SumDim(const XTensor &a, const XTensor &b, int n, bool inplace, DTYPE beta)
 {
-    XTensor c(&a);
+    XTensor c;
+
+    if (inplace) {
+        /* the result is stored into the input tensor */
+        int dims[MAX_TENSOR_DIM_NUM];
+        memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
+        dims[0] = -dims[0];
+        InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
+        c.data = a.data;
+    }
+    else {
+        InitTensorV2(&c, &a);
+    }
     c.SetTMPFlag();

     n = MODX(n, a.order);
@@ -174,6 +187,9 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
         XLink::AddParamToHead(&c, beta);
     }

+    XTensor* p = const_cast<XTensor*>(&a);
+    if (inplace)
+        p->data = NULL;
     return c;
 }
...
@@ -40,7 +40,7 @@ void _SumDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0);
 /* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
    i.e., a is summed with b by broadcasting. We make a new tensor c to keep the result and return it */
-XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0);
+XTensor SumDim(const XTensor &a, const XTensor &b, int n, bool inplace = false, DTYPE beta = (DTYPE)1.0);

 /* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
    i.e., a is summed with b by broadcasting */
...
@@ -38,6 +38,49 @@
 namespace nts{ // namespace nts(NiuTrans.Tensor)

+/*
+generate data items according to the method described in
+`Understanding the difficulty of training deep feedforward neural networks`
+- Glorot, X. & Bengio, Y. (2010), using a normal distribution.
+The resulting tensor will have values sampled from
+:math:`\mathcal{N}(0, \text{std}^2)` where
+.. math::
+    \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
+Also known as Glorot initialization.
+>> tensor - the tensor whose data array would be initialized
+>> gain - an optional scaling factor
+*/
+void _SetDataXavierNormal(XTensor * tensor, DTYPE gain)
+{
+    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
+    CheckNTErrors(tensor->order >= 2, "the tensor dimension must be no less than 2!");
+
+    int fanIn = 1;
+    int fanOut = 1;
+
+    int order = tensor->order;
+    if (order == 2) {
+        fanIn = tensor->dimSize[1];
+        fanOut = tensor->dimSize[0];
+    }
+    else {
+        int numInputFmaps = tensor->dimSize[1];
+        int numOutputFmaps = tensor->dimSize[0];
+        int receptiveFieldSize = 0;
+        for (int i = 2; i < order; i++)
+            receptiveFieldSize += tensor->dimSize[i];
+        fanIn = numInputFmaps * receptiveFieldSize;
+        fanOut = numOutputFmaps * receptiveFieldSize;
+    }
+
+    DTYPE std = gain * (float)sqrt(2.0 / (float)(fanIn + fanOut));
+    tensor->SetDataRandn(0, std);
+}
+
 /*
 Fills the input Tensor or Variable with values according to the method described in
 "Understanding the difficulty of training deep feedforward neural networks" - Glorot, X. & Bengio, Y. (2010),
@@ -70,7 +113,7 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
         fanOut = numOutputFmaps * receptiveFieldSize;
     }

-    DTYPE std = gain * (float)sqrt(2.0 / (fanIn + fanOut));
+    DTYPE std = gain * (float)sqrt(2.0 / (float)(fanIn + fanOut));
     DTYPE a = (DTYPE)sqrt(3.0F) * std;
     tensor->SetDataRand(-a, a);
     //_SetDataRand(tensor, -finfout, finfout);
...
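Both initializers share std = gain * sqrt(2 / (fanIn + fanOut)); the one-line change to _SetDataFanInOut only makes the floating-point conversion explicit, so the computed value is unchanged. A stand-alone sanity check of the formula with hypothetical dimensions (plain C++, not library code):

#include <cmath>
#include <cstdio>

int main() {
    // Hypothetical 2D weight of shape [fanOut = 512, fanIn = 256], gain = 1:
    int fanIn = 256, fanOut = 512;
    float gain = 1.0f;
    float stdDev = gain * std::sqrt(2.0f / (float)(fanIn + fanOut));
    // sqrt(2 / 768) ~= 0.0510; _SetDataXavierNormal draws from N(0, stdDev^2),
    // while _SetDataFanInOut draws from U(-sqrt(3) * stdDev, sqrt(3) * stdDev).
    std::printf("stdDev = %f\n", stdDev);
    return 0;
}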
@@ -27,6 +27,9 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

+/* generate data items with a Glorot initialization */
+void _SetDataXavierNormal(XTensor * tensor, DTYPE gain = 1.0F);
+
 /* generate data items with a xavier initialization */
 void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
...
@@ -153,11 +153,24 @@ b = a * scale + shift
 >> a - the input tensor
 >> scale - the scale factor
 >> shift - the shift factor
+>> inplace - indicates whether the result will be placed in the input tensor
 << return - the result of scaling and shifting all tensor entires
 */
-XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
+XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift, bool inplace)
 {
-    XTensor b(&a);
+    XTensor b;
+
+    if (inplace) {
+        /* the result is stored into the input tensor */
+        int dims[MAX_TENSOR_DIM_NUM];
+        memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
+        dims[0] = -dims[0];
+        InitTensor(&b, a.order, dims, a.dataType, a.devID, a.enableGrad);
+        b.data = a.data;
+    }
+    else {
+        InitTensorV2(&b, &a);
+    }
     b.SetTMPFlag();

     if (scale == 1.0F)
@@ -178,6 +191,9 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
         }
     }

+    XTensor* p = const_cast<XTensor*>(&a);
+    if (inplace)
+        p->data = NULL;
     return b;
 }
...
@@ -55,7 +55,7 @@ scale and shift all tensor entires
 make a new tensor to keep the result and return it
 b = a * scale + shift
 */
-XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
+XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0, bool inplace = false);

 /*
 scale and shift all tensor entires
...
@@ -138,6 +138,7 @@ XTensor Transpose(const XTensor &a, const int i, const int j)
     float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
     XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
+    b.enableGrad = a.enableGrad;
     b.SetTMPFlag();

     /* call _Transpose function */
...
@@ -149,6 +149,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
     float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
     XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
+    b.enableGrad = a.enableGrad;
     b.SetTMPFlag();

     /* call _Unsqueeze function */
...
@@ -242,6 +242,7 @@ XTensor GetReduceTensor(const XTensor & input, int dim)
     XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
     output.SetTMPFlag();
+    delete[] dimSize;

     return output;
 }
...
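The added delete[] closes a small leak: dimSize is evidently heap-allocated earlier in the function (not shown in the hunk), used only to construct output, and was never freed on return. A stand-alone reproduction of the fixed pattern, with a hypothetical Shape type rather than the library's XTensor:

#include <vector>

struct Shape {
    std::vector<int> dims;
    Shape(int order, const int* dimSize) : dims(dimSize, dimSize + order) {}
};

// Mirrors GetReduceTensor: a temporary dim array feeds the constructor
// and must be released afterwards, as the added delete[] now does.
Shape MakeReduced(int order, const int* inDims) {
    int* dimSize = new int[order];
    for (int i = 0; i < order; i++)
        dimSize[i] = inDims[i];
    Shape out(order, dimSize);   // the constructor copies the values
    delete[] dimSize;            // without this, every call leaked the array
    return out;
}

int main() {
    int dims[3] = {4, 5, 6};
    Shape s = MakeReduced(3, dims);
    return (int)s.dims.size() - 3;   // 0 on success
}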
@@ -87,7 +87,7 @@ bool TestMultiply1()
     /* call Multiply function */
     _Multiply(s1, s2, t, 0, 0);
     _MultiplyMe(tMe, s2, 0, 0);
-    tUser = Multiply(*s1, *s2, 0);
+    tUser = Multiply(*s1, *s2, false, 0);

     /* check results */
     cpuTest = _CheckData(t, answer, tUnitNum, 1e-4F) &&
...
@@ -161,7 +161,7 @@ bool TestSub2()
     /* call Sub function */
     _Sub(a, b, c, beta);
     _SubMe(cMe, b, beta);
-    cUser = Sub(*a, *b, beta);
+    cUser = Sub(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(c, answer, unitNum, 1e-4F) &&
@@ -268,7 +268,7 @@ bool TestSub3()
     b->SetData(bData, bUnitNum);

     /* call Sum function */
-    cUser = Sub(*a, *b, beta);
+    cUser = Sub(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
@@ -370,7 +370,7 @@ bool TestSub4()
     b->SetData(bData, bUnitNum);

     /* call Sum function */
-    cUser = Sub(*a, *b, beta);
+    cUser = Sub(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
@@ -472,7 +472,7 @@ bool TestSub5()
     b->SetData(bData, bUnitNum);

     /* call Sum function */
-    cUser = Sub(*a, *b, beta);
+    cUser = Sub(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
...
@@ -161,7 +161,7 @@ bool TestSum2()
     /* call Sum function */
     _Sum(a, b, c, beta);
     _SumMe(cMe, b, beta);
-    cUser = Sum(*a, *b, beta);
+    cUser = Sum(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(c, answer, unitNum, 1e-4F) &&
@@ -268,7 +268,7 @@ bool TestSum3()
     b->SetData(bData, bUnitNum);

     /* call Sum function */
-    cUser = Sum(*a, *b, beta);
+    cUser = Sum(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
@@ -370,7 +370,7 @@ bool TestSum4()
     b->SetData(bData, bUnitNum);

     /* call Sum function */
-    cUser = Sum(*a, *b, beta);
+    cUser = Sum(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
@@ -472,7 +472,7 @@ bool TestSum5()
     b->SetData(bData, bUnitNum);

     /* call Sum function */
-    cUser = Sum(*a, *b, beta);
+    cUser = Sum(*a, *b, false, beta);

     /* check results */
     cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
...