Commit 5bfbd041 by liyinqiao

Merge with huchi's branch and fix bugs.

parent 63eee374
......@@ -304,7 +304,7 @@ XTensor Attention::GetRPEmbedding(const int lenQ, const int lenKV,
XTensor range2DTrans;
range2D = Unsqueeze(range, 0, lenQ);
range2DTrans = Transpose(range2D, 0, 1);
- embMatrix = Sum(range2D, range2DTrans, -1);
+ embMatrix = Sum(range2D, range2DTrans, false, -1);
}
else {
for (int i = 0; i < lenKV; i++)
......
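This call-site fix is worth pausing on: the commit inserts a bool inplace parameter before DTYPE beta in Sum, so the old call Sum(range2D, range2DTrans, -1) would still compile, with -1 silently converted to inplace = true and beta left at its default of 1.0, turning what used to compute a - b into an in-place a + b. A minimal standalone sketch of the pitfall (NewStyleSum is a hypothetical stand-in, not the NiuTrans API):

#include <cstdio>

/* hypothetical stand-in mirroring the new parameter order:
   Sum(a, b, inplace = false, beta = 1.0) */
static void NewStyleSum(int a, int b, bool inplace = false, float beta = 1.0F)
{
    std::printf("inplace=%d, beta=%g\n", (int)inplace, beta);
}

int main()
{
    NewStyleSum(1, 2, -1);        /* old-style call: -1 becomes inplace=true, beta stays 1.0 */
    NewStyleSum(1, 2, false, -1); /* the corrected call: inplace=false, beta=-1 */
    return 0;
}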
......@@ -34,7 +34,7 @@ const int unusedOPs[] {
MATH_SCALE, MATH_SCALEANDSHIFT,
/* shape operators */
- MOVEMENT_GATHER, SHAPE_UNSQUEEZE,
+ /*MOVEMENT_GATHER,*/ SHAPE_UNSQUEEZE,
SHAPE_MERGE, SHAPE_SPLIT,
/* reduce operators */
......
......@@ -196,7 +196,19 @@ where i is the index of the item
*/
XTensor Multiply(const XTensor &a, const XTensor &b, bool inplace, int leadingDim)
{
- XTensor c(&a);
+ XTensor c;
if (inplace) {
/* the result is stored into the input tensor */
int dims[MAX_TENSOR_DIM_NUM];
memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
dims[0] = -dims[0];
InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
c.data = a.data;
}
else {
InitTensorV2(&c, &a);
}
c.SetTMPFlag();
if (b.order == 0){
......@@ -239,6 +251,9 @@ XTensor Multiply(const XTensor &a, const XTensor &b, bool inplace, int leadingDi
}
}
XTensor* p = const_cast<XTensor*>(&a);
if (inplace)
p->data = NULL;
return c;
}
......
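The inplace branch above is the pattern this commit applies uniformly to Multiply, Sub, Sum, SumDim, and ScaleAndShift: negate the leading dimension before InitTensor (which, by NiuTrans convention, appears to skip allocating a data buffer), alias c.data to a.data, and, just before returning, null out a's data pointer so the buffer ends up with exactly one owner. A minimal sketch of that ownership hand-off under those assumptions (simplified stand-in types, not the real XTensor):

#include <cstddef>

/* simplified stand-in for a tensor that owns a flat float buffer */
struct MiniTensor {
    float* data = nullptr;
    std::size_t size = 0;
};

/* scale every element in place, returning a tensor that steals
   the input's buffer instead of allocating a new one */
MiniTensor ScaleInplaceSketch(MiniTensor& a, float scale)
{
    MiniTensor c;
    c.data = a.data;      /* alias the input's storage: no allocation, no copy */
    c.size = a.size;
    for (std::size_t i = 0; i < c.size; i++)
        c.data[i] *= scale;
    a.data = nullptr;     /* detach the input so c is the sole owner on return */
    a.size = 0;
    return c;
}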
......@@ -89,12 +89,25 @@ make a new tensor c to keep the result and return it
>> a - a tensor
>> b - another tensor
>> inplace - indicates whether the result will be placed in the input tensor
>> beta - the scaling factor
<< return - the result of tensor subtraction
*/
- XTensor Sub(const XTensor & a, const XTensor & b, DTYPE beta)
+ XTensor Sub(const XTensor & a, const XTensor & b, bool inplace, DTYPE beta)
{
- XTensor c(&a);
+ XTensor c;
if (inplace) {
/* the result is stored into the input tensor */
int dims[MAX_TENSOR_DIM_NUM];
memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
dims[0] = -dims[0];
InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
c.data = a.data;
}
else {
InitTensorV2(&c, &a);
}
c.SetTMPFlag();
if (b.order == 0){
......@@ -129,6 +142,10 @@ XTensor Sub(const XTensor & a, const XTensor & b, DTYPE beta)
ShowNTErrors("Something is wrong!");
}
}
XTensor* p = const_cast<XTensor*>(&a);
if (inplace)
p->data = NULL;
return c;
}
......
......@@ -41,7 +41,7 @@ void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
tensor subtraction c = a - b * \beta
make a new tensor c to keep the result and return it
*/
- XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
+ XTensor Sub(const XTensor &a, const XTensor &b, bool inplace = false, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
......
......@@ -262,13 +262,27 @@ make a new tensor c to keep the result and return it
>> a - a tensor
>> b - another tensor
>> inplace - indicates whether the result will be placed in the input tensor
>> beta - the scaling factor
<< return - the result of tensor summation
*/
- XTensor Sum(const XTensor & a, const XTensor & b, DTYPE beta)
+ XTensor Sum(const XTensor &a, const XTensor &b, bool inplace, DTYPE beta)
{
- XTensor c(&a);
+ XTensor c;
if (inplace) {
/* the result is stored into the input tensor */
int dims[MAX_TENSOR_DIM_NUM];
memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
dims[0] = -dims[0];
InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
c.data = a.data;
}
else {
InitTensorV2(&c, &a);
}
c.SetTMPFlag();
c.enableGrad = a.enableGrad;
if (b.order == 0){
DTYPE shift = b.Get0D() * beta;
......@@ -302,6 +316,10 @@ XTensor Sum(const XTensor & a, const XTensor & b, DTYPE beta)
ShowNTErrors("Something is wrong!");
}
}
XTensor* p = const_cast<XTensor*>(&a);
if (inplace)
p->data = NULL;
return c;
}
......
......@@ -43,7 +43,7 @@ void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
tensor summation c = a + b * \beta
make a new tensor c to keep the result and return it
*/
- XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
+ XTensor Sum(const XTensor &a, const XTensor &b, bool inplace = false, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
......
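With inplace defaulting to false, two-argument calls keep their old meaning, but every call that passed beta positionally has to be rewritten, which is exactly what the test updates later in this diff do. A short usage sketch against the new declarations (the include path is an assumption; a and b are tensors from the API above):

#include "Sum.h" /* assumed path to the declarations shown above */

void SumUsageSketch(const XTensor& a, const XTensor& b)
{
    XTensor c1 = Sum(a, b);              /* c1 = a + b: inplace defaults to false, beta to 1.0 */
    XTensor c2 = Sum(a, b, false, 0.5F); /* c2 = a + 0.5 * b: beta must now follow inplace */
    XTensor c3 = Sum(a, b, true);        /* c3 reuses a's storage; a's data pointer is detached */
}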
......@@ -154,12 +154,25 @@ i.e., a is summed with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> inplace - indicates whether the result will be placed in the input tensor
>> beta - the scaling factor
<< return - the result tensor by tensor summation
*/
- XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
+ XTensor SumDim(const XTensor &a, const XTensor &b, int n, bool inplace, DTYPE beta)
{
- XTensor c(&a);
+ XTensor c;
if (inplace) {
/* the result is stored into the input tensor */
int dims[MAX_TENSOR_DIM_NUM];
memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
dims[0] = -dims[0];
InitTensor(&c, a.order, dims, a.dataType, a.devID, a.enableGrad);
c.data = a.data;
}
else {
InitTensorV2(&c, &a);
}
c.SetTMPFlag();
n = MODX(n, a.order);
......@@ -174,6 +187,9 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
XLink::AddParamToHead(&c, beta);
}
XTensor* p = const_cast<XTensor*>(&a);
if (inplace)
p->data = NULL;
return c;
}
......
......@@ -40,7 +40,7 @@ void _SumDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting. We make a new tensor c to keep the result and return it */
- XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0);
+ XTensor SumDim(const XTensor &a, const XTensor &b, int n, bool inplace=false, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting */
......
......@@ -38,6 +38,49 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
generate data items according to the method
described in `Understanding the difficulty
of training deep feedforward neural networks`
- Glorot, X. & Bengio, Y. (2010), using a normal
distribution. The resulting tensor will have values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where
.. math::
\text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
Also known as Glorot initialization.
>> tensor - the tensor whose data array would be initialized
>> gain - an optional scaling factor
*/
void _SetDataXavierNormal(XTensor * tensor, DTYPE gain)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
CheckNTErrors(tensor->order >= 2, "the tensor dimension must be no less than 2!");
int fanIn = 1;
int fanOut = 1;
int order = tensor->order;
if (order == 2) {
fanIn = tensor->dimSize[1];
fanOut = tensor->dimSize[0];
}
else {
int numInputFmaps = tensor->dimSize[1];
int numOutputFmaps = tensor->dimSize[0];
int receptiveFieldSize = 0;
for (int i = 2; i < order; i++)
receptiveFieldSize += tensor->dimSize[i];
fanIn = numInputFmaps * receptiveFieldSize;
fanOut = numOutputFmaps * receptiveFieldSize;
}
DTYPE std = gain * (float)sqrt(2.0 / (float)(fanIn + fanOut));
tensor->SetDataRandn(0, std);
}
/*
Fills the input Tensor or Variable with values according to the method described in
"Understanding the difficulty of training deep feedforward neural networks" - Glorot, X. & Bengio, Y. (2010),
......@@ -70,7 +113,7 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
fanOut = numOutputFmaps * receptiveFieldSize;
}
- DTYPE std = gain * (float)sqrt(2.0 / (fanIn + fanOut));
+ DTYPE std = gain * (float)sqrt(2.0 / (float)(fanIn + fanOut));
DTYPE a = (DTYPE)sqrt(3.0F) * std;
tensor->SetDataRand(-a, a);
//_SetDataRand(tensor, -finfout, finfout);
......
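A quick numeric check of the two initializers above (a self-contained sketch; the 512 x 512 shape is just an example): for fanIn = fanOut = 512 and gain = 1, std = sqrt(2 / 1024) is roughly 0.0442, and the uniform variant's bound a = sqrt(3) * std is roughly 0.0765, giving a uniform distribution on [-a, a] whose variance a^2 / 3 equals std^2.

#include <cmath>
#include <cstdio>

int main()
{
    /* example shape: a 512 x 512 weight matrix with gain = 1 */
    const float gain = 1.0F;
    const int fanIn = 512, fanOut = 512;

    /* _SetDataXavierNormal draws from N(0, std^2) */
    float stdDev = gain * std::sqrt(2.0F / (float)(fanIn + fanOut)); /* ~0.0442 */

    /* _SetDataFanInOut draws uniformly from [-a, a], whose variance
       a^2 / 3 equals std^2, matching the normal variant */
    float a = std::sqrt(3.0F) * stdDev; /* ~0.0765 */

    std::printf("std = %f, a = %f\n", stdDev, a);
    return 0;
}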
......@@ -27,6 +27,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a Glorot (normal) initialization */
void _SetDataXavierNormal(XTensor * tensor, DTYPE gain = 1.0F);
/* generate data items with a Xavier (uniform) initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
......
......@@ -153,11 +153,24 @@ b = a * scale + shift
>> a - the input tensor
>> scale - the scale factor
>> shift - the shift factor
>> inplace - indicates whether the result will be placed in the input tensor
<< return - the result of scaling and shifting all tensor entries
*/
- XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
+ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift, bool inplace)
{
- XTensor b(&a);
+ XTensor b;
if (inplace) {
/* the result is stored into the input tensor */
int dims[MAX_TENSOR_DIM_NUM];
memcpy(&(dims[0]), &(a.dimSize[0]), sizeof(int) * a.order);
dims[0] = -dims[0];
InitTensor(&b, a.order, dims, a.dataType, a.devID, a.enableGrad);
b.data = a.data;
}
else {
InitTensorV2(&b, &a);
}
b.SetTMPFlag();
if (scale == 1.0F)
......@@ -178,6 +191,9 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
}
}
XTensor* p = const_cast<XTensor*>(&a);
if (inplace)
p->data = NULL;
return b;
}
......
......@@ -55,7 +55,7 @@ scale and shift all tensor entires
make a new tensor to keep the result and return it
b = a * scale + shift
*/
- XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
+ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0, bool inplace=false);
/*
scale and shift all tensor entries
......
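Usage under the new ScaleAndShift signature (a sketch; the include path is an assumption, and note that shift must be spelled out whenever inplace is passed):

#include "ScaleAndShift.h" /* assumed path to the declaration shown above */

void ScaleAndShiftUsageSketch(const XTensor& a)
{
    XTensor b = ScaleAndShift(a, 2.0F);             /* b = 2 * a + 0, result in a fresh buffer */
    XTensor c = ScaleAndShift(a, 2.0F, 1.0F, true); /* c = 2 * a + 1, written into a's storage */
}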
......@@ -138,6 +138,7 @@ XTensor Transpose(const XTensor &a, const int i, const int j)
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
b.enableGrad = a.enableGrad;
b.SetTMPFlag();
/* call _Transpose function */
......
......@@ -149,6 +149,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
b.enableGrad = a.enableGrad;
b.SetTMPFlag();
/* call _Unsqueeze function */
......
......@@ -242,6 +242,7 @@ XTensor GetReduceTensor(const XTensor & input, int dim)
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMPFlag();
delete[] dimSize;
return output;
}
......
......@@ -87,7 +87,7 @@ bool TestMultiply1()
/* call Multiply function */
_Multiply(s1, s2, t, 0, 0);
_MultiplyMe(tMe, s2, 0, 0);
- tUser = Multiply(*s1, *s2, 0);
+ tUser = Multiply(*s1, *s2, false, 0);
/* check results */
cpuTest = _CheckData(t, answer, tUnitNum, 1e-4F) &&
......
......@@ -161,7 +161,7 @@ bool TestSub2()
/* call Sub function */
_Sub(a, b, c, beta);
_SubMe(cMe, b, beta);
- cUser = Sub(*a, *b, beta);
+ cUser = Sub(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(c, answer, unitNum, 1e-4F) &&
......@@ -268,7 +268,7 @@ bool TestSub3()
b->SetData(bData, bUnitNum);
/* call Sub function */
- cUser = Sub(*a, *b, beta);
+ cUser = Sub(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
......@@ -370,7 +370,7 @@ bool TestSub4()
b->SetData(bData, bUnitNum);
/* call Sub function */
- cUser = Sub(*a, *b, beta);
+ cUser = Sub(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
......@@ -472,7 +472,7 @@ bool TestSub5()
b->SetData(bData, bUnitNum);
/* call Sub function */
- cUser = Sub(*a, *b, beta);
+ cUser = Sub(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
......
......@@ -161,7 +161,7 @@ bool TestSum2()
/* call Sum function */
_Sum(a, b, c, beta);
_SumMe(cMe, b, beta);
- cUser = Sum(*a, *b, beta);
+ cUser = Sum(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(c, answer, unitNum, 1e-4F) &&
......@@ -268,7 +268,7 @@ bool TestSum3()
b->SetData(bData, bUnitNum);
/* call Sum function */
- cUser = Sum(*a, *b, beta);
+ cUser = Sum(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
......@@ -370,7 +370,7 @@ bool TestSum4()
b->SetData(bData, bUnitNum);
/* call Sum function */
- cUser = Sum(*a, *b, beta);
+ cUser = Sum(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
......@@ -472,7 +472,7 @@ bool TestSum5()
b->SetData(bData, bUnitNum);
/* call Sum function */
- cUser = Sum(*a, *b, beta);
+ cUser = Sum(*a, *b, false, beta);
/* check results */
cpuTest = _CheckData(&cUser, answer, cUnitNum, 1e-4F);
......