Commit 1348bcba by xuchen

merge with xuchen branch

parent d221ef9d
@@ -77,104 +77,20 @@ backward propagation to obtain gradient
 >> root - root node (output) of the network
 >> loss - name of loss function
 */
-void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(XTensor &root)
 {
     TensorList roots(1);
     roots.Add(&root);
-    TensorList golds(1);
-    golds.Add(NULL);
-    TensorList paddings(1);
-    paddings.Add(NULL);
-    Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient wrt. the loss/error function
->> root - root node (output) of the network
->> gold - gold standard for the output
->> loss - name of loss function
-*/
-void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
-{
-    TensorList roots(1);
-    roots.Add(&root);
-    TensorList golds(1);
-    golds.Add(&gold);
-    TensorList paddings(1);
-    paddings.Add(NULL);
-    Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient wrt. the loss/error function
->> root - root node (output) of the network
->> gold - gold standard for the output
->> padding - specify a target value that is ignored and does not contribute to the gradient computation
->> loss - name of loss function
-*/
-void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
-{
-    TensorList roots(1);
-    roots.Add(&root);
-    TensorList golds(1);
-    golds.Add(&gold);
-    TensorList paddings(1);
-    paddings.Add(&padding);
-    Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient
-with a number of root nodes
->> roots - a list of root nodes (output) of the network
->> loss - name of loss function
-*/
-void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
-{
-    TensorList golds(roots.count);
-    TensorList paddings(roots.count);
-    for (int i = 0; i < roots.count; i++) {
-        golds.Add(NULL);
-        paddings.Add(NULL);
-    }
-
-    Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient
-with a number of root nodes
->> roots - a list of root nodes (output) of the network
->> golds - a list of gold standard for the output
->> loss - name of loss function
-*/
-void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
-{
-    TensorList paddings(roots.count);
-    for (int i = 0; i < roots.count; i++)
-        paddings.Add(NULL);
-
-    Backward(roots, golds, paddings, loss);
-}
+    Backward(roots);
 }

 /*
 backward propagation to obtain gradient wrt. the loss/error function
 with a number of root nodes
 >> roots - a list of root nodes (output) of the network
->> golds - a list of gold standard for the output
->> paddings - specify a target value that is ignored
->> loss - name of loss function
 */
-void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(TensorList &roots)
 {
     Traverse(roots);

@@ -187,39 +103,6 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings,
         node->visitMark = NODE_UNFINISHED;
     }

-    //XLossGrad lossGrad;
-
-    /* we start with the gradient with respect to the loss for output layers */
-    /*for(int i = 0; i < roots.count; i++){
-        XTensor * root = (XTensor*)roots.Get(i);
-        XTensor * gold = (XTensor*)golds.Get(i);
-        XTensor * padding = (XTensor*)paddings.Get(i);
-        XLink &income = root->income;
-        int funcID = income.typeID;
-        void * params = income.params;*/
-
-        /* we compute dE/dx if the output is generated by an activation function y = f(x).
-           Note that we do not need to obtain dE/dy here because it is of no use in the
-           following process of back-propagation */
-        /*if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
-            if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
-                XTensor * x = income.tails[0];
-                XNoder::MakeGrad(x);
-                lossGrad.Compute(gold, root, x, NULL, x->grad, padding, funcID, params, loss);
-                root->visitMark = NODE_FINISHED;
-            }
-            else {
-                XNoder::MakeGrad(root);
-                lossGrad.Compute(gold, root, root->grad, padding, loss);
-            }
-        }*/
-
-        /* we compute dE/dy (y is the output) if no predefined activation function is used */
-        /*else{
-            XNoder::MakeGrad(root);
-            lossGrad.Compute(gold, root, root->grad, NULL, loss);
-        }
-    }*/
-
     /* back-propagation from output to input */
     for(int i = nodes.count - 1; i >= 0; i--){
         XTensor * node = (XTensor*)nodes.Get(i);

@@ -460,7 +343,6 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
     }
 }

-
 /*
 search for a node in a top-down manner by its name
 >> top - the top most node
...
@@ -61,25 +61,11 @@ struct XNet
     void Clear();

     /* backward propagation to obtain gradient */
-    void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-    /* backward propagation to obtain gradient wrt. the loss/error function */
-    void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-    /* backward propagation to obtain gradient wrt. the loss/error function */
-    void Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-    /* backward propagation to obtain gradient
-       with a number of root nodes */
-    void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-    /* backward propagation to obtain gradient
-       with a number of root nodes */
-    void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
+    void Backward(XTensor &root);

     /* backward propagation to obtain gradient wrt. the loss/error function
        with a number of root nodes */
-    void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
+    void Backward(TensorList &roots);

     /* backward computation for a given node */
     void BackwardNode(XTensor * node, bool isEfficent = false);
...
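Note on the API change: after this commit the loss is expected to be a regular node of the computation graph rather than a LOSS_FUNCTION_NAME named at backward time, so only the root (the loss node) is passed. A minimal usage sketch under that assumption; the CrossEntropy call below stands in for whatever graph-level loss op the caller actually uses:

    XNet net;
    XTensor output;                                /* network output, built from grad-enabled ops */
    XTensor gold;                                  /* gold-standard answer                        */

    XTensor lossNode = CrossEntropy(output, gold); /* loss as a graph node (illustrative)         */
    net.Backward(lossNode);                        /* gradients flow back from the loss node      */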
@@ -76,16 +76,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
     InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);

     float scale = 1.0F;
-    float finfoutk = (float)sqrt(6.0F * scale / (d + dk));
-    float finfoutv = (float)sqrt(6.0F * scale / (d + dv));
-    float finfouta = (float)sqrt(6.0F * scale / (d + d));
-    float finfoutbig = (float)sqrt(6.0F * scale / (d + 3 * d));
-
-    wk.SetDataRand(-finfoutk, finfoutk);
-    wq.SetDataRand(-finfoutk, finfoutk);
-    wv.SetDataRand(-finfoutv, finfoutv);
-    wa.SetDataRand(-finfouta, finfouta);
-    wbig.SetDataRand(-finfoutbig, finfoutbig);
+    _SetDataFanInOut(&wk, scale);
+    _SetDataFanInOut(&wq, scale);
+    _SetDataFanInOut(&wv, scale);
+    _SetDataFanInOut(&wa, scale);
+    _SetDataFanInOut(&wbig, scale);
 }

 /*
...
@@ -67,12 +67,10 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
     InitTensor1DV2(&b2, outSize, X_FLOAT, devID);

     float scale = 1.0F;
-    float finfout1 = (float)sqrt(6.0F * scale / (inSize + hSize));
-    float finfout2 = (float)sqrt(6.0F * scale / (hSize + outSize));
-
-    w1.SetDataRand(-finfout1, finfout1);
+    _SetDataFanInOut(&w1, scale);
+    _SetDataFanInOut(&w2, scale);
     b1.SetZeroAll();
-    w2.SetDataRand(-finfout2, finfout2);
     b2.SetZeroAll();
 }
...
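The deleted lines in both hunks were hand-computing the Xavier/Glorot uniform bound, sqrt(6 * scale / (fanIn + fanOut)), separately for each weight matrix; _SetDataFanInOut presumably folds exactly this into one call. A minimal sketch of the assumed equivalent for a 2-D weight (the helper name is illustrative, not from this commit):

    #include <math.h>

    /* Xavier/Glorot uniform initialization:
       bound = sqrt(6 * scale / (fanIn + fanOut)), fill uniformly in [-bound, bound] */
    void XavierUniform(XTensor &w, float scale)
    {
        float fanIn  = (float)w.GetDim(0);
        float fanOut = (float)w.GetDim(1);
        float bound  = (float)sqrt(6.0F * scale / (fanIn + fanOut));
        w.SetDataRand(-bound, bound);   /* uniform fill, as the deleted code did */
    }

For wbig (shape d x 3d) this reproduces the deleted bound finfoutbig = sqrt(6 * scale / (d + 3 * d)).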
@@ -280,7 +280,7 @@ void XTensor::Init()
     isTmp = false;
     isGrad = false;
     isVar = false;
-    enableGrad = false;
+    enableGrad = true;
     visitMark = 0;
     grad = NULL;
 }
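Flipping this default to true means gradient bookkeeping is now on for every tensor unless explicitly disabled. A short sketch, assuming the flag is a public member that can simply be cleared on tensors that should stay out of the autograd graph:

    XTensor a;
    InitTensor2DV2(&a, 2, 3, X_FLOAT, -1);   /* enableGrad is now true by default            */
    a.enableGrad = false;                    /* opt this tensor out of gradient tracking     */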
@@ -704,6 +704,12 @@ void XTensor::ReshapeMerged(const int i, const int j)
     Reshape(order - 1, dims);
 }

+/* return a tensor whose data type is the same as that of the given tensor */
+XTensor XTensor::TypeAs(const XTensor &input)
+{
+    return ConvertDataType(*this, input.dataType);
+}
+
 /* get the number of items in the data array */
 int XTensor::GetSize() const
 {

@@ -2977,4 +2983,28 @@ void DelTensorBuf(XTensor * tensor)
     delete tensor;
 }

+/* overloading of the plus-sign: shift + tensor */
+XTensor operator+ (const DTYPE shift, const XTensor &tensor)
+{
+    return ScaleAndShift(tensor, 1, shift);
+}
+
+/* overloading of the minus-sign: shift - tensor */
+XTensor operator- (const DTYPE shift, const XTensor &tensor)
+{
+    return ScaleAndShift(tensor, (DTYPE)-1, shift);
+}
+
+/* overloading of the multiply-sign: scale * tensor */
+XTensor operator* (const DTYPE scale, const XTensor &tensor)
+{
+    return ScaleAndShift(tensor, scale, 0);
+}
+
+/* overloading of the division-sign
+   (note: as written this computes tensor / scale, not scale / tensor) */
+XTensor operator/ (const DTYPE scale, const XTensor &tensor)
+{
+    return ScaleAndShift(tensor, (DTYPE)1 / scale, 0);
+}
+
 } /* end of the nts (NiuTrans.Tensor) namespace */
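TypeAs and the left-scalar operators are thin wrappers over ConvertDataType and ScaleAndShift. A short usage sketch (tensor names illustrative):

    XTensor a, b;
    InitTensor2DV2(&a, 2, 2, X_FLOAT, -1);
    InitTensor2DV2(&b, 2, 2, X_INT, -1);

    XTensor c = a.TypeAs(b);      /* a's data converted to b's data type (X_INT)   */
    XTensor d = 1.0F + a;         /* ScaleAndShift(a, 1, 1):  a + 1                */
    XTensor e = 2.0F * a;         /* ScaleAndShift(a, 2, 0):  2 * a                */
    XTensor f = 1.0F - a;         /* ScaleAndShift(a, -1, 1): 1 - a                */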
@@ -283,6 +283,9 @@ public:
     /* reshape the tensor by merging two consecutive dimensions */
     void ReshapeMerged(const int i, const int j = -1);

+    /* return a tensor whose data type is the same as that of the given tensor */
+    XTensor TypeAs(const XTensor &input);
+
     /* get the number of items in the data array */
     int GetSize() const;

@@ -608,6 +611,18 @@ void DelTensor(XTensor * tensor);
 /* free the data space of a given tensor (on the buffer) */
 void DelTensorBuf(XTensor * tensor);

+/* overloading of the plus-sign */
+XTensor operator+ (const DTYPE shift, const XTensor &tensor);
+
+/* overloading of the minus-sign */
+XTensor operator- (const DTYPE shift, const XTensor &tensor);
+
+/* overloading of the multiply-sign */
+XTensor operator* (const DTYPE scale, const XTensor &tensor);
+
+/* overloading of the division-sign */
+XTensor operator/ (const DTYPE scale, const XTensor &tensor);
+
 } /* end of the nts (NiuTrans.Tensor) namespace */

 #endif
@@ -21,6 +21,7 @@

 #include "OnehotAndIndex.h"
 #include "OnehotAndIndex.cuh"
+#include "SetData.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor)

@@ -31,7 +32,7 @@ convert onehot tensor to index tensor
 >> index - index tensor, whose values are integer indices
 >> size - the last dimension size of the onehot tensor
 */
-void _OnehotToIndex(XTensor * onehot, XTensor * index, int size)
+void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size)
 {
     CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
     CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");

@@ -78,7 +79,7 @@ make a new tensor to keep the result and return it
 >> size - the last dimension size of the onehot tensor
 << return - the index tensor
 */
-XTensor OnehotToIndex(XTensor & onehot, int size)
+XTensor OnehotToIndex(const XTensor & onehot, int size)
 {
     CheckNTErrors(onehot.GetDim(-1) == size, "Illegal tensor dimension!");
     CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")

@@ -99,7 +100,8 @@ convert index tensor to onehot tensor
 >> onehot - onehot tensor, whose values are 0 or 1
 >> size - the last dimension size of the onehot tensor
 */
-void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP)
+void _IndexToOnehot(const XTensor * index, XTensor * onehot,
+                    int size, float labelSmoothingP)
 {
     CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
     CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");

@@ -109,11 +111,14 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
     for (int i = 0; i < index->order; i++)
         CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");

-    onehot->SetZeroAll();
+    //onehot->SetZeroAll();

-#ifdef USE_CUDA
     float confidence = 1 - labelSmoothingP;
     float lowconfidence = labelSmoothingP / size;

+    _SetDataFixedFloat(onehot, lowconfidence);
+
+#ifdef USE_CUDA
     if(onehot->devID >= 0 && index->devID >= 0) {
         _CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
         return;

@@ -129,7 +134,7 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
     for (int i = 0; i < blockNum; i++) {
         int id = indexData[i];
         DTYPE * od = onehotData + i * stride;
-        od[id] = 1;
+        od[id] = confidence;
     }
 }

@@ -143,7 +148,7 @@ make a new tensor to keep the result and return it
 >> labelSmoothingP - label smoothing factor
 << return - the onehot tensor
 */
-XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
+XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
 {
     CheckNTErrors(index.dataType == X_INT, "The onehot tensor must be in X_INT!")
...
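The smoothing arithmetic here: every slot of the onehot tensor is first filled with lowconfidence = labelSmoothingP / size (the new _SetDataFixedFloat call), and then only the gold slot is overwritten with confidence = 1 - labelSmoothingP, which is also why the CUDA kernel's else branch (below) could be dropped. A worked example with assumed values labelSmoothingP = 0.1 and size = 4:

    float p    = 0.1F;
    int   size = 4;
    float low  = p / size;                    /* 0.025, pre-filled everywhere      */
    float conf = 1.0F - p;                    /* 0.9, written at the gold index    */
    float row[4] = { conf, low, low, low };   /* resulting row for gold index 0    */
    /* the row sums to conf + 3 * low = 0.975 = 1 - p / size, slightly below 1,
       because the gold slot's own low share is overwritten by conf               */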
@@ -61,7 +61,7 @@ convert onehot tensor to index tensor (cuda version)
 >> index - index tensor, whose values are integer indices
 >> size - the last dimension size of the onehot tensor
 */
-void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size)
+void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size)
 {
     int devID = onehot->devID;

@@ -111,13 +111,10 @@ void KernelIndexToOnehot(DTYPE * onehotData, int * indexData, int blockNum, int
     int id = indexData[i];

-    //od[id] = 2.0;
-    //onehotData[i * stride + id] = 0.1;
     if (offset == id)
         od[offset] = confidence;
-    else{
-        od[offset] = lowconfidence;
-    }
+    //else
+    //    od[offset] = lowconfidence;
 }

 /*
@@ -127,7 +124,8 @@ convert index tensor to onehot tensor (cuda version)
 >> onehot - onehot tensor, whose values are 0 or 1
 >> size - the last dimension size of the onehot tensor
 */
-void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence)
+void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
+                        int size, float confidence, float lowconfidence)
 {
     int devID = onehot->devID;
...
@@ -27,10 +27,11 @@

 namespace nts{ // namespace nts(NiuTrans.Tensor)

 /* convert onehot tensor to index tensor (cuda version) */
-void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size);
+void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size);

 /* convert index tensor to onehot tensor (cuda version) */
-void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence);
+void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
+                        int size, float confidence, float lowconfidence);

 } // namespace nts(NiuTrans.Tensor)
...
@@ -27,18 +27,18 @@

 namespace nts{ // namespace nts(NiuTrans.Tensor)

 /* convert onehot tensor to index tensor */
-void _OnehotToIndex(XTensor * onehot, XTensor * index, int size);
+void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size);

 /* convert onehot tensor to index tensor (return an XTensor structure)
    make a new tensor to keep the result and return it */
-XTensor OnehotToIndex(XTensor & onehot, int num);
+XTensor OnehotToIndex(const XTensor & onehot, int num);

 /* convert index tensor to onehot tensor */
-void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP);
+void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP);

 /* convert index tensor to onehot tensor (return an XTensor structure)
    make a new tensor to keep the result and return it */
-XTensor IndexToOnehot(XTensor & index, int num, float labelSmoothingP);
+XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP);

 } // namespace nts(NiuTrans.Tensor)
...