Commit 2cba1bdd by xuchen

implement negate and sign operations by macros (unary and binary)

parent 0d96c2a0
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for data operation
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-26
*/
#include "XNoder.h"
#include "XBackwardData.h"
#include "../tensor/XName.h"
#include "../tensor/XUtility.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/core/getandset/SetData.h"
namespace nts{
/* compute dE/dx of a node */
void XDataGrad::MakeGrad(XTensor * node, bool isEfficient)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if(operID == GETANDSET_CONVERTDATATYPE)
GradConvertDataType(node, isEfficient);
else if(operID == GETANDSET_INDEXTOONEHOT)
GradIndexToOnehot(node, isEfficient);
else if(operID == GETANDSET_ONEHOTTOINDEX)
GradOnehotToIndex(node, isEfficient);
else{
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a data operation */
bool XDataGrad::IsDataOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & DATA_BASE) != 0;
}
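/* note (illustration, not part of the original file): operator ids for data
   operations are assumed to carry the DATA_BASE flag, so a single mask test
   identifies them; e.g., with hypothetical values
       DATA_BASE                 = 0x00010000
       GETANDSET_CONVERTDATATYPE = DATA_BASE + 1
   (DATA_BASE + 1) & DATA_BASE is non-zero, so the node counts as a data op */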
/*
gradient computation for ConvertDataType
for
b = convertdatatype(a)
we have
dE/da = convertdatatype(dE/db)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradConvertDataType(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for ConvertDataType!");
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
_ConvertDataType(node->grad, input->grad);
}
/*
gradient computation for OnehotToIndex
for
b = OnehotToIndex(a)
we have
dE/da = IndexToOnehot(b)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradOnehotToIndex(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for OnehotToIndex!");
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
}
/*
gradient computation for IndexToOnehot
for
b = IndexToOnehot(a)
we have
dE/da = IndexToOnehot(b)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradIndexToOnehot(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for IndexToOnehot!");
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for data operation
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-26
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDDATA_H__
#define __XBACKWARDDATA_H__
namespace nts{
/* this class computes the gradient for tensor data operation given a node */
class XDataGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a data operation */
static
bool IsDataOP(XTensor * node);
private:
/* gradient computation for ConvertDataType: b = convertdatatype(a, datatype) */
static
void GradConvertDataType(XTensor * node, bool isEfficient);
/* gradient computation for IndexToOnehot: b = indextoonehot(a, num) */
static
void GradIndexToOnehot(XTensor * node, bool isEfficient);
/* gradient computation for OnehotToIndex: b = onehottoindex(a, num) */
static
void GradOnehotToIndex(XTensor * node, bool isEfficient);
};
} // namespace nts(NiuTrans.Tensor)
#endif
\ No newline at end of file
......@@ -302,12 +302,12 @@ void T2TSearch::Generate(T2TStateBundle * beam)
row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */
_DescaleMe(preID, sizeVocab);
DescaleMe(preID, sizeVocab);
/* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */
_ModMe(index, sizeVocab);
ModMe(index, sizeVocab);
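/* worked example (illustrative numbers, not from the original source): with
   beam size 4 and sizeVocab 6, a flattened top-k index of 17 decomposes as
       preID = 17 / 6 = 2   (the previous state)
       index = 17 % 6 = 5   (the predicted word in the vocabulary) */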
score.Reshape(order, dims);
......
......@@ -280,6 +280,7 @@ void XTensor::Init()
isTmp = false;
isGrad = false;
isVar = false;
enableGrad = false;
visitMark = 0;
grad = NULL;
}
......@@ -310,6 +311,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{
strcpy(name, tensor.name);
order = tensor.order;
enableGrad = tensor.enableGrad;
memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM);
memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM);
dataType = tensor.dataType;
......@@ -2447,6 +2449,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio,
reference->devID, reference->mem);
......@@ -2462,6 +2465,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
tensor->enableGrad = reference->enableGrad;
InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->devID);
}
......@@ -2476,6 +2480,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio,
-1);
......
......@@ -151,6 +151,9 @@ public:
/* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad;
/* indicates whether the gradient of the tensor should be computed */
bool enableGrad;
/* indicates whether the tensor is used as parameters (or variables) */
bool isVar;
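/* note (illustration, not part of this diff): enableGrad is meant to gate graph
   construction; the high-level operations introduced in this commit only call
   XLink::MakeLink(...) when the output tensor has enableGrad set, e.g.
       if (b.enableGrad)
           XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
   so inference-only tensors skip building the backward network */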
......
......@@ -36,8 +36,6 @@
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/MultiplyDim.h"
#include "arithmetic/Negate.h"
#include "arithmetic/Sign.h"
#include "arithmetic/Sub.h"
#include "arithmetic/SubDim.h"
#include "arithmetic/Sum.h"
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Negate(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaNegate(a, b);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++)
db[i] = -d[i];
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
_Negate(a, a);
}
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of the input tensor
*/
XTensor Negate(const XTensor & a)
{
XTensor b(&a);
b.SetTMPFlag();
/* call _Negate function */
_Negate(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
return b;
}
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - whether to add the operation to the network
*/
void Negate(const XTensor & a, XTensor & b, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Negate function */
_Negate(&a, &b);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
}
}
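/* usage sketch (illustrative, not part of the original file):
       XTensor a, b;
       InitTensor2D(&a, 2, 3, X_FLOAT);
       InitTensor2D(&b, 2, 3, X_FLOAT);
       a.SetDataRand(-1.0F, 1.0F);
       _Negate(&a, &b);        // low-level: writes -a into a pre-shaped b
       _NegateMe(&a);          // in place: a becomes -a
       XTensor c = Negate(a);  // returns a new tensor linked into the network
       Negate(a, b, true);     // writes into b; links only if requireLink is true */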
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its negative value (CUDA Kernel)
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
__global__
void KernelNegate(DTYPE * a, DTYPE * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
b[i] = -a[i];
}
/*
set each entry to its negative value (CUDA Kernel)
This is for float16 computation
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
__global__
void KernelNegate(__half * a, __half * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
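/* note: __hsub on half operands requires native fp16 arithmetic
   (compute capability >= 5.3); older devices take the float fallback below */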
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size)
b[i] = __hsub(__float2half(0), a[i]);
#else
if (i < size)
b[i] = __float2half(-__half2float(a[i]));
#endif
}
/*
set each entry to its negative value
>> a - input tensor
>> b - output tensor
*/
void _CudaNegate(const XTensor * a, XTensor * b)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_CUH__
#define __NEGATE_CUH__
#include "Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its negative value (CUDA Kernel) */
__global__
void KernelNegate(DTYPE * a, DTYPE * b, int size);
/* set each entry to its negative value (CUDA Kernel) with float16 data type*/
__global__
void KernelNegate(__half * a, __half * b, int size);
/* set each entry to its negative value */
void _CudaNegate(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */
void _Negate(const XTensor * a, XTensor * b);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
/* set every entry to its minus value */
void Negate(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Sign(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaSign(a, b);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0)
db[i] = 1.0F;
else if (d[i] == 0)
db[i] = 0.0F;
else
db[i] = -1.0F;
}
}
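/* worked example (illustrative): for a = (2.3, 0.0, -1.7),
   _Sign maps it to b = (1.0, 0.0, -1.0) */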
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
_Sign(a, a);
}
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
XTensor b(&a);
b.SetTMPFlag();
/* call _Sign function */
_Sign(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
return b;
}
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - whether to add the operation to the network
*/
void Sign(const XTensor & a, XTensor & b, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Sign function */
_Sign(&a, &b);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_CUH__
#define __SIGN_CUH__
#include "Sign.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its sign value (CUDA Kernel) */
__global__
void KernelSign(DTYPE * a, DTYPE * b, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
void KernelSign(__half * a, __half * b, int size);
/* set each entry to its sign value */
void _CudaSign(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_H__
#define __SIGN_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
void _Sign(const XTensor * a, XTensor * b);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _SignMe(XTensor * a);
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Sign(const XTensor & a);
/* set every entry to its sign value */
void Sign(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
......@@ -73,7 +73,8 @@ void _funcName(const XTensor * a, XTensor * b, int num) \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_INT&&b->dataType == X_INT), "TODO!"); \
CheckNTErrors(a->dataType == X_INT && b->dataType == X_INT, \
"TODO!"); \
int * d = (int*)a->data; \
int * db = (int*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
......@@ -90,30 +91,37 @@ void _funcName(const XTensor * a, XTensor * b, float num) \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_FLOAT&&b->dataType == X_FLOAT), "TODO!");\
CheckNTErrors(a->dataType == X_FLOAT && b->dataType == X_FLOAT, \
"TODO!"); \
float * d = (float*)a->data; \
float * db = (float*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (float)origFunc(d[i], num); \
}
#define SIMPLE_BINARY_FUNCTION_ME_INT(funcName, _funcName) \
void funcName(XTensor &a, int num) \
#define _SIMPLE_BINARY_FUNCTION_ME_INT(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, int num) \
{ \
_funcName(&a, &a, num); \
} \
_funcName(a, a, num); \
}
#define SIMPLE_BINARY_FUNCTION_ME(funcName, _funcName) \
void funcName(XTensor &a, float num) \
#define _SIMPLE_BINARY_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, float num) \
{ \
_funcName(a, a, num); \
}
#define SIMPLE_BINARY_FUNCTION_ME_INT(funcNameMe, _funcName) \
void funcNameMe(XTensor &a, int num) \
{ \
_funcName(&a, &a, num); \
} \
#define SIMPLE_BINARY_FUNCTION_INT(funcName, _funcName) \
void funcName(const XTensor &a, XTensor &b, int num) \
#define SIMPLE_BINARY_FUNCTION_ME(funcNameMe, _funcName) \
void funcNameMe(XTensor &a, float num) \
{ \
_funcName(&a, &b, num); \
} \
_funcName(&a, &a, num); \
}
#define SIMPLE_BINARY_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, float num) \
......@@ -123,50 +131,83 @@ XTensor funcName(const XTensor &a, float num) \
_funcName(&a, &b, num); \
XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \
} \
}
#define SIMPLE_BINARY_FUNCTION_INT(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, int num) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, num); \
XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \
}
#define SIMPLE_BINARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, float num, bool requireLink) \
void funcName(const XTensor &a, XTensor &b, float num) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b, num); \
if (requireLink) { \
if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
} \
}
#define SIMPLE_BINARY_FUNCTION_INT_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, int num) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b, num); \
if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
_SIMPLE_BINARY_FUNCTION_INT(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION_ME_INT(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION_ME_INT(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_ME_INT(ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale, MATH_SCALE)
SIMPLE_BINARY_FUNCTION_INT_VOID(Scale, _Scale, MATH_SCALE)
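/* illustration (not part of the original file): the int instantiations above
   expand, roughly, to
       void ScaleMe(XTensor & a, int num) { _Scale(&a, &a, num); }
       void Scale(const XTensor & a, XTensor & b, int num)
       {
           if (!b.isInit || !XTensor::IsSameShaped(&a, &b))
               InitTensor(&b, &a);
           _Scale(&a, &b, num);
           if (b.enableGrad)
               XLink::MakeLink(&a, NULL, &b, MATH_SCALE);
       } */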
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScaleFloat, scale)
SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
_SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_ME(ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale, MATH_SCALE)
SIMPLE_BINARY_FUNCTION_VOID(Scale, _Scale, MATH_SCALE)
_SIMPLE_BINARY_FUNCTION_INT(_Descale, _CudaDescale, descale)
SIMPLE_BINARY_FUNCTION_ME_INT(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION_ME_INT(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_ME_INT(DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale, MATH_DESCALE)
SIMPLE_BINARY_FUNCTION_INT_VOID(Descale, _Descale, MATH_DESCALE)
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescaleFloat, descale)
SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
_SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_ME(DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale, MATH_DESCALE)
SIMPLE_BINARY_FUNCTION_VOID(Descale, _Descale, MATH_DESCALE)
_SIMPLE_BINARY_FUNCTION_INT(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION_ME_INT(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION_ME_INT(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_ME_INT(ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_INT_VOID(Shift, _Shift, MATH_SHIFT)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShiftFloat, shift)
SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
_SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_ME(ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
_SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_ME_INT(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod, MATH_MOD)
SIMPLE_BINARY_FUNCTION_INT_VOID(Mod, _Mod, MATH_MOD)
#else
/* define the three macros separately, specify the respective function names (CPU mode) */
......
......@@ -16,8 +16,8 @@
*/
/*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2019-04-05
*/
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2019-04-05
*/
#ifndef __BINARY_H__
#define __BINARY_H__
......@@ -26,105 +26,84 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
scale up tensor entries
b = a * scale
*/
/* scale up tensor entries
b = a * scale */
void _Scale(const XTensor * a, XTensor * b, int scale);
void _Scale(const XTensor * a, XTensor * b, float scale);
/*
scale up tensor entries (on site)
b = a * scale
*/
void _ScaleMe(XTensor & a, int scale);
void _ScaleMe(XTensor & a, float scale);
/*
scale up tensor entries
b = a * scale
*/
void Scale(const XTensor & a, XTensor &b, int scale);
void Scale(const XTensor & a, XTensor &b, float scale, bool requireLink = false);
/*
scale up tensor entries (return an XTensor structure)
b = a * scale
*/
/* scale up tensor entries (on site)
b = a * scale */
void _ScaleMe(XTensor * a, int scale);
void _ScaleMe(XTensor * a, float scale);
/* scale up tensor entries (on site)
b = a * scale */
void ScaleMe(XTensor & a, int scale);
void ScaleMe(XTensor & a, float scale);
/* scale up tensor entries
b = a * scale */
void Scale(const XTensor & a, XTensor & b, int scale);
void Scale(const XTensor & a, XTensor & b, float scale);
/* scale up tensor entries (return an XTensor structure)
b = a * scale */
XTensor Scale(const XTensor & a, int scale);
XTensor Scale(const XTensor & a, float scale);
/*
descale tensor entries
b = a / scale
*/
/* descale tensor entries
b = a / scale */
void _Descale(const XTensor * a, XTensor * b, int scale);
void _Descale(const XTensor * a, XTensor * b, float scale);
/*
descale tensor entries (on site)
b = a / scale
*/
void _DescaleMe(XTensor & a, int scale);
void _DescaleMe(XTensor & a, float scale);
/*
descale tensor entries
b = a / scale
*/
/* descale tensor entries (on site)
b = a / scale */
void _DescaleMe(XTensor * a, int scale);
void _DescaleMe(XTensor * a, float scale);
/* descale tensor entries (on site)
b = a / scale */
void DescaleMe(XTensor & a, int scale);
void DescaleMe(XTensor & a, float scale);
/* descale tensor entries
b = a / scale */
void Descale(const XTensor & a, XTensor & b, int scale);
void Descale(const XTensor & a, XTensor & b, float scale, bool requireLink = false);
/*
descale tensor entries (return an XTensor structure)
b = a / scale
*/
void Descale(const XTensor & a, XTensor & b, float scale);
/* descale tensor entries (return an XTensor structure)
b = a / scale */
XTensor Descale(const XTensor & a, int scale);
XTensor Descale(const XTensor & a, float scale);
/*
shift tensor entries
b = a + shift
*/
/* shift tensor entries
b = a + shift */
void _Shift(const XTensor * a, XTensor * b, int shift);
void _Shift(const XTensor * a, XTensor * b, float shift);
/*
shift tensor entries (on site)
b = a + shift
*/
void _ShiftMe(XTensor & a, int shift);
void _ShiftMe(XTensor & a, float shift);
/*
shift tensor entries
b = a + shift
*/
/* shift tensor entries (on site)
b = a + shift */
void _ShiftMe(XTensor * a, int shift);
void _ShiftMe(XTensor * a, float shift);
/* shift tensor entries (on site)
b = a + shift */
void ShiftMe(XTensor & a, int shift);
void ShiftMe(XTensor & a, float shift);
/* shift tensor entries
b = a + shift */
void Shift(const XTensor & a, XTensor & b, int shift);
void Shift(const XTensor & a, XTensor & b, float shift, bool requireLink = false);
/*
shift tensor entries (return an XTensor structure)
b = a + shift
*/
void Shift(const XTensor & a, XTensor & b, float shift);
/* shift tensor entries (return an XTensor structure)
b = a + shift */
XTensor Shift(const XTensor & a, int shift);
XTensor Shift(const XTensor & a, float shift);
/*
mod tensor entries
b = a % mod
*/
/* mod tensor entries
b = a % mod */
void _Mod(const XTensor * a, XTensor * b, int base);
/*
mod tensor entries (on site)
b = a % mod
*/
void _ModMe(XTensor & a, int base);
/*
mod tensor entries
b = a % mod
*/
/* mod tensor entries (on site)
b = a % mod */
void _ModMe(XTensor * a, int base);
/* mod tensor entries (on site)
b = a % mod */
void ModMe(XTensor & a, int base);
/* mod tensor entries
b = a % mod */
void Mod(const XTensor & a, XTensor & b, int base);
/* mod tensor entries (return an XTensor structure)
b = a % mod */
XTensor Mod(const XTensor & a, int base);
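/* usage sketch (illustrative, not part of the original header): the three
   calling styles declared above, shown for Scale,
       XTensor a, b;
       ...
       XTensor c = Scale(a, 2);   // returns a new, linked tensor
       Scale(a, b, 2);            // writes into b; links only if b.enableGrad
       ScaleMe(a, 2);             // scales a in place */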
} // namespace nts(NiuTrans.Tensor)
......
......@@ -27,6 +27,10 @@
namespace nts{
DTYPE negate(DTYPE x) {
return -x;
}
DTYPE square(DTYPE x)
{
return x * x;
......@@ -37,6 +41,16 @@ DTYPE round(DTYPE r)
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}
DTYPE sign(DTYPE r)
{
if (r > 0)
return 1.0F;
else if (r == 0)
return 0.0F;
else
return -1.0F;
}
DTYPE isnonzero(DTYPE r)
{
return (r != 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
......@@ -65,6 +79,20 @@ void _funcName(const XTensor * a, XTensor * b) \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i]); \
}
#else
/* define the three macros separately, specify the respective function names (CPU mode) */
#define _SIMPLE_UNARY_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
DTYPE * d = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i]); \
}
#endif
#define _SIMPLE_UNARY_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a) \
......@@ -72,8 +100,14 @@ void _funcNameMe(XTensor * a) \
_funcName(a, a); \
}
#define SIMPLE_UNARY_FUNCTION_ME(funcNameMe, _funcName) \
void funcNameMe(XTensor & a) \
{ \
_funcName(&a, &a); \
}
#define SIMPLE_UNARY_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a) \
XTensor funcName(const XTensor & a) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
......@@ -83,191 +117,124 @@ XTensor funcName(const XTensor &a) \
}
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, bool requireLink) \
void funcName(const XTensor & a, XTensor & b) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b); \
if (requireLink) { \
if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
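/* illustration (not part of the original file): the Negate instantiation later
   in this file, SIMPLE_UNARY_FUNCTION_VOID(Negate, _Negate, MATH_NEGATE),
   expands roughly to
       void Negate(const XTensor & a, XTensor & b)
       {
           if (!b.isInit || !XTensor::IsSameShaped(&a, &b))
               InitTensor(&b, &a);
           _Negate(&a, &b);
           if (b.enableGrad)
               XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
       } */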
#ifdef USE_CUDA
_SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
SIMPLE_UNARY_FUNCTION_VOID(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
SIMPLE_UNARY_FUNCTION_VOID(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
SIMPLE_UNARY_FUNCTION_VOID(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
SIMPLE_UNARY_FUNCTION_VOID(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
SIMPLE_UNARY_FUNCTION_VOID(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Negate, _CudaNegate, negate)
_SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
SIMPLE_UNARY_FUNCTION_VOID(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sign, _CudaSign, sign)
_SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
SIMPLE_UNARY_FUNCTION_VOID(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
SIMPLE_UNARY_FUNCTION_VOID(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
SIMPLE_UNARY_FUNCTION_VOID(Sin, _Sin, MATH_SIN)
_SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos)
_SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos)
SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS)
SIMPLE_UNARY_FUNCTION_VOID(Cos, _Cos, MATH_COS)
_SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
SIMPLE_UNARY_FUNCTION_VOID(Tan, _Tan, MATH_TAN)
#else
/* define the three macros separately, specify the respective function names (CPU mode) */
#define _SIMPLE_UNARY_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
DTYPE * d = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i]); \
}
#define _SIMPLE_UNARY_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a) \
{ \
_funcName(a, a); \
}
#define SIMPLE_UNARY_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b); \
XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \
}
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, bool requireLink) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b); \
if (requireLink) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
_SIMPLE_UNARY_FUNCTION(_Absolute, fabs)
_SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
_SIMPLE_UNARY_FUNCTION(_Exp, exp)
_SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION(_Negate, negate)
_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION(_Sign, sign)
_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
_SIMPLE_UNARY_FUNCTION(_Square, square)
_SIMPLE_UNARY_FUNCTION(_Sin, sin)
_SIMPLE_UNARY_FUNCTION(_Cos, cos)
_SIMPLE_UNARY_FUNCTION(_Tan, tan)
#endif // USE_CUDA
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION_ME(AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
SIMPLE_UNARY_FUNCTION_VOID(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION_ME(CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
SIMPLE_UNARY_FUNCTION_VOID(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION_ME(ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
SIMPLE_UNARY_FUNCTION_VOID(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION_ME(FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
SIMPLE_UNARY_FUNCTION_VOID(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION_ME(IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION_ME(IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION_ME(LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
SIMPLE_UNARY_FUNCTION_VOID(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_NegateMe, _Negate)
SIMPLE_UNARY_FUNCTION_ME(NegateMe, _Negate)
SIMPLE_UNARY_FUNCTION(Negate, _Negate, MATH_NEGATE)
SIMPLE_UNARY_FUNCTION_VOID(Negate, _Negate, MATH_NEGATE)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION_ME(RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
SIMPLE_UNARY_FUNCTION_VOID(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SignMe, _Sign)
SIMPLE_UNARY_FUNCTION_ME(SignMe, _Sign)
SIMPLE_UNARY_FUNCTION(Sign, _Sign, MATH_SIGN)
SIMPLE_UNARY_FUNCTION_VOID(Sign, _Sign, MATH_SIGN)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION_ME(SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
SIMPLE_UNARY_FUNCTION_VOID(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION_ME(SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
SIMPLE_UNARY_FUNCTION_VOID(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION_ME(SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
SIMPLE_UNARY_FUNCTION_VOID(Sin, _Sin, MATH_SIN)
_SIMPLE_UNARY_FUNCTION(_Cos, cos)
_SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos)
SIMPLE_UNARY_FUNCTION_ME(CosMe, _Cos)
SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS)
SIMPLE_UNARY_FUNCTION_VOID(Cos, _Cos, MATH_COS)
_SIMPLE_UNARY_FUNCTION(_Tan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION_ME(TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
SIMPLE_UNARY_FUNCTION_VOID(Tan, _Tan, MATH_TAN)
/*_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)*/
#endif
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -30,6 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
__device__
DTYPE cudanegate(DTYPE x)
{
return -x;
}
__device__
DTYPE cudasquare(DTYPE x)
{
return x * x;
......@@ -42,6 +48,17 @@ DTYPE cudaround(DTYPE r)
}
__device__
DTYPE cudasign(DTYPE r)
{
if (r > 0)
return 1.0F;
else if (r == 0)
return 0.0F;
else
return -1.0F;
}
__device__
DTYPE cudaisnonzero(DTYPE r)
{
return (r != 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
......@@ -72,7 +89,7 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->isSparse == false), "TODO!"); \
CheckNTErrors(a->isSparse == false, "TODO!"); \
\
int gridSize[3]; \
int blockSize[3]; \
......@@ -107,7 +124,9 @@ SIMPLE_UNARY_FUNCTION_GPU(Floor, floor)
SIMPLE_UNARY_FUNCTION_GPU(IsNonZero, cudaisnonzero)
SIMPLE_UNARY_FUNCTION_GPU(IsZero, cudaiszero)
SIMPLE_UNARY_FUNCTION_GPU(Log, log)
SIMPLE_UNARY_FUNCTION_GPU(Negate, cudanegate)
SIMPLE_UNARY_FUNCTION_GPU(Round, cudaround)
SIMPLE_UNARY_FUNCTION_GPU(Sign, cudasign)
SIMPLE_UNARY_FUNCTION_GPU(Sqrt, sqrt)
SIMPLE_UNARY_FUNCTION_GPU(Square, cudasquare)
......
......@@ -92,6 +92,15 @@ void KernelLog(__half * a, __half * b, int size);
/* set each entry to its logarithm value */
void _CudaLog(const XTensor * a, XTensor * b);
/* set each entry to its negative value (CUDA Kernel) */
__global__
void KernelNegate(DTYPE * a, DTYPE * b, int size);
/* set each entry to its negative value (CUDA Kernel) with float16 data type*/
__global__
void KernelNegate(__half * a, __half * b, int size);
/* set each entry to its negative value */
void _CudaNegate(const XTensor * a, XTensor * b);
/* set each entry to its round value (CUDA Kernel) */
__global__
void KernelRound(DTYPE * a, DTYPE * b, int size);
......@@ -101,6 +110,15 @@ void KernelRound(__half * a, __half * b, int size);
/* set each entry to its round value */
void _CudaRound(const XTensor * a, XTensor * b);
/* set each entry to its sign value (CUDA Kernel) */
__global__
void KernelSign(DTYPE * a, DTYPE * b, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
void KernelSign(__half * a, __half * b, int size);
/* set each entry to its sign value */
void _CudaSign(const XTensor * a, XTensor * b);
/* set each entry to its sqrt value (CUDA Kernel) */
__global__
void KernelSqrt(DTYPE * a, DTYPE * b, int size);
......
......@@ -31,144 +31,210 @@ void _Absolute(const XTensor * a, XTensor * b);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void _AbsoluteMe(XTensor * a);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void AbsoluteMe(XTensor & a);
/* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a);
/* set every entry to its absolute value */
void Absolute(const XTensor & a, XTensor & b, bool requireLink = false);
void Absolute(const XTensor & a, XTensor & b);
/* set every entry to its ceil value */
void _Ceil(const XTensor * a, XTensor * b);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void _CeilMe(XTensor * a);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void CeilMe(XTensor & a);
/* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a);
/* set every entry to its ceil value */
void Ceil(const XTensor & a, XTensor & b, bool requireLink = false);
void Ceil(const XTensor & a, XTensor & b);
/* set every entry to its exponent value */
void _Exp(const XTensor * a, XTensor * b);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void _ExpMe(XTensor * a);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void ExpMe(XTensor & a);
/* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a);
/* set every entry to its exponent value */
void Exp(const XTensor & a, XTensor & b, bool requireLink = false);
void Exp(const XTensor & a, XTensor & b);
/* set every entry to its floor value */
void _Floor(const XTensor * a, XTensor * b);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void _FloorMe(XTensor * a);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void FloorMe(XTensor & a);
/* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a);
/* set every entry to its floor value */
void Floor(const XTensor & a, XTensor & b, bool requireLink = false);
void Floor(const XTensor & a, XTensor & b);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void _IsNonZero(const XTensor *a, XTensor *b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsNonZeroMe(XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void IsNonZero(const XTensor &a, XTensor & b, bool requireLink = false);
void IsNonZero(const XTensor &a, XTensor & b);
/* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsZeroMe(XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero */
void IsZero(const XTensor &a, XTensor & b, bool requireLink = false);
void IsZero(const XTensor &a, XTensor & b);
/* set every entry to its logarithm value */
void _Log(const XTensor * a, XTensor * b);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void _LogMe(XTensor * a);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void LogMe(XTensor & a);
/* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a);
/* set every entry to its logarithm value */
void Log(const XTensor & a, XTensor & b, bool requireLink = false);
void Log(const XTensor & a, XTensor & b);
/* set every entry to its negative value */
void _Negate(const XTensor * a, XTensor * b);
/* set every entry to its negative value (do it on site)
keep the result in the input tensor a and return nothing */
void _NegateMe(XTensor * a);
/* set every entry to its negative value (do it on site)
keep the result in the input tensor a and return nothing */
void NegateMe(XTensor & a);
/* set every entry to its negative value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Negate(const XTensor & a);
/* set every entry to its negative value */
void Negate(const XTensor & a, XTensor & b);
/* set every entry to its round value */
void _Round(const XTensor * a, XTensor * b);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void _RoundMe(XTensor * a);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void RoundMe(XTensor & a);
/* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a);
/* set every entry to its round value */
void Round(const XTensor & a, XTensor & b, bool requireLink = false);
void Round(const XTensor & a, XTensor & b);
/* set every entry to its sign value */
void _Sign(const XTensor * a, XTensor * b);
/* set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing */
void _SignMe(XTensor * a);
/* set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing */
void SignMe(XTensor & a);
/* set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sign(const XTensor & a);
/* set every entry to its sign value */
void Sign(const XTensor & a, XTensor & b);
/* set every entry to its sqrt value */
void _Sqrt(const XTensor * a, XTensor * b);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void SqrtMe(XTensor & a);
/* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a);
/* set every entry to its sqrt value */
void Sqrt(const XTensor & a, XTensor & b, bool requireLink = false);
void Sqrt(const XTensor & a, XTensor & b);
/* set every entry to its square value */
void _Square(const XTensor * a, XTensor * b);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void _SquareMe(XTensor * a);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void SquareMe(XTensor & a);
/* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a);
/* set every entry to its square value */
void Square(const XTensor & a, XTensor & b, bool requireLink = false);
void Square(const XTensor & a, XTensor & b);
/* set every entry to its sine value */
void _Sin(const XTensor * a, XTensor * b);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void _SinMe(XTensor * a);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void SinMe(XTensor & a);
/* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a);
/* set every entry to its sine value */
void Sin(const XTensor & a, XTensor & b, bool requireLink = false);
void Sin(const XTensor & a, XTensor & b);
/* set every entry to its cosine value */
void _Cos(const XTensor * a, XTensor * b);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void _CosMe(XTensor * a);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void CosMe(XTensor & a);
/* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a);
/* set every entry to its cosine value */
void Cos(const XTensor & a, XTensor & b, bool requireLink = false);
void Cos(const XTensor & a, XTensor & b);
/* set every entry to its tangent value */
void _Tan(const XTensor * a, XTensor * b);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void _TanMe(XTensor * a);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void TanMe(XTensor & a);
/* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a);
/* set every entry to its tangent value */
void Tan(const XTensor & a, XTensor & b, bool requireLink = false);
void Tan(const XTensor & a, XTensor & b);
} // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#include <math.h>
#include "CrossEntropy.h"
#include "CrossEntropy.cuh"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/math/Unary.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/arithmetic/Negate.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceSumAll.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
compute the cross entropy loss
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> loss - the computed loss
>> weight - a rescaling weight given to each class
>> padding - specify a target value that is ignored and does not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int unitNum = output->dimSize[n];
CheckNTErrors(n >= 0 && n < output->order, "Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == unitNum, "Wrong weight tensor!");
CheckNTErrors(padding == NULL || XTensor::IsSameShaped(padding, loss),
"The loss tensor and padding tensor must be same shape!");
CheckNTErrors(loss->order == output->order - 1, "Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE, "TODO!");
XTensor * interBuf1 = NewTensorBuf(output, output->devID, output->mem);
XTensor * interBuf2 = NewTensorBuf(output, output->devID, output->mem);
_Log(output, interBuf1);
_Multiply(gold, interBuf1, interBuf2);
if(weight != NULL)
_MultiplyDimMe(interBuf2, weight, n);
_NegateMe(interBuf2);
_ReduceSum(interBuf2, loss, n);
if(padding != NULL)
_MultiplyMe(loss, padding);
DelTensorBuf(interBuf2);
DelTensorBuf(interBuf1);
}
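/* worked example (illustrative numbers): for a single position with
   gold = (0, 1, 0) and output = (0.2, 0.7, 0.1), the loss is
   -(0*log(0.2) + 1*log(0.7) + 0*log(0.1)) = -log(0.7) ≈ 0.357 */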
/*
compute the cross entropy loss (faster implementation with optimized code)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> loss - the computed loss
>> weight - a rescaling weight given to each class
>> padding - specify a target value that is ignored and does not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leading dimension!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || XTensor::IsSameShaped(padding, loss),
"The loss tensor and padding tensor must be same shape!");
CheckNTErrors(loss->order == output->order - 1,
"Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
for(int i = 0; i < order; i++){
if(i < n){
CheckNTErrors((output->GetDim(i) == loss->GetDim(i)), "Unmatched tensors!");
}
else if(i > n){
CheckNTErrors((output->GetDim(i) == loss->GetDim(i - 1)), "Unmatched tensors!");
}
}
#ifdef USE_CUDA
if(output->devID >= 0) {
_CudaCrossEntropyFast(output, gold, loss, weight, padding, leadingDim);
return;
}
#endif
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
DTYPE * lossData = (DTYPE*)loss->data;
DTYPE tmpLoss;
int lossPos;
int goldPos;
if(weight == NULL) {
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
tmpLoss = 0;
lossPos = i * stride + j;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
tmpLoss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos));
}
*(lossData + lossPos) = tmpLoss;
}
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
lossPos = i * stride + j;
if(*(paddingData + lossPos) == 0)
*(lossData + lossPos) = 0;
else {
tmpLoss = 0;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
tmpLoss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos));
}
*(lossData + lossPos) = tmpLoss;
}
}
}
}
}
else {
DTYPE * weightData = (DTYPE*)weight->data;
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
tmpLoss = 0;
lossPos = i * stride + j;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
tmpLoss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos)) *
(*(weightData + k));
}
*(lossData + lossPos) = tmpLoss;
}
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
lossPos = i * stride + j;
if(*(paddingData + lossPos) == 0)
*(lossData + lossPos) = 0;
else {
tmpLoss = 0;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
tmpLoss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos)) *
(*(weightData + k));
}
*(lossData + lossPos) = tmpLoss;
}
}
}
}
}
}
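/*
   [illustrative sketch, not part of the library] how goldPos and lossPos above address
   the flattened tensors: viewing output as a row-major array of shape
   [blockNum, leadingDimSize, stride] (the class dimension in the middle), element
   (i, k, j) lives at (i * leadingDimSize + k) * stride + j == i * blockSize + k * stride + j,
   which is exactly the goldPos used in the loops; the loss drops the class dimension,
   so position (i, j) lives at lossPos = i * stride + j
*/
static int FlatGoldPosSketch(int i, int k, int j, int leadingDimSize, int stride)
{
    int blockSize = leadingDimSize * stride;
    return i * blockSize + k * stride + j;
}
static int FlatLossPosSketch(int i, int j, int stride)
{
    return i * stride + j;
}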
/*
compute the cross entropy loss
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> reduceWay - the reduction method, sum or mean
>> weight - a rescaling weight given to each class
>> padding - indicates the positions that are ignored and do not contribute to the loss computation
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
DTYPE loss = 0;
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int unitNum = output->dimSize[n];
CheckNTErrors(n >= 0 && n < output->order, "Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == unitNum, "Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"The loss tensor and padding tensor must be same shape!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE, "TODO!");
int * dimSize = new int[order - 1];
for (int i = 0; i < order; i++) {
if(i < n)
dimSize[i] = output->dimSize[i];
else if(i > n)
dimSize[i - 1] = output->dimSize[i];
}
XTensor * lossBuf = NewTensorBuf(output->order - 1, dimSize, output->dataType, output->denseRatio,
output->devID, output->mem);
_CrossEntropy(output, gold, lossBuf, weight, padding, leadingDim);
loss = _ReduceSumAll(lossBuf);
if(reduceWay == REDUCE_MEAN) {
int nonZeroNum;
if(padding == NULL) {
nonZeroNum = lossBuf->unitNum;
}
else {
XTensor * tmp = NewTensorBuf(padding, padding->devID, padding->mem);
_IsNonZero(padding, tmp);
nonZeroNum = (int)_ReduceSumAll(tmp);
DelTensorBuf(tmp);
}
loss = loss / (DTYPE)nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
}
else {
ShowNTErrors("TODO");
}
delete[] dimSize;
DelTensorBuf(lossBuf);
return loss;
}
/*
compute the cross entropy loss (faster implementation with optimized code)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> reduceWay - the reduction method, sum or mean
>> weight - a rescaling weight given to each class
>> padding - indicates the positions that are ignored and do not contribute to the loss computation
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
DTYPE loss = 0;
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
if(padding != NULL) {
for(int i = 0; i < order; i++){
if(i < n){
CheckNTErrors((output->GetDim(i) == padding->GetDim(i)), "Unmatched tensors!");
}
else if(i > n){
CheckNTErrors((output->GetDim(i) == padding->dimSize[i - 1]), "Unmatched tensors!");
}
}
}
#ifdef USE_CUDA
if(output->devID >= 0) {
return _CudaCrossEntropyFast(output, gold, reduceWay, weight, padding, leadingDim);
}
#endif
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
int paddingPos;
int goldPos;
int nonZeroNum = 0;
if(weight == NULL) {
if(padding == NULL) {
nonZeroNum = blockNum * stride;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
paddingPos = i * stride + j;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
loss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos));
}
}
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
paddingPos = i * stride + j;
if(*(paddingData + paddingPos) == 0)
continue;
else {
nonZeroNum += 1;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
loss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos));
}
}
}
}
}
}
else {
DTYPE * weightData = (DTYPE*)weight->data;
if(padding == NULL) {
nonZeroNum = blockNum * stride;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
paddingPos = i * stride + j;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
loss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos)) *
(*(weightData + k));
}
}
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
paddingPos = i * stride + j;
if(*(paddingData + paddingPos) == 0)
continue;
else {
nonZeroNum += 1;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
loss += -(*(goldData + goldPos)) *
(DTYPE)log(*(outputData + goldPos)) *
(*(weightData + k));
}
}
}
}
}
}
if(reduceWay == REDUCE_MEAN) {
loss = loss / (DTYPE)nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
}
else {
ShowNTErrors("TODO");
}
return loss;
}
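/*
   [illustrative sketch, not part of the library] the REDUCE_MEAN branch above divides
   the summed loss by the number of positions that actually contribute, i.e. the non-zero
   entries of padding (or blockNum * stride when no padding is given); a standalone
   reference over per-position losses:
*/
static DTYPE MeanLossSketch(const DTYPE * posLoss, const DTYPE * padding, int posNum)
{
    DTYPE sum = 0;
    int nonZeroNum = 0;
    for (int p = 0; p < posNum; p++) {
        if (padding != NULL && padding[p] == 0)
            continue;            /* padded positions are skipped entirely */
        sum += posLoss[p];
        nonZeroNum += 1;
    }
    return nonZeroNum > 0 ? sum / (DTYPE)nonZeroNum : (DTYPE)0.0;
}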
/*
backward computation for the cross entropy function
loss = sum_{i} (-t_i * log(y_i))
dE/dy_i = -t_i / y_i
where E is the error (loss) function that measures the errors in y
with respect to the gold standard, and y is the model output
>> dedy - dE/dy (for return)
>> output - model prediction
>> gold - gold standard
>> weight - a rescaling weight given to each class
>> padding - indicates the positions that are ignored and do not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold, const XTensor * weight,
XTensor * padding, int leadingDim)
{
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leading dimension!");
CheckNTErrors(XTensor::IsSameShaped(dedy, output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
if(padding != NULL) {
for(int i = 0; i < order; i++){
if(i < n){
CheckNTErrors((output->GetDim(i) == padding->GetDim(i)), "Unmatched tensors!");
}
else if(i > n){
CheckNTErrors((output->GetDim(i) == padding->dimSize[i - 1]), "Unmatched tensors!");
}
}
}
#ifdef USE_CUDA
if(output->devID >= 0) {
_CudaCrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
return;
}
#endif
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
DTYPE * dedyData = (DTYPE*)dedy->data;
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
int paddingPos;
int goldPos;
if(weight == NULL) {
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
*(dedyData + goldPos) = -(*(goldData + goldPos)) /
(*(outputData + goldPos));
}
}
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
paddingPos = i * stride + j;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
if(*(paddingData + paddingPos) == 0)
*(dedyData + goldPos) = 0;
else
*(dedyData + goldPos) = -(*(goldData + goldPos)) /
(*(outputData + goldPos));
}
}
}
}
}
else {
DTYPE * weightData = (DTYPE*)weight->data;
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
*(dedyData + goldPos) = -(*(weightData + k)) *
(*(goldData + goldPos)) /
(*(outputData + goldPos));
}
}
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
for(int j = 0; j < stride; j++) {
paddingPos = i * stride + j;
for(int k = 0; k < leadingDimSize; k++) {
goldPos = i * blockSize + j + k * stride;
if(*(paddingData + paddingPos) == 0)
*(dedyData + goldPos) = 0;
else
*(dedyData + goldPos) = -(*(weightData + k)) *
(*(goldData + goldPos)) /
(*(outputData + goldPos));
}
}
}
}
}
//if(padding != NULL) {
// XTensor * tmp = NewTensor(padding);
// _IsNonZero(padding, tmp);
// int nonZeroNum = (int)_ReduceSumAll(tmp);
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
// delete tmp;
//}
//else {
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
//}
}
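/*
   [illustrative sketch, not part of the library] a finite-difference check of the
   gradient formula dE/dy_i = -t_i / y_i used above, for a single output position with
   classNum classes; it assumes every output[k] lies well inside (0, 1) so that
   output[k] +/- eps stays positive, and returns the largest absolute difference
   between the analytic and the numeric gradient
*/
static DTYPE CrossEntropyGradCheckSketch(const DTYPE * output, const DTYPE * gold,
                                         int classNum)
{
    const DTYPE eps = (DTYPE)1e-3;
    DTYPE maxDiff = 0;
    for (int k = 0; k < classNum; k++) {
        /* analytic gradient of loss = sum_i (-gold_i * log(output_i)) */
        DTYPE analytic = -gold[k] / output[k];
        /* central difference on the k-th entry of the output */
        DTYPE numeric = (-gold[k] * (DTYPE)log(output[k] + eps)
                         + gold[k] * (DTYPE)log(output[k] - eps)) / (2 * eps);
        DTYPE diff = (DTYPE)fabs(analytic - numeric);
        if (diff > maxDiff)
            maxDiff = diff;
    }
    return maxDiff;
}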
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __CROSSENTROPY_CUH__
#define __CROSSENTROPY_CUH__
#include "../XTensor.h"
#include "../XDevice.h"
#include "CrossEntropy.cuh"
#include "CrossEntropy.h"
#include "../core/arithmetic/Div.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/arithmetic/Negate.h"
#include "../core/math/Unary.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceSumAll.h"
#include "../core/shape/Transpose.h"
#include "../core/shape/Unsqueeze.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
compute the cross entropy loss (cuda version)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> loss - the computed loss (result tensor)
>> weight - a rescaling weight given to each class
>> padding - indicates the positions that are ignored and do not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
XTensor * interBuf1 = NewTensorBuf(output, output->devID, output->mem);
XTensor * interBuf2 = NewTensorBuf(output, output->devID, output->mem);
_Log(output, interBuf1);
_Multiply(gold, interBuf1, interBuf2);
if(weight != NULL)
_MultiplyDimMe(interBuf2, weight, n);
_NegateMe(interBuf2);
_ReduceSum(interBuf2, loss, n);
if(padding != NULL)
_MultiplyMe(loss, padding);
DelTensorBuf(interBuf2);
DelTensorBuf(interBuf1);
}
/*
compute the cross entropy loss (scalar version)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> reduceWay - the reduction method, sum or mean
>> weight - a rescaling weight given to each class
>> padding - indicates the positions that are ignored and do not contribute to the loss computation
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
DTYPE loss = 0;
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int * dimSize = new int[output->order - 1];
for (int i = 0; i < order; i++) {
if(i < n)
dimSize[i] = output->dimSize[i];
else if(i > n)
dimSize[i - 1] = output->dimSize[i];
}
XTensor * lossBuf = NewTensorBuf(output->order - 1, dimSize, output->dataType, output->denseRatio,
output->devID, output->mem);
_CudaCrossEntropyFast(output, gold, lossBuf, weight, padding, leadingDim);
loss = _ReduceSumAll(lossBuf);
if(reduceWay == REDUCE_MEAN) {
int nonZeroNum;
if(padding == NULL) {
nonZeroNum = lossBuf->unitNum;
}
else {
XTensor * tmp = NewTensorBuf(padding, padding->devID, padding->mem);
_IsNonZero(padding, tmp);
nonZeroNum = (int)_ReduceSumAll(tmp);
DelTensorBuf(tmp);
}
loss = loss / (DTYPE)nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
}
else {
ShowNTErrors("TODO");
}
delete[] dimSize;
DelTensorBuf(lossBuf);
return loss;
}
/*
backward computation of cross entropy function
loss = sum_{i} (-t_i * log(y_i))
dE/dy_i = -t_i / y_i
where E is the error (loss) function that measures the errors in y
with respect to the gold standard, and y is the model output
>> dedy - dE/dy (for return)
>> output - model prediction
>> gold - gold standard
>> weight - a rescaling weight given to each class
>> padding - indicates the positions that are ignored and do not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold, const XTensor * weight,
XTensor * padding, int leadingDim)
{
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
_Div(gold, output, dedy);
_NegateMe(dedy);
if(weight != NULL)
_MultiplyDimMe(dedy, weight, n);
if(padding != NULL) {
int paddingOrder = padding->order;
int * paddingDims = new int[paddingOrder];
memcpy(paddingDims, padding->dimSize, padding->order * sizeof(int));
padding->Reshape(padding->unitNum);
int order = dedy->order;
int * dims = new int[order];
memcpy(dims, dedy->dimSize, dedy->order * sizeof(int));
dedy->Reshape(dedy->unitNum/dedy->GetDim(n), dedy->GetDim(n));
_MultiplyDimMe(dedy, padding, 0);
padding->Reshape(paddingOrder, paddingDims);
dedy->Reshape(order, dims);
delete[] paddingDims;
delete[] dims;
}
//if(padding != NULL) {
// XTensor * tmp = NewTensor(padding);
// _IsNonZero(padding, tmp);
// int nonZeroNum = (int)_ReduceSumAll(tmp);
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
// delete tmp;
//}
//else {
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
//}
}
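/*
   [illustrative sketch, not part of the library] what the reshape plus
   _MultiplyDimMe(dedy, padding, 0) above amounts to: dedy is viewed as a
   [rowNum, classNum] matrix (rowNum = unitNum / classNum) and every row r is scaled
   by padding[r], so positions marked as padding get a zero gradient
*/
static void MaskRowsByPaddingSketch(DTYPE * dedy, const DTYPE * padding,
                                    int rowNum, int classNum)
{
    for (int r = 0; r < rowNum; r++) {
        for (int c = 0; c < classNum; c++) {
            /* padding[r] is 0 for ignored positions and non-zero otherwise */
            dedy[r * classNum + c] *= padding[r];
        }
    }
}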
} // namespace nts(NiuTrans.Tensor)
#endif // __CROSSENTROPY_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __CROSSENTROPY_CUH__
#define __CROSSENTROPY_CUH__
#include "../XTensor.h"
#include "CrossEntropy.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the cross entropy loss */
void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (return the loss) */
DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold, const XTensor * weight = NULL,
XTensor * padding = NULL, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
#endif // __CROSSENTROPY_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __CROSSENTROPY_H__
#define __CROSSENTROPY_H__
#include "../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
enum LOSS_COMPUTE_WAY{
REDUCE_SUM,
REDUCE_MEAN
};
/* compute the cross entropy loss */
void _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss */
void _CrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (return the loss) */
DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (return the loss) */
DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay = REDUCE_MEAN, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold, const XTensor * weight = NULL,
XTensor * padding = NULL, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
#endif // __CROSSENTROPY_H__
\ No newline at end of file
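A minimal usage sketch of the interface declared above. It assumes the XTensor helpers
InitTensor2D and SetData from XTensor.h behave as elsewhere in the library; if their
signatures differ, adjust accordingly. Illustration only, not part of the commit.
#include "CrossEntropy.h"
using namespace nts;
void CrossEntropyUsageSketch()
{
    /* a toy batch of 2 positions over 3 classes, on the CPU (devID = -1) */
    DTYPE outputData[6] = {0.7F, 0.2F, 0.1F, 0.1F, 0.8F, 0.1F};
    DTYPE goldData[6]   = {1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F};
    XTensor output;
    XTensor gold;
    InitTensor2D(&output, 2, 3, X_FLOAT, -1);
    InitTensor2D(&gold, 2, 3, X_FLOAT, -1);
    output.SetData(outputData, 6);
    gold.SetData(goldData, 6);
    /* mean cross entropy over the batch; the class dimension is the last one */
    DTYPE meanLoss = _CrossEntropyFast(&output, &gold, REDUCE_MEAN);
    /* per-position gradients dE/dy = -t / y */
    XTensor dedy;
    InitTensor2D(&dedy, 2, 3, X_FLOAT, -1);
    _CrossEntropyBackward(&dedy, &output, &gold);
    (void)meanLoss;
}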
......@@ -25,7 +25,6 @@
#include "../core/math/Power.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/math/Unary.h"
#include "../core/arithmetic/Negate.h"
#include "../core/arithmetic/Sum.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/reduce/ReduceSum.h"
......
......@@ -28,7 +28,6 @@
#include "../core/arithmetic/Multiply.h"
#include "../core/math/Unary.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/arithmetic/Negate.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceSumAll.h"
......
......@@ -29,7 +29,6 @@
#include "../core/arithmetic/Div.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/arithmetic/Negate.h"
#include "../core/math/Unary.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/reduce/ReduceSum.h"
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_NEGATE_H__
#define __TEST_NEGATE_H__
#include "../core/arithmetic/Negate.h"
#include "../core/math/Unary.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SIGN_H__
#define __TEST_SIGN_H__
#include "../core/arithmetic/Sign.h"
#include "../core/math/Unary.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......