Commit 4e2bc4cf by xiaotong

Merge branch 'xiaotong-working' of 47.105.50.196:NiuTrans/NiuTrans.Tensor into xiaotong-working

# Conflicts:
#	source/tensor/Main.cpp
parents 577d1778 28fd2d94
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
*/
#include "XBackwardLoss.h"
#include "../tensor/XName.h"
#include "../tensor/function/HardTanH.h"
#include "../tensor/function/LogSoftmax.h"
namespace nts{
/*
compute dE/dx for a given activation function y = f(x)
>> gold - gold standard used to measure the error (loss)
>> y - output of the function
>> x - input of the function
>> dedy - dE/dy
>> dedx - dE/dx (the result)
>> funcID - id of the activation function f
>> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
                        XTensor * dedy, XTensor * dedx,
                        int funcID, void * params,
                        LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors(gold && y && x, "Empty input tensors!");
    CheckNTErrors(dedx, "Empty gradient tensors!");
    CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");

    /* dispatch to the backward routine of the activation function
       that produced y */
    switch(funcID){
    case FUNC_HARDTANH:
        _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
        break;
    case FUNC_LOGSOFTMAX: {
        /* the first function parameter keeps the leading dimension
           along which the softmax was taken */
        int leadDim = *(int*)params;
        _LogSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
        break;
    }
    default:
        ShowNTErrors("wrong function found when call the backward process!");
        break;
    }
}
/*
compute dE/dy for variable y and error(loss) function E
>> gold - gold standard to measure error (or loss)
>> y - output of the function (the model prediction)
>> dedy - dE/dy, i.e., the gradient of the loss with respect to y (the result)
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName)
{
/* delegate to the generic loss backward routine, which fills dedy */
LossBackward(dedy, gold, y, lossName);
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
* My students worked all night to prepare a submission to CWMT. Good luck
* to them!
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__
namespace nts{
/* this class computes the gradient (of a output node)
with respect to the loss */
class XLossGrad
{
public:
/* compute dE/dx for a given activation function y = f(x); funcID selects
which backward routine is called, and params carries its parameters */
void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E, i.e., the
gradient of the loss with respect to the output itself */
void Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
*/
#include "XBackwardNode.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/XName.h"
namespace nts{
/* create (or re-create) the gradient tensor attached to a node; the
   existing gradient is kept only if it already matches the node in
   type and shape */
void XNoder::MakeGrad(XTensor * node)
{
    if(node == NULL)
        return;

    /* nothing to do when the current gradient tensor is compatible */
    if(XTensor::IsIdentical(node, node->grad))
        return;

    delete node->grad;
    node->grad = NewTensor(node);
}
/* check whether the node is a leaf (input) node, i.e., it has no
   incoming edges */
bool XNoder::IsLeaf(XTensor * node)
{
    return node != NULL && node->income.tailNum == 0;
}
/* check whether the node is a root (output) node, i.e., it has no
   outgoing edges */
bool XNoder::IsRoot(XTensor * node)
{
    return node != NULL && node->outgo.tailNum == 0;
}
/* check whether the node keeps its gradient */
bool XNoder::IsGrad(XTensor * node)
{
    return node != NULL && node->isGrad;
}
/*
compute dE/dx of a node
Note that input of the node could be arbitrary tensors
>> node - node of the network
*/
void XNodeGrad::Compute(XTensor * node)
{
/* skip nodes that are missing or already processed */
if(node == NULL || node->visitMark == NODE_FINISHED)
return;
/* TODO(review): back-propagation for non-leaf nodes is not implemented
yet - this branch is an empty placeholder and silently does nothing */
if(!XNoder::IsLeaf(node)){
}
node->visitMark = NODE_FINISHED;
}
/* check whether the node was produced by a math operation
   (its incoming edge carries a MATH_* operator id) */
bool XNodeGrad::IsMathOP(XTensor * node)
{
    const XLink &edge = node->income;
    return (edge.typeID & MATH_BASE) != 0;
}
/*
compute dE/dx as a math operation, e.g., sum, multiply ...
>> node - the node whose input gradients are computed; node->grad (dE/dc)
          must already be available
*/
void XNodeGrad::ComputeMath(XTensor * node)
{
    CheckNTErrors(node->grad != NULL, "No gradient found!");

    XLink &income = node->income;
    int operID = income.typeID;

    /* c = a + b * \beta
       dE/da = dE/dc
       dE/db = dE/dc * \beta */
    if(operID == MATH_SUM){
        CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");
        XTensor * a = income.tails[0];
        XTensor * b = income.tails[1];
        DTYPE beta = income.GetParam(0);
        XNoder::MakeGrad(a);
        XNoder::MakeGrad(b);

        /* dE/da = dE/dc */
        _CopyValues(node->grad, a->grad);

        /* dE/db = dE/dc * \beta. BUGFIX: the scaled gradient goes to
           b->grad, not a->grad - the old code overwrote a->grad with the
           scaled values and left b->grad unset whenever beta != 1 */
        if(beta != 1.0F)
            _ScaleAndShift(node->grad, b->grad, beta);
        else
            _CopyValues(node->grad, b->grad);
    }
    /* c = a * b
       dE/da = dE/dc * b
       dE/db = dE/dc * a */
    else if(operID == MATH_MULTIPLY){
        CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
        XTensor * a = income.tails[0];
        XTensor * b = income.tails[1];
        /* NOTE(review): leadDim is recorded on the edge but is currently
           not forwarded to _Multiply - confirm whether broadcasting along
           the leading dimension is needed here */
        int leadDim = income.GetParamInt(0);
        XNoder::MakeGrad(a);
        XNoder::MakeGrad(b);
        CheckNTErrors(XTensor::IsIdentical(a, b), "Wrong sized input tensors!");
        _Multiply(node->grad, b, a->grad);
        _Multiply(node->grad, a, b->grad);
    }
    else{
        ShowNTErrors("TODO!");
    }
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDNODE_H__
#define __XBACKWARDNODE_H__
namespace nts{
#define NODE_UNFINISHED 0
#define NODE_DOING 1
#define NODE_FINISHED 2
/* node management: static helpers that query/prepare nodes (tensors)
of the computation network */
class XNoder
{
public:
/* make gradient tensor for a node (re-created if the shape changed) */
static
void MakeGrad(XTensor * node);
/* the node is a leaf node (input, no incoming edges) or not */
static
bool IsLeaf(XTensor * node);
/* the node is a root node (output, no outgoing edges) or not */
static
bool IsRoot(XTensor * node);
/* the node keeps the gradient or not */
static
bool IsGrad(XTensor * node);
};
/* this class computes the gradient for each node in the network
during back-propagation */
class XNodeGrad
{
public:
/* compute dE/dx of a node (dispatches on how the node was produced) */
void Compute(XTensor * node);
/* indicates whether the node is for a math operation */
bool IsMathOP(XTensor * node);
/* compute dE/dx as a math operation, e.g., sum, multiply ... */
void ComputeMath(XTensor * node);
};
}
#endif
\ No newline at end of file
......@@ -20,6 +20,9 @@
*/
#include "XNet.h"
#include "XBackwardLoss.h"
#include "XBackwardNode.h"
#include "../tensor/XName.h"
namespace nts{
......@@ -87,6 +90,44 @@ with a number of root nodes
/* back-propagation starting from a set of root (output) nodes;
golds holds one gold-standard tensor per root */
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
/* collect all reachable nodes (topologically ordered into `nodes`) */
Traverse(roots);
/* reset visiting marks so that every node is processed in this pass */
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
}
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
/* NOTE(review): assumes golds.count == roots.count (one gold per root) -
confirm with callers; no explicit check is performed here */
for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is no use in the
following process of back-propagation */
if(income.tailNum == 1 && (funcID & FUNCTION_BASE)){
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
/* we compute dE/dy (y is the output) if no predefined activation function is used */
else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, loss);
}
}
/* back-propagation from output to input, in reverse topological order */
/* TODO(review): the loop body only skips finished nodes - the per-node
gradient computation is not implemented yet */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
if(node->visitMark == NODE_FINISHED)
continue;
}
}
/*
......@@ -115,6 +156,15 @@ void XNet::Traverse(XList &roots)
for (int i = 0; i < roots.count; i++)
TarjanVisit((XTensor*)roots.Get(i), nodes, id);
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
if(XNoder::IsRoot(node))
outputs.Add(node);
if(XNoder::IsLeaf(node))
inputs.Add(node);
if(XNoder::IsGrad(node))
gradNodes.Add(node);
}
}
/*
......
......@@ -45,8 +45,7 @@ int main( int argc, const char ** argv )
//_CrtSetBreakAlloc(123);
/* a tiny test */
if(false)
SmallTest();
SmallTest();
//_CrtDumpMemoryLeaks();
//return 0;
......
......@@ -25,7 +25,7 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
int XLink::paramSize = 64;
int XLink::paramSize = PARAM_UNTI_SIZE;
/* constuctor */
XLink::XLink()
......@@ -235,6 +235,26 @@ void XLink::AddParam(void * param, int size)
}
/*
get a parameter in the default data type
>> i - index of the parameter
<< return - the parameter value, read from the i-th slot of the
            fixed-stride parameter array
*/
DTYPE XLink::GetParam(int i)
{
    return *(DTYPE*)((char*)params + i * paramSize);
}
/*
get a parameter as an integer
>> i - index of the parameter
<< return - the parameter value, read from the i-th slot of the
            fixed-stride parameter array
*/
int XLink::GetParamInt(int i)
{
    return *(int*)((char*)params + i * paramSize);
}
/*
create a hyperedge with two input tensors and a output tensor
>> t1 - a tail tensor
>> t2 - another tail tensor
......
......@@ -34,6 +34,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
#define PARAM_UNTI_SIZE 64
/*
This defines the link among tensors in networks. XLink can be
......@@ -115,12 +116,18 @@ struct XLink
/* add two tails in one time */
void AddTwoTails(XTensor * t1, XTensor * t2);
/* add a integer parameter */
/* add a parameter in default type */
void AddParam(DTYPE param);
/* add a integer parameter */
/* add a parameter */
void AddParam(void * param, int size);
/* get a paramter in default type */
DTYPE GetParam(int i);
/* get a paramter in integer */
int GetParamInt(int i);
/* create a hyper edge with two input tensors and a output tensor */
static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
......
......@@ -26,7 +26,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get operator name */
const char * GetOPName(int type)
{
if((type & MATH_ARITHMETIC) != 0){
if((type & MATH_BASE) != 0){
if(type == MATH_ABSOLUTE)
return "M_ABSOLUTE";
else if(type == MATH_MATRIXMUL)
......@@ -49,12 +49,6 @@ const char * GetOPName(int type)
return "M_POWER";
else if(type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT";
else if(type == GETANDSET_SELECT)
return "G_SELECT";
else if(type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED";
else if(type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES";
else if(type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX";
else if(type == REDUCE_REDUCEMEAN)
......@@ -65,6 +59,14 @@ const char * GetOPName(int type)
return "R_REDUCESUMSQUARED";
else if(type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE";
}
else if((type & DATA_BASE) != 0){
if(type == GETANDSET_SELECT)
return "G_SELECT";
else if(type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED";
else if(type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES";
else if(type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if(type == SHAPE_MERGE)
......
......@@ -28,8 +28,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_ARITHMETIC 0x00001000
#define MATH_ABSOLUTE MATH_ARITHMETIC + 1
/* math operations */
#define MATH_BASE 0x00001000
#define MATH_ABSOLUTE MATH_BASE + 1
#define MATH_MATRIXMUL MATH_ABSOLUTE + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
......@@ -42,20 +43,22 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define GETANDSET MATH_SCALEANDSHIFT + 1
#define GETANDSET_SELECT GETANDSET + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define REDUCE MOVEMENT_COPYVALUES + 1
#define REDUCE MATH_SCALEANDSHIFT + 1
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1
/* data and shape related operations */
#define DATA_BASE MATH_BASE * 2
#define GETANDSET DATA_BASE + 1
#define GETANDSET_SELECT GETANDSET + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define SHAPE REDUCE_REDUCEVARIANCE + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
......@@ -64,6 +67,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_TRANSPOSE SHAPE_SPLIT + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
/* activation functions */
#define FUNCTION_BASE DATA_BASE * 2
#define FUNC_HARDTANH FUNCTION_BASE + 1
#define FUNC_IDENTITY FUNC_HARDTANH + 1
#define FUNC_LOGSOFTMAX FUNC_IDENTITY + 1
#define FUNC_RECTIFY FUNC_LOGSOFTMAX + 1
#define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1
/* get operator name */
const char * GetOPName(int type);
......
......@@ -211,6 +211,9 @@ XTensor::~XTensor()
XLink::ClearIncoming(this);
DestroyData();
if(grad != NULL)
delete grad;
}
/* initialize member variables */
......@@ -237,7 +240,9 @@ void XTensor::Init()
memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
isInit = false;
isTmp = false;
isGrad = false;
visitMark = 0;
grad = NULL;
}
/* delete data arrays */
......@@ -294,7 +299,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
}
else{
DestroyData();
if(isInit){
if(!isInit){
devID = tensor.devID;
mem = tensor.mem;
}
......@@ -347,6 +352,9 @@ judge whether the two matrices are in the same type and size
*/
bool XTensor::IsIdentical(const XTensor * a, const XTensor * b)
{
if(a == NULL || b == NULL)
return false;
if(a->order != b->order)
return false;
......@@ -1043,7 +1051,7 @@ int XTensor::GetNonzeroSize()
/*
set the tensor as "temporary"
>> myIsTMP - flag
>> myIsTMP - the flag
*/
void XTensor::SetTMP(bool myIsTmp)
{
......@@ -1051,6 +1059,15 @@ void XTensor::SetTMP(bool myIsTmp)
}
/*
set the tensor as "keep-gradient"
>> myIsGrad - the flag
*/
void XTensor::SetGrad(bool myIsGrad)
{
isGrad = myIsGrad;
}
/*
resize a tensor with a specified tensor size
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
......@@ -1105,7 +1122,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(isSparse){
/*
for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrice as examples,
ordered by key. Take a (2-dimensional) matrix as an examples,
we have key = m * i + j;
The data array is
---------
......@@ -1148,9 +1165,9 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(filledData){
/* allocate the new one */
if(mem == NULL){
data = (void*)new char[unitNum * unitSize];
data = XMemAlloc(devID, unitNum * unitSize);
#if defined(UNSAFE_BUT_FAST_MEM)
memset(data, 0, unitNum * unitSize);
XMemSet(devID, data, 0, unitNum * unitSize);
#endif
}
else
......
......@@ -139,8 +139,14 @@ public:
/* indicates whether the tensor is created temporarily */
bool isTmp;
/* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad;
/* mark for traversing the gragh */
unsigned int visitMark;
/* gradient (for back-propagation) */
XTensor * grad;
/*
the link used to form networks. Note that when we compute on tensors, we actually create a
......@@ -300,6 +306,9 @@ public:
/* set the tensor as "temporary" */
void SetTMP(bool myIsTmp = true);
/* set the tensor as "keep-gradient" */
void SetGrad(bool myIsGrad = true);
/* resize a matrix with a specified matrix size */
bool Resize(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE,
......
......@@ -176,12 +176,16 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size)
}
#ifdef USE_CUDA
else if(devIDT >= 0 && devIDS < 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyHostToDevice) == cudaSuccess),
"cudaMemcpy error (cudaMemcpyHostToDevice)");
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyHostToDevice)");
}
}
else if(devIDT < 0 && devIDS >= 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost) == cudaSuccess),
"cudaMemcpy error (cudaMemcpyDeviceToHost)");
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost);
if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyDeviceToHost)");
}
}
else{
//if(devIDT == devIDS){
......
......@@ -80,13 +80,13 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cBlockNum = 1;
for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!");
CheckNTErrors(a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order], "Incorrect tensor sizes!");
aBlockNum *= a->dimSizeRDI[i];
cBlockNum *= a->dimSizeRDI[i];
}
for (int i = 2; i < b->order; i++) {
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
CheckNTErrors(b->dimSizeRDI[i] == c->dimSizeRDI[i], "Incorrect tensor sizes!");
bBlockNum *= b->dimSizeRDI[i];
cBlockNum *= b->dimSizeRDI[i];
}
......@@ -224,10 +224,10 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
int order = a.order + b.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
......
......@@ -142,16 +142,15 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
element-wise product of two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it
c(i) = a(i)*b(i) + \alpha * c(i)
c(i) = a(i)*b(i)
where i is the index of the item
>> a - tensor a
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
<< return - the product of the tensors
*/
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
{
CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");
......@@ -159,11 +158,10 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
c.SetTMP();
/* call _Multiply function */
_Multiply(&a, &b, &c, alpha, leadingDim);
_Multiply(&a, &b, &c, 0, leadingDim);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
return c;
......
......@@ -44,10 +44,10 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0, int leadingDim
/*
element-wise product of two tensors (return a XTensor structure)
make a new tensor to keep the result and return it
c(i) = a(i)*b(i) + \alpha * c(i)
c(i) = a(i)*b(i)
where i is the index of the element
*/
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0, int leadingDim = 0);
XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -298,12 +298,12 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice);
_CudaBLASMatrixMULBatched(handle,
(const void**)apGPU, transposedA, a0->dataType,
(const void**)bpGPU, transposedB, b0->dataType,
(void**)cpGPU, c0->dataType, a->count,
a0->dimSize[0], a0->dimSize[1],
b0->dimSize[0], b0->dimSize[1],
c0->dimSize[0], c0->dimSize[1], alpha, beta);
(const void**)apGPU, transposedA, a0->dataType,
(const void**)bpGPU, transposedB, b0->dataType,
(void**)cpGPU, c0->dataType, a->count,
a0->dimSize[0], a0->dimSize[1],
b0->dimSize[0], b0->dimSize[1],
c0->dimSize[0], c0->dimSize[1], alpha, beta);
delete[] ap;
delete[] bp;
delete[] cp;
......
......@@ -119,8 +119,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
/* tensor connection */
XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
XLink::AddParamToHead(&c, low);
XLink::AddParamToHead(&c, high);
XLink::AddParamToHeadInt(&c, low);
XLink::AddParamToHeadInt(&c, high);
/* destroy variables */
delete[] dimSize;
......
......@@ -135,11 +135,11 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
/* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHead(&t, dim);
XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadPointer(&t, srcIndex);
XLink::AddParamToHead(&t, indexSize);
XLink::AddParamToHeadInt(&t, indexSize);
XLink::AddParamToHeadPointer(&t, tgtIndex);
XLink::AddParamToHead(&t, copyNum);
XLink::AddParamToHeadInt(&t, copyNum);
return t;
}
......
......@@ -125,7 +125,7 @@ XTensor ReduceMax(const XTensor &input, int dim)
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHead(&output, dim);
XLink::AddParamToHeadInt(&output, dim);
return output;
}
......
......@@ -79,7 +79,7 @@ XTensor ReduceMean(const XTensor &input, int dim)
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHead(&output, dim);
XLink::AddParamToHeadInt(&output, dim);
/* destroy variables */
delete[] dimSize;
......
......@@ -235,7 +235,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
/* tensor connection */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHead(&output, dim);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power);
/* destroy variables */
......
......@@ -75,7 +75,7 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
/* tensor connection */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHead(&output, dim);
XLink::AddParamToHeadInt(&output, dim);
/* destroy variables */
delete[] dimSize;
......
......@@ -20,6 +20,7 @@
*/
#include <stdlib.h>
#include "../XName.h"
#include "HardTanH.h"
#include "HardTanH.cuh"
......@@ -58,6 +59,26 @@ void _HardTanH(const XTensor * x, XTensor * y)
ShowNTErrors("TODO!");
}
/*
hard tanh function (return a structure)
y = 1 if x > 1
x if -1 <= x <= 1
-1 if x < -1
>> x - input tensor
<< return - y
*/
XTensor HardTanH(const XTensor &x)
{
XTensor y(&x);
y.SetTMP();
_HardTanH(&x, &y);
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
return y;
}
/*
backward computation
......@@ -77,16 +98,16 @@ hard tanh: y = 1 if x > 1
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName)
void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName)
{
CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
"The tensors must be of the same size!");
#ifdef USE_CUDA
if(x->devID >= 0 || y->devID >= 0){
CudaHardTanHBackward(gold, y, x, dedy, dedx, lossName);
_CudaHardTanHBackward(gold, y, x, dedy, dedx, lossName);
return;
}
#endif
......
......@@ -129,9 +129,9 @@ hard tanh: y = 1 if x > 1
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName)
void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName)
{
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
......@@ -40,9 +40,9 @@ void _CudaHardTanH(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */
extern "C"
void CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName);
void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName);
#endif // USE_CUDA
......
......@@ -33,14 +33,15 @@ y = 1 if x > 1
x if -1 <= x <= 1
-1 if x < -1
*/
extern "C"
void _HardTanH(const XTensor * x, XTensor * y);
/* hard tanh function (return a structure) */
XTensor HardTanH(const XTensor &x);
/* de/dx */
extern "C"
void HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName);
void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -122,7 +122,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
blockMax->data = mp;
blockSum->data = sp;
#ifdef USE_CUDA
CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
_CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
......@@ -223,10 +223,10 @@ better numerical stability.
>> lossName - type of loss function, e.g., cross entropy
>> leadDim - leading dimension (along which we perform reduction)
*/
void LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
LOSS_FUNCTION_NAME lossName)
void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
LOSS_FUNCTION_NAME lossName)
{
CheckNTErrors((!dedx->isSparse), "The gradient matrix must be dense!");
CheckNTErrors((gold != NULL), "The gold standard cannot be empty!");
......@@ -234,7 +234,7 @@ void LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int leadDimRDI = y->order - leadDim - 1;
#ifdef USE_CUDA
if (gold->devID >= 0) {
CudaLogSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
_CudaLogSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
return;
}
#endif
......
......@@ -139,7 +139,7 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version)
>> sum - \sum_{i} e^{x_i}
>> max - \max_{i} e^{x_i}
*/
void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max)
void _CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max)
{
CheckNTErrors((x->devID >= 0), "Forward computation of log softmax must be run on GPUs.");
CheckNTErrors((x->devID == y->devID), "Input tensors must be on the same GPU.");
......@@ -353,7 +353,7 @@ better numerical stability.
>> lossName - type of loss function, e.g., cross entropy
>> leadDim - leading dimension (along which we perform reduction)
*/
void CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
void _CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
LOSS_FUNCTION_NAME lossName)
......
......@@ -30,16 +30,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) */
extern "C"
void _CudaLogSoftmax(const XTensor * input, XTensor * output, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) */
extern "C"
void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max);
void _CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max);
/* de/dx (Cuda version) */
extern "C"
void CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
void _CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
LOSS_FUNCTION_NAME lossName);
......
......@@ -28,15 +28,13 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */
extern "C"
void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* de/dx */
extern "C"
void LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
LOSS_FUNCTION_NAME lossName);
void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
LOSS_FUNCTION_NAME lossName);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -387,7 +387,7 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
if (y->devID < 0) {
CheckNTErrors((tLen <= y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!");
"The input tensors must be of the same size!");
CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)), "Tensor must be on the same device!");
CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
......
......@@ -405,7 +405,7 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
dloss/dy_i = -t_i / y_i
*/
if(LFName == CROSSENTROPY){
else if(LFName == CROSSENTROPY){
if(t->isSparse){
ShowNTErrors("TODO!");
}
......@@ -416,6 +416,9 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
else{
ShowNTErrors("TODO");
}
BacktoCudaDev(y->devID, devIDBackup);
}
......
......@@ -773,7 +773,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* for y = softmax(s), we get dE/ds
where E is the error function (define by loss) */
LogSoftmaxBackward(&gold, &y, &s, NULL, &deds, 1, loss);
_LogSoftmaxBackward(&gold, &y, &s, NULL, &deds, 1, loss);
/* for s = x * w, we get
dE/w_{i,j} = dE/ds_j * ds/dw_{i,j}
......@@ -818,7 +818,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* backpropagation through the activation fucntion:
dE/ds = dE/dh * dh/ds */
HardTanHBackward(NULL, &h, &s, &dedh, &deds, NOLOSS);
_HardTanHBackward(NULL, &h, &s, &dedh, &deds, NOLOSS);
/* gradient of the weight: dE/dw = x^T * dE/ds */
_MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw);
......
......@@ -155,7 +155,7 @@ bool TestHardTanH2()
_HardTanH(x, y);
/* call HardTanHBackward function */
HardTanHBackward(gold, y, x, dedy, dedx, SQUAREDERROR);
_HardTanHBackward(gold, y, x, dedy, dedx, SQUAREDERROR);
/* check results */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -184,7 +184,7 @@ bool TestHardTanH2()
_HardTanH(xGPU, yGPU);
/* call hardtanhbackward function */
HardTanHBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, SQUAREDERROR);
_HardTanHBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, SQUAREDERROR);
/* check results */
gpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......
......@@ -142,7 +142,7 @@ bool TestLogSoftmax2()
_LogSoftmax(x, y, 1);
/* call LogSoftmaxBackward function */
LogSoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
_LogSoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
/* check result */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -170,7 +170,7 @@ bool TestLogSoftmax2()
_LogSoftmax(xGPU, yGPU, 1);
/* call LogSoftmaxBackward function */
LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
_LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F) && dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F);
......@@ -246,7 +246,7 @@ bool TestLogSoftmax3()
_LogSoftmax(x, y, 1);
/* call LogSoftmaxBackward function */
LogSoftmaxBackward(g, y, x, dedy, dedx, 1, SQUAREDERROR);
_LogSoftmaxBackward(g, y, x, dedy, dedx, 1, SQUAREDERROR);
/* check result */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -274,7 +274,7 @@ bool TestLogSoftmax3()
_LogSoftmax(xGPU, yGPU, 1);
/* call LogSoftmaxBackward function */
LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, SQUAREDERROR);
_LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, SQUAREDERROR);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
......@@ -75,6 +75,7 @@ bool TestMatrixMul1()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -83,9 +84,10 @@ bool TestMatrixMul1()
/* call MatrixMul function */
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -95,6 +97,7 @@ bool TestMatrixMul1()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -103,9 +106,10 @@ bool TestMatrixMul1()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......@@ -185,6 +189,7 @@ bool TestMatrixMul2()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -193,9 +198,10 @@ bool TestMatrixMul2()
/* call MatrixMul function */
_MatrixMul(s1, X_TRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_TRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -205,6 +211,7 @@ bool TestMatrixMul2()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -213,9 +220,10 @@ bool TestMatrixMul2()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_TRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......@@ -315,6 +323,7 @@ bool TestMatrixMul3()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -323,9 +332,10 @@ bool TestMatrixMul3()
/* call MatrixMul function */
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -335,6 +345,7 @@ bool TestMatrixMul3()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -343,9 +354,10 @@ bool TestMatrixMul3()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......@@ -434,6 +446,7 @@ bool TestMatrixMul4()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -442,9 +455,10 @@ bool TestMatrixMul4()
/* call MatrixMul function */
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -454,6 +468,7 @@ bool TestMatrixMul4()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -462,9 +477,10 @@ bool TestMatrixMul4()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......
......@@ -86,7 +86,7 @@ bool TestMultiply1()
/* call Multiply function */
_Multiply(s1, s2, t, 0, 0);
_MultiplyMe(tMe, s2, 0, 0);
tUser = Multiply(*s1, *s2, 0, 0);
tUser = Multiply(*s1, *s2, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum)
......@@ -112,7 +112,7 @@ bool TestMultiply1()
/* call Multiply function */
_Multiply(sGPU1, sGPU2, tGPU, 0, 0);
_MultiplyMe(tMeGPU, sGPU2, 0, 0);
tUserGPU = Multiply(*sGPU1, *sGPU2, 0, 0);
tUserGPU = Multiply(*sGPU1, *sGPU2, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum)
......@@ -209,7 +209,7 @@ bool TestMultiply2()
/* call Multiply function */
_Multiply(s1, s2, t, 0, 0);
_MultiplyMe(tMe, s2, 0, 0);
tUser = Multiply(*s1, *s2, 0, 0);
tUser = Multiply(*s1, *s2, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum)
......@@ -235,11 +235,12 @@ bool TestMultiply2()
/* call Multiply function */
_Multiply(sGPU1, sGPU2, tGPU, 0, 0);
_MultiplyMe(tMeGPU, sGPU2, 0, 0);
tUserGPU = Multiply(*sGPU1, *sGPU2, 0, 0);
tUserGPU = Multiply(*sGPU1, *sGPU2, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum)
&& tMeGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) &&
tMeGPU->CheckData(answer, tUnitNum) &&
tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论