Commit 27635638 by xiaotong

new code for ScaleAndShift

parent 43331674
@@ -28,8 +28,7 @@
 #include <time.h>
 #include "XTensor.h"
 #include "XDevice.h"
-#include "./sample/fnnlm/FNNLM.h"
+#include "sample/fnnlm/FNNLM.h"
 #include "test/Test.h"
 //#define CRTDBG_MAP_ALLOC
@@ -39,26 +38,15 @@
 using namespace nts;
 using namespace samplefnnlm;
 
+void SmallTest();
+
 int main( int argc, const char ** argv )
 {
     //_CrtSetBreakAlloc(78);
 
     /* a tiny test */
-    if(0){
-        XTensor a;
-        XTensor b;
-        InitTensor2D(&a, 2, 2);
-        a.SetZeroAll();
-        a.Set2D(1.0F, 0, 0);
-        a.Set2D(1.0F, 1, 1);
-        b = Sum(a, Sum(a, a));
-        XTensor c = b;
-        a.Dump(stderr, "a: ");
-        b.Dump(stderr, "b: ");
-    }
+    if(1)
+        SmallTest();
 
     if(argc > 1 && !strcmp(argv[1], "-test"))
         Test();
@@ -75,3 +63,25 @@ int main( int argc, const char ** argv )
     return 0;
 }
+
+void SmallTest()
+{
+    XTensor a;
+    XTensor b;
+
+    InitTensor2D(&a, 2, 2);
+    a.SetZeroAll();
+    a.Set2D(1.0F, 0, 0);
+    a.Set2D(2.0F, 1, 1);
+
+    b = Sum(a, Multiply(a, a));
+
+    XTensor c = b;
+    XTensor d = b + a + Linear(c, 0.5F);
+
+    a.Dump(stderr, "a: ");
+    b.Dump(stderr, "b: ");
+    c.Dump(stderr, "c: ");
+    d.Dump(stderr, "d: ");
+}
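
Note: the expected output of SmallTest can be checked by hand. Multiply is element-wise multiplication (the tests below call it "MultiplyElementWise"), and Linear(c, 0.5F) expands to ScaleAndShift(c, 0.5F, 0) via the #define aliases added in ScaleAndShift.h further down in this diff:

    /* hand check of SmallTest (values follow from the code above) */
    a = {{1, 0}, {0, 2}}                    /* set via Set2D                 */
    b = a + a*a       = {{2, 0}, {0, 6}}    /* Sum(a, Multiply(a, a))        */
    c = b             = {{2, 0}, {0, 6}}    /* copied by the assignment      */
    d = b + a + 0.5*c = {{4, 0}, {0, 11}}   /* b + a + Linear(c, 0.5F)       */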
@@ -74,7 +74,7 @@ namespace nts {
 { \
     if(!(x)) \
     { \
-        fprintf(stderr, "Error! calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); \
+        fprintf(stderr, "[ERROR] calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); \
         exit(1); \
     } \
 } \
@@ -83,7 +83,7 @@ namespace nts {
 { \
     if(!(x)) \
     { \
-        fprintf(stderr, "Error! calling '%s' (%s line %d)\n", #x, __FILENAME__, __LINE__); \
+        fprintf(stderr, "[ERROR] calling '%s' (%s line %d)\n", #x, __FILENAME__, __LINE__); \
         exit(1); \
     } \
 } \
@@ -91,7 +91,7 @@ namespace nts {
 #define ShowNTErrors(msg) \
 { \
     { \
-        fprintf(stderr, "Error! (%s line %d): %s\n", __FILENAME__, __LINE__, msg); \
+        fprintf(stderr, "[ERROR] (%s line %d): %s\n", __FILENAME__, __LINE__, msg); \
         exit(1); \
     } \
 } \
......
@@ -19,15 +19,10 @@
  * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
  */
 
-#ifndef __XNAME_H__
-#define __XNAME_H__
+#include "XName.h"
 
 namespace nts { // namespace nts(NiuTrans.Tensor)
 
-#define MATH_ARITHMETIC 0x00001000
-#define MATH_SUM MATH_ARITHMETIC + 1
-#define MATH_MULTIPLY MATH_SUM + 1
-
 /* get operator name */
 const char * GetOPName(int type)
 {
@@ -36,6 +31,8 @@ const char * GetOPName(int type)
             return "M_SUM";
         else if(type == MATH_MULTIPLY)
             return "M_MULTIPLY";
+        else if(type == MATH_SCALEANDSHIFT)
+            return "M_SCALEANDSHIFT";
     }
 
     return "NULL";
@@ -43,4 +40,3 @@ const char * GetOPName(int type)
 } // namespace nts(NiuTrans.Tensor)
-
-#endif // __XNAME_H__
@@ -31,6 +31,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 #define MATH_ARITHMETIC 10000
 #define MATH_SUM MATH_ARITHMETIC + 1
 #define MATH_MULTIPLY MATH_SUM + 1
+#define MATH_SCALEANDSHIFT MATH_MULTIPLY + 1
 
 /* get operator name */
 const char * GetOPName(int type);
......
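
Note: the operator ids are plain textual macros, so they only expand safely where `+` binds tighter than the surrounding operator; `type == MATH_SUM` works, but an expression like `2 * MATH_SUM` would expand to `2 * 10000 + 1`. A parenthesized variant would be more defensive (a suggestion, not part of this commit):

    #define MATH_SUM           (MATH_ARITHMETIC + 1)
    #define MATH_MULTIPLY      (MATH_SUM + 1)
    #define MATH_SCALEANDSHIFT (MATH_MULTIPLY + 1)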
@@ -40,6 +40,7 @@
 #include "XBLAS.h"
 #include "core/shape/MergeBlockLists.h"
 #include "core/movement/CopyValues.h"
+#include "core/arithmetic/Sum.h"
 
 #ifdef USE_CUDA
@@ -308,6 +309,12 @@ XTensor& XTensor::operator= (const XTensor& tensor)
     return *this;
 }
 
+/* overloading of the plus-sign */
+XTensor XTensor::operator+ (const XTensor& tensor)
+{
+    return Sum(*this, tensor);
+}
+
 /*
 judge whether the two matrices are in the same type and size
 >> a - input tensor
......
@@ -184,6 +184,9 @@ public:
     /* overloading of the equal-sign */
     XTensor& operator= (const XTensor &tensor);
 
+    /* overloading of the plus-sign */
+    XTensor operator+ (const XTensor &tensor);
+
     /* judge whether the two matrices are in the same type and size */
     static
     bool IsIdentical(XTensor * a, XTensor * b);
......
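
Note: with operator+ forwarding to Sum, the chained expression in SmallTest is just nested calls into the functional API; left-to-right association gives:

    /* XTensor d = b + a + Linear(c, 0.5F); is equivalent to: */
    XTensor d = Sum(Sum(b, a), ScaleAndShift(c, 0.5F, 0));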
@@ -19,24 +19,28 @@
  * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
  */
 
+#include "../../XTensor.h"
+#include "../../XName.h"
+#include "../../XUtility.h"
 #include "ScaleAndShift.h"
 #include "ScaleAndShift.cuh"
 
 namespace nts{ // namespace nts(NiuTrans.Tensor)
 
 /*
 scale and shift all tensor entries
-p = p * scale + shift
->> a - the tensor
+b = a * scale + shift
+>> a - the input tensor
+>> b - the output tensor
 >> scale - the scale factor
 >> shift - the shift factor
 */
-void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift)
+void _ScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift)
 {
 #ifdef USE_CUDA
     /* run it on GPUs */
     if(a->devID >= 0){
-        CudaScaleAndShift(a, scale, shift);
+        _CudaScaleAndShift(a, b, scale, shift);
         return;
     }
 #endif
@@ -46,7 +50,7 @@ void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift)
 
     /* sparse tensor */
     if(a->isSparse){
-        int num = a->GetNonzeroSize();
+        int num = a->unitNumNonZero;
         char * d = (char*)a->data + sizeof(int);
         char * f = d + (sizeof(int) + sizeof(DTYPE)) * 0 + sizeof(int);
         for(int i = 0; i < num; i++){
@@ -65,4 +69,40 @@ void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift)
         }
     }
 }
+
+/*
+scale and shift all tensor entries on site (in place)
+a = a * scale + shift
+>> a - the input/output tensor
+>> scale - the scale factor
+>> shift - the shift factor
+*/
+void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
+{
+    _ScaleAndShift(a, a, scale, shift);
+}
+
+/*
+scale and shift all tensor entries
+b = a * scale + shift
+>> a - the input tensor
+>> scale - the scale factor
+>> shift - the shift factor
+*/
+XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
+{
+    XTensor b(&a);
+    b.SetTMP();
+
+    /* computation */
+    _ScaleAndShift(&a, &b, scale, shift);
+
+    /* tensor connections */
+    XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
+    XLink::AddParamToHead(&b, scale);
+    XLink::AddParamToHead(&b, shift);
+
+    return b;
+}
+
 } // namespace nts(NiuTrans.Tensor)
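
Note: a quick usage sketch of the three entry points introduced here (the tensor setup is illustrative only):

    XTensor x;
    InitTensor2D(&x, 2, 2);
    x.SetZeroAll();

    XTensor y(&x);                             /* y gets the same shape as x          */
    _ScaleAndShift(&x, &y, 2.0F, 1.0F);        /* low level:  y = 2 * x + 1           */
    _ScaleAndShiftMe(&x, 2.0F, 1.0F);          /* in place:   x = 2 * x + 1           */
    XTensor z = ScaleAndShift(x, 2.0F, 1.0F);  /* returns a TMP tensor and records
                                                  the operation via XLink             */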
@@ -19,7 +19,6 @@
  * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
  */
 
-#include "ScaleAndShift.h"
 #include "ScaleAndShift.cuh"
 #include "../../XDevice.h"
@@ -28,68 +27,71 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
 #ifdef USE_CUDA
 
 /*
-scale and shift all tensor entries p = p * scale + shift (CUDA Kernel)
->> d - the data array
->> size - the size of d
+scale and shift all tensor entries b = a * scale + shift (CUDA Kernel)
+>> a - the input data array
+>> b - the output data array
+>> size - the size of the data array
 >> scale - how much we want to scale it
 >> shift - how much we want to shift it
 */
 template<bool isUnitScale, bool isZeroShift>
 __global__
-void KernelScaleAndShift(DTYPE * d, int size, DTYPE scale, DTYPE shift)
+void KernelScaleAndShift(DTYPE * a, DTYPE * b, int size, DTYPE scale, DTYPE shift)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
 
     if (i < size){
         if (isUnitScale && !isZeroShift){
-            d[i] = d[i] + shift;
+            b[i] = a[i] + shift;
         }
         else if (isUnitScale && isZeroShift) {
-            d[i] = d[i];
+            b[i] = a[i];
         }
         else if (!isUnitScale && isZeroShift) {
-            d[i] = d[i] * scale;
+            b[i] = a[i] * scale;
         }
         else {
-            d[i] = d[i] * scale + shift;
+            b[i] = a[i] * scale + shift;
         }
     }
 }
 /*
-scale and shift all matrix entries p = p * scale + shift (CUDA Kernel)
+scale and shift all tensor entries b = a * scale + shift (CUDA Kernel)
 This is for float16 computation
->> d - the data array
->> size - the size of d
+>> a - the input data array
+>> b - the output data array
+>> size - the size of the data array
 >> scale - how much we want to scale it
 >> shift - how much we want to shift it
 */
 __global__
-void KernelScaleAndShift(__half * d, int size, __half scale, __half shift)
+void KernelScaleAndShift(__half * a, __half * b, int size, __half scale, __half shift)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
 
 #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
     if(i < size)
-        d[i] = __hadd(__hmul(d[i], scale), shift);
+        b[i] = __hadd(__hmul(a[i], scale), shift);
 #else
     if (i < size)
-        d[i] = __float2half(__half2float(d[i]) * __half2float(scale) + __half2float(shift));
+        b[i] = __float2half(__half2float(a[i]) * __half2float(scale) + __half2float(shift));
 #endif
 }
 /*
-scale and shift all matrix entries
-p = p * scale + shift
->> a - the tensor
+scale and shift all tensor entries
+b = a * scale + shift
+>> a - the input tensor
+>> b - the output tensor
 >> scale - the scale factor
 >> shift - the shift factor
 */
-void CudaScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift)
+void _CudaScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift)
 {
     /* sparse tensor */
     if(a->isSparse){
-        // TODO
+        ShowNTErrors("TODO!");
     }
     /* dense tensor */
     else{
@@ -106,20 +108,20 @@ void CudaScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift)
 
     if(a->dataType == DEFAULT_DTYPE){
         if(scale == 1.0F && shift == 0)
-            KernelScaleAndShift<true, true><<<blocks, threads>>>((DTYPE*)a->data, a->unitNum, scale, shift);
+            KernelScaleAndShift<true, true><<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum, scale, shift);
         else if (scale == 1.0F && shift != 0)
-            KernelScaleAndShift<true, false><<<blocks, threads>>>((DTYPE*)a->data, a->unitNum, scale, shift);
+            KernelScaleAndShift<true, false><<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum, scale, shift);
         else if(scale != 1.0F && shift == 0)
-            KernelScaleAndShift<false, true><<<blocks, threads>>>((DTYPE*)a->data, a->unitNum, scale, shift);
+            KernelScaleAndShift<false, true><<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum, scale, shift);
         else
-            KernelScaleAndShift<false, false><<<blocks, threads>>>((DTYPE*)a->data, a->unitNum, scale, shift);
+            KernelScaleAndShift<false, false><<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum, scale, shift);
     }
     else if(a->dataType == X_FLOAT16){
         unsigned short scale2 = FloatToFloat16(scale);
         unsigned short shift2 = FloatToFloat16(shift);
         __half * scaleft16p = (__half*)&scale2;
         __half * shiftft16p = (__half*)&shift2;
-        KernelScaleAndShift<<<blocks, threads>>>((__half*)a->data, a->unitNum, *scaleft16p, *shiftft16p);
+        KernelScaleAndShift<<<blocks, threads>>>((__half*)a->data, (__half*)b->data, a->unitNum, *scaleft16p, *shiftft16p);
     }
     else{
         ShowNTErrors("TODO!");
......
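
Note: the four host-side branches only select a template instantiation; isUnitScale and isZeroShift are compile-time constants inside the kernel, so each instantiation compiles to a straight-line body with no per-element branching. The blocks/threads values come from XDevice helpers outside this hunk; a generic sketch of such a 1-D launch, with an assumed block size:

    /* hypothetical grid setup for an element-wise kernel; the real values
       are derived from the XDevice/GDevs configuration, not hard-coded */
    int threads = 256;
    int blocks = (a->unitNum + threads - 1) / threads;   /* ceiling division */
    KernelScaleAndShift<false, false><<<blocks, threads>>>(
        (DTYPE*)a->data, (DTYPE*)b->data, a->unitNum, scale, shift);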
@@ -22,23 +22,22 @@
 #ifndef __SCALEANDSHIFT_CUH__
 #define __SCALEANDSHIFT_CUH__
 
-#include "../../XTensor.h"
+#include "ScaleAndShift.h"
 
 namespace nts{ // namespace nts(NiuTrans.Tensor)
 
 #ifdef USE_CUDA
 
-/* scale and shift all tensor entries p = p * scale + shift (CUDA Kernel) */
+/* scale and shift all tensor entries b = a * scale + shift (CUDA Kernel) */
 __global__
-void KernelScaleAndShift(DTYPE * d, int size, DTYPE scale, DTYPE shift);
+void KernelScaleAndShift(DTYPE * a, DTYPE * b, int size, DTYPE scale, DTYPE shift);
 
-/* scale and shift all tensor entries p = p * scale + shift (CUDA Kernel) with float16 data type */
+/* scale and shift all tensor entries b = a * scale + shift (CUDA Kernel) with float16 data type */
 __global__
-void KernelScaleAndShift(__half * d, int size, __half scale, __half shift);
+void KernelScaleAndShift(__half * a, __half * b, int size, __half scale, __half shift);
 
-/* scale and shift all tensor entries (cuda version) */
-extern "C"
-void CudaScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift);
+/* scale and shift all tensor entries b = a * scale + shift (cuda version) */
+void _CudaScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift);
 
 #endif // USE_CUDA
......
@@ -26,9 +26,18 @@
 namespace nts{ // namespace nts(NiuTrans.Tensor)
 
-/* scale and shift all tensor entries */
-extern "C"
-void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift);
+#define _Linear _ScaleAndShift
+#define _LinearMe _ScaleAndShiftMe
+#define Linear ScaleAndShift
+
+/* scale and shift all tensor entries b = a * scale + shift */
+void _ScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift = 0);
+
+/* scale and shift all tensor entries on site a = a * scale + shift */
+void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
+
+/* scale and shift all tensor entries b = a * scale + shift, and return the result tensor b */
+XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
 
 } // namespace nts(NiuTrans.Tensor)
......
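
Note: the #define aliases make Linear interchangeable with ScaleAndShift, which is what SmallTest's `Linear(c, 0.5F)` relies on; with the default shift of 0 it is a pure scaling:

    XTensor y = Linear(x, 0.5F);   /* expands to ScaleAndShift(x, 0.5F, 0), i.e. y = 0.5 * x */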
@@ -40,7 +40,7 @@ void ReduceMean(XTensor * input, XTensor * output, int dim)
     int num = input->dimSizeRDI[dimRDI];
 
     ReduceSum(input, output, dim);
-    ScaleAndShift(output, (DTYPE)1/num, 0);
+    _ScaleAndShiftMe(output, (DTYPE)1/num, 0);
 }
 
 } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
@@ -39,7 +39,7 @@ void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean)
     int dimRDI = input->order - dim - 1;
     int num = input->dimSizeRDI[dimRDI];
     ReduceSum(input, output, dim, mean, 2.0F);
-    ScaleAndShift(output, (DTYPE)1 / num, 0);
+    _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
 }
 
 } // namespace nts(NiuTrans.Tensor)
......
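
Note: both call sites finish an average with the in-place scale. With n elements along the reduced dimension, the two functions compute

    mean_j = (1/n) * \sum_i x_{ij}
    var_j  = (1/n) * \sum_i (x_{ij} - mean_j)^2

where ReduceSum(input, output, dim, mean, 2.0F) presumably accumulates the squared deviations (2.0F being the exponent) and _ScaleAndShiftMe(output, 1/num, 0) divides by n.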
@@ -288,7 +288,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
     beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize);
 
     /* \beta = \sum_i (dE/dy_i * y_i) */
-    Multiply(dedy, y, ytmp, 0);
+    _Multiply(dedy, y, ytmp, 0, 0);
     ReduceSum(ytmp, beta, leadDim);
 
     /* ytmp = dE/dy_j - \beta */
@@ -296,7 +296,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
     _Sum(dedy, ytmp, ytmp, -1.0F);
 
     /* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
-    Multiply(y, ytmp, dedx, 0);
+    _Multiply(y, ytmp, dedx, 0, 0);
 
     mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize);
     mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize);
......
@@ -485,7 +485,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
     InitTensor(&probs, &output);
 
     /* probs[i,j] = output[i,j] * gold[i,j] */
-    Multiply(&output, &gold, &probs, 0);
+    _Multiply(&output, &gold, &probs);
 
     /* probability of each word */
     XTensor wprobs;
......
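
Note: assuming gold is a one-hot indicator of the correct word, the element-wise product keeps exactly one non-zero entry per row, so the per-word reduction that follows yields the model probability of the gold word:

    \sum_j output_{ij} * gold_{ij} = output_{i,k_i}    (k_i = gold word index in row i)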
@@ -54,8 +54,8 @@ bool TestLoss1()
     /* initialize variables */
     output->SetZeroAll();
     gold->SetZeroAll();
-    ScaleAndShift(output, 1, 1);
-    ScaleAndShift(gold, 1, 2);
+    _ScaleAndShiftMe(output, 1, 1);
+    _ScaleAndShiftMe(gold, 1, 2);
 
     DTYPE error;
     error = LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
@@ -74,8 +74,8 @@ bool TestLoss1()
     /* Initialize variables */
     outputGPU->SetZeroAll();
     goldGPU->SetZeroAll();
-    ScaleAndShift(outputGPU, 1, 1);
-    ScaleAndShift(goldGPU, 1, 2);
+    _ScaleAndShiftMe(outputGPU, 1, 1);
+    _ScaleAndShiftMe(goldGPU, 1, 2);
 
     /* call LossCompute function */
     error = LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
@@ -131,8 +131,8 @@ bool TestLoss2()
     /* initialize variables */
     output->SetZeroAll();
     gold->SetZeroAll();
-    ScaleAndShift(output, 1, 1);
-    ScaleAndShift(gold, 1, 2);
+    _ScaleAndShiftMe(output, 1, 1);
+    _ScaleAndShiftMe(gold, 1, 2);
 
     DTYPE error;
     error = LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
@@ -151,8 +151,8 @@ bool TestLoss2()
     /* Initialize variables */
     outputGPU->SetZeroAll();
     goldGPU->SetZeroAll();
-    ScaleAndShift(outputGPU, 1, 1);
-    ScaleAndShift(goldGPU, 1, 2);
+    _ScaleAndShiftMe(outputGPU, 1, 1);
+    _ScaleAndShiftMe(goldGPU, 1, 2);
 
     /* call LossCompute function */
     error = LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
......
@@ -81,7 +81,7 @@ bool TestMultiply1()
     t->SetZeroAll();
 
     /* call MultiplyElementWise function */
-    Multiply(s1, s2, t, 0);
+    _Multiply(s1, s2, t, 0, 0);
 
     /* check results */
     cpuTest = t->CheckData(answer, tUnitNum);
@@ -101,7 +101,7 @@ bool TestMultiply1()
     tGPU->SetZeroAll();
 
     /* call MultiplyElementWise function */
-    Multiply(sGPU1, sGPU2, tGPU, 0);
+    _Multiply(sGPU1, sGPU2, tGPU, 0, 0);
 
     /* check results */
     gpuTest = tGPU->CheckData(answer, tUnitNum);
@@ -189,7 +189,7 @@ bool TestMultiply2()
     t->SetZeroAll();
 
     /* call MultiplyElementWise function */
-    Multiply(s1, s2, t, 0);
+    _Multiply(s1, s2, t, 0, 0);
 
     /* check results */
     cpuTest = t->CheckData(answer, tUnitNum);
@@ -209,7 +209,7 @@ bool TestMultiply2()
     tGPU->SetZeroAll();
 
     /* call MultiplyElementWise function */
-    Multiply(sGPU1, sGPU2, tGPU, 0);
+    _Multiply(sGPU1, sGPU2, tGPU, 0, 0);
 
     /* check results */
     gpuTest = tGPU->CheckData(answer, tUnitNum);
@@ -296,7 +296,7 @@ bool TestMultiply3()
     t->SetZeroAll();
 
     /* call MultiplyElementWise function */
-    Multiply(s1, s2, t, 1);
+    _Multiply(s1, s2, t, 0, 1);
 
     /* check results */
     cpuTest = t->CheckData(answer, tUnitNum);
@@ -316,7 +316,7 @@ bool TestMultiply3()
     tGPU->SetZeroAll();
 
     /* call MultiplyElementWise function */
-    Multiply(sGPU1, sGPU2, tGPU, 1);
+    _Multiply(sGPU1, sGPU2, tGPU, 0, 1);
 
     /* check results */
     gpuTest = tGPU->CheckData(answer, tUnitNum);
......
@@ -57,7 +57,7 @@ bool TestScaleAndShift1()
     s->SetData(sData, sUnitNum);
 
     /* call ScaleAndShift function */
-    ScaleAndShift(s, scaleFactor, shiftFactor);
+    _ScaleAndShift(s, s, scaleFactor, shiftFactor);
 
     /* check results */
     cpuTest = s->CheckData(answer, sUnitNum);
@@ -73,7 +73,7 @@ bool TestScaleAndShift1()
     sGPU->SetData(sData, sUnitNum);
 
     /* call ScaleAndShift function */
-    ScaleAndShift(sGPU, scaleFactor, shiftFactor);
+    _ScaleAndShift(sGPU, sGPU, scaleFactor, shiftFactor);
 
     /* check results */
     gpuTest = sGPU->CheckData(answer, sUnitNum);
......
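
Note: passing the same tensor as input and output, as these tests now do, matches the in-place wrapper; the kernel reads a[i] and writes b[i] at the same index, so aliasing the two buffers is safe here:

    _ScaleAndShift(s, s, scaleFactor, shiftFactor);   /* equivalent to ...        */
    _ScaleAndShiftMe(s, scaleFactor, shiftFactor);    /* ... the in-place wrapper */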