Commit f8a37184 by liyinqiao

Merge with li branch

parents 0ca350a3 9b11391e
@@ -542,14 +542,14 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
     if (dataType == X_FLOAT) {
         d = new float[unitNum];
         for (int i = 0; i < unitNum; i++) {
-            DTYPE value = lower + upper * (float)rand() / RAND_MAX;
+            DTYPE value = lower + (upper - lower) * (float)rand() / RAND_MAX;
             *((float*)d + i) = value;
         }
     }
     else if (dataType == X_DOUBLE) {
         d = new double[unitNum];
         for (int i = 0; i < unitNum; i++) {
-            *((double*)d + i) = rand() / RAND_MAX;
+            *((double*)d + i) = lower + (upper - lower) * rand() / RAND_MAX;
         }
     }
     else {
@@ -922,8 +922,10 @@ set the value of a cell
 >> index - index of the cell for each dimension
 >>
 */
-bool XTensor::Set(DTYPE value, int * index, int size)
+bool XTensor::Set(DTYPE value, int index[], int size)
 {
+    CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
     return SetToDevice(devID, GetCell(index, size), value);
 }
...
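Review note on the SetDataRand fix: the old expression lower + upper * rand() / RAND_MAX samples from [lower, lower + upper], not [lower, upper], and the old X_DOUBLE branch used the integer division rand() / RAND_MAX, which is almost always 0. A minimal standalone check of the corrected mapping, independent of XTensor:

#include <cassert>
#include <cstdlib>

/* corrected mapping: r in [0, RAND_MAX] -> value in [lower, upper] */
float UniformDraw(float lower, float upper)
{
    return lower + (upper - lower) * (float)rand() / RAND_MAX;
}

int main()
{
    srand(0);
    for (int i = 0; i < 10000; i++) {
        float v = UniformDraw(-1.0F, 1.0F);
        /* the old formula would have confined v to [-1.0F, 0.0F] here */
        assert(v >= -1.0F && v <= 1.0F);
    }
    return 0;
}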
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "Absolute.h"
#include "Absolute.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its absolute value
>> a - the tensor we are processing
*/
void Absolute(XTensor * a)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaAbsolute(a);
return;
}
#endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)fabs(d[i]);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Absolute.h"
#include "Absolute.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its absolute value (CUDA Kernel)
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(DTYPE * d, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = fabs(d[i]);
}
/*
set each entry to its absolute value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(__half * d, int size)
{
return;
}
/*
set each entry to its with float16 data type value
>> a - the tensor
*/
extern "C"
void CudaAbsolute(XTensor * a)
{
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
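CudaAbsolute above, and CudaSign and CudaLog below, all share one launch skeleton: size a 1-D grid with GDevs.GetCudaThread, switch to the tensor's device, launch, and switch back. A reduced sketch of that skeleton (KernelFoo is a placeholder, not a library kernel):

/* the common elementwise-launch skeleton used by the .cu files in this
   commit; KernelFoo stands in for KernelAbsolute / KernelSign / KernelLog */
void CudaElementwise(XTensor * a)
{
    int gridSize[3];
    int blockSize[3];

    /* pick a 1-D launch shape covering a->unitNum elements */
    GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);

    int devIDBackup;
    ProtectCudaDev(a->devID, devIDBackup);    /* make a->devID current */

    KernelFoo << <dim3(gridSize[0]), dim3(blockSize[0]) >> >((DTYPE*)a->data, a->unitNum);

    BacktoCudaDev(a->devID, devIDBackup);     /* restore the previous device */
}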
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Absolute.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its absolute value (CUDA Kernel) */
__global__
void KernelAbsolute(DTYPE * d, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__
void KernelAbsolute(__half * d, int size);
/* set each entry to its absolute value */
extern "C"
void CudaAbsolute(XTensor * a);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __ABSOLUTE_H__
#define __ABSOLUTE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */
extern "C"
void Absolute(XTensor * a);
} // namespace nts(NiuTrans.Tensor)
#endif // __ABSOLUTE_H__
@@ -89,9 +89,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
         void * ap = (char*)a->data + aRealBlockSize * p;
         void * bp = (char*)b->data + bRealBlockSize * p;
         void * cp = (char*)c->data + cRealBlockSize * p;
-        XTensor * ai = new XTensor(2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem);
-        XTensor * bi = new XTensor(2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem);
-        XTensor * ci = new XTensor(2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem);
+        XTensor * ai = NewTensor(2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem);
+        XTensor * bi = NewTensor(2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem);
+        XTensor * ci = NewTensor(2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem);
         ai->data = ap;
         bi->data = bp;
         ci->data = cp;
...
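The switch from bare new XTensor(...) to the NewTensor(...) factory keeps construction on one code path. The three tensors are then used as 2-D views whose data pointers are re-aimed at sub-blocks of the batched operands. A hedged sketch of that aliasing pattern (MakeView2D is illustrative, not library API; it assumes pool-managed allocation as in the hunk above, and the detach step reflects how such views must be cleaned up rather than code shown in this hunk):

/* Alias a sub-block of an existing tensor as a 2-D view (sketch).
   The caller should detach view->data before deleting the view,
   since the aliased buffer belongs to the parent tensor. */
XTensor * MakeView2D(XTensor * parent, size_t offsetInBytes, int rowNum, int colNum)
{
    int dims[2] = {rowNum, colNum};
    XTensor * view = NewTensor(2, dims, parent->dataType,
                               parent->denseRatio, parent->devID, parent->mem);
    view->data = (char*)parent->data + offsetInBytes;   /* alias, not a copy */
    return view;
}

/* after use: view->data = NULL; delete view; */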
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - the tensor we are processing
*/
void Sign(XTensor * a)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaSign(a);
return;
}
#endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0)
d[i] = 1.0F;
else if (d[i] == 0)
d[i] = 0.0F;
else
d[i] = -1.0F;
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its sign value (CUDA Kernel)
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelSign(DTYPE * d, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (d[i] > 0)
d[i] = 1.0F;
else if (d[i] == 0)
d[i] = 0.0F;
else
d[i] = -1.0F;
}
}
/*
set each entry to its sign value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelSign(__half * d, int size)
{
return;
}
/*
set each entry to its with float16 data type value
>> a - the tensor
*/
extern "C"
void CudaSign(XTensor * a)
{
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Sign.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its sign value (CUDA Kernel) */
__global__
void KernelSign(DTYPE * d, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
void KernelSign(__half * d, int size);
/* set each entry to its sign value */
extern "C"
void CudaSign(XTensor * a);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_H__
#define __SIGN_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
extern "C"
void Sign(XTensor * a);
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
@@ -52,7 +52,7 @@ void KernelADDByColumnVT(DTYPE * a, DTYPE * b, DTYPE * c, int colNum, int rowNum
         DTYPE * bp = b + (rowNum * k + row) * colNum;
         if (colNum % 4 == 0) {
             for (int i = 0; i < colNum; i += 4)
-                sum += bp[i] + bp[i + 1] + b[i + 2] + b[i + 3];
+                sum += bp[i] + bp[i + 1] + bp[i + 2] + bp[i + 3];
         }
         else if (colNum % 2 == 0) {
             for (int i = 0; i < colNum; i += 2)
...
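The KernelADDByColumnVT fix is easiest to see on the host: every term of the unrolled sum must come from the row pointer bp, while the old code read two terms from the array base b. A plain C++ analogue of the corrected loop:

/* host analogue of the corrected 4-way unrolled row sum */
float SumRow(const float * b, int rowStart, int colNum)
{
    const float * bp = b + rowStart;
    float sum = 0.0F;
    if (colNum % 4 == 0) {
        for (int i = 0; i < colNum; i += 4)
            sum += bp[i] + bp[i + 1] + bp[i + 2] + bp[i + 3];  /* all via bp */
    }
    else {
        for (int i = 0; i < colNum; i++)
            sum += bp[i];
    }
    return sum;
}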
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "ConvertDataType.h"
#include "ConvertDataType.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
convert data type
>> input - input tensor
>> output - output tensor
*/
void ConvertTensorDataType(XTensor * input, XTensor * output)
{
CheckNTErrors(XTensor::IsIdentical(input, output), "Input and Output are different in type or size!");
if (input->dataType == output->dataType)
return;
#ifdef USE_CUDA
/* run it on GPUs */
if (input->devID >= 0) {
CudaConvertDataType(input, output);
return;
}
#endif
if (input->dataType == X_FLOAT && output->dataType == X_INT) {
float * inputData = (float*)input->data;
int * outputData = (int*)output->data;
for (int i = 0; i < input->unitNum; i++)
outputData[i] = (int)inputData[i];
}
else if (input->dataType == X_INT && output->dataType == X_FLOAT) {
int * inputData = (int*)input->data;
float * outputData = (float*)output->data;
for (int i = 0; i < input->unitNum; i++)
outputData[i] = (float)inputData[i];
}
else
ShowNTErrors("Unsupported data types for conversion!");
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
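The CPU paths above are plain element-wise C casts. One behavior worth noting when relying on ConvertTensorDataType for X_FLOAT -> X_INT: the (int) cast truncates toward zero rather than rounding. A standalone illustration:

#include <cstdio>

int main()
{
    float in[4] = {1.9F, -1.9F, 0.5F, -0.5F};
    int out[4];
    for (int i = 0; i < 4; i++)
        out[i] = (int)in[i];            /* truncates toward zero */
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 1 -1 0 0 */
    return 0;
}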
@@ -21,6 +21,7 @@
 #include "../../XTensor.h"
 #include "../../XDevice.h"
+#include "ConvertDataType.cuh"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -49,6 +50,24 @@ void KernelFloat16ToFloat(__half * s, float * t, int size)
     }
 }

+__global__
+void KernelFloatToInt(float * inputData, int * outputData, int size)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < size){
+        outputData[i] = (int)(inputData[i]);
+    }
+}
+
+__global__
+void KernelIntToFloat(int * inputData, float * outputData, int size)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < size){
+        outputData[i] = (float)(inputData[i]);
+    }
+}
+
 /*
 data conversion (cuda code)
@@ -88,6 +107,39 @@ void CudaConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t,
     ProtectCudaDev(devID, devIDBackup);
 }

+/*
+convert data type (cuda code)
+>> input - input tensor
+>> output - output tensor
+*/
+void CudaConvertDataType(XTensor * input, XTensor * output)
+{
+    CheckNTErrors(XTensor::IsIdentical(input, output), "Input and Output are different in type or size!");
+
+    if (input->dataType == output->dataType)
+        return;
+
+    int gridSize[3];
+    int blockSize[3];
+
+    GDevs.GetCudaThread(input->devID, input->unitNum, gridSize, blockSize);
+
+    dim3 blocks(gridSize[0]);
+    dim3 threads(blockSize[0]);
+
+    int devIDBackup;
+    ProtectCudaDev(input->devID, devIDBackup);
+
+    if(input->dataType == X_FLOAT && output->dataType == X_INT)
+        KernelFloatToInt<<<blocks, threads>>>((float*)input->data, (int*)output->data, input->unitNum);
+    else if(input->dataType == X_INT && output->dataType == X_FLOAT)
+        KernelIntToFloat<<<blocks, threads>>>((int*)input->data, (float*)output->data, input->unitNum);
+    else{
+        ShowNTErrors("Unsupported data types for conversion!");
+    }
+
+    BacktoCudaDev(input->devID, devIDBackup);
+}
+
 #endif // USE_CUDA
 } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* convert data type from X_FLOAT to X_FLOAT16 (CUDA Kernel) */
__global__
void KernelFloatToFloat16(float * s, __half * t, int size);
/* convert data type from X_FLOAT16 to X_FLOAT (CUDA Kernel) */
__global__
void KernelFloat16ToFloat(__half * s, float * t, int size);
/* convert data type from X_FLOAT to X_INT (CUDA Kernel) */
__global__
void KernelFloatToInt(float * inputData, int * outputData, int size);
/* convert data type from X_INT to X_FLOAT (CUDA Kernel) */
__global__
void KernelIntToFloat(int * inputData, float * outputData, int size);
/* convert data type */
void CudaConvertDataType(XTensor * input, XTensor * output);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __CONVERTDATATYPE_H__
#define __CONVERTDATATYPE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* convert data type */
void ConvertTensorDataType(XTensor * input, XTensor * output);
} // namespace nts(NiuTrans.Tensor)
#endif // __CONVERTDATATYPE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "Log.h"
#include "Log.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its log value
>> a - the tensor we are processing
*/
void Log(XTensor * a)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaLog(a);
return;
}
#endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)log(d[i]);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Log.h"
#include "Log.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its log value (CUDA Kernel)
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelLog(DTYPE * d, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = log(d[i]);
}
/*
set each entry to its log value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelLog(__half * d, int size)
{
return;
}
/*
set each entry to its log value
>> a - the tensor
*/
extern "C"
void CudaLog(XTensor * a)
{
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelLog << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelLog << <blocks, threads >> >((__half*)a->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Log.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its log value (CUDA Kernel) */
__global__
void KernelLog(DTYPE * d, int size);
/* set each entry to its log value (CUDA Kernel) with float16 data type*/
__global__
void KernelLog(__half * d, int size);
/* set each entry to its log value */
extern "C"
void CudaLog(XTensor * a);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __LOG_H__
#define __LOG_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */
extern "C"
void Log(XTensor * a);
} // namespace nts(NiuTrans.Tensor)
#endif // __LOG_H__
@@ -37,6 +37,7 @@ __global__
 void KernelScaleAndShift(__half * a, __half * b, int size, __half scale, __half shift);

 /* scale and shift all tensor entries b = a * scale + shift (cuda version) */
+extern "C"
 void _CudaScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift);
 #endif // USE_CUDA
...
@@ -66,18 +66,19 @@ copy a number of blocks from source positions to target positions
 >> targetBlocks - target positions of the copy
 >> myMem - the memory pool
 */
-void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem)
+void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID)
 {
-    if (myMem != NULL && myMem->devID >= 0) {
+    if (myMem != NULL)
+        CheckNTErrors((myMem->devID == devID), "DevIDs are different between memory pool and input devID!");
+
+    if (devID >= 0) {
 #ifdef USE_CUDA
-        CudaCopyBlocksSelected(source, blockSize, sourceBlocks, blockNum, target, targetBlocks, myMem);
+        CudaCopyBlocksSelected(source, blockSize, sourceBlocks, blockNum, target, targetBlocks, myMem, devID);
 #else
         ShowNTErrors("Please specify USE_CUDA and recompile the code!");
 #endif
     }
     else {
-        int devID = myMem != NULL ? myMem->devID : -1;
         /*
         The following code should be fine with GPUs, but too many
         kernel calls would slow down the system. We prefer to use
...
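With the extra devID parameter, CopyBlocks no longer infers the device from the memory pool, so a GPU-resident tensor without a pool can now be handled. A call sketch under the new signature (s and t stand for source and target tensors; the index arrays and blockSizeInBytes are illustrative):

int srcIdx[2] = {0, 2};   /* illustrative block indices */
int tgtIdx[2] = {1, 3};

/* no memory pool: pass NULL and supply the device explicitly */
CopyBlocks(s->data, blockSizeInBytes, srcIdx, 2, t->data, tgtIdx, NULL, s->devID);

/* with a pool, its device must match devID (checked by the new guard) */
CopyBlocks(s->data, blockSizeInBytes, srcIdx, 2, t->data, tgtIdx, s->mem, s->devID);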
@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem);

 /* copy a number of blocks from source positions to target positions */
-void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem);
+void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID);
 } // namespace nts(NiuTrans.Tensor)
...
@@ -70,28 +70,33 @@ copy a number of blocks from source positions to target positions (cuda version)
 >> targetBlocks - target positions of the copy
 >> myMem - memory pool
 */
-void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem)
+void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID)
 {
-    CheckNTErrors((myMem != NULL), "No memory pool!");
-    CheckNTErrors((myMem->devID >= 0), "Wrong device to run!");
+    CheckNTErrors((devID >= 0), "Wrong device to run!");
     CheckNTErrors((blockSize % sizeof(DTYPE) == 0), "Unsupported block size!");

     /* copy the index to the GPU memory */
-    int * sourceBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
-    int * targetBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
-    XMemCopy(sourceBlocksTMP, myMem->devID, sourceBlocks, -1, blockNum * sizeof(int));
-    XMemCopy(targetBlocksTMP, myMem->devID, targetBlocks, -1, blockNum * sizeof(int));
+    int * sourceBlocksTMP = myMem != NULL ? (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) : (int *)XMemAlloc(devID, blockNum * sizeof(int));
+    int * targetBlocksTMP = myMem != NULL ? (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) : (int *)XMemAlloc(devID, blockNum * sizeof(int));
+    XMemCopy(sourceBlocksTMP, devID, sourceBlocks, -1, blockNum * sizeof(int));
+    XMemCopy(targetBlocksTMP, devID, targetBlocks, -1, blockNum * sizeof(int));

     int cudaGrids[3];
     int cudaBlocks[3];

-    GDevs.GetCudaThread2D(myMem->devID, blockSize / sizeof(DTYPE), blockNum, MAX_INT, cudaGrids, cudaBlocks);
+    GDevs.GetCudaThread2D(devID, blockSize / sizeof(DTYPE), blockNum, MAX_INT, cudaGrids, cudaBlocks);

     KernelCopyBlocksSelected << <dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
                              ((DTYPE*)source, blockSize / sizeof(DTYPE), sourceBlocksTMP, blockNum, (DTYPE*)target, targetBlocksTMP);

-    myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
-    myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
+    if (myMem != NULL) {
+        myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
+        myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
+    }
+    else {
+        XMemFree(devID, sourceBlocksTMP);
+        XMemFree(devID, targetBlocksTMP);
+    }
 }
 #endif // USE_CUDA
...
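The temporary-buffer handling above now works with or without a pool, and the pairing matters: pool buffers must be returned with ReleaseBuf, raw buffers with XMemFree. The pattern in isolation (bufSize is illustrative):

int * buf = myMem != NULL
            ? (int*)myMem->AllocBuf(myMem->devID, bufSize)   /* pooled buffer */
            : (int*)XMemAlloc(devID, bufSize);               /* raw device allocation */

/* ... use buf on the device ... */

if (myMem != NULL)
    myMem->ReleaseBuf(myMem->devID, bufSize);   /* return to the pool, by size */
else
    XMemFree(devID, buf);                       /* free the raw allocation */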
@@ -34,7 +34,7 @@ void KernelCopyBlocksSelected(DTYPE * source, int blockSize, int * sourceBlocks,
 /* copy a number of blocks from source positions to target positions (cuda version) */
 extern "C"
-void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem);
+void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID);
 #endif // USE_CUDA
...
@@ -84,7 +84,7 @@ bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSiz
         CheckNTErrors((tgtIndex[i] < blockNumTgt), "Index is out of range!");
     }

-    CopyBlocks(s->data, blockSizeSrc * s->unitSize, realSrcIndex, realIndexSize, t->data, realTgtIndex, s->mem);
+    CopyBlocks(s->data, blockSizeSrc * s->unitSize, realSrcIndex, realIndexSize, t->data, realTgtIndex, s->mem, s->devID);

     delete[] realSrcIndex;
     delete[] realTgtIndex;
...
@@ -27,6 +27,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

 /* copy s to t */
+extern "C"
 bool CopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL);
 } // namespace nts(NiuTrans.Tensor)
...
@@ -21,6 +21,7 @@
 #include <math.h>
 #include "Loss.h"
+#include "Loss.cuh"

 namespace nts{ // namespace nts(NiuTrans.Tensor)
@@ -43,6 +44,8 @@ compute the loss
 DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
                   bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg)
 {
+    DTYPE error = 0.0F;
+    if (output->devID < 0) {
     CheckNTErrors((gLen >= 0 && gLen <= output->unitNum), "Illegal input length!");
     CheckNTErrors((XTensor::IsIdentical(gold, output)), "The input tensors must be of the same size!");
     CheckNTErrors((gold->dimSizeRDI[0] == 1 && output->dimSizeRDI[0] == 1), "TODO!");
@@ -66,7 +69,6 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
     DTYPE * gp = (DTYPE*)gold->data;
     DTYPE * op = (DTYPE*)output->data;
-    DTYPE error = 0.0F;

     /*
     squared error
@@ -166,7 +168,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
         for(int k = 0; k < blockNum; k++){
             int size = stride * gLen;
             for(int i = 0; i < size; i++){
-                if(*(gp + gBeg + i) >= 1.0F)
+                if(*(gp + gBeg + i) < 1.0F)
                     continue;
                 DTYPE diff = *(gp + gBeg + i) - *(op + oBeg + i);
                 error += (DTYPE)0.5 * diff * diff;
@@ -174,6 +176,10 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
             }
         }
     }
+    }
+    else {
+        error = CudaLossCompute(gold, output, LFName, isLogOutput, leadDim, gBeg, gLen, oBeg);
+    }
     return error;
 }
@@ -374,17 +380,18 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
                   LOSS_FUNCTION_NAME LFName,
                   int leadDim, int tBeg, int tLen, int yBeg)
 {
-    CheckNTErrors((tLen < y->unitNum), "Illegal input length!");
+    if (y->devID < 0) {
+    CheckNTErrors((tLen <= y->unitNum), "Illegal input length!");
     CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
                   "The input tensors must be of the same size!");
-    CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[0] == 1), "TODO!");
-    CheckNTErrors((t->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
+    CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)), "Tensor must be on the same device!");
+    CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
     CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
                   "TODO!");

     int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
     if(leadDimRDI < 0){
-        leadDimRDI = y->dimSizeRDI[y->order - 1];
+        leadDimRDI = y->order - 1;
         tBeg = 0;
         yBeg = 0;
         tLen = y->dimSizeRDI[leadDimRDI];
@@ -457,10 +464,19 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
         }
     }
     else{
-        for(int i = 0; i < tLen; i++){
-            *(dedyp + yBeg + i) = -(DTYPE)*(tp + tBeg + i)/(DTYPE)*(yp + yBeg + i);
+        for (int i = 0; i < blockNum; i++) {
+            for (int j = 0; j < stride; j++) {
+                for (int k = 0; k < tLen; k++) {
+                    *(dedyp + i * stride * dimensionSize + j + stride * (yBeg + k)) = -(DTYPE)*(tp + i * stride * dimensionSize
+                        + j + stride * (tBeg + k)) / (DTYPE)*(yp + i * stride * dimensionSize + j + stride * (yBeg + k));
+                }
+            }
         }
     }
+    }
+    else {
+        CudaLossBackward(dedy, t, y, LFName, leadDim, tBeg, tLen, yBeg);
+    }
 }
...
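The rewritten cross-entropy branch of LossBackward addresses elements through a block/stride layout: element (block i, position k on the leading dimension, offset j within the stride) lives at flat index i * stride * dimensionSize + stride * k + j. A quick host-side check that this layout enumerates every element exactly once (shape values illustrative):

#include <cassert>

int main()
{
    int blockNum = 2, dimensionSize = 3, stride = 4;   /* illustrative shape */
    int next = 0;
    for (int i = 0; i < blockNum; i++)
        for (int k = 0; k < dimensionSize; k++)
            for (int j = 0; j < stride; j++)
                assert(i * stride * dimensionSize + stride * k + j == next++);
    /* offsets 0..23 visited exactly once, in order */
    return 0;
}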
@@ -22,6 +22,14 @@
 #include "Loss.h"
 #include "Loss.cuh"
 #include "../XDevice.h"
+#include "../core/math/Power.h"
+#include "../core/math/ScaleAndShift.h"
+#include "../core/math/Log.h"
+#include "../core/arithmetic/Negate.h"
+#include "../core/arithmetic/Sum.h"
+#include "../core/arithmetic/Multiply.h"
+#include "../core/reduce/ReduceSum.h"
+#include "../core/movement/CopyValues.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor)
@@ -46,7 +54,126 @@ compute the loss
 DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
                       bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
 {
-    return 0;
+    CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
+    CheckNTErrors((XTensor::IsIdentical(gold, y)), "The input tensors must be of the same size!");
+    CheckNTErrors((gold->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1), "TODO!");
+    CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
+    CheckNTErrors((gold->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
+                  "TODO!");
+    CheckNTErrors((gold->devID == y->devID), "Tensors must be on the same device!");
+    CheckNTErrors((gold->devID >= 0), "Tensors must be on GPU device!");
+    CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
+
+    if(isLogOutput)
+        return LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
+
+    DTYPE error = 0.0F;
+
+    /*
+    squared error
+    loss = sum_{i} 0.5*(gold_i - output_i)^2
+    where gold_i is the gold standard and output_i is the model prediction
+    */
+    if(LFName == SQUAREDERROR){
+        XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
+        _Sum(gold, y, diff, -1.0F);
+        Power(diff, 2.0F);
+        _ScaleAndShiftMe(diff, 0.5F, 0.0F);
+
+        int reduceTimes = diff->order;
+        for (int i = 0; i < reduceTimes; i++) {
+            int diffOrder = diff->order - 1;
+            int * diffDimSize = new int[diffOrder];
+            memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
+            XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
+            int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
+            ReduceSum(diff, diffNew, reducePlace);
+            if (diffNew->order == 1) {
+                diffNew->order = 2;
+                diffNew->dimSize[1] = diffNew->dimSize[0];
+                diffNew->dimSize[0] = 1;
+                diffNew->dimSizeRDI[1] = 1;
+            }
+            delete diff;
+            diff = diffNew;
+            delete[] diffDimSize;
+        }
+        error = diff->Get2D(0, 0);
+        delete diff;
+    }
+
+    /*
+    cross entropy
+    loss = sum_{i} (-gold_i * log(output_i))
+    where gold and output are distributions
+    */
+    if(LFName == CROSSENTROPY){
+        XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
+        CopyValues(y, diff);
+        Log(diff);
+        _Multiply(gold, diff, diff);
+        Negate(diff);
+
+        int reduceTimes = diff->order;
+        for (int i = 0; i < reduceTimes; i++) {
+            int diffOrder = diff->order - 1;
+            int * diffDimSize = new int[diffOrder];
+            memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
+            XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
+            int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
+            ReduceSum(diff, diffNew, reducePlace);
+            if (diffNew->order == 1) {
+                diffNew->order = 2;
+                diffNew->dimSize[1] = diffNew->dimSize[0];
+                diffNew->dimSize[0] = 1;
+                diffNew->dimSizeRDI[1] = 1;
+            }
+            delete diff;
+            diff = diffNew;
+            delete[] diffDimSize;
+        }
+        error = diff->Get2D(0, 0);
+        delete diff;
+    }
+
+    /*
+    one hot error
+    loss = sum_{i} e_i
+    where e_i = 0.5*(t_i - y_i)^2 if t_i = 1,
+          e_i = 0 otherwise
+    */
+    if(LFName == ONEHOTERROR){
+        XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
+        XTensor * yOnehot = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
+        CopyValues(y, yOnehot);
+        _Multiply(gold, y, yOnehot);
+        _Sum(gold, yOnehot, diff, -1.0F);
+        Power(diff, 2.0F);
+        _ScaleAndShiftMe(diff, 0.5F, 0.0F);
+
+        int reduceTimes = diff->order;
+        for (int i = 0; i < reduceTimes; i++) {
+            int diffOrder = diff->order - 1;
+            int * diffDimSize = new int[diffOrder];
+            memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
+            XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
+            int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
+            ReduceSum(diff, diffNew, reducePlace);
+            if (diffNew->order == 1) {
+                diffNew->order = 2;
+                diffNew->dimSize[1] = diffNew->dimSize[0];
+                diffNew->dimSize[0] = 1;
+                diffNew->dimSizeRDI[1] = 1;
+            }
+            delete diff;
+            diff = diffNew;
+            delete[] diffDimSize;
+        }
+        error = diff->Get2D(0, 0);
+        delete diff;
+        delete yOnehot;
+    }
+
+    return error;

     // TODO: call cuda kernels for computing the errors
 }
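The SQUAREDERROR branch above composes _Sum (with beta = -1), Power, _ScaleAndShiftMe, and repeated ReduceSum; whatever the composition, the scalar it must produce is the direct formula sum_i 0.5*(gold_i - y_i)^2. A host-side check of that target value (data illustrative):

#include <cassert>
#include <cmath>

int main()
{
    float gold[4] = {1.0F, 0.0F, 0.0F, 1.0F};
    float y[4]    = {0.5F, 0.5F, 0.0F, 1.0F};

    /* error = sum_i 0.5 * (gold_i - y_i)^2 */
    float error = 0.0F;
    for (int i = 0; i < 4; i++) {
        float d = gold[i] - y[i];
        error += 0.5F * d * d;
    }
    assert(std::fabs(error - 0.25F) < 1e-6F);   /* 0.5*0.25 + 0.5*0.25 */
    return 0;
}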
@@ -140,13 +267,25 @@ backward computation for cross entropy (Cuda kernel)
 >> size - size of the vector (dedy)
 */
 extern "C" __global__
-void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
+void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size){
-        dedy[i] = -t[i]/y[i];
-    }
+    if (i >= stride * dimensionSize * blockNum)
+        return;
+
+    int blockNumIndex = i / (stride * dimensionSize);
+    int blockNumTail = i % (stride * dimensionSize);
+    int dimensionSizeIndex = blockNumTail / stride;
+    int strideIndex = blockNumTail % stride;
+
+    if (dimensionSizeIndex >= tLen)
+        return;
+
+    dedy[blockNumIndex * stride * dimensionSize + strideIndex + stride * (yBeg + dimensionSizeIndex)] = -t[blockNumIndex * stride * dimensionSize +
+        strideIndex + stride * (tBeg + dimensionSizeIndex)] / y[blockNumIndex * stride * dimensionSize + strideIndex + stride * (yBeg + dimensionSizeIndex)];
+    /*if (i < size){
+        dedy[i] = -t[i]/y[i];
+    }*/
 }

 /*
@@ -193,9 +332,11 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
                       LOSS_FUNCTION_NAME LFName,
                       int leadDim, int tBeg, int tLen, int yBeg)
 {
+    CheckNTErrors((tLen <= y->unitNum), "Illegal input length!");
     CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
                   "The input tensors must be of the same size!");
-    CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[1] == 1), "TODO!");
+    CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)), "Tensor must be on the same device!");
+    CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
     CheckNTErrors((t->dataType == DEFAULT_DTYPE &&
                    y->dataType == DEFAULT_DTYPE &&
                    dedy->dataType == DEFAULT_DTYPE),
@@ -208,21 +349,25 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
                   "The vectors must be on the same GPU.");
     CheckNTErrors((tBeg == yBeg), "TODO!");

-    int leadDimRDI = y->order - leadDim - 1;
+    int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
     if(leadDimRDI < 0){
-        leadDimRDI = y->dimSizeRDI[y->order - 1];
+        leadDimRDI = y->order - 1;
         tBeg = 0;
         yBeg = 0;
         tLen = y->dimSizeRDI[leadDimRDI];
     }

+    int dimensionSize = y->dimSizeRDI[leadDimRDI];
     int stride = 1;
     int blockSize = 1;
+    int blockNum = 1;
     int size = 1;

     for(int i = 0; i < leadDimRDI; i++)
         stride *= y->dimSizeRDI[i];
     size = tLen * stride;
+    blockSize = stride * dimensionSize;
+    blockNum = y->unitNum / blockSize;

     int cudaGridSize[3], cudaBlockSize[3];
@@ -265,7 +410,7 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
         ShowNTErrors("TODO!");
     }
     else if(size == y->unitNum){
-        KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tLen);
+        KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
     }
     else{
         KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
...
@@ -97,7 +97,7 @@ void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y,
     if (i < size){
         DTYPE s = x[i];
         if(s >= 0)
-            dedx[i] = 1;
+            dedx[i] = dedy[i];
         else
             dedx[i] = 0;
     }
...
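The rectifier-backward fix matters for any upstream gradient other than 1: dE/dx must equal dE/dy where x >= 0 (the old code wrote the constant 1) and 0 elsewhere. A host-side check of the corrected rule:

#include <cassert>

int main()
{
    float x[3]    = {-2.0F, 0.0F, 3.0F};
    float dedy[3] = { 0.7F, 0.7F, 0.7F};
    float dedx[3];

    for (int i = 0; i < 3; i++)
        dedx[i] = x[i] >= 0 ? dedy[i] : 0.0F;   /* pass the gradient through */

    assert(dedx[0] == 0.0F && dedx[1] == 0.7F && dedx[2] == 0.7F);
    return 0;
}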
@@ -248,7 +248,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                   "Unknown loss function.");

     if(lossName == CROSSENTROPY || lossName == SQUAREDERROR){
-        ShowNTErrors("TODO!");
+        _Sum(y, gold, dedx, -1.0F);
     }
     else if(lossName == ONEHOTERROR){
         ShowNTErrors("TODO!");
...
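The _Sum(y, gold, dedx, -1.0F) call computes dedx = y - gold, the standard closed form for the gradient of cross entropy taken through a softmax layer. A numeric sanity check of that identity against a finite difference (plain C++, values illustrative):

#include <cassert>
#include <cmath>

int main()
{
    float x[3] = {1.0F, 2.0F, 3.0F};
    float gold[3] = {0.0F, 0.0F, 1.0F};   /* one-hot target at index 2 */

    /* softmax of x */
    float z = 0.0F, y[3];
    for (int i = 0; i < 3; i++)
        z += std::exp(x[i]);
    for (int i = 0; i < 3; i++)
        y[i] = std::exp(x[i]) / z;

    float loss = -std::log(y[2]);         /* cross entropy for a one-hot gold */

    /* analytic gradient of the loss w.r.t. x is y - gold */
    for (int i = 0; i < 3; i++) {
        float dedx = y[i] - gold[i];

        /* finite-difference estimate of the same derivative */
        float eps = 1e-3F;
        float xp[3] = {x[0], x[1], x[2]};
        xp[i] += eps;
        float zp = 0.0F;
        for (int j = 0; j < 3; j++)
            zp += std::exp(xp[j]);
        float lossP = -std::log(std::exp(xp[2]) / zp);

        assert(std::fabs((lossP - loss) / eps - dedx) < 1e-2F);
    }
    return 0;
}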
@@ -483,9 +483,9 @@ bool TestConcatenate4()
     delete sGPU1;
     delete sGPU2;
     delete tGPU;
-    delete[] sDimSize1;
-    delete[] sDimSize2;
-    delete[] tDimSize;
+    //delete[] sDimSize1;
+    //delete[] sDimSize2;
+    //delete[] tDimSize;
     return cpuTest && gpuTest;
 #else
...
...@@ -30,15 +30,15 @@ Identity function: y = x ...@@ -30,15 +30,15 @@ Identity function: y = x
*/ */
bool TestIdentity1() bool TestIdentity1()
{ {
/* a input tensor of size (2, 3) */ /* a tensor of size (2, 3) */
int sOrder = 2; int order = 2;
int * sDimSize = new int[sOrder]; int * dimSize = new int[order];
sDimSize[0] = 2; dimSize[0] = 2;
sDimSize[1] = 3; dimSize[1] = 3;
int sUnitNum = 1; int unitNum = 1;
for (int i = 0; i < sOrder; i++) for (int i = 0; i < order; i++)
sUnitNum *= sDimSize[i]; unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F}, DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
{0.5F, 0.7F, 1.4F} }; {0.5F, 0.7F, 1.4F} };
...@@ -49,47 +49,50 @@ bool TestIdentity1() ...@@ -49,47 +49,50 @@ bool TestIdentity1()
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(sOrder, sDimSize); XTensor * y = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, unitNum);
y->SetZeroAll(); y->SetZeroAll();
/* call Identity function */ /* call Identity function */
Identity(x, y); Identity(x, y);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, sUnitNum); cpuTest = y->CheckData(answer, unitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Identity function */ /* call Identity function */
Identity(xGPU, yGPU); Identity(xGPU, yGPU);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, sUnitNum); gpuTest = yGPU->CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete x, y; delete x;
delete xGPU, yGPU; delete y;
delete[] sDimSize; delete xGPU;
delete yGPU;
delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y; delete x;
delete[] sDimSize; delete y;
delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -98,35 +101,39 @@ bool TestIdentity1() ...@@ -98,35 +101,39 @@ bool TestIdentity1()
/* /*
case 2: test IdentityBackward function. case 2: test IdentityBackward function.
IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
In this case, lossName=CROSSENTROPY.
*/ */
bool TestIdentity2() bool TestIdentity2()
{ {
int sOrder = 2; /* a tensor of size (2, 3) */
int * sDimSize = new int[sOrder]; int order = 2;
sDimSize[0] = 1; int * dimSize = new int[order];
sDimSize[1] = 3; dimSize[0] = 1;
dimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++) int unitNum = 1;
sUnitNum *= sDimSize[i]; for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} }; DTYPE xData[3] = {1.0F, 1.0F, 2.0F};
DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F}; DTYPE gData[3] = {0.0F, 0.0F, 1.0F};
DTYPE yAnswer[3] = {1.0F, 1.0F, 2.0F};
DTYPE dedyAnswer[3] = {0.0F, 0.0F, -0.5F};
DTYPE dedxAnswer[3] = {0.0F, 0.0F, -0.5F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(sOrder, sDimSize); XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(sOrder, sDimSize); XTensor * g = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize); XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize); XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, unitNum);
g->SetData(gData, sUnitNum); g->SetData(gData, unitNum);
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
dedy->SetZeroAll(); dedy->SetZeroAll();
...@@ -138,22 +145,24 @@ bool TestIdentity2() ...@@ -138,22 +145,24 @@ bool TestIdentity2()
IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY); IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */ /* check result */
cpuTest = dedx->CheckData(dedxAnswer, sUnitNum, 1e-4F); cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedy->CheckData(dedyAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, sUnitNum); gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll(); dedyGPU->SetZeroAll();
...@@ -165,7 +174,9 @@ bool TestIdentity2() ...@@ -165,7 +174,9 @@ bool TestIdentity2()
IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */ /* check result */
gpuTest = dedxGPU->CheckData(dedxAnswer, sUnitNum, 1e-4F); gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
...@@ -178,7 +189,7 @@ bool TestIdentity2() ...@@ -178,7 +189,7 @@ bool TestIdentity2()
delete gGPU; delete gGPU;
delete dedxGPU; delete dedxGPU;
delete dedyGPU; delete dedyGPU;
delete[] sDimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -188,7 +199,7 @@ bool TestIdentity2() ...@@ -188,7 +199,7 @@ bool TestIdentity2()
delete g; delete g;
delete dedx; delete dedx;
delete dedy; delete dedy;
delete[] sDimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
......
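The expected values above follow from the identity activation paired with a cross-entropy loss: y = x, dE/dy_i = -t_i / y_i, and dE/dx = dE/dy since dy/dx = 1. A minimal standalone C++ sketch (not library code; it reuses the test's xData and gData) that reproduces dedyAnswer and dedxAnswer:

#include <cstdio>

int main()
{
    const int n = 3;
    float x[n] = {1.0F, 1.0F, 2.0F};    /* xData; y = x under the identity function */
    float g[n] = {0.0F, 0.0F, 1.0F};    /* gData (gold standard) */
    for (int i = 0; i < n; i++) {
        float dedy = -g[i] / x[i];      /* cross-entropy gradient w.r.t. y */
        float dedx = dedy;              /* identity: dy/dx = 1 */
        printf("%g %g\n", dedy, dedx);  /* dedy and dedx: 0, 0, -0.5 */
    }
    return 0;
}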
...@@ -20,15 +20,15 @@ ...@@ -20,15 +20,15 @@
*/ */
#include "../core/math/ScaleAndShift.h" #include "../core/math/ScaleAndShift.h"
#include "../function/Loss.h" #include "TLoss.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
case 1: test LossCompute function case 1: test LossCompute function.
In this case, Loss function name = SQUAREDERROR. In this case, Loss function name = SQUAREDERROR.
loss = sum_{i} 0.5*(t_i - y_i)^2, loss = sum_{i} 0.5*(t_i - y_i)^2,
where t_i is the gold standard and y_i is the model output where t_i is the gold standard and y_i is the model output.
*/ */
bool TestLoss1() bool TestLoss1()
{ {
...@@ -102,10 +102,10 @@ bool TestLoss1() ...@@ -102,10 +102,10 @@ bool TestLoss1()
} }
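As a quick sanity check on the SQUAREDERROR formula above, a standalone C++ sketch; the t and y values here are illustrative, not taken from the test:

#include <cstdio>

int main()
{
    const int n = 3;
    float t[n] = {0.5F, 1.0F, 1.5F};    /* gold standard (illustrative) */
    float y[n] = {0.5F, 1.0F, 2.0F};    /* model output (illustrative) */
    float loss = 0.0F;
    for (int i = 0; i < n; i++)
        loss += 0.5F * (t[i] - y[i]) * (t[i] - y[i]);
    printf("squared error = %f\n", loss);   /* 0.5 * 0.5^2 = 0.125 */
    return 0;
}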
/* /*
case 2: test LossCompute function case 2: test LossCompute function.
In this case, Loss function name = CROSSENTROPY. In this case, Loss function name = CROSSENTROPY.
loss = sum_{i} (-t_i * log(y_i)) loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output where t_i is the gold standard and y_i is the model output.
*/ */
bool TestLoss2() bool TestLoss2()
{ {
...@@ -179,10 +179,10 @@ bool TestLoss2() ...@@ -179,10 +179,10 @@ bool TestLoss2()
} }
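The CROSSENTROPY formula can be checked the same way; again the values are illustrative:

#include <cmath>
#include <cstdio>

int main()
{
    const int n = 3;
    float t[n] = {0.0F, 0.0F, 1.0F};    /* one-hot gold standard (illustrative) */
    float y[n] = {0.25F, 0.25F, 0.5F};  /* model output (illustrative) */
    float loss = 0.0F;
    for (int i = 0; i < n; i++)
        loss += -t[i] * logf(y[i]);
    printf("cross entropy = %f\n", loss);   /* -log(0.5) = 0.6931 */
    return 0;
}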
/* /*
case 3: test LossCompute function case 3: test LossCompute function.
In this case, Loss function name = ONEHOTERROR. In this case, Loss function name = ONEHOTERROR.
loss = sum_{i} e_i loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise.
*/ */
bool TestLoss3() bool TestLoss3()
{ {
......
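Likewise for ONEHOTERROR, where only cells with t_i = 1 contribute to the sum; illustrative values again:

#include <cstdio>

int main()
{
    const int n = 3;
    float t[n] = {0.0F, 1.0F, 0.0F};    /* gold standard (illustrative) */
    float y[n] = {0.3F, 0.6F, 0.1F};    /* model output (illustrative) */
    float loss = 0.0F;
    for (int i = 0; i < n; i++)
        if (t[i] == 1.0F)               /* e_i = 0 unless t_i = 1 */
            loss += 0.5F * (t[i] - y[i]) * (t[i] - y[i]);
    printf("one-hot error = %f\n", loss);   /* 0.5 * 0.4^2 = 0.08 */
    return 0;
}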
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/ */
#include "../XTensor.h"
#include "TMatrixMulBatched.h" #include "TMatrixMulBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -29,25 +29,15 @@ In this case, y = max(0, x) ...@@ -29,25 +29,15 @@ In this case, y = max(0, x)
*/ */
bool TestRectify1() bool TestRectify1()
{ {
/* a x tensor of size (2, 3) */ /* a tensor of size (2, 3) */
int xOrder = 2; int order = 2;
int * xDimSize = new int[xOrder]; int * dimSize = new int[order];
xDimSize[0] = 2; dimSize[0] = 2;
xDimSize[1] = 3; dimSize[1] = 3;
int xUnitNum = 1; int unitNum = 1;
for (int i = 0; i < xOrder; i++) for (int i = 0; i < order; i++)
xUnitNum *= xDimSize[i]; unitNum *= dimSize[i];
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
DTYPE xData[2][3] = { {0.0F, -1.0F, 2.0F}, DTYPE xData[2][3] = { {0.0F, -1.0F, 2.0F},
{3.0F, -4.0F, -5.0F} }; {3.0F, -4.0F, -5.0F} };
...@@ -58,52 +48,50 @@ bool TestRectify1() ...@@ -58,52 +48,50 @@ bool TestRectify1()
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(yOrder, yDimSize); XTensor * y = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, xUnitNum); x->SetData(xData, unitNum);
y->SetZeroAll(); y->SetZeroAll();
/* call Rectify function */ /* call Rectify function */
Rectify(x, y); Rectify(x, y);
/* check results */ /* check results */
cpuTest = y->CheckData(answer, yUnitNum); cpuTest = y->CheckData(answer, unitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, xUnitNum); xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Rectify function */ /* call Rectify function */
Rectify(xGPU, yGPU); Rectify(xGPU, yGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, yUnitNum); gpuTest = yGPU->CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete x; delete x;
delete y; delete y;
delete xGPU; delete xGPU;
delete yGPU; delete yGPU;
delete[] xDimSize; delete[] dimSize;
delete[] yDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x; delete x;
delete y; delete y;
delete[] xDimSize; delete[] dimSize;
delete[] yDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
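The answer tensor in TestRectify1 is simply max(0, x) applied elementwise to xData; a standalone sketch:

#include <cstdio>

int main()
{
    const int n = 6;
    float x[n] = {0.0F, -1.0F, 2.0F, 3.0F, -4.0F, -5.0F};  /* xData, flattened */
    for (int i = 0; i < n; i++)
        printf("%g ", x[i] > 0.0F ? x[i] : 0.0F);   /* 0 0 2 3 0 0 */
    printf("\n");
    return 0;
}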
...@@ -117,73 +105,83 @@ In this case, lossName=CROSSENTROPY. ...@@ -117,73 +105,83 @@ In this case, lossName=CROSSENTROPY.
*/ */
bool TestRectify2() bool TestRectify2()
{ {
/* a x tensor of size (2, 3) */ /* a tensor of size (2, 3) */
int xOrder = 2; int order = 2;
int * xDimSize = new int[xOrder]; int * dimSize = new int[order];
xDimSize[0] = 2; dimSize[0] = 2;
xDimSize[1] = 3; dimSize[1] = 3;
int xUnitNum = 1; int unitNum = 1;
for (int i = 0; i < xOrder; i++) for (int i = 0; i < order; i++)
xUnitNum *= xDimSize[i]; unitNum *= dimSize[i];
DTYPE xData[2][3] = { {1.0F, 1.0F, 2.0F}, DTYPE xData[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} }; {2.0F, 4.0F, 5.0F} };
DTYPE yData[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F}, DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} }; {1.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-1.0F, -1.0F, -0.5F}, DTYPE yAnswer[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE dedyAnswer[2][3] = { {-1.0F, -1.0F, -0.5F},
{-0.5F, -0.25F, -0.2F} }; {-0.5F, -0.25F, -0.2F} };
DTYPE answer[2][3] = { {-1.0F, -1.0F, -0.5F}, DTYPE dedxAnswer[2][3] = { {-1.0F, -1.0F, -0.5F},
{-0.5F, -0.25F, -0.2F} }; {-0.5F, -0.25F, -0.2F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(xOrder, xDimSize); XTensor * y = NewTensor(order, dimSize);
XTensor * gold = NewTensor(xOrder, xDimSize); XTensor * gold = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(xOrder, xDimSize); XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(xOrder, xDimSize); XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, xUnitNum); x->SetData(xData, unitNum);
y->SetData(yData, xUnitNum); gold->SetData(goldData, unitNum);
gold->SetData(goldData, xUnitNum); y->SetZeroAll();
dedy->SetData(dedyData, xUnitNum); dedy->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call Rectify function */
Rectify(x, y);
/* call RectifyBackward function */ /* call RectifyBackward function */
RectifyBackward(gold, y, x, dedy, dedx, NOLOSS); RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
/* check results */ /* check results */
cpuTest = dedx->CheckData(answer, xUnitNum); cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedy->CheckData(dedyAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, xUnitNum); xGPU->SetData(xData, unitNum);
yGPU->SetData(yData, xUnitNum); goldGPU->SetData(goldData, unitNum);
goldGPU->SetData(goldData, xUnitNum); yGPU->SetZeroAll();
dedyGPU->SetData(dedyData, xUnitNum); dedyGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call Rectify function */
Rectify(xGPU, yGPU);
/* call RectifyBackward function */ /* call RectifyBackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS); RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */ /* check results */
gpuTest = dedxGPU->CheckData(answer, xUnitNum); gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
...@@ -196,7 +194,7 @@ bool TestRectify2() ...@@ -196,7 +194,7 @@ bool TestRectify2()
delete dedyGPU; delete dedyGPU;
delete dedxGPU; delete dedxGPU;
delete goldGPU; delete goldGPU;
delete[] xDimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -206,7 +204,7 @@ bool TestRectify2() ...@@ -206,7 +204,7 @@ bool TestRectify2()
delete dedy; delete dedy;
delete dedx; delete dedx;
delete gold; delete gold;
delete[] xDimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
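The expected gradients in TestRectify2 combine the cross-entropy gradient dE/dy_i = -t_i / y_i with the rectifier derivative, which is 1 for x > 0 and 0 otherwise. A standalone sketch over the flattened test data (all x here are positive, so dedx equals dedy):

#include <cstdio>

int main()
{
    const int n = 6;
    float x[n]    = {1.0F, 1.0F, 2.0F, 2.0F, 4.0F, 5.0F};  /* xData, flattened */
    float gold[n] = {1.0F, 1.0F, 1.0F, 1.0F, 1.0F, 1.0F};  /* goldData */
    for (int i = 0; i < n; i++) {
        float y    = x[i] > 0.0F ? x[i] : 0.0F;  /* rectify */
        float dedy = -gold[i] / y;               /* cross-entropy gradient */
        float dedx = x[i] > 0.0F ? dedy : 0.0F;  /* gradient passes where x > 0 */
        printf("%g %g\n", dedy, dedx);  /* -1, -1, -0.5, -0.5, -0.25, -0.2 */
    }
    return 0;
}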
...@@ -220,7 +218,7 @@ TODO!! ...@@ -220,7 +218,7 @@ TODO!!
/* test for Rectify Function */ /* test for Rectify Function */
bool TestRectify() bool TestRectify()
{ {
XPRINT(0, stdout, "[TEST RECTIFY] test rectify and its backward computation \n"); XPRINT(0, stdout, "[TEST RECTIFY] rectify function and its backward computation \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -23,8 +23,7 @@ ...@@ -23,8 +23,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension. /* case 1: set the cell to the ascending order along a given dimension. */
*/
bool TestSetAscendingOrder1() bool TestSetAscendingOrder1()
{ {
/* an input tensor of size (2, 4) */ /* an input tensor of size (2, 4) */
...@@ -50,7 +49,6 @@ bool TestSetAscendingOrder1() ...@@ -50,7 +49,6 @@ bool TestSetAscendingOrder1()
s->SetZeroAll(); s->SetZeroAll();
/* call SetAscendingOrder function */ /* call SetAscendingOrder function */
s->SetAscendingOrder(1); s->SetAscendingOrder(1);
/* check results */ /* check results */
......
...@@ -23,7 +23,10 @@ ...@@ -23,7 +23,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension. */ /*
case 1: test SetDataRand function.
set the tensor items by a uniform distribution in range [lower, upper].
*/
bool TestSetData1() bool TestSetData1()
{ {
/* an input tensor of size (2, 4) */ /* an input tensor of size (2, 4) */
...@@ -44,7 +47,7 @@ bool TestSetData1() ...@@ -44,7 +47,7 @@ bool TestSetData1()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
/* call SetData function */ /* call SetDataRand function */
s->SetDataRand(0.0, 1.0); s->SetDataRand(0.0, 1.0);
/* check results */ /* check results */
......
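The uniform draw described above is conventionally obtained by scaling a [0, 1] sample into [lower, upper], i.e. v = lower + (upper - lower) * u. A minimal sketch of that scaling with the C rand() generator (independent of XTensor):

#include <cstdio>
#include <cstdlib>

int main()
{
    float lower = 0.0F, upper = 1.0F;
    for (int i = 0; i < 4; i++) {
        /* map a uniform [0, 1] draw into [lower, upper] */
        float v = lower + (upper - lower) * (float)rand() / RAND_MAX;
        printf("%f\n", v);
    }
    return 0;
}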
...@@ -25,102 +25,71 @@ ...@@ -25,102 +25,71 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
case 1: test Sigmoid function and SigmoidBackward function. case 1: test Sigmoid function.
sigmoid function: y = 1/(1+exp(-x)) sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/ */
bool TestSigmoid1() bool TestSigmoid1()
{ {
/* an input tensor of size (3) */ /* an input tensor of size (3) */
int sOrder = 1; int order = 1;
int * sDimSize = new int[sOrder]; int * dimSize = new int[order];
sDimSize[0] = 3; dimSize[0] = 3;
int sUnitNum = 1; int unitNum = 1;
for (int i = 0; i < sOrder; i++) for (int i = 0; i < order; i++)
sUnitNum *= sDimSize[i]; unitNum *= dimSize[i];
DTYPE xData[3] = {0.0F, 1.0F, 2.0F}; DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F}; DTYPE answer[3] = {0.5F, 0.7311F, 0.8808F};
DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(sOrder, sDimSize); XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(sOrder, sDimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, unitNum);
g->SetData(gData, sUnitNum);
dedy->SetData(dedyData, sUnitNum);
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(x, y); Sigmoid(x, y);
/* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, NOLOSS);
/* check result */ /* check result */
cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum); cpuTest = y->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, sUnitNum);
dedyGPU->SetData(dedyData, sUnitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(xGPU, yGPU); Sigmoid(xGPU, yGPU);
/* call SigmoidBackward function */
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
delete y; delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU; delete xGPU;
delete yGPU; delete yGPU;
delete gGPU; delete[] dimSize;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x; delete x;
delete y; delete y;
delete g; delete[] dimSize;
delete dedx;
delete dedy;
delete[] sDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
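The answer values in TestSigmoid1 come directly from y = 1/(1+exp(-x)); a standalone check:

#include <cmath>
#include <cstdio>

int main()
{
    const int n = 3;
    float x[n] = {0.0F, 1.0F, 2.0F};   /* xData */
    for (int i = 0; i < n; i++)
        printf("%.4f\n", 1.0F / (1.0F + expf(-x[i])));  /* 0.5000 0.7311 0.8808 */
    return 0;
}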
...@@ -129,70 +98,72 @@ bool TestSigmoid1() ...@@ -129,70 +98,72 @@ bool TestSigmoid1()
/* /*
case 2: test Sigmoid function and SigmoidBackward function. case 2: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x)) sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx backward computation:
dE/dx = dE/dy * dy/dx
dy/dx = y * (1 - y)
In this case, LossName=CROSSENTROPY.
*/ */
bool TestSigmoid2() bool TestSigmoid2()
{ {
/* an input tensor of size (3) */ /* an input tensor of size (3) */
int sOrder = 1; int order = 1;
int * sDimSize = new int[sOrder]; int * dimSize = new int[order];
sDimSize[0] = 3; dimSize[0] = 3;
int sUnitNum = 1; int unitNum = 1;
for (int i = 0; i < sOrder; i++) for (int i = 0; i < order; i++)
sUnitNum *= sDimSize[i]; unitNum *= dimSize[i];
DTYPE xData[3] = {0.0F, 1.0F, 2.0F}; DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F}; DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F}; DTYPE yAnswer[3] = {0.5F, 0.7311F, 0.8808F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F}; DTYPE dedyAnswer[3] = {-0.8F, -1.0943F, -1.1353F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F}; DTYPE dedxAnswer[3] = {-0.2F, -0.2151F, -0.1192F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(sOrder, sDimSize); XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(sOrder, sDimSize); XTensor * g = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize); XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize); XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, unitNum);
g->SetData(gData, sUnitNum); g->SetData(gData, unitNum);
dedy->SetZeroAll();
y->SetZeroAll(); y->SetZeroAll();
dedy->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(x, y); Sigmoid(x, y);
/* initialize variables */
dedy->SetData(dedyData, sUnitNum);
/* call SigmoidBackward function */ /* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY); SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */ /* check result */
cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum); cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedy->CheckData(dedyAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, sUnitNum); gGPU->SetData(gData, unitNum);
dedyGPU->SetZeroAll();
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedyGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
...@@ -202,8 +173,9 @@ bool TestSigmoid2() ...@@ -202,8 +173,9 @@ bool TestSigmoid2()
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum); gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
delete y; delete y;
...@@ -215,7 +187,7 @@ bool TestSigmoid2() ...@@ -215,7 +187,7 @@ bool TestSigmoid2()
delete gGPU; delete gGPU;
delete dedxGPU; delete dedxGPU;
delete dedyGPU; delete dedyGPU;
delete[] sDimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -225,7 +197,7 @@ bool TestSigmoid2() ...@@ -225,7 +197,7 @@ bool TestSigmoid2()
delete g; delete g;
delete dedx; delete dedx;
delete dedy; delete dedy;
delete[] sDimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
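TestSigmoid2's expected gradients combine the cross-entropy gradient dE/dy_i = -g_i / y_i with dy/dx = y * (1 - y); a standalone check that agrees with yAnswer, dedyAnswer, and dedxAnswer within the test's 1e-4 tolerance:

#include <cmath>
#include <cstdio>

int main()
{
    const int n = 3;
    float x[n] = {0.0F, 1.0F, 2.0F};   /* xData */
    float g[n] = {0.4F, 0.8F, 1.0F};   /* gData */
    for (int i = 0; i < n; i++) {
        float y    = 1.0F / (1.0F + expf(-x[i]));  /* sigmoid */
        float dedy = -g[i] / y;                    /* cross-entropy gradient */
        float dedx = dedy * y * (1.0F - y);        /* chain rule */
        printf("%.4f %.4f %.4f\n", y, dedy, dedx);
    }
    /* rows: 0.5000 -0.8000 -0.2000 | 0.7311 -1.0943 -0.2152 | 0.8808 -1.1353 -0.1192 */
    return 0;
}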
...@@ -252,6 +224,16 @@ bool TestSigmoid() ...@@ -252,6 +224,16 @@ bool TestSigmoid()
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestSigmoid2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -31,68 +31,69 @@ softmax function: y = e^x / \sum_{i} e^{x_i} ...@@ -31,68 +31,69 @@ softmax function: y = e^x / \sum_{i} e^{x_i}
*/ */
bool TestSoftmax1() bool TestSoftmax1()
{ {
/* a input tensor of size (2, 3) */ /* a tensor of size (2, 3) */
int sOrder = 2; int order = 2;
int * sDimSize = new int[sOrder]; int * dimSize = new int[order];
sDimSize[0] = 2; dimSize[0] = 2;
sDimSize[1] = 3; dimSize[1] = 3;
int sUnitNum = 1; int unitNum = 1;
for (int i = 0; i < sOrder; i++) for (int i = 0; i < order; i++)
sUnitNum *= sDimSize[i]; unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F}, DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
{0.5F, 0.7F, 1.4F} }; {0.5F, 0.7F, 1.4F} };
DTYPE answer[2][3] = { {0.09003057F, 0.24472848F, 0.66524094F}, DTYPE answer[2][3] = { {0.0900F, 0.2447F, 0.6652F},
{0.21362929F, 0.2609274F , 0.52544326F} }; {0.2136F, 0.2609F, 0.5254F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(sOrder, sDimSize); XTensor * y = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, unitNum);
y->SetZeroAll(); y->SetZeroAll();
/* call Softmax function */ /* call Softmax function */
Softmax(x, y, 1); Softmax(x, y, 1);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, sUnitNum); cpuTest = y->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Softmax function */ /* call Softmax function */
Softmax(xGPU, yGPU, 1); Softmax(xGPU, yGPU, 1);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, sUnitNum); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
delete y; delete y;
delete xGPU; delete xGPU;
delete yGPU; delete yGPU;
delete[] sDimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y; delete x;
delete[] sDimSize; delete y;
delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
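TestSoftmax1's first answer row {0.0900, 0.2447, 0.6652} is e^{x_i} / sum_j e^{x_j} computed over the first row of xData; a standalone check:

#include <cmath>
#include <cstdio>

int main()
{
    const int n = 3;
    float x[n] = {0.0F, 1.0F, 2.0F};   /* first row of xData */
    float sum = 0.0F;
    for (int i = 0; i < n; i++)
        sum += expf(x[i]);             /* normalizer: sum_j e^{x_j} */
    for (int i = 0; i < n; i++)
        printf("%.4f\n", expf(x[i]) / sum);   /* 0.0900 0.2447 0.6652 */
    return 0;
}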
...@@ -101,36 +102,38 @@ bool TestSoftmax1() ...@@ -101,36 +102,38 @@ bool TestSoftmax1()
/* /*
case 2: test SoftmaxBackward function. case 2: test SoftmaxBackward function.
SoftmaxBackward function: dE/dx_j = -gold_j + y_j SoftmaxBackward function: dE/dx_j = -gold_j + y_j
In this case, LossName=CROSSENTROPY.
*/ */
bool TestSoftmax2() bool TestSoftmax2()
{ {
/* an input tensor of size (1, 3) */ /* an input tensor of size (1, 3) */
int sOrder = 2; int order = 2;
int * sDimSize = new int[sOrder]; int * dimSize = new int[order];
sDimSize[0] = 1; dimSize[0] = 1;
sDimSize[1] = 3; dimSize[1] = 3;
int sUnitNum = 1; int unitNum = 1;
for (int i = 0; i < sOrder; i++) for (int i = 0; i < order; i++)
sUnitNum *= sDimSize[i]; unitNum *= dimSize[i];
DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} }; DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} }; DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };
DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F}; DTYPE yAnswer[1][3] = { {0.0900F, 0.2447F, 0.6652F} };
DTYPE dedxAnswer[1][3] = { {0.0900F, 0.2447F, -0.3347F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(sOrder, sDimSize); XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(sOrder, sDimSize); XTensor * g = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize); XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize); XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, unitNum);
g->SetData(gData, sUnitNum); g->SetData(gData, unitNum);
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
dedy->SetZeroAll(); dedy->SetZeroAll();
...@@ -138,25 +141,27 @@ bool TestSoftmax2() ...@@ -138,25 +141,27 @@ bool TestSoftmax2()
/* call Softmax function */ /* call Softmax function */
Softmax(x, y, 1); Softmax(x, y, 1);
/* call SoftmaxBackward function */
SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY); SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
/* check result */ /* check result */
cpuTest = dedx->CheckData(dedxAnswer, sUnitNum); cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, sUnitNum); gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll(); dedyGPU->SetZeroAll();
...@@ -168,7 +173,8 @@ bool TestSoftmax2() ...@@ -168,7 +173,8 @@ bool TestSoftmax2()
SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY); SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
/* check result */ /* check result */
gpuTest = dedxGPU->CheckData(dedxAnswer, sUnitNum); gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
...@@ -181,7 +187,7 @@ bool TestSoftmax2() ...@@ -181,7 +187,7 @@ bool TestSoftmax2()
delete gGPU; delete gGPU;
delete dedxGPU; delete dedxGPU;
delete dedyGPU; delete dedyGPU;
delete[] sDimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -191,7 +197,7 @@ bool TestSoftmax2() ...@@ -191,7 +197,7 @@ bool TestSoftmax2()
delete g; delete g;
delete dedx; delete dedx;
delete dedy; delete dedy;
delete[] sDimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
......
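TestSoftmax2's dedxAnswer follows from the rule quoted above, dE/dx_j = -gold_j + y_j; a standalone check (the exact third value, -0.33476, prints as -0.3348 and sits within the test's 1e-4 tolerance of -0.3347F):

#include <cmath>
#include <cstdio>

int main()
{
    const int n = 3;
    float x[n] = {0.0F, 1.0F, 2.0F};   /* xData */
    float g[n] = {0.0F, 0.0F, 1.0F};   /* gData */
    float sum = 0.0F;
    for (int i = 0; i < n; i++)
        sum += expf(x[i]);
    for (int i = 0; i < n; i++) {
        float y = expf(x[i]) / sum;    /* softmax */
        printf("%.4f\n", y - g[i]);    /* dE/dx: 0.0900 0.2447 -0.3348 */
    }
    return 0;
}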
...@@ -181,14 +181,20 @@ bool TestSplit2() ...@@ -181,14 +181,20 @@ bool TestSplit2()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s, t, sGPU, tGPU; delete s;
delete[] sDimSize, tDimSize; delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s, t; delete s;
delete[] sDimSize, tDimSize; delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -295,14 +301,25 @@ bool TestSplit3() ...@@ -295,14 +301,25 @@ bool TestSplit3()
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete s, t1, t2, sGPU, tGPU1, tGPU2; delete s;
delete[] sDimSize, tDimSize1, tDimSize2; delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s, t1, t2; delete s;
delete[] sDimSize, tDimSize1, tDimSize2; delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -31,12 +31,12 @@ bool Test() ...@@ -31,12 +31,12 @@ bool Test()
wrong = !TestConcatenate() || wrong; wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong; wrong = !TestConcatenateSolely() || wrong;
//wrong = !TestCopyIndexed() || wrong; wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong; wrong = !TestCopyValues() || wrong;
wrong = !TestMatrixMul() || wrong; wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong; wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMul2DParallel() || wrong; wrong = !TestMatrixMul2DParallel() || wrong;
//wrong = !TestMatrixMulBatched() || wrong; wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong; wrong = !TestMatrixMulBatchedCPU() || wrong;
wrong = !TestMerge() || wrong; wrong = !TestMerge() || wrong;
wrong = !TestMultiply() || wrong; wrong = !TestMultiply() || wrong;
...@@ -56,18 +56,18 @@ bool Test() ...@@ -56,18 +56,18 @@ bool Test()
wrong = !TestSplit() || wrong; wrong = !TestSplit() || wrong;
wrong = !TestSum() || wrong; wrong = !TestSum() || wrong;
wrong = !TestSumByColumnTV() || wrong; wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong; wrong = !TestSumByColumnVT() || wrong;
wrong = !TestTopK() || wrong; wrong = !TestTopK() || wrong;
wrong = !TestUnsqueeze() || wrong; wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong; wrong = !TestXMem() || wrong;
//wrong = !TestHardTanH() || wrong; wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong; wrong = !TestIdentity() || wrong;
//wrong = !TestLogSoftmax() || wrong; wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong; wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong; wrong = !TestRectify() || wrong;
//wrong = !TestSigmoid() || wrong; wrong = !TestSigmoid() || wrong;
//wrong = !TestSoftmax() || wrong; wrong = !TestSoftmax() || wrong;
/* other test */ /* other test */
/* /*
......