Commit f8a37184 by liyinqiao

Merge with li branch

parents 0ca350a3 9b11391e
......@@ -542,14 +542,14 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
if (dataType == X_FLOAT) {
d = new float[unitNum];
for (int i = 0; i < unitNum; i++) {
DTYPE value = lower + upper * (float)rand() / RAND_MAX;
DTYPE value = lower + (upper - lower) * (float)rand() / RAND_MAX;
*((float*)d + i) = value;
}
}
else if (dataType == X_DOUBLE) {
d = new double[unitNum];
for (int i = 0; i < unitNum; i++) {
*((double*)d + i) = rand() / RAND_MAX;
*((double*)d + i) = lower + (upper - lower) * rand() / RAND_MAX;
}
}
else {
......@@ -922,8 +922,10 @@ set the value of a cell
>> index - index of the cell for each dimension
>>
*/
bool XTensor::Set(DTYPE value, int * index, int size)
bool XTensor::Set(DTYPE value, int index[], int size)
{
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
return SetToDevice(devID, GetCell(index, size), value);
}
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "Absolute.h"
#include "Absolute.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its absolute value
>> a - the tensor we are processing
*/
void Absolute(XTensor * a)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaAbsolute(a);
return;
}
#endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)fabs(d[i]);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Absolute.h"
#include "Absolute.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its absolute value (CUDA Kernel)
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(DTYPE * d, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = fabs(d[i]);
}
/*
set each entry to its absolute value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(__half * d, int size)
{
return;
}
/*
set each entry to its with float16 data type value
>> a - the tensor
*/
extern "C"
void CudaAbsolute(XTensor * a)
{
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Absolute.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its absolute value (CUDA Kernel) */
__global__
void KernelAbsolute(DTYPE * d, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__
void KernelAbsolute(__half * d, int size);
/* set each entry to its absolute value */
extern "C"
void CudaAbsolute(XTensor * a);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __ABSOLUTE_H__
#define __ABSOLUTE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */
extern "C"
void Absolute(XTensor * a);
} // namespace nts(NiuTrans.Tensor)
#endif // __ABSOLUTE_H__
......@@ -89,9 +89,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
void * ap = (char*)a->data + aRealBlockSize * p;
void * bp = (char*)b->data + bRealBlockSize * p;
void * cp = (char*)c->data + cRealBlockSize * p;
XTensor * ai = new XTensor(2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * bi = new XTensor(2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem);
XTensor * ci = new XTensor(2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem);
XTensor * ai = NewTensor(2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * bi = NewTensor(2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem);
XTensor * ci = NewTensor(2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem);
ai->data = ap;
bi->data = bp;
ci->data = cp;
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - the tensor we are processing
*/
void Sign(XTensor * a)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaSign(a);
return;
}
#endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0)
d[i] = 1.0F;
else if (d[i] == 0)
d[i] = 0.0F;
else
d[i] = -1.0F;
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its sign value (CUDA Kernel)
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelSign(DTYPE * d, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (d[i] > 0)
d[i] = 1.0F;
else if (d[i] == 0)
d[i] = 0.0F;
else
d[i] = -1.0F;
}
}
/*
set each entry to its sign value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelSign(__half * d, int size)
{
return;
}
/*
set each entry to its with float16 data type value
>> a - the tensor
*/
extern "C"
void CudaSign(XTensor * a)
{
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Sign.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its sign value (CUDA Kernel) */
__global__
void KernelSign(DTYPE * d, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
void KernelSign(__half * d, int size);
/* set each entry to its sign value */
extern "C"
void CudaSign(XTensor * a);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_H__
#define __SIGN_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
extern "C"
void Sign(XTensor * a);
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
......@@ -52,7 +52,7 @@ void KernelADDByColumnVT(DTYPE * a, DTYPE * b, DTYPE * c, int colNum, int rowNum
DTYPE * bp = b + (rowNum * k + row) * colNum;
if (colNum % 4 == 0) {
for (int i = 0; i < colNum; i += 4)
sum += bp[i] + bp[i + 1] + b[i + 2] + b[i + 3];
sum += bp[i] + bp[i + 1] + bp[i + 2] + bp[i + 3];
}
else if (colNum % 2 == 0) {
for (int i = 0; i < colNum; i += 2)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "ConvertDataType.h"
#include "ConvertDataType.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
convert data type
>> input - input tensor
>> output - output tensor
*/
void ConvertTensorDataType(XTensor * input, XTensor * output)
{
CheckNTErrors(XTensor::IsIdentical(input, output), "Input and Output are different in type or size!");
if (input->dataType == output->dataType)
return;
#ifdef USE_CUDA
/* run it on GPUs */
if (input->devID >= 0) {
CudaConvertDataType(input, output);
return;
}
#endif
if (input->dataType == X_FLOAT && output->dataType == X_INT) {
float * inputData = (float*)input->data;
int * outputData = (int*)output->data;
for (int i = 0; i < input->unitNum; i++)
outputData[i] = (int)inputData[i];
}
else if (input->dataType == X_INT && output->dataType == X_FLOAT) {
int * inputData = (int*)input->data;
float * outputData = (float*)output->data;
for (int i = 0; i < input->unitNum; i++)
outputData[i] = (float)inputData[i];
}
else
ShowNTErrors("Unsupported data types for conversion!");
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -21,6 +21,7 @@
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "ConvertDataType.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -49,6 +50,24 @@ void KernelFloat16ToFloat(__half * s, float * t, int size)
}
}
__global__
void KernelFloatToInt(float * inputData, int * outputData, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
outputData[i] = (int)(inputData[i]);
}
}
__global__
void KernelIntToFloat(int * inputData, float * outputData, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
outputData[i] = (float)(inputData[i]);
}}
/*
data conversion (cuda code)
......@@ -88,6 +107,39 @@ void CudaConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t,
ProtectCudaDev(devID, devIDBackup);
}
/*
convert data type (cuda code)
>> input - input tensor
>> output - output tensor
*/
void CudaConvertDataType(XTensor * input, XTensor * output)
{
CheckNTErrors(XTensor::IsIdentical(input, output), "Input and Output are different in type or size!");
if (input->dataType == output->dataType)
return;
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(input->devID, input->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(input->devID, devIDBackup);
if(input->dataType == X_FLOAT && output->dataType == X_INT)
KernelFloatToInt<<<blocks, threads>>>((float*)input->data, (int*)output->data, input->unitNum);
else if(input->dataType == X_INT && output->dataType == X_FLOAT)
KernelIntToFloat<<<blocks, threads>>>((int*)input->data, (float*)output->data, input->unitNum);
else{
ShowNTErrors("Unsupported data types for conversion!");
}
ProtectCudaDev(input->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* convert data type from X_FLOAT to X_FLOAT16 (CUDA Kernel) */
__global__
void KernelFloatToFloat16(float * s, __half * t, int size);
/* convert data type from X_FLOAT16 to X_FLOAT (CUDA Kernel) */
__global__
void KernelFloat16ToFloat(__half * s, float * t, int size);
/* convert data type from X_FLOAT to X_INT (CUDA Kernel) */
__global__
void KernelFloatToInt(float * inputData, int * outputData, int size);
/* convert data type from X_INT to X_FLOAT (CUDA Kernel) */
__global__
void KernelIntToFloat(int * inputData, float * outputData, int size);
/* convert data type */
void CudaConvertDataType(XTensor * input, XTensor * output);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __CONVERTDATATYPE_H__
#define __CONVERTDATATYPE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* convert data type */
void ConvertDataType(XTensor * input, XTensor * output);
} // namespace nts(NiuTrans.Tensor)
#endif // __CONVERTDATATYPE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "Log.h"
#include "Log.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its log value
>> a - the tensor we are processing
*/
void Log(XTensor * a)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaLog(a);
return;
}
#endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)log(d[i]);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Log.h"
#include "Log.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its log value (CUDA Kernel)
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelLog(DTYPE * d, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = log(d[i]);
}
/*
set each entry to its log value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelLog(__half * d, int size)
{
return;
}
/*
set each entry to its log value
>> a - the tensor
*/
extern "C"
void CudaLog(XTensor * a)
{
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelLog << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelLog << <blocks, threads >> >((__half*)a->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Log.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its log value (CUDA Kernel) */
__global__
void KernelLog(DTYPE * d, int size);
/* set each entry to its log value (CUDA Kernel) with float16 data type*/
__global__
void KernelLog(__half * d, int size);
/* set each entry to its log value */
extern "C"
void CudaLog(XTensor * a);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __LOG_H__
#define __LOG_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */
extern "C"
void Log(XTensor * a);
} // namespace nts(NiuTrans.Tensor)
#endif // __LOG_H__
......@@ -37,6 +37,7 @@ __global__
void KernelScaleAndShift(__half * a, __half * b, int size, __half scale, __half shift);
/* scale and shift all tensor entires b = a * scale + shift (cuda version) */
extern "C"
void _CudaScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift);
#endif // USE_CUDA
......
......@@ -66,18 +66,19 @@ copy a number of blocks source source positions to target positions
>> targetBlocks - target positions of the copy
>> myMem - the memory pool
*/
void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem)
void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID)
{
if (myMem != NULL && myMem->devID >= 0) {
if (myMem != NULL)
CheckNTErrors((myMem->devID == devID), "DevIDs are different between memory pool and input devID!");
if (devID >= 0) {
#ifdef USE_CUDA
CudaCopyBlocksSelected(source, blockSize, sourceBlocks, blockNum, target, targetBlocks, myMem);
CudaCopyBlocksSelected(source, blockSize, sourceBlocks, blockNum, target, targetBlocks, myMem, devID);
#else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif
}
else {
int devID = myMem != NULL ? myMem->devID : -1;
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
......
......@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem);
/* copy a number of blocks from source positions to target positions */
void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem);
void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -70,28 +70,33 @@ copy a number of blocks from source positions to target positions (cuda version)
>> targetBlocks - target positions of the copy
>> myMem - memory pool
*/
void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem)
void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID)
{
CheckNTErrors((myMem != NULL), "No memory pool!");
CheckNTErrors((myMem->devID >= 0), "Wrong device to run!");
CheckNTErrors((devID >= 0), "Wrong device to run!");
CheckNTErrors((blockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
/* copy the index to the GPU memory */
int * sourceBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
int * targetBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
XMemCopy(sourceBlocksTMP, myMem->devID, sourceBlocks, -1, blockNum * sizeof(int));
XMemCopy(targetBlocksTMP, myMem->devID, targetBlocks, -1, blockNum * sizeof(int));
int * sourceBlocksTMP = myMem != NULL ? (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) : (int *)XMemAlloc(devID, blockNum * sizeof(int));
int * targetBlocksTMP = myMem != NULL ? (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)) : (int *)XMemAlloc(devID, blockNum * sizeof(int));
XMemCopy(sourceBlocksTMP, devID, sourceBlocks, -1, blockNum * sizeof(int));
XMemCopy(targetBlocksTMP, devID, targetBlocks, -1, blockNum * sizeof(int));
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(myMem->devID, blockSize / sizeof(DTYPE), blockNum, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread2D(devID, blockSize / sizeof(DTYPE), blockNum, MAX_INT, cudaGrids, cudaBlocks);
KernelCopyBlocksSelected << <dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)source, blockSize / sizeof(DTYPE), sourceBlocksTMP, blockNum, (DTYPE*)target, targetBlocksTMP);
if (myMem != NULL) {
myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
}
else {
XMemFree(devID, sourceBlocksTMP);
XMemFree(devID, targetBlocksTMP);
}
}
#endif // USE_CUDA
......
......@@ -34,7 +34,7 @@ void KernelCopyBlocksSelected(DTYPE * source, int blockSize, int * sourceBlocks,
/* copy a number of blocks form source positions to target positions (cuda version) */
extern "C"
void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem);
void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID);
#endif // USE_CUDA
......
......@@ -84,7 +84,7 @@ bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSiz
CheckNTErrors((tgtIndex[i] < blockNumTgt), "Index is out of range!");
}
CopyBlocks(s->data, blockSizeSrc * s->unitSize, realSrcIndex, realIndexSize, t->data, realTgtIndex, s->mem);
CopyBlocks(s->data, blockSizeSrc * s->unitSize, realSrcIndex, realIndexSize, t->data, realTgtIndex, s->mem, s->devID);
delete[] realSrcIndex;
delete[] realTgtIndex;
......
......@@ -27,6 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy s to t */
extern "C"
bool CopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -21,6 +21,7 @@
#include <math.h>
#include "Loss.h"
#include "Loss.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -43,6 +44,8 @@ compute the loss
DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg)
{
DTYPE error = 0.0F;
if (output->devID < 0) {
CheckNTErrors((gLen >= 0 && gLen <= output->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(gold, output)), "The input tensors must be of the same size!");
CheckNTErrors((gold->dimSizeRDI[0] == 1 && output->dimSizeRDI[0] == 1), "TODO!");
......@@ -66,7 +69,6 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
DTYPE * gp = (DTYPE*)gold->data;
DTYPE * op = (DTYPE*)output->data;
DTYPE error = 0.0F;
/*
squared error
......@@ -166,7 +168,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
for(int k = 0; k < blockNum; k++){
int size = stride * gLen;
for(int i = 0; i < size; i++){
if(*(gp + gBeg + i) >= 1.0F)
if(*(gp + gBeg + i) < 1.0F)
continue;
DTYPE diff = *(gp + gBeg + i) - *(op + oBeg + i);
error += (DTYPE)0.5 * diff * diff;
......@@ -174,6 +176,10 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
}
}
}
}
else {
error = CudaLossCompute(gold, output, LFName, isLogOutput, leadDim, gBeg, gLen, oBeg);
}
return error;
}
......@@ -374,17 +380,18 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
{
CheckNTErrors((tLen < y->unitNum), "Illegal input length!");
if (y->devID < 0) {
CheckNTErrors((tLen <= y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((t->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)), "Tensor must be on the same device!");
CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
"TODO!");
int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
if(leadDimRDI < 0){
leadDimRDI = y->dimSizeRDI[y->order - 1];
leadDimRDI = y->order - 1;
tBeg = 0;
yBeg = 0;
tLen = y->dimSizeRDI[leadDimRDI];
......@@ -457,10 +464,19 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
}
}
else{
for(int i = 0; i < tLen; i++){
*(dedyp + yBeg + i) = -(DTYPE)*(tp + tBeg + i)/(DTYPE)*(yp + yBeg + i);
for (int i = 0; i < blockNum; i++) {
for (int j = 0; j < stride; j++) {
for (int k = 0; k < tLen; k++) {
*(dedyp + i * stride * dimensionSize + j + stride * (yBeg + k)) = -(DTYPE)*(tp + i * stride * dimensionSize
+ j + stride * (tBeg + k)) / (DTYPE)*(yp + i * stride * dimensionSize + j + stride * (yBeg + k));
}
}
}
}
}
}
else {
CudaLossBackward(dedy, t, y, LFName, leadDim, tBeg, tLen, yBeg);
}
}
......
......@@ -22,6 +22,14 @@
#include "Loss.h"
#include "Loss.cuh"
#include "../XDevice.h"
#include "../core/math/Power.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/math/Log.h"
#include "../core/arithmetic/Negate.h"
#include "../core/arithmetic/Sum.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -46,7 +54,126 @@ compute the loss
DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
{
return 0;
CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(gold, y)), "The input tensors must be of the same size!");
CheckNTErrors((gold->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((gold->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
"TODO!");
CheckNTErrors((gold->devID == y->devID), "Tensors must be on the same device!");
CheckNTErrors((gold->devID >= 0), "Tensors must be on GPU device!");
CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
if(isLogOutput)
return LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
DTYPE error = 0.0F;
/*
squared error
loss = sum_{i} 0.5*(gold_i - output_i)^2
where gold_i is the gold standard and output_i is the model prediction
*/
if(LFName == SQUAREDERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
_Sum(gold, y, diff, -1.0F);
Power(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) {
int diffOrder = diff->order - 1;
int * diffDimSize = new int[diffOrder];
memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
ReduceSum(diff, diffNew, reducePlace);
if (diffNew->order == 1) {
diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0];
diffNew->dimSize[0] = 1;
diffNew->dimSizeRDI[1] = 1;
}
delete diff;
diff = diffNew;
delete diffDimSize;
}
error = diff->Get2D(0, 0);
delete diff;
}
/*
cross entropy
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
*/
if(LFName == CROSSENTROPY){
XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
CopyValues(y, diff);
Log(diff);
_Multiply(gold, diff, diff);
Negate(diff);
int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) {
int diffOrder = diff->order - 1;
int * diffDimSize = new int[diffOrder];
memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
ReduceSum(diff, diffNew, reducePlace);
if (diffNew->order == 1) {
diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0];
diffNew->dimSize[0] = 1;
diffNew->dimSizeRDI[1] = 1;
}
delete diff;
diff = diffNew;
delete diffDimSize;
}
error = diff->Get2D(0, 0);
delete diff;
}
/*
one hot error
loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1,
e_i = 0 otherwise
*/
if(LFName == ONEHOTERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
XTensor * yOnehot = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
CopyValues(y, yOnehot);
_Multiply(gold, y, yOnehot);
_Sum(gold, yOnehot, diff, -1.0F);
Power(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) {
int diffOrder = diff->order - 1;
int * diffDimSize = new int[diffOrder];
memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
ReduceSum(diff, diffNew, reducePlace);
if (diffNew->order == 1) {
diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0];
diffNew->dimSize[0] = 1;
diffNew->dimSizeRDI[1] = 1;
}
delete diff;
diff = diffNew;
delete diffDimSize;
}
error = diff->Get2D(0, 0);
delete diff;
delete yOnehot;
}
return error;
// TODO: call cuda kernels for computing the errors
}
......@@ -140,13 +267,25 @@ backward compuation for cross entropy (Cuda kernel)
>> size - size of the vector (dedy)
*/
extern "C" __global__
void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i > stride * dimensionSize * blockNum)
return;
if (i < size){
int blockNumIndex = i / (stride * dimensionSize);
int blockNumTail = i % (stride * dimensionSize);
int dimensionSizeIndex = blockNumTail / stride;
int strideIndex = blockNumTail % stride;
if (dimensionSizeIndex >= tLen)
return;
dedy[blockNumIndex * stride * dimensionSize + strideIndex + stride * (yBeg + dimensionSizeIndex)] = -t[blockNumIndex * stride * dimensionSize +
strideIndex + stride * (tBeg + dimensionSizeIndex)] / y[blockNumIndex * stride * dimensionSize + strideIndex + stride * (yBeg + dimensionSizeIndex)];
/*if (i < size){
dedy[i] = -t[i]/y[i];
}
}*/
}
/*
......@@ -193,9 +332,11 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
{
CheckNTErrors((tLen <= y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[1] == 1), "TODO!");
CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)), "Tensor must be on the same device!");
CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE &&
y->dataType == DEFAULT_DTYPE &&
dedy->dataType == DEFAULT_DTYPE),
......@@ -208,21 +349,25 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
"The vectors must be on the same GPU.");
CheckNTErrors((tBeg == yBeg), "TODO!");
int leadDimRDI = y->order - leadDim - 1;
int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
if(leadDimRDI < 0){
leadDimRDI = y->dimSizeRDI[y->order - 1];
leadDimRDI = y->order - 1;
tBeg = 0;
yBeg = 0;
tLen = y->dimSizeRDI[leadDimRDI];
}
int dimensionSize = y->dimSizeRDI[leadDimRDI];
int stride = 1;
int blockSize = 1;
int blockNum = 1;
int size = 1;
for(int i = 0; i < leadDimRDI; i++)
stride *= y->dimSizeRDI[i];
size = tLen * stride;
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
int cudaGridSize[3], cudaBlockSize[3];
......@@ -265,7 +410,7 @@ void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
ShowNTErrors("TODO!");
}
else if(size == y->unitNum){
KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tLen);
KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
}
else{
KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
......
......@@ -97,7 +97,7 @@ void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y,
if (i < size){
DTYPE s = x[i];
if(s >= 0)
dedx[i] = 1;
dedx[i] = dedy[i];
else
dedx[i] = 0;
}
......
......@@ -248,7 +248,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
"Unknown loss function.");
if(lossName == CROSSENTROPY || lossName == SQUAREDERROR){
ShowNTErrors("TODO!");
_Sum(y, gold, dedx, -1.0F);
}
else if(lossName == ONEHOTERROR){
ShowNTErrors("TODO!");
......
......@@ -483,9 +483,9 @@ bool TestConcatenate4()
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
//delete[] sDimSize1;
//delete[] sDimSize2;
//delete[] tDimSize;
return cpuTest && gpuTest;
#else
......
......@@ -30,15 +30,15 @@ Identity function: y = x
*/
bool TestIdentity1()
{
/* a input tensor of size (2, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 3;
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
{0.5F, 0.7F, 1.4F} };
......@@ -49,47 +49,50 @@ bool TestIdentity1()
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize);
XTensor * y = NewTensor(sOrder, sDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, sUnitNum);
x->SetData(xData, unitNum);
y->SetZeroAll();
/* call Identity function */
Identity(x, y);
/* check result */
cpuTest = y->CheckData(answer, sUnitNum);
cpuTest = y->CheckData(answer, unitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll();
/* call Identity function */
Identity(xGPU, yGPU);
/* check result */
gpuTest = yGPU->CheckData(answer, sUnitNum);
gpuTest = yGPU->CheckData(answer, unitNum);
/* destroy variables */
delete x, y;
delete xGPU, yGPU;
delete[] sDimSize;
delete x;
delete y;
delete xGPU;
delete yGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y;
delete[] sDimSize;
delete x;
delete y;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -98,35 +101,39 @@ bool TestIdentity1()
/*
case 2: test IdentityBackward function.
IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
In this case, lossName=CROSSENTROPY.
*/
bool TestIdentity2()
{
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 1;
sDimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };
DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F};
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[3] = {1.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.0F, 0.0F, 1.0F};
DTYPE yAnswer[3] = {1.0F, 1.0F, 2.0F};
DTYPE dedyAnswer[3] = {0.0F, 0.0F, -0.5F};
DTYPE dedxAnswer[3] = {0.0F, 0.0F, -0.5F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize);
XTensor * y = NewTensor(sOrder, sDimSize);
XTensor * g = NewTensor(sOrder, sDimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum);
x->SetData(xData, unitNum);
g->SetData(gData, unitNum);
y->SetZeroAll();
dedx->SetZeroAll();
dedy->SetZeroAll();
......@@ -138,22 +145,24 @@ bool TestIdentity2()
IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */
cpuTest = dedx->CheckData(dedxAnswer, sUnitNum, 1e-4F);
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedy->CheckData(dedyAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum);
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll();
......@@ -165,7 +174,9 @@ bool TestIdentity2()
IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */
gpuTest = dedxGPU->CheckData(dedxAnswer, sUnitNum, 1e-4F);
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */
delete x;
......@@ -178,7 +189,7 @@ bool TestIdentity2()
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
......@@ -188,7 +199,7 @@ bool TestIdentity2()
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......
......@@ -20,15 +20,15 @@
*/
#include "../core/math/ScaleAndShift.h"
#include "../function/Loss.h"
#include "TLoss.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test LossCompute function
case 1: test LossCompute function.
In this case, Loss function name = SQUAREDERROR.
loss = sum_{i} 0.5*(t_i - y_i)^2,
where t_i is the gold standard and y_i is the model output
where t_i is the gold standard and y_i is the model output.
*/
bool TestLoss1()
{
......@@ -102,10 +102,10 @@ bool TestLoss1()
}
/*
case 2: test LossCompute function
case 2: test LossCompute function.
In this case, Loss function name = CROSSENTROPY.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output
where t_i is the gold standard and y_i is the model output.
*/
bool TestLoss2()
{
......@@ -179,10 +179,10 @@ bool TestLoss2()
}
/*
case 3: test LossCompute function
case 3: test LossCompute function.
In this case, Loss function name = ONEHOTERROR.
loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise.
*/
bool TestLoss3()
{
......
......@@ -19,6 +19,7 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#include "../XTensor.h"
#include "TMatrixMulBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -29,25 +29,15 @@ In this case, y = max(0, x)
*/
bool TestRectify1()
{
/* a x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
xDimSize[1] = 3;
int xUnitNum = 1;
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.0F, -1.0F, 2.0F},
{3.0F, -4.0F, -5.0F} };
......@@ -58,52 +48,50 @@ bool TestRectify1()
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
x->SetData(xData, unitNum);
y->SetZeroAll();
/* call Rectify function */
Rectify(x, y);
/* check results */
cpuTest = y->CheckData(answer, yUnitNum);
cpuTest = y->CheckData(answer, unitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
xGPU->SetData(xData, xUnitNum);
xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll();
/* call Rectify function */
Rectify(xGPU, yGPU);
/* check results */
gpuTest = yGPU->CheckData(answer, yUnitNum);
gpuTest = yGPU->CheckData(answer, unitNum);
/* destroy variables */
delete x;
delete y;
delete xGPU;
delete yGPU;
delete[] xDimSize;
delete[] yDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x;
delete y;
delete[] xDimSize;
delete[] yDimSize;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -117,73 +105,83 @@ In this case, lossName=CROSSENTROPY.
*/
bool TestRectify2()
{
/* a x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
xDimSize[1] = 3;
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int xUnitNum = 1;
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE yData[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-1.0F, -1.0F, -0.5F},
DTYPE yAnswer[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE dedyAnswer[2][3] = { {-1.0F, -1.0F, -0.5F},
{-0.5F, -0.25F, -0.2F} };
DTYPE answer[2][3] = { {-1.0F, -1.0F, -0.5F},
DTYPE dedxAnswer[2][3] = { {-1.0F, -1.0F, -0.5F},
{-0.5F, -0.25F, -0.2F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(xOrder, xDimSize);
XTensor * gold = NewTensor(xOrder, xDimSize);
XTensor * dedy = NewTensor(xOrder, xDimSize);
XTensor * dedx = NewTensor(xOrder, xDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
y->SetData(yData, xUnitNum);
gold->SetData(goldData, xUnitNum);
dedy->SetData(dedyData, xUnitNum);
x->SetData(xData, unitNum);
gold->SetData(goldData, unitNum);
y->SetZeroAll();
dedy->SetZeroAll();
dedx->SetZeroAll();
/* call Rectify function */
Rectify(x, y);
/* call RectifyBackward function */
RectifyBackward(gold, y, x, dedy, dedx, NOLOSS);
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
/* check results */
cpuTest = dedx->CheckData(answer, xUnitNum);
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedy->CheckData(dedyAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, xUnitNum);
goldGPU->SetData(goldData, xUnitNum);
dedyGPU->SetData(dedyData, xUnitNum);
xGPU->SetData(xData, unitNum);
goldGPU->SetData(goldData, unitNum);
yGPU->SetZeroAll();
dedyGPU->SetZeroAll();
dedxGPU->SetZeroAll();
/* call Rectify function */
Rectify(xGPU, yGPU);
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */
gpuTest = dedxGPU->CheckData(answer, xUnitNum);
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */
delete x;
......@@ -196,7 +194,7 @@ bool TestRectify2()
delete dedyGPU;
delete dedxGPU;
delete goldGPU;
delete[] xDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
......@@ -206,7 +204,7 @@ bool TestRectify2()
delete dedy;
delete dedx;
delete gold;
delete[] xDimSize;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -220,7 +218,7 @@ TODO!!
/* test for Rectify Function */
bool TestRectify()
{
XPRINT(0, stdout, "[TEST RECTIFY] test rectify and its backward computation \n");
XPRINT(0, stdout, "[TEST RECTIFY] rectify function and its backward computation \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -23,8 +23,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
*/
/* case 1: set the cell to the ascending order along a given dimension. */
bool TestSetAscendingOrder1()
{
/* a input tensor of size (2, 4) */
......@@ -50,7 +49,6 @@ bool TestSetAscendingOrder1()
s->SetZeroAll();
/* call SetAscendingOrder function */
s->SetAscendingOrder(1);
/* check results */
......
......@@ -23,7 +23,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension. */
/*
case 1: test SetDataRand function.
set the tensor items by a uniform distribution in range [lower, upper].
*/
bool TestSetData1()
{
/* a input tensor of size (2, 4) */
......@@ -44,7 +47,7 @@ bool TestSetData1()
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
/* call SetData function */
/* call SetDataRand function */
s->SetDataRand(0.0, 1.0);
/* check results */
......
......@@ -25,102 +25,71 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Sigmoid function and SigmoidBackward function.
case 1: test Sigmoid function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
bool TestSigmoid1()
{
/* a input tensor of size (3) */
int sOrder = 1;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
int order = 1;
int * dimSize = new int[order];
dimSize[0] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
DTYPE answer[3] = {0.5F, 0.7311F, 0.8808F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize);
XTensor * y = NewTensor(sOrder, sDimSize);
XTensor * g = NewTensor(sOrder, sDimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum);
dedy->SetData(dedyData, sUnitNum);
x->SetData(xData, unitNum);
y->SetZeroAll();
dedx->SetZeroAll();
/* call Sigmoid function */
Sigmoid(x, y);
/* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, NOLOSS);
/* check result */
cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum);
cpuTest = y->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum);
dedyGPU->SetData(dedyData, sUnitNum);
xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
/* call Sigmoid function */
Sigmoid(xGPU, yGPU);
/* call SigmoidBackward function */
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum);
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -129,70 +98,72 @@ bool TestSigmoid1()
/*
case 2: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
backward computation:
dE/ds = dE/dy * dy/dx
dy/dx = y * (1 -y)
In this case, LossName=CROSSENTROPY.
*/
bool TestSigmoid2()
{
/* a input tensor of size (3) */
int sOrder = 1;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
int order = 1;
int * dimSize = new int[order];
dimSize[0] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
DTYPE yAnswer[3] = {0.5F, 0.7311F, 0.8808F};
DTYPE dedyAnswer[3] = {-0.8F, -1.0943F, -1.1353F};
DTYPE dedxAnswer[3] = {-0.2F, -0.2151F, -0.1192F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize);
XTensor * y = NewTensor(sOrder, sDimSize);
XTensor * g = NewTensor(sOrder, sDimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum);
dedy->SetZeroAll();
x->SetData(xData, unitNum);
g->SetData(gData, unitNum);
y->SetZeroAll();
dedy->SetZeroAll();
dedx->SetZeroAll();
/* call Sigmoid function */
Sigmoid(x, y);
/* initialize variables */
dedy->SetData(dedyData, sUnitNum);
/* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */
cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum);
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedy->CheckData(dedyAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum);
dedyGPU->SetZeroAll();
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedyGPU->SetZeroAll();
dedxGPU->SetZeroAll();
/* call Sigmoid function */
......@@ -202,8 +173,9 @@ bool TestSigmoid2()
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum);
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F)
&& dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */
delete x;
delete y;
......@@ -215,7 +187,7 @@ bool TestSigmoid2()
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
......@@ -225,7 +197,7 @@ bool TestSigmoid2()
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -252,6 +224,16 @@ bool TestSigmoid()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestSigmoid2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -31,68 +31,69 @@ softmax function: y = e^x / \sum_{i} e^{x_i}
*/
bool TestSoftmax1()
{
/* a input tensor of size (2, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 3;
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
{0.5F, 0.7F, 1.4F} };
DTYPE answer[2][3] = { {0.09003057F, 0.24472848F, 0.66524094F},
{0.21362929F, 0.2609274F , 0.52544326F} };
DTYPE answer[2][3] = { {0.0900F, 0.2447F, 0.6652F},
{0.2136F, 0.2609F, 0.5254F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize);
XTensor * y = NewTensor(sOrder, sDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, sUnitNum);
x->SetData(xData, unitNum);
y->SetZeroAll();
/* call Softmax function */
Softmax(x, y, 1);
/* check result */
cpuTest = y->CheckData(answer, sUnitNum);
cpuTest = y->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll();
/* call Softmax function */
Softmax(xGPU, yGPU, 1);
/* check result */
gpuTest = yGPU->CheckData(answer, sUnitNum);
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete x;
delete y;
delete xGPU;
delete yGPU;
delete[] sDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y;
delete[] sDimSize;
delete x;
delete y;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -101,36 +102,38 @@ bool TestSoftmax1()
/*
case 2: test SoftmaxBackward function.
SoftmaxBackward function: dE/dx_j = -gold_j + y_j
In this case, LossName=CROSSENTROPY.
*/
bool TestSoftmax2()
{
/* a input tensor of size (2, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 1;
sDimSize[1] = 3;
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };
DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F};
DTYPE yAnswer[1][3] = { {0.0900F, 0.2447F, 0.6652F} };
DTYPE dedxAnswer[1][3] = {0.0900F, 0.2447F, -0.3347F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(sOrder, sDimSize);
XTensor * y = NewTensor(sOrder, sDimSize);
XTensor * g = NewTensor(sOrder, sDimSize);
XTensor * dedy = NewTensor(sOrder, sDimSize);
XTensor * dedx = NewTensor(sOrder, sDimSize);
XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize);
XTensor * g = NewTensor(order, dimSize);
XTensor * dedy = NewTensor(order, dimSize);
XTensor * dedx = NewTensor(order, dimSize);
/* initialize variables */
x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum);
x->SetData(xData, unitNum);
g->SetData(gData, unitNum);
y->SetZeroAll();
dedx->SetZeroAll();
dedy->SetZeroAll();
......@@ -138,25 +141,27 @@ bool TestSoftmax2()
/* call Softmax function */
Softmax(x, y, 1);
/* call SoftmaxBackward function */
SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
/* check result */
cpuTest = dedx->CheckData(dedxAnswer, sUnitNum);
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
&& dedx->CheckData(dedxAnswer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum);
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll();
......@@ -168,7 +173,8 @@ bool TestSoftmax2()
SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
/* check result */
gpuTest = dedxGPU->CheckData(dedxAnswer, sUnitNum);
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
&& dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F);
/* destroy variables */
delete x;
......@@ -181,7 +187,7 @@ bool TestSoftmax2()
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
delete[] dimSize;
return cpuTest && gpuTest;
#else
......@@ -191,7 +197,7 @@ bool TestSoftmax2()
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
......
......@@ -181,14 +181,20 @@ bool TestSplit2()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s, t, sGPU, tGPU;
delete[] sDimSize, tDimSize;
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t;
delete[] sDimSize, tDimSize;
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -295,14 +301,25 @@ bool TestSplit3()
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */
delete s, t1, t2, sGPU, tGPU1, tGPU2;
delete[] sDimSize, tDimSize1, tDimSize2;
delete s;
delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t1, t2;
delete[] sDimSize, tDimSize1, tDimSize2;
delete s;
delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
......
......@@ -31,12 +31,12 @@ bool Test()
wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong;
//wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong;
wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMul2DParallel() || wrong;
//wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong;
wrong = !TestMerge() || wrong;
wrong = !TestMultiply() || wrong;
......@@ -56,18 +56,18 @@ bool Test()
wrong = !TestSplit() || wrong;
wrong = !TestSum() || wrong;
wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong;
wrong = !TestSumByColumnVT() || wrong;
wrong = !TestTopK() || wrong;
wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong;
//wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong;
//wrong = !TestSigmoid() || wrong;
//wrong = !TestSoftmax() || wrong;
wrong = !TestHardTanH() || wrong;
wrong = !TestIdentity() || wrong;
wrong = !TestLogSoftmax() || wrong;
wrong = !TestLoss() || wrong;
wrong = !TestRectify() || wrong;
wrong = !TestSigmoid() || wrong;
wrong = !TestSoftmax() || wrong;
/* other test */
/*
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论