Commit bdf5c952 by xuchen

update setdata by template and optimize some interface

parent bd05b21b
......@@ -63,155 +63,3 @@ int main( int argc, const char ** argv )
return 0;
}
void BackwardTest()
{
XNet net;
XTensor a;
XTensor b;
XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean;
XTensor origin;
InitTensor2DV2(&a, 2, 3);
InitTensor1DV2(&b, 2);
a.SetZeroAll();
b.SetZeroAll();
a.Set2D(1.0F, 0, 0);
a.Set2D(2.0F, 0, 1);
a.Set2D(3.0F, 0, 2);
a.Set2D(4.0F, 1, 0);
a.Set2D(5.0F, 1, 1);
a.Set2D(6.0F, 1, 2);
b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1);
DivDim(a, b, c, 0);
c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c);
net.Backward(loss);
a.grad->Dump(stderr);
}
void TransposeTest()
{
#ifdef USE_CUDA
XMem mem0(0, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
//XMem mem1(1, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
XTensor x;
XTensor y;
XTensor z;
int loops = 2000;
int B = 3 * 2 * 4;
int K = 8 * 1;
int N = 50;
int H = 512 * 4;
int nnn = GDevs.nGPU;
InitTensor3DV2(&x, B, N, H, X_FLOAT, 0);
InitTensor4DV2(&y, K, B, N, H/K, X_FLOAT, 0);
InitTensor3DV2(&z, B, N, H, X_FLOAT, 0);
cudaEvent_t ctime0;
cudaEvent_t ctime1;
cudaEvent_t ctime2;
cudaEvent_t ctime3;
cudaEvent_t ctime4;
cudaEvent_t ctime5;
float elapsedSplit = 0.0;
float elapsedMerge = 0.0;
float elapsedSum = 0.0;
cudaEventCreate(&ctime0);
cudaEventCreate(&ctime1);
cudaEventCreate(&ctime2);
cudaEventCreate(&ctime3);
cudaEventCreate(&ctime4);
cudaEventCreate(&ctime5);
cudaEventRecord(ctime0, 0);
double time0 = GetClock();
for(int i = 0; i < loops; i++)
_Split(&x, &y, 2, K);
double time1 = GetClock();
cudaEventRecord(ctime1, 0);
cudaEventSynchronize(ctime1);
cudaEventElapsedTime(&elapsedSplit, ctime0, ctime1);
cudaEventRecord(ctime2, 0);
double time2 = GetClock();
for(int i = 0; i < loops; i++)
_Merge(&y, &x, 3);
double time3 = GetClock();
cudaEventRecord(ctime3, 0);
cudaEventSynchronize(ctime3);
cudaEventElapsedTime(&elapsedMerge, ctime2, ctime3);
cudaEventRecord(ctime4, 0);
double time4 = GetClock();
for(int i = 0; i < loops; i++)
_Sum(&x, &z, &x);
double time5 = GetClock();
cudaEventRecord(ctime5, 0);
cudaEventSynchronize(ctime5);
cudaEventElapsedTime(&elapsedSum, ctime4, ctime5);
fprintf(stderr, "split:%f merge:%f sum:%f\n", time1 - time0, time3 - time2, time5 - time4);
fprintf(stderr, "split:%f merge:%f sum:%f\n", elapsedSplit, elapsedMerge, elapsedSum);
#endif
}
void SumDimTest()
{
XTensor x;
XTensor y;
XTensor z;
int a = 5;
int b = 7;
int c = 3;
InitTensor3DV2(&x, a, b, c, X_FLOAT, -1);
InitTensor1DV2(&y, c, X_FLOAT, -1);
InitTensor3DV2(&z, a, b, c, X_FLOAT, -1);
x.SetZeroAll();
y.SetZeroAll();
z.SetZeroAll();
DTYPE * data = new DTYPE[x.unitNum];
for(int i = 0; i < x.unitNum; i++)
data[i] = (DTYPE)i;
x.SetData(data, x.unitNum);
for(int i = 0; i < y.unitNum; i++)
data[i] = -(DTYPE)i;
y.SetData(data, y.unitNum);
_SumDim(&x, &y, &z, 2);
z.Dump(stderr, "z:");
delete[] data;
}
......@@ -33,7 +33,6 @@
namespace nts{
/* compute dE/dx of a node */
void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
{
......@@ -53,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * dedy = output->grad;
if (income.tailNum == 1) {
if (dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
else if (dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if (dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else
ShowNTErrors("TODO");
dedy->SetDataFixed(1);
return;
}
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* This is the entrance of the low-level tensor library : NiuTrans.Tensor
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2015-12-14
*
*/
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "XTensor.h"
#include "XDevice.h"
#include "./test/Test.h"
#include "./core/CHeader.h"
#include "./XBLAS.h"
#include "./core/sort/TopK.h"
#include "./core/movement/Gather.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
void SmallTest();
void TransposeTest();
void PowerTest();
int main( int argc, const char ** argv )
{
//PowerTest();
//LittleTest();
//T2TTest();
//T2TTest2();
//return 0;
//_CrtSetBreakAlloc(123);
/* a tiny test */
//SmallTest();
//_CrtDumpMemoryLeaks();
//return 0;
if(argc > 1 && !strcmp(argv[1], "-test"))
Test();
else{
fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
fprintf(stderr, "use of tensors. All you need is to ... \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
}
//_CrtDumpMemoryLeaks();
return 0;
}
void myRead(XTensor * tensor, const char * filename, const char * label)
{
FILE * file = fopen(filename, "rb");
if(file == NULL)
printf("%s\n", filename);
tensor->Read(file, label);
}
void myDump(XTensor * tensor, const char * filename, const char * label)
{
FILE * file = fopen(filename, "wb");
if(file == NULL)
printf("%s\n", filename);
tensor->Dump(file, label);
}
void PowerTest()
{
XTensor input;
XTensor output;
InitTensor2D(&input, 256, 10000, X_FLOAT, 0);
InitTensor2D(&output, 256, 10000, X_FLOAT, 0);
myRead(&input, "1.txt", "");
_Power(&input, &output, 2);
output.Dump(stderr, "", 200);
}
void SmallTest()
{
XTensor a;
XTensor b;
XTensor c;
XTensor d;
InitTensor2D(&a, 2, 2);
InitTensor2D(&b, 2, 2);
a.SetZeroAll();
b.SetZeroAll();
a.Set2D(1.0F, 0, 0);
a.Set2D(2.0F, 1, 1);
b = Sum(a, Multiply(a, a));
/* this is prohibited !!!!!!!!!!!!! */
//XTensor c = a * b + a;
//XTensor d = a + b + c.Lin(0.5F);
c = a * b + a;
d = a + b + c.Lin(0.5F);
XLink::CheckNetwork(&d);
//XLink::ShowNetwork(stderr, &d);
a.Dump(stderr, "a:");
b.Dump(stderr, "b:");
c.Dump(stderr, "c:");
d.Dump(stderr, "d:");
}
void TransposeTest()
{
XTensor a;
XTensor b;
int I = 2;
int J = 3;
InitTensor4D(&a, 2, 3, 4, 5);
int * dims = new int[a.order];
memcpy(dims, a.dimSize, sizeof(int) * a.order);
dims[I] = a.dimSize[J];
dims[J] = a.dimSize[I];
InitTensor(&b, 4, dims);
a.SetZeroAll();
b.SetZeroAll();
float * data = new float[a.unitNum];
for(int i = 0; i < a.unitNum; i++)
data[i] = (float)i;
a.SetData(data, a.unitNum, 0);
_Transpose(&a, &b, I, J);
b.Dump(stderr, "b:");
delete[] data;
}
......@@ -526,6 +526,8 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
}
else {
myMem = GMems.GetMem(myDevId);
FlushToMem(myMem);
isInGlobalMem = false;
}
}
......@@ -818,6 +820,16 @@ void XTensor::Range(DTYPE lower, DTYPE upper, DTYPE step)
_SetDataRange(this, lower, upper, step);
}
/* generate data items with a fixed value */
template<class T>
void XTensor::SetDataFixed(T num)
{
_SetDataFixed(this, num);
}
template void XTensor::SetDataFixed<int>(int);
template void XTensor::SetDataFixed<float>(float);
template void XTensor::SetDataFixed<double>(double);
/*
set the tensor items by a uniform distribution in range [lower, upper]
>> lower - lower value of the range
......@@ -825,62 +837,7 @@ set the tensor items by a uniform distribution in range [lower, upper]
*/
void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
{
// TODO: GPU code!!!!!!!
if (data == NULL)
return;
// srand((unsigned)time(0));
DTYPE variance = upper - lower;
void * d = NULL;
if (dataType == X_FLOAT) {
d = new float[unitNum];
for (int i = 0; i < unitNum; i++) {
DTYPE value = lower + variance * (float)rand() / RAND_MAX;
*((float*)d + i) = value;
}
}
else if (dataType == X_DOUBLE) {
d = new double[unitNum];
for (int i = 0; i < unitNum; i++) {
*((double*)d + i) = lower + variance * rand() / RAND_MAX;
}
}
else {
ShowNTErrors("Data type must be X_FLOAT or X_Double!");
}
SetData(d, unitNum);
if (dataType == X_FLOAT) {
delete[] (float*)d;
}
else {
delete[] (double*)d;
}
}
/* a gauss distribution (Box-Muller method) */
double GaussRand(DTYPE mean, DTYPE standardDeviation)
{
// TODO: GPU code!!!!!!!
static double u, v;
static int phase = 0;
double z;
double pi = 3.141592654;
if (phase == 0){
u = (rand() + 1.0) / (RAND_MAX + 1.0);
v = (rand() + 1.0) / (RAND_MAX + 1.0);
z = sqrt(-2.0 * log(u))* sin(2.0 * pi * v);
}
else{
z = sqrt(-2.0 * log(u)) * cos(2.0 * pi * v);
}
phase = 1 - phase;
return mean + (z * standardDeviation);
_SetDataRand(this, lower, upper);
}
/*
......@@ -890,37 +847,7 @@ set the tensor items by a normal distribution
*/
void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
{
// TODO: cuda code!!!!!!!
if (data == NULL)
return;
// srand((unsigned)time(0));
void * d = NULL;
if (dataType == X_FLOAT) {
d = new float[unitNum];
for (int i = 0; i < unitNum; i++) {
*((float*)d + i) = (float)GaussRand(mean, standardDeviation);
}
}
else if (dataType == X_DOUBLE) {
d = new double[unitNum];
for (int i = 0; i < unitNum; i++) {
*((double*)d + i) = GaussRand(mean, standardDeviation);
}
}
else {
ShowNTErrors("Data type must be X_FLOAT or X_Double!");
}
SetData(d, unitNum);
if (dataType == X_FLOAT) {
delete[] (float*)d;
}
else {
delete[] (double*)d;
}
_SetDataRandN(this, mean, standardDeviation);
}
/*
......
......@@ -28,6 +28,7 @@
#ifndef __XTENSOR_H__
#define __XTENSOR_H__
#include <math.h>
#include "XGlobal.h"
#include "XMem.h"
#include "XPRunner.h"
......@@ -303,6 +304,10 @@ public:
/* generate data items with a range by start, end and the step */
void Range(DTYPE lower, DTYPE upper, DTYPE step);
/* generate data items with a fixed value */
template<class T>
void SetDataFixed(T num);
/* set tensor items by a uniform distribution */
void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F);
......
......@@ -91,9 +91,9 @@
#include "sort/Sort.h"
#include "sort/TopK.h"
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "utilities/CheckData.h"
#include "utilities/FlushToMem.h"
#include "utilities/SetAscendingOrder.h"
#include "utilities/XMatrixSegment.h"
#endif // __CHEADER_H__
......@@ -116,7 +116,7 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot,
float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size;
_SetDataFixedFloat(onehot, lowconfidence);
onehot->SetDataFixed(lowconfidence);
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) {
......
......@@ -77,277 +77,190 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
}
/*
generate data items with a fixed value p
>> tensor - the tensor whose data array would be initialized
>> p - pointer to the number for initializing the tensor
set a data array with a fixed value
>> d - pointer to the data array
>> v - the initial value
>> size - size of the array
*/
void _SetDataFixed(XTensor * tensor, void * valuePointer)
template<class T>
void ArraySetDataFixed(T * d, T v, int size)
{
int num = tensor->unitNum;
if(tensor->dataType == X_INT){
int p = *(int*)valuePointer;
if(tensor->devID < 0){
int * d = (int*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
if (size % 4 == 0) {
for (int i = 0; i < size; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedInt(tensor, p);
#endif
}
}
else if(tensor->dataType == X_FLOAT){
float p = *(float*)valuePointer;
if(tensor->devID < 0){
float * d = (float*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedFloat(tensor, p);
#endif
}
}
else if(tensor->dataType == X_DOUBLE){
double p = *(double*)valuePointer;
if(tensor->devID < 0){
double * d = (double*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedDouble(tensor, p);
#endif
}
}
else{
ShowNTErrors("TODO");
else {
for (int i = 0; i < size; i++)
d[i] = v;
}
}
/*
generate data items with a fixed value p (in default type)
>> tensor - the tensor whose data array would be initialized
>> p - number in default type
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
_SetDataFixed(&tensor, &p);
}
generate data items with a fixed value
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an integer
>> tensor - the tensor for initialization
>> value - the initial value
*/
void SetDataFixedInt(XTensor &tensor, int p)
template<class T>
void _SetDataFixed(XTensor * tensor, T value)
{
CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");
_SetDataFixed(&tensor, &p);
}
if (tensor->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataFixed(tensor, value);
return;
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
void _SetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
int num = tensor->unitNum;
if(p == 0)
tensor->SetZeroAll();
if (tensor->dataType == X_INT)
ArraySetDataFixed((int*)tensor->data, (int)value, num);
else if (tensor->dataType == X_FLOAT)
ArraySetDataFixed((float*)tensor->data, (float)value, num);
else if (tensor->dataType == X_DOUBLE)
ArraySetDataFixed((double*)tensor->data, (double)value, num);
else
_SetDataFixed(tensor, &p);
ShowNTErrors("TODO! Unsupported datatype!")
}
template void _SetDataFixed<int>(XTensor*, int);
template void _SetDataFixed<float>(XTensor*, float);
template void _SetDataFixed<double>(XTensor*, double);
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor whose data array would be initialized
>> p - a float-valued number
*/
void _SetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
generate data items with a fixed value p only if the condition entry is non-zero
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor whose data array would be initialized
>> p - a double-valued number
>> d - pointer to the data array
>> c - pointer to the condition array
>> v - the initial value
>> size - size of the array
*/
void _SetDataFixedDouble(XTensor * tensor, double p)
template<class T>
void ArraySetDataFixedCond(T* d, T* c, T v, int size)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
for (int i = 0; i < size; i++) {
if (c[i] != 0)
d[i] = v;
}
}
/*
generate data items with a fixed value p only if
the condition entry is non-zero
generate data items with a fixed value p only if the condition entry is non-zero
>> tensor - the tensor whose data array would be initialized
>> condition - the condition tensor whose entries would be checked
for set the corresponding entries in "tensor"
>> p - a given value
>> value - a given value
*/
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, DTYPE p)
template<class T>
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, T value)
{
int num = tensor->unitNum;
CheckDev(tensor->devID, condition->devID);
CheckDataType(tensor->dataType, condition->dataType);
CheckNTErrors(num == condition->unitNum, "Wrong size of the condition tensor!");
CheckNTErrors(condition->unitSize == sizeof(float), "TODO!");
if(tensor->dataType == DEFAULT_DTYPE){
if(tensor->devID < 0){
DTYPE * data = (DTYPE*)tensor->data;
DTYPE * cond = (DTYPE*)condition->data;
for(int i = 0; i < num; i++){
if(cond[i] != 0)
data[i] = p;
}
}
else{
if (tensor->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataFixedCondFloat(tensor, condition, p);
_CudaSetDataFixedCond(tensor, condition, value);
return;
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code");
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
}
else{
ShowNTErrors("the tensor should be in integer typed!");
}
}
/*
generate data items with a fixed value p only if
the condition entry is non-zero
>> tensor - the tensor whose data array would be initialized
>> condition - the condition tensor whose entries would be checked
for set the corresponding entries in "tensor"
>> p - a given value
*/
void _SetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p)
{
int num = tensor->unitNum;
CheckNTErrors(num == condition->unitNum, "Wrong size of the condition tensor!");
CheckNTErrors(condition->unitSize == sizeof(float), "TODO!");
if(tensor->dataType == DEFAULT_DTYPE){
if(tensor->devID < 0){
int * data = (int*)tensor->data;
int * cond = (int*)condition->data;
for(int i = 0; i < num; i++){
if(cond[i] != 0)
data[i] = p;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedCondInt(tensor, condition, p);
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code");
#endif
}
}
else{
ShowNTErrors("TODO!");
}
if (tensor->dataType == X_INT)
ArraySetDataFixedCond((int*)tensor->data, (int*)condition->data, (int)value, num);
else if (tensor->dataType == X_FLOAT)
ArraySetDataFixedCond((float*)tensor->data, (float*)condition->data, (float)value, num);
else if (tensor->dataType == X_DOUBLE)
ArraySetDataFixedCond((double*)tensor->data, (double*)condition->data, (double)value, num);
else
ShowNTErrors("TODO! Unsupported datatype!")
}
template void _SetDataFixedCond<int>(XTensor*, XTensor*, int);
template void _SetDataFixedCond<float>(XTensor*, XTensor*, float);
template void _SetDataFixedCond<double>(XTensor*, XTensor*, double);
/*
set data items along with a given dimension (and keep the remaining items unchanged)
>> tensor - the tensor whose data array would be initialized
>> tensor - the tensor for initialization
>> beg - the beginning position
>> len - length along with the given dimension
>> dim - the dimension along which we set the data
e.g., given a 3 * 3 tensor
e.g., given a 3 * 3 tensor
1 2 3
4 5 6
7 8 9
when beg = 1, len = 1, dim = 0 and p = 0, we have
when beg = 1, len = 1, dim = 0 and value = 0, we have
1 2 3
0 0 0
7 8 9
i.e., we set all entries of row 1 to 0
>> value - the given value
*/
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
template<class T>
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, T value)
{
int n = tensor->order;
int order = tensor->order;
int size = tensor->GetDim(dim);
if (dim < 0)
dim = order + dim;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
CheckNTErrors(dim < order && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < size, "Illegal beginning position!");
CheckNTErrors(len >= 0 && beg + len <= size, "Illegal length!");
if (tensor->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataDim(tensor, beg, len, dim, (DTYPE)value);
return;
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
if(tensor->devID < 0){
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = n - 1; i > dim; i--){
for (int i = order - 1; i > dim; i--)
stride *= tensor->GetDim(i);
}
blockSize = stride * tensor->GetDim(dim);
blockSize = stride * size;
blockNum = tensor->unitNum / blockSize;
int l = len * stride;
int initNum = len * stride;
for(int i = 0; i < blockNum; i++){
DTYPE * d = (DTYPE*)tensor->data + blockSize * i + beg * stride;
for(int j = 0; j < l; j++)
d[j] = p;
for(int i = 0; i < blockNum; i++) {
if (tensor->dataType == X_INT) {
int* d = (int*)tensor->data + blockSize * i + beg * stride;
for (int j = 0; j < initNum; j++)
d[j] = (int)value;
}
else if (tensor->dataType == X_FLOAT) {
float* d = (float*)tensor->data + blockSize * i + beg * stride;
for (int j = 0; j < initNum; j++)
d[j] = (float)value;
}
else{
#ifdef USE_CUDA
_CudaSetDataDim(tensor, beg, len, dim, p);
#endif
else if (tensor->dataType == X_DOUBLE) {
double* d = (double*)tensor->data + blockSize * i + beg * stride;
for (int j = 0; j < initNum; j++)
d[j] = (double)value;
}
else
ShowNTErrors("TODO! Unsupported datatype!")
}
}
template void _SetDataDim<int>(XTensor*, int, int, int, int);
template void _SetDataDim<float>(XTensor*, int, int, int, float);
template void _SetDataDim<double>(XTensor*, int, int, int, double);
/*
modify data items along with a given index and dimension (and keep the remaining items unchanged)
......@@ -355,7 +268,7 @@ modify data items along with a given index and dimension (and keep the remaining
>> modify - the tensor whose data array would be used to modify the source tensor
>> dim - the dimension along which we modify the tensor
>> index - index of the given dimension
e.g., given a source tensor (3, 3)
e.g., given a source tensor (3, 3)
1 2 3
4 5 6
7 8 9
......@@ -367,102 +280,127 @@ e.g., given a source tensor (3, 3)
7 8 9
i.e., we set entries of row 1 to {1, 2, 3}
*/
void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
void _SetDataIndexed(XTensor * tensor, XTensor * modify, int dim, int index)
{
int order = source->order;
int size = source->GetDim(dim);
int order = tensor->order;
int size = tensor->GetDim(dim);
if (dim < 0)
dim = order + dim;
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckDev(tensor->devID, modify->devID);
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
for(int i = 0; i < order - 1; i++){
if(i < dim){
CheckNTErrors(modify->GetDim(i) == source->GetDim(i), "Illegal dimension!");
for(int i = 0; i < order - 1; i++) {
if(i < dim) {
CheckNTErrors(modify->GetDim(i) == tensor->GetDim(i), "Illegal dimension!");
}
else if(i >= dim){
CheckNTErrors(modify->GetDim(i) == source->GetDim(i+1), "Illegal dimension!");
else if(i >= dim) {
CheckNTErrors(modify->GetDim(i) == tensor->GetDim(i+1), "Illegal dimension!");
}
}
if(source->devID < 0 && modify->devID < 0){
if (tensor->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataIndexed(tensor, modify, dim, index);
return;
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
if(tensor->devID < 0) {
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = order - 1; i > dim; i--){
stride *= source->GetDim(i);
for (int i = order - 1; i > dim; i--) {
stride *= tensor->GetDim(i);
}
blockSize = stride * source->GetDim(dim);
blockNum = source->unitNum / blockSize;
blockSize = stride * tensor->GetDim(dim);
blockNum = tensor->unitNum / blockSize;
for(int i = 0; i < blockNum; i++){
DTYPE * d = (DTYPE*)source->data + blockSize * i + index * stride;
for (int i = 0; i < blockNum; i++) {
DTYPE * d = (DTYPE*)tensor->data + blockSize * i + index * stride;
DTYPE * p = (DTYPE*)modify->data + stride * i;
for(int j = 0; j < stride; j++)
d[j] = p[j];
}
}
else if(source->devID >= 0 && modify->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataIndexed(source, modify, dim, index);
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
else{
ShowNTErrors("TODO!");
}
}
/*
generate data as lower triangular matrics for last two dimensions
>> tensor - the tensor whose data to be set
>> p - the value for each entry of the lower triangular matrics
>> value - the value for each entry of the lower triangular matrics
>> shift - the offset from diagonal
e.g., for a 3 * 3 tensor,
when p = 1 ans shift = 0, we have
e.g., for a 3 * 3 tensor,
when value = 1 ans shift = 0, we have
1 0 0
1 1 0
1 1 1
when p = 2 and shift = -1, we have
when value = 2 and shift = -1, we have
0 0 0
2 0 0
2 2 0
*/
void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift)
void _SetDataLowTri(XTensor * tensor, DTYPE value, int shift)
{
int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(n >= 2, "The tensor must have a order no less than 2!");
CheckNTErrors(tensor->GetDim(n - 1) == tensor->GetDim(n - 2),
"The last two dimensions must be of the same size!");
if(tensor->devID < 0){
int l = tensor->GetDim(-1);
int blockNum = 1;
int blockSize = l * l;
for(int i = 0; i < n - 2; i++)
blockNum *= tensor->GetDim(i);
tensor->SetZeroAll();
if (tensor->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataLowTri(tensor, value, shift);
return;
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
for(int i = 0; i < blockNum; i++){
DTYPE * d = (DTYPE*)tensor->data + i * blockSize;
for(int row = 0; row < l; row++){
for(int col = 0; col <= row + shift; col++){
d[row * l + col] = p;
int size = tensor->GetDim(-1);
int blockSize = size * size;
int blockNum = tensor->unitNum / blockSize;
for (int i = 0; i < blockNum; i++) {
for (int row = 0; row < size; row++) {
if (tensor->dataType == X_INT) {
int * d = (int*)tensor->data + i * blockSize;
for (int col = 0; col <= row + shift; col++) {
d[row * size + col] = (int)value;
}
for(int col = MAX(0, row + shift + 1); col < l; col++){
d[row * l + col] = 0;
/*for (int col = MAX(0, row + shift + 1); col < size; col++) {
d[row * size + col] = 0;
}*/
}
else if (tensor->dataType == X_FLOAT) {
float * d = (float*)tensor->data + i * blockSize;
for (int col = 0; col <= row + shift; col++) {
d[row * size + col] = (float)value;
}
/*for (int col = MAX(0, row + shift + 1); col < size; col++) {
d[row * size + col] = 0;
}*/
}
else if (tensor->dataType == X_DOUBLE) {
double * d = (double*)tensor->data + i * blockSize;
for (int col = 0; col <= row + shift; col++) {
d[row * size + col] = (double)value;
}
/*for (int col = MAX(0, row + shift + 1); col < size; col++) {
d[row * size + col] = 0;
}*/
}
else
ShowNTErrors("TODO! Unsupported datatype!")
}
else{
#ifdef USE_CUDA
_CudaSetDataLowTri(tensor, p, shift);
#endif
}
}
......@@ -484,7 +422,7 @@ generate data items with a uniform distribution in [lower, upper]
*/
void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
{
CheckNTErrors(upper > lower, "the high value must be greater than low value!");
CheckNTErrors(upper >= lower, "the high value must be greater than low value!");
if(tensor == NULL)
return;
......@@ -506,27 +444,50 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
}
}
else{
ShowNTErrors("TODO");
ShowNTErrors("TODO! Unsupported datatype!")
}
}
else{
#ifdef USE_CUDA
/*
GPU code
The trick here is that initialize the data on a temperary tensor on CPU.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
*/
else{
#ifdef USE_CUDA
_CudaSetDataRand(tensor, lower, upper);
//_CudaSetDataRand(tensor, lower, upper);
int num = tensor->unitNum;
DTYPE variance = upper - lower;
void * d = NULL;
if (tensor->dataType == X_FLOAT) {
d = new float[num];
for (int i = 0; i < num; i++)
*((float*)d + i) = lower + variance * (float)rand() / RAND_MAX;
}
else if (tensor->dataType == X_DOUBLE) {
d = new double[num];
for (int i = 0; i < num; i++)
*((double*)d + i) = (double)lower + variance * rand() / RAND_MAX;
}
else {
ShowNTErrors("Data type must be X_FLOAT or X_Double!");
}
tensor->SetData(d, num);
if (tensor->dataType == X_FLOAT) {
delete[](float*)d;
}
else {
delete[](double*)d;
}
#endif
//XTensor * t2 = NewTensorV2(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//_SetDataRand(t2, low, high);
//_CopyValues(t2, tensor);
//delete t2;
}
}
/* generate data items with a range by start, end and the step
>> tensor - the tensor whose data array would be initialized
>> start - the begin of the array
>> end - the end of the array (not included self)
......@@ -537,7 +498,7 @@ void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step)
CheckNTErrors((tensor->order == 1), "Tensor must be 1 dimension!");
/* compute the true length according to the (start, end, step) */
DTYPE size = fabs(upper - lower);
DTYPE size = (DTYPE)fabs(upper - lower);
int num = ceil(size / fabs(step));
CheckNTErrors((tensor->unitNum == num), "Unit number of the tensor is not matched.");
......@@ -554,7 +515,7 @@ void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step)
*((float*)data + i) = lower + i * step;
}
else {
ShowNTErrors("TODO!");
ShowNTErrors("TODO! Unsupported datatype!")
}
/* set the data from the array */
......@@ -564,8 +525,10 @@ void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step)
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
generate data items with a uniform distribution in [lower, upper] and
set the item to a pre-defined value if the item >= p,
set the item to 0 otherwise
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
......@@ -596,8 +559,30 @@ void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE va
}
}
/* a gauss distribution (Box-Muller method) */
double GaussRand(DTYPE mean, DTYPE standardDeviation)
{
static double u, v;
static int phase = 0;
double z;
double pi = 3.141592654;
if (phase == 0) {
u = (rand() + 1.0) / (RAND_MAX + 1.0);
v = (rand() + 1.0) / (RAND_MAX + 1.0);
z = sqrt(-2.0 * log(u)) * sin(2.0 * pi * v);
}
else {
z = sqrt(-2.0 * log(u)) * cos(2.0 * pi * v);
}
phase = 1 - phase;
return mean + (z * standardDeviation);
}
/*
generate data items with a normal distribution with specified mean and standard deviation
>> tensor - the tensor that keeps the data
>> mean - mean or expectation of the distribution
>> standardDeviation - standard deviation of the distribution
......@@ -605,7 +590,31 @@ generate data items with a normal distribution with specified mean and standard
void _SetDataRandN(XTensor * tensor, DTYPE mean, DTYPE standardDeviation)
{
// TODO: rewrite it and add cuda code!!!!!!!
tensor->SetDataRandn(mean, standardDeviation);
int num = tensor->unitNum;
void * d = NULL;
if (tensor->dataType == X_FLOAT) {
d = new float[num];
for (int i = 0; i < num; i++)
*((float*)d + i) = (float)GaussRand(mean, standardDeviation);
}
else if (tensor->dataType == X_DOUBLE) {
d = new double[num];
for (int i = 0; i < num; i++)
*((double*)d + i) = GaussRand(mean, standardDeviation);
}
else {
ShowNTErrors("TODO! Unsupported datatype!")
}
tensor->SetData(d, num);
if (tensor->dataType == X_FLOAT) {
delete[](float*)d;
}
else {
delete[](double*)d;
}
}
/*
......
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
*/
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
*/
#include <curand.h>
#include <time.h>
......@@ -33,109 +33,34 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set an integer data array with a fixed value p (in int)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedInt(int * d, int size, int p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in int)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
set a data array with a fixed value
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a float data array with a fixed value p (in int)
>> d - pointer to the data array
>> v - the initial value
>> size - size of the array
>> p - the initial value
*/
template<class T>
__global__
void KernelSetDataFixedFloat(float * d, int size, float p)
void KernelSetDataFixed(T * d, T v, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
d[i] = v;
}
template __global__ void KernelSetDataFixed<int>(int *, int, int);
template __global__ void KernelSetDataFixed<float>(float *, float, int);
template __global__ void KernelSetDataFixed<double>(double *, double, int);
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
generate data items with a fixed value
/*
set a double data array with a fixed value p (in int)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedDouble(double * d, int size, double p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor for initialization
>> p - the initial value
>> value - the initial value
*/
void _CudaSetDataFixedDouble(XTensor * tensor, double p)
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T value)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
int gridSize[3];
int blockSize[3];
......@@ -145,59 +70,23 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p)
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a float data array with a fixed value p (in int) only
if the condition entry is non-zero
>> d - pointer to the data array
>> c - pointer to the condition array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedCondFloat(float * d, float * c, int size, float p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size && c[i] != 0)
d[i] = p;
}
/*
generate data items with a fixed value p (in float) only
if the condition entry is non-zero
>> tensor - the tensor for initialization
>> condition - the condition tensor whose entry would be check to
set the corresponding entry in "tensor"
>> p - the initial value
*/
void _CudaSetDataFixedCondFloat(XTensor * tensor, XTensor * condition, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
CheckNTErrors(condition->unitSize == sizeof(float), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedCondFloat <<<blocks, threads >>>((float*)tensor->data, (float*)condition->data,
tensor->unitNum, p);
if (tensor->dataType == X_INT)
KernelSetDataFixed << <blocks, threads >> > ((int*)tensor->data, (int)value, tensor->unitNum);
else if (tensor->dataType == X_FLOAT)
KernelSetDataFixed << <blocks, threads >> > ((float*)tensor->data, (float)value, tensor->unitNum);
else if (tensor->dataType == X_DOUBLE)
KernelSetDataFixed << <blocks, threads >> > ((double*)tensor->data, (double)value, tensor->unitNum);
else
ShowNTErrors("TODO! Unsupported datatype!")
BacktoCudaDev(tensor->devID, devIDBackup);
}
template void _CudaSetDataFixed<int>(XTensor *, int);
template void _CudaSetDataFixed<float>(XTensor *, float);
template void _CudaSetDataFixed<double>(XTensor *, double);
/*
set a float data array with a fixed value p (in int) only
......@@ -207,28 +96,30 @@ if the condition entry is non-zero
>> size - size of the array
>> p - the initial value
*/
template<class T>
__global__
void KernelSetDataFixedCondInt(int * d, float * c, int size, int p)
void KernelSetDataFixedCond(T * d, T * c, T value, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size && c[i] != 0)
d[i] = p;
d[i] = value;
}
template __global__ void KernelSetDataFixedCond<int>(int*, int*, int, int);
template __global__ void KernelSetDataFixedCond<float>(float*, float*, float, int);
template __global__ void KernelSetDataFixedCond<double>(double*, double*, double, int);
/*
generate data items with a fixed value p (in int) only
if the condition entry is non-zero
generate data items with a fixed value p
only if the condition entry is non-zero
>> tensor - the tensor for initialization
>> condition - the condition tensor whose entry would be check to
set the corresponding entry in "tensor"
>> p - the initial value
>> value - the initial value
*/
void _CudaSetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p)
template<class T>
void _CudaSetDataFixedCond(XTensor* tensor, XTensor* condition, T value)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
CheckNTErrors(condition->unitSize == sizeof(float), "TODO!");
int gridSize[3];
int blockSize[3];
......@@ -240,11 +131,24 @@ void _CudaSetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedCondInt <<<blocks, threads >>>((int*)tensor->data, (float*)condition->data,
tensor->unitNum, p);
if (tensor->dataType == X_INT)
KernelSetDataFixedCond <<< blocks, threads >>> ((int*)tensor->data, (int*)condition->data,
(int)value, tensor->unitNum);
else if (tensor->dataType == X_FLOAT)
KernelSetDataFixedCond <<< blocks, threads >>> ((float*)tensor->data, (float*)condition->data,
(float)value, tensor->unitNum);
else if (tensor->dataType == X_DOUBLE)
KernelSetDataFixedCond <<< blocks, threads >>> ((double*)tensor->data, (double*)condition->data,
(double)value, tensor->unitNum);
else
ShowNTErrors("TODO! Unsupported datatype!")
BacktoCudaDev(tensor->devID, devIDBackup);
}
template void _CudaSetDataFixedCond<int>(XTensor*, XTensor*, int);
template void _CudaSetDataFixedCond<float>(XTensor*, XTensor*, float);
template void _CudaSetDataFixedCond<double>(XTensor*, XTensor*, double);
/*
set data array with a uniform distribution in [low, high]
......@@ -309,8 +213,9 @@ set data items along with a given dimension (and keep the remaining items unchan
>> blockSize - size of a data block
>> blockNum - number of data blocks
*/
template<class T>
__global__
void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -326,6 +231,9 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
d[blockSize * j + i] = p;
}
template __global__ void KernelSetDataDim<int>(int*, int, int, int, int, int);
template __global__ void KernelSetDataDim<float>(float*, int, int, int, int, float);
template __global__ void KernelSetDataDim<double>(double*, int, int, int, int, double);
/*
set data items along with a given dimension (and keep the remaining items unchanged) - cuda version
......@@ -343,7 +251,8 @@ e.g., given a 3 * 3 tensor
7 8 9
i.e., we set all entries of row 1 to 0
*/
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
template<class T>
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, T p)
{
int n = tensor->order;
......@@ -372,11 +281,24 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
if (tensor->dataType == X_INT)
KernelSetDataDim << <blocks, threads >> > ((int*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, (int)p);
else if (tensor->dataType == X_FLOAT)
KernelSetDataDim << <blocks, threads >> > ((float*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, (float)p);
else if (tensor->dataType == X_DOUBLE)
KernelSetDataDim << <blocks, threads >> > ((double*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, (double)p);
else
ShowNTErrors("TODO! Unsupported datatype!")
BacktoCudaDev(tensor->devID, devIDBackup);
}
template void _CudaSetDataDim<int>(XTensor*, int, int, int, int);
template void _CudaSetDataDim<float>(XTensor*, int, int, int, float);
template void _CudaSetDataDim<double>(XTensor*, int, int, int, double);
/*
modify data items along with a given index and dimension
......@@ -462,6 +384,7 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
/*
set lower triangular matrics for each block
>> d - pointer to the data array
>> l - row number (or column number) of each block, i.e,
a block is l * l matrix
......@@ -469,7 +392,7 @@ set lower triangular matrics for each block
>> blockNum - number of the blocks
>> p - the value for each entry of the lower triangular matrics
>> shift - the offset from diagonal
e.g., for a 3* 3 tensor,
e.g., for a 3* 3 tensor,
when p = 1 ans shift = 0, we have
1 0 0
1 1 0
......@@ -503,33 +426,26 @@ void KernelSetDataLowTri(DTYPE * d, int l, int blockSize, int blockNum, DTYPE p,
/*
generate data as lower triangular matrics for last two dimensions (cuda version)
>> tensor - the tensor whose data to be set
>> p - the value for each entry of the lower triangular matrics
>> value - the value for each entry of the lower triangular matrics
>> shift - the offset from diagonal
e.g., for a 3* 3 tensor,
when p = 1 ans shift = 0, we have
e.g., for a 3 * 3 tensor,
when value = 1 ans shift = 0, we have
1 0 0
1 1 0
1 1 1
when p = 2 and shift = -1, we have
when value = 2 and shift = -1, we have
0 0 0
2 0 0
2 2 0
*/
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
void _CudaSetDataLowTri(XTensor * tensor, DTYPE value, int shift)
{
int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(n >= 2, "The tensor must have a order no less than 2!");
CheckNTErrors(tensor->GetDim(n - 1) == tensor->GetDim(n - 2),
"The last two dimensions must be of the same size!");
int l = tensor->GetDim(-1);
int blockNum = 1;
int blockSize = l * l;
for(int i = 0; i < n - 2; i++)
blockNum *= tensor->GetDim(i);
int size = tensor->GetDim(-1);
int blockSize = size * size;
int blockNum = tensor->unitNum / blockSize;
int cudaGrids[3];
int cudaBlocks[3];
......@@ -542,7 +458,7 @@ void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataLowTri<<<blocks, threads >>>((DTYPE*)tensor->data, l, blockSize, blockNum, p, shift);
KernelSetDataLowTri<<<blocks, threads >>>((DTYPE*)tensor->data, size, blockSize, blockNum, value, shift);
BacktoCudaDev(tensor->devID, devIDBackup);
}
......
......@@ -28,31 +28,24 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */
void _CudaSetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value */
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T value);
/* generate data items with a fixed value p (in float) */
void _CudaSetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p (in float) only
if the condition entry is non-zero */
void _CudaSetDataFixedCondFloat(XTensor * tensor, XTensor * condition, float p);
/* generate data items with a fixed value p (in int) only
if the condition entry is non-zero */
void _CudaSetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p);
/* generate data items with a fixed value p
only if the condition entry is non-zero */
template<class T>
void _CudaSetDataFixedCond(XTensor * tensor, XTensor * condition, T p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
template<class T>
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, T p);
/* modify data items along with a given index and dimension (and keep the remaining items unchanged) */
void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
/* generate data as lower triangular matrics for last two dimensions (cuda version) */
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);
void _CudaSetDataLowTri(XTensor * tensor, DTYPE value, int shift);
/* generate data items with a uniform distribution in [lower, upper] */
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
......
......@@ -30,32 +30,17 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
/* generate data items with a fixed value p */
void _SetDataFixed(XTensor * tensor, void * valuePointer);
/* generate data items with a fixed value */
template<class T>
void _SetDataFixed(XTensor * tensor, T value);
/* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p);
/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);
/* generate data items with a fixed value p (in int) */
void _SetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void _SetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _SetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, DTYPE p);
/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCondInt(XTensor * tensor, XTensor * condition, int p);
/* generate data items with a fixed value only if the condition entry is non-zero */
template<class T>
void _SetDataFixedCond(XTensor* tensor, XTensor* condition, T value);
/* set data items along with a given dimension (and keep the remaining items unchanged) */
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
template<class T>
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, T p);
/* modify data items along with a given index and dimension (and keep the remaining items unchanged) */
void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
......
......@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
InitTensor1DV2(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
c.SetDataFixed(1.0);
_DropoutWithIndex(&x, &maskIndex, &c);
......
......@@ -383,15 +383,7 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
int leadDim, int tBeg, int tLen, int yBeg)
{
if(t == NULL){
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else{
ShowNTErrors("TODO");
}
dedy->SetDataFixed(1);
return;
}
......
......@@ -50,7 +50,7 @@ bool TestDropout1()
XTensor yUser;
/* initialize variables */
_SetDataFixedFloat(x, 1.0F);
x->SetDataFixed(1);
y->SetZeroAll();
/* call Dropout function */
......@@ -88,7 +88,7 @@ bool TestDropout1()
XTensor yUserGPU;
/* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F);
xGPU->SetDataFixed(1);
yGPU->SetZeroAll();
/* call Dropout function */
......@@ -157,10 +157,10 @@ bool TestDropout2()
XTensor * dedy = NewTensorV2(order, dimSize);
/* initialize variables */
_SetDataFixedFloat(x, 1.0F);
x->SetDataFixed(1.0);
y->SetZeroAll();
dedx->SetZeroAll();
_SetDataFixedFloat(dedy, 1.5F);
dedy->SetDataFixed(1.5);
/* call Dropout function */
float dropProb = 0.5F;
......@@ -183,10 +183,10 @@ bool TestDropout2()
XTensor * dedyGPU = NewTensorV2(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F);
xGPU->SetDataFixed(1.0);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
_SetDataFixedFloat(dedyGPU, 1.5F);
dedyGPU->SetDataFixed(1.5);
/* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb);
......
......@@ -195,8 +195,8 @@ bool TestReduceSum2()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
s->SetDataFixed(1);
answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -215,7 +215,7 @@ bool TestReduceSum2()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -284,8 +284,8 @@ bool TestReduceSum3()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
s->SetDataFixed(1);
answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -304,7 +304,7 @@ bool TestReduceSum3()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -373,8 +373,8 @@ bool TestReduceSum4()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
s->SetDataFixed(1);
answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -393,7 +393,7 @@ bool TestReduceSum4()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -464,8 +464,8 @@ bool TestReduceSum5()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
s->SetDataFixed(1);
answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -484,7 +484,7 @@ bool TestReduceSum5()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -556,8 +556,8 @@ bool TestReduceSum6()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
s->SetDataFixed(1);
answer->SetDataFixed(s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -576,7 +576,7 @@ bool TestReduceSum6()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......
......@@ -119,7 +119,7 @@ bool TestSetData2()
XTensor * modify = NewTensorV2(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 1.0F);
s->SetDataFixed(1);
modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
......@@ -137,7 +137,7 @@ bool TestSetData2()
XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
......@@ -212,11 +212,11 @@ bool TestSetData3()
XTensor * modify = NewTensorV2(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 1.0F);
s->SetDataFixed(1);
modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
_SetDataFixedFloat(s, 1.0F);
s->SetDataFixed(1);
_SetDataIndexed(s, modify, 1, 1);
/* check results */
......@@ -231,7 +231,7 @@ bool TestSetData3()
XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
sGPU->SetDataFixed(1);
modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
......
......@@ -91,7 +91,7 @@ bool TestSpread1()
XTensor * modify = NewTensorV2(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 0.0F);
s->SetZeroAll();
modify->SetData(data, dataUnitNum);
/* call _Spread function */
......@@ -109,7 +109,7 @@ bool TestSpread1()
XTensor * modifyGPU = NewTensorV2(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 0.0F);
sGPU->SetZeroAll();
modifyGPU->SetData(data, dataUnitNum);
/* call _Spread function */
......
......@@ -296,8 +296,8 @@ bool TestSumDim3()
/* initialize variables */
a->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F);
_SetDataFixedFloat(answer, 1.0F);
b->SetDataFixed(1);
answer->SetDataFixed(1);
/* call SumDim function */
_SumDim(a, b, c, 1);
......@@ -323,7 +323,7 @@ bool TestSumDim3()
/* Initialize variables */
aGPU->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F);
bGPU->SetDataFixed(1);
/* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1);
......@@ -405,8 +405,8 @@ bool TestSumDim4()
/* initialize variables */
a->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F);
_SetDataFixedFloat(answer, 1.0F);
b->SetDataFixed(1);
answer->SetDataFixed(1);
/* call SumDim function */
_SumDim(a, b, c, 1);
......@@ -432,7 +432,7 @@ bool TestSumDim4()
/* Initialize variables */
aGPU->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F);
bGPU->SetDataFixed(1);
/* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论