Commit dd6646ed by xuchen

Merge branch 'xiaotong-working'

parents 4f37a5ad 7ac8e731
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include "XLink.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
int XLink::paramSize = 64;
/* constuctor */
XLink::XLink()
{
head = NULL;
tails = NULL;
params = NULL;
tailNum = 0;
paramNum = 0;
type[0] = 0;
}
/* deconstructor */
XLink::~XLink()
{
delete[] tails;
delete[] (char*)params;
}
/* reset it */
void XLink::Reset()
{
delete[] tails;
delete[] (char*)params;
head = NULL;
tails = NULL;
params = NULL;
tailNum = 0;
paramNum = 0;
type[0] = 0;
}
/*
set edge type name
>> typeName - type name in string
*/
void XLink::SetType(const char * typeName)
{
type[0] = 0;
if(typeName == NULL)
return;
strcpy(type, typeName);
}
/*
set head
>> h - pointer to the head tensor
*/
void XLink::SetHead(XTensor * h)
{
head = h;
}
/*
add a tail
>> t - pointer to the tail tensor
*/
void XLink::AddTail(XTensor * t)
{
XTensor ** ts = tails;
tails = new XTensor*[tailNum + 1];
memcpy(tails, ts, sizeof(XTensor*) * tailNum);
tails[tailNum++] = t;
delete[] ts;
}
/*
add two tails in one time
>> t1 - pointer to the tail tensor
>> t2 - pointer to another tail tensor
*/
void XLink::AddTwoTails(XTensor * t1, XTensor * t2)
{
XTensor ** ts = tails;
tails = new XTensor*[tailNum + 2];
memcpy(tails, ts, sizeof(XTensor*) * tailNum);
tails[tailNum++] = t1;
tails[tailNum++] = t2;
delete[] ts;
}
/*
add a parameter
>> param - parameter in default type
*/
void XLink::AddParam(DTYPE param)
{
void * ps = params;
params = new char[paramNum + 1];
memcpy(params, ps, paramNum * paramSize);
DTYPE * p = (DTYPE*)((char*)params + paramNum * paramSize);
*p = param;
paramNum++;
delete[] (char*)ps;
}
/*
add a parameter
>> param - pointer to the parameter
>> size - size of the parameter
*/
void XLink::AddParam(void * param, int size)
{
void * ps = params;
params = new char[paramNum + 1];
memcpy(params, ps, paramNum * paramSize);
char * p = (char*)params + paramNum * paramSize;
memcpy(p, param, size);
paramNum++;
delete[] (char*)ps;
}
/*
create a hyperedge with two input tensors and a output tensor
>> t1 - a tail tensor
>> t2 - another tail tensor
>> h - head tensor
>> typeName - name of edge type
*/
void XLink::MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName)
{
if(h != NULL)
return;
/* forward */
XLink &income = h->income;
income.Reset();
income.SetHead(h);
if(t1 != NULL && t2 != NULL)
income.AddTwoTails(t1, t2);
else if(t1 != NULL)
income.AddTail(t1);
else{
ShowNTErrors("TODO!");
}
income.SetType(typeName);
/* backward for t1 */
if(t1 != NULL){
XLink &outgo = t1->outgo;
CheckNTErrors(outgo.head != t1, "Wrong head of the hyperedge!");
outgo.AddTail(h);
}
/* backward for t2 */
if(t2 != NULL){
XLink &outgo = t2->outgo;
CheckNTErrors(outgo.head != t2, "Wrong head of the hyperedge!");
outgo.AddTail(h);
}
}
/*
create a hyper edge with a list of tensors and a output tensor
>> list - a list of input tensors
>> h - head tensor
>> typeName - name of edge type
*/
void XLink::MakeLink(XList * list, XTensor * h, const char * typeName)
{
/* forward */
XLink &income = h->income;
income.Reset();
income.SetHead(h);
income.SetType(typeName);
for(int i = 0; i < list->count; i++){
XTensor * t = (XTensor*)list->GetItem(i);
income.AddTail(t);
}
/* backward */
for(int i = 0; i < list->count; i++){
XTensor * t = (XTensor*)list->GetItem(i);
XLink &outgo = t->outgo;
CheckNTErrors(outgo.head != t, "Wrong head of the hyperedge!");
outgo.AddTail(h);
}
}
/*
add parameters
>> h - head
>> param - parameter we want introduce
*/
void XLink::AddParamToHead(XTensor * h, DTYPE param)
{
if(h != NULL)
return;
h->income.AddParam(param);
}
/*
add an integer parameter
>> h - head
>> param - parameter we want introduce
*/
void XLink::AddParamToHeadInt(XTensor * h, int param)
{
if(h != NULL)
return;
h->income.AddParam(&param, sizeof(int));
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
/* this is a header to include all functions in the "core" workspace */
#ifndef __CHEADER_H__
#define __CHEADER_H__
#include "../XTensor.h"
#include "Concatenate.h"
#include "ConcatenateSolely.h"
#include "CopyIndexed.h"
#include "CopyInGrid.h"
#include "CopyValues.h"
#include "FlushToMem.h"
#include "MakeMergeBlockIndex.h"
#include "MakeSplitBlockIndex.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMul2DMultiTheading.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "Merge.h"
#include "MergeBlockLists.h"
#include "Multiply.h"
#include "Negate.h"
#include "Normalize.h"
#include "Permute.h"
#include "Power.h"
#include "ReduceMax.h"
#include "ReduceMean.h"
#include "ReduceStandardVariance.h"
#include "ReduceSum.h"
#include "ReduceSumSquared.h"
#include "ReduceVariance.h"
#include "ScaleAndShift.h"
#include "SetData.h"
#include "Sort.h"
#include "Split.h"
#include "Sum.h"
#include "SumByColumnTV.h"
#include "SumByColumnVT.h"
#include "TopK.h"
#include "Unsqueeze.h"
#include "XMatrixSegment.h"
#include "XTensorBLAS.h"
#endif // __CHEADER_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "Concatenate.h"
#include "Merge.h"
#include "ConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes
>> smalls - a list of tensors for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/
void Concatenate(XList * smalls, XTensor * big, int dim)
{
bool uniform = true;
for (int i = 1; i < smalls->count; i++) {
XTensor * a = (XTensor*)smalls->GetItem(i - 1);
XTensor * b = (XTensor*)smalls->GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsIdentical(a, b))
uniform = false;
}
if (uniform)
Merge(smalls, big, dim);
else
ConcatenateSolely(smalls, big, dim);
}
/*
concatenate two tensors along a given dimension
*/
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim)
{
XList smalls(2);
smalls.Add(smallA);
smalls.Add(smallB);
Concatenate(&smalls, big, dim);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-05-08
*/
#include "SetData.h"
#include "CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
#include "time.h"
#include "iconv.h"
#else
#include "time.h"
#include "windows.h"
#include "process.h"
#endif
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
generate data items with a uniform distribution in [low,high]
>> tensor - the tensor whose data array would be initialized
>> low - lower value of the range
>> high - higher value of the range
*/
void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
{
if(tensor == NULL)
return;
/* GPU code */
if(tensor->devID < 0){
DTYPE variance = high - low;
srand((unsigned)time(NULL));
if(tensor->dataType == X_FLOAT){
float * d = (float*)tensor->data;
for(int i = 0; i < tensor->unitNum; i++){
d[i] = variance * ((float)rand()/RAND_MAX) + low;
}
}
else if(tensor->dataType == X_DOUBLE){
double * d = (double*)tensor->data;
for(int i = 0; i < tensor->unitNum; i++){
d[i] = variance * ((double)rand()/RAND_MAX) + low;
}
}
else{
ShowNTErrors("TODO");
}
}
/* GPU code
The trick here is that initialize the data on a temperary tensor on CPU.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
*/
else{
XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
SetDataRand(t2, low, high);
CopyValues(t2, tensor);
delete t2;
}
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "Loss.h"
#include "Loss.cuh"
#include "../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
loss function to measure the "number" of errors
*/
/*
compute the loss
>> gold - gold standard
>> y - model prediction
>> LFName - name of loss function
>> isLogOutput - is the output in log scale?
>> leadDim - the leading dimension for the output
>> gBeg - where to start in the gold standard (along the leading dimension)
>> gLen - segment length from oBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
*/
DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
{
return 0;
// TODO: call cuda kernels for computing the errors
}
/*
the log version of loss computation
>> gold - gold standard
>> y - model prediction
>> LFName - name of loss function
>> leadDim - the leading dimension for the output
>> gBeg - where to start in the gold standard (along the leading dimension)
>> gLen - segment length from oBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
*/
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int yBeg)
{
return 0;
// TODO: call cuda kernels for computing the errors
}
/*
backward compuation for a single element (Cuda version)
dE/dy
where E is the error(loss) function that measure the errors in y
with respect to gold standard, and y this the model output
>> t - gold standard
>> y - model output
>> LFName - name of loss function
<< return dE/dy
*/
DTYPE CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
{
return LossBackwardPoint(t, y, LFName);
// TODO: call cuda kernels for computing the errors
}
/*
backward compuation for squared error (Cuda kernel)
>> dedy - dE/dy (for return)
>> t - gold standard (in vector)
>> y - model output (in vector)
>> size - size of the vector (dedy)
*/
extern "C" __global__
void KernelLossBackwardSquaredError(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
dedy[i] = y[i] - t[i];
}
}
/*
backward compuation of blocks for squared error (Cuda kernel)
>> dedy - dE/dy (for return)
>> t - gold standard (in vector)
>> y - model output (in vector)
>> blockSize - size of a block
>> begInBlock - the begining position in a block for computation
>> lenInBlock - number of items in a block for computation
>> size - size of the vector (dedy)
*/
extern "C" __global__
void KernelLossBackwardSquaredErrorBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
int blockSize, int begInBlock, int lenInBlock, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int offset = i % blockSize;
if(offset < begInBlock || offset >= begInBlock + lenInBlock)
return;
if (i < size){
dedy[i] = y[i] - t[i];
}
}
/*
backward compuation for cross entropy (Cuda kernel)
>> dedy - dE/dy (for return)
>> t - gold standard (in vector)
>> y - model output (in vector)
>> size - size of the vector (dedy)
*/
extern "C" __global__
void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
dedy[i] = -t[i]/y[i];
}
}
/*
backward compuation for cross entropy (Cuda kernel)
>> dedy - dE/dy (for return)
>> t - gold standard (in vector)
>> y - model output (in vector)
>> blockSize - size of a block
>> begInBlock - the begining position in a block for computation
>> lenInBlock - number of items in a block for computation
>> size - size of the vector (dedy)
*/
extern "C" __global__
void KernelLossBackwardCrossEntropyBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
int blockSize, int begInBlock, int lenInBlock, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int offset = i % blockSize;
if(offset < begInBlock || offset >= begInBlock + lenInBlock)
return;
if (i < size){
dedy[i] = -t[i]/y[i];
}
}
/*
backward compuation for (dense) vectors (Cuda version)
dE/dy
where E is the error(loss) function that measure the errors in y
with respect to gold standard, and y this the model output
>> dedy - dE/dy (for return)
>> t - gold standard (in vector)
>> y - model output (in vector)
>> LFName - name of loss function
>> leadDim - the leading dimension for the output
>> tBeg - where to start in the gold standard (along the leading dimension)
>> tLen - segment length from oBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
*/
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
{
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[1] == 1), "TODO!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE &&
y->dataType == DEFAULT_DTYPE &&
dedy->dataType == DEFAULT_DTYPE),
"Input vectors are not in default type.");
CheckNTErrors((dedy->devID >= 0 && t->devID >= 0 && y->devID >= 0),
"The backward compuation must be performed on GPUs.");
CheckNTErrors((dedy->devID == t->devID && dedy->devID == y->devID),
"The vectors must be on the same GPU.");
CheckNTErrors((tBeg == yBeg), "TODO!");
int leadDimRDI = y->order - leadDim - 1;
if(leadDimRDI < 0){
leadDimRDI = y->dimSizeRDI[y->order - 1];
tBeg = 0;
yBeg = 0;
tLen = y->dimSizeRDI[leadDimRDI];
}
int stride = 1;
int blockSize = 1;
int size = 1;
for(int i = 0; i < leadDimRDI; i++)
stride *= y->dimSizeRDI[i];
size = tLen * stride;
int cudaGridSize[3], cudaBlockSize[3];
GDevs.GetCudaThread(dedy->devID, y->unitNum, cudaGridSize, cudaBlockSize);
dim3 blocks(cudaGridSize[0]);
dim3 threads(cudaBlockSize[0]);
DTYPE * tp = (DTYPE*)t->data;
DTYPE * yp = (DTYPE*)y->data;
DTYPE * dedyp = (DTYPE*)dedy->data;
int devIDBackup;
ProtectCudaDev(y->devID, devIDBackup);
/*
squared error
loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
dloss/dy_i = y_i - t_i
*/
if(LFName == SQUAREDERROR){
if(t->isSparse){
ShowNTErrors("TODO!");
}
else if(size == y->unitNum){
KernelLossBackwardSquaredError<<<blocks, threads>>>(dedyp, tp, yp, y->unitNum);
}
else{
KernelLossBackwardSquaredErrorBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
/*
cross entropy
loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
dloss/dy_i = -t_i / y_i
*/
if(LFName == CROSSENTROPY){
if(t->isSparse){
ShowNTErrors("TODO!");
}
else if(size == y->unitNum){
KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tLen);
}
else{
KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
BacktoCudaDev(y->devID, devIDBackup);
}
#endif
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "XNet.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/fnnlm/FNNLM.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace samplefnnlm;
int main( int argc, const char ** argv )
{
if(argc > 1 && !strcmp(argv[1], "-test"))
1;//Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else{
fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}
XNet net;
XTensor a;
XTensor b;
XTensor c;
InitTensor2D(&a, 2, 2);
InitTensor2D(&b, 2, 4);
InitTensor2D(&c, 2, 4);
a.SetZeroAll();
b.SetZeroAll();
c.SetZeroAll();
SetDataFixed(a, 0.1F);
a.Set2D(0.3F, 1, 0);
a.Set2D(0.4F, 1, 1);
b = Merge(a, a, 1);
c = HTanH(MMul(a, b));
a.Dump(stderr, "a:");
b.Dump(stderr, "b:");
c.Dump(stderr, "c:");
XLink::ShowNetwork(stderr, &c);
net.Backward(c);
net.Dump(stderr);
//_CrtDumpMemoryLeaks();
return 0;
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation function
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "XNoder.h"
#include "XBackwardFunc.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
{
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(node->grad != NULL, "No gradient found!");
CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");
XTensor * input = income.tails[0];
XTensor * output = node;
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0);
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0);
_SoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else{
ShowNTErrors("Wrong activation function type!");
}
}
/* indicates whether the node is for an activation function */
bool XFuncGrad::IsFunc(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & FUNCTION_BASE) != 0;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation function
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDFUNC_H__
#define __XBACKWARDFUNC_H__
namespace nts{
/* this class computes the gradient for activation functions given a node */
class XFuncGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
/* indicates whether the node is for an activation function */
static
bool IsFunc(XTensor * node);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
*/
#include "XBackwardLoss.h"
#include "../tensor/XName.h"
#include "../tensor/function/HardTanH.h"
#include "../tensor/function/LogSoftmax.h"
namespace nts{
/*
compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> x - input of the function
>> dedy - dE/dy
>> dedx - dE/dx
>> funcID - id of the function f
>> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName)
{
CheckNTErrors(gold && y && x, "Empty input tensors!");
CheckNTErrors(dedx, "Empty gradient tensors!");
CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
if(funcID == FUNC_HARDTANH){
_HardTanHBackward(gold, y, x, dedy, dedx, lossName);
}
else if(funcID == FUNC_LOGSOFTMAX){
int leadDim = *(int*)params;
_LogSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
}
else{
ShowNTErrors("wrong function found when call the backward process!");
}
}
/*
compute dE/dy for variable y and error(loss) function E
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName)
{
_LossBackward(dedy, gold, y, lossName);
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
* My students worked all night to prepare a submission to CWMT. Good luck
* to them!
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__
namespace nts{
/* this class computes the gradient (of a output node)
with respect to the loss */
class XLossGrad
{
public:
/* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for math operations
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "XNoder.h"
#include "XBackwardMath.h"
#include "../tensor/XName.h"
#include "../tensor/core/CHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XMathGrad::MakeGrad(XTensor * node)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if(operID == MATH_SUM)
GradSum(node);
else if(operID == MATH_MULTIPLY)
GradMultiply(node);
else if(operID == MATH_MATRIXMUL)
GradMatrixMul(node);
else{
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a math operation */
bool XMathGrad::IsMathOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & MATH_BASE) != 0;
}
/*
gradient for sum
for
c = a + b * \beta
we have
dE/da = dE/dc
dE/db = dE/dc * \beta
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSum(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad);
_Sum(b->grad, node->grad, b->grad, beta);
}
/*
gradient for multiply (dot production)
for
c = a * b
we have
dE/da = dE/dc * b
dE/db = dE/dc * a
>> node - the node (c) for backward computation
*/
void XMathGrad::GradMultiply(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");
_Multiply(node->grad, b, a->grad, 1.0F);
_Multiply(node->grad, a, b->grad, 1.0F);
}
/*
gradient for matrix multiply
for c = matmul(a, b) * \alpha
we have
dE/da = dE/dc * b^T * \alpha
dE/db = a^T * dE/dc * \alpha
>> node - the node (c) for backward computation
*/
void XMathGrad::GradMatrixMul(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
CheckNTErrors(income.paramNum == 3, "Wrong parameter number for MULTIPLY!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
MATRIX_TRANS_TYPE transA = income.GetParamTrans(0);
MATRIX_TRANS_TYPE transB = income.GetParamTrans(1);
DTYPE alpha = income.GetParam(2);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
XTensor * dedc = node->grad;
XTensor * deda = a->grad;
XTensor * dedb = b->grad;
/* c = a * b * \alpha */
if(transA == X_NOTRANS && transB == X_NOTRANS){
/* dE/da = dE/dc * b^T * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
/* dE/db = a^T * dE/dc * \alpha */
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a^T * b * \alpha */
else if(transA == X_TRANS && transB == X_NOTRANS){
/* dE/da = dE/dc * b^T * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
/* dE/db = a * dE/dc * \alpha */
_MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a * b^T * \alpha */
else if(transA == X_NOTRANS && transB == X_TRANS){
/* dE/da = dE/dc * b * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
/* dE/db = a^T * dE/dc * \alpha */
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a^T * b^T * \alpha */
else if(transA == X_TRANS && transB == X_TRANS){
/* dE/da = dE/dc * b * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
/* dE/db = a * dE/dc * \alpha */
_MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for math operations
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#ifndef __XBACKWARDMATH_H__
#define __XBACKWARDMATH_H__
namespace nts{
/* this class computes the gradient for math operations given a node */
class XMathGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
/* indicates whether the node is for a math operation */
static
bool IsMathOP(XTensor * node);
private:
/* gradient for sum: c = a + b * \beta */
static
void GradSum(XTensor * node);
/* gradient for multiply (dot production): c = a * b */
static
void GradMultiply(XTensor * node);
/* gradient for matrix multiply: c = matmul(a, b) */
static
void GradMatrixMul(XTensor * node);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for math operations
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-19
* It was chilly when I came into the office this morning ...
* because i forgot to turn the air-condition off last night :(
*/
#include "XNoder.h"
#include "XBackwardShape.h"
#include "../tensor/XName.h"
#include "../tensor/core/CHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XShapeGrad::MakeGrad(XTensor * node)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if(operID == SHAPE_MERGE)
GradMerge(node);
else if(operID == SHAPE_MERGE_LIST)
GradMergeList(node);
else if(operID == SHAPE_UNSQUEEZE)
GradUnsqueeze(node);
else{
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a math operation */
bool XShapeGrad::IsShapeOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & DATA_BASE) != 0;
}
/*
gradient for merge
for
c = merge(a_0, a_1, ...)
where a_i is the i-th block in a tensor a
we have
dE/da_0 = dE/dc_{split_0}
dE/db_1 = dE/dc_{split_1}
...
i.e.,
dE/da = split(dE/dc)
>> node - the node (c) for backward computation
*/
void XShapeGrad::GradMerge(XTensor * node)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for MERGE!");
CheckNTErrors(node->order == input->order - 1, "wrong tensor orders!");
int whereToMerge = income.GetParamInt(0);
int leadDim = income.GetParamInt(1);
int blockSize = 1;
int blockNum = 1;
for(int i = 0; i < input->order; i++){
if(i < leadDim)
blockNum *= input->dimSize[i];
}
blockSize = input->GetDataSizeInChar() / blockNum;
XNoder::MakeGrad(input);
int * dims = new int[input->order];
for(int i = 0, j = 0; i < input->order; i++){
if(i >= leadDim){
dims[j++] = input->dimSize[i];
}
}
dims[0] = -dims[0];
XTensor gradInputSmall(input->order - leadDim, dims,
input->dataType, input->denseRatio,
input->devID, input->mem);
dims[whereToMerge - leadDim] *= dims[0];
XTensor gradNodeSmall(node->order - leadDim, dims + leadDim + 1,
node->dataType, node->denseRatio,
node->devID, node->mem);
/* we can simply split the gradient tensor
if the input is used in merging only */
if(input->outgo.tailNum == 1){
for(int i = 0; i < blockNum; i++){
gradNodeSmall.data = (char*)node->grad->data + i * blockSize;
gradInputSmall.data = (char*)input->grad->data + i * blockSize;
_Split(&gradNodeSmall, &gradInputSmall, whereToMerge - leadDim - 1, input->dimSize[leadDim]);
}
}
/* a more complicated case is that the input tensor is used for
other operations somewhere else. So we have to do gradient
accumulation after spliting, i.e., we need an additional
SUM operation */
else{
XTensor gradInputSmallBuf(&gradInputSmall);
for(int i = 0; i < blockNum; i++){
gradNodeSmall.data = (char*)node->grad->data + i * blockSize;
gradInputSmall.data = (char*)input->grad->data + i * blockSize;
_Split(&gradNodeSmall, &gradInputSmallBuf, whereToMerge - leadDim - 1, input->dimSize[leadDim]);
_Sum(&gradInputSmall, &gradInputSmallBuf, &gradInputSmall);
}
}
gradNodeSmall.data = NULL;
gradInputSmall.data = NULL;
delete[] dims;
}
/*
gradient for merging a list of tensors
for
c = merge(list(a, b, ...))
where a, b ... are of the same size
we have
dE/da = dE/dc_{split_0}
dE/db = dE/dc_{split_1}
i.e.,
list(dE/da, dE/db, ...) = split(dE/dc)
>> node - the node (c) for backward computation
*/
void XShapeGrad::GradMergeList(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL;
XList smalls(income.tailNum);
XList smallsGrad(income.tailNum);
bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i];
XNoder::MakeGrad(tail);
smalls.Add(tail);
smallsGrad.Add(tail->grad);
if(i > 1){
CheckNTErrors(XTensor::IsSameShaped(last, tail),
"Input tensors must be of the same size!");
}
if(tail->outgo.tailNum > 1)
mergeOnly = false;
last = tail;
}
int whereToMerge = income.GetParamInt(0);
/* we can simply split the gradient tensor into the input tensors
if the inputs are used in merging only */
if(mergeOnly)
_Split(node->grad, &smallsGrad, whereToMerge, smalls.count);
/* a more complicated case is that the input tensors are used for
other operations somewhere else. So we have to do gradient
accumulation after spliting, i.e., we need an additional
SUM operation */
else{
int * dims = new int[last->order + 1];
dims[0] = smalls.count;
for(int i = 0; i < last->order; i++)
dims[i + 1] = last->dimSize[i];
XTensor gradSplit(last->order + 1, dims,
last->dataType, last->denseRatio,
last->devID, last->mem);
_Split(node->grad, &gradSplit, whereToMerge, smalls.count);
memcpy(dims, last->dimSize, sizeof(int) * last->order);
dims[0] = -dims[0];
XTensor gradSmall(last->order, dims,
last->dataType, last->denseRatio,
last->devID, last->mem);
/* gradient accumulation for each split */
for(int i = 0; i < smalls.count; i++){
XTensor * inputGrad = (XTensor*)smallsGrad.Get(i);
gradSmall.data = (char*)gradSplit.data + i * last->unitNum * last->unitSize;
_Sum(inputGrad, &gradSmall, inputGrad);
}
gradSmall.data = NULL;
delete[] dims;
}
}
/*
gradient for unsqueezing a tensor
for
c = unsqueeze(a)
we have
dE/da = reduecesum(dE/dc)
>> node - the node (c) for backward computation
*/
void XShapeGrad::GradUnsqueeze(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");
XTensor * output = node;
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
int dim = income.GetParamInt(0);
int dSize = income.GetParamInt(1);
CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
CheckNTErrors(output->unitNum = input->unitNum * dSize, "Wrong tensor size!");
_ReduceSum(output->grad, input->grad, dim);
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for shaping and data movement
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDSHAPE_H__
#define __XBACKWARDSHAPE_H__
namespace nts{
/* this class computes the gradient for tensor shaping and movement given a node */
class XShapeGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
/* indicates whether the node is for a shaping operation */
static
bool IsShapeOP(XTensor * node);
private:
/* gradient for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node);
/* gradient for merging a list of tensors : c = merge(list(a, b, ...)) */
static
void GradMergeList(XTensor * node);
/* gradient for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-12
*/
#include "XNet.h"
#include "XNoder.h"
#include "XBackwardLoss.h"
#include "XBackwardMath.h"
#include "XBackwardFunc.h"
#include "XBackwardShape.h"
#include "../tensor/XName.h"
namespace nts{
unsigned int netIDGlobal = 0;
MUTEX_HANDLE netMutex;
/* generate a network id */
unsigned int MakeNetID()
{
if(netIDGlobal == 0)
MUTEX_INIT(netMutex);
MUTEX_LOCK(netMutex);
netIDGlobal += 3;
unsigned int id = netIDGlobal;
MUTEX_UNLOCK(netMutex);
return id;
}
/* constructor */
XNet::XNet()
{
nodes.Clear();
}
/* de-constructor */
XNet::~XNet()
{
}
/* clear the network */
void XNet::Clear()
{
nodes.Clear();
gradNodes.Clear();
outputs.Clear();
inputs.Clear();
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(&gold);
Backward(roots, golds, loss);
}
/*
backward propagation to obtain gradient
>> root - root node (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(NULL);
Backward(roots, golds, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> root - a list of root nodes (output) of the network
>> gold - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
Traverse(roots);
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
}
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is no use in the
folloing process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
/* we compuate dE/dy (y is the output) if no predefined activation function is used */
else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, loss);
}
}
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
}
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> root - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
{
XList golds(roots.count);
for(int i = 0; i < roots.count; i++)
golds.Add(NULL);
Backward(roots, golds, loss);
}
/*
backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function)
*/
void XNet::BackwardNode(XTensor * node)
{
if(node == NULL || node->visitMark == NODE_FINISHED)
return;
if(!XNoder::IsLeaf(node)){
if(XMathGrad::IsMathOP(node))
XMathGrad::MakeGrad(node);
else if(XFuncGrad::IsFunc(node))
XFuncGrad::MakeGrad(node);
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node);
else{
ShowNTErrors("Wrong node type!");
}
}
node->visitMark = NODE_FINISHED;
}
/*
traverse the net and find the topological order by
depth-first search (Tarjan's algorithm)
>> root - root node (or output of the net)
*/
void XNet::Traverse(XTensor &root)
{
XList roots(1);
roots.Add(&root);
Traverse(roots);
}
/*
traverse the net and find the topological order by
depth-first search (Tarjan's algorithm)
>> roots - a list of roots (or output nodes)
*/
void XNet::Traverse(XList &roots)
{
id = MakeNetID();
nodes.Clear();
for (int i = 0; i < roots.count; i++)
TarjanVisit((XTensor*)roots.Get(i), nodes, id);
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
if(XNoder::IsRoot(node))
outputs.Add(node);
if(XNoder::IsLeaf(node))
inputs.Add(node);
if(XNoder::IsGrad(node))
gradNodes.Add(node);
}
}
/*
depth-first search given a node (Tarjan's algorithm for topological ordering)
>> node - the node to visit (mark 0:unvisited, 1:visiting, 2:done)
>> orders - topological order of the nodes
>> code - code of the network
*/
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
{
if(node == NULL)
return;
if(node->visitMark == code + 1){
ShowNTErrors("There is a circle in the network\n");
}
else if(node->visitMark <= code || node->visitMark >= code + 2){
node->visitMark = code + 1;
XLink &income = node->income;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
if(child == NULL)
continue;
TarjanVisit(child, orders, code);
}
node->visitMark = code + 2;
orders.Add(node);
}
}
/*
dump network information
>> file - the file for dumping
*/
void XNet::Dump(FILE * file)
{
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
fprintf(file, "node %d: %d\n", i, node->id);
node->Dump(file, "tensor: ");
if(node->grad != NULL)
node->grad->Dump(file, "grad: ");
else
fprintf(file, "no gradient!\n");
fprintf(file, "\n");
}
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-12
* We expected a heavy rain today but a drizzle came down. Should I
* take a big umbrella?
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XNET_H__
#define __XNET_H__
namespace nts{
/* management of tensor net (or graph) */
struct XNet
{
/* id of the network */
unsigned int id;
/* tensor nodes of the network (in order) */
XList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes;
/* output nodes of the network */
XList outputs;
/* input nodes of the network */
XList inputs;
/* constructor */
XNet();
/* de-constructor */
~XNet();
/* clear the network */
void Clear();
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */
void BackwardNode(XTensor * node);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
void Traverse(XTensor &root);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code);
/* dump network information */
void Dump(FILE * file);
};
/* we make a unique id for every tensor */
extern unsigned int netIDGlobal;
extern MUTEX_HANDLE netMutex;
extern unsigned int MakeNetID();
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "XNoder.h"
namespace nts{
/* make gradient tensor for a node */
void XNoder::MakeGrad(XTensor * node)
{
if(node == NULL)
return;
if(!XTensor::IsSameShaped(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
}
}
/* the node is a leaf node (intput) or not */
bool XNoder::IsLeaf(XTensor * node)
{
if(node == NULL)
return false;
if(node->income.tailNum == 0)
return true;
else
return false;
}
/* the node is a root node (output) or not */
bool XNoder::IsRoot(XTensor * node)
{
if(node == NULL)
return false;
if(node->outgo.tailNum == 0)
return true;
else
return false;
}
/* the node keeps the gradinent or not */
bool XNoder::IsGrad(XTensor * node)
{
if(node == NULL)
return false;
if(node->isGrad)
return true;
else
return false;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* low-level utilities
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#ifndef __XNODER_H__
#define __XNODER_H__
namespace nts{
#define NODE_UNFINISHED 0
#define NODE_DOING 1
#define NODE_FINISHED 2
/* node management */
class XNoder
{
public:
/* make gradient tensor for a node */
static
void MakeGrad(XTensor * node);
/* the node is a leaf node (intput) or not */
static
bool IsLeaf(XTensor * node);
/* the node is a root node (output) or not */
static
bool IsRoot(XTensor * node);
/* the node keeps the gradinent or not */
static
bool IsGrad(XTensor * node);
};
}
#endif
\ No newline at end of file
......@@ -30,9 +30,9 @@
#ifndef __FNNLM_H__
#define __FNNLM_H__
#include "../../XGlobal.h"
#include "../../XTensor.h"
#include "../../core/CHeader.h"
#include "../../tensor/XGlobal.h"
#include "../../tensor/XTensor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
......@@ -127,7 +127,6 @@ struct FNNNet
};
/* entry of the program */
extern "C"
int FNNLMMain(int argc, const char ** argv);
};
......
......@@ -28,29 +28,67 @@
#include <time.h>
#include "XTensor.h"
#include "XDevice.h"
#include "./sample/fnnlm/FNNLM.h"
#include "./test/Test.h"
#include "test/Test.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace samplefnnlm;
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
void SmallTest();
int main( int argc, const char ** argv )
{
srand((unsigned)time(0));
//_CrtSetBreakAlloc(123);
/* a tiny test */
SmallTest();
//_CrtDumpMemoryLeaks();
//return 0;
if(argc > 1 && !strcmp(argv[1], "-test"))
Test();
if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
return FNNLMMain(argc - 1, argv + 1);
else{
fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
fprintf(stderr, "use of tensors. All you need is to ... \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}
//_CrtDumpMemoryLeaks();
return 0;
}
void SmallTest()
{
XTensor a;
XTensor b;
XTensor c;
XTensor d;
InitTensor2D(&a, 2, 2);
InitTensor2D(&b, 2, 2);
a.SetZeroAll();
b.SetZeroAll();
a.Set2D(1.0F, 0, 0);
a.Set2D(2.0F, 1, 1);
b = Sum(a, Multiply(a, a));
/* this is prohibited !!!!!!!!!!!!! */
//XTensor c = a * b + a;
//XTensor d = a + b + c.Lin(0.5F);
c = a * b + a;
d = a + b + c.Lin(0.5F);
XLink::CheckNetwork(&d);
XLink::ShowNetwork(stderr, &d);
a.Dump(stderr, "a:");
b.Dump(stderr, "b:");
c.Dump(stderr, "c:");
d.Dump(stderr, "d:");
}
......@@ -82,7 +82,7 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
}
/*
data conversion
data type conversion
>> devID - device id
>> s - source data array
>> typeS - source data type
......@@ -92,7 +92,7 @@ data conversion
*/
void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{
CheckNTErrors((devID < 0), "This code must be run on GPUs!");
CheckNTErrors((devID < 0), "This code must be run on CPUs!");
if(typeS == typeT)
return;
......
......@@ -47,9 +47,9 @@ extern const char * GetDataTypeName(TENSOR_DATA_TYPE type);
extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
/* data conversion (for lower precision computation) */
extern "C" unsigned short FloatToFloat16(float f);
extern "C" float Float16ToFloat(unsigned short h);
extern "C" void ConvertDataType(int devID,
unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h);
void ConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
......
......@@ -74,7 +74,7 @@ namespace nts {
{ \
if(!(x)) \
{ \
fprintf(stderr, "Error! calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); \
fprintf(stderr, "[ERROR] calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); \
exit(1); \
} \
} \
......@@ -83,7 +83,7 @@ namespace nts {
{ \
if(!(x)) \
{ \
fprintf(stderr, "Error! calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__); \
fprintf(stderr, "[ERROR] calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__); \
exit(1); \
} \
} \
......@@ -91,7 +91,7 @@ namespace nts {
#define ShowNTErrors(msg) \
{ \
{ \
fprintf(stderr, "Error! (%s line %d): %s\n", __FILENAME__, __LINE__, msg); \
fprintf(stderr, "[ERROR] (%s line %d): %s\n", __FILENAME__, __LINE__, msg); \
exit(1); \
} \
} \
......
......@@ -34,6 +34,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
#define PARAM_UNTI_SIZE 64
/*
This defines the link among tensors in networks. XLink can be
......@@ -74,6 +75,12 @@ struct XLink
/* name of the hyperedge type. e.g., sum, mul ... */
char type[MAX_OP_NAME_LENGTH];
/* type id */
int typeID;
/* caculator (pointer to the class for computation) */
void * caculator;
/* constuctor */
XLink();
......@@ -83,8 +90,22 @@ struct XLink
/* reset it */
void Reset();
/* set edge type name */
void SetType(const char * typeName);
/* clear it */
void Clear();
/* clear tails */
void ClearTail();
/* clear the incoming node list of tensor node */
static
void ClearIncoming(XTensor * node);
/* clear the outgoing node list of tensor node */
static
void ClearOutgoing(XTensor * node);
/* set edge type id and name */
void SetType(int id);
/* set head */
void SetHead(XTensor * h);
......@@ -95,19 +116,32 @@ struct XLink
/* add two tails in one time */
void AddTwoTails(XTensor * t1, XTensor * t2);
/* add a integer parameter */
/* add a parameter in default type */
void AddParam(DTYPE param);
/* add a integer parameter */
/* add a parameter */
void AddParam(void * param, int size);
/* get a paramter in default type */
DTYPE GetParam(int i);
/* get a paramter in integer */
int GetParamInt(int i);
/* get a parameter in MATRIX_TRANS_TYPE */
MATRIX_TRANS_TYPE GetParamTrans(int i);
/* create a hyper edge with two input tensors and a output tensor */
static
void MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName);
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with a list of input tensors and a output tensor */
static
void MakeLink(const XList * list, XTensor * h, int id);
/* create a hyper edge with a list of tensors and a output tensor */
/* create a hyper edge with a input tensors and a list of output tensors */
static
void MakeLink(XList * list, XTensor * h, const char * typeName);
void MakeLink(XTensor * h, XList * list, int id);
/* add a parameter */
static
......@@ -116,8 +150,36 @@ struct XLink
/* add an integer parameter */
static
void AddParamToHeadInt(XTensor * h, int param);
/* add a MATRIX_TRANS_TYPE parameter */
static
void AddParamToHeadTrans(XTensor * h, MATRIX_TRANS_TYPE param);
/* add a boolean parameter */
static
void AddParamToHeadBool(XTensor * h, bool param);
/* add a pointer parameter */
static
void AddParamToHeadPointer(XTensor * h, void * param);
/* replace a node with another, i.e., we redirect the links to the new node */
static
void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy links of a given node */
static
void CopyIncoming(const XTensor * reference, XTensor * target);
/* check the correctness of the network encoded in a root node (tensor) */
static
void CheckNetwork(XTensor * root);
/* show the network encoded in a root node (tensor) */
static
void ShowNetwork(FILE * file, XTensor * root);
};
} // namespace nts(NiuTrans.Tensor)
#endif // __XLINK_H__
\ No newline at end of file
#endif // __XLINK_H__
......@@ -42,6 +42,8 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
XList NULLList;
/* constructor */
XList::XList()
{
......@@ -111,7 +113,7 @@ void XList::Create(int myMaxNum, XMem * myMem)
add an item into the list
>> item - pointer to the item
*/
void XList::Add(void * item)
void XList::Add(const void * item)
{
if( count == maxNum ){
void ** newItems;
......@@ -126,7 +128,8 @@ void XList::Add(void * item)
maxNum = maxNum * 2 + 1;
}
items[count++] = item;
MTYPE p = (MTYPE)item;
items[count++] = (MTYPE*)p;
}
......@@ -203,7 +206,7 @@ void XList::Insert(int pos, void * item)
}
/* get the item at position i */
void * XList::GetItem(int i)
void * XList::GetItem(int i) const
{
if( i >= 0 && i < count )
return items[i];
......@@ -355,4 +358,4 @@ void XList::Shuffle(int nround, int beg, int len)
}
}
/* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
/* end of the nts (NiuTrans.Tensor) namespace */
......@@ -69,12 +69,12 @@ public:
/* utilities */
void Create(int myMaxNum, XMem * myMem);
void Add(void * item);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount);
void AddList(XList * l);
void AddInt(int i);
void Insert(int pos, void * item);
void * GetItem(int i);
void * GetItem(int i) const;
int GetItemInt(int i);
void SetItem(int i, void * item);
void SetItemInt(int i, int item);
......@@ -96,7 +96,9 @@ public:
};
extern XList NULLList;
}
/* end of the nts (NiuTrans.Tensor) namespace */
#endif
\ No newline at end of file
#endif
......@@ -24,6 +24,8 @@
#ifndef __XMEM_H__
#define __XMEM_H__
#include <stdlib.h>
#ifdef CUDA_BLAS
#define USE_CUDA
#endif
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
*/
#include "XName.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* get operator name */
const char * GetOPName(int type)
{
if ((type & MATH_BASE) != 0){
if (type == MATH_ABSOLUTE)
return "M_ABSOLUTE";
else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED)
return "M_MATRIXMULBATCHED";
else if (type == MATH_MULTIPLY)
return "M_MULTIPLY";
else if (type == MATH_NEGATE)
return "M_NEGATE";
else if (type == MATH_SIGN)
return "M_SIGN";
else if (type == MATH_SUM)
return "M_SUM";
else if (type == MATH_LOG)
return "M_LOG";
else if (type == MATH_NORMALIZE)
return "M_NORMALIZE";
else if (type == MATH_POWER)
return "M_POWER";
else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT";
else if (type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX";
else if (type == REDUCE_REDUCEMEAN)
return "R_REDUCEMEAN";
else if (type == REDUCE_REDUCESUM)
return "R_REDUCESUM";
else if (type == REDUCE_REDUCESUMSQUARED)
return "R_REDUCESUMSQUARED";
else if (type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE";
}
else if ((type & DATA_BASE) != 0){
if (type == GETANDSET_SELECT)
return "G_SELECT";
else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED";
else if (type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES";
else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if (type == SHAPE_MERGE)
return "S_MERGE";
else if (type == SHAPE_MERGE_LIST)
return "S_MERGE_LIST";
else if (type == SHAPE_PERMUTE)
return "S_PERMUTE";
else if (type == SHAPE_SPLIT)
return "S_SPLIT";
else if (type == SHAPE_SPLIT_LIST)
return "S_SPLIT_LIST";
else if (type == SHAPE_TRANSPOSE)
return "S_TRANSPOSE";
else if (type == SHAPE_UNSQUEEZE)
return "S_UNSQUEEZE";
else if (type == SORT_SORT)
return "S_SORT";
else if (type == SORT_TOPK)
return "S_TOPK";
}
else if ((type & FUNCTION_BASE) != 0){
if (type == FUNC_HARDTANH)
return "F_HARDTANH";
else if (type == FUNC_IDENTITY)
return "F_IDENTITY";
else if (type == FUNC_LOGSOFTMAX)
return "F_LOGSOFTMAX";
else if (type == FUNC_RECTIFY)
return "F_RECTIFY";
else if (type == FUNC_SIGMOID)
return "F_SIGMOID";
else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX";
}
return "NULL";
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* We define various names here
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
* It was really HOT these days. I can't imagine it is SO hot here in Shenyang!
*/
#ifndef __XNAME_H__
#define __XNAME_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* math operations */
#define MATH_BASE 0x00001000
#define MATH_ABSOLUTE MATH_BASE + 1
#define MATH_MATRIXMUL MATH_ABSOLUTE + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_NEGATE MATH_MULTIPLY + 1
#define MATH_SIGN MATH_NEGATE + 1
#define MATH_SUM MATH_SIGN + 1
#define MATH_LOG MATH_SUM + 1
#define MATH_NORMALIZE MATH_LOG + 1
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define REDUCE MATH_SCALEANDSHIFT + 1
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1
/* data and shape related operations */
#define DATA_BASE MATH_BASE * 2
#define GETANDSET DATA_BASE + 1
#define GETANDSET_SELECT GETANDSET + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define SHAPE MOVEMENT_COPYVALUES + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1
#define SHAPE_PERMUTE SHAPE_MERGE_LIST + 1
#define SHAPE_SPLIT SHAPE_PERMUTE + 1
#define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1
#define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
#define SORT SHAPE_UNSQUEEZE + 1
#define SORT_SORT SORT + 1
#define SORT_TOPK SORT_SORT + 1
/* activation functions */
#define FUNCTION_BASE DATA_BASE * 2
#define FUNC_HARDTANH FUNCTION_BASE + 1
#define FUNC_IDENTITY FUNC_HARDTANH + 1
#define FUNC_LOGSOFTMAX FUNC_IDENTITY + 1
#define FUNC_RECTIFY FUNC_LOGSOFTMAX + 1
#define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1
/* get operator name */
const char * GetOPName(int type);
} // namespace nts(NiuTrans.Tensor)
#endif // __XNAME_H__
......@@ -55,12 +55,13 @@ struct XLink;
#define UNSAFE_BUT_FAST_MEM
#define FAST_MATRIX
/*
We implemente the tensor class here though we have defined the class of XMatrix. It
is the parent class of XMatrix.
*/
/* XTensor is a class to do everything a tensor can do :) */
struct XTensor
{
public:
/* id */
int id;
/* memory pool */
XMem * mem;
......@@ -70,6 +71,10 @@ struct XTensor
/* copy of data on the host memory. It is only activated
when the matrix is operated on GPUs */
void * dataHost;
/* a pointer to data (i.e., a pointer to the address of "data".
This is for reset "data" when XTensor is used as a const variable. */
void ** dataP;
/*
device id
......@@ -130,6 +135,18 @@ struct XTensor
/* indicates whether the tensor is initialized or not */
bool isInit;
/* indicates whether the tensor is created temporarily */
bool isTmp;
/* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad;
/* mark for traversing the gragh */
unsigned int visitMark;
/* gradient (for back-propagation) */
XTensor * grad;
/*
the link used to form networks. Note that when we compute on tensors, we actually create a
......@@ -152,31 +169,49 @@ struct XTensor
XTensor();
/* constructor */
XTensor(XTensor * reference);
XTensor(const XTensor * reference);
/* constructor */
XTensor(const int myOrder, int myDevID, XMem * myMem);
/* constructor */
XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const float myDenseRatio, XMem * myMem);
const float myDenseRatio, int myDevID, XMem * myMem);
/* copy constructor */
XTensor(const XTensor &reference);
/* de-constructor */
~XTensor();
/* initialize member variables */
void Init();
/* delete data arrays */
void DestroyData();
/* shallow copy of tensor */
void ShallowCopy(const XTensor &tensor);
/* overloading of the equal-sign */
XTensor& operator = (const XTensor& tensor);
XTensor& operator= (const XTensor &tensor);
/* overloading of the plus-sign */
XTensor operator+ (const XTensor &tensor);
/* overloading of the multiply-sign */
XTensor operator* (const XTensor &tensor);
/* linear transformation */
XTensor Lin(DTYPE scale, DTYPE shift = 0);
/* judge whether the two matrices are in the same type and size */
static
bool IsIdentical(XTensor * a, XTensor * b);
bool IsSameShaped(const XTensor * a, const XTensor * b);
/* judge whether the three matrices are in the same type and size */
static
bool IsIdentical(XTensor * a, XTensor * b, XTensor * c);
bool IsSameShaped(XTensor * a, XTensor * b, XTensor * c);
/* set the size of each dimension */
void SetDim(int * myDimSize);
......@@ -188,7 +223,7 @@ struct XTensor
void Reshape(const int order, const int * myDimSize);
/* get the number of items in the data array */
int GetSize();
int GetSize() const;
/* get size of the memory used */
int GetDataSizeInChar();
......@@ -211,6 +246,12 @@ struct XTensor
/* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, int beg = 0);
/* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, float tolerance, int beg = 0);
/* set the pointer to "data" */
void SetDataPointer();
/* set the cell to the ascending order along a given dimension */
void SetAscendingOrder(int dim);
......@@ -218,17 +259,26 @@ struct XTensor
DTYPE Get(int index[], int size = -1);
/* get the pointer to a cell */
void * GetCell(int index[], int size = -1);
void * GetCell(int index[], int size = -1) const;
/* get the value of a cell in a 1d tensor */
/* get the default type value of a cell in a 1d tensor */
DTYPE Get1D(int i);
/* get the value of a cell in a 2d tensor */
DTYPE Get2D(int ni, int mi);
/* get the default type value of a cell in a 2d tensor */
DTYPE Get2D(int ni, int mi) const;
/* get the value of a cell in a 3d tensor */
/* get the default type value of a cell in a 3d tensor */
DTYPE Get3D(int d0, int d1, int d2);
/* get the int value of a cell in a 1d tensor */
int Get1DInt(int i);
/* get the int value of a cell in a 2d tensor */
int Get2DInt(int ni, int mi);
/* get the int value of a cell in a 3d tensor */
int Get3DInt(int d0, int d1, int d2);
/* get the value of a cell in a sparse tensor */
DTYPE GetInSparse(int i);
......@@ -253,6 +303,12 @@ struct XTensor
/* get the number of non-zero elements (in a sparse tensor) */
int GetNonzeroSize();
/* set the tensor as "temporary" */
void SetTMP(bool myIsTmp = true);
/* set the tensor as "keep-gradient" */
void SetGrad(bool myIsGrad = true);
/* resize a matrix with a specified matrix size */
bool Resize(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE,
......@@ -267,7 +323,7 @@ struct XTensor
bool Resize(const XTensor * myTensor);
/* binary search to find an element in a sparse matrix*/
bool BinarySearch(int key, DTYPE &value, void * &position);
bool BinarySearch(int key, DTYPE &value, void * &position) const;
/* dump data to a file */
void Dump(FILE * file, const char * label = NULL, const int n = -1, const int verbose = 0);
......@@ -287,6 +343,12 @@ struct XTensor
void FreeData(XTensor * matrix, XMem * myMem = NULL, bool useBuf = false);
};
/* we make a unique id for every tensor */
extern int tensorIDGlobal;
extern MUTEX_HANDLE tensorMutex;
extern XTensor NULLTensor;
extern int MakeTensorID();
/************************************************
* we define the "new and delete" functions below
*/
......@@ -317,7 +379,7 @@ void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, co
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, XTensor * reference);
void InitTensor(XTensor * tensor, const XTensor * reference);
/* generate a XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
......
......@@ -32,9 +32,6 @@
#define USE_PTHREAD // for linux
#endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
//////////////////////////////////////////////////
// neccessary libs
#ifdef USE_PTHREAD
......@@ -46,12 +43,15 @@ namespace nts{
#endif
#endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
#if(defined(_WIN32) && !defined (__CYGWIN__))
#define CRFPP_USE_THREAD 1
#define BEGINTHREAD(src, stack, func, arg, flag, id) \
(HANDLE)_beginthreadex((void *)(src), (unsigned)(stack), \
(unsigned(_stdcall *)(void *))(func), (void *)(arg), \
(unsigned)(flag), (unsigned *)(id))
(HANDLE)_beginthreadex((void *)(src), (unsigned)(stack), \
(unsigned(_stdcall *)(void *))(func), (void *)(arg), \
(unsigned)(flag), (unsigned *)(id))
#endif
//////////////////////////////////////////////////
......
......@@ -176,12 +176,16 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size)
}
#ifdef USE_CUDA
else if(devIDT >= 0 && devIDS < 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyHostToDevice) == cudaSuccess),
"cudaMemcpy error (cudaMemcpyHostToDevice)");
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyHostToDevice)");
}
}
else if(devIDT < 0 && devIDS >= 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost) == cudaSuccess),
"cudaMemcpy error (cudaMemcpyDeviceToHost)");
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost);
if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyDeviceToHost)");
}
}
else{
//if(devIDT == devIDS){
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
/* this is a header to include all functions in the "core" workspace */
#ifndef __CHEADER_H__
#define __CHEADER_H__
#include "../XTensor.h"
#include "shape/Concatenate.h"
#include "shape/ConcatenateSolely.h"
#include "movement/CopyBlocks.h"
#include "movement/CopyBlocksInGrid.h"
#include "movement/CopyBlocksOnSite.h"
#include "movement/CopyData2D.h"
#include "movement/CopyIndexed.h"
#include "movement/CopyInGrid.h"
#include "movement/CopyValues.h"
#include "utilities/FlushToMem.h"
#include "shape/MakeMergeBlockIndex.h"
#include "shape/MakeSplitBlockIndex.h"
#include "arithmetic/MatrixMul.h"
#include "arithmetic/MatrixMul2D.h"
#include "arithmetic/MatrixMul2DMultiTheading.h"
#include "arithmetic/MatrixMul2DParallel.h"
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/MatrixMULBatchedCPU.h"
#include "shape/Merge.h"
#include "shape/MergeBlockLists.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/Negate.h"
#include "math/Normalize.h"
#include "shape/Permute.h"
#include "math/Power.h"
#include "reduce/ReduceMax.h"
#include "reduce/ReduceMean.h"
#include "reduce/ReduceStandardVariance.h"
#include "reduce/ReduceSum.h"
#include "reduce/ReduceSumSquared.h"
#include "reduce/ReduceVariance.h"
#include "math/ScaleAndShift.h"
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "sort/Sort.h"
#include "shape/Split.h"
#include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "sort/TopK.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "utilities/XMatrixSegment.h"
#include "arithmetic/XTensorBLAS.h"
#endif // __CHEADER_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include <math.h>
#include "../../XTensor.h"
#include "../../XName.h"
#include "Absolute.h"
#include "Absolute.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its absolute value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Absolute(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaAbsolute(a, b);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++)
db[i] = (DTYPE)fabs(d[i]);
}
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _AbsoluteMe(XTensor * a)
{
_Absolute(a, a);
}
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the absolute value of input tensor
*/
XTensor Absolute(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Absolute function */
_Absolute(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_ABSOLUTE);
return b;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Absolute.h"
#include "Absolute.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its absolute value (CUDA Kernel)
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(DTYPE * a, DTYPE * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
b[i] = fabs(a[i]);
}
/*
set each entry to its absolute value (CUDA Kernel)
This is for float16 computation
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(__half * a, __half * b, int size)
{
return;
}
/*
set each entry to its absolute value
>> a - input tensor
>> b - output tensor
*/
void _CudaAbsolute(const XTensor * a, XTensor * b)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "Absolute.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its absolute value (CUDA Kernel) */
__global__
void KernelAbsolute(DTYPE * a, DTYPE * b, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__
void KernelAbsolute(__half * a, __half * b, int size);
/* set each entry to its absolute value */
void _CudaAbsolute(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __ABSOLUTE_H__
#define __ABSOLUTE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */
void _Absolute(const XTensor * a, XTensor * b);
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _AbsoluteMe(XTensor * a);
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Absolute(const XTensor & a);
} // namespace nts(NiuTrans.Tensor)
#endif // __ABSOLUTE_H__
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMULBatchedCPU.h"
#include "MatrixMul2D.h"
#include "XTensorBLAS.h"
......@@ -33,16 +33,16 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> transposedA - indicate whether the matrix a is transposed
>> b - another list of input matrices (2d tensors)
>> transposedB - indicate whether the matrix b is transposed
>> c - output matrix (2d tensor)
>> alpha - scalar
>> beta - scalar
>> c - output matrix (2d tensor)
*/
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input lists!");
CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!");
CheckNTErrors(a && b && c, "Empty input lists!");
CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");
if (a->count == 0)
return;
......@@ -55,19 +55,15 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i);
if (!XTensor::IsIdentical(aim, ai) ||
!XTensor::IsIdentical(bim, bi) ||
!XTensor::IsIdentical(cim, ci))
if (!XTensor::IsSameShaped(aim, ai) ||
!XTensor::IsSameShaped(bim, bi) ||
!XTensor::IsSameShaped(cim, ci))
{
isUniform = false;
break;
}
}
//if(isUniform){
//}
//else{
for (int i = 0; i < a->count; i++) {
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
......@@ -77,11 +73,11 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!");
#ifdef USE_BLAS
if (useBLAS)
MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
_MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
else
MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else
MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif
}
//}
......
......@@ -22,14 +22,13 @@
#ifndef __MATRIXMULBATCHEDCPU_H__
#define __MATRIXMULBATCHEDCPU_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication in batch mode (CPU code) */
extern "C"
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
} // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __MATRIXMUL_H__
#define __MATRIXMUL_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MMul MatrixMul
/*
matrix multiplication c = trans(a) * trans(b) * alpha + c * beta
For the input tensors a and b, we perform matrix multiplicationon the first two dimentsions.
E.g., let A be a tensor of size y * z * m and B bea tensor of size x * y * n.
For A * B, we go over each order-2 tensor of A (of size x * y) and each order-2 tensor B (of size z * x),
like this c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
where trans() returns the transposed matrix if the flag is fired, ai is the i-th element tensor of A,
bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of the result C.
C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/
void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/*
matrix multiplication (return a XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor c to keep the result and return it
For the input tensors a and b, we perform matrix multiplicationon the first two dimentsions.
E.g., let A be a tensor of size y * z * m and B bea tensor of size x * y * n.
For A * B, we go over each order-2 tensor of A (of size x * y) and each order-2 tensor B (of size z * x),
like this c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
where trans() returns the transposed matrix if the flag is fired, ai is the i-th element tensor of A,
bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of the result C.
C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
/* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor)
#endif // __MATRIXMUL_H__
\ No newline at end of file
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "MatrixMul2DParallel.h"
......@@ -30,8 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
matrix multiplication (for 2d tensors)
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
......@@ -42,23 +44,16 @@ where trans() return the transposed matrix if the flag is fired
>> parallelRunner - parallel processing module
>> stream - the string for creating the job pipeline
*/
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner, XStream * stream)
void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner, XStream * stream)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType), "Input tensors should have the same data type!");
CheckNTErrors((a->dataType == b->dataType), "Input tensors should have the same data type!");
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL2D);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1];
......@@ -74,7 +69,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
CudaMatrixMul2D(a, transposedA, b, transposedB, c, alpha, beta, stream);
_CudaMatrixMul2D(a, transposedA, b, transposedB, c, alpha, beta, stream);
return;
}
#endif
......@@ -88,9 +83,9 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
c->dataType == DEFAULT_DTYPE)
{
if (useBLAS)
MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta);
_MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta);
else
MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner);
_MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner);
}
else {
// TODO!!
......@@ -112,7 +107,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int num = *((int*)b->data);
char * p = (char*)b->data + sizeof(int); // pointer to the first tuple
/* a * b */
/* a * b */
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) {
for (int i = 0; i < num; i++) {
int key = *((int*)p);
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "XTensorBLAS.h"
......@@ -37,17 +37,19 @@ c = a * b * \alpha
>> aColSize - column size of matrix a
>> aRowSize - row size of matrix a
>> b - a sparse matrix
>> transposedA - indicates whether b is transposed
>> transposedB - indicates whether b is transposed
>> bNonZeroNum - number of non-zero items in b
>> bColSize - column size of matrix b
>> bRowSize - row size of matrix b
>> c - the resulting (dense) matrix
>> cColSize - column size of matrix c
>> cRowSize - row size of matrix c
>> alpha - the scaling factor
*/
extern "C" __global__
__global__
void KernelMatrixMulDenseMSparseMV2(DTYPE * a, MATRIX_TRANS_TYPE transposedA, int aColSize, int aRowSize,
void * b, MATRIX_TRANS_TYPE transposedB, int bNonZeroNum, int bColSize, int bRowSize,
DTYPE * c, int cColSize, int cRowSize, DTYPE alpha)
void * b, MATRIX_TRANS_TYPE transposedB, int bNonZeroNum, int bColSize, int bRowSize,
DTYPE * c, int cColSize, int cRowSize, DTYPE alpha)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -106,8 +108,10 @@ void KernelMatrixMulDenseMSparseMV2(DTYPE * a, MATRIX_TRANS_TYPE transposedA, in
/*
matrix multiplication (for 2d tensors) (cuda version)
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
......@@ -117,10 +121,9 @@ where trans() return the transposed matrix if the flag is fired
>> beta - another coefficient
>> stream - the string for creating the job pipeline
*/
void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
DTYPE alpha, DTYPE beta, XStream * stream)
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XStream * stream)
{
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
......@@ -147,7 +150,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (!a->isSparse && !b->isSparse) {
CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!");
//cublasHandle_t * handle = GDevs->GetCudaHandle(a->devID);
cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle();
/* !!!! might have problems */
......@@ -155,9 +157,12 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
cublasSetStream(*handle, stream->stream);
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT && c->dataType == X_FLOAT) {
CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType, b->data, transposedB, a->dataType, c->data, c->dataType,
a->dimSize[0], a->dimSize[1], b->dimSize[0], b->dimSize[1], c->dimSize[0], c->dimSize[1],
alpha, beta);
_CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType,
b->data, transposedB, a->dataType, c->data, c->dataType,
a->dimSize[0], a->dimSize[1],
b->dimSize[0], b->dimSize[1],
c->dimSize[0], c->dimSize[1],
alpha, beta);
}
else {
// TODO!!
......@@ -183,7 +188,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (beta == 0)
c->SetZeroAll();
else if (beta != 1.0F) {
//XTensor::ScaleAndShift(c, beta, 0);
ShowNTErrors("TODO!");
}
......
......@@ -32,19 +32,18 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
mutilication of a dense matrix with a sparse vector
c = a * b * \alpha
*/
extern "C" __global__
__global__
void KernelMatrixMulDenseMSparseMV2(DTYPE * a, MATRIX_TRANS_TYPE transposedA, int aColSize, int aRowSize,
void * b, MATRIX_TRANS_TYPE transposedB, int bNonZeroNum, int bColSize, int bRowSize,
DTYPE * c, int cColSize, int cRowSize, DTYPE alpha);
void * b, MATRIX_TRANS_TYPE transposedB, int bNonZeroNum, int bColSize, int bRowSize,
DTYPE * c, int cColSize, int cRowSize, DTYPE alpha);
/*
matrix multiplication (for 2d tensors) (cuda version)
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
*/
extern "C"
void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XStream * stream = NULL);
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XStream * stream = NULL);
#endif // USE_CUDA
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2D_H__
#define __MATRIXMUL2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -31,9 +31,8 @@ matrix multiplication (for 2d tensors)
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
*/
extern "C"
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DMultiTheading.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -38,7 +38,7 @@ argument5: matrix a
argument6: matrix b
argument7: matrix c (c=a*b*\alpha + c*beta)
*/
void MatrixMul2DMultiTheading(XList * args)
void _MatrixMul2DMultiTheading(XList * args)
{
int x1 = *(int*)args->GetItem(0);
int y1 = *(int*)args->GetItem(1);
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DMULTITHEADING_H__
#define __MATRIXMUL2DMULTITHEADING_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -30,8 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/
extern "C"
void MatrixMul2DMultiTheading(XList * args);
void _MatrixMul2DMultiTheading(XList * args);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMul2DMultiTheading.h"
#include "XMatrixSegment.h"
#include "../utilities/XMatrixSegment.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -30,6 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication (for 2d tensors) with multi-threading
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
......@@ -39,10 +40,9 @@ where trans() return the transposed matrix if the flag is fired
>> beta - another coefficient
>> parallelRunner - parallel processing module
*/
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner)
void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
......@@ -56,7 +56,7 @@ void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA,
/* a * b */
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) {
RunParallel2D(parallelRunner, (void*)MatrixMul2DMultiTheading, an * am * bm,
RunParallel2D(parallelRunner, (void*)_MatrixMul2DMultiTheading, an * am * bm,
cn, cm, 5,
a, b, c, &alpha, &beta);
}
......
......@@ -22,18 +22,17 @@
#ifndef __MATRIXMUL2DPARALLEL_H__
#define __MATRIXMUL2DPARALLEL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
matrix multiplication (for 2d tensors) with multi-threading
matrix multiplication (for 2d tensors) with multi-threading.
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
where trans() return the transposed matrix if the flag is fired.
*/
extern "C"
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "XTensorBLAS.h"
......@@ -30,10 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
matrix multiplication of the two tensors
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
......@@ -41,31 +43,26 @@ where trans() returns the transposed matrix if the flag is fired
>> c - where we keep a*b
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
*/
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner)
void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
"Input tensors must have a order >= 2!");
CheckNTErrors((a->order == b->order && a->order == c->order),
"Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!");
......@@ -87,17 +84,17 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10);
XList * bList = new XList(10);
XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSizeRDI[0], a->dimSizeRDI[1] };
int bDimSize[2] = { -b->dimSizeRDI[0], b->dimSizeRDI[1] };
int cDimSize[2] = { -c->dimSizeRDI[0], c->dimSizeRDI[1] };
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
for (int p = 0; p < blockNum; p++) {
void * ap = (char*)a->data + aRealBlockSize * p;
void * bp = (char*)b->data + bRealBlockSize * p;
void * cp = (char*)c->data + cRealBlockSize * p;
XTensor * ai = new XTensor(2, aDimSize, a->dataType, a->denseRatio, a->mem);
XTensor * bi = new XTensor(2, bDimSize, b->dataType, b->denseRatio, b->mem);
XTensor * ci = new XTensor(2, cDimSize, c->dataType, c->denseRatio, c->mem);
XTensor * ai = NewTensor(2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * bi = NewTensor(2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem);
XTensor * ci = NewTensor(2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem);
ai->data = ap;
bi->data = bp;
ci->data = cp;
......@@ -114,11 +111,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
CudaBLASMatrixMULList(a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID),
aList, transposedA,
bList, transposedB,
cList, aList->count,
alpha, beta);
cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
_CudaBLASMatrixMULList(handle,
aList, transposedA,
bList, transposedB,
cList, aList->count,
alpha, beta);
BacktoCudaDev(a->devID, devIDBackup);
#else
......@@ -127,7 +125,7 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
}
else {
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
MatrixMULBatchedCPU(aList, transposedA,
_MatrixMULBatchedCPU(aList, transposedA,
bList, transposedB,
cList, alpha, beta);
}
......@@ -155,4 +153,63 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList;
}
/*
matrix multiplication of the two tensors (do it on site)
c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired.
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
>> transposedB - indicates whether teh matrices in b are transposed
>> alpha - a coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors
*/
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha, XPRunner * parallelRunner)
{
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a.order;
int sub = 0;
int * dimSize = new int[order];
for (int i = 0; i < a.order - 2; i++)
dimSize[sub++] = a.dimSize[i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP();
/*call _MatrixMulBatched function */
_MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
/* destroy variables */
delete[] dimSize;
return c;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -22,20 +22,32 @@
#ifndef __MATRIXMULBATCHED_H__
#define __MATRIXMULBATCHED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
matrix multiplication of the two tensors
matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired
*/
void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/*
matrix multiplication of the two tensors (return a XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired
*/
extern "C"
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -19,37 +19,36 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the item
>> a - matrix a
>> b - matrix b
>> c - result matrix
>> alpha - the coefficient
>>
>> leadingDim - the dimension along which we perform broadcasting
*/
void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MULTIPLY);
XLink::AddParamToHeadInt(c, leadingDim);
XLink::AddParamToHead(c, alpha);
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!");
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
CudaMultiply(a, b, c, leadingDim, alpha);
_CudaMultiply(a, b, c, alpha, leadingDim);
return;
}
#endif
......@@ -122,4 +121,50 @@ void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
{
_Multiply(a, b, a, alpha, leadingDim);
}
/*
element-wise product of two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it
c(i) = a(i)*b(i)
where i is the index of the item
>> a - tensor a
>> b - tensor b
>> leadingDim - the dimension along which we perform broadcasting
<< return - the product of the tensors
*/
XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
{
CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");
XTensor c(&a);
c.SetTMP();
/* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHeadInt(&c, leadingDim);
return c;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Multiply.h"
#include "Multiply.cuh"
......@@ -34,7 +34,7 @@ multiplication of data arrays in a element-wise manner c(i) = a(i)*b(i)
>> c - result data array
>> size - size of c
*/
extern "C" __global__
__global__
void KernelMulElementWise(DTYPE * a, DTYPE * b, DTYPE * c, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -51,7 +51,7 @@ multiplication of data arrays in a element-wise manner c(i) = a(i)*b(i) + \alpha
>> size - size of c
>> alpha - the coefficient
*/
extern "C" __global__
__global__
void KernelMulElementWiseV2(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE alpha)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a
>> a - tensor a
>> b - tensor b
>> c - result tensor
>> alpha - the coefficient
>> stride - the number of items we go over when move next along the leading dimension in a block
>> ldSizeA - size of the leading dimension of a
>> ldSizeB - size of the leading dimension of b
......@@ -116,15 +117,14 @@ where i is the item index
>> a - tensor a
>> b - tensor b
>> c - result tensor
>> leadingDim - leading dimension
>> alpha - the coefficient
>> leadingDim - dimension along which we perform broadcasting
*/
extern "C"
void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
int stride = 1;
......@@ -137,8 +137,8 @@ void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE a
for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]),
"Unmatched tensors!");
a->dimSizeRDI[i] == c->dimSizeRDI[i]),
"Unmatched tensors!");
}
if (i < leadingDimRDI)
stride *= a->dimSizeRDI[i];
......
......@@ -29,11 +29,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* multiplication of two tensors in a element-wise manner c(i) = a(i)*b(i) */
extern "C" __global__
__global__
void KernelMulElementWise(DTYPE * a, DTYPE * b, DTYPE * c, int size);
/* multiplication of two tensors in a element-wise manner c(i) = a(i)*b(i) + \alpha*c(i) */
extern "C" __global__
__global__
void KernelMulElementWiseV2(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE alpha);
/* multiplication of two tensors in a element-wise manner c(i) = a(i)*b(i)+ \alpha*c(i) */
......@@ -41,8 +41,7 @@ template<int nonZeroAlpha>__global__
void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE alpha, int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum);
/* element-wise product of two tensors */
extern "C"
void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha = 0, int leadingDim = 0);
#endif // USE_CUDA
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __MULTIPLY_H__
#define __MULTIPLY_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
element-wise product of two tensors:
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element
*/
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha = 0, int leadingDim = 0);
/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element
*/
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0, int leadingDim = 0);
/*
element-wise product of two tensors (return a XTensor structure)
make a new tensor to keep the result and return it
c(i) = a(i)*b(i)
where i is the index of the element
*/
XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLY_H__
\ No newline at end of file
......@@ -19,29 +19,64 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - the tensor we are processing
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void Negate(XTensor * a)
void _Negate(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
CudaNegate(a);
_CudaNegate(a, b);
return;
}
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++)
d[i] = -d[i];
db[i] = -d[i];
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
_Negate(a, a);
}
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of input tensor
*/
XTensor Negate(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Negate function */
_Negate(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
return b;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
......@@ -29,45 +29,48 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its negtive value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
__global__
void KernelNegate(DTYPE * d, int size)
void KernelNegate(DTYPE * a, DTYPE * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = -d[i];
b[i] = -a[i];
}
/*
set each entry to its negtive value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
set each entry to its negtive value (CUDA Kernel)
This is for float16 computation
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
__global__
void KernelNegate(__half * d, int size)
void KernelNegate(__half * a, __half * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size)
d[i] = __hsub(__float2half(0), d[i]);
b[i] = __hsub(__float2half(0), a[i]);
#else
if (i < size)
d[i] = __float2half(-__half2float(d[i]));
b[i] = __float2half(-__half2float(a[i]));
#endif
}
/*
set each entry to its negtive value
>> a - the tensor
>> a - input tensor
>> b - output tensor
*/
extern "C"
void CudaNegate(XTensor * a)
void _CudaNegate(const XTensor * a, XTensor * b)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
......@@ -82,10 +85,10 @@ void CudaNegate(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelNegate << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
......
......@@ -19,6 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_CUH__
#define __NEGATE_CUH__
#include "Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -27,16 +30,17 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its negtive value (CUDA Kernel) */
__global__
void KernelNegate(DTYPE * d, int size);
void KernelNegate(DTYPE * a, DTYPE * b, int size);
/* set each entry to its negtive value (CUDA Kernel) with float16 data type*/
__global__
void KernelNegate(__half * d, int size);
void KernelNegate(__half * a, __half * b, int size);
/* set each entry to its negtive value */
extern "C"
void CudaNegate(XTensor * a);
void _CudaNegate(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */
void _Negate(const XTensor * a, XTensor * b);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Sign(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaSign(a, b);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0)
db[i] = 1.0F;
else if (d[i] == 0)
db[i] = 0.0F;
else
db[i] = -1.0F;
}
}
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
_Sign(a, a);
}
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _ScaleAndShift function */
_Sign(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
return b;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its sign value (CUDA Kernel)
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelSign(DTYPE * a, DTYPE * b, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (a[i] > 0)
b[i] = 1.0F;
else if (a[i] == 0)
b[i] = 0.0F;
else
b[i] = -1.0F;
}
}
/*
set each entry to its sign value with float16 data type value (CUDA Kernel)
This is for float16 computation
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelSign(__half * a, __half * b, int size)
{
return;
}
/*
set each entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _CudaSign(const XTensor * a, XTensor * b)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_CUH__
#define __SIGN_CUH__
#include "Sign.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its sign value (CUDA Kernel) */
__global__
void KernelSign(DTYPE * a, DTYPE * b, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
void KernelSign(__half * a, __half * b, int size);
/* set each entry to its sign value */
void _CudaSign(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#ifndef __SIGN_H__
#define __SIGN_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
void _Sign(const XTensor * a, XTensor * b);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _SignMe(XTensor * a);
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Sign(const XTensor & a);
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "Sum.h"
#include "Sum.cuh"
......@@ -28,26 +29,20 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
tensor summation c = a + b * \beta
>> a - a tensor
>> b - another tensor
>> c - where we put a+b*\beta. we save it in a if c is NULL
>> beta - the scaling factor
*/
void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{
if (c == NULL)
c = a;
CheckNTErrors(a && b && c, "Empty tensors in addition!");
CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == b->unitNum && a->unitNum == c->unitNum,
"Unmatched tensors in addition!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_SUM);
XLink::AddParamToHead(c, beta);
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
......@@ -63,17 +58,16 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
ShowNTErrors("Cannot run this method on multiple devices simultaneously!");
}
else
CudaSum(a, b, c, beta);
_CudaSum(a, b, c, beta);
}
else
CudaSum(a, b, c, beta);
_CudaSum(a, b, c, beta);
#endif
}
else {
if (!a->isSparse && !b->isSparse) {
CheckNTErrors(!c->isSparse,
"Illegal use of sparse matrix in addition!");
CheckNTErrors(!c->isSparse, "Illegal use of sparse matrix in addition!");
if (a->dataType == DEFAULT_DTYPE &&
b->dataType == DEFAULT_DTYPE &&
......@@ -116,5 +110,42 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
}
}
}
/*
tensor summation a = a + b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
{
_Sum(a, b, a, beta);
}
/*
tensor summation c = a + b * \beta (return a XTensor structure)
make a new tensor c to keep the result and return it
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
<< return - the result of tensor summation
*/
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
{
XTensor c(&a);
c.SetTMP();
/* call _Sum function */
_Sum(&a, &b, &c, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta);
return c;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论