Commit 0405663f by liyinqiao

Merge with Xuchen branch.

parent c22e2e31
......@@ -20,7 +20,7 @@
* This is a simple implementation of the feed-forward network-based language
* model (FNNLM). See more details about FNNLM in
* "A Neural Probabilistic Language Model" by Bengio et al.
* Journal of Machine Learning Research 3 (2003) 1137?155
* Journal of Machine Learning Research 3 (2003) 1137-1155
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
*/
......
......@@ -135,6 +135,8 @@ const char * GetOPName(int type)
return "S_SPLIT";
else if (type == SHAPE_SPLIT_LIST)
return "S_SPLIT_LIST";
else if (type == SHAPE_STACK)
return "S_SHAPE_STACK";
else if (type == SHAPE_SQUEEZE)
return "S_SQUEEZE";
else if (type == SHAPE_TRANSPOSE)
......
......@@ -51,7 +51,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MAX MATH_MATRIXMULBATCHED + 1
#define MATH_MIN MATH_MAX + 1
#define MATH_MULTIPLY MATH_MIN + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
......@@ -97,7 +99,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_RESHAPE SHAPE_PERMUTE + 1
#define SHAPE_SPLIT SHAPE_RESHAPE + 1
#define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1
#define SHAPE_SQUEEZE SHAPE_SPLIT_LIST + 1
#define SHAPE_STACK SHAPE_SPLIT_LIST + 1
#define SHAPE_SQUEEZE SHAPE_STACK + 1
#define SHAPE_TRANSPOSE SHAPE_SQUEEZE + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
......
......@@ -28,6 +28,7 @@
#ifndef __XTENSOR_H__
#define __XTENSOR_H__
#include <math.h>
#include "XGlobal.h"
#include "XMem.h"
#include "XPRunner.h"
......
......@@ -83,6 +83,7 @@
#include "shape/Permute.h"
#include "shape/Split.h"
#include "shape/Squeeze.h"
#include "shape/Stack.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "shape/IsSameShaped.h"
......
......@@ -20,6 +20,7 @@
*/
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Compare.h"
......@@ -42,7 +43,7 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b)
#define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((_IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \
......@@ -59,7 +60,7 @@ void _funcName(const XTensor * a, XTensor * b, DTYPE number)
#define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((_IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \
......@@ -97,8 +98,8 @@ XTensor funcName(const XTensor &a, DTYPE number)
/* Generates funcName(a, b, number), the output-argument form of a comparison.
 * When b is not initialized yet, or is not shaped like a, b is initialized
 * from a with InitTensorV2; the comparison itself is then delegated to
 * _funcName(&a, &b, number). The operationId argument is not referenced
 * anywhere in this body. */
#define SIMPLE_COMPARE_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, DTYPE number) \
{ \
if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensorV2(&b, &a); \
if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensorV2(&b, &a); \
} \
_funcName(&a, &b, number); \
}
......@@ -124,4 +125,95 @@ SIMPLE_COMPARE_FUNCTION_ME(NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL)
/* define three macros separately, specify the respective function names */
#ifdef USE_CUDA
/* CUDA build: generates _funcName(a, b, c), which writes
 * origFunc(a[i], b[i]) into c[i] for each of the a->unitNum elements.
 * The three tensors must pass _IsSameShaped, a must hold DEFAULT_DTYPE
 * data, and the device ids of a/b and a/c must pass CheckDev.
 * When a->devID is non-negative the work is handed to _cudaFuncName;
 * otherwise the loop below runs on the host. */
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, _cudaFuncName, origFunc)        \
void _funcName(const XTensor * a, const XTensor * b, XTensor * c)           \
{                                                                           \
    CheckNTErrors((_IsSameShaped(a, b, c)),                                 \
                  "Input and output tensors should have the same type!");   \
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");                 \
    CheckDev(a->devID, b->devID);                                           \
    CheckDev(a->devID, c->devID);                                           \
    if (a->devID < 0) {                                                     \
        /* host path: plain element-wise loop */                            \
        DTYPE * lhs = (DTYPE*)a->data;                                      \
        DTYPE * rhs = (DTYPE*)b->data;                                      \
        DTYPE * out = (DTYPE*)c->data;                                      \
        for (int idx = 0; idx < a->unitNum; idx++) {                        \
            out[idx] = (DTYPE)origFunc(lhs[idx], rhs[idx]);                 \
        }                                                                   \
    }                                                                       \
    else {                                                                  \
        /* device path: delegate to the cuda implementation */              \
        _cudaFuncName(a, b, c);                                             \
    }                                                                       \
}
#else
/* CPU-only build: generates _funcName(a, b, c), which writes
 * origFunc(a[i], b[i]) into c[i] for each of the a->unitNum elements.
 * The three tensors must pass _IsSameShaped and a must hold DEFAULT_DTYPE
 * data. A tensor whose devID is non-negative is rejected, because no GPU
 * implementation is compiled into this build. */
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, origFunc)                       \
void _funcName(const XTensor * a, const XTensor * b, XTensor *c)            \
{                                                                           \
    CheckNTErrors((_IsSameShaped(a, b, c)),                                 \
                  "Input and output tensors should have the same type!");   \
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");                 \
    /* compare device ids (not tensor pointers), exactly as the CUDA */     \
    /* variant of this macro does */                                        \
    CheckDev(a->devID, b->devID);                                           \
    CheckDev(a->devID, c->devID);                                           \
    /* a tensor placed on a GPU cannot be handled without CUDA */           \
    if (a->devID >= 0) {                                                    \
        ShowNTErrors("No GPU devices support!");                            \
    }                                                                       \
    DTYPE * da = (DTYPE*)a->data;                                           \
    DTYPE * db = (DTYPE*)b->data;                                           \
    DTYPE * dc = (DTYPE*)c->data;                                           \
    for (int i = 0; i < a->unitNum; i++)                                    \
        dc[i] = (DTYPE)origFunc(da[i], db[i]);                              \
}
#endif
/* Generates the pointer-based in-place form _funcNameMe(a, b): a is used
 * both as the first operand and as the destination of _funcName. */
#define _SIMPLE_MAX_MIN_FUNCTION_ME(_funcNameMe, _funcName)                 \
void _funcNameMe(XTensor * a, const XTensor * b)                            \
{                                                                           \
    XTensor * const target = a;                                             \
    _funcName(target, b, target);                                           \
}
/* Generates the reference-based in-place form funcNameMe(a, b): the
 * address of a is passed to _funcName as first operand and as output. */
#define SIMPLE_MAX_MIN_FUNCTION_ME(funcNameMe, _funcName)                   \
void funcNameMe(XTensor & a, const XTensor & b)                             \
{                                                                           \
    XTensor * const target = &a;                                            \
    _funcName(target, &b, target);                                          \
}
/* Generates the value-returning form funcName(a, b): a result tensor is
 * constructed from &a, flagged through SetTMPFlag(), filled by
 * _funcName(&a, &b, &result) and returned by value.
 * The operationId argument is not referenced in this body. */
#define SIMPLE_MAX_MIN_FUNCTION(funcName, _funcName, operationId)           \
XTensor funcName(const XTensor & a, const XTensor & b)                      \
{                                                                           \
    XTensor result(&a);                                                     \
    result.SetTMPFlag();                                                    \
                                                                            \
    _funcName(&a, &b, &result);                                             \
                                                                            \
    return result;                                                          \
}
/* Generates the output-argument form funcName(a, b, c).
 * c is taken by reference so that the result reaches the caller's tensor;
 * this also matches the `XTensor & c` declarations of Max and Min in the
 * header. When c is not initialized yet, or is not shaped like a, it is
 * initialized from a before _funcName(&a, &b, &c) fills it.
 * The operationId argument is not referenced in this body. */
#define SIMPLE_MAX_MIN_FUNCTION_VOID(funcName, _funcName, operationId)      \
void funcName(const XTensor &a, const XTensor &b, XTensor &c)               \
{                                                                           \
    if (!c.isInit || !_IsSameShaped(&a, &c)) {                              \
        InitTensor(&c, &a);                                                 \
    }                                                                       \
    _funcName(&a, &b, &c);                                                  \
}
/* instantiate the element-wise workers _Max and _Min; only the CUDA build
   passes a GPU implementation (_CudaMax / _CudaMin) to the macro */
#ifdef USE_CUDA
_SIMPLE_MAX_MIN_FUNCTION(_Max, _CudaMax, max)
_SIMPLE_MAX_MIN_FUNCTION(_Min, _CudaMin, min)
#else
_SIMPLE_MAX_MIN_FUNCTION(_Max, max)
_SIMPLE_MAX_MIN_FUNCTION(_Min, min)
#endif
/* maximum: in-place forms (pointer and reference), the value-returning
   form and the output-argument form, all built on _Max */
_SIMPLE_MAX_MIN_FUNCTION_ME(_MaxMe, _Max)
SIMPLE_MAX_MIN_FUNCTION_ME(MaxMe, _Max)
SIMPLE_MAX_MIN_FUNCTION(Max, _Max, MATH_MAX)
SIMPLE_MAX_MIN_FUNCTION_VOID(Max, _Max, MATH_MAX)
/* minimum: the same four forms, all built on _Min */
_SIMPLE_MAX_MIN_FUNCTION_ME(_MinMe, _Min)
SIMPLE_MAX_MIN_FUNCTION_ME(MinMe, _Min)
SIMPLE_MAX_MIN_FUNCTION(Min, _Min, MATH_MIN)
SIMPLE_MAX_MIN_FUNCTION_VOID(Min, _Min, MATH_MIN)
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -89,6 +89,53 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, DTYPE number) \
SIMPLE_COMPARE_FUNCTION_GPU(Equal, cudaIsEqual)
SIMPLE_COMPARE_FUNCTION_GPU(NotEqual, cudaIsNotEqual)
/* Generates, for a given funcName / origFunc pair:
 *   - Kernel##funcName (DTYPE overload): each thread computes its index
 *     i = blockDim.x * blockIdx.x + threadIdx.x and, if i < size, stores
 *     origFunc(a[i], b[i]) into c[i];
 *   - Kernel##funcName (__half overload): returns immediately and writes
 *     nothing to c;
 *   - _Cuda##funcName(a, b, c): obtains a launch configuration from
 *     GDevs.GetCudaThread for a->unitNum items, uses only the first entry
 *     of gridSize and blockSize, and launches the overload selected by
 *     a->dataType. Any other data type ends in ShowNTErrors("TODO!").
 * NOTE(review): ProtectCudaDev / BacktoCudaDev presumably switch to
 * a->devID and restore the previous device afterwards - confirm against
 * their definitions. */
#define SIMPLE_MAX_MIN_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, DTYPE * c, int size) \
{ \
int i = blockDim.x * blockIdx.x + threadIdx.x; \
\
/* threads whose index is not below size do nothing */ \
if (i < size) \
c[i] = (DTYPE)origFunc(a[i], b[i]); \
} \
__global__ \
void Kernel##funcName(__half * a, __half * b, __half * c, int size) \
{ \
/* placeholder: no half-precision computation is performed here */ \
return; \
} \
void _Cuda##funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
\
int gridSize[3]; \
int blockSize[3]; \
\
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
\
dim3 blocks(gridSize[0]); \
dim3 threads(blockSize[0]); \
\
int devIDBackup; \
ProtectCudaDev(a->devID, devIDBackup); \
\
if (a->dataType == DEFAULT_DTYPE) { \
Kernel##funcName<<<blocks, threads>>> \
((DTYPE*)a->data, (DTYPE*)b->data, \
(DTYPE*)c->data, a->unitNum); \
} \
else if (a->dataType == X_FLOAT16) { \
/* launches the __half overload, which leaves c untouched */ \
Kernel##funcName<<<blocks, threads>>> \
((__half*)a->data, (__half*)b->data, \
(__half*)c->data, a->unitNum); \
} \
else { \
ShowNTErrors("TODO!"); \
} \
\
BacktoCudaDev(a->devID, devIDBackup); \
}
/* instantiate KernelMax/_CudaMax and KernelMin/_CudaMin */
SIMPLE_MAX_MIN_FUNCTION_GPU(Max, max)
SIMPLE_MAX_MIN_FUNCTION_GPU(Min, min)
#endif // USE_CUDA
......
......@@ -34,6 +34,12 @@ void _CudaEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (cuda version) */
void _CudaNotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* element-wise maximum of tensors a and b, written to c (cuda version) */
void _CudaMax(const XTensor * a, const XTensor * b, XTensor *c);
/* element-wise minimum of tensors a and b, written to c (cuda version) */
void _CudaMin(const XTensor * a, const XTensor * b, XTensor *c);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
......
......@@ -56,6 +56,36 @@ XTensor NotEqual(const XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value */
void NotEqual(const XTensor & a, XTensor & b, DTYPE value);
/* element-wise maximum of tensors a and b, written to c */
void _Max(const XTensor * a, const XTensor * b, XTensor * c);
/* element-wise maximum of tensors a and b (in place: the result overwrites a) */
void _MaxMe(XTensor * a, const XTensor * b);
/* element-wise maximum of tensors a and b (in place: the result overwrites a) */
void MaxMe(XTensor & a, const XTensor & b);
/* element-wise maximum of tensors a and b (returns the result as an XTensor structure) */
XTensor Max(const XTensor & a, const XTensor & b);
/* element-wise maximum of tensors a and b, written to the output tensor c */
void Max(const XTensor & a, const XTensor & b, XTensor & c);
/* element-wise minimum of tensors a and b, written to c */
void _Min(const XTensor * a, const XTensor * b, XTensor * c);
/* element-wise minimum of tensors a and b (in place: the result overwrites a) */
void _MinMe(XTensor * a, const XTensor * b);
/* element-wise minimum of tensors a and b (in place: the result overwrites a) */
void MinMe(XTensor & a, const XTensor & b);
/* element-wise minimum of tensors a and b (returns the result as an XTensor structure) */
XTensor Min(const XTensor & a, const XTensor & b);
/* element-wise minimum of tensors a and b, written to the output tensor c */
void Min(const XTensor & a, const XTensor & b, XTensor & c);
} // namespace nts(NiuTrans.Tensor)
#endif // end __COMPARE_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2019-10-13
*/
#include "Stack.h"
#include "IsSameShaped.h"
#include "../../XUtility.h"
#include "../../XName.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* stack small tensors into a big tensor along with a dimension */
/*
 * stack small tensors into a big tensor along with a dimension
 *
 * smalls - list of the tensors to stack; all of them must have the same shape
 * t      - target tensor; its data buffer is written in place
 * dim    - position of the new dimension in t; a negative value selects
 *          the last dimension of t
 *
 * Only lists of at most MIN_TENSOR_MERGE_LIST_NUM tensors are supported;
 * longer lists end in ShowNTErrors("TO DO!!!").
 */
void _Stack(const TensorList * smalls, XTensor * t, int dim)
{
    /* validate the pointers before anything is read through them */
    CheckNTErrors(smalls != NULL, "Invalid list!");
    CheckNTErrors(t != NULL, "Invalid tensor!");

    int count = smalls->count;
    CheckNTErrors(count > 0, "Empty list!");

    dim = (dim < 0 ? t->order - 1 : dim);
    CheckNTErrors(dim >= 0 && dim < t->order, "Wrong range of dim");

    /* comparing every neighbouring pair makes the whole list same-shaped */
    for (int i = 1; i < count; i++) {
        XTensor * tmp1 = smalls->GetItem(i);
        XTensor * tmp2 = smalls->GetItem(i-1);
        CheckNTErrors(_IsSameShaped(tmp1, tmp2), "The input tensor must be same size!");
    }

    XTensor * smallsItem0 = smalls->GetItem(0);
    int unitSize = smallsItem0->unitSize;

    /* blockSize: number of items covered by the source dimensions from dim on;
       blockNum: number of such blocks, i.e. the product of the dimensions before dim */
    int blockSize = 1;
    int blockNum = 1;
    for (int i = 0; i < smallsItem0->order; i++) {
        if (i >= dim)
            blockSize *= smallsItem0->dimSize[i];
        else
            blockNum *= smallsItem0->dimSize[i];
    }

    /* merging with fewer data copy operations */
    if (count <= MIN_TENSOR_MERGE_LIST_NUM) {
        /* a source row is one block; a target row holds one block per input */
        int sPitch = blockSize * unitSize;
        int tPitch = blockSize * count * unitSize;
        int mSize = blockSize * unitSize;
        int n = blockNum;
        int tStep = blockSize * unitSize;
        char * tData = (char*)t->data;

        for (int k = 0; k < count; k++) {
            XTensor * s = smalls->GetItem(k);
            char * sData = (char*)s->data;
            /* the k-th input fills the k-th slot of every target row */
            XMemCopy2D(tData + k * tStep, tPitch, t->devID,
                       sData, sPitch, s->devID,
                       mSize, n);
        }
    }
    else {
        ShowNTErrors("TO DO!!!");
    }
}
/* stack small tensors into a big tensor along with a dimension (return an XTensor structure) */
/*
 * stack small tensors into a big tensor along with a dimension
 * (return an XTensor structure)
 *
 * smalls - list of the tensors to stack
 * dim    - position of the new dimension in the result; must lie in
 *          [0, order of the inputs]
 *
 * The result has one dimension more than the inputs: the size at position
 * dim is the number of inputs, the sizes before dim are copied from the
 * first input, and the sizes after dim are the first input's sizes shifted
 * by one position. The result is linked to the inputs through XLink only
 * if every input has enableGrad set.
 */
XTensor Stack(const TensorList &smalls, int dim)
{
    int count = smalls.count;
    CheckNTErrors(count > 0, "Empty list!");
    CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");

    XTensor * tensor = smalls.GetItem(0);
    int order = tensor->order + 1;

    /* without this bound dimSize[dim] would never be assigned */
    CheckNTErrors(dim < order, "Illegal dimension to concatenate!");

    int * dimSize = new int[order];
    for (int i = 0; i < order; i++) {
        if (i < dim)
            dimSize[i] = tensor->GetDim(i);
        else if (i == dim)
            dimSize[i] = count;
        else
            /* target dimension i corresponds to source dimension i - 1 */
            dimSize[i] = tensor->GetDim(i - 1);
    }

    float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
    XTensor t(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
    t.SetTMPFlag();

    /* destroy variables */
    delete[] dimSize;

    /* call _Stack function */
    _Stack(&smalls, &t, dim);

    /* tensor connection: skipped as soon as one input has no gradient */
    for (int i = 0; i < count; i++) {
        XTensor * tmp = smalls.GetItem(i);
        if (tmp->enableGrad == false)
            return t;
    }

    XLink::MakeLink(&smalls, &t, SHAPE_STACK);
    XLink::AddParamToHeadInt(&t, dim);

    return t;
}
/* check the shape of target tensor */
bool CheckStackShape(const TensorList &smalls, XTensor &t, int dim)
{
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
for (int i = 0; i < tensor->order; i++) {
if (i < dim)
if (t.GetDim(i) != tensor->GetDim(i))
return false;
else if (i > dim)
if (t.GetDim(i) != tensor->GetDim(i-1))
return false;
else if (i == dim)
if (t.GetDim(i) != smalls.count)
return false;
}
return true;
}
/* stack small tensors into a big tensor along with a dimension */
/*
 * stack small tensors into a big tensor along with a dimension
 *
 * smalls - list of the tensors to stack
 * t      - output tensor; it is (re)initialized when it is not initialized
 *          yet or when CheckStackShape rejects its current shape
 * dim    - position of the new dimension in t; must lie in
 *          [0, order of the inputs]
 *
 * t is linked to the inputs through XLink only if every input has
 * enableGrad set.
 */
void Stack(const TensorList &smalls, XTensor &t, int dim)
{
    int count = smalls.count;
    CheckNTErrors(count > 0, "Empty list!");
    CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");

    XTensor * tensor = smalls.GetItem(0);
    int order = tensor->order + 1;

    /* without this bound the shape below would have no slot for count */
    CheckNTErrors(dim < order, "Illegal dimension to concatenate!");

    if (!t.isInit || !CheckStackShape(smalls, t, dim)) {
        int * dimSize = new int[order];
        for (int i = 0; i < order; i++) {
            if (i < dim)
                dimSize[i] = tensor->GetDim(i);
            else if (i == dim)
                dimSize[i] = count;
            else
                /* target dimension i corresponds to source dimension i - 1 */
                dimSize[i] = tensor->GetDim(i - 1);
        }

        float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
        InitTensorV2(&t, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);

        /* destroy variables */
        delete[] dimSize;
    }

    /* call _Stack function */
    _Stack(&smalls, &t, dim);

    /* tensor connection: skipped as soon as one input has no gradient */
    for (int i = 0; i < count; i++) {
        XTensor * tmp = smalls.GetItem(i);
        if (tmp->enableGrad == false)
            return;
    }

    XLink::MakeLink(&smalls, &t, SHAPE_STACK);
    XLink::AddParamToHeadInt(&t, dim);
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2019-10-13
* It's so cold outside. It's too hard for me to get out.
*/
#ifndef __STACK_H__
#define __STACK_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* stack the tensors of a list into the existing tensor t along a new dimension
   (a negative dim selects the last dimension of t) */
void _Stack(const TensorList * smalls, XTensor * t, int dim);
/* stack the tensors of a list along a new dimension (return an XTensor structure);
   the result has one dimension more than the inputs */
XTensor Stack(const TensorList &list, int leadingDim);
/* stack the tensors of a list into t along a new dimension;
   t is (re)initialized when it is uninitialized or has an unsuitable shape */
void Stack(const TensorList &smalls, XTensor &t, int dim);
} // namespace nts(NiuTrans.Tensor)
#endif // __STACK_H__
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论