Commit e925cfd9 by huchi

refactor the translator engine for nmt

parent 143e048c
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "./network/XNet.h"
#include "./tensor/XUtility.h"
#include "./tensor/function/FHeader.h"
#include "./tensor/core/CHeader.h"
#include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/Transformer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace fnnlm;
using namespace transformer;
int main( int argc, const char ** argv )
{
//_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
//_CrtSetBreakAlloc(2708);
TransformerMain(argc - 1, argv + 1);
//_CrtDumpMemoryLeaks();
return 0;
}
#include "Model.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
/* register a parameter with a unique name */
void Model::Register(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID)
{
parameters.AddParameter(name, dims, dataType, devID);
}
/* get a parameter by its name */
XTensor* Model::operator[](const char* name)
{
return parameters.GetParameter(name);
}
/* load a model from a binary file */
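/* file layout (matching Dump below): an unsigned long parameter count,
   then one long offset per parameter, then the raw data of each tensor
   in registration order */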
void Model::Load(const char* fn)
{
CheckNTErrors(parameters.list.Size() > 0, "empty tensor list");
FILE* file = fopen(fn, "rb");
CheckNTErrors(file, "unable to open the model file");
LongList offset(parameters.list.Size());
/* check the number of parameters */
unsigned long int number;
fread(&number, sizeof(number), 1, file);
CheckNTErrors(number == parameters.list.Size(), "parameter number not matched");
/* read the offset of each parameter from the file */
fread(offset.items, sizeof(long), parameters.list.Size(), file);
/* read parameters from the file */
for (int i = 0; i < parameters.list.Size(); i++) {
parameters.list[i]->BinaryRead(file, offset.items[i]);
}
fclose(file);
}
/* dump a model to a binary file */
void Model::Dump(const char* fn)
{
FILE* file = fopen(fn, "wb");
CheckNTErrors(file, "unable to create the model file");
/* dump the number of parameters */
unsigned long int number = parameters.list.Size();
fwrite(&number, sizeof(number), 1, file);
/* dump the offset of each parameter */
unsigned long int offset = sizeof(number);
for (int i = 0; i < parameters.list.Size(); i++) {
if (i > 0) {
offset += parameters.list[i - 1]->unitNum;
}
fwrite(&offset, sizeof(offset), 1, file);
}
/* dump parameters to the file */
for (int i = 0; i < parameters.list.Size(); i++) {
parameters.list[i]->BinaryDump(file);
}
fclose(file);
}
/* get a parameter by its name */
XTensor* Model::Get(const char* name)
{
return parameters.GetParameter(name);
}
/* add a parameter to the list */
void Parameter::AddParameter(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID)
{
CheckNTErrors(GetParameter(name) == NULL, "the name must be unique");
IntList dim;
for (int i : dims) {
dim.Add(i);
}
XTensor* p = NewTensorV2((int)dims.size(), dim.items, dataType, devID);
strcpy(p->name, name);
list.Add(p);
}
/* get a parameter by its name */
XTensor* Parameter::GetParameter(const char* name)
{
for (int i = 0; i < list.Size(); i++) {
if (strcmp(list[i]->name, name) == 0)
return list[i];
}
/* if miss, return a null pointer */
return NULL;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* the model class
*
* $Created by: HU Chi (huchinlp@foxmail.com) 2019-09-12
*
*/
#ifndef __MODEL_H__
#define __MODEL_H__
#include <utility>
#include "../tensor/XGlobal.h"
#include "../tensor/XTensor.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
using Dim = std::initializer_list<int>;
/* Parameter holds a list of named parameter tensors */
struct Parameter {
public:
/* the parameter list */
TensorList list;
public:
/* add a parameter to the list */
void AddParameter(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID);
/* get a parameter by its name */
XTensor* GetParameter(const char* name);
};
/* Model is a base class for neural networks */
struct Model {
public:
Parameter parameters;
public:
/* load a model from a binary file */
void Load(const char* fn);
/* dump the model to a binary file */
void Dump(const char* fn);
/* get a parameter by its name */
XTensor* Get(const char* name);
/* get a parameter by its name */
XTensor* operator[] (const char* name);
/* register a parameter with a unique name */
void Register(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID);
};
}
#endif // __MODEL_H__
\ No newline at end of file
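/* A minimal usage sketch of the registry above (an illustration, not part of
   the commit; the shapes, names and the file "model.bin" are made up).
   Register() creates a named tensor on a device; Dump()/Load() round-trip
   all registered tensors through the binary format in Model.cpp: a
   parameter count, one offset per parameter, then the raw data blocks. */
struct ToyModel : public Model {
    explicit ToyModel(int devID) {
        Register("w", { 2, 3 }, X_FLOAT, devID); /* names must be unique */
        Register("b", { 3 }, X_FLOAT, devID);
    }
};

void ToyModelDemo() {
    ToyModel model(-1);          /* devID -1 runs on the CPU */
    model["w"]->SetZeroAll();    /* parameters are looked up by name */
    model.Dump("model.bin");

    ToyModel reloaded(-1);       /* same registration order as the dump */
    reloaded.Load("model.bin");
}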
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "XNet.h"
#include "../tensor/XUtility.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/transformer/Transformer.h"
#include <fstream>
#include <string>
using namespace nts;
using namespace transformer;
void test() {
XTensor posEmbeddingBase;
int length = 5;
int eSize = 4;
int d = 4;
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT);
float* data = new float[posEmbeddingBase.unitNum];
for (int pos = 0; pos < length; pos++) {
float* dp = data + pos * eSize;
//int channelSize = eSize / 2;
//int offset = 0;
//for(int i = 0; i < channelSize; i++){
// dp[offset++] = (float)sin(pos/pow(10000.0F, 2.0F*i/(d - 2)));
//}
//for(int i = 0; i < channelSize; i++){
// dp[offset++] = (float)cos(pos/pow(10000.0F, 2.0F*i/(d - 2)));
//}
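/* interleaved layout: even channels take sin, odd channels take cos,
   i.e. PE(pos, 2i) = sin(pos / 10000^(2i/d)) and
   PE(pos, 2i+1) = cos(pos / 10000^(2i/d)) */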
for (int k = 0; k < eSize; k++) {
if (k % 2 == 0) {
int i = k / 2;
dp[k] = (float)sin(pos / pow(10000.0F, 2.0F * i / d));
}
else {
int i = (k - 1) / 2;
dp[k] = (float)cos(pos / pow(10000.0F, 2.0F * i / d));
}
}
}
posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);
posEmbeddingBase.Dump(stderr);
delete[] data;
}
struct A {
XTensor a;
void update(XTensor b) {
a = b;
}
};
void test2(A *a) {
XTensor x;
InitTensor2D(&x, 2, 3);
XTensor y;
InitTensor2D(&y, 3, 2);
float data[]{ 1,1,1,1,1,1 };
x.SetData(data, 6);
y.SetData(data, 6);
XTensor z;
z = MatrixMul(x, y);
a->update(z);
}
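/* exercises the FREE_ON_THE_FLY memory pool: each CopyIndexed below shrinks
   the tensor, so the pool is expected to recycle the freed blocks between
   rounds (this reading of the mode is an assumption, not documented here) */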
void TestMemory() {
int devID = 0;
int memSize = 1024;
XMem *mem = new XMem(devID, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
XTensor a;
InitTensor2D(&a, 5, 5, X_FLOAT, 0, mem);
float d[25]{ 0 };
for (int i = 0; i < 25; i++)
d[i] = float(i);
a.SetData(d, 25);
int index[]{ 0,1,2,3,4 };
for (int i = 0; i < 4; i++) {
XTensor srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 4 - i, X_INT, a.devID, a.mem);
InitTensor1D(&tgtIdx, 4 - i, X_INT, a.devID, a.mem);
srcIdx.SetData(index, srcIdx.unitNum);
tgtIdx.SetAscendingOrder(0);
a = CopyIndexed(a, 0, srcIdx, tgtIdx);
printf("\nround %d\n", i);
a.Dump(stderr);
}
delete mem;
}
int main(int argc, const char** argv)
{
TransformerMain(argc - 1, argv + 1);
return 0;
}
@@ -173,6 +173,10 @@ private:
static
void GradReduceSum(XTensor * node, bool isEfficient);

+/* gradient for reduceSumAll */
+static
+void GradReduceSumAll(XTensor * node, bool isEfficient);
+
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node, bool isEfficient);
...
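/* For reference (not part of the diff): if y = reducesumall(x), every element
   of x contributes exactly once to the scalar y, so the rule that the new
   GradReduceSumAll must implement is dE/dx_i = dE/dy for all i, i.e. the
   scalar gradient broadcast back to x's shape. */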
@@ -281,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
smallsGrad.Add(tail->grad);

if(i > 1){
-CheckNTErrors(XTensor::IsSameShaped(last, tail),
+CheckNTErrors(_IsSameShaped(last, tail),
"Input tensors must be of the same size!");
}
@@ -391,7 +391,7 @@ void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
/* if the tensor is used somewhere else, we need another SUM
for gradient accumulation */
else{
-XTensor * inputGradTMP = NewTensorBuf(input, input->devID, input->mem);
+XTensor * inputGradTMP = NewTensorBufV2(input, input->devID, input->mem);

_Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
_Sum(input->grad, inputGradTMP, input->grad);
@@ -475,7 +475,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
somewhere else, we need another SUM for gradient
accumulation */
else{
-XTensor * nodeGradTMP = NewTensorBuf(node, node->devID, node->mem);
+XTensor * nodeGradTMP = NewTensorBufV2(node, node->devID, node->mem);

_Merge(&splits, nodeGradTMP, whereToSplit + 1);
_Sum(node->grad, nodeGradTMP, node->grad);
@@ -501,7 +501,7 @@ void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
XTensor * output = node;
XTensor * input = income.tails[0];
-XTensor * b = NewTensorBuf(input, input->devID, input->mem);
+XTensor * b = NewTensorBufV2(input, input->devID, input->mem);
XNoder::MakeGrad(input);

int i = income.GetParamInt(0);
@@ -543,7 +543,7 @@ void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
CheckNTErrors(output->unitNum = input->unitNum * dSize, "Wrong tensor size!");

-XTensor * g = NewTensorBuf(input->grad, input->devID, input->mem);
+XTensor * g = NewTensorBufV2(input->grad, input->devID, input->mem);

_ReduceSum(output->grad, g, dim);
_Sum(input->grad, g, input->grad);
...
@@ -77,104 +77,20 @@ backward propagation to obtain gradient
>> root - root node (output) of the network
->> loss - name of loss function
*/
-void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(XTensor &root)
{
TensorList roots(1);
roots.Add(&root);
-TensorList golds(1);
-golds.Add(NULL);
-TensorList paddings(1);
-paddings.Add(NULL);
-Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient wrt. the loss/error function
->> root - root node (output) of the network
->> gold - gold standard for the output
->> loss - name of loss function
-*/
-void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
-{
-TensorList roots(1);
-roots.Add(&root);
-TensorList golds(1);
-golds.Add(&gold);
-TensorList paddings(1);
-paddings.Add(NULL);
-Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient wrt. the loss/error function
->> root - root node (output) of the network
->> gold - gold standard for the output
->> padding - specify a target value that is ignored and does not contribute to the gradient computation
->> loss - name of loss function
-*/
-void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
-{
-TensorList roots(1);
-roots.Add(&root);
-TensorList golds(1);
-golds.Add(&gold);
-TensorList paddings(1);
-paddings.Add(&padding);
-Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient
-with a number of root nodes
->> roots - a list of root nodes (output) of the network
->> loss - name of loss function
-*/
-void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
-{
-TensorList golds(roots.count);
-TensorList paddings(roots.count);
-for (int i = 0; i < roots.count; i++) {
-golds.Add(NULL);
-paddings.Add(NULL);
-}
-Backward(roots, golds, paddings, loss);
-}
-
-/*
-backward propagation to obtain gradient
-with a number of root nodes
->> roots - a list of root nodes (output) of the network
->> golds - a list of gold standard for the output
->> loss - name of loss function
-*/
-void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
-{
-TensorList paddings(roots.count);
-for (int i = 0; i < roots.count; i++)
-paddings.Add(NULL);
-Backward(roots, golds, paddings, loss);
+Backward(roots);
}

/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> roots - a list of root nodes (output) of the network
->> golds - a list of gold standard for the output
->> paddings - specify a target value that is ignored
->> loss - name of loss function
*/
-void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(TensorList &roots)
{
Traverse(roots);
@@ -187,39 +103,6 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings,
node->visitMark = NODE_UNFINISHED;
}

-//XLossGrad lossGrad;
-
-/* we start with the gradient with respect to the loss for output layers */
-/*for(int i = 0; i < roots.count; i++){
-XTensor * root = (XTensor*)roots.Get(i);
-XTensor * gold = (XTensor*)golds.Get(i);
-XTensor * padding = (XTensor*)paddings.Get(i);
-XLink &income = root->income;
-int funcID = income.typeID;
-void * params = income.params;*/
-
-/* we compute dE/dx if the output is generated by an activation function y = f(x).
-Note that we do not need to obtain dE/dy here because it is of no use in the
-following process of back-propagation */
-/*if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
-if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
-XTensor * x = income.tails[0];
-XNoder::MakeGrad(x);
-lossGrad.Compute(gold, root, x, NULL, x->grad, padding, funcID, params, loss);
-root->visitMark = NODE_FINISHED;
-}
-else {
-XNoder::MakeGrad(root);
-lossGrad.Compute(gold, root, root->grad, padding, loss);
-}
-}*/
-
-/* we compute dE/dy (y is the output) if no predefined activation function is used */
-/*else{
-XNoder::MakeGrad(root);
-lossGrad.Compute(gold, root, root->grad, NULL, loss);
-}
-}*/
-
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
@@ -460,7 +343,6 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
}
}

-
/*
search for a node in a top-down manner by its name
>> top - the top most node
...
@@ -61,25 +61,11 @@ struct XNet
void Clear();

/* backward propagation to obtain gradient */
-void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-/* backward propagation to obtain gradient wrt. the loss/error function */
-void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-/* backward propagation to obtain gradient wrt. the loss/error function */
-void Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-/* backward propagation to obtain gradient
-with a number of root nodes */
-void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
-
-/* backward propagation to obtain gradient
-with a number of root nodes */
-void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
+void Backward(XTensor &root);

/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
-void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
+void Backward(TensorList &roots);

/* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false);
...
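/* A minimal sketch (not from the commit) of the slimmed-down interface: the
   loss is now built into the graph by the caller, so Backward() needs only
   the root node. CrossEntropy here stands in for whichever loss op the
   caller uses and is an assumption, not part of this diff. */
void BackwardDemo(XTensor &output, XTensor &gold)
{
    XNet net;
    XTensor loss = CrossEntropy(output, gold); /* the loss joins the graph */
    net.Backward(loss);                        /* gradients for every reachable node */
}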
@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL)
return;

-if(!XTensor::IsSameShaped(node, node->grad)){
+if(!_IsSameShaped(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
...
@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/

-#include "../tensor/XTensor.h"
+#include "../tensor/core/CHeader.h"

#ifndef __XNODER_H__
#define __XNODER_H__
...
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* This is a simple implementation of the feed-forward network-based language
* model (FNNLM). See more details about FNNLM in
* "A Neural Probabilistic Language Model" by Bengio et al.
* Journal of Machine Learning Research 3 (2003) 1137–1155
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
* Today I was awarded as the most popular teacher in our college.
* It was a great honour for me!!!
*/
#ifndef __FNNLM_H__
#define __FNNLM_H__
#include "../../tensor/XGlobal.h"
#include "../../tensor/XTensor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
namespace fnnlm
{
#define _EXIT_(x)// exit(x)
#define CheckErrors(x, msg) { if(!(x)) { fprintf(stderr, "Error! calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); _EXIT_(1); } }
#define ShowErrors(msg) { { fprintf(stderr, "Error! (%s line %d): %s\n", __FILENAME__, __LINE__, msg); _EXIT_(1); } }
#define MAX_N_GRAM 8
#define MAX_HIDDEN_NUM 8
/* an n-gram = a sequence of n words
words[0..n-2] is the history, and
words[n-1] is the word for prediction. */
struct NGram
{
int words[MAX_N_GRAM];
};
/* fnn model */
struct FNNModel
{
/* word embedding */
XTensor embeddingW;
/* parameter matrix of each hidden layer
hidden layer: y = f(x * w + b)
where x is the input, y is the output, w is
the transformation (parameter) matrix, b is
the bias and f() is the activation function. */
XTensor hiddenW[MAX_HIDDEN_NUM];
/* bias of each hidden layer */
XTensor hiddenB[MAX_HIDDEN_NUM];
/* parameter matrix of the output layer */
XTensor outputW;
/* bias of the output layer */
XTensor outputB;
/* order of the language model */
int n;
/* embedding size */
int eSize;
/* number of hidden layers */
int hDepth;
/* hidden layer size */
int hSize;
/* vocabulary size */
int vSize;
/* id of the device for running the model */
int devID;
/* indicates whether we use memory pool */
bool useMemPool;
/* memory pool */
XMem * mem;
FNNModel() { n = -1; vSize = -1; hDepth = 0; devID = -1; mem = NULL; }
~FNNModel() { delete mem; }
};
/* the network built on the fly */
struct FNNNet
{
/* embedding result of the previous n - 1 words */
XTensor embeddings[MAX_N_GRAM];
/* concatenation of embeddings */
XTensor embeddingCat;
/* output of the hidden layers */
XTensor hiddens[MAX_HIDDEN_NUM];
/* state of the hidden layers (before activation function) */
XTensor hiddenStates[MAX_HIDDEN_NUM];
/* state before softmax */
XTensor stateLast;
/* output of the net */
XTensor output;
};
/* entrance of the program */
int FNNLMMain(int argc, const char ** argv);
}
#endif
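/* A minimal sketch (not in the repository) of the hidden-layer equation
   documented above, y = f(x * w + b), assuming HardTanH from
   tensor/function/FHeader.h as the activation f(), the MMul matrix product
   from the tensor library, and an already initialized model. */
XTensor FNNHiddenLayer(const XTensor &x, FNNModel &model, int layer)
{
    /* affine transform followed by the activation function */
    return HardTanH(MMul(x, model.hiddenW[layer]) + model.hiddenB[layer]);
}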
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
...
@@ -15,9 +15,9 @@
* limitations under the License.
*/

/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/

#ifndef __T2TATTENTION_H__
#define __T2TATTENTION_H__
...
@@ -35,18 +35,18 @@ public:
/* cache for key */
XTensor* k{ NULL };

/* cache for value */
XTensor* v{ NULL };

public:
-bool IsEmpty(){
+bool IsEmpty() {
return (k == NULL) && (v == NULL);
}

void Clear() {
-if (k && v && k->id > 0 && v->id >0) {
+if (k && v && k->id > 0 && v->id > 0) {
DelTensor(k);
DelTensor(v);
}
...
@@ -71,45 +71,42 @@ public:
}
};

/*
multi-head attention
y(Q, K, V) = cat(head_1, head_2, ..., head_n)
where head_i = Attention(Q * w_i^Q, K * w_i^K, V * w_i^V)
attention(Q, K, V) = softmax(Q * K^T/d_k^0.5) V
d_k = dimension size of K
*/
class T2TAttention
{
public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* head number */
int nhead;

-/* transformation matrix for query */
+/* transformation matrix for Q */
XTensor wq;

-/* bias for query */
+/* bias for Q */
XTensor bq;

-/* transformation matrix for query */
+/* transformation matrix for K */
XTensor wk;

-/* bias for query */
+/* bias for K */
XTensor bk;

-/* transformation matrix for query */
+/* transformation matrix for V */
XTensor wv;

-/* bias for query */
+/* bias for V */
XTensor bv;

-/* max relative window size */
-XTensor rpEmbK;
+/* RPR emb */
+XTensor rp_embedding_k;

/* transformation after dot-product attention */
XTensor wa;
...
@@ -130,17 +127,17 @@ public:
bool isMasked;

/* some positions can be ignored in attention. this is useful in lm where the first position needs
special design for the attention model. */
int ignored;

/* indicates whether the model is used for training */
bool isTraining;

/* dropout probability */
DTYPE dropoutP;

/* max relative window size */
-int maxRP;
+int max_relative_position;

public:
...
@@ -151,23 +148,23 @@ public:
~T2TAttention();

/* initialize the model */
-void InitModel(int argc, char ** argv,
+void InitModel(int argc, char** argv,
bool myIsMasked, int myIgnored,
-int myDevID = -1, XMem * myMem = NULL);
+int myDevID = -1);

/* make the network */
-XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor *mask,
+XTensor Make(XTensor& k, XTensor& q, XTensor& v, XTensor* mask,
bool isTraining, Cache* cache, int cacheType);

/* make the attention network given keys, queries and values (after linear transformation) */
-XTensor MakeAttention(XTensor *k, XTensor *q, XTensor *v, const XTensor *mask, bool isTraining, bool isEnc);
+XTensor MakeAttention(XTensor* k, XTensor* q, XTensor* v, const XTensor* mask, bool isTraining, bool is_encoder);

/* make the attention network given keys, queries and values (after linear transformation) */
-XTensor MakeRPRAttention(XTensor *k, XTensor *q, XTensor *v, XTensor *mask, bool isTraining, bool isEnc);
+XTensor MakeRPRAttention(XTensor* k, XTensor* q, XTensor* v, XTensor* mask, bool isTraining, bool is_encoder);

-void GetRPEmbedding(XTensor* embMatrix, const int lenQ, const int lenKV, const int maxRelativeLen, const int device_id, const bool isEnc);
+void GetRPEmbedding(XTensor* emb_matrix, const int len_q, const int len_kv, const int max_relative_length, const int device_id, const bool is_encoder);

-void RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* attention, const bool isKey);
+void RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* attention, const bool is_key);
};
}
...
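/* A minimal single-head sketch (not from the commit) of the equation in the
   class comment above, attention(Q, K, V) = softmax(Q * K^T / d_k^0.5) V,
   for 2D tensors; masking, dropout and the multi-head split/concat are left
   out. ScaleAndShift, MMul and Softmax are the library ops already used
   elsewhere in this code. */
XTensor DotProductAttention(XTensor &q, XTensor &k, XTensor &v)
{
    int dk = k.GetDim(-1);
    /* scores = Q * K^T, scaled by 1/sqrt(d_k) */
    XTensor scores = ScaleAndShift(MMul(q, X_NOTRANS, k, X_TRANS),
                                   1.0F / (float)sqrt((float)dk), 0.0F);
    /* normalize over the keys, then mix the values */
    return MMul(Softmax(scores, -1), v);
}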
@@ -61,29 +61,27 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
->> myMem - the memory pool
*/
void AttDecoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
-int myDevID, XMem * myMem)
+int myDevID)
{
-//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
+//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID;
-mem = myMem;
ignored = myIgnored;

LoadParamInt(argc, argv, "nlayer", &nlayer, 3);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
-LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
+LoadParamInt(argc, argv, "vsizetgt", &vSize, 34040);
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);

CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");

/* embedding model */
-embedder.InitModel(argc, argv, devID, mem, false);
+embedder.InitModel(argc, argv, devID, false);

attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
@@ -96,11 +94,11 @@ void AttDecoder::InitModel(int argc, char ** argv,

/* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) {
-attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
-fnns[i].InitModel(argc, argv, myDevID, myMem);
-attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
-attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID, myMem);
-attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
+attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
+fnns[i].InitModel(argc, argv, myDevID);
+attLayerNorms[i].InitModel(argc, argv, myDevID);
+attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
+attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
}
decodeLayerNorm->InitModel(argc, argv, myDevID);
}
@@ -114,7 +112,7 @@ make the decoding network
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
-XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, const XTensor *mask, XTensor &maskEncDec, bool isTraining)
+XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, XTensor &maskEncDec, bool isTraining)
{
XTensor x;
...
@@ -22,7 +22,6 @@
#ifndef __T2TDECODER_H__
#define __T2TDECODER_H__

-#include <array>
#include "T2TEncoder.h"

namespace transformer
@@ -38,9 +37,6 @@ public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* layer number */
int nlayer;
@@ -103,10 +99,10 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
-int myDevID = -1, XMem * myMem = NULL);
+int myDevID = -1);

/* make the decoding network */
-XTensor Make(XTensor &inputDec, XTensor &outputEnc, const XTensor *mask, XTensor &maskEncDec, bool isTraining);
+XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, XTensor &maskEncDec, bool isTraining);
};
}
...
@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder()
{
devID = -1;
-mem = NULL;
vSize = -1;
maxLength = -1;
}
@@ -46,12 +45,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
->> myMem - the memory pool
*/
-void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc)
+void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{
devID = myDevID;
-mem = myMem;

if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1);
@@ -65,7 +62,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "pad", &padIdx, 1);

-InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
+InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);

maxLength = maxLength + 1 + 1;
DTYPE v = 1.0F/(float)sqrt((float)eSize);
@@ -83,7 +80,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
{
-InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
+InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);

float * data = new float[posEmbeddingBase.unitNum];
@@ -101,14 +98,13 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
}

-/* zero pad */
+/* padding zeros */
int padStart = padIdx * eSize;
-for (int i = padStart; i < padStart + eSize; i++)
+for (int i = padStart; i < padStart + eSize; ++i)
data[i] = 0.F;

posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);

delete[] data;
}
...
@@ -41,9 +41,6 @@ public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* vocabulary size */
int vSize;
@@ -74,7 +71,7 @@ public:
~T2TEmbedder();

/* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL, bool isEnc = true);
+void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);

/* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length, int padIdx);
...
@@ -53,20 +53,18 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
->> myMem - the memory pool
*/
void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
-int myDevID, XMem * myMem)
+int myDevID)
{
devID = myDevID;
-mem = myMem;
ignored = myIgnored;

LoadParamInt(argc, argv, "nlayer", &nlayer, 35);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
-LoadParamInt(argc, argv, "vsize", &vSize, -1);
+LoadParamInt(argc, argv, "vsize", &vSize, 34040);
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);

CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
@@ -82,12 +80,11 @@ void AttEncoder::InitModel(int argc, char ** argv,

/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
-attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
-fnns[i].InitModel(argc, argv, myDevID, myMem);
-attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
+attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
+fnns[i].InitModel(argc, argv, myDevID);
+attLayerNorms[i].InitModel(argc, argv, myDevID);
}
-encodeLayerNorm->InitModel(argc, argv, myDevID, myMem);
+encodeLayerNorm->InitModel(argc, argv, myDevID);
}

/*
@@ -104,6 +101,10 @@ XTensor AttEncoder::Make(XTensor &input, XTensor *mask, XTensor &maskEncDec, boo
x = embedder.Make(input, 0);

+/* dropout */
+if(isTraining && dropoutP > 0)
+x = Dropout(x, dropoutP);
+
for(int i = 0; i < nlayer; i++){
XTensor att;
XTensor ln;
...
@@ -65,9 +65,6 @@ public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* layer number */
int nlayer;
@@ -118,7 +115,7 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
-int myDevID = -1, XMem * myMem = NULL);
+int myDevID = -1);

/* make the encoding network */
XTensor Make(XTensor &input, XTensor *mask, XTensor &maskEncDec, bool isTraining);
...
@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
->> myMem - the memory pool
*/
-void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
-mem = myMem;

float minmax = 0;
@@ -68,7 +66,7 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
InitTensor2DV2(&w2, outSize, hSize, X_FLOAT, devID);
InitTensor1DV2(&b2, outSize, X_FLOAT, devID);

-fnnLayerNorm.InitModel(argc, argv, myDevID, myMem);
+fnnLayerNorm.InitModel(argc, argv, myDevID);

//float scale = 1.0F;
//float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize));
...
@@ -37,9 +37,6 @@ public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* size of input vector */
int inSize;
@@ -76,7 +73,7 @@ public:
~T2TFNN();

/* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
+void InitModel(int argc, char ** argv, int myDevID = -1);

/* make the network */
XTensor Make(XTensor &input, bool isTraining);
...
@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN()
{
devID = -1;
-mem = NULL;
d = 0;
}
@@ -46,21 +45,19 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
->> myMem - the memory pool
*/
-void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
-mem = myMem;

d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);

-InitTensor1DV2(&w, d, X_FLOAT, devID);
-InitTensor1DV2(&b, d, X_FLOAT, devID);
+InitTensor1D(&w, d, X_FLOAT, devID);
+InitTensor1D(&b, d, X_FLOAT, devID);

-//w.SetDataRand(1.0F, 1.0F);
-//b.SetZeroAll();
+w.SetDataRand(1.0F, 1.0F);
+b.SetZeroAll();
}

/*
...
@@ -36,9 +36,6 @@ class T2TLN
public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* the transformation matrix w */
XTensor w;
@@ -57,7 +54,7 @@ public:
~T2TLN();

/* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
+void InitModel(int argc, char ** argv, int myDevID = -1);

/* make the network */
XTensor Make(XTensor &input);
...
@@ -35,7 +35,9 @@ XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
XTensor base;
XTensor lp;

-base = (length + 5)/(1.0F + 5.0F);
+//base = ScaleAndShift(ScaleAndShift(length, 0, 5.0F), 1.0F/(5 + 1));
+base = (length + 5)/(1 + 5);

lp = Power(base, alpha);

return lp;
...
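/* Worked example of the GNMT penalty above, lp = ((length + 5)/6)^alpha:
   with alpha = 0.6, a 1-token hypothesis gets lp = 1.0 while a 13-token one
   gets lp = 3^0.6 ~= 1.93; dividing each hypothesis score by its lp
   normalizes for length, so short outputs stop being unfairly favored. */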
@@ -40,9 +40,6 @@ public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* the encoder */
AttEncoder * encoder;
@@ -71,9 +68,6 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv);

-/* reset cache for decoder */
-void ResetCache();
-
/* make the encoding network */
XTensor MakeEncoder(XTensor &input, XTensor *mask, bool isTraining);
@@ -110,9 +104,6 @@ public:
void Read(const char * fn);
};

-void FastRead(XTensor* x, FILE* f);
-void FastDump(XTensor* x, FILE* f);
-void ConvertModelFile(const TensorList* params, const char* src, const char* tgt);
-
}
#endif
@@ -25,14 +25,12 @@
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"

namespace transformer
{

/* constructor */
T2TOutput::T2TOutput()
{
devID = -1;
-mem = NULL;
vSize = -1;
inSize = -1;
hSize = -1;
@@ -48,12 +46,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
->> myMem - the memory pool
*/
-void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
+void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
-mem = myMem;

float minmax = 0;
@@ -62,14 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);

-InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
-
-//float scale = 1.0F;
-//float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
-//w.SetDataRand(-finfout, finfout);
-
-//DTYPE v = 1.0F/(float)sqrt((float)hSize);
-//w.SetDataRandn(0, v);
+InitTensor2D(&w, hSize, vSize, X_FLOAT, devID);
}

/*
@@ -83,7 +72,6 @@ XTensor T2TOutput::Make(XTensor &input)
XTensor &x = input;

return Softmax(MMul(x, X_NOTRANS, w, X_TRANS), -1);
-//return MulAndShift(x, X_NOTRANS, w, X_TRANS, b);
}

/*
...
@@ -38,9 +38,6 @@ public:
/* device id */
int devID;

-/* memory pool */
-XMem * mem;
-
/* vocabulary size */
int vSize;
@@ -61,7 +58,7 @@ public:
~T2TOutput();

/* initialize the model */
-void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
+void InitModel(int argc, char ** argv, int myDevID = -1);

/* make the network */
XTensor Make(XTensor &input);
...
@@ -146,7 +146,7 @@ public:
~T2TPredictor();

/* create an initial state */
-void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state, XTensor * encoding);
+void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);

/* set the start symbol */
void SetStartSymbol(int symbol);
@@ -155,12 +155,13 @@ public:
void Read(T2TModel * model, T2TStateBundle * state);

/* predict the next state */
-void Predict(T2TStateBundle * next, XTensor & encoding,
-XTensor & inputEnc, XTensor & paddingEnc,
-XTensor& nonFinished, bool updateFinished);
+void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc);

/* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state);

+/* get the predictions of the previous step */
+XTensor GetLastPrediction(T2TStateBundle* state);
+
};
}
...
@@ -62,12 +62,6 @@ private:
/* start symbol */
int startSymbol;

-/* scalar of the input sequence (for max number of search steps) */
-float scalarMaxLength;
-
-/* indicate whether the early stop strategy is used */
-bool isEarlyStop;
-
public:
/* constructor */
T2TSearch();
@@ -79,8 +73,7 @@ public:
void Init(int argc, char ** argv);

/* search for the most promising states */
-void Search(T2TModel * model, XTensor * input, XTensor * padding,
-XTensor * output, XTensor * score);
+void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);

/* preparation */
void Prepare(int myBatchSize,int myBeamSize);
@@ -101,7 +94,7 @@ public:
void FillHeap(T2TStateBundle * beam);

/* save the output sequences in a tensor */
-void Dump(XTensor * output, XTensor * score);
+void Dump(XTensor * output);

/* check if the token is an end symbol */
bool IsEnd(int token);
@@ -109,17 +102,6 @@ public:
/* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum);

-/* penalize beams that completed */
-int UpdateCompleted(T2TStateBundle * beam, XTensor & encoding,
-XTensor& inputEnc, XTensor& paddingEnc,
-IntList completedStates, XTensor &nonFinished);
-
-/* check whether all hypotheses are completed */
-bool IsAllCompleted(T2TStateBundle * beam);
-
-/* check if any hypotheses are completed */
-IntList IsAnyCompleted(T2TStateBundle * beam);
-
/* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam);
};
...
...@@ -15,17 +15,18 @@ ...@@ -15,17 +15,18 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/ */
#include <math.h> #include <math.h>
#include "T2TUtility.h"
#include "T2TTester.h" #include "T2TTester.h"
#include "T2TSearch.h" #include "T2TSearch.h"
#include "T2TUtility.h"
#include "../../tensor/XUtility.h" #include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h" #include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h"
#include "..//..//tensor/XTensor.h"
using namespace nts; using namespace nts;
...@@ -35,7 +36,6 @@ namespace transformer ...@@ -35,7 +36,6 @@ namespace transformer
/* constructor */ /* constructor */
T2TTester::T2TTester() T2TTester::T2TTester()
{ {
} }
/* de-constructor */ /* de-constructor */
...@@ -44,39 +44,23 @@ T2TTester::~T2TTester() ...@@ -44,39 +44,23 @@ T2TTester::~T2TTester()
} }
/* initialize the model */ /* initialize the model */
void T2TTester::Init(int argc, char** argv) void T2TTester::Init(int argc, char ** argv)
{ {
LoadParamInt(argc, argv, "vsize", &vSize, 1); LoadParamInt(argc, argv, "vsize", &vSize, 34040);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize); LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1); LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1);
LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, false); LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, true);
seacher.Init(argc, argv); seacher.Init(argc, argv);
} }
Result ExtractRes(XTensor& output, IntList& indices, int i) { /*
Result res;
XTensor sent, srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 1, X_INT, output.devID);
int idx[]{ i };
srcIdx.SetData(idx, 1);
InitTensor(&tgtIdx, &srcIdx);
tgtIdx.SetAscendingOrder(0);
sent = CopyIndexed(output, 0, srcIdx, tgtIdx);
res.data.Add((int*)sent.data, sent.unitNum);
res.id = indices[i];
return res;
}
/*
test the model test the model
>> fn - test data file >> fn - test data file
>> ofn - output data file >> ofn - output data file
>> model - model that is trained >> model - model that is trained
*/ */
void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model) void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
{ {
int wc = 0; int wc = 0;
int wordCount = 0; int wordCount = 0;
...@@ -85,11 +69,10 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model) ...@@ -85,11 +69,10 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
int batchCount = 0; int batchCount = 0;
/* data files */ /* data files */
FILE* ofile = fopen(ofn, "w"); FILE* ofile = fopen(ofn, "wb");
CheckNTErrors(ofile, "Cannot open the output file"); CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID; int devID = model->devID;
XMem* mem = model->mem;
double startT = GetClockSec(); double startT = GetClockSec();
...@@ -102,23 +85,44 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model) ...@@ -102,23 +85,44 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
    /* an array that keeps the sequences */
    int* seqs = new int[MILLION];

-   batchLoader.Init(fn, 100, true);
+   batchLoader.Init(fn);

    int count = 0;

-   while (!batchLoader.IsEmpty()) {
+   while (!batchLoader.IsEmpty())
+   {
        count++;
        wordCount = 0;

+       /*if (count % 10 == 0 && sentBatch < 128)
+           sentBatch *= 2;*/

        /* reset cache for decoder */
-       model->ResetCache();
+       for (int i = 0; i < model->decoder->nlayer; ++i) {
+           model->decoder->selfCache[i].Clear();
+           model->decoder->contextCache[i].Clear();
+       }

-       IntList indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID);
+       vector<int> indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID);

-       XTensor output, score;
-       seacher.Search(model, &batchEnc, &paddingEnc, &output, &score);
-       for (int i = 0; i < indices.Size(); i++)
-           batchLoader.resBuffer.Add(ExtractRes(output, indices, i));
+       XTensor output;
+       seacher.Search(model, &batchEnc, &paddingEnc, &output);
+
+       for (int i = 0; i < indices.size(); ++i) {
+           Result res;
+           XTensor sent, srcIdx, tgtIdx;
+           InitTensor1D(&srcIdx, 1, X_INT, output.devID);
+           int idx[]{ i };
+           srcIdx.SetData(idx, 1);
+           InitTensor(&tgtIdx, &srcIdx);
+           SetAscendingOrder(tgtIdx, 0);
+           sent = CopyIndexed(output, 0, srcIdx, tgtIdx);
+           res.values = sent;
+           res.id = indices[i];
+           batchLoader.resBuffer.emplace_back(res);
+       }

        wc = batchEnc.GetDim(-1);
        wordCount += wc;
@@ -126,41 +130,48 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
        sentCount += batchEnc.GetDim(-2);
        batchCount += 1;

-       double elapsed = GetClockSec() - startT;
-       XPRINT3(0, stderr, "[INFO] elapsed=%.1fs, sent=%d, sword=%d\n", elapsed, sentCount, wordCount);
+       if (batchCount % 1 == 0) {
+           double elapsed = GetClockSec() - startT;
+           XPRINT3(0, stderr,
+                   "[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
+                   elapsed, sentCount, wordCount);
+       }
    }

-   batchLoader.SortRes();
+   batchLoader.RerankRes();

-   for (int i = 0; i < batchLoader.resBuffer.Size(); i++)
-       Dump(ofile, batchLoader.resBuffer[i].data);
+   for (auto res : batchLoader.resBuffer) {
+       Dump(ofile, &res.values);
+   }

    fclose(ofile);

    delete[] seqs;

    double elapsed = GetClockSec() - startT;
    XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d)\n", elapsed, wordCountTotal, sentCount);
}

/*
dump the result into the file
>> file - data file
->> output - output list
+>> output - output tensor
*/
-void T2TTester::Dump(FILE * file, IntList& output)
+void T2TTester::Dump(FILE * file, XTensor * output)
{
-   for (int i = 0; i < output.Size(); i++) {
-       int w = output[i];
-       if (w < 0) {
-           if (i == 0)
-               return;
-           else
-               break;
-       }
-       fprintf(file, "%d ", w);
-   }
-   fprintf(file, "\n");
+   int seqLength = output->GetDim(-1);
+
+   for (int i = 0; i < output->unitNum; i += seqLength) {
+       for (int j = 0; j < seqLength; j++) {
+           int w = output->GetInt(i + j);
+           fprintf(file, "%d ", w);
+           if (w < 0)
+               break;
+       }
+       fprintf(file, "\n");
+   }
}

}
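A note on the format the new Dump writes: each sequence becomes one line of blank-separated token ids, and a negative id marks the padded tail of a fixed-length row. The following is a minimal stand-alone sketch of that same loop over a plain array; the ids are made up for illustration:

#include <cstdio>

int main()
{
    const int seqLength = 5;                /* row width after padding */
    int output[2 * 5] = { 7, 9, 2, -1, -1,  /* sentence 0 */
                          3, 4, 5, 6,  2 }; /* sentence 1 */

    for (int i = 0; i < 2 * seqLength; i += seqLength) {
        for (int j = 0; j < seqLength; j++) {
            int w = output[i + j];
            printf("%d ", w);   /* the negative id itself is printed, */
            if (w < 0)          /* then the rest of the row is cut off */
                break;
        }
        printf("\n");
    }
    return 0;
}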
@@ -62,7 +62,7 @@ public:
    void Test(const char * fn, const char * ofn, T2TModel * model);

    /* dump the result into the file */
-   void Dump(FILE * file, IntList& output);
+   void Dump(FILE * file, XTensor * output);
};

}
@@ -22,7 +22,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "T2TUtility.h"

namespace transformer
{
@@ -115,11 +114,4 @@ void ShowParams(int argc, char ** argv)
    fprintf(stderr, "\n");
}

-/* dump tensors */
-void DumpTensors(std::initializer_list<nts::XTensor*> list) {
-    int i(0);
-    for (auto& x : list)
-        x->Dump(stderr, std::to_string(++i).c_str());
-}

}
@@ -23,9 +23,6 @@
#define __T2TUTILITY_H__

#include <stdio.h>
-#include <string>
-#include "../../tensor/XTensor.h"
-#include <initializer_list>

namespace transformer
{
@@ -41,10 +38,6 @@ void LoadParamFloat(int argc, char ** argv, const char * name, float * p, float
/* show arguments */
void ShowParams(int argc, char ** argv);

-/* dump tensors */
-void DumpTensors(std::initializer_list<nts::XTensor*> list);

extern int llnum;
extern FILE * tf;
@@ -29,35 +29,10 @@
#include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h"
-#include "../../model/Model.h"

namespace transformer
{

-struct AttModel : Model {
-    AttModel(int devID) {
-        Register("w1", {2,3,4}, X_FLOAT, devID);
-        Register("b1", {2,3,4}, X_FLOAT, devID);
-        Register("3", {2,3,4}, X_FLOAT, devID);
-    }
-};
-
-struct Transformer {
-    AttModel *att;
-    Transformer(int devID) {
-        att = new AttModel(devID);
-    }
-    ~Transformer() {
-        delete att;
-    }
-};
-
-void test() {
-    Transformer model(0);
-    model.att->Get("w1")->SetZeroAll();
-    model.att->Get("w1")->Dump(stderr);
-}

int TransformerMain(int argc, const char ** argv)
{
    if(argc == 0)
@@ -71,43 +46,24 @@ int TransformerMain(int argc, const char ** argv)
    ShowParams(argc, args);

-   bool convertFile = false;
    bool isBeamSearch = false;
-   bool convertModel = false;

+   char * trainFN = new char[MAX_LINE_LENGTH];
    char * modelFN = new char[MAX_LINE_LENGTH];
-   char * rawFN = new char[MAX_LINE_LENGTH];
    char * testFN = new char[MAX_LINE_LENGTH];
    char * outputFN = new char[MAX_LINE_LENGTH];
    char * rawModel = new char[MAX_LINE_LENGTH];

    LoadParamString(argc, args, "model", modelFN, "");
-   LoadParamString(argc, args, "rawmodel", rawModel, "");
-   LoadParamString(argc, args, "input", testFN, "");
-   LoadParamString(argc, args, "rawinput", rawFN, "");
+   LoadParamString(argc, args, "rawModel", rawModel, "");
+   LoadParamString(argc, args, "test", testFN, "");
    LoadParamString(argc, args, "output", outputFN, "");
    LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
-   LoadParamBool(argc, args, "convertfile", &convertFile, false);
-   LoadParamBool(argc, args, "convertmodel", &convertModel, false);

    srand((unsigned int)time(NULL));

    T2TModel model;
    model.InitModel(argc, args);

-   /* convert test file from text to binary */
-   if (convertFile) {
-       DataSet::ConvertFile(rawFN, testFN);
-   }
-
-   /* convert parameters from text to binary */
-   if (convertModel) {
-       TensorList params(100);
-       model.GetParams(params);
-       ConvertModelFile(&params, rawModel, modelFN);
-   }

    /* load the model if neccessary */
    if(strcmp(modelFN, ""))
        model.Read(modelFN);
@@ -119,6 +75,7 @@ int TransformerMain(int argc, const char ** argv)
        searcher.Test(testFN, outputFN, &model);
    }

+   delete[] trainFN;
    delete[] modelFN;
    delete[] testFN;
    delete[] outputFN;
@@ -19,23 +19,27 @@
 * $Created by: HU Chi (huchinlp@foxmail.com) 2019-04-05
 */

+#include "DataSet.h"
+#include "StringUtil.h"
#include <string>
+#include <vector>
#include <fstream>
#include <algorithm>
-#include "DataSet.h"
-#include "StringUtil.h"
-#include "../../../tensor/XUtility.h"
+#include "..//..//..//tensor/XUtility.h"

using namespace nts;
+using namespace std;

-/* sort results by their ids */
-void DataSet::SortRes()
-{
-    auto cmp = [](Result& a, Result& b) {
-        return a.id < b.id;
-    };
-    std::sort(resBuffer.items, resBuffer.items + resBuffer.count, cmp);
-}
+bool Compare(Example& a, Example& b) {
+    return a.values.size() > b.values.size();
+}
+
+bool CompareRes(Result& a, Result& b) {
+    return a.id < b.id;
+}
+
+/* sort results by their ids */
+void DataSet::RerankRes() {
+    sort(resBuffer.begin(), resBuffer.end(), CompareRes);
+}
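Compare and CompareRes serve two different sorts: the input buffer is ordered by descending length so mini-batches stay roughly uniform, while RerankRes restores the original line order by id before the results are dumped. A stand-alone sketch of the id-based rerank, with made-up results:

#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;

struct Res { int id; const char* text; };

int main()
{
    /* results arrive in batch (length-sorted) order ... */
    vector<Res> resBuffer = { {2, "c"}, {0, "a"}, {1, "b"} };

    /* ... and are restored to source-file order by id */
    sort(resBuffer.begin(), resBuffer.end(),
         [](const Res& a, const Res& b) { return a.id < b.id; });

    for (const auto& r : resBuffer)
        printf("%d %s\n", r.id, r.text);   /* 0 a, 1 b, 2 c */
    return 0;
}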
@@ -43,30 +47,27 @@
/*
load data from the file to the buffer
*/
void DataSet::LoadDataToBuffer()
{
+   string line;
+   buffer.clear();
    bufferUsed = 0;
-   srcBuffer.Clear();
-   bufferSize = min(bufferSize, exampleNumber);
+   const string tokenDelimiter = " ";

-   for (int i = 0; i < bufferSize; i++) {
-       long off = offset[index++];
-       IntList data(off);
-       data.count = off;
-       fread(data.items, sizeof(int), off, fp);
+   int id = 0;
+   while (getline(*fp, line)) {
+       vector<int> values = Split<int>(line, tokenDelimiter);
        Example example;
        example.id = id++;
-       example.data = data;
-       srcBuffer.Add(example);
+       example.values = values;
+       buffer.emplace_back(example);
    }
+   if (fp->eof()) {
+       fp->seekg(fp->beg);
+   }
    if (sortBuffer) {
-       auto cmp = [](Example& a, Example& b) {
-           return a.data.Size() > b.data.Size();
-       };
-       std::sort(srcBuffer.items, srcBuffer.items + srcBuffer.count, cmp);
+       sort(buffer.begin(), buffer.end(), Compare);
    }
+   resBuffer.reserve(buffer.size());
}
@@ -77,52 +78,43 @@
/*
select a field and generate a mini-batch by indices
>>> devID - devices id, -1 for CPU
>>> mem - the memory pool
*/
-IntList DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batchSize, int devID)
+vector<int> DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc,
+                               size_t batchSize, int devID)
{
-   if(srcBuffer.count == 0)
-       LoadDataToBuffer();

    size_t realBatchSize = batchSize;

    /* real batch size */
-   if ((srcBuffer.Size() - bufferUsed) < batchSize) {
-       realBatchSize = srcBuffer.Size() - bufferUsed;
+   if ((buffer.size() - bufferUsed) < batchSize) {
+       realBatchSize = buffer.size() - bufferUsed;
    }

    /* get the maximum sentence length in a mini-batch */
    size_t maxLen = 0;
-   if (realBatchSize == 1) {
-       maxLen = srcBuffer[bufferUsed].data.Size();
-   }
-   for (size_t i = 0; i < realBatchSize - 1; i++) {
-       maxLen = max(maxLen, srcBuffer[bufferUsed + i].data.Size());
+   if (realBatchSize == 1)
+       maxLen = buffer[bufferUsed].values.size();
+   for (size_t i = 0; i < realBatchSize - 1; ++i) {
+       maxLen = max(maxLen, buffer[bufferUsed + i].values.size());
    }

    CheckNTErrors(maxLen != 0, "wrong length dectected");

-   int* batchValues = new int[maxLen * realBatchSize];
-   float* paddingValues = new float[maxLen * realBatchSize];
+   int* batchValues = new int[realBatchSize * maxLen];
+   float* paddingValues = new float[realBatchSize * maxLen];

    for (int i = 0; i < realBatchSize * maxLen; ++i) {
        batchValues[i] = 1.0F;
    }
-   memset(batchValues, 0, sizeof(int) * maxLen * realBatchSize);
    memset(paddingValues, 0, sizeof(float) * maxLen * realBatchSize);

    size_t cur = 0;

    /* left padding */
-   IntList indices;
-   indices.Reserve(realBatchSize);
-   for (size_t i = 0; i < realBatchSize; i++) {
-       indices.Add(srcBuffer[bufferUsed + i].id);
-       IntList& data = srcBuffer[bufferUsed + i].data;
-       cur = maxLen * (i + 1) - data.Size();
-       for (int j = 0; j < data.Size(); j++) {
-           batchValues[cur] = data[j];
+   vector<int> indices;
+   indices.reserve(realBatchSize);
+   for (size_t i = 0; i < realBatchSize; ++i) {
+       indices.push_back(buffer[bufferUsed + i].id);
+       cur = maxLen * (i + 1) - buffer[bufferUsed + i].values.size();
+       for (int v : buffer[bufferUsed + i].values) {
+           batchValues[cur] = v;
            paddingValues[cur++] = 1.0F;
        }
        cur = maxLen * (i + 1);
@@ -135,106 +127,25 @@ IntList DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batc
    batchEnc->SetData(batchValues, batchEnc->unitNum);
    paddingEnc->SetData(paddingValues, paddingEnc->unitNum);

    delete[] batchValues;
    delete[] paddingValues;

    return indices;
}
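To make the layout concrete: each sentence is written right-aligned into its maxLen-wide row, so shorter sentences get pad positions on the left (the batch is pre-filled with what appears to be pad id 1), and paddingEnc holds 1.0 over real tokens. A stand-alone sketch of the indexing, with made-up token ids:

#include <cstdio>
#include <vector>
using namespace std;

int main()
{
    /* two sentences, batch laid out row-major as [2 x maxLen], left-padded */
    vector<vector<int>> sents = { {7, 9}, {3, 4, 5} };
    size_t maxLen = 3;

    vector<int>   batch(2 * maxLen, 1);      /* 1 = assumed pad id  */
    vector<float> padding(2 * maxLen, 0.0F); /* 0 = padded position */

    for (size_t i = 0; i < sents.size(); ++i) {
        /* start so that the sentence ends exactly at column maxLen */
        size_t cur = maxLen * (i + 1) - sents[i].size();
        for (int v : sents[i]) {
            batch[cur] = v;
            padding[cur++] = 1.0F;
        }
    }

    for (size_t i = 0; i < 2 * maxLen; ++i)
        printf("%d/%.0f ", batch[i], padding[i]); /* 1/0 7/1 9/1 3/1 4/1 5/1 */
    printf("\n");
    return 0;
}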
-/*
-convert text file to binary file
-
-format of the text file:
-one sentence per line, seperated by a blank
-
-format of the binary file:
-part 1: number of all examples
-part 2: offsets of all examples
-part 3: the raw data
-
->>> src - the path of source text file
->>> tgt - the path of target binary file
-*/
-void nts::DataSet::ConvertFile(const char* src, const char* tgt)
-{
-    ifstream ifile(src, ios::in);
-    FILE* ofile = fopen(tgt, "wb");
-    CheckNTErrors(ofile, "unable to create the output file");
-
-    string line;
-    size_t idx = 0;
-    const int maxExample = 10240;
-    IntList dataList[maxExample];
-
-    while (getline(ifile, line)){
-        SplitInt(line, " ", dataList[idx++]);
-    }
-
-    /* part 1: number of examples */
-    fwrite(&idx, sizeof(idx), 1, ofile);
-
-    /* part 2: offset of all examples */
-    for (int i = 0; i < idx; i++) {
-        size_t size = (dataList[i].Size());
-        fwrite(&size, sizeof(size), 1, ofile);
-    }
-
-    /* part 3: value of examples */
-    for (int i = 0; i < idx; i++) {
-        fwrite(dataList[i].items, sizeof(int), dataList[i].Size(), ofile);
-    }
-
-    ifile.close();
-    fclose(ofile);
-}
/*
the constructor of DataSet
-the binary data consists of three parts
-part 1: number of all examples
-part 2: offsets of all examples
-part 3: the raw data
>>> fname - path of the data file
->>> myBufferSize - size of the data buffer
->>> mySortBuffer - whether sort the data
*/
-void DataSet::Init(const char* fname, size_t myBufferSize, bool mySortBuffer)
+void DataSet::Init(const char* fname)
{
-   id = 0;
-   index = 0;
+   fp = new ifstream(fname);
+   CheckNTErrors(fp->is_open(), "can not open the file");
    bufferUsed = 0;
-   bufferSize = myBufferSize;
-   sortBuffer = mySortBuffer;
-   fp = fopen(fname, "rb");
-   CheckNTErrors(fp, "can not open the file");

-   /* read offsets */
-   exampleNumber = 0;
-   fread(&exampleNumber, sizeof(exampleNumber), 1, fp);
-   CheckNTErrors(exampleNumber > 0, "invalid example numbers");
-   offset.Reserve(exampleNumber);
-   for (int i = 0; i < exampleNumber; i++) {
-       size_t off;
-       fread(&off, sizeof(off), 1, fp);
-       offset.Add(off);
-   }
-
-   /* reset the buffer size if it is too big */
-   bufferSize = min(bufferSize, exampleNumber);
-   srcBuffer.Reserve(bufferSize);
+   LoadDataToBuffer();
+   if (bufferSize == 0)
+       bufferSize = buffer.size();
}

-/* check if the buffer is empty */
-bool nts::DataSet::IsEmpty()
-{
-    return (index >= offset.count) && (bufferUsed >= bufferSize);
-}
-
-/* de-constructor */
-nts::DataSet::~DataSet()
-{
-    if (fp) {
-        fclose(fp);
-    }
-}
@@ -22,70 +22,79 @@
#ifndef __DATASET_H__
#define __DATASET_H__

-#include <cstdio>
#include "../../..//tensor/XTensor.h"
#include "../../..//tensor/XGlobal.h"
+#include <cstdio>
+#include <fstream>
+#include <unordered_map>
+#include <vector>
+
+using namespace std;
+using namespace nts;
+
+struct Example {
+    int id;
+    vector<int> values;
+};
+
+struct Result {
+    int id;
+    XTensor values;
+};
+
+using BufferType = vector<Example>;
+using ResBufferType = vector<Result>;

namespace nts { // namespace nts(NiuTrans.Tensor)

-/* `DataSet` maintains data buffers for the inference stage .*/
+/* A `DataSet` is associated with a file which contains variable length data.*/
struct DataSet {

-public:
-
    /* the data buffer */
-   ExampleList srcBuffer;
+   BufferType buffer;

    /* the result buffer */
-   ResultList resBuffer;
+   ResBufferType resBuffer;

-   /* the offset of all examples in the data */
-   LongList offset;
-
-   /* wether sort the dataset */
-   bool sortBuffer;
-
-   /* id for each example */
-   size_t id;
+   /* the pointer to file stream */
+   ifstream* fp{ nullptr };

    /* size of the data buffer */
-   size_t bufferSize;
+   size_t bufferSize{ 0 };

    /* size of used data in buffer */
-   size_t bufferUsed;
+   size_t bufferUsed{ 0 };

-   /* size of data in the src file */
-   size_t exampleNumber;
-
-   /* current index of the offset */
-   size_t index;
-
-   /* the pointer of the src file stream */
-   FILE * fp;
-
-public:
-
-   /* check if the buffer is empty */
-   bool IsEmpty();
+   /* wether sort the dataset */
+   bool sortBuffer{ true };

    /* load data from a file to the buffer */
    void LoadDataToBuffer();

-   /* initlization function */
-   void Init(const char* fname, size_t myBufferSize, bool mySortBuffer);
+   /* rerank result for output */
+   void RerankRes();

    /* generate a mini-batch */
-   IntList LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batchSize, int devID);
+   vector<int> LoadBatch(XTensor * batchEnc, XTensor * paddingEnc,
+                         size_t batchSize, int devID);

-   /* sort results by their ids */
-   void SortRes();
+   /* initlization function */
+   void Init(const char* fname);

-   /* transform text file to binary file */
-   static void ConvertFile(const char* src, const char* tgt);
+   /* check if the buffer is empty */
+   bool IsEmpty() {
+       if (bufferUsed < bufferSize)
+           return false;
+       return true;
+   }

    /* de-constructor */
-   ~DataSet();
+   ~DataSet() {
+       if (fp)
+           fp->close();
+       delete fp;
+   }
};

} // namespace nts(NiuTrans.Tensor)
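Taken together, the interface above follows a simple cursor protocol: Init fills the buffer once, LoadBatch consumes a slice per call, and IsEmpty compares the cursor against the buffer size. The sketch below models only that protocol with toy types; the cursor advance is assumed to happen inside LoadBatch, whose middle portion is elided in this diff, and all names here are illustrative:

#include <cstdio>
#include <vector>
using namespace std;

struct ToyDataSet {
    vector<int> buffer;      /* one "example" per element   */
    size_t bufferSize = 0;   /* set once after loading      */
    size_t bufferUsed = 0;   /* advanced by each mini-batch */

    void Init() { buffer = { 10, 11, 12, 13, 14 }; bufferSize = buffer.size(); }

    size_t LoadBatch(size_t batchSize) {
        size_t real = batchSize;
        if (buffer.size() - bufferUsed < batchSize)
            real = buffer.size() - bufferUsed;   /* last, smaller batch */
        bufferUsed += real;
        return real;
    }

    bool IsEmpty() { return bufferUsed >= bufferSize; }
};

int main()
{
    ToyDataSet ds;
    ds.Init();
    while (!ds.IsEmpty())
        printf("batch of %zu\n", ds.LoadBatch(2));   /* 2, 2, 1 */
    return 0;
}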
@@ -21,66 +21,27 @@
#include "StringUtil.h"

+namespace nts {
+
-/*
-split string by delimiter, this will return indices of all sub-strings
->>> s - the original string
->>> delimiter - as it is
->>> a - the indices of all sub-strings
-*/
-void SplitToPos(const string& s, const string& delimiter, LongList& indices)
+/* split string by delimiter, this will return indices of all sub-strings */
+vector<pair<int, int>> SplitToPos(const string& s, const string& delimiter)
{
+   vector<pair<int, int>> fields;
    if (delimiter.length() == 0) {
-       indices.Add(0);
-       return;
+       fields.emplace_back(0, s.length());
+       return fields;
    }
    int pos = 0;
    int start = 0;
    while ((pos = s.find(delimiter, start)) != string::npos) {
        if (pos != start) {
-           indices.Add(start);
+           fields.emplace_back(start, pos);
        }
        start = pos + delimiter.length();
    }
    if (start != s.length()) {
-       indices.Add(start);
+       fields.emplace_back(start, s.length());
    }
+   return fields;
}

-IntList SplitInt(const string& s, const string& delimiter)
-{
-    IntList fields;
-    LongList indices;
-    SplitToPos(s, delimiter, indices);
-    for (int i = 0; i < indices.Size(); i++) {
-        fields.Add(strtol(s.data() + indices[i], nullptr, 10));
-    }
-    return fields;
-}
-
-void SplitInt(const string& s, const string& delimiter, IntList& fields)
-{
-    LongList indices;
-    SplitToPos(s, delimiter, indices);
-    for (int i = 0; i < indices.Size(); i++) {
-        fields.Add(strtol(s.data() + indices[i], nullptr, 10));
-    }
-}
-
-FloatList SplitFloat(const string& s, const string& delimiter)
-{
-    FloatList fields;
-    LongList indices;
-    SplitToPos(s, delimiter, indices);
-    for (int i = 0; i < indices.Size(); i++) {
-        fields.Add(strtof(s.data() + indices[i], nullptr));
-    }
-    return fields;
-}
-
-void SplitInt(const string& s, const string& delimiter, FloatList& fields)
-{
-    LongList indices;
-    SplitToPos(s, delimiter, indices);
-    for (int i = 0; i < indices.Size(); i++) {
-        fields.Add(strtof(s.data() + indices[i], nullptr));
-    }
-}
+} // namespace nts
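For orientation, here is what SplitToPos computes on a concrete input: each pair is (start, past-the-end) of a field, and empty fields are skipped. This stand-alone snippet re-states the loop above with a made-up string:

#include <cstdio>
#include <string>
#include <utility>
#include <vector>
using namespace std;

int main()
{
    string s = "12  7 45";            /* note the double blank */
    string delimiter = " ";
    vector<pair<int, int>> fields;

    int pos = 0;
    int start = 0;
    while ((pos = s.find(delimiter, start)) != string::npos) {
        if (pos != start)
            fields.emplace_back(start, pos);   /* skip empty fields */
        start = pos + delimiter.length();
    }
    if (start != s.length())
        fields.emplace_back(start, s.length());

    for (auto& p : fields)                     /* (0,2) (4,5) (6,8) */
        printf("(%d,%d) ", p.first, p.second);
    printf("\n");
    return 0;
}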
@@ -22,25 +22,85 @@
#ifndef __STRING_UTIL_H__
#define __STRING_UTIL_H__

-#include <cstdlib>
#include <string>
+#include <utility>
-#include "..//..//..//tensor/XList.h"
+#include <vector>

using namespace std;
-using namespace nts;
+
+namespace nts {

/* Splits a string based on the given delimiter string. Each pair in the
 * returned vector has the start and past-the-end positions for each of the
 * parts of the original string. Empty fields are not represented in the output.
 */
-void SplitToPos(const string& s, const string& delimiter, LongList& indices);
+vector<pair<int, int>> SplitToPos(const string& s, const string& delimiter);

+/* Splits the given string and converts each part to the given T. */
+template <typename T>
+vector<T> Split(const string& s, const string& delimiter);
+
+template <>
+inline vector<string> Split(const string& s, const string& delimiter)
+{
+    vector<string> fields;
+    for (const auto& p : SplitToPos(s, delimiter)) {
+        fields.emplace_back(s.substr(p.first, p.second - p.first));
+    }
+    return fields;
+}
+
+template <>
+inline vector<int> Split(const string& s, const string& delimiter)
+{
+    vector<int> fields;
+    for (const auto& p : SplitToPos(s, delimiter)) {
+        fields.emplace_back(strtol(s.data() + p.first, nullptr, 10));
+    }
+    return fields;
+}
+
+template <>
+inline vector<int64_t> Split(const string& s, const string& delimiter)
+{
+    vector<int64_t> fields;
+    for (const auto& p : SplitToPos(s, delimiter)) {
+        fields.emplace_back(strtoll(s.data() + p.first, nullptr, 10));
+    }
+    return fields;
+}

-IntList SplitInt(const string& s, const string& delimiter);
+template <>
+inline vector<float> Split(const string& s, const string& delimiter)
+{
+    vector<float> fields;
+    for (const auto& p : SplitToPos(s, delimiter)) {
+        fields.emplace_back(strtof(s.data() + p.first, nullptr));
+    }
+    return fields;
+}

-void SplitInt(const string& s, const string& delimiter, IntList& fields);
+template <>
+inline vector<uint8_t> Split(const string& s, const string& delimiter)
+{
+    vector<uint8_t> fields;
+    for (const auto& p : SplitToPos(s, delimiter)) {
+        fields.emplace_back(strtol(s.data() + p.first, nullptr, 10));
+    }
+    return fields;
+}

-FloatList SplitFloat(const string& s, const string& delimiter);
+template <>
+inline vector<bool> Split(const string& s, const string& delimiter)
+{
+    vector<bool> fields;
+    for (const auto& p : SplitToPos(s, delimiter)) {
+        fields.emplace_back(
+            static_cast<bool>(strtol(s.data() + p.first, nullptr, 10)));
+    }
+    return fields;
+}

-void SplitInt(const string& s, const string& delimiter, FloatList& fields);
+} // namespace nts

#endif // __STRING_UTIL_H__
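Note that the numeric specializations parse in place from s.data() + p.first and rely on the delimiter to stop strtol/strtof, so only Split&lt;string&gt; allocates copies. A hypothetical driver, which compiles only against this header:

#include <cstdio>
#include "StringUtil.h"

int main()
{
    vector<int>   ids    = nts::Split<int>("12 7 45", " ");
    vector<float> scores = nts::Split<float>("0.5 1.25", " ");

    /* prints: 12 7 45 1.25 */
    printf("%d %d %d %.2f\n", ids[0], ids[1], ids[2], scores[1]);
    return 0;
}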
@@ -26,183 +26,9 @@
 *
 */

-#ifdef WIN32
-#include <wtypes.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include "XBLAS.h"
-#include "XGlobal.h"

/* the nts (NiuTrans.Tensor) namespace */
namespace nts{

-#ifdef WIN32
-HINSTANCE hBLASDll;
-#endif
-
-/* single-precision floating matrix-matrix multiplication */
-void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
-                    OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
-                    OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
-                    float *, OPENBLAS_CONST BLASINT);
-
-/* double-precision floating matrix-matrix multiplication */
-void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
-                    OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
-                    OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
-                    double *, OPENBLAS_CONST BLASINT);
-
-/* single-precision floating vector-vector multiplication (rank-1) */
-void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
-                   OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
-                   float *, OPENBLAS_CONST BLASINT);
-
-/* double-precision floating vector-vector multiplication (rank-1) */
-void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
-                   OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
-                   double *, OPENBLAS_CONST BLASINT);
-
-/* set the number of threads */
-void (*XBLAS_SET_THREAD_NUM)(int);
-
-/* get the number of threads */
-//int (*XBLAS_GET_THREAD_NUM)();
-
-/* get the number of physical processors (cores).*/
-int (*XBLAS_GET_CORE_NUM)();
-
-/* get the CPU corename */
-//char * (*XBLAS_GET_CORE_NAME)();
-
-/* get the parallelization type used by OpenBLAS */
-//int (*XBLAS_GET_PARALLEL_TYPE)(void);
-
-#if defined(USE_BLAS)
-
-/* load some stuff for BLAS */
-void LoadBLAS(const char * dllFileName)
-{
-#ifndef CUDA_BLAS
-#ifdef _WIN32
-#if defined(OPENBLAS)
-    /* non-ascii characters are not supported yet */
-    wchar_t * fn = new wchar_t[strlen(dllFileName) + 1];
-    memset(fn, 0, sizeof(wchar_t) * (strlen(dllFileName) + 1));
-    for(int i = 0; i < strlen(dllFileName); i++)
-        fn[i] = dllFileName[i];
-
-    hBLASDll = LoadLibrary((LPCWSTR)fn);
-
-    if(!hBLASDll){
-        XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
-        exit(1);
-    }
-
-    /* matrix-matrix multiplicatoin */
-    (FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
-    (FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");
-
-    /* vector-vector multiplication */
-    (FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
-    (FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");
-
-    /* multi-threading */
-    (FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_set_num_threads");
-    //(FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "goto_set_num_threads");
-    //(FARPROC&)XBLAS_GET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_get_num_threads");
-    (FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "openblas_get_num_procs");
-    //(FARPROC&)XBLAS_GET_CORE_NAME = GetProcAddress(hBLASDll, "openblas_get_corename");
-    //(FARPROC&)XBLAS_GET_PARALLEL_TYPE = GetProcAddress(hBLASDll, "openblas_get_parallel");
-
-    delete[] fn;
-#endif // defined(OPENBLAS)
-
-#if defined(MKL)
-    /* non-ascii characters are not supported yet */
-    wchar_t * fn = new wchar_t[strlen(dllFileName) + 1];
-    memset(fn, 0, sizeof(wchar_t) * (strlen(dllFileName) + 1));
-    for(int i = 0; i < strlen(dllFileName); i++)
-        fn[i] = dllFileName[i];
-
-    hBLASDll = LoadLibrary((LPCWSTR)fn);
-
-    if(!hBLASDll){
-        XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
-        exit(1);
-    }
-
-    /* matrix-matrix multiplicatoin */
-    (FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
-    (FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");
-
-    /* vector-vector multiplication */
-    (FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
-    (FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");
-
-    /* multi-threading */
-    (FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "MKL_Set_Num_Threads");
-    (FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "MKL_Get_Max_Threads");
-#endif // defined(MKL)
-
-#else // _WIN32
-    XBLAS_SGEMM = &cblas_sgemm;
-    XBLAS_DGEMM = &cblas_dgemm;
-    XBLAS_SGER = &cblas_sger;
-    XBLAS_DGER = &cblas_dger;
-#if defined(OPENBLAS)
-    XBLAS_SET_THREAD_NUM = &openblas_set_num_threads;
-    XBLAS_GET_CORE_NUM = &openblas_get_num_procs;
-#endif // defined(OPENBLAS)
-#if defined(MKL)
-    XBLAS_SET_THREAD_NUM = &mkl_set_num_threads;
-    XBLAS_GET_CORE_NUM = &mkl_get_max_num_threads;
-#endif // defined(MKL)
-#endif // _WIN32
-
-    XBLAS_SET_THREAD_NUM(1);
-#endif // ndef(CUDA_BLAS)
-}
-
-/* unload the libs */
-void UnloadBLAS()
-{
-#ifdef _WIN32
-    if(!FreeLibrary(hBLASDll)){
-        XPRINT(0, stderr, "[UnloadBLAS] Error! Cannot free the BLAS dll!\n");
-        exit(1);
-    }
-#else
-#endif
-}
-
-#else // undefined(USE_BLAS) || undefined(OPENBLAS)
-
-void LoadBLAS(const char * dllFileName)
-{
-    XPRINT(0, stderr, "[LoadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
-    exit(1);
-}
-
-void UnloadBLAS()
-{
-    XPRINT(0, stderr, "[UnloadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
-    exit(1);
-}
-
-#endif // defined(USE_BLAS) && defined(OPENBLAS)

} /* end of the nts (NiuTrans.Tensor) namespace */
@@ -34,7 +34,6 @@ namespace nts{
/* some of the code below is from OpenBLAS (https://github.com/xianyi/OpenBLAS) */

-//#define OPENBLAS

#define OPENBLAS_CONST const
typedef int BLASINT;
@@ -46,7 +45,26 @@ typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;

#if defined(USE_BLAS)

+#ifdef OPENBLAS
+#define XBLAS_SGEMM cblas_sgemm
+#define XBLAS_DGEMM cblas_dgemm
+#define XBLAS_SGER cblas_sger
+#define XBLAS_DGER cblas_dger
+#define XBLAS_SAXPY cblas_saxpy
+#define XBLAS_DAXPY cblas_daxpy
+#define XBLAS_SET_THREAD_NUM openblas_set_num_threads
+#define XBLAS_GET_CORE_NUM openblas_get_num_procs
+#endif
+
+#ifdef MKL
+#define XBLAS_SGEMM cblas_sgemm
+#define XBLAS_DGEMM cblas_dgemm
+#define XBLAS_SGER cblas_sger
+#define XBLAS_DGER cblas_dger
+#define XBLAS_SAXPY cblas_saxpy
+#define XBLAS_DAXPY cblas_daxpy
+#define XBLAS_SET_THREAD_NUM MKL_Set_Num_Threads
+#define XBLAS_GET_CORE_NUM MKL_Get_Max_Threads
+#endif
+
/*
single/double-precision floating matrix-matrix multiplication (rank-3)
- SGEMM (ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
@@ -62,14 +80,14 @@ where A, B and C are matrices,
LDB(=N) specifies the size of the first dimension of B as declared in the calling (sub) program,
and LDC(=N) specifies the size of the first dimension of C as declared in the calling (sub) program.
*/
-extern "C" void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
+extern "C" void XBLAS_SGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
                OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
                OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
                float *, OPENBLAS_CONST BLASINT);

/* double-precision floating matrix-matrix multiplication */
-extern "C" void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
+extern "C" void XBLAS_DGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
                OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
                OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
@@ -88,24 +106,33 @@ where X and Y are vectors with m and n elements respectively,
E.g., if we are using CblasRowMajor, the leading dimension is the number of columns of A.
*/
-extern "C" void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
+extern "C" void XBLAS_SGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
               OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
               float *, OPENBLAS_CONST BLASINT);

/* double-precision floating vector-vector multiplication (rank-1) */
-extern "C" void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
+extern "C" void XBLAS_DGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
               OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
               double *, OPENBLAS_CONST BLASINT);

+/*
+some description
+*/
+extern "C" void XBLAS_SAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST float a, OPENBLAS_CONST float *x, OPENBLAS_CONST BLASINT incx, OPENBLAS_CONST float *y, OPENBLAS_CONST BLASINT incy);
+
+/* double-precision floating sumMe function */
+extern "C" void XBLAS_DAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST double a, OPENBLAS_CONST double *x, OPENBLAS_CONST BLASINT incx, OPENBLAS_CONST double *y, OPENBLAS_CONST BLASINT incy);
+
/* set the number of threads */
-extern "C" void (*XBLAS_SET_THREAD_NUM)(int);
+extern "C" void XBLAS_SET_THREAD_NUM(int);

/* get the number of threads */
//extern "C" int (*XBLAS_GET_THREAD_NUM)();

/* get the number of physical processors (cores).*/
-extern "C" int (*XBLAS_GET_CORE_NUM)();
+extern "C" int XBLAS_GET_CORE_NUM();

/* get the CPU corename */
//extern "C" char * (*XBLAS_GET_CORE_NAME)();
@@ -113,58 +140,6 @@ extern "C" int (*XBLAS_GET_CORE_NUM)();
/* get the parallelization type used by OpenBLAS */
//extern "C" int (*XBLAS_GET_PARALLEL_TYPE)(void);

-/* linux systems */
-#ifndef _WIN32
-
-/* cblas functions that are imported from the lib. See cblas.h in OpenBlas for more information */
-extern "C" void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
-                            OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K, OPENBLAS_CONST float alpha,
-                            OPENBLAS_CONST float *A, OPENBLAS_CONST BLASINT lda,
-                            OPENBLAS_CONST float *B, OPENBLAS_CONST BLASINT ldb,
-                            OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST BLASINT ldc);
-
-extern "C" void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
-                            OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K, OPENBLAS_CONST double alpha,
-                            OPENBLAS_CONST double *A, OPENBLAS_CONST BLASINT lda,
-                            OPENBLAS_CONST double *B, OPENBLAS_CONST BLASINT ldb,
-                            OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST BLASINT ldc);
-
-extern "C" void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
-                            OPENBLAS_CONST float *X, OPENBLAS_CONST BLASINT incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT incY,
-                            float *A, OPENBLAS_CONST BLASINT lda);
-
-extern "C" void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
-                            OPENBLAS_CONST double *X, OPENBLAS_CONST BLASINT incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT incY,
-                            double *A, OPENBLAS_CONST BLASINT lda);
-
-#if defined(OPENBLAS)
-/* better control of multi-threading */
-extern "C" void openblas_set_num_threads(int num_threads);
-extern "C" void goto_set_num_threads(int num_threads);
-//extern "C" int openblas_get_num_threads(void);
-extern "C" int openblas_get_num_procs(void);
-//extern "C" char* openblas_get_config(void);
-//extern "C" char* openblas_get_corename(void);
-//extern "C" int openblas_get_parallel(void);
-#endif
-
-#endif
-
-#if defined(MKL)
-/* better control of multi-threading */
-//_Mkl_Api(void,MKL_Set_Num_Threads,(int nth))
-//_Mkl_Api(int,MKL_Get_Max_Threads,(void))
-extern "C" void MKL_Set_Num_Threads(int num_threads);
-extern "C" int MKL_Get_Max_Threads();
-#define mkl_set_num_threads MKL_Set_Num_Threads
-#define mkl_get_max_num_threads MKL_Get_Max_Threads
-//extern "C" void mkl_set_num_threads(int num_threads);
-//extern "C" void omp_set_num_threads(int num_threads);
-//extern "C" int mkl_get_max_num_threads();
-#endif

#if defined(CUDA_BLAS)
@@ -186,24 +161,8 @@ extern void BLASMatrixMULD(int deviceID, double * a, double * b, double * c, int
#endif
-#endif

-#ifdef _WIN32
-#include "windows.h"
-extern HINSTANCE hBLASDll;
-#else
-#endif

-/* load some stuff for BLAS */
-extern void LoadBLAS(const char * dllFileName);

-/* unload the libs */
-extern void UnloadBLAS();

} /* end of the nts (NiuTrans.Tensor) namespace */

#endif
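The net effect of this hunk is a dispatch change: instead of loading cblas_sgemm and friends through function pointers at run time (LoadLibrary/GetProcAddress), the XBLAS_* names are now bound at compile time by macros, so call sites stay unchanged while the backend is chosen by build flags. A stand-alone sketch of the idea, with illustrative stand-in names:

#include <cstdio>

/* a toy "backend" standing in for cblas_sgemm / the MKL equivalent */
static void backend_gemm(int m, int n) { printf("gemm %dx%d\n", m, n); }

/* compile-time binding, as XBLAS.h now does per OPENBLAS/MKL flag */
#define TOY_SGEMM backend_gemm

int main()
{
    TOY_SGEMM(2, 2);   /* resolves at compile time; no dll loading needed */
    return 0;
}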
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-21
*/
#ifndef __XCALL_H__
#define __XCALL_H__
#include "XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
* we define the "new and delete" functions below
*/
/* global flag for enabling gradient flows or not */
static bool X_ENABLE_GRAD = false;
/* initialize a XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor */
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a scalar V2 */
void InitTensor0DV2(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a scalar */
void InitTensor0D(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a tensor with a reference tensor V2 */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
/* generate a XTensor with no initialization */
XTensor * NewTensor();
/* generate a XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a scalar V2 */
XTensor * NewTensor0DV2(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* generate a scalar */
XTensor * NewTensor0D(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense vector by range */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
} // namespace nts(NiuTrans.Tensor)
#endif // __XCALL_H__
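A hypothetical usage sketch for the creation API declared above; it compiles only against the NiuTrans.Tensor sources, uses devID -1 for the CPU, and the range semantics are assumed from the declaration:

#include "XCall.h"
using namespace nts;

int main()
{
    /* a 2 x 3 float matrix on the CPU; gradient flow is off by default */
    XTensor* w = NewTensor2D(2, 3, X_FLOAT, -1);
    w->SetZeroAll();

    /* an int vector initialized in place on an existing object */
    XTensor idx;
    InitTensor1D(&idx, 4, X_INT, -1);

    /* an int vector built from the range helper (presumably 0, 1, 2) */
    XTensor* r = NewTensorRange(0, 3, 1);

    DelTensor(w);
    DelTensor(r);
    return 0;
}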
@@ -50,14 +50,6 @@ int CONST_MINUSONE = -1;
bool CONST_TRUE = true;

int verboseLevel = 0;

-bool useBLAS = false;
-
-#ifdef USE_CUDA
-bool useCUDA = true;
-#else
-bool useCUDA = false;
-#endif

FILE * tmpLog = NULL;
double myTime = 0;
@@ -135,8 +135,6 @@ extern bool CONST_TRUE;
#define NIUTRANSNNDEBUG

extern int verboseLevel;
-extern bool useBLAS;
-extern bool useCUDA;

#define FFLUSH(FILEH) \
{ \
@@ -249,26 +249,6 @@ inline int TensorListBase<T>::FindFirst(const T& item)
    return -1;
}

-template <>
-inline int TensorListBase<Example>::FindFirst(const Example& item)
-{
-    for (int i = 0; i < count; i++) {
-        if (item.id == items[i].id)
-            return i;
-    }
-    return -1;
-}
-
-template <>
-inline int TensorListBase<Result>::FindFirst(const Result& item)
-{
-    for (int i = 0; i < count; i++) {
-        if (item.id == items[i].id)
-            return i;
-    }
-    return -1;
-}
/* clear the data array */
template <typename T>
void TensorListBase<T>::Clear()
@@ -383,8 +363,6 @@ template struct TensorListBase<long>;
template struct TensorListBase<float>;
template struct TensorListBase<short>;
template struct TensorListBase<XTensor*>;
-template struct TensorListBase<Result>;
-template struct TensorListBase<Example>;
template struct TensorListBase<void*>;

} /* end of the nts (NiuTrans.Tensor) namespace */
@@ -133,18 +133,6 @@ typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;

-struct Example {
-    int id;
-    IntList data;
-};
-
-struct Result {
-    int id;
-    IntList data;
-};
-
-typedef TensorListBase<Result> ResultList;
-typedef TensorListBase<Example> ExampleList;

typedef TensorListBase<XTensor*> TensorList;

} /* end of the nts (NiuTrans.Tensor) namespace */
@@ -31,8 +31,8 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{

-int testxmemid = 0;
-void * recordp = NULL;
+//int testxmemid = 0;
+//void * recordp = NULL;

/*
for managing the memories
@@ -1482,7 +1482,7 @@ void XMem::ShowMemUsage(FILE * file)
    }

    fprintf(file, "mem:%.1fMB used:%.1fMB usage:%.3f\n",
-           (DTYPE)used/MILLION, (DTYPE)total/MILLION, (DTYPE)used/total);
+           (DTYPE)total/MILLION, (DTYPE)used/MILLION, (DTYPE)used/total);
}

#ifdef USE_CUDA
@@ -1562,9 +1562,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
    if (freeMem >= MILLION * 512){
        *myBufSize = MILLION * 128;
        if (freeMem >= MILLION * 1024) {
-           *myBufSize = MILLION * 256;
+           *myBufSize = MILLION * 128;
            if (freeMem >= MILLION * 2048)
-               *myBufSize = MILLION * 512;
+               *myBufSize = MILLION * 128;
        }
    }
}
@@ -480,12 +480,10 @@ public:
/* managing the memories */
extern XMemManager GMems;

-extern XMem * GMem;
-extern int testxmemid;
-extern void * recordp;
+//extern XMem * GMem;
+//extern int testxmemid;
+//extern void * recordp;

} /* end of the nts (NiuTrans.Tensor) namespace */
@@ -105,6 +105,8 @@ const char * GetOPName(int type)
        return "R_REDUCEMEAN";
    else if (type == REDUCE_REDUCESUM)
        return "R_REDUCESUM";
+   else if (type == REDUCE_REDUCESUMALL)
+       return "R_REDUCESUMALL";
    else if (type == REDUCE_REDUCESUMSQUARED)
        return "R_REDUCESUMSQUARED";
    else if (type == REDUCE_REDUCEVARIANCE)
@@ -135,6 +137,8 @@ const char * GetOPName(int type)
        return "S_SPLIT";
    else if (type == SHAPE_SPLIT_LIST)
        return "S_SPLIT_LIST";
+   else if (type == SHAPE_STACK)
+       return "S_SHAPE_STACK";
    else if (type == SHAPE_SQUEEZE)
        return "S_SQUEEZE";
    else if (type == SHAPE_TRANSPOSE)
@@ -51,7 +51,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
-#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
+#define MATH_MAX MATH_MATRIXMULBATCHED + 1
+#define MATH_MIN MATH_MAX + 1
+#define MATH_MULTIPLY MATH_MIN + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
@@ -74,7 +76,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
-#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1
+#define REDUCE_REDUCESUMALL REDUCE_REDUCESUM + 1
+#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUMALL + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1

/* data and shape related operations */
@@ -97,7 +100,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_RESHAPE SHAPE_PERMUTE + 1
#define SHAPE_SPLIT SHAPE_RESHAPE + 1
#define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1
-#define SHAPE_SQUEEZE SHAPE_SPLIT_LIST + 1
+#define SHAPE_STACK SHAPE_SPLIT_LIST + 1
+#define SHAPE_SQUEEZE SHAPE_STACK + 1
#define SHAPE_TRANSPOSE SHAPE_SQUEEZE + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
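Because each op ID is defined as the previous one plus one, inserting MATH_MAX and MATH_MIN here silently renumbers every later math ID, which is why GetOPName and the ID table must change in the same commit. A tiny stand-alone sketch of the cascading pattern, with made-up names and base value:

#include <cstdio>

#define OP_BASE        10000
#define OP_MATRIXMUL   OP_BASE + 1
#define OP_MAX         OP_MATRIXMUL + 1   /* newly inserted */
#define OP_MIN         OP_MAX + 1         /* newly inserted */
#define OP_MULTIPLY    OP_MIN + 1         /* shifted by two */

int main()
{
    printf("%d %d\n", OP_MATRIXMUL, OP_MULTIPLY);   /* 10001 10004 */
    return 0;
}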
@@ -83,13 +83,17 @@
#include "shape/Permute.h"
#include "shape/Split.h"
#include "shape/Squeeze.h"
+#include "shape/Stack.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
+#include "shape/IsSameShaped.h"

#include "sort/Sort.h"
#include "sort/TopK.h"

#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
+#include "utilities/CheckData.h"
+#include "utilities/SetAscendingOrder.h"

#endif // __CHEADER_H__
@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
+#include "../shape/IsSameShaped.h"
#include "Div.h"
#include "Div.cuh"
#include "DivDim.h"
@@ -48,9 +49,6 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
                  "Unmatched tensors!");
    CheckDev(a->devID, b->devID);

-   int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA
    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
        _CudaDiv(a, b, c, alpha, leadingDim);
@@ -63,17 +61,17 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
    int blockSizeB = 1;
    int blockSizeC = 1;
    int blockNum = 1;
-   int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
-   int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
-   int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
+   int dimensionSizeA = a->dimSize[leadingDim];
+   int dimensionSizeB = b->dimSize[leadingDim];
+   int dimensionSizeC = c->dimSize[leadingDim];

    for (int i = 0; i < a->order; i++) {
-       if (i != leadingDimRDI) {
-           CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] && a->dimSizeRDI[i] == c->dimSizeRDI[i]),
+       if (i != leadingDim) {
+           CheckNTErrors((a->dimSize[i] == b->dimSize[i] && a->dimSize[i] == c->dimSize[i]),
                          "Unmatched tensors!");
        }
-       if (i < leadingDimRDI)
-           stride *= a->dimSizeRDI[i];
+       if (i > leadingDim)
+           stride *= a->dimSize[i];
    }

    blockSizeA = stride * dimensionSizeA;
@@ -168,7 +166,7 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b)
{
    if(a.order < b.order)
        return -1;
-   if(XTensor::IsSameShaped(&a, &b))
+   if(IsSameShaped(a, b))
        return -1;

    int hitCount = 0;
@@ -253,8 +251,8 @@ where i is the index of the item
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
-   if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-       InitTensor(&c, &a);
+   if (!c.isInit || !IsSameShaped(a, c)) {
+       InitTensorV2(&c, &a);
    }

    int n = GetDivDimIndex(a, b);
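This hunk drops the reversed-dimension-index (RDI) bookkeeping: the stride of the leading dimension is now accumulated directly over the dimensions that follow it in row-major order, instead of over the reversed indices. A stand-alone check of the arithmetic, with an assumed 2x3x4 shape:

#include <cstdio>

int main()
{
    /* row-major [2, 3, 4] tensor, leadingDim = 1 (the size-3 axis) */
    int dimSize[] = { 2, 3, 4 };
    int order = 3;
    int leadingDim = 1;

    int stride = 1;
    for (int i = 0; i < order; i++) {
        if (i > leadingDim)
            stride *= dimSize[i];   /* only dims after the leading one */
    }

    int blockSize = stride * dimSize[leadingDim];
    printf("stride=%d blockSize=%d\n", stride, blockSize);   /* 4, 12 */
    return 0;
}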
@@ -122,7 +122,6 @@ where i is the item index
*/
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
-   int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
                  "Unmatched tensors in multiplication!");
    CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
@@ -130,18 +129,18 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
    int stride = 1;
    int blockSizeA = 1;
    int blockNum = 1;
-   int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
-   int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
-   int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
+   int dimensionSizeA = a->dimSize[leadingDim];
+   int dimensionSizeB = b->dimSize[leadingDim];
+   int dimensionSizeC = c->dimSize[leadingDim];

    for (int i = 0; i < a->order; i++) {
-       if (i != leadingDimRDI) {
-           CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
-                          a->dimSizeRDI[i] == c->dimSizeRDI[i]),
+       if (i != leadingDim) {
+           CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
+                          a->dimSize[i] == c->dimSize[i]),
                          "Unmatched tensors!");
        }
-       if (i < leadingDimRDI)
-           stride *= a->dimSizeRDI[i];
+       if (i > leadingDim)
+           stride *= a->dimSize[i];
    }

    blockSizeA = stride * dimensionSizeA;
@@ -26,6 +26,7 @@
 #include "../../XName.h"
 #include "../../XUtility.h"
 #include "../movement/CopyValues.h"
+#include "../shape/IsSameShaped.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -56,7 +57,7 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha)
     CheckDev(a->devID, b->devID);

-    if(XTensor::IsSameShaped(a, b)){
+    if(_IsSameShaped(a, b)){
         _Div(a, b, c, alpha);
         return;
     }
@@ -188,8 +189,8 @@ i.e., a is divided with b by broadcasting
 */
 void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _Div function */
...
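For reference, the contract behind _DivDim/DivDim above (divide a by b with b broadcast over dimension n, alpha = 0) reduces to the following standalone sketch over plain arrays, assuming row-major layout:

#include <cassert>

int main()
{
    /* a is 2 x 3, b has length 3, n = 1: every row of a is divided by b */
    float a[2][3] = {{2, 4, 6}, {8, 10, 12}};
    float b[3]    = {2, 2, 2};
    float c[2][3];

    for (int i = 0; i < 2; i++)
        for (int j = 0; j < 3; j++)
            c[i][j] = a[i][j] / b[j];

    assert(c[1][2] == 6.0f);
    return 0;
}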
@@ -24,6 +24,7 @@
 #include "../../XTensor.h"
 #include "../../XName.h"
 #include "../../XUtility.h"
+#include "../shape/IsSameShaped.h"
 #include "Mask.h"
 #include "Mask.cuh"
@@ -171,8 +172,8 @@ where i is the index of the element
 */
 void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _Mask function */
...
@@ -22,6 +22,7 @@
 #include "../../XTensor.h"
 #include "../../XDevice.h"
 #include "../../XName.h"
+#include "../shape/IsSameShaped.h"
 #include "MatrixMulBatched.h"
 #include "XTensorBLAS.h"
 #include "MatrixMul2D.h"
@@ -94,27 +95,27 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
                   "Input tensor and output tensor must have same order!");
     CheckNTErrors(a->devID >= 0 && b->devID >= 0 && c->devID >= 0, "The tensors must be on GPUs");

-    int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
-    int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
-    int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
-    int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
-    int cn = c->dimSizeRDI[1];
-    int cm = c->dimSizeRDI[0];
+    int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
+    int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
+    int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
+    int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
+    int cn = c->dimSize[c->order - 2];
+    int cm = c->dimSize[c->order - 1];

     CheckNTErrors((am == bn && an == cn && bm == cm), "Unmatched tensors in multiplication!");

-    int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1];
-    int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1];
-    int cBlockSize = c->dimSizeRDI[0] * c->dimSizeRDI[1];
+    int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
+    int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
+    int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
     int aRealBlockSize = aBlockSize * a->unitSize;
     int bRealBlockSize = bBlockSize * b->unitSize;
     int cRealBlockSize = cBlockSize * c->unitSize;
     int blockNum = 1;

-    for (int i = 2; i < a->order; i++) {
-        CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
-        CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
-        blockNum *= a->dimSizeRDI[i];
+    for (int i = 0; i < a->order - 2; i++) {
+        CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
+        CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
+        blockNum *= a->dimSize[i];
     }

     int devIDBackup = 0;
@@ -125,9 +126,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
                             a->data, transposedA, a->dataType, aBlockSize,
                             b->data, transposedB, b->dataType, bBlockSize,
                             c->data, c->dataType, cBlockSize, blockNum,
-                            a->dimSizeRDI[1], a->dimSizeRDI[0],
-                            b->dimSizeRDI[1], b->dimSizeRDI[0],
-                            c->dimSizeRDI[1], c->dimSizeRDI[0], alpha, beta);
+                            a->dimSize[a->order - 2], a->dimSize[a->order - 1],
+                            b->dimSize[b->order - 2], b->dimSize[b->order - 1],
+                            c->dimSize[c->order - 2], c->dimSize[c->order - 1], alpha, beta);

     BacktoCudaDev(a->devID, devIDBackup);
 #endif
@@ -163,46 +164,43 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
                   "Input tensor and output tensor must have same order!");

-    int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
-    int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
-    int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
-    int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
-    int cn = c->dimSizeRDI[1];
-    int cm = c->dimSizeRDI[0];
+    int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
+    int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
+    int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
+    int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
+    int cn = c->dimSize[c->order - 2];
+    int cm = c->dimSize[c->order - 1];

     CheckNTErrors(am == bn && an == cn && bm == cm, "Unmatched tensors in multiplication!");

-    int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1];
-    int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1];
-    int cBlockSize = c->dimSizeRDI[0] * c->dimSizeRDI[1];
+    int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
+    int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
+    int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
     int aRealBlockSize = aBlockSize * a->unitSize;
     int bRealBlockSize = bBlockSize * b->unitSize;
     int cRealBlockSize = cBlockSize * c->unitSize;
     int blockNum = 1;

-    for (int i = 2; i < a->order; i++) {
-        CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
-        CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
-        blockNum *= a->dimSizeRDI[i];
+    for (int i = 0; i < a->order - 2; i++) {
+        CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
+        CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
+        blockNum *= a->dimSize[i];
     }

-    int aDimSize[2] = {-a->dimSizeRDI[1], a->dimSizeRDI[0]};
-    int bDimSize[2] = {-b->dimSizeRDI[1], b->dimSizeRDI[0]};
-    int cDimSize[2] = {-c->dimSizeRDI[1], c->dimSizeRDI[0]};
+    int aDimSize[2] = {-a->dimSize[a->order - 2], a->dimSize[a->order - 1]};
+    int bDimSize[2] = {-b->dimSize[b->order - 2], b->dimSize[b->order - 1]};
+    int cDimSize[2] = {-c->dimSize[c->order - 2], c->dimSize[c->order - 1]};

-    XTensor * ai = NewTensor2D(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem);
-    XTensor * bi = NewTensor2D(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem);
-    XTensor * ci = NewTensor2D(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem);
+    XTensor * ai = NewTensor2DV2(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem);
+    XTensor * bi = NewTensor2DV2(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem);
+    XTensor * ci = NewTensor2DV2(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem);

     for (int i = 0; i < blockNum; i++) {
         ai->data = (char*)a->data + i * aRealBlockSize;
         bi->data = (char*)b->data + i * bRealBlockSize;
         ci->data = (char*)c->data + i * cRealBlockSize;
 #ifdef USE_BLAS
-        if (useBLAS)
-            _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
-        else
-            _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
+        _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
 #else
         _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
 #endif
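The loop above processes one matrix per batch element. Under the new front-to-back indexing, the trailing two dimensions form the matrix block and the leading dimensions multiply into blockNum; a standalone sketch of that block arithmetic with a hypothetical shape:

#include <cassert>

int main()
{
    /* hypothetical batched operand of shape (2, 3, 4, 5): 2 * 3 = 6 matrices of 4 x 5 */
    const int order = 4;
    int dimSize[order] = {2, 3, 4, 5};

    int blockSize = dimSize[order - 1] * dimSize[order - 2];   /* 20 elements per matrix */
    int blockNum = 1;
    for (int i = 0; i < order - 2; i++)                        /* leading (batch) dimensions */
        blockNum *= dimSize[i];
    assert(blockSize == 20 && blockNum == 6);

    /* matrix i starts i * blockSize elements into row-major storage */
    int unitSize = 4;                                          /* e.g. sizeof(float) */
    int realBlockSize = blockSize * unitSize;
    assert(5 * realBlockSize == 400);                          /* byte offset of the last matrix */
    return 0;
}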
@@ -245,9 +243,9 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
         XTensor * ai = (XTensor*)a->GetItem(i);
         XTensor * bi = (XTensor*)b->GetItem(i);
         XTensor * ci = (XTensor*)c->GetItem(i);
-        if (!XTensor::IsSameShaped(aim, ai) ||
-            !XTensor::IsSameShaped(bim, bi) ||
-            !XTensor::IsSameShaped(cim, ci))
+        if (!_IsSameShaped(aim, ai) ||
+            !_IsSameShaped(bim, bi) ||
+            !_IsSameShaped(cim, ci))
         {
             isUniform = false;
             break;
@@ -262,10 +260,7 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
         CheckNTErrors((bi->order == 2), "2d tensor (i.e., matrix) is required!");
         CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!");
 #ifdef USE_BLAS
-        if (useBLAS)
-            _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
-        else
-            _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
+        _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
 #else
         _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
 #endif
@@ -297,10 +292,10 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b,
     CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
     CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");

-    int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
-    int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
-    int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
-    int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];
+    int an = transposedA == X_TRANS ? a.dimSize[a.order - 1] : a.dimSize[a.order - 2];
+    int am = transposedA == X_TRANS ? a.dimSize[a.order - 2] : a.dimSize[a.order - 1];
+    int bn = transposedB == X_TRANS ? b.dimSize[b.order - 1] : b.dimSize[b.order - 2];
+    int bm = transposedB == X_TRANS ? b.dimSize[b.order - 2] : b.dimSize[b.order - 1];

     CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
@@ -355,10 +350,10 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
     CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
     CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");

-    int an = a.dimSizeRDI[1];
-    int am = a.dimSizeRDI[0];
-    int bn = b.dimSizeRDI[1];
-    int bm = b.dimSizeRDI[0];
+    int an = a.dimSize[a.order - 2];
+    int am = a.dimSize[a.order - 1];
+    int bn = b.dimSize[b.order - 2];
+    int bm = b.dimSize[b.order - 1];

     CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
...
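The an/am/bn/bm picks above encode the transpose handling: under X_TRANS the roles of the last two dimensions swap. A standalone sketch (the enum is redeclared only to keep the snippet self-contained):

#include <cassert>

enum MATRIX_TRANS_TYPE { X_NOTRANS, X_TRANS };

int main()
{
    /* hypothetical matrix operand with trailing shape (..., 4, 5) */
    int rows = 4, cols = 5;                         /* dimSize[order - 2], dimSize[order - 1] */
    MATRIX_TRANS_TYPE transposedA = X_TRANS;

    int an = transposedA == X_TRANS ? cols : rows;  /* effective row count */
    int am = transposedA == X_TRANS ? rows : cols;  /* effective column count */

    assert(an == 5 && am == 4);                     /* a^T is 5 x 4 */
    return 0;
}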
@@ -37,7 +37,7 @@ int GetSumIndex(const XTensor &a, const XTensor &b)
 {
     if (a.order < b.order)
         return -1;
-    if (XTensor::IsSameShaped(&a, &b))
+    if (IsSameShaped(a, b))
         return -1;

     int hitCount = 0;
@@ -66,31 +66,32 @@ operation c = x * w + b MulAndShift
 << return - the result of matrix multiplication
 */
 XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
                     DTYPE alpha, XPRunner * parallelRunner)
 {
     CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
     CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");

-    int xn = x.dimSizeRDI[1];
-    int xm = x.dimSizeRDI[0];
-    int wn = w.dimSizeRDI[1];
-    int wm = w.dimSizeRDI[0];
+    int xn = x.dimSize[x.order - 2];
+    int xm = x.dimSize[x.order - 1];
+    int wn = w.dimSize[w.order - 2];
+    int wm = w.dimSize[w.order - 1];

     CheckNTErrors(xm == wn, "Unmatched tensors in multiplication!");

     int order = x.order + w.order - 2;
     int sub = 0;
     int * dimSize = new int[order];
-    for (int i = 2; i < x.order; i++)
-        dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i];
-    for (int i = 2; i < w.order; i++)
-        dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i];
+    for (int i = 0; i < x.order - 2; i++)
+        dimSize[sub++] = x.dimSize[i];
+    for (int i = 0; i < w.order - 2; i++)
+        dimSize[sub++] = w.dimSize[i];
     dimSize[sub++] = xn;
     dimSize[sub++] = wm;

     float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
-    XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem);
+    XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);

     /* call _MatrixMul function */
     _MatrixMul(&x, X_NOTRANS, &w, X_NOTRANS, tmp, alpha, 0, parallelRunner);
@@ -148,24 +149,24 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
     CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
     CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");

-    int xn = transposedA == X_TRANS ? x.dimSizeRDI[0] : x.dimSizeRDI[1];
-    int xm = transposedA == X_TRANS ? x.dimSizeRDI[1] : x.dimSizeRDI[0];
-    int wn = transposedB == X_TRANS ? w.dimSizeRDI[0] : w.dimSizeRDI[1];
-    int wm = transposedB == X_TRANS ? w.dimSizeRDI[1] : w.dimSizeRDI[0];
+    int xn = transposedA == X_TRANS ? x.dimSize[x.order - 1] : x.dimSize[x.order - 2];
+    int xm = transposedA == X_TRANS ? x.dimSize[x.order - 2] : x.dimSize[x.order - 1];
+    int wn = transposedB == X_TRANS ? w.dimSize[w.order - 1] : w.dimSize[w.order - 2];
+    int wm = transposedB == X_TRANS ? w.dimSize[w.order - 2] : w.dimSize[w.order - 1];

     int order = x.order + w.order - 2;
     int sub = 0;
     int * dimSize = new int[order];
-    for (int i = 2; i < x.order; i++)
-        dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i];
-    for (int i = 2; i < w.order; i++)
-        dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i];
+    for (int i = 0; i < x.order - 2; i++)
+        dimSize[sub++] = x.dimSize[i];
+    for (int i = 0; i < w.order - 2; i++)
+        dimSize[sub++] = w.dimSize[i];
     dimSize[sub++] = xn;
     dimSize[sub++] = wm;

     float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
-    XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem);
+    XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);

     /* call _MatrixMul function */
     _MatrixMul(&x, transposedA, &w, transposedB, tmp, alpha, 0, parallelRunner);
@@ -205,7 +206,6 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
     DelTensorBuf(tmp);

     return c;
 }

 }
\ No newline at end of file
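MulAndShift assembles the output shape from the batch dimensions of x and w followed by the matrix result xn x wm, and the refactor turns the copy into a straight front-to-back walk. A standalone sketch with hypothetical shapes x = (2, 3, 4) and w = (4, 5):

#include <cassert>

int main()
{
    int xDims[3] = {2, 3, 4}, xOrder = 3;
    int wDims[2] = {4, 5},    wOrder = 2;

    int xn = xDims[xOrder - 2], xm = xDims[xOrder - 1];
    int wn = wDims[wOrder - 2], wm = wDims[wOrder - 1];
    assert(xm == wn);                        /* inner dimensions must match */

    int order = xOrder + wOrder - 2;         /* 3 */
    int dimSize[3];
    int sub = 0;
    for (int i = 0; i < xOrder - 2; i++)     /* batch dims of x, front to back */
        dimSize[sub++] = xDims[i];
    for (int i = 0; i < wOrder - 2; i++)     /* batch dims of w (none here) */
        dimSize[sub++] = wDims[i];
    dimSize[sub++] = xn;
    dimSize[sub++] = wm;

    /* the result shape is (2, 3, 5) */
    assert(order == 3 && dimSize[0] == 2 && dimSize[1] == 3 && dimSize[2] == 5);
    return 0;
}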
@@ -22,6 +22,7 @@
 #include "../../XTensor.h"
 #include "../../XName.h"
 #include "../../XUtility.h"
+#include "../shape/IsSameShaped.h"
 #include "Multiply.h"
 #include "Multiply.cuh"
 #include "MultiplyDim.h"
@@ -48,9 +49,6 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
                   "Unmatched tensors!");
     CheckDev(a->devID, b->devID);
-    int leadingDimRDI = a->order - leadingDim - 1;
 #ifdef USE_CUDA
     if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
         _CudaMultiply(a, b, c, alpha, leadingDim);
@@ -63,18 +61,18 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
     int blockSizeB = 1;
     int blockSizeC = 1;
     int blockNum = 1;
-    int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
-    int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
-    int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
+    int dimensionSizeA = a->dimSize[leadingDim];
+    int dimensionSizeB = b->dimSize[leadingDim];
+    int dimensionSizeC = c->dimSize[leadingDim];

     for (int i = 0; i < a->order; i++) {
-        if (i != leadingDimRDI) {
-            CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
-                           a->dimSizeRDI[i] == c->dimSizeRDI[i]),
+        if (i != leadingDim) {
+            CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
+                           a->dimSize[i] == c->dimSize[i]),
                           "Unmatched tensors!");
         }
-        if (i < leadingDimRDI)
-            stride *= a->dimSizeRDI[i];
+        if (i > leadingDim)
+            stride *= a->dimSize[i];
     }
     blockSizeA = stride * dimensionSizeA;
@@ -169,7 +167,7 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b)
 {
     if(a.order < b.order)
         return -1;
-    if(XTensor::IsSameShaped(&a, &b))
+    if(IsSameShaped(a, b))
         return -1;

     int hitCount = 0;
@@ -254,8 +252,8 @@ where i is the index of the item
 */
 void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     int n = GetMultiplyDimIndex(a, b);
...
@@ -122,26 +122,25 @@ where i is the item index
 */
 void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
 {
-    int leadingDimRDI = a->order - leadingDim - 1;
-    CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
+    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
                   "Unmatched tensors in multiplication!");
-    CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!");
+    CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");

     int stride = 1;
     int blockSizeA = 1;
     int blockNum = 1;
-    int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
-    int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
-    int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
+    int dimensionSizeA = a->dimSize[leadingDim];
+    int dimensionSizeB = b->dimSize[leadingDim];
+    int dimensionSizeC = c->dimSize[leadingDim];

     for (int i = 0; i < a->order; i++) {
-        if (i != leadingDimRDI) {
-            CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
-                           a->dimSizeRDI[i] == c->dimSizeRDI[i]),
+        if (i != leadingDim) {
+            CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
+                           a->dimSize[i] == c->dimSize[i]),
                           "Unmatched tensors!");
         }
-        if (i < leadingDimRDI)
-            stride *= a->dimSizeRDI[i];
+        if (i > leadingDim)
+            stride *= a->dimSize[i];
     }
     blockSizeA = stride * dimensionSizeA;
...
@@ -24,6 +24,7 @@
 #include "MultiplyDim.h"
 #include "MultiplyDim.cuh"
 #include "../shape/Unsqueeze.h"
+#include "../shape/IsSameShaped.h"
 #include "../../XName.h"
 #include "../../XUtility.h"
 #include "../movement/CopyValues.h"
@@ -57,7 +58,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha)
     CheckDev(a->devID, b->devID);

-    if(XTensor::IsSameShaped(a, b)){
+    if(_IsSameShaped(a, b)){
         _Multiply(a, b, c, alpha);
         return;
     }
@@ -203,8 +204,8 @@ i.e., a is multiplied with b by broadcasting
 */
 void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _Multiply function */
@@ -232,7 +233,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
 {
     CheckNTErrors(a->order == b->order, "Wrong tensor orders!");
     CheckNTErrors(a->order == c->order, "Wrong tensor orders!");
-    CheckNTErrors(a->order > 0, "TODO!");
+    CheckNTErrors(a->order >= 0, "TODO!");

     int order = a->order;
     int count = 0;
@@ -280,8 +281,8 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
             dimsS[0] = -dimsS[0];
             dimsT[0] = -dimsT[0];

-            XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
-            XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
+            XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
+            XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);

             if(count == 0)
                 source = b->data;
@@ -371,8 +372,8 @@ where some of dimensions of b can be of size 1
 */
 void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _SumBroadcast function */
...
@@ -22,6 +22,7 @@
 #include "../../XTensor.h"
 #include "../../XName.h"
 #include "../../XUtility.h"
+#include "../shape/IsSameShaped.h"
 #include "Sub.h"
 #include "Sub.cuh"
 #include "SubDim.h"
@@ -149,7 +150,7 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b)
 {
     if(a.order < b.order)
         return -1;
-    if(XTensor::IsSameShaped(&a, &b))
+    if(IsSameShaped(a, b))
         return -1;

     int hitCount = 0;
@@ -223,8 +224,8 @@ tensor subtraction c = a - b * \beta
 */
 void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     int n = GetSubDimIndex(a, b);
...
@@ -26,6 +26,7 @@
 #include "../../XName.h"
 #include "../../XUtility.h"
 #include "../movement/CopyValues.h"
+#include "../shape/IsSameShaped.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -61,7 +62,7 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
         return;
     }

-    if (XTensor::IsSameShaped(a, b)) {
+    if (_IsSameShaped(a, b)) {
         _Sub(a, b, c, beta);
         return;
     }
@@ -188,8 +189,8 @@ i.e., a is subtracted with b by broadcasting
 */
 void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _Sub function */
...
@@ -39,7 +39,7 @@ where a is a tensor and b is a row vector
 */
 template <class T, bool betaFired>
 __global__
 void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
 {
     __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];

     int col = blockDim.x * blockIdx.x + threadIdx.x;
@@ -75,7 +75,7 @@ where a is a tensor and b is a column vector
 */
 template <class T, bool betaFired>
 __global__
 void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
 {
     __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
...
@@ -22,7 +22,9 @@
 #include "../../XTensor.h"
 #include "../../XName.h"
 #include "../../XUtility.h"
+#include "../../XBLAS.h"
 #include "../movement/CopyValues.h"
+#include "../shape/IsSameShaped.h"
 #include "Sum.h"
 #include "Sum.cuh"
 #include "SumDim.h"
@@ -45,6 +47,8 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
     CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
                   "Unmatched tensors in addition!");

+    CheckDev(a->devID, b->devID);
+
     if(beta == 0){
         _CopyValues(a, c);
         return;
@@ -74,7 +78,7 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
     else {
         if (!a->isSparse && !b->isSparse) {
             CheckNTErrors(!c->isSparse, "Illegal use of sparse tensor in addition!");
             if (a->dataType == DEFAULT_DTYPE &&
                 b->dataType == DEFAULT_DTYPE &&
                 c->dataType == DEFAULT_DTYPE)
@@ -156,6 +160,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
     _Sum(a, b, a, beta);
 }

+/*
+tensor summation a = a + b * \beta (do it on site)
+keep the result in the tensor a and return nothing
+>> a - a tensor
+>> b - another tensor
+>> beta - the scaling factor
+*/
+void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
+{
+    _Sum(&a, &b, &a, beta);
+}
+
 /*
 return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
 >> a - a tensor
@@ -165,6 +182,8 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
 {
     if(a.order < b.order)
         return -1;
+    if(IsSameShaped(a, b))
+        return -1;

     int hitCount = 0;
     int hitDim = -1;
@@ -184,7 +203,7 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
 }

 /*
-tensor summation c = a + b * \beta (return a XTensor structure)
+tensor summation c = a + b * \beta (return an XTensor structure)
 make a new tensor c to keep the result and return it

 >> a - a tensor
@@ -227,4 +246,45 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
     return c;
 }

+/*
+tensor summation c = a + b * \beta
+>> a - a tensor
+>> b - another tensor
+>> beta - the scaling factor
+*/
+void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
+{
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
+    }
+
+    int n = GetSumDimIndex(a, b);
+
+    if (n == -1) {
+        /* call _Sum function */
+        _Sum(&a, &b, &c, beta);
+
+        /* tensor connections */
+        if (a.enableGrad && b.enableGrad) {
+            XLink::MakeLink(&a, &b, &c, MATH_SUM);
+            XLink::AddParamToHead(&c, beta);
+        }
+    }
+    else if (n >= 0 && n < a.order) {
+        /* call _SumDim function */
+        _SumDim(&a, &b, &c, n, beta);
+
+        /* tensor connections */
+        if (a.enableGrad && b.enableGrad) {
+            XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
+            XLink::AddParamToHeadInt(&c, n);
+            XLink::AddParamToHead(&c, beta);
+        }
+    }
+    else {
+        ShowNTErrors("Something is wrong!");
+    }
+}
+
 } // namespace nts(NiuTrans.Tensor)
@@ -45,15 +45,6 @@ void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta)
         c[i] = a[i] + b[i] * beta;
 }

-__global__
-void KernelADD(int * a, int * b, int * c, int size, int beta)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < size)
-        c[i] = a[i] + b[i] * beta;
-}
-
 /*
 tensor summation c = a + b * \beta (cuda version)

 >> a - a tensor
@@ -109,17 +100,6 @@ void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
             KernelADD << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, a->unitNum, beta);
         }
     }
-    else if (a->dataType == X_INT &&
-             b->dataType == X_INT &&
-             c->dataType == X_INT)
-    {
-        int gridSize[3], blockSize[3];
-        GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
-        dim3 blocks(gridSize[0]);
-        dim3 threads(blockSize[0]);
-        KernelADD << <blocks, threads >> >((int*)a->data, (int*)b->data, (int*)c->data, a->unitNum, (int)beta);
-    }
     else {
         // TODO!!
         ShowNTErrors("TODO!");
...
@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
 keep the result in the input tensor a and return nothing
 */
 void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
+void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);

 /*
 tensor summation c = a + b * \beta
@@ -41,6 +42,9 @@ make a new tensor c to keep the result and return it
 */
 XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);

+/* tensor summation c = a + b * \beta */
+void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
+
 } // namespace nts(NiuTrans.Tensor)

 #endif // __SUM_H__
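A hedged usage sketch of the two declarations added above (SumMe and the void-return Sum). The aggregate include and the tensor setup are assumptions made for illustration, not part of this commit; the five-argument NewTensor2DV2 call mirrors its use elsewhere in this diff:

#include "tensor/core/CHeader.h"   /* assumed include path, as in main.cpp */

using namespace nts;

void SumUsageSketch()
{
    XTensor * a = NewTensor2DV2(2, 3, X_FLOAT, -1, NULL);   /* devID -1 = CPU */
    XTensor * b = NewTensor2DV2(2, 3, X_FLOAT, -1, NULL);

    float av[6] = {1, 2, 3, 4, 5, 6};
    float bv[6] = {6, 5, 4, 3, 2, 1};
    a->SetData(av, 6);
    b->SetData(bv, 6);

    XTensor c;
    Sum(*a, *b, c, 0.5F);   /* c = a + b * 0.5; c is initialized on demand */
    SumMe(*a, *b);          /* a = a + b, beta defaulting to 1.0 */

    delete a;
    delete b;
}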
@@ -26,6 +26,7 @@
 #include "SumDim.h"
 #include "SumDim.cuh"
 #include "../shape/Unsqueeze.h"
+#include "../shape/IsSameShaped.h"
 #include "../../XName.h"
 #include "../../XUtility.h"
 #include "../movement/CopyValues.h"
@@ -64,25 +65,11 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
         return;
     }

-    if(XTensor::IsSameShaped(a, b)){
+    if(_IsSameShaped(a, b)){
         _Sum(a, b, c, beta);
         return;
     }

-    /*int dims[MAX_TENSOR_DIM_NUM];
-    for(int i = 0; i < a->order; i++)
-        dims[i] = 1;
-    dims[n] = a->GetDim(n);
-
-    XTensor * b2 = NewTensor(a->order, dims, b->dataType, b->denseRatio, b->devID, b->mem);
-    _CopyValues(b, b2);
-    _SumBroadcast(a, b2, c, beta);
-
-    DelTensor(b2);
-
-    return;*/
-
     if(a->devID >= 0 || b->devID >= 0 || c->devID >= 0){
 #ifdef USE_CUDA
         _CudaSumDim(a, b, c, n, beta);
@@ -205,8 +192,8 @@ i.e., a is summed with b by broadcasting
 */
 void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _SumDim function */
@@ -233,7 +220,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
 {
     CheckNTErrors(a->order == b->order, "Wrong tensor orders!");
     CheckNTErrors(a->order == c->order, "Wrong tensor orders!");
-    CheckNTErrors(a->order > 0, "TODO!");
+    CheckNTErrors(a->order >= 0, "TODO!");

     int order = a->order;
     int count = 0;
@@ -281,8 +268,8 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
             dimsS[0] = -dimsS[0];
             dimsT[0] = -dimsT[0];

-            XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
-            XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
+            XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
+            XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);

             if(count == 0)
                 source = b->data;
@@ -374,8 +361,8 @@ c = a + b * \beta
 */
 void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
 {
-    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
-        InitTensor(&c, &a);
+    if (!c.isInit || !IsSameShaped(a, c)) {
+        InitTensorV2(&c, &a);
     }

     /* call _SumBroadcast function */
...
@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 /*
 tensor summation of a tensor and a row vector
 c = a + b * \beta
 where a is a tensor and b is a row vector

 >> a - pointer to the data array of a
 >> b - pointer to the data array of b
@@ -87,17 +87,17 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
     int col = colIndex % colNum;
     int block = colIndex / colNum;

-    if (row >= rowNum || block >= blockNum)
+    if(row >= rowNum || block >= blockNum)
         return;

-    if (threadIdx.x == 0)
+    if(threadIdx.x == 0)
         bv[threadIdx.y] = b[row];

     __syncthreads();

     int offset = block * blockSize + row * colNum + col;

-    if (betaFired)
+    if(betaFired)
         c[offset] = a[offset] + bv[threadIdx.y] * beta;
     else
         c[offset] = a[offset] + bv[threadIdx.y];
...
@@ -22,6 +22,7 @@
 #include "../../XUtility.h"
 #include "../../XDevice.h"
 #include "../../XTensor.h"
+#include "../shape/IsSameShaped.h"
 #include "XTensorBLAS.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -224,9 +225,9 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
         XTensor * ai = (XTensor*)a->GetItem(i);
         XTensor * bi = (XTensor*)b->GetItem(i);
         XTensor * ci = (XTensor*)c->GetItem(i);
-        if (!XTensor::IsSameShaped(aim, ai) ||
-            !XTensor::IsSameShaped(bim, bi) ||
-            !XTensor::IsSameShaped(cim, ci))
+        if (!_IsSameShaped(aim, ai) ||
+            !_IsSameShaped(bim, bi) ||
+            !_IsSameShaped(cim, ci))
         {
             isUniform = false;
             break;
...
 /* NiuTrans.Tensor - an open-source tensor library
  * Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
  * All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
  */

 #include "../../XTensor.h"
 #include "../../XName.h"
@@ -131,7 +131,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE dataType)
 {
     if (!output.isInit || input.dataType != output.dataType) {
         float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
-        InitTensor(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
+        InitTensorV2(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
     }

     _ConvertDataType(&input, &output);
...
@@ -21,6 +21,7 @@
 #include "OnehotAndIndex.h"
 #include "OnehotAndIndex.cuh"
+#include "SetData.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor)
@@ -31,7 +32,7 @@ convert onehot tensor to index tensor
 >> index - index tensor, whose value is an integer number
 >> size - the last dimension size of the onehot tensor
 */
-void _OnehotToIndex(XTensor * onehot, XTensor * index, int size)
+void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size)
 {
     CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
     CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
@@ -78,13 +79,13 @@ make a new tensor to keep the result and return it
 >> size - the last dimension size of the onehot tensor
 << return - the index tensor
 */
-XTensor OnehotToIndex(XTensor & onehot, int size)
+XTensor OnehotToIndex(const XTensor & onehot, int size)
 {
     CheckNTErrors(onehot.GetDim(-1) == size, "Illegal tensor dimension!");
     CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")

     XTensor index;
-    InitTensor(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
+    InitTensorV2(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
     index.SetTMPFlag();

     _OnehotToIndex(&onehot, &index, size);
@@ -99,7 +100,8 @@ convert index tensor to onehot tensor
 >> onehot - onehot tensor, whose value is 0 or 1
 >> size - the last dimension size of the onehot tensor
 */
-void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP)
+void _IndexToOnehot(const XTensor * index, XTensor * onehot,
+                    int size, float labelSmoothingP)
 {
     CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
     CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
@@ -109,11 +111,14 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP)
     for (int i = 0; i < index->order; i++)
         CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");

-    onehot->SetZeroAll();
+    //onehot->SetZeroAll();

-#ifdef USE_CUDA
     float confidence = 1 - labelSmoothingP;
     float lowconfidence = labelSmoothingP / size;

+    _SetDataFixedFloat(onehot, lowconfidence);
+
+#ifdef USE_CUDA
     if(onehot->devID >= 0 && index->devID >= 0) {
         _CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
         return;
@@ -129,8 +134,49 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP)
     for (int i = 0; i < blockNum; i++) {
         int id = indexData[i];
         DTYPE * od = onehotData + i * stride;
-        od[id] = 1;
+        od[id] = confidence;
     }
 }
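The smoothing arithmetic introduced here, as a standalone sketch: every class starts at labelSmoothingP / size (the new _SetDataFixedFloat call) and the gold class is then raised to 1 - labelSmoothingP:

#include <cassert>
#include <cmath>

int main()
{
    /* hypothetical: 4 classes, smoothing p = 0.1, gold index 2 */
    const int size = 4;
    float labelSmoothingP = 0.1F;
    int gold = 2;

    float confidence = 1 - labelSmoothingP;         /* 0.9   */
    float lowconfidence = labelSmoothingP / size;   /* 0.025 */

    float onehot[size];
    for (int i = 0; i < size; i++)                  /* analogue of _SetDataFixedFloat */
        onehot[i] = lowconfidence;
    onehot[gold] = confidence;                      /* analogue of od[id] = confidence */

    /* the row sums to confidence + (size - 1) * lowconfidence = 0.975,
       i.e. this scheme does not renormalize to exactly 1 */
    float sum = 0;
    for (int i = 0; i < size; i++)
        sum += onehot[i];
    assert(std::fabs(sum - 0.975F) < 1e-6F);
    return 0;
}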
+/*
+convert an index array to an onehot tensor
+>> index - the index array, whose elements are integer numbers
+>> n - the number of indices
+>> onehot - onehot tensor, whose value is 0 or 1
+>> size - the last dimension size of the onehot tensor
+*/
+void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP)
+{
+    /*CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
+    CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
+
+    onehot->SetZeroAll();
+
+#ifdef USE_CUDA
+    if (onehot->devID >= 0) {
+        delete[] cudaIndex;
+        return;
+    }
+#endif
+
+    int blockNum = n;
+    int stride = size;
+
+    int * indexData = (int *)index;
+    int * onehotData = (int *)onehot->data;
+
+    for (int i = 0; i < blockNum; i++) {
+        int id = indexData[i];
+        int * od = onehotData + i * stride;
+        od[id] = 1;
+    }*/
+
+    /* wrap the raw indices in a tensor on the onehot tensor's device,
+       then reuse the tensor-based implementation above */
+    XTensor* cudaIndex = NewTensor1DV2(n, X_INT, onehot->devID);
+    cudaIndex->SetData(index, n);
+    _IndexToOnehot(cudaIndex, onehot, size, labelSmoothingP);
+    delete cudaIndex;
+}
@@ -143,7 +189,7 @@ make a new tensor to keep the result and return it
 >> labelSmoothingP - the label smoothing factor
 << return - the onehot tensor
 */
-XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
+XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
 {
     CheckNTErrors(index.dataType == X_INT, "The index tensor must be in X_INT!")
@@ -154,7 +200,7 @@ XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
     int * dim = new int[order + 1];
     memcpy(dim, index.dimSize, order * sizeof(int));
     dim[order] = size;
-    InitTensor(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
+    InitTensorV2(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);

     _IndexToOnehot(&index, &onehot, size, labelSmoothingP);
@@ -163,4 +209,4 @@ XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)

     return onehot;
 }

 } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
@@ -61,7 +61,7 @@ convert onehot tensor to index tensor (cuda version)
 >> index - index tensor, whose value is an integer number
 >> size - the last dimension size of the onehot tensor
 */
-void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size)
+void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size)
 {
     int devID = onehot->devID;
@@ -111,13 +111,10 @@ void KernelIndexToOnehot(DTYPE * onehotData, int * indexData, int blockNum, int
     int id = indexData[i];

-    //od[id] = 2.0;
-    //onehotData[i * stride + id] = 0.1;
     if (offset == id)
         od[offset] = confidence;
-    else{
-        od[offset] = lowconfidence;
-    }
+    //else
+    //    od[offset] = lowconfidence;
 }

 /*
@@ -127,7 +124,8 @@ convert index tensor to onehot tensor (cuda version)
 >> onehot - onehot tensor, whose value is 0 or 1
 >> size - the last dimension size of the onehot tensor
 */
-void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence)
+void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
+                        int size, float confidence, float lowconfidence)
 {
     int devID = onehot->devID;
@@ -155,4 +153,4 @@ void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence)

 #endif // USE_CUDA

 } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
@@ -27,10 +27,11 @@
 namespace nts{ // namespace nts(NiuTrans.Tensor)

 /* convert onehot tensor to index tensor (cuda version) */
-void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size);
+void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size);

 /* convert index tensor to onehot tensor (cuda version) */
-void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence);
+void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
+                        int size, float confidence, float lowconfidence);

 } // namespace nts(NiuTrans.Tensor)
...
@@ -27,18 +27,21 @@

namespace nts{ // namespace nts(NiuTrans.Tensor)

/* convert onehot tensor to index tensor */
void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size);

/* convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor OnehotToIndex(const XTensor & onehot, int num);

/* convert index tensor to onehot tensor */
void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP);

/* convert index tensor to onehot tensor */
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP);

/* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP);

} // namespace nts(NiuTrans.Tensor)
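For context, here is a minimal host-side sketch of the round trip these declarations describe. It is illustrative only: the 1D initializer and the concrete sizes are assumptions following the V2 naming used elsewhere in this commit, not part of the source.

/* sketch: indices -> smoothed onehot -> indices (illustrative, assumed helpers) */
XTensor index;
InitTensor1DV2(&index, 4, X_INT);                 /* four labels; assumed 1D int initializer */
XTensor onehot = IndexToOnehot(index, 10, 0.1F);  /* vocabulary size 10, label smoothing 0.1 */
XTensor back = OnehotToIndex(onehot, 10);         /* recover the label positions */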
......
@@ -25,6 +25,114 @@

namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
generate a tensor with selected data in index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - the selected index
>> dim - the dimension along which we do the job
*/
void _Select(const XTensor * a, XTensor * c, int* index, int dim)
{
    CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
    CheckNTErrors(a->order == c->order, "The input and output tensors must be of the same order!");
    CheckNTErrors(dim >= 0 && dim < a->order, "The input dimension is out of bounds!");
    CheckNTErrors(a->dataType == c->dataType, "The tensors must be of the same data type!");
int stride = 1;
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
    int copyTimes = 1;
    for (int i = 0; i < dim; i++)
        copyTimes *= a->dimSize[i];
int cot = c->dimSize[dim];
int blockSize = stride * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
char * s = (char*)a->data;
char * t = (char*)c->data;
for (int i = 0; i < copyTimes; i++) {
for (int j = 0; j < cot; ++j) {
XMemCopy(t + j * blockSize, c->devID, s + index[j] * blockSize, a->devID, blockSize);
}
s += stepSizeS;
t += stepSizeT;
}
}
/*
generate a tensor with selected data in index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - the selected index
>> dim - the dimension along which we do the job
*/
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim)
{
    if (index->devID >= 0) {
        int* indexCPU = new int[index->unitNum];
        XMemCopy(indexCPU, -1, index->data, index->devID, index->unitNum * sizeof(int));
        _Select(a, c, indexCPU, dim);
        delete[] indexCPU;
    }
    else {
        _Select(a, c, (int *)index->data, dim);
    }
}
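Note on the overload above: the index tensor is copied back to host memory whenever it lives on a GPU (devID >= 0), because the int* overload dereferences index[j] on the CPU, while XMemCopy handles the cross-device block copies of the actual data.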
/*
c = select(a)
>> a - input tensor
>> index - the selected index
>> dim - the dimension along which we do the job
<< return - the generated tensor with the selected data
*/
XTensor Select(const XTensor &a, XTensor &index, int dim)
{
int order = a.order;
int * dimSize = new int[order];
CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
    for (int i = 0; i < a.order; i++) {
        if (i == dim)
            dimSize[i] = index.dimSize[0];
        else
            dimSize[i] = a.dimSize[i];
    }
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMPFlag();
    /* call _Select function */
_Select(&a, &c, &index, dim);
/* tensor connection */
if (a.enableGrad) {
XLink::MakeLink(&a, &index, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim);
}
/* destroy variables */
delete[] dimSize;
return c;
}
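A minimal usage sketch of the new dimension-aware Select. The 1D int initializer and the SetData call are assumptions added for illustration; only Select itself comes from this commit.

/* sketch: keep rows 0 and 2 of a 4 x 8 matrix (dim = 0) */
XTensor a, index;
InitTensor2DV2(&a, 4, 8);              /* source tensor, default float type */
InitTensor1DV2(&index, 2, X_INT);      /* assumed 1D int initializer */
int chosen[2] = {0, 2};
index.SetData(chosen, 2);              /* assumed data-filling helper */
XTensor c = Select(a, index, 0);       /* c has shape 2 x 8 */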
/*
generate a tensor with selected data in range[low,high] along the given dimension

@@ -58,13 +166,12 @@ void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high)

    }

    int stride = 1;
    for(int i = dim + 1; i < a->order; i++)
        stride *= a->dimSize[i];

    int copyTimes = 1;
    for (int i = 0; i < dim; i++)
        copyTimes *= a->dimSize[i];

    int blockSize = stride * (high - low) * a->unitSize;
    int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
......
@@ -27,13 +27,16 @@

namespace nts{ // namespace nts(NiuTrans.Tensor)

/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, int* index, int dim);

/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim);

/*
generate a tensor with selected data c = select(a) (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Select(const XTensor &a, XTensor &index, int dim);

/*
generate a tensor with selected data in range[low,high] along the given dimension
......
@@ -470,7 +470,7 @@ void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift)

void _SetDataRand(XTensor * tensor, int rNum, int cNum)
{
    if (tensor == NULL || tensor->isInit == false || tensor->order != 2) {
        InitTensor2DV2(tensor, rNum, cNum);
    }

    _SetDataRand(tensor, 0.0F, 1.0F);

@@ -519,7 +519,7 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)

#ifdef USE_CUDA
    _CudaSetDataRand(tensor, lower, upper);
#endif
    //XTensor * t2 = NewTensorV2(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
    //_SetDataRand(t2, low, high);
    //_CopyValues(t2, tensor);
    //delete t2;
......
@@ -21,6 +21,7 @@

#include <math.h>
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h"
#include "Binary.cuh"
@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)

        _cudaFuncName(a, b, num);                                   \
        return;                                                     \
    }                                                               \
    CheckNTErrors((_IsSameShaped(a, b)),                            \
                  "Input tensors should have the same data type!"); \
    if (a->dataType == X_INT) {                                     \
        int * d = (int*)a->data;                                    \

@@ -112,7 +113,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)

    if (a->devID >= 0) {                                            \
        ShowNTErrors("No GPU devices support!")                     \
    }                                                               \
    CheckNTErrors((_IsSameShaped(a, b)),                            \
                  "Input tensors should have the same data type!"); \
    if (a->dataType == X_INT) {                                     \
        int * d = (int*)a->data;                                    \
@@ -169,8 +170,8 @@ XTensor funcName(const XTensor &a, T num)

    _funcName(&a, &b, num);                                         \
    if(a.enableGrad){                                               \
        XLink::MakeLink(&a, NULL, &b, operationId);                 \
        XLink::AddParamToHead(&b, num);                             \
    }                                                               \
    return b;                                                       \
}                                                                   \
template XTensor funcName<int>(const XTensor&, int);                \

@@ -181,8 +182,8 @@ template XTensor funcName<double>(const XTensor&, double);

template<class T>                                                   \
void funcName(const XTensor &a, XTensor &b, T num)                  \
{                                                                   \
    if (!b.isInit || !IsSameShaped(a, b)) {                         \
        InitTensorV2(&b, &a);                                       \
    }                                                               \
    _funcName(&a, &b, num);                                         \
    if (a.enableGrad) {                                             \
......
@@ -23,6 +23,7 @@

#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h"
#include "Binary.cuh"
@@ -89,7 +90,7 @@ void Kernel##funcName(T1 * a, T1 * b, int size, T2 num)

template<class T>                                              \
void _Cuda##funcName(const XTensor * a, XTensor * b, T num)    \
{                                                              \
    CheckNTErrors((_IsSameShaped(a, b)),                       \
                  "Input tensors should have the same type!"); \
    CheckNTErrors((a->isSparse == false), "TODO!");            \
                                                               \
......
@@ -21,6 +21,7 @@

#include "../../XTensor.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h"
#include "Clip.cuh"

@@ -43,7 +44,7 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)

    }
#endif

    CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");

    if (a->dataType == DEFAULT_DTYPE) {
        DTYPE* d = (DTYPE*)a->data;
@@ -80,11 +81,23 @@ keep the result in the input tensor a and return nothing

*/
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
{
    _Clip(a, a, lower, upper);
}

/*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor& a, DTYPE lower, DTYPE upper)
{
    _Clip(&a, &a, lower, upper);
}

/*
set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
>> lower - the lower border

@@ -93,8 +106,27 @@ make a new tensor to keep the result and return it

*/
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
{
    XTensor b(&a);
    b.SetTMPFlag();

    /* call _Clip function */
    _Clip(&a, &b, lower, upper);

    /* tensor connections */
    if (a.enableGrad) {
        XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
        XLink::AddParamToHead(&b, lower);
        XLink::AddParamToHead(&b, upper);
    }

    return b;
}

void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
{
    if (!b.isInit || !IsSameShaped(a, b)) {
        InitTensorV2(&b, &a);
    }

    /* call _Clip function */
    _Clip(&a, &b, lower, upper);

@@ -105,8 +137,6 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)

        XLink::AddParamToHead(&b, lower);
        XLink::AddParamToHead(&b, upper);
    }
}
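The three entry points above differ only in where the result is stored; a quick sketch (the initializer and SetDataRand calls are illustrative assumptions):

/* sketch: three ways to clip entries into [-1, 1] */
XTensor a, b;
InitTensor2DV2(&a, 2, 3);          /* default float type */
a.SetDataRand(-5.0F, 5.0F);        /* assumed random filler */
XTensor c = Clip(a, -1.0F, 1.0F);  /* returns a new tensor */
Clip(a, b, -1.0F, 1.0F);           /* writes into b, initializing it if needed */
ClipMe(a, -1.0F, 1.0F);            /* clips a on site */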
/*
......
@@ -17,11 +17,11 @@

/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/

#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h"
#include "Clip.cuh"

@@ -36,9 +36,8 @@ set each entry to its clip value (CUDA Kernel)

>> upper - the upper border
>> size - size of the data array
*/
__global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -53,6 +52,21 @@ void KernelClip(T * a, T * b, T lower, T upper, int size)

}

/*
set each entry to its clip value with float16 data type (CUDA Kernel)
This is for float16 computation
>> a - pointer to input data array
>> b - pointer to output data array
>> lower - the lower border
>> upper - the upper border
>> size - size of the data array
*/
__global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{
    return;
}

/*
set each entry to its clip value
>> a - input tensor we are processing
>> b - output tensor we are processing

@@ -61,7 +75,7 @@ set each entry to its clip value

*/
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
    CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
    CheckNTErrors((a->isSparse == false), "TODO!");

    int gridSize[3];

@@ -78,11 +92,8 @@ void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)

    if (a->dataType == DEFAULT_DTYPE) {
        KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
    }
    else if (a->dataType == X_FLOAT16) {
        KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
    }
    else {
        ShowNTErrors("TODO!");
......
@@ -29,8 +29,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)

#ifdef USE_CUDA

/* set each entry to its clip value (CUDA Kernel) */
__global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size);

/* set each entry to its clip value (CUDA Kernel) with float16 data type */
__global__
......
@@ -89,6 +89,53 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, DTYPE number) \

SIMPLE_COMPARE_FUNCTION_GPU(Equal, cudaIsEqual)
SIMPLE_COMPARE_FUNCTION_GPU(NotEqual, cudaIsNotEqual)
#define SIMPLE_MAX_MIN_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, DTYPE * c, int size) \
{ \
int i = blockDim.x * blockIdx.x + threadIdx.x; \
\
if (i < size) \
c[i] = (DTYPE)origFunc(a[i], b[i]); \
} \
__global__ \
void Kernel##funcName(__half * a, __half * b, __half * c, int size) \
{ \
return; \
} \
void _Cuda##funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
\
int gridSize[3]; \
int blockSize[3]; \
\
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
\
dim3 blocks(gridSize[0]); \
dim3 threads(blockSize[0]); \
\
int devIDBackup; \
ProtectCudaDev(a->devID, devIDBackup); \
\
if (a->dataType == DEFAULT_DTYPE) { \
Kernel##funcName<<<blocks, threads>>> \
((DTYPE*)a->data, (DTYPE*)b->data, \
(DTYPE*)c->data, a->unitNum); \
} \
else if (a->dataType == X_FLOAT16) { \
Kernel##funcName<<<blocks, threads>>> \
((__half*)a->data, (__half*)b->data, \
(__half*)c->data, a->unitNum); \
} \
else { \
ShowNTErrors("TODO!"); \
} \
\
BacktoCudaDev(a->devID, devIDBackup); \
}
SIMPLE_MAX_MIN_FUNCTION_GPU(Max, max)
SIMPLE_MAX_MIN_FUNCTION_GPU(Min, min)
#endif // USE_CUDA
......
@@ -34,6 +34,12 @@ void _CudaEqual(const XTensor * a, XTensor * b, DTYPE value);

/* check whether every entry is not equal to the given value (cuda version) */
void _CudaNotEqual(const XTensor * a, XTensor * b, DTYPE value);

/* return the maximum of two tensors for each item (cuda version) */
void _CudaMax(const XTensor * a, const XTensor * b, XTensor *c);

/* return the minimum of two tensors for each item (cuda version) */
void _CudaMin(const XTensor * a, const XTensor * b, XTensor *c);

#endif // USE_CUDA

} // namespace nts(NiuTrans.Tensor)
......
@@ -56,6 +56,36 @@ XTensor NotEqual(const XTensor & a, DTYPE value);

/* check whether every entry is not equal to the given value */
void NotEqual(const XTensor & a, XTensor & b, DTYPE value);

/* return the maximum of two tensors for each item */
void _Max(const XTensor * a, const XTensor * b, XTensor * c);

/* return the maximum of two tensors for each item (do it on site) */
void _MaxMe(XTensor * a, const XTensor * b);

/* return the maximum of two tensors for each item (do it on site) */
void MaxMe(XTensor & a, const XTensor & b);

/* return the maximum of two tensors for each item (return an XTensor structure) */
XTensor Max(const XTensor & a, const XTensor & b);

/* return the maximum of two tensors for each item */
void Max(const XTensor & a, const XTensor & b, XTensor & c);

/* return the minimum of two tensors for each item */
void _Min(const XTensor * a, const XTensor * b, XTensor * c);

/* return the minimum of two tensors for each item (do it on site) */
void _MinMe(XTensor * a, const XTensor * b);

/* return the minimum of two tensors for each item (do it on site) */
void MinMe(XTensor & a, const XTensor & b);

/* return the minimum of two tensors for each item (return an XTensor structure) */
XTensor Min(const XTensor & a, const XTensor & b);

/* return the minimum of two tensors for each item */
void Min(const XTensor & a, const XTensor & b, XTensor & c);

} // namespace nts(NiuTrans.Tensor)

#endif // end __COMPARE_H__
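A short sketch of the new element-wise pair; both inputs must share a shape, and the initializer and SetDataRand calls are illustrative assumptions:

/* sketch: element-wise maximum and minimum of two same-shaped tensors */
XTensor a, b;
InitTensor2DV2(&a, 2, 4);
InitTensor2DV2(&b, 2, 4);
a.SetDataRand(0.0F, 1.0F);  /* assumed random filler */
b.SetDataRand(0.0F, 1.0F);
XTensor c = Max(a, b);      /* c[i] = max(a[i], b[i]) */
MinMe(a, b);                /* a[i] = min(a[i], b[i]), done on site */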
@@ -22,6 +22,7 @@

#include <math.h>
#include "../../XTensor.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Normalize.h"
#include "Normalize.cuh"
@@ -46,26 +47,25 @@ void _Normalize(const XTensor * input, XTensor * output, int dim,

                const XTensor * mean, const XTensor * var,
                const XTensor * a, const XTensor * b, DTYPE epsilon)
{
    CheckNTErrors((_IsSameShaped(input, output)), "Unmatched input tensors!");
    CheckNTErrors((_IsSameShaped(a, b)), "Unmatched input tensors");
    CheckNTErrors((_IsSameShaped(mean, var)), "Unmatched input tensors");
    CheckNTErrors((input && output && mean && var && a && b), "Empty input tensors!");
    CheckNTErrors((dim >= 0 && dim < input->order), "Incorrect reduction dimension!");
    CheckNTErrors((input->order == mean->order + 1), "Incorrect reduction dimension!");

    int stride = 1;
    int strideNum = input->dimSize[dim];
    int blockSize = 1;
    int blockNum = 1;
    for (int i = 0; i < input->order; i++) {
        if (i < dim) {
            CheckNTErrors((input->dimSize[i] == mean->dimSize[i]), "Wrong size!");
            blockNum *= input->dimSize[i];
        }
        else if (i > dim) {
            CheckNTErrors((input->dimSize[i] == mean->dimSize[i - 1]), "Wrong size!");
            stride *= input->dimSize[i];
        }
    }
    blockSize = stride * strideNum;
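As a concrete check of the loop above (a worked example, not part of the source): for a 2 x 3 x 4 input normalized along dim = 1, the dimensions before dim give blockNum = 2, the reduced dimension gives strideNum = 3, and the dimensions after dim give stride = 4, so blockSize = stride * strideNum = 12 elements per normalization block, with mean and var each of shape 2 x 4.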
@@ -203,8 +203,8 @@ void Normalize(const XTensor &input, XTensor &output, int dim,

               const XTensor &mean, const XTensor &var,
               const XTensor &a, const XTensor &b, DTYPE epsilon)
{
    if (!output.isInit || !IsSameShaped(input, output)) {
        InitTensorV2(&output, &input);
    }

    /* call _Normalize function */
......