Commit e925cfd9 by huchi

Refactor the translator engine for NMT

parent 143e048c
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "./network/XNet.h"
#include "./tensor/XUtility.h"
#include "./tensor/function/FHeader.h"
#include "./tensor/core/CHeader.h"
#include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/Transformer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace fnnlm;
using namespace transformer;
int main( int argc, const char ** argv )
{
//_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
//_CrtSetBreakAlloc(2708);
TransformerMain(argc - 1, argv + 1);
//_CrtDumpMemoryLeaks();
return 0;
}
#include "Model.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
/* register a parameter with a unique name */
void Model::Register(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID)
{
parameters.AddParameter(name, dims, dataType, devID);
}
/* get a parameter by its name */
XTensor* Model::operator[](const char* name)
{
return parameters.GetParameter(name);
}
/* load a model from a binary file */
void Model::Load(const char* fn)
{
CheckNTErrors(parameters.list.Size() > 0, "empty tensor list");
FILE* file = fopen(fn, "rb");
CheckNTErrors(file, "unable to open the model file");
LongList offset(parameters.list.Size());
/* check the number of parameters */
unsigned long int number;
fread(&number, sizeof(number), 1, file);
CheckNTErrors(number == parameters.list.Size(), "parameter number not matched");
/* read the offsets of the parameters from the file */
offset.count = parameters.list.Size();
fread(offset.items, sizeof(long), offset.Size(), file);
/* read parameters from the file */
for (int i = 0; i < offset.Size(); i++) {
parameters.list[i]->BinaryRead(file, offset[i]);
}
fclose(file);
}
/* dump a model to a binary file */
void Model::Dump(const char* fn)
{
FILE* file = fopen(fn, "wb");
/* dump the number of parameters */
unsigned long int number = parameters.list.Size();
fwrite(&number, sizeof(number), 1, file);
/* dump the offsets of the parameters */
unsigned long int offset = sizeof(number);
for (int i = 0; i < parameters.list.Size(); i++) {
if (i > 0) {
offset += parameters.list[i - 1]->unitNum;
}
fwrite(&offset, sizeof(offset), 1, file);
}
/* dump parameters to the file */
for (int i = 0; i < parameters.list.Size(); i++) {
parameters.list[i]->BinaryDump(file);
}
fclose(file);
}
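/* for reference, the on-disk layout shared by Dump and Load above:
     [unsigned long]       number of parameters
     [unsigned long] x N   per-parameter offsets, in list order
     [raw data]            parameter values, written by BinaryDump in list order */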
/* get a parameter by its name */
XTensor* Model::Get(const char* name)
{
return parameters.GetParameter(name);
}
/* add a parameter to the list */
void Parameter::AddParameter(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID)
{
CheckNTErrors(GetParameter(name) == NULL, "the name must be unique");
IntList dim;
for (int i : dims) {
dim.Add(i);
}
XTensor* p = NewTensorV2(dims.size(), dim.items, dataType, devID);
strcpy(p->name, name);
list.Add(p);
}
/* get a parameter by its name */
XTensor* Parameter::GetParameter(const char* name)
{
for (int i = 0; i < list.Size(); i++) {
if (strcmp(list[i]->name, name) == 0)
return list[i];
}
/* if miss, return a null pointer */
return NULL;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* the model class
*
* $Created by: HU Chi (huchinlp@foxmail.com) 2019-09-12
*
*/
#ifndef __MODEL_H__
#define __MODEL_H__
#include <utility>
#include "../tensor/XGlobal.h"
#include "../tensor/XTensor.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
using Dim = std::initializer_list<int>;
/* Parameter is a base class for parameters */
struct Parameter {
public:
/* the parameter list */
TensorList list;
public:
/* add a parameter to the list */
void AddParameter(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID);
/* get a parameter by its name */
XTensor* GetParameter(const char* name);
};
/* Model is a base class for neural networks */
struct Model {
public:
Parameter parameters;
public:
/* load a model from a binary file */
void Load(const char* fn);
/* dump the model to a binary file */
void Dump(const char* fn);
/* get a parameter by its name */
XTensor* Get(const char* name);
/* get a parameter by its name */
XTensor* operator[] (const char* name);
/* register a parameter with a unique name */
void Register(const char* name, Dim dims, TENSOR_DATA_TYPE dataType, int devID);
};
}
#endif // __MODEL_H__
\ No newline at end of file
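A minimal usage sketch of the registry above (the model name, shapes and devID value are illustrative only, not part of the commit):

#include "Model.h"
using namespace nts;

struct TinyModel : public Model {
    TinyModel(int devID) {
        /* each parameter must have a unique name; AddParameter enforces this */
        Register("embedding", { 1000, 512 }, X_FLOAT, devID);
        Register("proj", { 512, 1000 }, X_FLOAT, devID);
    }
};

void Demo() {
    TinyModel m(-1);                /* -1 runs on the CPU */
    XTensor* e = m["embedding"];    /* lookup via operator[] (or Get) */
    e->SetZeroAll();
    m.Dump("tiny.model");           /* count + offsets + raw data */
    m.Load("tiny.model");           /* reads the parameters back in list order */
}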
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
#include <stdio.h>
#include "XNet.h"
#include "../tensor/XUtility.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/transformer/Transformer.h"
#include <fstream>
#include <string>
using namespace nts;
using namespace transformer;
void test() {
XTensor posEmbeddingBase;
int length = 5;
int eSize = 4;
int d = 4;
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT);
float* data = new float[posEmbeddingBase.unitNum];
for (int pos = 0; pos < length; pos++) {
float* dp = data + pos * eSize;
//int channelSize = eSize / 2;
//int offset = 0;
//for(int i = 0; i < channelSize; i++){
// dp[offset++] = (float)sin(pos/pow(10000.0F, 2.0F*i/(d - 2)));
//}
//for(int i = 0; i < channelSize; i++){
// dp[offset++] = (float)cos(pos/pow(10000.0F, 2.0F*i/(d - 2)));
//}
for (int k = 0; k < eSize; k++) {
if (k % 2 == 0) {
int i = k / 2;
dp[k] = (float)sin(pos / pow(10000.0F, 2.0F * i / d));
}
else {
int i = (k - 1) / 2;
dp[k] = (float)cos(pos / pow(10000.0F, 2.0F * i / d));
}
}
}
posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);
posEmbeddingBase.Dump(stderr);
delete[] data;
}
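/* for reference, the loop above fills the standard interleaved sinusoidal
   layout: PE(pos, 2i) = sin(pos / 10000^(2i/d)), PE(pos, 2i+1) = cos(pos / 10000^(2i/d)).
   A quick hand check with d = eSize = 4 and pos = 1 (values rounded):
       dp = { sin(1), cos(1), sin(0.01), cos(0.01) }
          ~ { 0.8415, 0.5403, 0.0100,    1.0000 } */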
struct A {
XTensor a;
void update(XTensor b) {
a = b;
}
};
void test2(A *a) {
XTensor x;
InitTensor2D(&x, 2, 3);
XTensor y;
InitTensor2D(&y, 3, 2);
float data[]{ 1,1,1,1,1,1 };
x.SetData(data, 6);
y.SetData(data, 6);
XTensor z;
z = MatrixMul(x, y);
a->update(z);
}
void TestMemory() {
int devID = 0;
int memSize = 1024;
XMem *mem = new XMem(devID, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
XTensor a;
InitTensor2D(&a, 5, 5, X_FLOAT, 0, mem);
float d[25]{ 0 };
for (int i = 0; i < 25; i++)
d[i] = float(i);
a.SetData(d, 25);
int index[]{ 0,1,2,3,4 };
for (int i = 0; i < 4; i++) {
XTensor srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 4 - i, X_INT, a.devID, a.mem);
InitTensor1D(&tgtIdx, 4 - i, X_INT, a.devID, a.mem);
srcIdx.SetData(index, srcIdx.unitNum);
tgtIdx.SetAscendingOrder(0);
a = CopyIndexed(a, 0, srcIdx, tgtIdx);
printf("\nround %d\n", i);
a.Dump(stderr);
}
delete mem;
}
int main(int argc, const char** argv)
{
TransformerMain(argc - 1, argv + 1);
return 0;
}
......@@ -173,6 +173,10 @@ private:
static
void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumAll */
static
void GradReduceSumAll(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node, bool isEfficient);
......
......@@ -281,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
smallsGrad.Add(tail->grad);
if(i > 1){
CheckNTErrors(XTensor::IsSameShaped(last, tail),
CheckNTErrors(_IsSameShaped(last, tail),
"Input tensors must be of the same size!");
}
......@@ -391,7 +391,7 @@ void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
/* if the tensor is used somewhere else, we need another SUM
for gradient accumulation */
else{
XTensor * inputGradTMP = NewTensorBuf(input, input->devID, input->mem);
XTensor * inputGradTMP = NewTensorBufV2(input, input->devID, input->mem);
_Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
_Sum(input->grad, inputGradTMP, input->grad);
......@@ -475,7 +475,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
somewhere else, we need another SUM for gradient
accumulation */
else{
XTensor * nodeGradTMP = NewTensorBuf(node, node->devID, node->mem);
XTensor * nodeGradTMP = NewTensorBufV2(node, node->devID, node->mem);
_Merge(&splits, nodeGradTMP, whereToSplit + 1);
_Sum(node->grad, nodeGradTMP, node->grad);
......@@ -501,7 +501,7 @@ void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
XTensor * output = node;
XTensor * input = income.tails[0];
XTensor * b = NewTensorBuf(input, input->devID, input->mem);
XTensor * b = NewTensorBufV2(input, input->devID, input->mem);
XNoder::MakeGrad(input);
int i = income.GetParamInt(0);
......@@ -543,7 +543,7 @@ void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
CheckNTErrors(output->unitNum == input->unitNum * dSize, "Wrong tensor size!");
XTensor * g = NewTensorBuf(input->grad, input->devID, input->mem);
XTensor * g = NewTensorBufV2(input->grad, input->devID, input->mem);
_ReduceSum(output->grad, g, dim);
_Sum(input->grad, g, input->grad);
......
......@@ -77,104 +77,20 @@ backward propagation to obtain gradient
>> root - root node (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
void XNet::Backward(XTensor &root)
{
TensorList roots(1);
roots.Add(&root);
TensorList golds(1);
golds.Add(NULL);
TensorList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
TensorList roots(1);
roots.Add(&root);
TensorList golds(1);
golds.Add(&gold);
TensorList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
>> root - root node (output) of the network
>> gold - gold standard for the output
>> padding - specify a target value that is ignored and does not contribute to the gradient computation
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{
TensorList roots(1);
roots.Add(&root);
TensorList golds(1);
golds.Add(&gold);
TensorList paddings(1);
paddings.Add(&padding);
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
{
TensorList golds(roots.count);
TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++) {
golds.Add(NULL);
paddings.Add(NULL);
}
Backward(roots, golds, paddings, loss);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
{
TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++)
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
Backward(roots);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> paddings - specify a target value that is ignored
>> loss - name of loss function
*/
void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss)
void XNet::Backward(TensorList &roots)
{
Traverse(roots);
......@@ -187,39 +103,6 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings,
node->visitMark = NODE_UNFINISHED;
}
//XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
/*for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XTensor * padding = (XTensor*)paddings.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;*/
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is of no use in the
following process of back-propagation */
/*if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, padding, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
else {
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, padding, loss);
}
}*/
/* we compute dE/dy (y is the output) if no predefined activation function is used */
/*else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, NULL, loss);
}
}*/
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
......@@ -460,7 +343,6 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
}
}
/*
search for a node in a top-down manner by its name
>> top - the top most node
......
......@@ -61,25 +61,11 @@ struct XNet
void Clear();
/* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(XTensor &root);
/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(TensorList &roots);
/* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false);
......
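With the loss-specific overloads gone, callers are expected to build the loss into the graph itself and backpropagate from that node; a hedged sketch of the new convention (CrossEntropy here stands in for whatever loss node the network actually builds):

XTensor output;                    /* network output, built from autograd ops */
XTensor gold;                      /* gold-standard answers */
XTensor loss;
loss = CrossEntropy(output, gold); /* the loss is just another graph node */

XNet net;
net.Backward(loss);                /* gradients flow back from the loss node */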
......@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL)
return;
if(!XTensor::IsSameShaped(node, node->grad)){
if(!_IsSameShaped(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
......
......@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#include "../tensor/core/CHeader.h"
#ifndef __XNODER_H__
#define __XNODER_H__
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* This is a simple implementation of the feed-forward network-based language
* model (FNNLM). See more details about FNNLM in
* "A Neural Probabilistic Language Model" by Bengio et al.
* Journal of Machine Learning Research 3 (2003) 1137-1155
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
* Today I was awarded the title of most popular teacher in our college.
* It was a great honour for me!
*/
#ifndef __FNNLM_H__
#define __FNNLM_H__
#include "../../tensor/XGlobal.h"
#include "../../tensor/XTensor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
namespace fnnlm
{
#define _EXIT_(x) // exit(x)
#define CheckErrors(x, msg) { if(!(x)) { fprintf(stderr, "Error! calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); _EXIT_(1); } }
#define ShowErrors(msg) { { fprintf(stderr, "Error! (%s line %d): %s\n", __FILENAME__, __LINE__, msg); _EXIT_(1); } }
#define MAX_N_GRAM 8
#define MAX_HIDDEN_NUM 8
/* an n-gram = a sequence of n words
words[0..n-2] is the history, and
words[n-1] is the word for prediction. */
struct NGram
{
int words[MAX_N_GRAM];
};
/* fnn model */
struct FNNModel
{
/* word embedding */
XTensor embeddingW;
/* parameter matrix of each hidden layer
hidden layer: y = f(x * w + b)
where x is the input, y is the output, w is
the transformation (parameter) matrix, b is
the bias and f() is the activation function. */
XTensor hiddenW[MAX_HIDDEN_NUM];
/* bias of each hidden layer */
XTensor hiddenB[MAX_HIDDEN_NUM];
/* parameter matrix of the output layer */
XTensor outputW;
/* bias of the output layer */
XTensor outputB;
/* order of the language model */
int n;
/* embedding size */
int eSize;
/* number of hidden layers */
int hDepth;
/* hidden layer size */
int hSize;
/* vocabulary size */
int vSize;
/* id of the device for running the model */
int devID;
/* indicates whether we use memory pool */
bool useMemPool;
/* memory pool */
XMem * mem;
FNNModel(){ n = -1; vSize = -1; hDepth = 0; devID = -1; mem = NULL; };
~FNNModel(){delete mem;};
};
/* the network built on the fly */
struct FNNNet
{
/* embedding result of the previous n - 1 words */
XTensor embeddings[MAX_N_GRAM];
/* concatenation of embeddings */
XTensor embeddingCat;
/* output of the hidden layers */
XTensor hiddens[MAX_HIDDEN_NUM];
/* state of the hidden layers (before activation function) */
XTensor hiddenStates[MAX_HIDDEN_NUM];
/* state before softmax */
XTensor stateLast;
/* output of the net */
XTensor output;
};
/* entrance of the program */
int FNNLMMain(int argc, const char ** argv);
};
#endif
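As a concrete shape walk-through of the structs above (illustrative sizes, not defaults from this file): with n = 3, eSize = 64, hSize = 128 and vSize = 10000, the two history words are looked up in embeddingW to give two 64-dim vectors, concatenated into a 128-dim input x, transformed by each hidden layer as y = f(x * w + b), and finally mapped by outputW and outputB to a 10000-way softmax over the next word.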
......@@ -15,7 +15,7 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
......@@ -41,12 +41,12 @@ public:
public:
bool IsEmpty(){
bool IsEmpty() {
return (k == NULL) && (v == NULL);
}
void Clear() {
if (k && v && k->id > 0 && v->id >0) {
if (k && v && k->id > 0 && v->id > 0) {
DelTensor(k);
DelTensor(v);
}
......@@ -84,32 +84,29 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* head number */
int nhead;
/* transformation matrix for query */
/* transformation matrix for Q */
XTensor wq;
/* bias for query */
/* bias for Q */
XTensor bq;
/* transformation matrix for query */
/* transformation matrix for K */
XTensor wk;
/* bias for query */
/* bias for K */
XTensor bk;
/* transformation matrix for query */
/* transformation matrix for V */
XTensor wv;
/* bias for query */
/* bias for V */
XTensor bv;
/* max relative window size */
XTensor rpEmbK;
/* RPR emb */
XTensor rp_embedding_k;
/* transformation after dot-product attention */
XTensor wa;
......@@ -140,7 +137,7 @@ public:
DTYPE dropoutP;
/* max relative window size */
int maxRP;
int max_relative_position;
public:
......@@ -151,23 +148,23 @@ public:
~T2TAttention();
/* initialize the model */
void InitModel(int argc, char ** argv,
void InitModel(int argc, char** argv,
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
int myDevID = -1);
/* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor *mask,
XTensor Make(XTensor& k, XTensor& q, XTensor& v, XTensor* mask,
bool isTraining, Cache* cache, int cacheType);
/* make the attention network given keys, queries and values (after linear transformation) */
XTensor MakeAttention(XTensor *k, XTensor *q, XTensor *v, const XTensor *mask, bool isTraining, bool isEnc);
XTensor MakeAttention(XTensor* k, XTensor* q, XTensor* v, const XTensor* mask, bool isTraining, bool is_encoder);
/* make the attention network given keys, queries and values (after linear transformation) */
XTensor MakeRPRAttention(XTensor *k, XTensor *q, XTensor *v, XTensor *mask, bool isTraining, bool isEnc);
XTensor MakeRPRAttention(XTensor* k, XTensor* q, XTensor* v, XTensor* mask, bool isTraining, bool is_encoder);
void GetRPEmbedding(XTensor* embMatrix, const int lenQ, const int lenKV, const int maxRelativeLen, const int device_id, const bool isEnc);
void GetRPEmbedding(XTensor* emb_matrix, const int len_q, const int len_kv, const int max_relative_length, const int device_id, const bool is_encoder);
void RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* attention, const bool isKey);
void RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* attention, const bool is_key);
};
}
......
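For orientation, the renamed members implement the usual projections; in standard notation (not restated in the commit itself), Q = x * wq + bq, K = m * wk + bk and V = m * wv + bv, and MakeAttention computes softmax(Q * K^T / sqrt(d_k)) * V per head, with rp_embedding_k supplying the relative-position keys in the RPR variant.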
......@@ -61,29 +61,27 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
>> myMem - the memory pool
*/
void AttDecoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem)
int myDevID)
{
//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID;
mem = myMem;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 3);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamInt(argc, argv, "vsizetgt", &vSize, 34040);
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);
CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
/* embedding model */
embedder.InitModel(argc, argv, devID, mem, false);
embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
......@@ -96,11 +94,11 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) {
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
fnns[i].InitModel(argc, argv, myDevID, myMem);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID, myMem);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
}
decodeLayerNorm->InitModel(argc, argv, myDevID);
}
......@@ -114,7 +112,7 @@ make the decoding network
>> isTraining - indicates whether the model is used for training
<< return - the output tensor of the encoder
*/
XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, const XTensor *mask, XTensor &maskEncDec, bool isTraining)
XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, XTensor &maskEncDec, bool isTraining)
{
XTensor x;
......
......@@ -22,7 +22,6 @@
#ifndef __T2TDECODER_H__
#define __T2TDECODER_H__
#include <array>
#include "T2TEncoder.h"
namespace transformer
......@@ -38,9 +37,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* layer number */
int nlayer;
......@@ -103,10 +99,10 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
int myDevID = -1);
/* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, const XTensor *mask, XTensor &maskEncDec, bool isTraining);
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, XTensor &maskEncDec, bool isTraining);
};
}
......
......@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder()
{
devID = -1;
mem = NULL;
vSize = -1;
maxLength = -1;
}
......@@ -46,12 +45,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc)
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{
devID = myDevID;
mem = myMem;
if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1);
......@@ -65,7 +62,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "pad", &padIdx, 1);
InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);
maxLength = maxLength + 1 + 1;
DTYPE v = 1.0F/(float)sqrt((float)eSize);
......@@ -83,7 +80,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
{
InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
......@@ -101,14 +98,13 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
}
/* zero pad */
/* padding zeros */
int padStart = padIdx * eSize;
for (int i = padStart; i < padStart + eSize; i++)
for (int i = padStart; i < padStart + eSize; ++i)
data[i] = 0.F;
posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);
delete[] data;
}
......
......@@ -41,9 +41,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */
int vSize;
......@@ -74,7 +71,7 @@ public:
~T2TEmbedder();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL, bool isEnc = true);
void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
/* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length, int padIdx);
......
......@@ -53,20 +53,18 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id
>> myMem - the memory pool
*/
void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem)
int myDevID)
{
devID = myDevID;
mem = myMem;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 35);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt(argc, argv, "vsize", &vSize, 34040);
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);
CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
......@@ -82,12 +80,11 @@ void AttEncoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
fnns[i].InitModel(argc, argv, myDevID, myMem);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
}
encodeLayerNorm->InitModel(argc, argv, myDevID, myMem);
encodeLayerNorm->InitModel(argc, argv, myDevID);
}
/*
......@@ -104,6 +101,10 @@ XTensor AttEncoder::Make(XTensor &input, XTensor *mask, XTensor &maskEncDec, boo
x = embedder.Make(input, 0);
/* dropout */
if(isTraining && dropoutP > 0)
x = Dropout(x, dropoutP);
for(int i = 0; i < nlayer; i++){
XTensor att;
XTensor ln;
......
......@@ -65,9 +65,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* layer number */
int nlayer;
......@@ -118,7 +115,7 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL);
int myDevID = -1);
/* make the encoding network */
XTensor Make(XTensor &input, XTensor *mask, XTensor &maskEncDec, bool isTraining);
......
......@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
mem = myMem;
float minmax = 0;
......@@ -68,7 +66,7 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
InitTensor2DV2(&w2, outSize, hSize, X_FLOAT, devID);
InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
fnnLayerNorm.InitModel(argc, argv, myDevID, myMem);
fnnLayerNorm.InitModel(argc, argv, myDevID);
//float scale = 1.0F;
//float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize));
......
......@@ -37,9 +37,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* size of input vector */
int inSize;
......@@ -76,7 +73,7 @@ public:
~T2TFNN();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */
XTensor Make(XTensor &input, bool isTraining);
......
......@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN()
{
devID = -1;
mem = NULL;
d = 0;
}
......@@ -46,21 +45,19 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
mem = myMem;
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1DV2(&b, d, X_FLOAT, devID);
InitTensor1D(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID);
//w.SetDataRand(1.0F, 1.0F);
//b.SetZeroAll();
w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll();
}
/*
......
......@@ -37,9 +37,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* the transformation matrix w */
XTensor w;
......@@ -57,7 +54,7 @@ public:
~T2TLN();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */
XTensor Make(XTensor &input);
......
......@@ -35,7 +35,9 @@ XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
XTensor base;
XTensor lp;
base = (length + 5)/(1.0F + 5.0F);
//base = ScaleAndShift(ScaleAndShift(length, 0, 5.0F), 1.0F/(5 + 1));
base = (length + 5)/(1 + 5);
lp = Power(base, alpha);
return lp;
......
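As a worked example of the GNMT penalty above: with length = 10 and alpha = 0.6, base = (10 + 5) / 6 = 2.5 and lp = 2.5^0.6 ~ 1.73, so a 10-token hypothesis is typically normalized by about 1.73 rather than by its raw length.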
......@@ -40,9 +40,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* the encoder */
AttEncoder * encoder;
......@@ -71,9 +68,6 @@ public:
/* initialize the model */
void InitModel(int argc, char ** argv);
/* reset cache for decoder */
void ResetCache();
/* make the encoding network */
XTensor MakeEncoder(XTensor &input, XTensor *mask, bool isTraining);
......@@ -110,9 +104,6 @@ public:
void Read(const char * fn);
};
void FastRead(XTensor* x, FILE* f);
void FastDump(XTensor* x, FILE* f);
void ConvertModelFile(const TensorList* params, const char* src, const char* tgt);
}
#endif
......@@ -25,14 +25,12 @@
#include "T2TEmbedding.h"
#include "../../tensor/core/CHeader.h"
namespace transformer
{
/* constructor */
T2TOutput::T2TOutput()
{
devID = -1;
mem = NULL;
vSize = -1;
inSize = -1;
hSize = -1;
......@@ -48,12 +46,10 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
>> myMem - the memory pool
*/
void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{
devID = myDevID;
mem = myMem;
float minmax = 0;
......@@ -62,14 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
//float scale = 1.0F;
//float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
//w.SetDataRand(-finfout, finfout);
//DTYPE v = 1.0F/(float)sqrt((float)hSize);
//w.SetDataRandn(0, v);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID);
}
/*
......@@ -83,7 +72,6 @@ XTensor T2TOutput::Make(XTensor &input)
XTensor &x = input;
return Softmax(MMul(x, X_NOTRANS, w, X_TRANS), -1);
//return MulAndShift(x, X_NOTRANS, w, X_TRANS, b);
}
/*
......
......@@ -38,9 +38,6 @@ public:
/* device id */
int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */
int vSize;
......@@ -61,7 +58,7 @@ public:
~T2TOutput();
/* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */
XTensor Make(XTensor &input);
......
......@@ -146,7 +146,7 @@ public:
~T2TPredictor();
/* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state, XTensor * encoding);
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);
/* set the start symbol */
void SetStartSymbol(int symbol);
......@@ -155,12 +155,13 @@ public:
void Read(T2TModel * model, T2TStateBundle * state);
/* predict the next state */
void Predict(T2TStateBundle * next, XTensor & encoding,
XTensor & inputEnc, XTensor & paddingEnc,
XTensor& nonFinished, bool updateFinished);
void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc);
/* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state);
/* get the predictions of the previous step */
XTensor GetLastPrediction(T2TStateBundle* state);
};
}
......
......@@ -62,12 +62,6 @@ private:
/* start symbol */
int startSymbol;
/* scalar of the input sequence (for max number of search steps) */
float scalarMaxLength;
/* indicate whether the early stop strategy is used */
bool isEarlyStop;
public:
/* constructor */
T2TSearch();
......@@ -79,8 +73,7 @@ public:
void Init(int argc, char ** argv);
/* search for the most promising states */
void Search(T2TModel * model, XTensor * input, XTensor * padding,
XTensor * output, XTensor * score);
void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);
/* preparation */
void Prepare(int myBatchSize,int myBeamSize);
......@@ -101,7 +94,7 @@ public:
void FillHeap(T2TStateBundle * beam);
/* save the output sequences in a tensor */
void Dump(XTensor * output, XTensor * score);
void Dump(XTensor * output);
/* check if the token is an end symbol */
bool IsEnd(int token);
......@@ -109,17 +102,6 @@ public:
/* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum);
/* penalize beams that completed */
int UpdateCompleted(T2TStateBundle * beam, XTensor & encoding,
XTensor& inputEnc, XTensor& paddingEnc,
IntList completedStates, XTensor &nonFinished);
/* check whether all hypotheses are completed */
bool IsAllCompleted(T2TStateBundle * beam);
/* check if any hypotheses are completed */
IntList IsAnyCompleted(T2TStateBundle * beam);
/* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam);
};
......
......@@ -15,17 +15,18 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include <math.h>
#include "T2TUtility.h"
#include "T2TTester.h"
#include "T2TSearch.h"
#include "T2TUtility.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h"
#include "..//..//tensor/XTensor.h"
using namespace nts;
......@@ -35,7 +36,6 @@ namespace transformer
/* constructor */
T2TTester::T2TTester()
{
}
/* de-constructor */
......@@ -44,39 +44,23 @@ T2TTester::~T2TTester()
}
/* initialize the model */
void T2TTester::Init(int argc, char** argv)
void T2TTester::Init(int argc, char ** argv)
{
LoadParamInt(argc, argv, "vsize", &vSize, 1);
LoadParamInt(argc, argv, "vsize", &vSize, 34040);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1);
LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, false);
LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, true);
seacher.Init(argc, argv);
}
Result ExtractRes(XTensor& output, IntList& indices, int i) {
Result res;
XTensor sent, srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 1, X_INT, output.devID);
int idx[]{ i };
srcIdx.SetData(idx, 1);
InitTensor(&tgtIdx, &srcIdx);
tgtIdx.SetAscendingOrder(0);
sent = CopyIndexed(output, 0, srcIdx, tgtIdx);
res.data.Add((int*)sent.data, sent.unitNum);
res.id = indices[i];
return res;
}
/*
test the model
>> fn - test data file
>> ofn - output data file
>> model - model that is trained
*/
void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
{
int wc = 0;
int wordCount = 0;
......@@ -85,11 +69,10 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
int batchCount = 0;
/* data files */
FILE* ofile = fopen(ofn, "w");
FILE* ofile = fopen(ofn, "wb");
CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID;
XMem* mem = model->mem;
double startT = GetClockSec();
......@@ -102,23 +85,44 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
/* an array that keeps the sequences */
int* seqs = new int[MILLION];
batchLoader.Init(fn, 100, true);
batchLoader.Init(fn);
int count = 0;
while (!batchLoader.IsEmpty()) {
while (!batchLoader.IsEmpty())
{
count++;
wordCount = 0;
/*if (count % 10 == 0 && sentBatch < 128)
sentBatch *= 2;*/
/* reset cache for decoder */
model->ResetCache();
for (int i = 0; i < model->decoder->nlayer; ++i) {
model->decoder->selfCache[i].Clear();
model->decoder->contextCache[i].Clear();
}
IntList indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID);
vector<int> indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID);
XTensor output, score;
XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output, &score);
seacher.Search(model, &batchEnc, &paddingEnc, &output);
for (int i = 0; i < indices.size(); ++i) {
Result res;
XTensor sent, srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 1, X_INT, output.devID);
int idx[]{i};
srcIdx.SetData(idx, 1);
InitTensor(&tgtIdx, &srcIdx);
SetAscendingOrder(tgtIdx, 0);
sent = CopyIndexed(output, 0, srcIdx, tgtIdx);
res.values = sent;
res.id = indices[i];
batchLoader.resBuffer.emplace_back(res);
}
for (int i = 0; i < indices.Size(); i++)
batchLoader.resBuffer.Add(ExtractRes(output, indices, i));
wc = batchEnc.GetDim(-1);
wordCount += wc;
......@@ -126,41 +130,48 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
sentCount += batchEnc.GetDim(-2);
batchCount += 1;
if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] elapsed=%.1fs, sent=%d, sword=%d\n", elapsed, sentCount, wordCount);
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
}
}
batchLoader.SortRes();
batchLoader.RerankRes();
for (int i = 0; i < batchLoader.resBuffer.Size(); i++)
Dump(ofile, batchLoader.resBuffer[i].data);
for (auto res : batchLoader.resBuffer) {
Dump(ofile, &res.values);
}
fclose(ofile);
delete[] seqs;
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d)\n", elapsed, wordCountTotal, sentCount);
}
/*
dump the result into the file
>> file - data file
>> output - output list
>> output - output tensor
*/
void T2TTester::Dump(FILE* file, IntList& output)
void T2TTester::Dump(FILE * file, XTensor * output)
{
for (int i = 0; i < output.Size(); i++) {
int w = output[i];
if (w < 0) {
if (i == 0)
return;
else
break;
}
int seqLength = output->GetDim(-1);
for (int i = 0; i < output->unitNum; i += seqLength) {
for (int j = 0; j < seqLength; j++) {
int w = output->GetInt(i + j);
fprintf(file, "%d ", w);
if (w < 0)
break;
}
fprintf(file, "\n");
}
}
}
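For example (illustrative values), a row { 5, 6, 2, -1 } in the output tensor is written as "5 6 2 -1 " followed by a newline: the negative end-of-sequence marker is printed before the inner loop breaks, and the remaining positions of that row are skipped.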
......@@ -62,7 +62,7 @@ public:
void Test(const char * fn, const char * ofn, T2TModel * model);
/* dump the result into the file */
void Dump(FILE * file, IntList& output);
void Dump(FILE * file, XTensor * output);
};
}
......
......@@ -22,7 +22,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "T2TUtility.h"
namespace transformer
{
......@@ -115,11 +114,4 @@ void ShowParams(int argc, char ** argv)
fprintf(stderr, "\n");
}
/* dump tensors */
void DumpTensors(std::initializer_list<nts::XTensor*> list) {
int i(0);
for (auto& x : list)
x->Dump(stderr, std::to_string(++i).c_str());
}
}
......@@ -23,9 +23,6 @@
#define __T2TUTILITY_H__
#include <stdio.h>
#include <string>
#include "..//..//tensor/XTensor.h"
#include <initializer_list>
namespace transformer
{
......@@ -41,10 +38,6 @@ void LoadParamFloat(int argc, char ** argv, const char * name, float * p, float
/* show arguments */
void ShowParams(int argc, char ** argv);
/* dump tensors */
void DumpTensors(std::initializer_list<nts::XTensor*> list);
extern int llnum;
extern FILE * tf;
......
......@@ -29,35 +29,10 @@
#include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h"
#include "..//..//model/Model.h"
namespace transformer
{
struct AttModel : Model {
AttModel(int devID) {
Register("w1", {2,3,4}, X_FLOAT, devID);
Register("b1", {2,3,4}, X_FLOAT, devID);
Register("3", {2,3,4}, X_FLOAT, devID);
}
};
struct Transformer {
AttModel *att;
Transformer(int devID) {
att = new AttModel(devID);
}
~Transformer() {
delete att;
}
};
void test() {
Transformer model(0);
model.att->Get("w1")->SetZeroAll();
model.att->Get("w1")->Dump(stderr);
}
int TransformerMain(int argc, const char ** argv)
{
if(argc == 0)
......@@ -71,43 +46,24 @@ int TransformerMain(int argc, const char ** argv)
ShowParams(argc, args);
bool convertFile = false;
bool isBeamSearch = false;
bool convertModel = false;
char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH];
char * rawFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH];
char * outputFN = new char[MAX_LINE_LENGTH];
char * rawModel = new char[MAX_LINE_LENGTH];
LoadParamString(argc, args, "model", modelFN, "");
LoadParamString(argc, args, "rawmodel", rawModel, "");
LoadParamString(argc, args, "input", testFN, "");
LoadParamString(argc, args, "rawinput", rawFN, "");
LoadParamString(argc, args, "rawModel", rawModel, "");
LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, args, "output", outputFN, "");
LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
LoadParamBool(argc, args, "convertfile", &convertFile, false);
LoadParamBool(argc, args, "convertmodel", &convertModel, false);
srand((unsigned int)time(NULL));
T2TModel model;
model.InitModel(argc, args);
/* convert test file from text to binary */
if (convertFile) {
DataSet::ConvertFile(rawFN, testFN);
}
/* convert parameters from text to binary */
if (convertModel) {
TensorList params(100);
model.GetParams(params);
ConvertModelFile(&params, rawModel, modelFN);
}
/* load the model if neccessary */
if(strcmp(modelFN, ""))
model.Read(modelFN);
......@@ -119,6 +75,7 @@ int TransformerMain(int argc, const char ** argv)
searcher.Test(testFN, outputFN, &model);
}
delete[] trainFN;
delete[] modelFN;
delete[] testFN;
delete[] outputFN;
......
......@@ -19,23 +19,27 @@
* $Created by: HU Chi (huchinlp@foxmail.com) 2019-04-05
*/
#include "DataSet.h"
#include "StringUtil.h"
#include <string>
#include <vector>
#include <fstream>
#include <algorithm>
#include "DataSet.h"
#include "StringUtil.h"
#include "../../../tensor/XUtility.h"
#include "..//..//..//tensor/XUtility.h"
using namespace nts;
using namespace std;
/* sort results by their ids */
void DataSet::SortRes()
{
auto cmp = [](Result& a, Result& b) {
bool Compare(Example& a, Example& b) {
return a.values.size() > b.values.size();
}
bool CompareRes(Result& a, Result& b) {
return a.id < b.id;
};
std::sort(resBuffer.items, resBuffer.items + resBuffer.count, cmp);
}
void DataSet::RerankRes(){
sort(resBuffer.begin(), resBuffer.end(), CompareRes);
}
/*
......@@ -43,30 +47,27 @@ load data from the file to the buffer
*/
void DataSet::LoadDataToBuffer()
{
string line;
buffer.clear();
bufferUsed = 0;
srcBuffer.Clear();
bufferSize = min(bufferSize, exampleNumber);
const string tokenDelimiter = " ";
for (int i = 0; i < bufferSize; i++) {
long off = offset[index++];
IntList data(off);
data.count = off;
fread(data.items, sizeof(int), off, fp);
int id = 0;
while (getline(*fp, line)) {
vector<int> values = Split<int>(line, tokenDelimiter);
Example example;
example.id = id++;
example.data = data;
srcBuffer.Add(example);
example.values = values;
buffer.emplace_back(example);
}
if (fp->eof()) {
fp->seekg(fp->beg);
}
if (sortBuffer) {
auto cmp = [](Example& a, Example& b) {
return a.data.Size() > b.data.Size();
};
std::sort(srcBuffer.items, srcBuffer.items + srcBuffer.count, cmp);
sort(buffer.begin(), buffer.end(), Compare);
}
resBuffer.reserve(buffer.size());
}
/*
......@@ -77,52 +78,43 @@ select a field and generate a mini-batch by indices
>>> devID - device id, -1 for CPU
*/
IntList DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batchSize, int devID)
vector<int> DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc,
size_t batchSize, int devID)
{
if(srcBuffer.count == 0)
LoadDataToBuffer();
size_t realBatchSize = batchSize;
/* real batch size */
if ((srcBuffer.Size() - bufferUsed) < batchSize) {
realBatchSize = srcBuffer.Size() - bufferUsed;
if ((buffer.size()-bufferUsed) < batchSize) {
realBatchSize = buffer.size()-bufferUsed;
}
/* get the maximum sentence length in a mini-batch */
size_t maxLen = 0;
if (realBatchSize == 1) {
maxLen = srcBuffer[bufferUsed].data.Size();
}
for (size_t i = 0; i < realBatchSize - 1; i++) {
maxLen = max(maxLen, srcBuffer[bufferUsed + i].data.Size());
if (realBatchSize == 1)
maxLen = buffer[bufferUsed].values.size();
for (size_t i = 0; i < realBatchSize - 1; ++i) {
maxLen = max(maxLen, buffer[bufferUsed+i].values.size());
}
CheckNTErrors(maxLen != 0, "wrong length detected");
int* batchValues = new int[maxLen * realBatchSize];
float* paddingValues = new float[maxLen * realBatchSize];
int* batchValues = new int[realBatchSize * maxLen];
float* paddingValues = new float[realBatchSize * maxLen];
for (int i = 0; i < realBatchSize * maxLen; i++) {
for (int i = 0; i < realBatchSize * maxLen; ++i) {
batchValues[i] = 1.0F;
}
memset(batchValues, 0, sizeof(int) * maxLen * realBatchSize);
memset(paddingValues, 0, sizeof(float) * maxLen * realBatchSize);
size_t cur = 0;
/* left padding */
IntList indices;
indices.Reserve(realBatchSize);
for (size_t i = 0; i < realBatchSize; i++) {
indices.Add(srcBuffer[bufferUsed + i].id);
IntList& data = srcBuffer[bufferUsed + i].data;
cur = maxLen * (i + 1) - data.Size();
for (int j = 0; j < data.Size(); j++) {
batchValues[cur] = data[j];
vector<int> indices;
indices.reserve(realBatchSize);
for (size_t i = 0; i < realBatchSize; ++i) {
indices.push_back(buffer[bufferUsed + i].id);
cur = maxLen * (i + 1) - buffer[bufferUsed+i].values.size();
for (int v : buffer[bufferUsed + i].values) {
batchValues[cur] = v;
paddingValues[cur++] = 1.0F;
}
cur = maxLen * (i + 1);
......@@ -135,6 +127,7 @@ IntList DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batc
batchEnc->SetData(batchValues, batchEnc->unitNum);
paddingEnc->SetData(paddingValues, paddingEnc->unitNum);
delete[] batchValues;
delete[] paddingValues;
......@@ -142,99 +135,17 @@ IntList DataSet::LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batc
}
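To make the left-padding in LoadBatch concrete (a worked example, not output from the commit): with batchSize = 2, sentences { 7 8 9 } and { 1 2 3 4 5 } give maxLen = 5, and the flattened buffers become

/* batchValues   = { 0 0 7 8 9,   1 2 3 4 5 }
 * paddingValues = { 0 0 1 1 1,   1 1 1 1 1 } */

so each sentence ends at a row boundary and the padding mask marks exactly the real tokens.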
/*
convert text file to binary file
format of the text file:
one sentence per line, separated by blanks
format of the binary file:
part 1: number of all examples
part 2: offsets of all examples
part 3: the raw data
>>> src - the path of source text file
>>> tgt - the path of target binary file
*/
void nts::DataSet::ConvertFile(const char* src, const char* tgt)
{
ifstream ifile(src, ios::in);
FILE* ofile = fopen(tgt, "wb");
CheckNTErrors(ofile, "unable to create the output file");
string line;
size_t idx = 0;
const int maxExample = 10240;
IntList dataList[maxExample];
while (getline(ifile, line)){
SplitInt(line, " ", dataList[idx++]);
}
/* part 1: number of examples */
fwrite(&idx, sizeof(idx), 1, ofile);
/* part 2: offset of all examples */
for (int i = 0; i < idx; i++) {
size_t size = (dataList[i].Size());
fwrite(&size, sizeof(size), 1, ofile);
}
/* part 3: value of examples */
for (int i = 0; i < idx; i++) {
fwrite(dataList[i].items, sizeof(int), dataList[i].Size(), ofile);
}
ifile.close();
fclose(ofile);
}
/*
the constructor of DataSet
the binary data consists of three parts
part 1: number of all examples
part 2: offsets of all examples
part 3: the raw data
>>> fname - path of the data file
>>> myBufferSize - size of the data buffer
>>> mySortBuffer - whether sort the data
*/
void DataSet::Init(const char* fname, size_t myBufferSize, bool mySortBuffer)
void DataSet::Init(const char* fname)
{
id = 0;
index = 0;
fp = new ifstream(fname);
CheckNTErrors(fp->is_open(), "can not open the file");
bufferUsed = 0;
bufferSize = myBufferSize;
sortBuffer = mySortBuffer;
fp = fopen(fname, "rb");
CheckNTErrors(fp, "can not open the file");
/* read offsets */
exampleNumber = 0;
fread(&exampleNumber, sizeof(exampleNumber), 1, fp);
CheckNTErrors(exampleNumber > 0, "invalid example numbers");
offset.Reserve(exampleNumber);
for (int i = 0; i < exampleNumber; i++) {
size_t off;
fread(&off, sizeof(off), 1, fp);
offset.Add(off);
}
/* reset the buffer size if it is too big */
bufferSize = min(bufferSize, exampleNumber);
srcBuffer.Reserve(bufferSize);
}
/* check if the buffer is empty */
bool nts::DataSet::IsEmpty()
{
return (index >= offset.count) && (bufferUsed >= bufferSize);
LoadDataToBuffer();
if (bufferSize == 0)
bufferSize = buffer.size();
}
/* de-constructor */
nts::DataSet::~DataSet()
{
if (fp) {
fclose(fp);
}
}
\ No newline at end of file
......@@ -22,70 +22,79 @@
#ifndef __DATASET_H__
#define __DATASET_H__
#include <cstdio>
#include "../../..//tensor/XTensor.h"
#include "../../..//tensor/XGlobal.h"
namespace nts {
#include <cstdio>
#include <fstream>
#include <unordered_map>
#include <vector>
using namespace std;
using namespace nts;
/* `DataSet` maintains data buffers for the inference stage. */
struct DataSet {
struct Example {
int id;
vector<int> values;
};
public:
struct Result {
int id;
XTensor values;
};
/* the data buffer */
ExampleList srcBuffer;
using BufferType = vector<Example>;
using ResBufferType = vector<Result>;
/* the result buffer */
ResultList resBuffer;
namespace nts { // namespace nts(NiuTrans.Tensor)
/* the offset of all examples in the data */
LongList offset;
/* A `DataSet` is associated with a file which contains variable length data.*/
struct DataSet {
/* whether to sort the dataset */
bool sortBuffer;
/* the data buffer */
BufferType buffer;
/* id for each example */
size_t id;
/* the result buffer */
ResBufferType resBuffer;
/* the pointer to file stream */
ifstream* fp{nullptr};
/* size of the data buffer */
size_t bufferSize;
size_t bufferSize{ 0 };
/* size of used data in buffer */
size_t bufferUsed;
/* size of data in the src file */
size_t exampleNumber;
/* current index of the offset */
size_t index;
/* the pointer of the src file stream */
FILE * fp;
public:
size_t bufferUsed{ 0 };
/* check if the buffer is empty */
bool IsEmpty();
/* whether to sort the dataset */
bool sortBuffer{ true };
/* load data from a file to the buffer */
void LoadDataToBuffer();
/* initialization function */
void Init(const char* fname, size_t myBufferSize, bool mySortBuffer);
/* rerank result for output */
void RerankRes();
/* generate a mini-batch */
IntList LoadBatch(XTensor * batchEnc, XTensor * paddingEnc, size_t batchSize, int devID);
vector<int> LoadBatch(XTensor * batchEnc, XTensor * paddingEnc,
size_t batchSize, int devID);
/* sort results by their ids */
void SortRes();
/* initialization function */
void Init(const char* fname);
/* transform text file to binary file */
static void ConvertFile(const char* src, const char* tgt);
/* check if the buffer is empty */
bool IsEmpty() {
if (bufferUsed < bufferSize)
return false;
return true;
}
/* de-constructor */
~DataSet();
~DataSet() {
if (fp)
fp->close();
delete fp;
}
};
} // namespace nts(NiuTrans.Tensor)
......
......@@ -21,66 +21,27 @@
#include "StringUtil.h"
/*
split string by delimiter, this will return indices of all sub-strings
>>> s - the original string
>>> delimiter - as it is
>>> a - the indices of all sub-strings
*/
void SplitToPos(const string& s, const string& delimiter, LongList& indices)
namespace nts {
/* split string by delimiter, this will return indices of all sub-strings */
vector<pair<int, int>> SplitToPos(const string& s, const string& delimiter)
{
vector<pair<int, int>> fields;
if (delimiter.length() == 0) {
indices.Add(0);
fields.emplace_back(0, s.length());
return fields;
}
int pos = 0;
int start = 0;
while ((pos = s.find(delimiter, start)) != string::npos) {
if (pos != start) {
indices.Add(start);
fields.emplace_back(start, pos);
}
start = pos + delimiter.length();
}
if (start != s.length()) {
indices.Add(start);
}
}
IntList SplitInt(const string& s, const string& delimiter)
{
IntList fields;
LongList indices;
SplitToPos(s, delimiter, indices);
for (int i = 0; i < indices.Size(); i++) {
fields.Add(strtol(s.data() + indices[i], nullptr, 10));
}
return fields;
}
void SplitInt(const string& s, const string& delimiter, IntList& fields)
{
LongList indices;
SplitToPos(s, delimiter, indices);
for (int i = 0; i < indices.Size(); i++) {
fields.Add(strtol(s.data() + indices[i], nullptr, 10));
}
}
FloatList SplitFloat(const string& s, const string& delimiter)
{
FloatList fields;
LongList indices;
SplitToPos(s, delimiter, indices);
for (int i = 0; i < indices.Size(); i++) {
fields.Add(strtof(s.data() + indices[i], nullptr));
fields.emplace_back(start, s.length());
}
return fields;
}
void SplitInt(const string& s, const string& delimiter, FloatList& fields)
{
LongList indices;
SplitToPos(s, delimiter, indices);
for (int i = 0; i < indices.Size(); i++) {
fields.Add(strtof(s.data() + indices[i], nullptr));
}
}
\ No newline at end of file
......@@ -22,25 +22,85 @@
#ifndef __STRING_UTIL_H__
#define __STRING_UTIL_H__
#include <cstdlib>
#include <string>
#include "..//..//..//tensor/XList.h"
#include <utility>
#include <vector>
using namespace std;
using namespace nts;
namespace nts {
/* Splits a string based on the given delimiter string. Each pair in the
* returned vector has the start and past-the-end positions for each of the
* parts of the original string. Empty fields are not represented in the output.
*/
vector<pair<int, int>> SplitToPos(const string& s, const string& delimiter);
/* Splits the given string and converts each part to the given T. */
template <typename T>
vector<T> Split(const string& s, const string& delimiter);
template <>
inline vector<string> Split(const string& s, const string& delimiter)
{
vector<string> fields;
for (const auto& p : SplitToPos(s, delimiter)) {
fields.emplace_back(s.substr(p.first, p.second - p.first));
}
return fields;
}
template <>
inline vector<int> Split(const string& s, const string& delimiter)
{
vector<int> fields;
for (const auto& p : SplitToPos(s, delimiter)) {
fields.emplace_back(strtol(s.data() + p.first, nullptr, 10));
}
return fields;
}
template <>
inline vector<int64_t> Split(const string& s, const string& delimiter)
{
vector<int64_t> fields;
for (const auto& p : SplitToPos(s, delimiter)) {
fields.emplace_back(strtoll(s.data() + p.first, nullptr, 10));
}
return fields;
}
template <>
inline vector<float> Split(const string& s, const string& delimiter)
{
vector<float> fields;
for (const auto& p : SplitToPos(s, delimiter)) {
fields.emplace_back(strtof(s.data() + p.first, nullptr));
}
return fields;
}
template <>
inline vector<uint8_t> Split(const string& s, const string& delimiter)
{
vector<uint8_t> fields;
for (const auto& p : SplitToPos(s, delimiter)) {
fields.emplace_back(strtol(s.data() + p.first, nullptr, 10));
}
return fields;
}
template <>
inline vector<bool> Split(const string& s, const string& delimiter)
{
vector<bool> fields;
for (const auto& p : SplitToPos(s, delimiter)) {
fields.emplace_back(
static_cast<bool>(strtol(s.data() + p.first, nullptr, 10)));
}
return fields;
}
} // namespace nts
#endif // __STRING_UTIL_H__
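/* A minimal usage sketch (illustrative only): Split<T> cuts a string on a
   delimiter and converts each field; empty fields are skipped by SplitToPos. */
static void SplitDemo()
{
    using namespace nts;
    vector<int> ids = Split<int>("12\t7\t42", "\t");      /* {12, 7, 42} */
    vector<float> vals = Split<float>("0.5 1.5", " ");    /* {0.5f, 1.5f} */
    vector<string> toks = Split<string>("a,b,,c", ",");   /* {"a", "b", "c"} */
}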
......@@ -26,183 +26,9 @@
*
*/
#ifdef WIN32
#include <wtypes.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include "XBLAS.h"
#include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
#ifdef WIN32
HINSTANCE hBLASDll;
#endif
/* single-precision floating matrix-matrix multiplication */
void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
float *, OPENBLAS_CONST BLASINT);
/* double-precision floating matrix-matrix multiplication */
void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
double *, OPENBLAS_CONST BLASINT);
/* single-precision floating vector-vector multiplication (rank-1) */
void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
float *, OPENBLAS_CONST BLASINT);
/* double-precision floating vector-vector multiplication (rank-1) */
void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
double *, OPENBLAS_CONST BLASINT);
/* set the number of threads */
void (*XBLAS_SET_THREAD_NUM)(int);
/* get the number of threads */
//int (*XBLAS_GET_THREAD_NUM)();
/* get the number of physical processors (cores).*/
int (*XBLAS_GET_CORE_NUM)();
/* get the CPU corename */
//char * (*XBLAS_GET_CORE_NAME)();
/* get the parallelization type used by OpenBLAS */
//int (*XBLAS_GET_PARALLEL_TYPE)(void);
#if defined(USE_BLAS)
/* load some stuff for BLAS */
void LoadBLAS(const char * dllFileName)
{
#ifndef CUDA_BLAS
#ifdef _WIN32
#if defined(OPENBLAS)
/* non-ascii characters are not supported yet */
wchar_t * fn = new wchar_t[strlen(dllFileName) + 1];
memset(fn, 0, sizeof(wchar_t) * (strlen(dllFileName) + 1));
for(int i = 0; i < strlen(dllFileName); i++)
fn[i] = dllFileName[i];
hBLASDll = LoadLibrary((LPCWSTR)fn);
if(!hBLASDll){
XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
exit(1);
}
/* matrix-matrix multiplication */
(FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
(FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");
/* vector-vector multiplication */
(FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
(FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");
/* multi-threading */
(FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_set_num_threads");
//(FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "goto_set_num_threads");
//(FARPROC&)XBLAS_GET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_get_num_threads");
(FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "openblas_get_num_procs");
//(FARPROC&)XBLAS_GET_CORE_NAME = GetProcAddress(hBLASDll, "openblas_get_corename");
//(FARPROC&)XBLAS_GET_PARALLEL_TYPE = GetProcAddress(hBLASDll, "openblas_get_parallel");
delete[] fn;
#endif // defined(OPENBLAS)
#if defined(MKL)
/* non-ascii characters are not supported yet */
wchar_t * fn = new wchar_t[strlen(dllFileName) + 1];
memset(fn, 0, sizeof(wchar_t) * (strlen(dllFileName) + 1));
for(int i = 0; i < strlen(dllFileName); i++)
fn[i] = dllFileName[i];
hBLASDll = LoadLibrary((LPCWSTR)fn);
if(!hBLASDll){
XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
exit(1);
}
/* matrix-matrix multiplication */
(FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
(FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");
/* vector-vector multiplication */
(FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
(FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");
/* multi-threading */
(FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "MKL_Set_Num_Threads");
(FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "MKL_Get_Max_Threads");
#endif // defined(MKL)
#else // _WIN32
XBLAS_SGEMM = &cblas_sgemm;
XBLAS_DGEMM = &cblas_dgemm;
XBLAS_SGER = &cblas_sger;
XBLAS_DGER = &cblas_dger;
#if defined(OPENBLAS)
XBLAS_SET_THREAD_NUM = &openblas_set_num_threads;
XBLAS_GET_CORE_NUM = &openblas_get_num_procs;
#endif // defined(OPENBLAS)
#if defined(MKL)
XBLAS_SET_THREAD_NUM = &mkl_set_num_threads;
XBLAS_GET_CORE_NUM = &mkl_get_max_num_threads;
#endif // defined(MKL)
#endif // _WIN32
XBLAS_SET_THREAD_NUM(1);
#endif // ndef(CUDA_BLAS)
}
/* unload the libs */
void UnloadBLAS()
{
#ifdef _WIN32
if(!FreeLibrary(hBLASDll)){
XPRINT(0, stderr, "[UnloadBLAS] Error! Cannot free the BLAS dll!\n");
exit(1);
}
#else
#endif
}
#else // !defined(USE_BLAS)
void LoadBLAS(const char * dllFileName)
{
XPRINT(0, stderr, "[LoadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
exit(1);
}
void UnloadBLAS()
{
XPRINT(0, stderr, "[UnloadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
exit(1);
}
#endif // defined(USE_BLAS)
} /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
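/* A minimal usage sketch (illustrative only; assumes a build with USE_BLAS
   and OPENBLAS; the library name is an assumption and is only consulted by
   the Windows code path): */
static void BLASDemo()
{
    nts::LoadBLAS("libopenblas.dll");
    /* ... run matrix operations through the XBLAS_* entries ... */
    nts::UnloadBLAS();
}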
......@@ -34,7 +34,6 @@ namespace nts{
/* some of the code below is from OpenBLAS (https://github.com/xianyi/OpenBLAS) */
//#define OPENBLAS
#define OPENBLAS_CONST const
typedef int BLASINT;
......@@ -46,7 +45,26 @@ typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
#if defined(USE_BLAS)
#ifdef OPENBLAS
#define XBLAS_SGEMM cblas_sgemm
#define XBLAS_DGEMM cblas_dgemm
#define XBLAS_SGER cblas_sger
#define XBLAS_DGER cblas_dger
#define XBLAS_SAXPY cblas_saxpy
#define XBLAS_DAXPY cblas_daxpy
#define XBLAS_SET_THREAD_NUM openblas_set_num_threads
#define XBLAS_GET_CORE_NUM openblas_get_num_procs
#endif
#ifdef MKL
#define XBLAS_SGEMM cblas_sgemm
#define XBLAS_DGEMM cblas_dgemm
#define XBLAS_SGER cblas_sger
#define XBLAS_DGER cblas_dger
#define XBLAS_SAXPY cblas_saxpy
#define XBLAS_DAXPY cblas_daxpy
#define XBLAS_SET_THREAD_NUM MKL_Set_Num_Threads
#define XBLAS_GET_CORE_NUM MKL_Get_Max_Threads
#endif
/*
single/double-precision floating matrix-matrix multiplication (level-3 BLAS)
- SGEMM (ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
......@@ -62,14 +80,14 @@ where A, B and C are matrices,
LDB(=N) specifies the size of the first dimension of B as declared in the calling (sub) program,
and LDC(=N) specifies the size of the first dimension of C as declared in the calling (sub) program.
*/
extern "C" void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
extern "C" void XBLAS_SGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
float *, OPENBLAS_CONST BLASINT);
/* double-precision floating matrix-matrix multiplication */
extern "C" void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
extern "C" void XBLAS_DGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
......@@ -88,24 +106,33 @@ where X and Y are vectors with m and n elements respectively,
E.g., if we are using CblasRowMajor, the leading dimension is the number of columns of A.
*/
extern "C" void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
extern "C" void XBLAS_SGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
float *, OPENBLAS_CONST BLASINT);
/* double-precision floating vector-vector multiplication (rank-1) */
extern "C" void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
extern "C" void XBLAS_DGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
double *, OPENBLAS_CONST BLASINT);
/* single-precision floating scaled vector addition (AXPY): y = a * x + y */
extern "C" void XBLAS_SAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST float a, OPENBLAS_CONST float *x, OPENBLAS_CONST BLASINT incx, float *y, OPENBLAS_CONST BLASINT incy);
/* double-precision floating scaled vector addition (AXPY): y = a * x + y */
extern "C" void XBLAS_DAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST double a, OPENBLAS_CONST double *x, OPENBLAS_CONST BLASINT incx, double *y, OPENBLAS_CONST BLASINT incy);
/* set the number of threads */
extern "C" void (*XBLAS_SET_THREAD_NUM)(int);
extern "C" void XBLAS_SET_THREAD_NUM(int);
/* get the number of threads */
//extern "C" int (*XBLAS_GET_THREAD_NUM)();
/* get the number of physical processors (cores).*/
extern "C" int (*XBLAS_GET_CORE_NUM)();
extern "C" int XBLAS_GET_CORE_NUM();
/* get the CPU corename */
//extern "C" char * (*XBLAS_GET_CORE_NAME)();
......@@ -113,58 +140,6 @@ extern "C" int (*XBLAS_GET_CORE_NUM)();
/* get the parallelization type used by OpenBLAS */
//extern "C" int (*XBLAS_GET_PARALLEL_TYPE)(void);
/* linux systems */
#ifndef _WIN32
/* cblas functions that are imported from the lib. See cblas.h in OpenBlas for more information */
extern "C" void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *A, OPENBLAS_CONST BLASINT lda,
OPENBLAS_CONST float *B, OPENBLAS_CONST BLASINT ldb,
OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST BLASINT ldc);
extern "C" void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *A, OPENBLAS_CONST BLASINT lda,
OPENBLAS_CONST double *B, OPENBLAS_CONST BLASINT ldb,
OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST BLASINT ldc);
extern "C" void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *X, OPENBLAS_CONST BLASINT incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT incY,
float *A, OPENBLAS_CONST BLASINT lda);
extern "C" void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *X, OPENBLAS_CONST BLASINT incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT incY,
double *A, OPENBLAS_CONST BLASINT lda);
#if defined(OPENBLAS)
/* better control of multi-threading */
extern "C" void openblas_set_num_threads(int num_threads);
extern "C" void goto_set_num_threads(int num_threads);
//extern "C" int openblas_get_num_threads(void);
extern "C" int openblas_get_num_procs(void);
//extern "C" char* openblas_get_config(void);
//extern "C" char* openblas_get_corename(void);
//extern "C" int openblas_get_parallel(void);
#endif
#endif
#if defined(MKL)
/* better control of multi-threading */
//_Mkl_Api(void,MKL_Set_Num_Threads,(int nth))
//_Mkl_Api(int,MKL_Get_Max_Threads,(void))
extern "C" void MKL_Set_Num_Threads(int num_threads);
extern "C" int MKL_Get_Max_Threads();
#define mkl_set_num_threads MKL_Set_Num_Threads
#define mkl_get_max_num_threads MKL_Get_Max_Threads
//extern "C" void mkl_set_num_threads(int num_threads);
//extern "C" void omp_set_num_threads(int num_threads);
//extern "C" int mkl_get_max_num_threads();
#endif
#if defined(CUDA_BLAS)
......@@ -186,24 +161,8 @@ extern void BLASMatrixMULD(int deviceID, double * a, double * b, double * c, int
#endif
#endif
#ifdef _WIN32
#include "windows.h"
extern HINSTANCE hBLASDll;
#else
#endif
/* load some stuff for BLAS */
extern void LoadBLAS(const char * dllFileName);
/* unload the libs */
extern void UnloadBLAS();
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
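/* A minimal usage sketch (illustrative only; assumes the library was built
   with USE_BLAS and OPENBLAS or MKL): a row-major SGEMM computing
   C = A * B for A (2x3), B (3x2) and C (2x2). */
#ifdef USE_BLAS
static void GemmDemo()
{
    using namespace nts;
    float A[6] = { 1, 2, 3, 4, 5, 6 };
    float B[6] = { 1, 0, 0, 1, 1, 1 };
    float C[4] = { 0, 0, 0, 0 };
    /* M = 2, N = 2, K = 3; the leading dimensions are the row widths in row-major order */
    XBLAS_SGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                2, 2, 3, 1.0F, A, 3, B, 2, 0.0F, C, 2);
}
#endif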
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-21
*/
#ifndef __XCALL_H__
#define __XCALL_H__
#include "XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
* we define the "new and delete" functions below
*/
/* global flag for enabling gradient flows or not */
static bool X_ENABLE_GRAD = false;
/* initialize a XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor */
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a scalar V2 */
void InitTensor0DV2(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a scalar */
void InitTensor0D(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* initialize a tensor with a reference tensor V2 */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
/* generate a XTensor with no initialization */
XTensor * NewTensor();
/* generate a XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a scalar V2 */
XTensor * NewTensor0DV2(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* generate a scalar */
XTensor * NewTensor0D(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a dense vector by range */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
} // namespace nts(NiuTrans.Tensor)
#endif // __XCALL_H__
\ No newline at end of file
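/* A minimal usage sketch (illustrative only): create, use and free dense tensors. */
static void TensorDemo()
{
    using namespace nts;
    XTensor * m = NewTensor2D(2, 3);              /* a 2x3 float matrix on the CPU */
    XTensor * v = NewTensor1D(8, X_FLOAT, -1);    /* a float vector of size 8 */
    /* ... */
    DelTensor(v);
    DelTensor(m);
}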
......@@ -50,14 +50,6 @@ int CONST_MINUSONE = -1;
bool CONST_TRUE = true;
int verboseLevel = 0;
bool useBLAS = false;
#ifdef USE_CUDA
bool useCUDA = true;
#else
bool useCUDA = false;
#endif
FILE * tmpLog = NULL;
double myTime = 0;
......
......@@ -135,8 +135,6 @@ extern bool CONST_TRUE;
#define NIUTRANSNNDEBUG
extern int verboseLevel;
extern bool useBLAS;
extern bool useCUDA;
#define FFLUSH(FILEH) \
{ \
......
......@@ -249,26 +249,6 @@ inline int TensorListBase<T>::FindFirst(const T& item)
return -1;
}
template <>
inline int TensorListBase<Example>::FindFirst(const Example& item)
{
for (int i = 0; i < count; i++) {
if (item.id == items[i].id)
return i;
}
return -1;
}
template <>
inline int TensorListBase<Result>::FindFirst(const Result& item)
{
for (int i = 0; i < count; i++) {
if (item.id == items[i].id)
return i;
}
return -1;
}
/* clear the data array */
template <typename T>
void TensorListBase<T>::Clear()
......@@ -383,8 +363,6 @@ template struct TensorListBase<long>;
template struct TensorListBase<float>;
template struct TensorListBase<short>;
template struct TensorListBase<XTensor*>;
template struct TensorListBase<Result>;
template struct TensorListBase<Example>;
template struct TensorListBase<void*>;
} /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
......@@ -133,18 +133,6 @@ typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
struct Example {
int id;
IntList data;
};
struct Result {
int id;
IntList data;
};
typedef TensorListBase<Result> ResultList;
typedef TensorListBase<Example> ExampleList;
typedef TensorListBase<XTensor*> TensorList;
} /* end of the nts (NiuTrans.Tensor) namespace */
......
......@@ -31,8 +31,8 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
//int testxmemid = 0;
//void * recordp = NULL;
/*
for managing the memories
......@@ -1482,7 +1482,7 @@ void XMem::ShowMemUsage(FILE * file)
}
fprintf(file, "mem:%.1fMB used:%.1fMB usage:%.3f\n",
(DTYPE)total/MILLION, (DTYPE)used/MILLION, (DTYPE)used/total);
}
#ifdef USE_CUDA
......@@ -1562,9 +1562,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
if (freeMem >= MILLION * 512){
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 1024) {
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 2048)
*myBufSize = MILLION * 128;
}
}
}
......
......@@ -480,12 +480,10 @@ public:
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem;
//extern int testxmemid;
//extern void * recordp;
} /* end of the nts (NiuTrans.Tensor) namespace */
......
......@@ -105,6 +105,8 @@ const char * GetOPName(int type)
return "R_REDUCEMEAN";
else if (type == REDUCE_REDUCESUM)
return "R_REDUCESUM";
else if (type == REDUCE_REDUCESUMALL)
return "R_REDUCESUMALL";
else if (type == REDUCE_REDUCESUMSQUARED)
return "R_REDUCESUMSQUARED";
else if (type == REDUCE_REDUCEVARIANCE)
......@@ -135,6 +137,8 @@ const char * GetOPName(int type)
return "S_SPLIT";
else if (type == SHAPE_SPLIT_LIST)
return "S_SPLIT_LIST";
else if (type == SHAPE_STACK)
return "S_SHAPE_STACK";
else if (type == SHAPE_SQUEEZE)
return "S_SQUEEZE";
else if (type == SHAPE_TRANSPOSE)
......
......@@ -51,7 +51,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MAX MATH_MATRIXMULBATCHED + 1
#define MATH_MIN MATH_MAX + 1
#define MATH_MULTIPLY MATH_MIN + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
......@@ -74,7 +76,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMALL REDUCE_REDUCESUM + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUMALL + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1
/* data and shape related operations */
......@@ -97,7 +100,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_RESHAPE SHAPE_PERMUTE + 1
#define SHAPE_SPLIT SHAPE_RESHAPE + 1
#define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1
#define SHAPE_STACK SHAPE_SPLIT_LIST + 1
#define SHAPE_SQUEEZE SHAPE_STACK + 1
#define SHAPE_TRANSPOSE SHAPE_SQUEEZE + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
......
......@@ -83,13 +83,17 @@
#include "shape/Permute.h"
#include "shape/Split.h"
#include "shape/Squeeze.h"
#include "shape/Stack.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "shape/IsSameShaped.h"
#include "sort/Sort.h"
#include "sort/TopK.h"
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "utilities/CheckData.h"
#include "utilities/SetAscendingOrder.h"
#endif // __CHEADER_H__
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Div.h"
#include "Div.cuh"
#include "DivDim.h"
......@@ -48,9 +49,6 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
"Unmatched tensors!");
CheckDev(a->devID, b->devID);
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim);
......@@ -63,17 +61,17 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
int blockSizeB = 1;
int blockSizeC = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] && a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
......@@ -168,7 +166,7 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -253,8 +251,8 @@ where i is the index of the item
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetDivDimIndex(a, b);
......
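/* A minimal usage sketch (illustrative only; alpha = 0 is assumed to leave
   c as the plain element-wise quotient): */
static void DivDemo()
{
    using namespace nts;
    XTensor a;
    XTensor b;
    XTensor c;
    InitTensor2D(&a, 2, 3);
    InitTensor2D(&b, 2, 3);
    a.SetDataRand(1.0F, 2.0F);    /* keep the divisor away from zero */
    b.SetDataRand(1.0F, 2.0F);
    Div(a, b, c, 0.0F, 0);        /* c_i = a_i / b_i along leading dimension 0 */
}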
......@@ -122,7 +122,6 @@ where i is the item index
*/
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......@@ -130,18 +129,18 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
int stride = 1;
int blockSizeA = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
......
......@@ -26,6 +26,7 @@
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -56,7 +57,7 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckDev(a->devID, b->devID);
if(_IsSameShaped(a, b)){
_Div(a, b, c, alpha);
return;
}
......@@ -188,8 +189,8 @@ i.e., a is divided with b by broadcasting
*/
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Div function */
......
......@@ -24,6 +24,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Mask.h"
#include "Mask.cuh"
......@@ -171,8 +172,8 @@ where i is the index of the element
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Mask function */
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "MatrixMulBatched.h"
#include "XTensorBLAS.h"
#include "MatrixMul2D.h"
......@@ -94,27 +95,27 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!");
CheckNTErrors(a->devID >= 0 && b->devID >= 0 && c->devID >= 0, "The tensors must be on GPUs");
int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
int cn = c->dimSize[c->order - 2];
int cm = c->dimSize[c->order - 1];
CheckNTErrors((am == bn && an == cn && bm == cm), "Unmatched tensors in multiplication!");
int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
int aRealBlockSize = aBlockSize * a->unitSize;
int bRealBlockSize = bBlockSize * b->unitSize;
int cRealBlockSize = cBlockSize * c->unitSize;
int blockNum = 1;
for (int i = 0; i < a->order - 2; i++) {
CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSize[i];
}
int devIDBackup = 0;
......@@ -125,9 +126,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
a->data, transposedA, a->dataType, aBlockSize,
b->data, transposedB, b->dataType, bBlockSize,
c->data, c->dataType, cBlockSize, blockNum,
a->dimSize[a->order - 2], a->dimSize[a->order - 1],
b->dimSize[b->order - 2], b->dimSize[b->order - 1],
c->dimSize[c->order - 2], c->dimSize[c->order - 1], alpha, beta);
BacktoCudaDev(a->devID, devIDBackup);
#endif
......@@ -163,46 +164,43 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
int cn = c->dimSize[c->order - 2];
int cm = c->dimSize[c->order - 1];
CheckNTErrors(am == bn && an == cn && bm == cm, "Unmatched tensors in multiplication!");
int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
int aRealBlockSize = aBlockSize * a->unitSize;
int bRealBlockSize = bBlockSize * b->unitSize;
int cRealBlockSize = cBlockSize * c->unitSize;
int blockNum = 1;
for (int i = 0; i < a->order - 2; i++) {
CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSize[i];
}
int aDimSize[2] = {-a->dimSize[a->order - 2], a->dimSize[a->order - 1]};
int bDimSize[2] = {-b->dimSize[b->order - 2], b->dimSize[b->order - 1]};
int cDimSize[2] = {-c->dimSize[c->order - 2], c->dimSize[c->order - 1]};
XTensor * ai = NewTensor2DV2(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem);
XTensor * bi = NewTensor2DV2(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem);
XTensor * ci = NewTensor2DV2(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem);
for (int i = 0; i < blockNum; i++) {
ai->data = (char*)a->data + i * aRealBlockSize;
bi->data = (char*)b->data + i * bRealBlockSize;
ci->data = (char*)c->data + i * cRealBlockSize;
#ifdef USE_BLAS
if (useBLAS)
_MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif
......@@ -245,9 +243,9 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i);
if (!_IsSameShaped(aim, ai) ||
!_IsSameShaped(bim, bi) ||
!_IsSameShaped(cim, ci))
{
isUniform = false;
break;
......@@ -262,10 +260,7 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((bi->order == 2), "2d tensor (i.e., matrix) is required!");
CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!");
#ifdef USE_BLAS
if (useBLAS)
_MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif
......@@ -297,10 +292,10 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a.dimSize[a.order - 1] : a.dimSize[a.order - 2];
int am = transposedA == X_TRANS ? a.dimSize[a.order - 2] : a.dimSize[a.order - 1];
int bn = transposedB == X_TRANS ? b.dimSize[b.order - 1] : b.dimSize[b.order - 2];
int bm = transposedB == X_TRANS ? b.dimSize[b.order - 2] : b.dimSize[b.order - 1];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
......@@ -355,10 +350,10 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = a.dimSize[a.order - 2];
int am = a.dimSize[a.order - 1];
int bn = b.dimSize[b.order - 2];
int bm = b.dimSize[b.order - 1];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
......
......@@ -37,7 +37,7 @@ int GetSumIndex(const XTensor &a, const XTensor &b)
{
if (a.order < b.order)
return -1;
if (IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -71,26 +71,27 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
int xn = x.dimSize[x.order - 2];
int xm = x.dimSize[x.order - 1];
int wn = w.dimSize[w.order - 2];
int wm = w.dimSize[w.order - 1];
CheckNTErrors(xm == wn, "Unmatched tensors in multiplication!");
int order = x.order + w.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 0; i < x.order - 2; i++)
dimSize[sub++] = x.dimSize[i];
for (int i = 0; i < w.order - 2; i++)
dimSize[sub++] = w.dimSize[i];
dimSize[sub++] = xn;
dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
_MatrixMul(&x, X_NOTRANS, &w, X_NOTRANS, tmp, alpha, 0, parallelRunner);
......@@ -148,24 +149,24 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
int xn = transposedA == X_TRANS ? x.dimSize[x.order - 1] : x.dimSize[x.order - 2];
int xm = transposedA == X_TRANS ? x.dimSize[x.order - 2] : x.dimSize[x.order - 1];
int wn = transposedB == X_TRANS ? w.dimSize[w.order - 1] : w.dimSize[w.order - 2];
int wm = transposedB == X_TRANS ? w.dimSize[w.order - 2] : w.dimSize[w.order - 1];
int order = x.order + w.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 0; i < x.order - 2; i++)
dimSize[sub++] = x.dimSize[i];
for (int i = 0; i < w.order - 2; i++)
dimSize[sub++] = w.dimSize[i];
dimSize[sub++] = xn;
dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
_MatrixMul(&x, transposedA, &w, transposedB, tmp, alpha, 0, parallelRunner);
......@@ -205,7 +206,6 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
DelTensorBuf(tmp);
return c;
}
}
\ No newline at end of file
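/* A minimal usage sketch (illustrative only; the shapes and the default
   extra arguments are assumptions): a linear layer y = x * w + b. */
static void MulAndShiftDemo()
{
    using namespace nts;
    XTensor x;
    XTensor w;
    XTensor b;
    InitTensor2D(&x, 8, 16);     /* batch x input */
    InitTensor2D(&w, 16, 32);    /* input x output */
    InitTensor1D(&b, 32);
    x.SetDataRand();
    w.SetDataRand();
    b.SetDataRand();
    XTensor y = MulAndShift(x, w, b);    /* y has shape 8 x 32 */
}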
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Multiply.h"
#include "Multiply.cuh"
#include "MultiplyDim.h"
......@@ -48,9 +49,6 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
"Unmatched tensors!");
CheckDev(a->devID, b->devID);
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaMultiply(a, b, c, alpha, leadingDim);
......@@ -63,18 +61,18 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
int blockSizeB = 1;
int blockSizeC = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
......@@ -169,7 +167,7 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -254,8 +252,8 @@ where i is the index of the item
*/
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetMultiplyDimIndex(a, b);
......
......@@ -122,26 +122,25 @@ where i is the item index
*/
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
int stride = 1;
int blockSizeA = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
......
......@@ -24,6 +24,7 @@
#include "MultiplyDim.h"
#include "MultiplyDim.cuh"
#include "../shape/Unsqueeze.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
......@@ -57,7 +58,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
CheckDev(a->devID, b->devID);
if(_IsSameShaped(a, b)){
_Multiply(a, b, c, alpha);
return;
}
......@@ -203,8 +204,8 @@ i.e., a is multiplied with b by broadcasting
*/
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Multiply function */
......@@ -232,7 +233,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
{
CheckNTErrors(a->order == b->order, "Wrong tensor orders!");
CheckNTErrors(a->order == c->order, "Wrong tensor orders!");
CheckNTErrors(a->order > 0, "TODO!");
CheckNTErrors(a->order >= 0, "TODO!");
int order = a->order;
int count = 0;
......@@ -280,8 +281,8 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
dimsS[0] = -dimsS[0];
dimsT[0] = -dimsT[0];
XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
if(count == 0)
source = b->data;
......@@ -371,8 +372,8 @@ where some of dimensions of b can be of size 1
*/
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _SumBroadcast function */
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Sub.h"
#include "Sub.cuh"
#include "SubDim.h"
......@@ -149,7 +150,7 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -223,8 +224,8 @@ tensor subtraction c = a - b * \beta
*/
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetSubDimIndex(a, b);
......
......@@ -26,6 +26,7 @@
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -61,7 +62,7 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
return;
}
if (_IsSameShaped(a, b)) {
_Sub(a, b, c, beta);
return;
}
......@@ -188,8 +189,8 @@ i.e., a is subtracted with b by broadcasting
*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Sub function */
......
......@@ -39,7 +39,7 @@ where a is a tensor and b is a row vector
*/
template <class T, bool betaFired>
__global__
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
{
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int col = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -75,7 +75,7 @@ where a is a tensor and b is a colum vector
*/
template <class T, bool betaFired>
__global__
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
{
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
......
......@@ -22,7 +22,9 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../../XBLAS.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "Sum.h"
#include "Sum.cuh"
#include "SumDim.h"
......@@ -45,6 +47,8 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!");
CheckDev(a->devID, b->devID);
if(beta == 0){
_CopyValues(a, c);
return;
......@@ -157,6 +161,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
}
/*
tensor summation a = a + b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sum(&a, &b, &a, beta);
}
/*
return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
>> a - a tensor
>> b - another tensor for sum
......@@ -165,6 +182,8 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
int hitDim = -1;
......@@ -184,7 +203,7 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
}
/*
tensor summation c = a + b * \beta (return an XTensor structure)
make a new tensor c to keep the result and return it
>> a - a tensor
......@@ -227,4 +246,45 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
return c;
}
/*
tensor summation c = a + b * \beta
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetSumDimIndex(a, b);
if (n == -1) {
/* call _Sum function */
_Sum(&a, &b, &c, beta);
/* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta);
}
}
else if (n >= 0 && n < a.order) {
/* call _SumDim function */
_SumDim(&a, &b, &c, n, beta);
/* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
} // namespace nts(NiuTrans.Tensor)
......@@ -45,15 +45,6 @@ void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta)
c[i] = a[i] + b[i] * beta;
}
__global__
void KernelADD(int * a, int * b, int * c, int size, int beta)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
c[i] = a[i] + b[i] * beta;
}
/*
tensor summation c = a + b * \beta (cuda version)
>> a - a tensor
......@@ -109,17 +100,6 @@ void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
KernelADD << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, a->unitNum, beta);
}
}
else if (a->dataType == X_INT &&
b->dataType == X_INT &&
c->dataType == X_INT)
{
int gridSize[3], blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
KernelADD << <blocks, threads >> >((int*)a->data, (int*)b->data, (int*)c->data, a->unitNum, (int)beta);
}
else {
// TODO!!
ShowNTErrors("TODO!");
......
......@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing
*/
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/*
tensor summation c = a + b * \beta
......@@ -41,6 +42,9 @@ make a new tensor c to keep the result and return it
*/
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUM_H__
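/* A minimal usage sketch (illustrative only) of the three summation interfaces: */
static void SumDemo()
{
    using namespace nts;
    XTensor a;
    XTensor b;
    XTensor c;
    InitTensor2D(&a, 2, 2);
    InitTensor2D(&b, 2, 2);
    a.SetDataRand();
    b.SetDataRand();
    XTensor d = Sum(a, b);    /* d = a + b, returned as a new tensor */
    Sum(a, b, c, 0.5F);       /* c = a + 0.5 * b, written into c */
    SumMe(a, b);              /* a = a + b, in place */
}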
......@@ -26,6 +26,7 @@
#include "SumDim.h"
#include "SumDim.cuh"
#include "../shape/Unsqueeze.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
......@@ -64,25 +65,11 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
return;
}
if(XTensor::IsSameShaped(a, b)){
if(_IsSameShaped(a, b)){
_Sum(a, b, c, beta);
return;
}
/*int dims[MAX_TENSOR_DIM_NUM];
for(int i = 0; i < a->order; i++)
dims[i] = 1;
dims[n] = a->GetDim(n);
XTensor * b2 = NewTensor(a->order, dims, b->dataType, b->denseRatio, b->devID, b->mem);
_CopyValues(b, b2);
_SumBroadcast(a, b2, c, beta);
DelTensor(b2);
return;*/
if(a->devID >= 0 || b->devID >= 0 || c->devID >= 0){
#ifdef USE_CUDA
_CudaSumDim(a, b, c, n, beta);
......@@ -205,8 +192,8 @@ i.e., a is summed with b by broadcasting
*/
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _SumDim function */
......@@ -233,7 +220,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
{
CheckNTErrors(a->order == b->order, "Wrong tensor orders!");
CheckNTErrors(a->order == c->order, "Wrong tensor orders!");
CheckNTErrors(a->order > 0, "TODO!");
CheckNTErrors(a->order >= 0, "TODO!");
int order = a->order;
int count = 0;
......@@ -281,8 +268,8 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
dimsS[0] = -dimsS[0];
dimsT[0] = -dimsT[0];
XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
if(count == 0)
source = b->data;
......@@ -374,8 +361,8 @@ c = a + b * \beta
*/
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _SumBroadcast function */
......
......@@ -87,17 +87,17 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize
int col = colIndex % colNum;
int block = colIndex / colNum;
if (row >= rowNum || block >= blockNum)
if(row >= rowNum || block >= blockNum)
return;
if (threadIdx.x == 0)
if(threadIdx.x == 0)
bv[threadIdx.y] = b[row];
__syncthreads();
int offset = block * blockSize + row * colNum + col;
if (betaFired)
if(betaFired)
c[offset] = a[offset] + bv[threadIdx.y] * beta;
else
c[offset] = a[offset] + bv[threadIdx.y];
......
......@@ -22,6 +22,7 @@
#include "../../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -224,9 +225,9 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i);
if (!XTensor::IsSameShaped(aim, ai) ||
!XTensor::IsSameShaped(bim, bi) ||
!XTensor::IsSameShaped(cim, ci))
if (!_IsSameShaped(aim, ai) ||
!_IsSameShaped(bim, bi) ||
!_IsSameShaped(cim, ci))
{
isUniform = false;
break;
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "../../XName.h"
......@@ -131,7 +131,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d
{
if (!output.isInit || input.dataType != output.dataType) {
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
InitTensorV2(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
}
_ConvertDataType(&input, &output);
......
......@@ -21,6 +21,7 @@
#include "OnehotAndIndex.h"
#include "OnehotAndIndex.cuh"
#include "SetData.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -31,7 +32,7 @@ convert onehot tensor to index tensor
>> index - index tensor, whose entries are integer indices
>> size - the last dimension size of the onehot tensor
*/
void _OnehotToIndex(XTensor * onehot, XTensor * index, int size)
void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size)
{
CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
......@@ -78,13 +79,13 @@ make a new tensor to keep the result and return it
>> size - the last dimension size of the onehot tensor
<< return - the index tensor
*/
XTensor OnehotToIndex(XTensor & onehot, int size)
XTensor OnehotToIndex(const XTensor & onehot, int size)
{
CheckNTErrors(onehot.GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")
XTensor index;
InitTensor(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
InitTensorV2(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
index.SetTMPFlag();
_OnehotToIndex(&onehot, &index, size);
......@@ -99,7 +100,8 @@ convert index tensor to onehot tensor
>> onehot - onehot tensor, whose entries are 0 or 1
>> size - the last dimension size of the onehot tensor
*/
void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP)
void _IndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float labelSmoothingP)
{
CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
......@@ -109,11 +111,14 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
for (int i = 0; i < index->order; i++)
CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");
onehot->SetZeroAll();
//onehot->SetZeroAll();
#ifdef USE_CUDA
float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size;
_SetDataFixedFloat(onehot, lowconfidence);
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) {
_CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
return;
......@@ -129,8 +134,49 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
for (int i = 0; i < blockNum; i++) {
int id = indexData[i];
DTYPE * od = onehotData + i * stride;
od[id] = 1;
od[id] = confidence;
}
}
/*
convert an index array to an onehot tensor
>> index - index array, whose entries are integer indices
>> n - number of indices in the array
>> onehot - onehot tensor, whose entries are 0 or 1 (smoothed if labelSmoothingP > 0)
>> size - the last dimension size of the onehot tensor
>> labelSmoothingP - the label smoothing factor
*/
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP)
{
/*CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
onehot->SetZeroAll();
#ifdef USE_CUDA
if (onehot->devID >= 0) {
delete[] cudaIndex;
return;
}
#endif
int blockNum = n;
int stride = size;
int * indexData = (int *)index;
int * onehotData = (int *)onehot->data;
for (int i = 0; i < blockNum; i++) {
int id = indexData[i];
int * od = onehotData + i * stride;
od[id] = 1;
}*/
XTensor* cudaIndex = NewTensor1DV2(n, X_INT, onehot->devID);
cudaIndex->SetData(index, n);
_IndexToOnehot(cudaIndex, onehot, size, labelSmoothingP);
delete cudaIndex;
}
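/*
worked example for the smoothing constants used above (illustration only):
with size = 4 and labelSmoothingP = 0.1,
    confidence    = 1 - 0.1 = 0.9
    lowconfidence = 0.1 / 4 = 0.025
so the index 2 becomes the row [0.025, 0.025, 0.9, 0.025], and each row sums
to confidence + (size - 1) * lowconfidence = 1 - labelSmoothingP / size = 0.975
*/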
......@@ -143,7 +189,7 @@ make a new tensor to keep the result and return it
>> labelSmoothingP - the label smoothing factor
<< return - the onehot tensor
*/
XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
{
CheckNTErrors(index.dataType == X_INT, "The index tensor must be in X_INT!")
......@@ -154,7 +200,7 @@ XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
int * dim = new int[order + 1];
memcpy(dim, index.dimSize, order * sizeof(int));
dim[order] = size;
InitTensor(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
InitTensorV2(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
_IndexToOnehot(&index, &onehot, size, labelSmoothingP);
......
......@@ -61,7 +61,7 @@ convert onehot tensor to index tensor (cuda version)
>> index - index tensor, whose entries are integer indices
>> size - the last dimension size of the onehot tensor
*/
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size)
void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size)
{
int devID = onehot->devID;
......@@ -111,13 +111,10 @@ void KernelIndexToOnehot(DTYPE * onehotData, int * indexData, int blockNum, int
int id = indexData[i];
//od[id] = 2.0;
//onehotData[i * stride + id] = 0.1;
if (offset == id)
od[offset] = confidence;
else{
od[offset] = lowconfidence;
}
//else
// od[offset] = lowconfidence;
}
/*
......@@ -127,7 +124,8 @@ convert index tensor to onehot tensor (cuda version)
>> onehot - onehot tensor, whose entries are 0 or 1
>> size - the last dimension size of the onehot tensor
*/
void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence)
void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float confidence, float lowconfidence)
{
int devID = onehot->devID;
......
......@@ -27,10 +27,11 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor (cuda version) */
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size);
void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size);
/* convert index tensor to onehot tensor (cuda version) */
void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence);
void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float confidence, float lowconfidence);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -27,18 +27,21 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor */
void _OnehotToIndex(XTensor * onehot, XTensor * index, int size);
void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size);
/* convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor OnehotToIndex(XTensor & onehot, int num);
XTensor OnehotToIndex(const XTensor & onehot, int num);
/* convert index tensor to onehot tensor */
void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoothingP);
void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP);
/* convert index tensor to onehot tensor */
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP);
/* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IndexToOnehot(XTensor & index, int num, float labelSmoothingP);
XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP);
} // namespace nts(NiuTrans.Tensor)
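/*
illustration only (not part of this commit): building a smoothed gold
distribution from integer labels, assuming a CPU index tensor and an
InitTensor1DV2 initializer; the names are hypothetical

    XTensor label;
    InitTensor1DV2(&label, 3, X_INT, -1);
    int golds[3] = {2, 0, 1};
    label.SetData(golds, 3);
    XTensor gold = IndexToOnehot(label, 4, 0.1F);  // 3 * 4 float tensor:
                                                   // 0.9 at the label, 0.025 elsewhere
*/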
......
......@@ -26,6 +26,114 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
generate a tensor with the data selected by index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - the selected indices
>> dim - the dimension along which we select the data
*/
void _Select(const XTensor * a, XTensor * c, int* index, int dim)
{
CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must be of the same order!");
CheckNTErrors(dim >= 0 && dim < a->order, "The input dimension is out of bounds!");
CheckNTErrors(a->dataType == c->dataType, "The tensors must be of the same data type!");
/* number of elements in one slice below the selected dimension */
int stride = 1;
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
/* number of blocks above the selected dimension */
int copyTimes = 1;
for (int i = 0; i < dim; i++)
copyTimes *= a->dimSize[i];
/* number of selected slices, and the copy sizes in bytes */
int cot = c->dimSize[dim];
int blockSize = stride * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
char * s = (char*)a->data;
char * t = (char*)c->data;
for (int i = 0; i < copyTimes; i++) {
for (int j = 0; j < cot; ++j) {
XMemCopy(t + j * blockSize, c->devID, s + index[j] * blockSize, a->devID, blockSize);
}
s += stepSizeS;
t += stepSizeT;
}
}
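/*
worked example for the copy geometry above (illustration only): for a tensor
a of shape (2, 4, 3) with dim = 1 and index = {0, 2}, c has shape (2, 2, 3) and
    stride    = 3                               (elements below dim)
    copyTimes = 2                               (blocks above dim)
    blockSize = 3 * unitSize                    (bytes of one slice)
    stepSizeS = 4 * 3 * unitSize = 12 * unitSize
    stepSizeT = 2 * 3 * unitSize =  6 * unitSize
each of the 2 outer blocks copies the 2 selected slices, then both pointers
advance by one full block
*/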
/*
generate a tensor with the data selected by index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - the selected indices (an int tensor, on CPU or GPU)
>> dim - the dimension along which we select the data
*/
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim)
{
if (index->devID >= 0) {
/* the index tensor lives on a device, so copy it to the host first */
int* indexCPU = new int[index->unitNum];
XMemCopy(indexCPU, -1, index->data, index->devID, index->unitNum * sizeof(int));
_Select(a, c, indexCPU, dim);
delete[] indexCPU;
}
else {
_Select(a, c, (int*)index->data, dim);
}
}
/*
c = select(a)
>> a - input tensor
>> index - the selected indices
>> dim - the dimension along which we select the data
<< return - the generated tensor with the selected data
*/
XTensor Select(const XTensor &a, XTensor &index, int dim)
{
int order = a.order;
int * dimSize = new int[order];
CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
for (int i = 0; i < a.order; i++) {
if (i == dim) {
dimSize[i] = index.dimSize[0];
}
else
dimSize[i] = a.dimSize[i];
}
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMPFlag();
/* call _Select function */
_Select(&a, &c, &index, dim);
/* tensor connection */
if (a.enableGrad) {
XLink::MakeLink(&a, &index, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim);
}
/* destroy variables */
delete[] dimSize;
return c;
}
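/*
illustration only (not part of this commit): selecting two rows of a matrix
with the new Select; the index tensor holds the chosen positions along dim 0

    XTensor a, index;
    InitTensor2DV2(&a, 4, 5, X_FLOAT, -1);
    a.SetDataRand(-1.0F, 1.0F);
    InitTensor1DV2(&index, 2, X_INT, -1);
    int rows[2] = {0, 2};
    index.SetData(rows, 2);
    XTensor c = Select(a, index, 0);    // c has shape (2, 5)
*/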
/*
generate a tensor with selected data in range [low, high] along the given dimension
c = select(a)
......@@ -58,13 +166,12 @@ void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high)
}
int stride = 1;
int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
for(int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int copyTimes = 1;
for (int i = dimRDI + 1; i < a->order; i++)
copyTimes *= a->dimSizeRDI[i];
for (int i = 0; i < dim; i++)
copyTimes *= a->dimSize[i];
int blockSize = stride * (high - low) * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
......
......@@ -27,13 +27,16 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor * indexCPU);
void _Select(const XTensor * a, XTensor * c, int* index, int dim);
/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim);
/*
generate a tensor with selected data c = select(a) (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Select(const XTensor &a, XTensor &indexCPU);
XTensor Select(const XTensor &a, XTensor &index, int dim);
/*
generate a tensor with selected data in range [low, high] along the given dimension
......
......@@ -470,7 +470,7 @@ void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift)
void _SetDataRand(XTensor * tensor, int rNum, int cNum)
{
if (tensor == NULL || tensor->isInit == false || tensor->order != 2) {
InitTensor2D(tensor, rNum, cNum);
InitTensor2DV2(tensor, rNum, cNum);
}
_SetDataRand(tensor, 0.0F, 1.0F);
......@@ -519,7 +519,7 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
#ifdef USE_CUDA
_CudaSetDataRand(tensor, lower, upper);
#endif
//XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//XTensor * t2 = NewTensorV2(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//_SetDataRand(t2, low, high);
//_CopyValues(t2, tensor);
//delete t2;
......
......@@ -21,6 +21,7 @@
#include <math.h>
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h"
#include "Binary.cuh"
......@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
......@@ -112,7 +113,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
......@@ -169,8 +170,8 @@ XTensor funcName(const XTensor &a, T num)
_funcName(&a, &b, num); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
XLink::AddParamToHead(&b, num); \
} \
return b; \
} \
template XTensor funcName<int>(const XTensor&, int); \
......@@ -181,8 +182,8 @@ template XTensor funcName<double>(const XTensor&, double);
template<class T> \
void funcName(const XTensor &a, XTensor &b, T num) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensorV2(&b, &a); \
} \
_funcName(&a, &b, num); \
if (a.enableGrad) { \
......
......@@ -23,6 +23,7 @@
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h"
#include "Binary.cuh"
......@@ -89,7 +90,7 @@ void Kernel##funcName(T1 * a, T1 * b, int size, T2 num)
template<class T> \
void _Cuda##funcName(const XTensor * a, XTensor * b, T num) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->isSparse == false), "TODO!"); \
\
......
......@@ -21,6 +21,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h"
#include "Clip.cuh"
......@@ -43,7 +44,7 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
if (a->dataType == DEFAULT_DTYPE) {
DTYPE* d = (DTYPE*)a->data;
......@@ -84,7 +85,19 @@ void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
}
/*
set every entry to its clip value (return a XTensor structure)
set every entry to its clip value (in-place)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor& a, DTYPE lower, DTYPE upper)
{
_Clip(&a, &a, lower, upper);
}
/*
set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
>> lower - the lower border
......@@ -109,6 +122,23 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
return b;
}
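/*
set every entry to its clip value (with an output tensor)
>> a - input tensor we are processing
>> b - output tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/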
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
{
if (!b.isInit || !IsSameShaped(a, b)) {
InitTensorV2(&b, &a);
}
/* call _Clip function */
_Clip(&a, &b, lower, upper);
/* tensor connections */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper);
}
}
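/*
illustration only (not part of this commit): the three clip entry points on a
CPU tensor; the names are hypothetical

    XTensor t, r;
    InitTensor2DV2(&t, 2, 3, X_FLOAT, -1);
    t.SetDataRand(-5.0F, 5.0F);
    XTensor u = Clip(t, -1.0F, 1.0F);    // new tensor holding the result
    Clip(t, r, -1.0F, 1.0F);             // result written into r
    ClipMe(t, -1.0F, 1.0F);              // clipped in place
*/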
/*
backward computation
......
......@@ -17,11 +17,11 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16/int added
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h"
#include "Clip.cuh"
......@@ -36,9 +36,8 @@ set each entry to its clip value (CUDA Kernel)
>> upper - the upper border
>> size - size of the data array
*/
template <class T>
__global__
void KernelClip(T * a, T * b, T lower, T upper, int size)
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -53,6 +52,21 @@ void KernelClip(T * a, T * b, T lower, T upper, int size)
}
/*
set each entry to its clip value (CUDA Kernel, float16 version)
note: this kernel is currently a placeholder and performs no clipping
>> a - pointer to input data array
>> b - pointer to output data array
>> lower - the lower border
>> upper - the upper border
>> size - size of the data array
*/
__global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{
/* TODO: float16 clipping is not implemented yet */
return;
}
/*
set each entry to its clip value
>> a - input tensor we are processing
>> b - output tensor we are processing
......@@ -61,7 +75,7 @@ set each entry to its clip value
*/
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
......@@ -78,11 +92,8 @@ void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
if (a->dataType == DEFAULT_DTYPE) {
KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
}
else if (a->dataType == X_INT) {
int lower1 = (int)lower;
int upper1 = (int)upper;
KernelClip << <blocks, threads >> >((int *)a->data, (int *)b->data, lower1, upper1, a->unitNum);
else if (a->dataType == X_FLOAT16) {
KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
}
else {
ShowNTErrors("TODO!");
......