Commit 99097e41 by huchi

add support for greedy search

parent bfa6fc90
......@@ -19,6 +19,10 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
#include <stdio.h>
#include "./network/XNet.h"
#include "./tensor/XUtility.h"
......@@ -27,9 +31,7 @@
#include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/Transformer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace fnnlm;
......@@ -37,19 +39,10 @@ using namespace transformer;
/* program entry point: forwards the command-line arguments (minus the
   program name) to the Transformer driver. The commented-out blocks are
   MSVC CRT debug-heap scaffolding and an ad-hoc ReduceSum smoke test
   kept around for debugging sessions. */
int main( int argc, const char ** argv )
{
//_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
//_CrtSetBreakAlloc(2708);
/*_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
_CrtSetBreakAlloc(2708);*/
/* skip argv[0] so the tool parses only its own options */
TransformerMain(argc - 1, argv + 1);
/*XTensor x;
InitTensor2D(&x, 2, 2);
float d[]{ 1,2,3,4 };
x.SetData(d, 4);
XTensor y;
y = ReduceSum(x, 0);
y.Dump(stderr);*/
//_CrtDumpMemoryLeaks();
return 0;
......
......@@ -34,7 +34,7 @@ T2TAttention::T2TAttention()
nhead = -1;
dk = -1;
dv = -1;
d = -1;
d = -1;
isMasked = false;
ignored = 0;
}
......@@ -62,7 +62,7 @@ void T2TAttention::InitModel(int argc, char** argv,
float minmax = 0;
LoadParamInt(argc, argv, "nhead", &nhead, 8);
LoadParamInt(argc, argv, "nhead", &nhead, 4);
LoadParamInt(argc, argv, "d", &dk, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &dv, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
......@@ -70,15 +70,15 @@ void T2TAttention::InitModel(int argc, char** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wq, d, d, X_FLOAT, devID);
InitTensor1D(&bq, d, X_FLOAT, devID);
InitTensor2D(&wk, d, d, X_FLOAT, devID);
InitTensor1D(&bk, d, X_FLOAT, devID);
InitTensor2D(&wv, d, d, X_FLOAT, devID);
InitTensor1D(&bv, d, X_FLOAT, devID);
InitTensor2D(&rp_embedding_k, max_relative_position * 2 + 1, d/nhead, X_FLOAT, devID);
InitTensor2D(&wa, d, d, X_FLOAT, devID);
InitTensor1D(&ba, d, X_FLOAT, devID);
InitTensor2DV2(&wq, d, d, X_FLOAT, devID);
InitTensor1DV2(&bq, d, X_FLOAT, devID);
InitTensor2DV2(&wk, d, d, X_FLOAT, devID);
InitTensor1DV2(&bk, d, X_FLOAT, devID);
InitTensor2DV2(&wv, d, d, X_FLOAT, devID);
InitTensor1DV2(&bv, d, X_FLOAT, devID);
InitTensor2DV2(&rp_embedding_k, max_relative_position * 2 + 1, d/nhead, X_FLOAT, devID);
InitTensor2DV2(&wo, d, d, X_FLOAT, devID);
InitTensor1DV2(&bo, d, X_FLOAT, devID);
}
/*
......@@ -94,24 +94,27 @@ make the network
>> cacheType - which type that cache is
<< return - multi-attention result
*/
XTensor T2TAttention::Make( XTensor& k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, Cache* cache, int cacheType)
XTensor T2TAttention::Make(XTensor& k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, Cache* cache, int cacheType)
{
const bool isEnc = (!cache) ? true : false;
/* linear transformation before self-attention */
XTensor q2, k2, v2;
q2 = MatrixMul(q, X_NOTRANS, wq, X_TRANS) + bq;
q2 = MatrixMul(q, wq) + bq;
if (!cache) {
/* self attention for encoder layers */
k2 = MatrixMul(k, X_NOTRANS, wk, X_TRANS) + bk;
v2 = MatrixMul(v, X_NOTRANS, wv, X_TRANS) + bv;
return MakeRPRAttention(k2, q2, v2, mask, isTraining, isEnc);
k2 = MatrixMul(k, wk) + bk;
v2 = MatrixMul(v, wv) + bv;
return MakeRPRAttention(k2, q2, v2, mask, isTraining, isEnc);
}
else {
if (cacheType == SELF_ATT) {
k2 = MatrixMul(k, X_NOTRANS, wk, X_TRANS) + bk;
v2 = MatrixMul(v, X_NOTRANS, wv, X_TRANS) + bv;
k2 = MatrixMul(k, wk) + bk;
v2 = MatrixMul(v, wv) + bv;
/* if hit, we only concat the cache with the new token */
if (!cache->miss) {
......@@ -121,12 +124,13 @@ XTensor T2TAttention::Make( XTensor& k, XTensor& q, XTensor& v, XTensor* mask,
cache->key = k2;
cache->value = v2;
cache->miss = false;
return MakeRPRAttention(cache->key, q2, cache->value, mask, isTraining, isEnc);
}
else if (cacheType == EN_DE_ATT) {
if (cache->miss) {
cache->key = MatrixMul(k, X_NOTRANS, wk, X_TRANS) + bk;
cache->value = MatrixMul(v, X_NOTRANS, wv, X_TRANS) + bv;
cache->key = MatrixMul(k, wk) + bk;
cache->value = MatrixMul(v, wv) + bv;
cache->miss = false;
}
return MakeAttention(cache->key, q2, cache->value, mask, isTraining, isEnc);
......@@ -134,50 +138,49 @@ XTensor T2TAttention::Make( XTensor& k, XTensor& q, XTensor& v, XTensor* mask,
CheckNTErrors(0, "invalid cache type");
}
}
/*
make the attention network given keys, queries and values (after linear transformation)
>> k - keys. It might be of size B * L * H
where B = batch size, L = sequence length,
and H = vector size of each position
where B = batch size, L = sequence length,
and H = vector size of each position
>> q - queries
>> v - values
>> mask - as it is
>> isTraining - indicates whether the model is used for training
*/
XTensor T2TAttention::MakeAttention(XTensor& k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, bool is_encoder)
{
    XTensor kheads;
    XTensor qheads;
    XTensor vheads;

    /* split the last (hidden) dimension into nhead heads */
    kheads = Split(k, k.order - 1, nhead);
    qheads = Split(q, q.order - 1, nhead);
    vheads = Split(v, v.order - 1, nhead);

    XTensor att;
    XTensor dot;
    XTensor scalar;

    /* scalar = softmax(Q * K^T / sqrt(dk)) * V */
    dot = BMMul(qheads, X_NOTRANS, kheads, X_TRANS);

    /* NOTE(review): additive masking is disabled here and `mask` is unused;
       confirm masking is applied by the caller before relying on it */
    /*if (isMasked && mask)
        _SumMe(&dot, mask);*/

    /* scale by 1/sqrt(d_head) to keep the softmax numerically well-behaved */
    dot = Linear(dot, 1.0F / (float)sqrt((float)dk / nhead));

    scalar = Softmax(dot, -1);

    if (isTraining && dropoutP > 0)
        scalar = Dropout(scalar, dropoutP);

    att = BMMul(scalar, vheads);

    /* concatenate the heads and apply the output projection (wo, bo) */
    return MulAndShift(Merge(att, att.order - 1), wo, bo);
}
/*
......@@ -215,34 +218,32 @@ XTensor T2TAttention::MakeRPRAttention(XTensor& k, XTensor& q, XTensor& v, XTens
InitTensor4DV2(&dot, nhead, batch_size, len_q, len_kv, X_FLOAT, q.devID);
/* generate the relative emb index (L_q, L_kv) */
GetRPEmbedding(&emb_matrix, len_q, len_kv, max_relative_position, q.devID,is_encoder);
GetRPEmbedding(&emb_matrix, len_q, len_kv, max_relative_position, q.devID, is_encoder);
/* generate the relative key from the rp_embedding_k (L_q, L_kv, H/K) */
_Gather(&rp_embedding_k, &relative_key, &emb_matrix);
/* RPR dot product (K, B, L_q, L_kv)*/
qheads = qheads / float(nhead);
RPDotProduct(&qheads, &kheads, &relative_key, &dot, true);
/*if (isMasked && mask)
_SumMe(&dot, mask);*/
/* scale the dot result */
//dot = Linear(dot, 1.0F / (float)sqrt((float)dk / nhead));
dot = Linear(dot, 1.0F / (float)sqrt((float)dk / nhead));
/* softmax */
scalar = Softmax(dot, -1);
/*if (isTraining && dropoutP > 0)
scalar = Dropout(scalar, dropoutP);*/
if (isTraining && dropoutP > 0)
scalar = Dropout(scalar, dropoutP);
/* generate the relative attention output (K, B, L_q, H/K) */
att = BMMul(scalar, vheads);
/* concatenate the heads */
return MulAndShift(Merge(att, att.order - 1), X_NOTRANS, wa, X_TRANS, ba);
return MulAndShift(Merge(att, att.order - 1), wo, bo);
}
void T2TAttention::GetRPEmbedding(XTensor* emb_matrix, const int len_q, const int len_kv, const int max_relative_length, const int devID, const bool is_encoder)
......@@ -251,10 +252,11 @@ void T2TAttention::GetRPEmbedding(XTensor* emb_matrix, const int len_q, const in
XTensor range;
InitTensor1DV2(&range, len_kv, X_INT, devID);
int* index = new int[len_kv];
// for encoder self-attention which the L_q = L_kv
if (is_encoder)
{
for (int i = 0; i <len_kv; i++)
for (int i = 0; i < len_kv; i++)
index[i] = i;
range.SetData(index, len_kv);
XTensor range_2D, range_2D_t;
......@@ -267,7 +269,7 @@ void T2TAttention::GetRPEmbedding(XTensor* emb_matrix, const int len_q, const in
// for decoder self-attention which the L_q != L_kv, and L_q is 1
else
{
for (int i = 0; i <len_kv; i++)
for (int i = 0; i < len_kv; i++)
index[i] = -len_kv + i + 1;
range.SetData(index, len_kv);
_Unsqueeze(&range, emb_matrix, 0, len_q);
......@@ -299,7 +301,6 @@ void T2TAttention::RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* att
XTensor context;
InitTensor4DV2(&context, head_num, batch_size, len_q, last_dim, X_FLOAT, x->devID);
_MatrixMulBatched(x, X_NOTRANS, y, transpose_flag, &context);
//if (profiler_) profiler_->FinishTimer("RPDotPro-BMM");
// reshape and transpose x to (L_q, K*B, H/K or L_kv)
int merge_dims[] = { head_num * batch_size, len_q, x->dimSize[3] };
......@@ -323,5 +324,6 @@ void T2TAttention::RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* att
relative_t.Reshape(4, split_dims);
_Sum(&context, &relative_t, attention);
}
}
......@@ -90,14 +90,18 @@ public:
/* bias for V */
XTensor bv;
XTensor wBig;
XTensor bBig;
/* RPR emb */
XTensor rp_embedding_k;
/* transformation after dot-product attention */
XTensor wa;
XTensor wo;
/* bias after dot-product attention */
XTensor ba;
XTensor bo;
/* size of transformed Q and K */
int dk;
......
......@@ -31,27 +31,27 @@ namespace transformer
/* constructor */
AttDecoder::AttDecoder()
{
    /* all sub-modules are allocated later in InitModel; start as null so the
       destructor is safe even if InitModel was never called */
    selfAtt = NULL;
    fnns = NULL;
    selfAttLayerNorms = NULL;
    enDeAtt = NULL;
    enDeAttLayerNorms = NULL;
    decoderLayerNorm = NULL;
    selfAttCache = NULL;
    enDeAttCache = NULL;
}
/* destructor */
AttDecoder::~AttDecoder()
{
    /* per-layer arrays are allocated with new[] in InitModel -> delete[];
       decoderLayerNorm is a single object -> plain delete */
    delete[] selfAttCache;
    delete[] enDeAttCache;
    delete[] selfAtt;
    delete[] fnns;
    delete[] selfAttLayerNorms;
    delete[] enDeAtt;
    delete[] enDeAttLayerNorms;
    delete decoderLayerNorm;
}
/*
......@@ -71,7 +71,7 @@ void AttDecoder::InitModel(int argc, char ** argv,
devID = myDevID;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 3);
LoadParamInt(argc, argv, "nlayer", &nlayer, 4);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsizetgt", &vSize, 34040);
......@@ -83,24 +83,24 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* embedding model */
embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer];
selfAtt = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
attLayerNorms = new T2TLN[nlayer];
attentionsEnde = new T2TAttention[nlayer];
attEndeLayerNorms = new T2TLN[nlayer];
decodeLayerNorm = new T2TLN;
selfCache = new Cache[nlayer];
contextCache = new Cache[nlayer];
selfAttLayerNorms = new T2TLN[nlayer];
enDeAtt = new T2TAttention[nlayer];
enDeAttLayerNorms = new T2TLN[nlayer];
decoderLayerNorm = new T2TLN;
selfAttCache = new Cache[nlayer];
enDeAttCache = new Cache[nlayer];
/* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) {
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
selfAtt[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
selfAttLayerNorms[i].InitModel(argc, argv, myDevID);
enDeAtt[i].InitModel(argc, argv, true, myIgnored, myDevID);
enDeAttLayerNorms[i].InitModel(argc, argv, myDevID);
}
decodeLayerNorm->InitModel(argc, argv, myDevID);
decoderLayerNorm->InitModel(argc, argv, myDevID);
}
/*
......@@ -131,48 +131,38 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, X
XTensor attNorm;
/* layer normalization */
inputNorm = attLayerNorms[i].Make(x);
//inputNorm.Dump(stderr, "inputNorm", 10);
inputNorm = selfAttLayerNorms[i].Make(x);
/******************/
/* self attention */
att = attentions[i].Make(inputNorm, inputNorm, inputNorm, NULL, isTraining, &selfCache[i], SELF_ATT);
att = selfAtt[i].Make(inputNorm, inputNorm, inputNorm, NULL, isTraining, &selfAttCache[i], SELF_ATT);
/* dropout */
if(isTraining && dropoutP > 0)
att = Dropout(att, dropoutP);
/* residual connection */
_SumMe(&att, &x);
//att.Dump(stderr, "Sum(att, x)", 10);
att = att + x;
/* layer normalization */
attNorm = attEndeLayerNorms[i].Make(att);
//attNorm.Dump(stderr, "attNorm", 10);
attNorm = enDeAttLayerNorms[i].Make(att);
/* encoder-decoder attention */
ende = attentionsEnde[i].Make(outputEnc, attNorm, outputEnc, &maskEncDec, isTraining, &contextCache[i], EN_DE_ATT);
//ende.Dump(stderr, "ende atten", 10);
ende = enDeAtt[i].Make(outputEnc, attNorm, outputEnc, &maskEncDec, isTraining, &enDeAttCache[i], EN_DE_ATT);
/* dropout */
if(isTraining && dropoutP > 0)
ende = Dropout(ende, dropoutP);
/* residual connection */
_SumMe(&ende, &att);
//res.Dump(stderr, "Sum(ende, att)", 10);
ende = ende + att;
/* fnn */
x = fnns[i].Make(ende, isTraining);
//x.Dump(stderr, "fnns[i]", 10);
}
x = decodeLayerNorm->Make(x);
//x.Dump(stderr, "decodeLayerNorm", 10);
x.SetName(DECODING_NAME);
x = decoderLayerNorm->Make(x);
return x;
}
......
......@@ -63,13 +63,13 @@ public:
T2TFNN * fnns;
/* attention model of each layer */
T2TAttention * attentions;
T2TAttention * selfAtt;
/* layer normalization for attention */
T2TLN * attLayerNorms;
T2TLN * selfAttLayerNorms;
/* layer normalization for decoder */
T2TLN * decodeLayerNorm;
T2TLN * decoderLayerNorm;
/* input tensor of the encoder */
XTensor * input;
......@@ -78,16 +78,16 @@ public:
XTensor * output;
/* encoder-decoder attention model of each layer */
T2TAttention * attentionsEnde;
T2TAttention * enDeAtt;
/* layer normalization for encoder-decoder attention */
T2TLN * attEndeLayerNorms;
T2TLN * enDeAttLayerNorms;
/* layer cache list */
Cache* selfCache;
Cache* selfAttCache;
/* layer cache list */
Cache* contextCache;
Cache* enDeAttCache;
public:
/* constructor */
......
......@@ -62,7 +62,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "pad", &padIdx, 1);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);
InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
maxLength = maxLength + 1 + 1;
DTYPE v = 1.0F/(float)sqrt((float)eSize);
......@@ -80,7 +80,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
{
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
......@@ -113,47 +113,47 @@ make the network
*/
XTensor T2TEmbedder::Make(XTensor &input, int prevLen)
{
/* assert padding index is 1 */
///* assert padding index is 1 */
CheckNTErrors(input.order > 1, "Wrong input tensor size!");
CheckNTErrors(input.dimSize[input.order - 1] < maxLength, "The sequence is too long!");
CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");
//CheckNTErrors(input.order > 1, "Wrong input tensor size!");
//CheckNTErrors(input.dimSize[input.order - 1] < maxLength, "The sequence is too long!");
//CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
//CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");
XTensor wordEmbedding, position, posEmbedding;
InitTensor(&position, &input);
int* posData = new int[input.unitNum];
XTensor inputCPU;
InitTensorOnCPU(&inputCPU, &input);
_CopyValues(&input, &inputCPU);
for (int i = 0; i < inputCPU.GetDim(0); i++) {
int startNoPad = 2 + prevLen - 1;
int* p = ((int*)inputCPU.data) + i * inputCPU.GetDim(1);
for (int j = 0; j < inputCPU.GetDim(1); j++) {
if (p[j] == 1) {
posData[i * inputCPU.GetDim(1) + j] = 1;
}
else {
posData[i * inputCPU.GetDim(1) + j] = startNoPad++;
}
}
}
//
//XTensor wordEmbedding, position, posEmbedding;
//InitTensor(&position, &input);
position.SetData(posData, position.unitNum);
delete[] posData;
//int* posData = new int[input.unitNum];
/* we make positional embeddings first */
if(true){
posEmbedding = Gather(posEmbeddingBase, position);
}
//XTensor inputCPU;
//InitTensorOnCPU(&inputCPU, &input);
//_CopyValues(&input, &inputCPU);
/* then we make word embeddings */
//for (int i = 0; i < inputCPU.GetDim(0); i++) {
// int startNoPad = 2 + prevLen - 1;
// int* p = ((int*)inputCPU.data) + i * inputCPU.GetDim(1);
// for (int j = 0; j < inputCPU.GetDim(1); j++) {
// if (p[j] == 1) {
// posData[i * inputCPU.GetDim(1) + j] = 1;
// }
// else {
// posData[i * inputCPU.GetDim(1) + j] = startNoPad++;
// }
// }
//}
//position.SetData(posData, position.unitNum);
//delete[] posData;
///* we make positional embeddings first */
//if(true){
// posEmbedding = Gather(posEmbeddingBase, position);
//}
/* then we make word embeddings */
XTensor wordEmbedding;
wordEmbedding = Gather(w, input);
wordEmbedding = Linear(wordEmbedding, (float)sqrt((float)eSize));
......
......@@ -29,7 +29,7 @@ using namespace nts;
namespace transformer
{
#define DEFAULT_EMBEDDING_SIZE 512
#define DEFAULT_EMBEDDING_SIZE 128
/*
embedding (of word at position i):
......
......@@ -34,7 +34,7 @@ AttEncoder::AttEncoder()
attentions = NULL;
fnns = NULL;
attLayerNorms = NULL;
encodeLayerNorm = NULL;
encoderLayerNorm = NULL;
}
/* destructor */
......@@ -43,7 +43,7 @@ AttEncoder::~AttEncoder()
delete[] attentions;
delete[] fnns;
delete[] attLayerNorms;
delete encodeLayerNorm;
delete encoderLayerNorm;
}
/*
......@@ -61,7 +61,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
devID = myDevID;
ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 35);
LoadParamInt(argc, argv, "nlayer", &nlayer, 20);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsize", &vSize, 34040);
......@@ -76,7 +76,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
attLayerNorms = new T2TLN[nlayer];
encodeLayerNorm = new T2TLN;
encoderLayerNorm = new T2TLN;
/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
......@@ -84,7 +84,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
}
encodeLayerNorm->InitModel(argc, argv, myDevID);
encoderLayerNorm->InitModel(argc, argv, myDevID);
}
/*
......@@ -123,13 +123,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor *mask, XTensor &maskEncDec, boo
/* fnn */
x = fnns[i].Make(res, isTraining);
}
x = encodeLayerNorm->Make(x);
x.SetName(ENCODING_NAME);
input.SetName(ENCODING_INPUT_NAME);
x = encoderLayerNorm->Make(x);
return x;
}
......
......@@ -93,11 +93,11 @@ public:
/* attention model of each layer */
T2TAttention * attentions;
/* layer normalization for attention */
/* layer normalizations for attention */
T2TLN * attLayerNorms;
/* layer normalization for encoder */
T2TLN * encodeLayerNorm;
T2TLN * encoderLayerNorm;
/* input tensor of the encoder */
XTensor * input;
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
 * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,9 +15,9 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include <math.h>
#include "T2TFNN.h"
......@@ -32,9 +32,9 @@ namespace transformer
/* constructor */
T2TFNN::T2TFNN()
{
    /* -1 marks the layer sizes as "not yet configured"; real values are
       loaded from the command line in InitModel */
    inSize = -1;
    outSize = -1;
    hSize = -1;
}
/* destructor */
......@@ -42,28 +42,28 @@ T2TFNN::~T2TFNN()
{
}
/*
initialize the model
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
*/
void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
void T2TFNN::InitModel(int argc, char** argv, int myDevID)
{
devID = myDevID;
float minmax = 0;
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &outSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "fnnh", &hSize, outSize * 4);
LoadParamInt(argc, argv, "fnnh", &hSize, outSize * 8);
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2DV2(&w1, hSize, inSize, X_FLOAT, devID);
InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2DV2(&w2, outSize, hSize, X_FLOAT, devID);
InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
fnnLayerNorm.InitModel(argc, argv, myDevID);
......@@ -78,25 +78,25 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
//b2.SetZeroAll();
}
/*
make the network
/*
make the network
y = max(0, x * w1 + b1) * w2 + b2
>> input - the input tensor
>> return - the output tensor
>> return - the output tensor
*/
XTensor T2TFNN::Make(XTensor& input, bool isTraining)
{
    XTensor t1;

    /* t1 = max(0, x * w1 + b1), with pre-layer-norm applied to the input */
    t1 = Rectify(MulAndShift(fnnLayerNorm.Make(input), w1, b1));

    if (isTraining && dropoutP > 0)
        t1 = Dropout(t1, dropoutP);

    /* result = t1 * w2 + b2 */
    XTensor res;
    res = MulAndShift(t1, w2, b2);

    /* residual connection: add the un-normalized input back in place */
    _SumMe(&res, &input);

    return res;
}
......
......@@ -53,8 +53,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID)
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID);
InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1DV2(&b, d, X_FLOAT, devID);
}
/*
......@@ -78,7 +78,7 @@ XTensor T2TLN::Make(XTensor &input)
mean = ReduceMean(x, x.order - 1);
/* \sigma = (sum_i (x_i - \mu)^2)/m */
variance = ReduceVariance(x, x.order - 1, mean);
variance = ReduceVariance(x, x.order - 1, mean) + 1e-5F;
/* standard = sqrt(variance) */
standard = Power(variance, 0.5F);
......@@ -92,7 +92,7 @@ XTensor T2TLN::Make(XTensor &input)
xn = (x - meanFilled) / standardFilled;
/* result = x' * w + b */
return xn * w + b;
return xn * w + b;
}
}
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
 * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,15 +15,16 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include "T2TModel.h"
#include "T2TUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../tensor/XUtility.h"
#include <cstdint>
namespace transformer
{
......@@ -49,22 +50,22 @@ T2TModel::~T2TModel()
delete outputLayer;
}
/*
initialize the model
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
*/
void T2TModel::InitModel(int argc, char ** argv)
void T2TModel::InitModel(int argc, char** argv)
{
LoadParamInt(argc, argv, "dev", &devID, -1);
LoadParamBool(argc, argv, "mt", &isMT, false);
LoadParamBool(argc, argv, "lm", &isLM, !isMT);
LoadParamInt(argc, argv, "nhead", &nhead, 8);
LoadParamInt(argc, argv, "nhead", &nhead, 4);
encoder->InitModel(argc, argv, true, 0, devID);
outputLayer->InitModel(argc, argv, devID);
if(isMT)
if (isMT)
decoder->InitModel(argc, argv, true, 0, devID);
TensorList params(10);
......@@ -76,21 +77,21 @@ void T2TModel::InitModel(int argc, char ** argv)
}
}
/*
/*
make the encoding network
>> input - input tensor
>> mask - the mask for positions that are/not involved in computation
>> isTraining - indicates whether we are training the model
<< return - encoding result
*/
XTensor T2TModel::MakeEncoder(XTensor& input, XTensor* mask, bool isTraining)
{
    /* the encoder has no encoder-decoder mask; pass an empty tensor */
    XTensor nothing;

    return encoder->Make(input, mask, nothing, isTraining);
}
/*
/*
make the decoding network
>> inputDec - input tensor of the decoder
>> outputEnc - output tensor of the encoder
......@@ -100,22 +101,22 @@ make the decoding network
>> isTraining - indicates whether we are training the model
<< return - encoding result
*/
/* thin forwarder: all decoding work is delegated to the decoder module */
XTensor T2TModel::MakeDecoder(XTensor& inputDec, XTensor& outputEnc, XTensor* mask, XTensor& maskEncDec, bool isTraining)
{
    return decoder->Make(inputDec, outputEnc, mask, maskEncDec, isTraining);
}
/*
make the network for language modeling (with the output softmax layer)
/*
make the network for language modeling (with the output softmax layer)
>> input - input tensor
>> output - output tensor (distribution)
>> padding - padding of the sequences
>> isTraining - indicates whether the model is for training
*/
void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining)
void T2TModel::MakeLM(XTensor& input, XTensor& output, XTensor& padding, bool isTraining)
{
XTensor encoding;
/* generate mask to see "previous" words only */
//int len = input.GetDim(input.order - 2);
//int * dims = new int[input.order + 1];
......@@ -126,30 +127,30 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
//XTensor mask(input.order + 1, dims, X_FLOAT, 1.0F, input.devID, input.mem);
int len = input.GetDim(input.order - 1);
int * dims = new int[input.order + 2];
for(int i = 0; i < input.order; i++)
int* dims = new int[input.order + 2];
for (int i = 0; i < input.order; i++)
dims[i + 1] = input.GetDim(i);
dims[0] = nhead;
dims[input.order + 1] = len;
XTensor mask;
InitTensor(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
InitTensorV2(&mask, input.order + 2, dims, X_FLOAT, 1.0F, padding.devID);
/* a upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
a given sequence. */
_SetDataLowTri(&mask, 1e9F, 0);
_ScaleAndShiftMe(&mask, 1.0F, -1e9F);
int * dimsPadding = new int[padding.order + 2];
for(int i = 0; i < padding.order - 1; i++)
int* dimsPadding = new int[padding.order + 2];
for (int i = 0; i < padding.order - 1; i++)
dimsPadding[i] = padding.GetDim(i);
dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1);
XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.devID);
XTensor* padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.devID);
for(int i = 0; i < padding2->order; i++)
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
......@@ -169,13 +170,13 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
delete[] dims;
delete[] dimsPadding;
//DelTensorBuf(padding3);
DelTensorBuf(padding2);
}
/*
make the network for machine translation (with the output softmax layer)
/*
make the network for machine translation (with the output softmax layer)
>> inputEnc - input tensor of the encoder
>> inputDec - input tensor of the decoder
>> output - output tensor (distribution)
......@@ -183,7 +184,7 @@ make the network for machine translation (with the output softmax layer)
>> paddingDec - padding of the sequences (on the decoder side)
>> isTraining - indicates whether the model is for training
*/
void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, XTensor &paddingDec, bool isTraining)
void T2TModel::MakeMT(XTensor& inputEnc, XTensor& inputDec, XTensor& output, XTensor& paddingEnc, XTensor& paddingDec, bool isTraining)
{
XTensor encoding;
XTensor decoding;
......@@ -193,7 +194,7 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
/* encoder mask */
MakeMTMaskEnc(inputEnc, paddingEnc, maskEnc);
/* decoder mask */
MakeMTMaskDec(inputEnc, inputDec, paddingEnc, paddingDec, maskDec, maskEncDec, 0);
......@@ -204,8 +205,8 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
outputLayer->Make(decoding, output);
}
/*
make the mask for training MT models
/*
make the mask for training MT models
>> inputEnc - input of the encoder
>> inputDec - input of the decoder
>> paddingEnc - padding of the encoder input
......@@ -214,18 +215,18 @@ make the mask for training MT models
>> maksDec - mask of the decoder self-attention
>> maksEncDec - mask of the decoder enc-dec attention
*/
void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
XTensor &paddingEnc, XTensor &paddingDec,
XTensor &maskEnc, XTensor &maskDec, XTensor &maskEncDec)
void T2TModel::MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
XTensor& paddingEnc, XTensor& paddingDec,
XTensor& maskEnc, XTensor& maskDec, XTensor& maskEncDec)
{
int len = inputDec.GetDim(inputDec.order - 1);
int * dims = new int[inputDec.order + 2];
for(int i = 0; i < inputDec.order; i++)
int* dims = new int[inputDec.order + 2];
for (int i = 0; i < inputDec.order; i++)
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
a given sequence. */
......@@ -234,11 +235,10 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
XTensor* maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
XTensor* maskEncDecTMPDec = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
......@@ -248,21 +248,21 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
DelTensorBuf(maskEncDecTMPEnc);
/* padding on the source side */
int * dimsPadding = new int[paddingEnc.order + 2];
int* dimsPadding = new int[paddingEnc.order + 2];
for (int i = 0; i < paddingEnc.order - 1; i++)
dimsPadding[i] = paddingEnc.GetDim(i);
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
XTensor* padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
XTensor* padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
......@@ -270,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3);
InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
......@@ -282,49 +282,47 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
DelTensorBuf(padding3);
DelTensorBuf(padding2);
}
/*
make the mask of the encoder
>> inputEnc - input of the encoder
>> paddingEnc - padding of the encoder input
>> maskEnc - mask of the encoder self-attention
*/
void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &maskEnc)
void T2TModel::MakeMTMaskEnc(XTensor& inputEnc, XTensor& paddingEnc, XTensor& maskEnc)
{
/* padding on the source side */
int * dimsPadding = new int[paddingEnc.order + 2];
int* dimsPadding = new int[paddingEnc.order + 2];
for (int i = 0; i < paddingEnc.order - 1; i++)
dimsPadding[i] = paddingEnc.GetDim(i);
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
XTensor* padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
XTensor* padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType, paddingEnc.devID);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
_Unsqueeze(padding2, padding3, 0, nhead);
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3);
InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
_Sum(&maskEnc, padding3, &maskEnc);
DelTensorBuf(padding3);
DelTensorBuf(padding2);
delete[] dimsPadding;
}
/*
make the mask of the decoder
>> inputEnc - input of the encoder
......@@ -334,34 +332,33 @@ make the mask of the decoder
>> maksDec - mask of the decoder self-attention
>> maksEncDec - mask of the decoder enc-dec attention
*/
void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
XTensor &paddingEnc, XTensor &paddingDec,
XTensor &maskDec, XTensor &maskEncDec, int incDim)
void T2TModel::MakeMTMaskDec(XTensor& inputEnc, XTensor& inputDec,
XTensor& paddingEnc, XTensor& paddingDec,
XTensor& maskDec, XTensor& maskEncDec, int incDim)
{
int len = inputDec.GetDim(inputDec.order - 1);
int * dims = new int[inputDec.order + 2];
for(int i = 0; i < inputDec.order; i++)
int* dims = new int[inputDec.order + 2];
for (int i = 0; i < inputDec.order; i++)
dims[i + 1] = inputDec.GetDim(i);
//dims[inputDec.order] += incDim;
dims[0] = nhead;
dims[inputDec.order + 1] = len;
//InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec);
/* An upper triangular matrix where the cells of the upper triangular are set to -1e-9.
This matrix can be used to block the attention to current or following words in
a given sequence. */
//_SetDataLowTri(&maskDec, 1e9F, 0);
//_SetDataLowTri(&maskDec, 1e9F, 0);
//_ScaleAndShiftMe(&maskDec, 1.0F, -1e9F);
//_ScaleAndShiftMe(&maskDec, 1.0F, -1e9F);
/* encoder-decoder mask that prevents the attention to padding dummy words */
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID);
XTensor* maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
XTensor* maskEncDecTMPDec = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
//paddingEnc.Dump(stderr, "paddingenc:");
......@@ -374,21 +371,21 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
_Unsqueeze(maskEncDecTMPEnc, &maskEncDec, 0, dims[0]);
//maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:");
DelTensorBuf(maskEncDecTMPDec);
DelTensorBuf(maskEncDecTMPEnc);
delete[] dims;
}
/*
/*
get parameter matrics
>> list - the list that keeps the parameter matrics
*/
void T2TModel::GetParams(TensorList &list)
void T2TModel::GetParams(TensorList& list)
{
list.Clear();
/* encoder parameters */
for(int i = 0; i < encoder->nlayer; i++){
for (int i = 0; i < encoder->nlayer; i++) {
list.Add(&encoder->attentions[i].wq);
list.Add(&encoder->attentions[i].wk);
list.Add(&encoder->attentions[i].wv);
......@@ -396,8 +393,8 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&encoder->attentions[i].bk);
list.Add(&encoder->attentions[i].bv);
list.Add(&encoder->attentions[i].rp_embedding_k);
list.Add(&encoder->attentions[i].wa);
list.Add(&encoder->attentions[i].ba);
list.Add(&encoder->attentions[i].wo);
list.Add(&encoder->attentions[i].bo);
list.Add(&encoder->fnns[i].w1);
list.Add(&encoder->fnns[i].b1);
list.Add(&encoder->fnns[i].w2);
......@@ -407,33 +404,33 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&encoder->fnns[i].fnnLayerNorm.w);
list.Add(&encoder->fnns[i].fnnLayerNorm.b);
}
list.Add(&encoder->encodeLayerNorm->w);
list.Add(&encoder->encodeLayerNorm->b);
list.Add(&encoder->encoderLayerNorm->w);
list.Add(&encoder->encoderLayerNorm->b);
/* decoder parameters */
if(isMT){
for(int i = 0; i < decoder->nlayer; i++){
list.Add(&decoder->attentions[i].wq);
list.Add(&decoder->attentions[i].wk);
list.Add(&decoder->attentions[i].wv);
list.Add(&decoder->attentions[i].bq);
list.Add(&decoder->attentions[i].bk);
list.Add(&decoder->attentions[i].bv);
list.Add(&decoder->attentions[i].rp_embedding_k);
list.Add(&decoder->attentions[i].wa);
list.Add(&decoder->attentions[i].ba);
list.Add(&decoder->attLayerNorms[i].w);
list.Add(&decoder->attLayerNorms[i].b);
list.Add(&decoder->attentionsEnde[i].wq);
list.Add(&decoder->attentionsEnde[i].wk);
list.Add(&decoder->attentionsEnde[i].wv);
list.Add(&decoder->attentionsEnde[i].bq);
list.Add(&decoder->attentionsEnde[i].bk);
list.Add(&decoder->attentionsEnde[i].bv);
list.Add(&decoder->attentionsEnde[i].wa);
list.Add(&decoder->attentionsEnde[i].ba);
list.Add(&decoder->attEndeLayerNorms[i].w);
list.Add(&decoder->attEndeLayerNorms[i].b);
if (isMT) {
for (int i = 0; i < decoder->nlayer; i++) {
list.Add(&decoder->selfAtt[i].wq);
list.Add(&decoder->selfAtt[i].wk);
list.Add(&decoder->selfAtt[i].wv);
list.Add(&decoder->selfAtt[i].bq);
list.Add(&decoder->selfAtt[i].bk);
list.Add(&decoder->selfAtt[i].bv);
list.Add(&decoder->selfAtt[i].rp_embedding_k);
list.Add(&decoder->selfAtt[i].wo);
list.Add(&decoder->selfAtt[i].bo);
list.Add(&decoder->selfAttLayerNorms[i].w);
list.Add(&decoder->selfAttLayerNorms[i].b);
list.Add(&decoder->enDeAtt[i].wq);
list.Add(&decoder->enDeAtt[i].wk);
list.Add(&decoder->enDeAtt[i].wv);
list.Add(&decoder->enDeAtt[i].bq);
list.Add(&decoder->enDeAtt[i].bk);
list.Add(&decoder->enDeAtt[i].bv);
list.Add(&decoder->enDeAtt[i].wo);
list.Add(&decoder->enDeAtt[i].bo);
list.Add(&decoder->enDeAttLayerNorms[i].w);
list.Add(&decoder->enDeAttLayerNorms[i].b);
list.Add(&decoder->fnns[i].w1);
list.Add(&decoder->fnns[i].b1);
list.Add(&decoder->fnns[i].w2);
......@@ -441,8 +438,8 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&decoder->fnns[i].fnnLayerNorm.w);
list.Add(&decoder->fnns[i].fnnLayerNorm.b);
}
list.Add(&decoder->decodeLayerNorm->w);
list.Add(&decoder->decodeLayerNorm->b);
list.Add(&decoder->decoderLayerNorm->w);
list.Add(&decoder->decoderLayerNorm->b);
}
/* shared embeddings */
......@@ -452,23 +449,23 @@ void T2TModel::GetParams(TensorList &list)
}
/*
dump the parameters
dump the parameters
>> fn - where to keep the model
>> model - the model
*/
void T2TModel::Dump(const char * fn)
void T2TModel::Dump(const char* fn)
{
double startT = GetClockSec();
FILE * file = fopen(fn, "wb");
FILE* file = fopen(fn, "wb");
CheckNTErrors(file, "Cannot open the model file");
TensorList params(100);
GetParams(params);
for(int i = 0; i < params.count; i++){
XTensor * p = (XTensor*)params.Get(i);
for (int i = 0; i < params.count; i++) {
XTensor* p = (XTensor*)params.Get(i);
p->Dump(file, "param:");
}
......@@ -480,38 +477,37 @@ void T2TModel::Dump(const char * fn)
}
/* read the parameters */
void T2TModel::Read(const char * fn)
void T2TModel::Read(const char* fn)
{
double startT = GetClockSec();
FILE * file = fopen(fn, "rb");
FILE* file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot open the model file");
TensorList params(100);
GetParams(params);
//uint64_t* offsets = new uint64_t[params.Size()];
for(int i = 0; i < params.count; i++){
XTensor * p = (XTensor*)params.Get(i);
FastRead(p, file);
// p->Read(file, "");
}
///* number of parameter */
//uint64_t param_number;
//fread(&param_number, sizeof(param_number), 1, file);
//CheckNTErrors(param_number == params.Size(), "parameter number not matched");
fclose(file);
///* parameter offsets */
//fread(offsets, sizeof(offsets[0]), params.Size(), file);
double elapsed = GetClockSec() - startT;
///* parameter values */
//for (int i = 0; i < params.Size(); i++)
// params[i]->BinaryRead(file, offsets[i]);
XPRINT1(0, stderr, "[INFO] model loaded (took %.1fs)\n", elapsed);
}
void FastRead(XTensor* x, FILE* f) {
float * dataBuf = new float[x->unitNum];
fread(dataBuf, sizeof(char), sizeof(float) * x->unitNum, f);
x->SetData(dataBuf, x->unitNum);
//delete[] offsets;
for (int i = 0; i < params.Size(); i++)
params[i]->BinaryRead(file, 0);
delete[] dataBuf;
fclose(file);
double elapsed = GetClockSec() - startT;
XPRINT1(0, stderr, "[INFO] model loaded (took %.1fs)\n", elapsed);
}
}
\ No newline at end of file
......@@ -103,7 +103,7 @@ public:
/* read the parameters */
void Read(const char * fn);
};
void FastRead(XTensor* x, FILE* f);
}
#endif
......@@ -56,13 +56,11 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID);
InitTensor2DV2(&w, vSize, hSize, X_FLOAT, devID);
}
/*
make the network (redefined output tensor)
>> input - input tensor
......@@ -72,9 +70,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{
XTensor &x = input;
output = LogSoftmax(MMul(x, X_NOTRANS, w, X_NOTRANS), -1);
output.SetName(OUTPUT_NAME);
output = LogSoftmax(MMul(x, X_NOTRANS, w, X_TRANS), -1);
}
}
......@@ -15,9 +15,9 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/
#include "T2TPredictor.h"
#include "../../tensor/core/CHeader.h"
......@@ -38,24 +38,24 @@ T2TStateBundle::T2TStateBundle()
/* de-constructor */
T2TStateBundle::~T2TStateBundle()
{
if(states != NULL)
if (states != NULL)
delete[] states;
}
/*
create states
/*
create states
>> num - number of states
*/
void T2TStateBundle::MakeStates(int num)
{
CheckNTErrors(num > 0, "invalid number");
if(states != NULL)
if (states != NULL)
delete[] states;
states = new T2TState[num];
for(int i = 0; i < num; i++){
for (int i = 0; i < num; i++) {
states[i].prediction = -1;
states[i].pid = T2T_PID_EMPTY;
states[i].isEnd = false;
......@@ -74,7 +74,7 @@ void T2TStateBundle::MakeStates(int num)
/* constructor */
T2TPredictor::T2TPredictor()
{
startSymbol = -1;
startSymbol = 2;
}
/* de-constructor */
......@@ -82,37 +82,44 @@ T2TPredictor::~T2TPredictor()
{
}
/*
create an initial state
/*
create an initial state
>> model - the t2t model
>> top - the top-most layer of the network
>> input - input of the network
>> beamSize - beam size
>> state - the state to be initialized
*/
void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state)
void T2TPredictor::Create(T2TModel* model, XTensor* top, const XTensor* input, int beamSize, T2TStateBundle* state)
{
int dims[MAX_TENSOR_DIM_NUM];
for (int i = 0; i < input->order - 1; i++)
dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize;
InitTensor(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->endMark, input->order, dims, X_INT, input->devID);
InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID);
InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID);
InitTensorV2(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID);
float* data = new float[state->probPath.unitNum];
/*float* data = new float[state->probPath.unitNum];
for (int i = 0; i < state->probPath.unitNum; ++i) {
data[i] = -1e20F;
if (i % beamSize == 0)
data[i] = 0;
}
state->probPath.SetData(data, state->probPath.unitNum);
delete[] data;*/
SetDataFixed(state->probPath, -1e9F);
for (int i = 0; i < state->probPath.unitNum; ++i) {
if (i % beamSize == 0)
state->probPath.Set(0.0F, i);
}
state->nstep.SetZeroAll();
state->endMark.SetZeroAll();
delete[] data;
state->stateNum = 0;
}
......@@ -125,15 +132,15 @@ void T2TPredictor::SetStartSymbol(int symbol)
startSymbol = symbol;
}
/*
read a state
/*
read a state
>> model - the t2t model that keeps the network created so far
>> state - a set of states. It keeps
1) hypotheses (states)
2) probablities of hypotheses
3) parts of the network for expanding toward the next state
*/
void T2TPredictor::Read(T2TModel * model, T2TStateBundle * state)
void T2TPredictor::Read(T2TModel* model, T2TStateBundle* state)
{
m = model;
s = state;
......@@ -147,8 +154,7 @@ predict the next state
>> paddingEnc - padding of the encoder
>>> isStart - is the start or not
*/
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
XTensor * inputEnc, XTensor * paddingEnc, bool isStart)
void T2TPredictor::Predict(T2TStateBundle* next, XTensor* encoding, XTensor* inputEnc, XTensor* paddingEnc, bool isStart)
{
int dims[MAX_TENSOR_DIM_NUM];
......@@ -157,42 +163,43 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
/* the first token */
XTensor first;
CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!");
for(int i = 0; i < inputEnc->order - 1; i++)
for (int i = 0; i < inputEnc->order - 1; i++)
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
InitTensorV2(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID);
SetDataFixedInt(first, startSymbol);
/* add a new word into the input sequence of the decoder side */
if (isStart) {
inputDec = Identity(first);
}
else{
else {
/* only pass one step to the decoder */
inputDec = GetLastPrediction(s);
inputDec.SetDevice(inputEnc->devID);
}
/* prediction probabilities */
XTensor &output = next->prob;
XTensor& output = next->prob;
XTensor decoding;
for(int i = 0; i < inputDec.order - 1; i++)
for (int i = 0; i < inputDec.order - 1; i++)
dims[i] = inputDec.GetDim(i);
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
InitTensor(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, 1.0F, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1);
XTensor maskDec;
XTensor maskEncDec;
/* decoder mask */
m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec, 0);
//m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec, 0);
/* make the decoding network */
decoding = m->decoder->Make(inputDec, *encoding, NULL, maskEncDec, false);
......@@ -203,38 +210,38 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
m->outputLayer->Make(decoding, output);
}
/*
generate paths up to the states of the current step
/*
generate paths up to the states of the current step
>> state - state bundle of the current step
*/
XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
XTensor T2TPredictor::GeneratePaths(T2TStateBundle* state)
{
CheckNTErrors(state->stateNum >= 0, "Illegal state!");
int distance = -1;
for(int i = 0; i < state->stateNum; i++){
T2TState * cur = state->states + i;
for (int i = 0; i < state->stateNum; i++) {
T2TState* cur = state->states + i;
int nsteps = 0;
while(cur != NULL){
while (cur != NULL) {
nsteps++;
cur = cur->last;
}
if(nsteps > distance)
if (nsteps > distance)
distance = nsteps;
}
XTensor path;
InitTensor2D(&path, state->stateNum, distance, X_INT);
InitTensor2DV2(&path, state->stateNum, distance, X_INT);
path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){
T2TState * cur = state->states + i;
for (int i = 0; i < state->stateNum; i++) {
T2TState* cur = state->states + i;
int nsteps = 0;
while(cur != NULL){
while (cur != NULL) {
nsteps++;
path.Set2DInt(cur->prediction, i, distance - nsteps);
cur = cur->last;
......@@ -253,7 +260,7 @@ XTensor T2TPredictor::GetLastPrediction(T2TStateBundle* state)
CheckNTErrors(state->stateNum >= 0, "Illegal state!");
XTensor lastPred;
InitTensor2D(&lastPred, state->stateNum, 1, X_INT);
InitTensor2DV2(&lastPred, state->stateNum, 1, X_INT);
for (int i = 0; i < state->stateNum; i++) {
T2TState* cur = state->states + i;
......
......@@ -15,10 +15,10 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start!
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start!
*/
#ifndef __T2TPREDICTOR_H__
#define __T2TPREDICTOR_H__
......@@ -39,8 +39,8 @@ public:
/* we assume that the prediction is an integer */
int prediction;
/* id of the problem. One can regard it as the sentence id when we
translate a number of sentences in the batched manner. The hypothesis
/* id of the problem. One can regard it as the sentence id when we
translate a number of sentences in the batched manner. The hypothesis
is empty if id = -1 */
int pid;
......@@ -66,7 +66,7 @@ public:
int nstep;
/* pointer to the previous state */
T2TState * last;
T2TState* last;
};
/* a bundle of states */
......@@ -75,7 +75,7 @@ class T2TStateBundle
public:
/* predictions */
XTensor prediction;
/* id of the previous state that generates the current one */
XTensor preID;
......@@ -95,7 +95,7 @@ public:
XTensor nstep;
/* list of states */
T2TState * states;
T2TState* states;
/* number of states */
int stateNum;
......@@ -114,19 +114,19 @@ public:
void MakeStates(int num);
};
/* The predictor reads the current state and then predicts the next.
/* The predictor reads the current state and then predicts the next.
It is exactly the same procedure of MT inference -
we get the state of previous words and then generate the next word.
Here, a state can be regared as the representation of words (word
Here, a state can be regared as the representation of words (word
indices, hidden states, embeddings and etc.). */
class T2TPredictor
{
private:
/* pointer to the transformer model */
T2TModel * m;
T2TModel* m;
/* current state */
T2TStateBundle * s;
T2TStateBundle* s;
/* start symbol */
int startSymbol;
......@@ -139,19 +139,19 @@ public:
~T2TPredictor();
/* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);
void Create(T2TModel* model, XTensor* top, const XTensor* input, int beamSize, T2TStateBundle* state);
/* set the start symbol */
void SetStartSymbol(int symbol);
/* read a state */
void Read(T2TModel * model, T2TStateBundle * state);
void Read(T2TModel* model, T2TStateBundle* state);
/* predict the next state */
void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc, bool isStart);
void Predict(T2TStateBundle* next, XTensor* encoding, XTensor* inputEnc, XTensor* paddingEnc, bool isStart);
/* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state);
XTensor GeneratePaths(T2TStateBundle* state);
/* get the predictions of the previous step */
XTensor GetLastPrediction(T2TStateBundle* state);
......
......@@ -15,9 +15,9 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include "T2TSearch.h"
#include "T2TUtility.h"
......@@ -27,7 +27,7 @@ using namespace nts;
namespace transformer
{
/* constructor */
T2TSearch::T2TSearch()
{
......@@ -38,15 +38,15 @@ T2TSearch::T2TSearch()
endSymbolNum = 0;
fullHypos = NULL;
endSymbols = new int[32];
startSymbol = -1;
startSymbol = 2;
}
/* de-constructor */
T2TSearch::~T2TSearch()
{
if(fullHypos != NULL)
if (fullHypos != NULL)
delete[] fullHypos;
if(endSymbols != NULL)
if (endSymbols != NULL)
delete[] endSymbols;
}
......@@ -55,7 +55,7 @@ initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
*/
void T2TSearch::Init(int argc, char ** argv)
void T2TSearch::Init(int argc, char** argv)
{
LoadParamInt(argc, argv, "beamsize", &beamSize, 1);
LoadParamInt(argc, argv, "batchsize", &batchSize, 1);
......@@ -63,18 +63,18 @@ void T2TSearch::Init(int argc, char ** argv)
LoadParamInt(argc, argv, "endid", endSymbols, 2);
LoadParamInt(argc, argv, "startid", &startSymbol, 2);
if(endSymbols[0] >= 0)
if (endSymbols[0] >= 0)
endSymbolNum = 1;
}
/*
search for the most promising states
/*
search for the most promising states
>> model - the transformer model
>> input - input of the model
>> padding - padding of the input
>> output - output that represents the sequences as rows
*/
void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output)
void T2TSearch::Search(T2TModel* model, XTensor* input, XTensor* padding, XTensor* output)
{
T2TPredictor predictor;
XTensor maskEnc;
......@@ -86,11 +86,11 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
CheckNTErrors(endSymbolNum > 0, "The search class is not initialized!");
CheckNTErrors(startSymbol >= 0, "The search class is not initialized!");
Prepare(input->unitNum/input->GetDim(-1), beamSize);
Prepare(input->unitNum / input->GetDim(-1), beamSize);
/* encoder mask */
model->MakeMTMaskEnc(*input, *padding, maskEnc);
//model->MakeMTMaskEnc(*input, *padding, maskEnc);
/* make the encoding network */
encoding = model->MakeEncoder(*input, &maskEnc, false);
......@@ -101,11 +101,11 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
encodingBeam.ReshapeMerged(encodingBeam.order - 4);
inputBeam.ReshapeMerged(inputBeam.order - 3);
paddingBeam.ReshapeMerged(paddingBeam.order - 3);
/* max output-length = 2 * source-length */
maxLength = input->GetDim(-1) * 2;
CheckNTErrors(maxLength > 0, "no max length specified!");
T2TStateBundle* states = new T2TStateBundle[maxLength + 1];
T2TStateBundle* first = states;
T2TStateBundle* cur;
......@@ -118,7 +118,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
first->isStart = true;
/* generate the sequence from left to right */
for(int i = 0 ; i < maxLength; i++){
for (int i = 0; i < maxLength; i++) {
cur = states + i;
next = states + i + 1;
......@@ -126,7 +126,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
predictor.Read(model, cur);
/* predict the next state */
predictor.Predict(next, &encodingBeam, &inputBeam, &paddingBeam, i==0);
predictor.Predict(next, &encodingBeam, &inputBeam, &paddingBeam, i == 0);
/* compute the model score (given the prediction probability) */
Score(cur, next);
......@@ -143,13 +143,13 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
/* fill the heap with imcomplete hypotheses if neccesary */
FillHeap(next);
Dump(output);
delete[] states;
}
/*
/*
prepare for search
>> batchSize - size of the batch
>> beamSize - size of the beam
......@@ -168,102 +168,100 @@ void T2TSearch::Prepare(int myBatchSize, int myBeamSize)
fullHypos[i].Init(beamSize);
}
/*
compute the model score for each hypothesis
/*
compute the model score for each hypothesis
>> prev - the beam of the previous state
>> beam - the beam that keeps a number of states
*/
void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
void T2TSearch::Score(T2TStateBundle* prev, T2TStateBundle* beam)
{
XTensor &score = beam->modelScore;
XTensor &prob = beam->prob;
XTensor &probPath = beam->probPath;
XTensor &probPathPrev = prev->probPath;
XTensor &lenPrev = prev->nstep;
XTensor &len = beam->nstep;
XTensor& score = beam->modelScore;
XTensor& prob = beam->prob;
XTensor& probPath = beam->probPath;
XTensor& probPathPrev = prev->probPath;
XTensor& lenPrev = prev->nstep;
XTensor& len = beam->nstep;
XTensor lp;
XTensor mask;
int order = prob.order;
int outputSize = prob.GetDim(-1);
int dims[MAX_TENSOR_DIM_NUM];
for(int i = 0; i < order; i++)
for (int i = 0; i < order; i++)
dims[i] = prob.GetDim(i);
InitTensor(&score, &prob);
InitTensor(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize);
InitTensorV2(&score, &prob);
InitTensorV2(&probPath, &prob);
prob.Reshape(prob.unitNum / outputSize, outputSize);
score.Reshape(score.unitNum / outputSize, outputSize);
probPath.Reshape(score.unitNum / outputSize, outputSize);
probPathPrev.Reshape(probPathPrev.unitNum);
/* the log-scale probability of the entire sequence */
_SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev);
InitTensor(&lp, &lenPrev);
InitTensorV2(&len, &lenPrev);
InitTensorV2(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
/* the GNMT-like length penalty */
//lp = T2TLengthPenalizer::GNMT(len, alpha);
lp = T2TLengthPenalizer::GNMT(len, alpha);
//lp.Reshape(lp.unitNum);
lp.Reshape(lp.unitNum);
/* score = log-prob/lp */
//_DivDim(&probPath, &lp, &score, 0);
_DivDim(&probPath, &lp, &score, 0);
if (prev->isStart) {
XTensor firstMask = MakeFirstMask(beam);
XTensor firstMask;
firstMask = MakeFirstMask(beam);
firstMask.Reshape(firstMask.unitNum);
/* mask the hypotheses in the beam except the first one */
_SumDim(&score, &firstMask, &score, 0);
}
InitTensor(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT,
prev->endMark.devID);
mask.SetZeroAll();
InitTensorV2(&mask, prev->endMark.order, prev->endMark.dimSize, X_FLOAT, 1.0F, prev->endMark.devID);
mask.SetZeroAll();
_SetDataFixedCond(&mask, &prev->endMark, -1e9F);
mask.Reshape(mask.unitNum);
/* mask the completed hypotheses so that they cannot
/* mask the completed hypotheses so that they cannot
be involved in further sorting and beam search. */
_SumDim(&score, &mask, &score, 0);
prob.Reshape(order, dims);
score.Reshape(order, dims);
probPath.Reshape(order, dims);
probPathPrev.Reshape(order - 1, dims);
lp.Reshape(order - 1, dims);
mask.Reshape(order -1 , dims);
mask.Reshape(order - 1, dims);
}
/*
/*
generate tokens for the next state via beam pruning
>> beam - the beam that keeps a number of states
*/
void T2TSearch::Generate(T2TStateBundle * beam)
void T2TSearch::Generate(T2TStateBundle* beam)
{
int dims[MAX_TENSOR_DIM_NUM];
int dimsBeam[MAX_TENSOR_DIM_NUM];
int dimsTopK[MAX_TENSOR_DIM_NUM];
XTensor scoreTopK;
XTensor &score = beam->modelScore;
XTensor &index = beam->prediction;
XTensor &preID = beam->preID;
XTensor &probPath = beam->probPath;
XTensor &prob = beam->prob;
XTensor& score = beam->modelScore;
XTensor& index = beam->prediction;
XTensor& preID = beam->preID;
XTensor& probPath = beam->probPath;
XTensor& prob = beam->prob;
int order = score.order;
CheckNTErrors(order >= 3, "The tensor must be of order 2 or larger.");
CheckNTErrors(dimsBeam[order - 3] % beamSize == 0, "Wrong dimension size!");
for (int i = 0; i < order; i++) {
dims[i] = score.GetDim(i);
dimsBeam[i] = score.GetDim(i);
......@@ -277,15 +275,15 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsBeam[order - 1] *= beamSize;
dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize;
InitTensor(&scoreTopK, order, dimsTopK, score.dataType, score.devID);
InitTensor(&index, order, dimsTopK, X_INT, score.devID);
InitTensor(&preID, order, dimsTopK, X_INT, -1);
InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType, 1.0F, score.devID);
InitTensorV2(&index, order, dimsTopK, X_INT, 1.0F, score.devID);
InitTensorV2(&preID, order, dimsTopK, X_INT, 1.0F, -1);
/* mask the first and the padding id */
int dimMask[]{ score.GetDim(-1) };
XTensor mask;
InitTensor(&mask, 1, dimMask, X_FLOAT, -1);
InitTensorV2(&mask, 1, dimMask, X_FLOAT, 1.0F, -1);
mask.SetZeroAll();
mask.Set1D(-1e20F, 0);
mask.Set1D(-1e20F, 1);
......@@ -293,21 +291,21 @@ void T2TSearch::Generate(T2TStateBundle * beam)
//_SumDim(&score, &mask, 2);
score.Reshape(order, dimsBeam);
/* keep the most promissing candidates in the beam */
/* TODO: check this line */
TopK(score, scoreTopK, index, -1, beamSize);
CopyValues(index, preID);
/* "preID" represents the id (or the offset) of the previous state used to make the current
hypothesis. Note that we reshape the "score" tensor into a matrix where each
row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */
DescaleMe(preID, sizeVocab);
/* Then, we do something similar to "preID". For the top-k predictions, we need
/* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */
ModMe(index, sizeVocab);
......@@ -315,7 +313,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims);
/* we keep the top-k scores */
InitTensor(&score, &scoreTopK);
InitTensorV2(&score, &scoreTopK);
CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */
......@@ -334,9 +332,9 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* sequence probability of top-k candidates */
XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK);
InitTensorV2(&probPathTopK, &scoreTopK);
XTensor probTopK;
InitTensor(&probTopK, &scoreTopK);
InitTensorV2(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i);
......@@ -362,38 +360,38 @@ void T2TSearch::Generate(T2TStateBundle * beam)
prob = probTopK;
}
/*
expand the search graph
/*
expand the search graph
>> beam - the beam that keeps a number of states
*/
void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
void T2TSearch::Expand(T2TStateBundle* prev, T2TStateBundle* beam)
{
CheckNTErrors(beam->prediction.unitNum == beam->preID.unitNum, "A problem occurs in the beam!");
beam->MakeStates(beam->prediction.unitNum);
T2TState * states = beam->states;
XTensor & idRef = beam->preID;
XTensor & modelScoreRef = beam->modelScore;
XTensor & probRef = beam->prob;
XTensor & probPathRef = beam->probPath;
XTensor & predictionRef = beam->prediction;
XTensor & endMark = beam->endMark;
T2TState* states = beam->states;
XTensor& idRef = beam->preID;
XTensor& modelScoreRef = beam->modelScore;
XTensor& probRef = beam->prob;
XTensor& probPathRef = beam->probPath;
XTensor& predictionRef = beam->prediction;
XTensor& endMark = beam->endMark;
XTensor id;
XTensor modelScore;
XTensor prob;
XTensor probPath;
XTensor prediction;
XTensor endMarkCPU;
InitTensorOnCPU(&id, &idRef);
InitTensorOnCPU(&modelScore, &modelScoreRef);
InitTensorOnCPU(&prob, &probRef);
InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef);
InitTensor(&endMark, &predictionRef);
InitTensorV2(&endMark, &predictionRef);
/* we copy the data to CPU because the frequent access to GPU is slow
and we can speed-up the process by doing the job on CPU. */
CopyValues(idRef, id);
......@@ -403,19 +401,19 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
CopyValues(predictionRef, prediction);
CheckNTErrors(beam->stateNum == id.unitNum, "Errors occur in counting!");
/* Related variables are kept on the states of the graph. All these are
maintained on CPUs to ease the implementation of frequent access and
modification of the states. An alternative is to do this on GPUs but
/* Related variables are kept on the states of the graph. All these are
maintained on CPUs to ease the implementation of frequent access and
modification of the states. An alternative is to do this on GPUs but
it needs much more coding work and the speed-up is not obvious. */
for(int i = 0; i < beam->stateNum; i += beamSize){
for (int i = 0; i < beam->stateNum; i += beamSize) {
for (int j = 0; j < beamSize; j++) {
int k = i + j;
T2TState & state = states[k];
T2TState& state = states[k];
int offset = id.GetInt(k);
int pid = i / beamSize;
T2TState * last = prev->states + pid * beamSize + offset;
T2TState* last = prev->states + pid * beamSize + offset;
CheckNTErrors(offset >= 0, "Wrong state index!");
......@@ -457,48 +455,48 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
CopyValues(endMarkCPU, endMark);
}
/*
collect hypotheses with ending symbols. Given a beam of hypotheses,
we remove the finished hypotheses and keep them in a heap.
>> beam - the beam that keeps a number of states
*/
void T2TSearch::Collect(T2TStateBundle* beam)
{
    T2TState* states = beam->states;

    for (int i = 0; i < beam->stateNum; i++) {
        T2TState& state = states[i];

        CheckNTErrors(state.pid >= 0 && state.pid < batchSize,
                      "Invalid sample id!");

        /* we push the hypothesis into the heap when it is completed
           (isEnd != 0 marks a hypothesis that ends with an end symbol) */
        if (state.isEnd != 0)
            fullHypos[state.pid].Push(HeapNode<float>(&state, state.modelScore));
    }
}
/*
fill the hypotheis heap with incomplete hypotheses
/*
fill the hypotheis heap with incomplete hypotheses
>> beam - the beam that keeps a number of states (final)
*/
void T2TSearch::FillHeap(T2TStateBundle * beam)
void T2TSearch::FillHeap(T2TStateBundle* beam)
{
bool * emptyFlags = new bool[batchSize];
bool* emptyFlags = new bool[batchSize];
for (int i = 0; i < batchSize; i++)
emptyFlags[i] = (fullHypos[i].Count() == 0);
T2TState * states = beam->states;
T2TState* states = beam->states;
for (int i = 0; i < beam->stateNum; i++) {
T2TState & state = states[i];
T2TState& state = states[i];
CheckNTErrors(state.pid >= 0 && state.pid < batchSize,
"Invalid sample id!");
"Invalid sample id!");
/* we push the imcomplete hypothesis into the heap */
if (emptyFlags[state.pid] && state.isEnd == 0)
......@@ -508,32 +506,32 @@ void T2TSearch::FillHeap(T2TStateBundle * beam)
delete[] emptyFlags;
}
/*
save the output sequences in a tensor
/*
save the output sequences in a tensor
>> output - output sequences (for return)
*/
void T2TSearch::Dump(XTensor * output)
void T2TSearch::Dump(XTensor* output)
{
int dims[3] = {batchSize, beamSize, maxLength};
int * words = new int[maxLength];
int dims[3] = { batchSize, beamSize, maxLength };
int* words = new int[maxLength];
InitTensor(output, 3, dims, X_INT);
InitTensorV2(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */
for(int h = 0; h < batchSize; h++){
for (int h = 0; h < batchSize; h++) {
XHeap<MIN_HEAP, float> &heap = fullHypos[h];
XHeap<MIN_HEAP, float>& heap = fullHypos[h];
/* for each output in the beam */
for(int i = 0; i < beamSize && heap.Count() > 0; i++){
T2TState * state = (T2TState *)heap.Pop().index;
for (int i = 0; i < beamSize && heap.Count() > 0; i++) {
T2TState* state = (T2TState*)heap.Pop().index;
int count = 0;
bool isCompleted = true;
/* we track the state from the end to the beginning */
while(state != NULL){
while (state != NULL) {
if (!state->isCompleted)
isCompleted = false;
if (isCompleted)
......@@ -544,7 +542,7 @@ void T2TSearch::Dump(XTensor * output)
}
/* dump the sentence to the output tensor */
for(int w = 0; w < count; w++)
for (int w = 0; w < count; w++)
output->Set3DInt(words[count - w - 1], h, beamSize - i - 1, w);
}
}
......@@ -552,38 +550,38 @@ void T2TSearch::Dump(XTensor * output)
delete[] words;
}
/*
check if the token is an end symbol
>> token - token to be checked
<< return - true if the token is one of the registered end symbols
*/
bool T2TSearch::IsEnd(int token)
{
    CheckNTErrors(endSymbolNum > 0, "No end symbol?");

    /* linear scan is fine: the end-symbol set is tiny */
    for (int i = 0; i < endSymbolNum; i++) {
        if (endSymbols[i] == token)
            return true;
    }

    return false;
}
/*
/*
set end symbols for search
>> tokens - end symbols
>> tokenNum - number of the end symbols
*/
void T2TSearch::SetEnd(const int * tokens, const int tokenNum)
void T2TSearch::SetEnd(const int* tokens, const int tokenNum)
{
if(endSymbols != NULL)
if (endSymbols != NULL)
delete[] endSymbols;
if(tokenNum <= 0)
if (tokenNum <= 0)
return;
/* we may have multiple end symbols */
tokens = new int[tokenNum];
for(int i = 0; i < tokenNum; i++)
for (int i = 0; i < tokenNum; i++)
endSymbols[i] = tokens[i];
endSymbolNum = tokenNum;
}
......@@ -592,9 +590,9 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum)
make a mask to prevent duplicated entries in beam expansion for the first position
>> beam - the beam that keeps the searching states
*/
XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
XTensor T2TSearch::MakeFirstMask(T2TStateBundle* beam)
{
XTensor &prob = beam->prob;
XTensor& prob = beam->prob;
XTensor mask;
int order = prob.order;
......@@ -602,7 +600,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i);
InitTensor(&mask, order - 1, dims, X_FLOAT);
InitTensorV2(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) {
......
......@@ -15,9 +15,9 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#ifndef __T2TSEARCH_H__
#define __T2TSEARCH_H__
......@@ -40,10 +40,10 @@ private:
/* predictor */
T2TPredictor predictor;
/* max length of the generated sequence */
int maxLength;
/* beam size */
int beamSize;
......@@ -51,10 +51,10 @@ private:
int batchSize;
/* we keep the final hypotheses in a heap for each sentence in the batch. */
XHeap<MIN_HEAP, float> * fullHypos;
XHeap<MIN_HEAP, float>* fullHypos;
/* array of the end symbols */
int * endSymbols;
int* endSymbols;
/* number of the end symbols */
int endSymbolNum;
......@@ -68,42 +68,42 @@ public:
/* de-constructor */
~T2TSearch();
/* initialize the model */
void Init(int argc, char ** argv);
void Init(int argc, char** argv);
/* search for the most promising states */
void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);
void Search(T2TModel* model, XTensor* input, XTensor* padding, XTensor* output);
/* preparation */
void Prepare(int myBatchSize,int myBeamSize);
void Prepare(int myBatchSize, int myBeamSize);
/* compute the model score for each hypothesis */
void Score(T2TStateBundle * prev, T2TStateBundle * beam);
void Score(T2TStateBundle* prev, T2TStateBundle* beam);
/* generate token indices via beam pruning */
void Generate(T2TStateBundle * beam);
void Generate(T2TStateBundle* beam);
/* expand the search graph */
void Expand(T2TStateBundle * prev, T2TStateBundle * beam);
void Expand(T2TStateBundle* prev, T2TStateBundle* beam);
/* collect hypotheses with ending symbol */
void Collect(T2TStateBundle * beam);
void Collect(T2TStateBundle* beam);
/* fill the hypotheis heap with incomplete hypothses */
void FillHeap(T2TStateBundle * beam);
void FillHeap(T2TStateBundle* beam);
/* save the output sequences in a tensor */
void Dump(XTensor * output);
void Dump(XTensor* output);
/* check if the token is an end symbol */
bool IsEnd(int token);
/* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum);
void SetEnd(const int* tokens, const int tokenNum);
/* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam);
XTensor MakeFirstMask(T2TStateBundle* beam);
};
}
......
......@@ -15,9 +15,9 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include <math.h>
#include "T2TUtility.h"
......@@ -44,23 +44,23 @@ T2TTester::~T2TTester()
}
/* initialize the model */
void T2TTester::Init(int argc, char ** argv)
void T2TTester::Init(int argc, char** argv)
{
LoadParamInt(argc, argv, "vsize", &vSize, 34040);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1);
LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, true);
seacher.Init(argc, argv);
}
/*
/*
test the model
>> fn - test data file
>> ofn - output data file
>> model - model that is trained
*/
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
{
int wc = 0;
int wordCount = 0;
......@@ -86,7 +86,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
int* seqs = new int[MILLION];
batchLoader.Init(fn);
int count = 0;
while (!batchLoader.IsEmpty())
......@@ -94,23 +94,23 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
count++;
wordCount = 0;
for (int i = 0; i < model->decoder->nlayer; ++i) {
model->decoder->selfCache[i].miss = true;
model->decoder->contextCache[i].miss = true;
model->decoder->selfAttCache[i].miss = true;
model->decoder->enDeAttCache[i].miss = true;
}
vector<int> indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID);
XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output);
output.Dump(stderr);
for (int i = 0; i < indices.size(); ++i) {
Result res;
XTensor sent, srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 1, X_INT, output.devID);
int idx[]{i};
InitTensor1DV2(&srcIdx, 1, X_INT, output.devID);
int idx[]{ i };
srcIdx.SetData(idx, 1);
InitTensor(&tgtIdx, &srcIdx);
InitTensorV2(&tgtIdx, &srcIdx);
SetAscendingOrder(tgtIdx, 0);
sent = CopyIndexed(output, 0, srcIdx, tgtIdx);
......@@ -127,9 +127,9 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
}
}
......@@ -138,11 +138,11 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
for (auto res : batchLoader.resBuffer) {
Dump(ofile, &res.values);
}
fclose(ofile);
delete[] seqs;
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d)\n", elapsed, wordCountTotal, sentCount);
......@@ -153,7 +153,7 @@ dump the result into the file
>> file - data file
>> output - output tensor
*/
void T2TTester::Dump(FILE * file, XTensor * output)
void T2TTester::Dump(FILE* file, XTensor* output)
{
int seqLength = output->GetDim(-1);
......
......@@ -15,10 +15,10 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :)
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :)
*/
#ifndef __T2TTESTER_H__
#define __T2TTESTER_H__
......@@ -41,7 +41,7 @@ public:
/* batch size for sentences */
int sentBatch;
/* for batching */
DataSet batchLoader;
......@@ -56,13 +56,13 @@ public:
~T2TTester();
/* initialize the model */
void Init(int argc, char ** argv);
void Init(int argc, char** argv);
/* test the model */
void Test(const char * fn, const char * ofn, T2TModel * model);
void Test(const char* fn, const char* ofn, T2TModel* model);
/* dump the result into the file */
void Dump(FILE * file, XTensor * output);
void Dump(FILE* file, XTensor* output);
};
}
......
......@@ -38,7 +38,7 @@ namespace transformer
{
/* entrance of the program */
int TransformerMain(int argc, const char ** argv);
int TransformerMain(int argc, const char** argv);
}
......
......@@ -28,6 +28,7 @@
#include "XList.h"
#include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
......@@ -363,6 +364,8 @@ template struct TensorListBase<long>;
template struct TensorListBase<float>;
template struct TensorListBase<short>;
template struct TensorListBase<XTensor*>;
template struct TensorListBase<uint64_t>;
template struct TensorListBase<void*>;
} /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
......@@ -26,6 +26,8 @@
#include "XMem.h"
#include "XGlobal.h"
#include <cstdint>
#ifndef __TensorList_H__
#define __TensorList_H__
......@@ -118,7 +120,14 @@ public:
void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */
/* indexing operator; negative indices count back from the end of the list */
T& operator[] (int i) {
    CheckNTErrors(i >= -count && i < count, "Index of a list item is out of scope!");
    CheckNTErrors(count > 0, "Cannt index the item in an empty list!");
    if (i < 0)
        return items[count + i];
    else
        return items[i];
};
T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
};
......@@ -132,7 +141,7 @@ typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
typedef TensorListBase<uint64_t> UInt64List;
typedef TensorListBase<XTensor*> TensorList;
} /* end of the nts (NiuTrans.Tensor) namespace */
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,16 +15,16 @@
* limitations under the License.
*/
/*
*
* implementation of tensors used in this work. It it is the basis of XMatrix
* and XVector
*
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
*
*/
/*
*
* implementation of tensors used in this work. It it is the basis of XMatrix
* and XVector
*
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
*
*/
#include <stdio.h>
#include <stdlib.h>
......@@ -53,7 +53,7 @@
#ifdef USE_CUDA
// the CUDA stuff
// the CUDA stuff
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuda.h>
......@@ -64,7 +64,7 @@
#endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
namespace nts {
int tensorIDGlobal = 0;
MUTEX_HANDLE tensorMutex;
......@@ -73,11 +73,11 @@ XTensor NULLTensor;
/* generate a tensor id */
int MakeTensorID()
{
if(tensorIDGlobal == 0)
if (tensorIDGlobal == 0)
MUTEX_INIT(tensorMutex);
MUTEX_LOCK(tensorMutex);
int id = tensorIDGlobal++;
int id = tensorIDGlobal++;
MUTEX_UNLOCK(tensorMutex);
return id;
......@@ -91,13 +91,13 @@ XTensor::XTensor()
id = MakeTensorID();
isDefaultDType = true;
isInGlobalMem = false;
isInGlobalMem = false;
isInit = false;
isTmp = false;
isTmp = false;
}
/* constructor */
XTensor::XTensor(const XTensor * reference)
XTensor::XTensor(const XTensor* reference)
{
Init();
SetDataPointer();
......@@ -106,13 +106,13 @@ XTensor::XTensor(const XTensor * reference)
InitTensorV2(this, reference);
}
/*
constructor
/*
constructor
>> myOrder - order of the tensor
>> myDevID - device id
>> myMem - memory pool used to allocating the data array
*/
XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
XTensor::XTensor(const int myOrder, int myDevID, XMem* myMem)
{
CheckNTErrors((myOrder >= 0), "Illegal tensor order1");
......@@ -125,8 +125,8 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
devID = myMem == NULL ? myDevID : myMem->devID;
}
/*
constructor
/*
constructor
>> myOrder - order of the tensor
>> myDimSize - size of each dimension
>> myDataType - unit size (e.g., int, float, and double)
......@@ -134,8 +134,8 @@ constructor
>> myDevID - device id
>> myMem - memory pool used to allocating the data array
*/
XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const float myDenseRatio, int myDevID, XMem * myMem)
XTensor::XTensor(const int myOrder, const int* myDimSize, const TENSOR_DATA_TYPE myDataType,
const float myDenseRatio, int myDevID, XMem* myMem)
{
Init();
SetDataPointer();
......@@ -145,12 +145,12 @@ XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYP
mem = myMem;
devID = myMem != NULL ? myMem->devID : myDevID;
if(order >= 0)
if (order >= 0)
Resize(myOrder, myDimSize, myDataType, myDenseRatio);
}
/* copy constructor */
XTensor::XTensor(const XTensor &reference)
XTensor::XTensor(const XTensor& reference)
{
Init();
SetDataPointer();
......@@ -158,13 +158,13 @@ XTensor::XTensor(const XTensor &reference)
ShallowCopy(reference);
data = NULL;
dataHost = NULL;
if(reference.isTmp){
if (reference.isTmp) {
devID = reference.devID;
mem = reference.mem;
data = reference.data;
signature = reference.signature;
/* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in
......@@ -172,26 +172,26 @@ XTensor::XTensor(const XTensor &reference)
This is VERY tricky and there might be better solutions :) */
*reference.dataP = NULL;
}
else{
else {
devID = reference.devID;
mem = reference.mem;
InitTensorV2(this, &reference);
_CopyValues(&reference, this);
}
if(reference.isTmp)
if (reference.isTmp)
XLink::Replace(&reference, this);
else{
else {
CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
XLink::CopyIncoming(&reference, this);
}
isInit = true;
isTmp = reference.isTmp;
isTmp = reference.isTmp;
}
/* copy constructor (with right value reference) */
XTensor::XTensor(const XTensor &&reference)
XTensor::XTensor(const XTensor&& reference)
{
Init();
SetDataPointer();
......@@ -199,12 +199,12 @@ XTensor::XTensor(const XTensor &&reference)
ShallowCopy(reference);
data = NULL;
dataHost = NULL;
devID = reference.devID;
mem = reference.mem;
data = reference.data;
signature = reference.signature;
/* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in
......@@ -215,7 +215,7 @@ XTensor::XTensor(const XTensor &&reference)
XLink::Replace(&reference, this);
isInit = true;
isTmp = reference.isTmp;
isTmp = reference.isTmp;
}
/* de-constructor */
......@@ -225,30 +225,30 @@ XTensor::~XTensor()
the connectivity of the graph. To kill memory
leak, we release the data of the new tensor
when its parent is deleted (see ClearIncoming). */
if(outgo.tailNum > 0){
if (outgo.tailNum > 0) {
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, dimSize, order * sizeof(int));
dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMPFlag();
newTensor->data = data;
data = NULL;
XLink::Replace(this, newTensor);
}
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
DestroyData();
if(grad != NULL)
if (grad != NULL)
delete grad;
}
/* set the name of the tensor */
void XTensor::SetName(const char * myName)
void XTensor::SetName(const char* myName)
{
strcpy(name, myName);
}
......@@ -277,10 +277,10 @@ void XTensor::Init()
isInGlobalMem = false;
memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
isInit = false;
isTmp = false;
isTmp = false;
isGrad = false;
isVar = false;
enableGrad = true;
isVar = false;
enableGrad = X_ENABLE_GRAD;
visitMark = 0;
grad = NULL;
}
......@@ -288,26 +288,26 @@ void XTensor::Init()
/* delete data arrays */
void XTensor::DestroyData()
{
if(data != NULL && mem == NULL && !isShared)
if (data != NULL && mem == NULL && !isShared)
XMemFree(devID, data);
else if(data != NULL && isInGlobalMem)
else if (data != NULL && isInGlobalMem)
FreeData(this, mem);
else if(data != NULL)
else if (data != NULL)
mem->Release(data, GetDataSizeInChar(), signature);
data = NULL;
if(dataHost != NULL)
delete[] (char*)dataHost;
if (dataHost != NULL)
delete[](char*)dataHost;
dataHost = NULL;
}
/*
/*
shallow copy of the tensor
Note that we do not copy data array here
>> tensor - the source tensor
*/
void XTensor::ShallowCopy(const XTensor &tensor)
void XTensor::ShallowCopy(const XTensor& tensor)
{
strcpy(name, tensor.name);
order = tensor.order;
......@@ -318,7 +318,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
unitNum = tensor.unitNum;
isSparse = tensor.isSparse;
unitNumNonZero = tensor.unitNumNonZero;
denseRatio = tensor.denseRatio;
denseRatio = tensor.denseRatio;
isShared = tensor.isShared;
isDefaultDType = tensor.isDefaultDType;
isInGlobalMem = tensor.isInGlobalMem;
......@@ -330,17 +330,17 @@ XTensor& XTensor::operator= (const XTensor& tensor)
{
/* we must make a hard copy of the tensor if it is the input
of another node. */
if(outgo.tailNum > 0){
if (outgo.tailNum > 0) {
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, dimSize, order * sizeof(int));
dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMPFlag();
newTensor->data = data;
newTensor->dataHost = dataHost;
newTensor->signature = tensor.signature;
XLink::Replace(this, newTensor);
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
......@@ -350,35 +350,35 @@ XTensor& XTensor::operator= (const XTensor& tensor)
dataHost = NULL;
}
if(false && !tensor.isTmp){
if (false && !tensor.isTmp) {
/* NOTE: this might lead to additional data copy by Mac LLVM compilers */
/* we make an identity transformation here */
if(outgo.tailNum > 0)
if (outgo.tailNum > 0)
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
if(!_IsSameShaped(this, &tensor))
if (!_IsSameShaped(this, &tensor))
Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio);
_Identity(&tensor, this);
XLink::MakeLink(&tensor, NULL, this, FUNC_IDENTITY);
}
else{
else {
/* hard copy of the data array */
int size = unitNum * unitSize;
if( isInit && !isSparse && !tensor.isSparse &&
if (isInit && !isSparse && !tensor.isSparse &&
size == tensor.unitNum * tensor.unitSize &&
((devID < 0 && tensor.devID < 0) && devID == tensor.devID) &&
((devID < 0 && tensor.devID < 0) && devID == tensor.devID) &&
data != NULL)
{
XMemCopy(data, devID, tensor.data, tensor.devID, size);
if(dataHost != NULL && tensor.dataHost != NULL)
if (dataHost != NULL && tensor.dataHost != NULL)
XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
}
else{
else {
DestroyData();
if(!isInit){
if (!isInit) {
devID = tensor.devID;
mem = tensor.mem;
}
......@@ -391,7 +391,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
ShallowCopy(tensor);
isInit = true;
isTmp = false;
isTmp = false;
CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
......@@ -407,17 +407,17 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
{
/* we must make a hard copy of the tensor if it is the input
of another node. */
if(outgo.tailNum > 0){
if (outgo.tailNum > 0) {
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, dimSize, order * sizeof(int));
dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMPFlag();
newTensor->data = data;
newTensor->dataHost = dataHost;
newTensor->signature = tensor.signature;
XLink::Replace(this, newTensor);
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
......@@ -426,17 +426,17 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
data = NULL;
dataHost = NULL;
}
DestroyData();
ShallowCopy(tensor);
isInit = true;
devID = tensor.devID;
mem = tensor.mem;
mem = tensor.mem;
data = tensor.data;
signature = tensor.signature;
/* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in
......@@ -456,7 +456,7 @@ XTensor XTensor::operator+ (const XTensor& tensor) const
}
/* overloading of the plus-sign */
XTensor XTensor::operator+ (const DTYPE shift) const
XTensor XTensor::operator+ (const DTYPE shift) const
{
return ScaleAndShift(*this, 1, shift);
}
......@@ -500,10 +500,10 @@ XTensor XTensor::operator/ (const XTensor& tensor) const
/* overloading of the division-sign */
XTensor XTensor::operator/ (const DTYPE scale) const
{
return ScaleAndShift(*this, (DTYPE)1/scale, 0);
return ScaleAndShift(*this, (DTYPE)1 / scale, 0);
}
/*
/*
linear transformation b = a * \scale + \shift
>> scale - the slope
>> shift - the intercept
......@@ -513,12 +513,12 @@ XTensor XTensor::Lin(DTYPE scale, DTYPE shift) const
return Linear(*this, scale, shift);
}
/*
relocate the data on the target device
/*
relocate the data on the target device
>> myDevId - target device id
>> myMem - memory pool on the target device
*/
void XTensor::SetDevice(int myDevId, XMem * myMem)
void XTensor::SetDevice(int myDevId, XMem* myMem)
{
if (myMem == NULL) {
myMem = GMems.GetMem(myDevId);
......@@ -527,9 +527,9 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
isInGlobalMem = false;
}
bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
bool XTensor::IsReduceShaped(const XTensor* a, const XTensor* b, int dim)
{
if(a == NULL || b == NULL)
if (a == NULL || b == NULL)
return false;
if ((a->order - 1) != b->order)
......@@ -541,61 +541,61 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
return false;
}
else if (i >= dim) {
if (a->dimSize[i+1] != b->dimSize[i])
if (a->dimSize[i + 1] != b->dimSize[i])
return false;
}
}
if(a->dataType != b->dataType)
if (a->dataType != b->dataType)
return false;
if(a->denseRatio != b->denseRatio)
if (a->denseRatio != b->denseRatio)
return false;
if(a->isSparse != b->isSparse)
if (a->isSparse != b->isSparse)
return false;
return true;
}
/*
set the size of each dimension
/*
set the size of each dimension
>> myDimSize - size of each dimension
*/
void XTensor::SetDim(int * myDimSize)
void XTensor::SetDim(int* myDimSize)
{
for (int i = 0; i < order; i++) {
dimSize[i] = myDimSize[i];
}
}
/*
get the size of a given dimension
>> dim - the given dim we are looking at; a negative value indexes
   from the end (e.g., -1 is the last dimension)
<< return - size of the dimension
*/
int XTensor::GetDim(const int dim) const
{
    CheckNTErrors(dim < order, "dimenision is out of range!");
    CheckNTErrors(dim >= -order, "dimenision is out of range!");

    int d = dim;
    if (dim < 0)
        d = order + dim;

    return dimSize[d];
}
/*
reshape the tensor
/*
reshape the tensor
>> myOrder - order of the tensor
>> myDimSize - size of each dimension
*/
void XTensor::Reshape(const int myOrder, const int * myDimSize)
void XTensor::Reshape(const int myOrder, const int* myDimSize)
{
int dims[MAX_TENSOR_DIM_NUM];
int num = 1;
for(int i = 0; i < myOrder; i++){
for (int i = 0; i < myOrder; i++) {
num *= myDimSize[i];
dims[i] = abs(myDimSize[i]);
}
......@@ -606,7 +606,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize)
memcpy(dimSize, dims, sizeof(int) * order);
}
/*
/*
reshape the tensor into a vector
>> num - number of elements
*/
......@@ -616,14 +616,14 @@ void XTensor::Reshape(const int num)
Reshape(1, &dim);
}
/*
reshape the tensor into a matrix
>> rowNum - number of rows
>> colNum - number of columns
*/
void XTensor::Reshape(const int rowNum, const int colNum)
{
    int dims[2] = { rowNum, colNum };
    Reshape(2, dims);
}
......@@ -663,7 +663,7 @@ XTensor XTensor::TypeAs(const XTensor input)
/* get the number of items in the data array */
int XTensor::GetSize() const
{
if(isSparse)
if (isSparse)
return unitNumNonZero;
else
return unitNum;
......@@ -672,39 +672,39 @@ int XTensor::GetSize() const
/* get the size of the memory space used (in bytes) */
int XTensor::GetDataSizeInChar() const
{
    if (isSparse) {
        /* sparse layout: an int header followed by (index, value) tuples;
           the tuple count is estimated from the density ratio */
        int num = int(unitNum * denseRatio + 1);
        int tupleSize = sizeof(int) + sizeof(DTYPE);
        int size = sizeof(int) + tupleSize * (num);
        return size;
    }
    else {
        return unitNum * unitSize;
    }
}
/*
get unit size in terms of "dataType"
>> myDataType - type of unit
<< return - unit size (in bytes)
*/
int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) const
{
    if (myDataType == X_INT)
        return sizeof(int);
    else if (myDataType == X_FLOAT)
        return sizeof(float);
    else if (myDataType == X_DOUBLE)
        return sizeof(double);
    else if (myDataType == X_INT8)
        return 1;
    else if (myDataType == X_FLOAT16)
        return 2;

    /* default unit size for unrecognized types */
    return sizeof(float);
}
/*
get offset (2D)
/*
get offset (2D)
>> row - index of demension 0
>> col - index of demension 1
*/
......@@ -717,8 +717,8 @@ MTYPE XTensor::GetOffset2D(int row, int col) const
return row * dimSize[1] + col;
}
/*
get offset (3D)
/*
get offset (3D)
>> d0 - index of demension 0
>> d1 - index of demension 1
>> d2 - index of demension 2
......@@ -733,49 +733,49 @@ MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const
return (d0 * dimSize[1] + d1) * dimSize[2] + d2;
}
/*
a vector with all entries of 0
/*
a vector with all entries of 0
>> stream - stream for the job pipeline
*/
void XTensor::SetZeroAll(XStream * stream)
void XTensor::SetZeroAll(XStream* stream)
{
if(data == NULL)
if (data == NULL)
return;
if(isSparse){
if(devID >= 0){
if (isSparse) {
if (devID >= 0) {
#ifdef USE_CUDA
int size = sizeof(int) + (sizeof(int)+sizeof(DTYPE)) * unitNumNonZero;
int size = sizeof(int) + (sizeof(int) + sizeof(DTYPE)) * unitNumNonZero;
int devIDBackup = 0;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
if(stream == NULL)
if (stream == NULL)
cudaMemset(data, 0, size);
else
cudaMemsetAsync(data, 0, size, stream->stream);
cudaSetDevice(devIDBackup);
#endif
}
else
*(int*)data = 0;
unitNumNonZero = 0;
unitNumNonZero = 0;
}
else{
if(devID >= 0){
else {
if (devID >= 0) {
#ifdef USE_CUDA
int devIDBackup = 0;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
if(stream == NULL)
if (stream == NULL)
cudaMemset(data, 0, unitNum * unitSize);
else
cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream);
cudaSetDevice(devIDBackup);
#endif
}
......@@ -784,14 +784,14 @@ void XTensor::SetZeroAll(XStream * stream)
}
}
/* set the tensor with an data array
/* set the tensor with an data array
>> d - input data. it must be on CPU
>> num - number of data items
>> beg - where we start the data copy in the data array of the tensor
*/
void XTensor::SetData(const void * d, int num, int beg)
void XTensor::SetData(const void* d, int num, int beg)
{
if (data == NULL || d ==NULL)
if (data == NULL || d == NULL)
return;
CheckNTErrors(!isSparse, "TODO");
......@@ -816,7 +816,7 @@ void XTensor::Range(DTYPE lower, DTYPE upper, DTYPE step)
_SetDataRange(this, lower, upper, step);
}
/*
/*
set the tensor items by a uniform distribution in range [lower, upper]
>> lower - lower value of the range
>> upper - upper value of the range
......@@ -830,7 +830,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
// srand((unsigned)time(0));
DTYPE variance = upper - lower;
void * d = NULL;
void* d = NULL;
if (dataType == X_FLOAT) {
d = new float[unitNum];
for (int i = 0; i < unitNum; i++) {
......@@ -849,12 +849,12 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
}
SetData(d, unitNum);
if (dataType == X_FLOAT) {
delete[] (float*)d;
delete[](float*)d;
}
else {
delete[] (double*)d;
delete[](double*)d;
}
}
......@@ -868,12 +868,12 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation)
double z;
double pi = 3.141592654;
if (phase == 0){
if (phase == 0) {
u = (rand() + 1.0) / (RAND_MAX + 1.0);
v = (rand() + 1.0) / (RAND_MAX + 1.0);
z = sqrt(-2.0 * log(u))* sin(2.0 * pi * v);
z = sqrt(-2.0 * log(u)) * sin(2.0 * pi * v);
}
else{
else {
z = sqrt(-2.0 * log(u)) * cos(2.0 * pi * v);
}
......@@ -881,7 +881,7 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation)
return mean + (z * standardDeviation);
}
/*
/*
set the tensor items by a normal distribution
>> mean - mean or expectation of the distribution
>> standardDeviation - standard deviation of the distribution
......@@ -894,7 +894,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
return;
// srand((unsigned)time(0));
void * d = NULL;
void* d = NULL;
if (dataType == X_FLOAT) {
d = new float[unitNum];
for (int i = 0; i < unitNum; i++) {
......@@ -914,31 +914,31 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
SetData(d, unitNum);
if (dataType == X_FLOAT) {
delete[] (float*)d;
delete[](float*)d;
}
else {
delete[] (double*)d;
delete[](double*)d;
}
}
/*
set tensor items with an array of offsets
/*
set tensor items with an array of offsets
>> offsets - offset for each data item
>> value - value for the data items
>> num - number of the data items
*/
void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num)
void XTensor::SetDataBatched(MTYPE* offsets, DTYPE value, int num)
{
_SetDataWithOffset(this, offsets, value, num);
}
/*
set tensor items with an array of values
/*
set tensor items with an array of values
>> offsets - offset for each data item
>> values - value for each data item
>> num - number of the data items
*/
void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
void XTensor::SetDataBatchedWithValues(MTYPE* offsets, void* values, int num)
{
_SetDataWithOffsetAndValue(this, offsets, values, num);
}
......@@ -949,8 +949,8 @@ void XTensor::SetDataPointer()
dataP = &data;
}
/*
get the value of a cell with the index
/*
get the value of a cell with the index
>> index - index of each dimension
>> size - size of the index
<< return - cell value
......@@ -961,7 +961,7 @@ DTYPE XTensor::Get(int index[], int size) const
return ToCPU(devID, GetCell(index, size));
}
/*
get the value of a cell with its offset
>> offset - offset in the array
......@@ -973,37 +973,37 @@ DTYPE XTensor::Get(int offset) const
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset).");
DTYPE * address = (DTYPE*)data + offset;
DTYPE* address = (DTYPE*)data + offset;
return ToCPU(devID, address);
}
/*
/*
get the pointer to a cell
>> index - index of each dimension
>> size - size of index
<< return - pointer to the cell
*/
void * XTensor::GetCell(int index[], int size) const
void* XTensor::GetCell(int index[], int size) const
{
CheckNTErrors((size == order), "Illegal index!");
int offset = index[0];
for(int i = 1; i < size; ++i){
for (int i = 1; i < size; ++i) {
CheckNTErrors((index[i] < dimSize[i]), "Index is out of range!");
offset = offset * dimSize[i] + index[i];
}
if(isSparse){
if (isSparse) {
DTYPE value;
void * p;
if(BinarySearch(offset, value, p))
void* p;
if (BinarySearch(offset, value, p))
return (char*)p + sizeof(int);
else
return NULL;
}
else{
else {
return ((char*)data) + offset * unitSize;
}
}
......@@ -1017,8 +1017,8 @@ DTYPE XTensor::Get0D() const
CheckNTErrors((order == 0), "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[1] = {0};
void * value = GetCell(dims, 0);
int dims[1] = { 0 };
void* value = GetCell(dims, 0);
return ToCPU(devID, value);
}
......@@ -1033,14 +1033,14 @@ DTYPE XTensor::Get1D(int i) const
CheckNTErrors((order == 1), "Cannot get a 1d cell for a tensor whose order is not 1!");
CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[1] = {i};
void * value = GetCell(dims, 1);
int dims[1] = { i };
void* value = GetCell(dims, 1);
return ToCPU(devID, value);
}
/*
/*
get the value of a cell in a 2d tensor in default type
>> ni - row index
>> mi - column index
......@@ -1053,14 +1053,14 @@ DTYPE XTensor::Get2D(int ni, int mi) const
CheckNTErrors((mi >= 0 && mi < dimSize[1]), "dimension 1 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[2] = {ni, mi};
void * value = GetCell(dims, 2);
int dims[2] = { ni, mi };
void* value = GetCell(dims, 2);
return ToCPU(devID, value);
}
/*
get the value of a cell in a 3d tensor
/*
get the value of a cell in a 3d tensor
>> d0 - index of dimension 0
>> d1 - index of dimension 1
>> d2 - index of dimension 2
......@@ -1073,12 +1073,12 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2) const
CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[3] = {d0, d1, d2};
void * value = GetCell(dims, 3);
int dims[3] = { d0, d1, d2 };
void* value = GetCell(dims, 3);
return ToCPU(devID, value);
}
/*
get the int value of a cell by its offset
>> offset - offset of the item
......@@ -1089,9 +1089,9 @@ int XTensor::GetInt(int offset) const
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset).");
int * address = (int*)data + offset;
int* address = (int*)data + offset;
return ToCPUInt(devID, address);
}
......@@ -1104,8 +1104,8 @@ int XTensor::Get0DInt() const
CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors(dataType == X_INT, "The tensor is not in int type.");
int dims[1] = {0};
void * value = GetCell(dims, 0);
int dims[1] = { 0 };
void* value = GetCell(dims, 0);
return ToCPUInt(devID, value);
}
......@@ -1120,33 +1120,33 @@ int XTensor::Get1DInt(int i) const
CheckNTErrors(order == 1, "Cannot get a 1d cell for a tensor whose order is not 1!");
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in int type.");
int dims[1] = {i};
void * value = GetCell(dims, 1);
int dims[1] = { i };
void* value = GetCell(dims, 1);
return ToCPUInt(devID, value);
}
/*
/*
get the value of a cell in a 2d tensor in int type
>> ni - row index
>> mi - column index
<< return - value of cell(ni, mi) in int
*/
int XTensor::Get2DInt(int ni, int mi) const
int XTensor::Get2DInt(int ni, int mi) const
{
CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors(ni >= 0 && ni < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in default type.");
int dims[2] = {ni, mi};
void * value = GetCell(dims, 2);
int dims[2] = { ni, mi };
void* value = GetCell(dims, 2);
return ToCPUInt(devID, value);
}
/*
/*
get the value of a cell in a 3d tensor in int type
>> d0 - index of dimension 0
>> d1 - index of dimension 1
......@@ -1161,14 +1161,14 @@ int XTensor::Get3DInt(int d0, int d1, int d2) const
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in default type.");
int dims[3] = {d0, d1, d2};
void * value = GetCell(dims, 3);
int dims[3] = { d0, d1, d2 };
void* value = GetCell(dims, 3);
return ToCPUInt(devID, value);
}
/*
get the value of a cell in the sparse tensor
/*
get the value of a cell in the sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor
<< return - value of the tuple
*/
......@@ -1177,14 +1177,14 @@ DTYPE XTensor::GetInSparse(int i) const
CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
char * d = (char*)data + sizeof(int);
DTYPE * value = (DTYPE*)(d + (sizeof(int) + sizeof(DTYPE)) * i + sizeof(int));
char* d = (char*)data + sizeof(int);
DTYPE* value = (DTYPE*)(d + (sizeof(int) + sizeof(DTYPE)) * i + sizeof(int));
return ToCPU(devID, value);
}
/*
get the key value of a tuple in a sparse tensor
/*
get the key value of a tuple in a sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor
<< return - key of the tuple
*/
......@@ -1193,14 +1193,14 @@ int XTensor::GetKeyInSparse(int i) const
CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
char * d = (char*)data + sizeof(int);
int * key = (int*)(d + (sizeof(int) + sizeof(DTYPE)) * i);
char* d = (char*)data + sizeof(int);
int* key = (int*)(d + (sizeof(int) + sizeof(DTYPE)) * i);
return ToCPUInt(devID, key);
}
/*
set the value of a cell
/*
set the value of a cell
>> value - value we tend to set
>> index - index of the cell for each dimension
>> size - size of the index
......@@ -1222,7 +1222,7 @@ bool XTensor::Set(DTYPE value, int offset)
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
DTYPE * d = (DTYPE*)data + offset;
DTYPE* d = (DTYPE*)data + offset;
return SetToDevice(devID, d, value);
}
......@@ -1237,13 +1237,13 @@ bool XTensor::Set0D(DTYPE value)
CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[1] = {0};
int dims[1] = { 0 };
return SetToDevice(devID, GetCell(dims, 0), value);
}
/*
set the value of a cell in a 1d tensor
/*
set the value of a cell in a 1d tensor
>> value - value we tend to set
>> i - item offset
<< return - succeeded or not
......@@ -1254,12 +1254,12 @@ bool XTensor::Set1D(DTYPE value, int i)
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[1] = {i};
int dims[1] = { i };
return SetToDevice(devID, GetCell(dims, 1), value);
}
/*
/*
set the value of a cell in a 2d tensor in default type
>> value - value we tend to set
>> ni - row index
......@@ -1273,12 +1273,12 @@ bool XTensor::Set2D(DTYPE value, int ni, int mi)
CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[2] = {ni, mi};
int dims[2] = { ni, mi };
return SetToDevice(devID, GetCell(dims, 2), value);
}
/*
/*
set the value of a cell in a 3d tensor in default type
>> value - value we tend to set
>> d0 - index of demension 0
......@@ -1294,11 +1294,11 @@ bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[3] = {d0, d1, d2};
int dims[3] = { d0, d1, d2 };
return SetToDevice(devID, GetCell(dims, 3), value);
}
/*
set the integer value of a cell by its offset
>> value - value we tend to set to the item
......@@ -1308,15 +1308,15 @@ bool XTensor::SetInt(int value, int offset)
{
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
int * d = (int*)data + offset;
int* d = (int*)data + offset;
return SetToDeviceInt(devID, d, value);
}
/*
set the integer value of a cell
/*
set the integer value of a cell
>> value - value we tend to set
>> index - index of the cell for each dimension
>> size - size of the index
......@@ -1339,13 +1339,13 @@ bool XTensor::Set0DInt(int value)
CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors(dataType == X_INT, "The tensor is not in integer type.");
int dims[1] = {0};
int dims[1] = { 0 };
return SetToDeviceInt(devID, GetCell(dims, 0), value);
}
/*
set the integer value of a cell in a 1d tensor
/*
set the integer value of a cell in a 1d tensor
>> value - value we tend to set
>> i - item offset
<< return - succeeded or not
......@@ -1356,12 +1356,12 @@ bool XTensor::Set1DInt(int value, int i)
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in integer type.");
int dims[1] = {i};
int dims[1] = { i };
return SetToDeviceInt(devID, GetCell(dims, 1), value);
}
/*
/*
set the integer value of a cell in a 2d tensor in default type
>> value - value we tend to set
>> ni - row index
......@@ -1375,12 +1375,12 @@ bool XTensor::Set2DInt(int value, int ni, int mi)
CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in integer type.");
int dims[2] = {ni, mi};
int dims[2] = { ni, mi };
return SetToDeviceInt(devID, GetCell(dims, 2), value);
}
/*
/*
set the integer value of a cell in a 3d tensor in default type
>> value - value we tend to set
>> d0 - index of demension 0
......@@ -1396,36 +1396,36 @@ bool XTensor::Set3DInt(int value, int d0, int d1, int d2)
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
CheckNTErrors((dataType == X_INT), "The tensor is not in integer type.");
int dims[3] = {d0, d1, d2};
int dims[3] = { d0, d1, d2 };
return SetToDeviceInt(devID, GetCell(dims, 3), value);
}
/*
/*
increase the value of a cell in a 2d tensor
>> value - value we tend to set
>> ni - row index
>> mi - column index
<< return - succeeded or not
*/
bool XTensor::Add2D(DTYPE value, int ni, int mi)
bool XTensor::Add2D(DTYPE value, int ni, int mi)
{
CheckNTErrors(ni >= 0 && ni < dimSize[0], "the row index is out of range!");
CheckNTErrors(mi >= 0 && mi < dimSize[1], "the column index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
CheckNTErrors(isSparse == false, "TODO!");
if(devID < 0){
DTYPE * p = (DTYPE*)data + ni * dimSize[1] + mi;
if (devID < 0) {
DTYPE* p = (DTYPE*)data + ni * dimSize[1] + mi;
CheckNTErrors((p != NULL), "No data array is found!");
CheckNTErrors((p != NULL), "No data array is found!");
*p = *p + value;
return true;
}
else{
int dims[2] = {ni, mi};
else {
int dims[2] = { ni, mi };
return SetToDevice(devID, GetCell(dims, 2), Get2D(ni, mi) + value);
}
}
......@@ -1433,31 +1433,31 @@ increase the value of a cell in a 2d tensor
/* get the number of non-zero elements (in a sparse tensor) */
int XTensor::GetNonzeroSize() const
{
if(!isSparse){
if (!isSparse) {
XPRINT(1, stderr, "WARNING! Counting non-zero elements in a dense tensor might be slow!\n");
CheckNTErrors(devID < 0, "TODO");
if(dataType == DEFAULT_DTYPE){
if (dataType == DEFAULT_DTYPE) {
int count = 0;
for(int i = 0; i < unitNum; i++){
for (int i = 0; i < unitNum; i++) {
DTYPE value = *(DTYPE*)((char*)data + i * sizeof(DTYPE));
if(value == 0)
if (value == 0)
count++;
}
return count;
}
else{
else {
ShowNTErrors("TODO!");
return -1;
}
}
else{
else {
/* return the head of the tuple list */
return unitNumNonZero;
}
}
/*
set the tensor as "temporary"
/*
set the tensor as "temporary"
>> myIsTMP - the flag
*/
void XTensor::SetTMPFlag(bool myIsTmp)
......@@ -1465,8 +1465,8 @@ void XTensor::SetTMPFlag(bool myIsTmp)
isTmp = myIsTmp;
}
/*
set the tensor as "keep-gradient"
/*
set the tensor as "keep-gradient"
>> myIsGrad - the flag
*/
void XTensor::SetGradFlag(bool myIsGrad)
......@@ -1474,18 +1474,18 @@ void XTensor::SetGradFlag(bool myIsGrad)
isGrad = myIsGrad;
}
/*
set the tensor as "variable"
/*
set the tensor as "variable"
>> myIsVar - the flag
*/
void XTensor::SetVarFlag(bool myIsVar)
{
isVar = myIsVar;
if(isVar)
if (isVar)
SetGradFlag(true);
}
/*
/*
resize a tensor with a specified tensor size
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
......@@ -1493,11 +1493,11 @@ resize a tensor with a specified tensor size
>> myDenseRatio - how often an element has non-zero value
<< return - succeeded or not
*/
bool XTensor::Resize(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType, const float myDenseRatio)
bool XTensor::Resize(const int myOrder, const int* myDimSize,
const TENSOR_DATA_TYPE myDataType, const float myDenseRatio)
{
/* free old mem */
if(data != NULL){
if (data != NULL) {
if (mem == NULL)
XMemFree(devID, data);
else
......@@ -1505,7 +1505,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
}
signature = mem != NULL ? mem->GetSignature() : 0;
order = myOrder;
unitNum = 1;
unitNumNonZero = 0;
......@@ -1513,11 +1513,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
bool filledData = true;
bool zeroData = false;
for(int i = 0; i < order; i++){
for (int i = 0; i < order; i++) {
dimSize[i] = abs(myDimSize[i]);
if(myDimSize[i] < 0)
if (myDimSize[i] < 0)
filledData = false;
if(myDimSize[i] == 0)
if (myDimSize[i] == 0)
zeroData = true;
unitNum *= dimSize[i];
}
......@@ -1528,20 +1528,20 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
dataType = myDataType;
unitSize = GetUnitSize(dataType);
if(myDataType != DEFAULT_DTYPE)
if (myDataType != DEFAULT_DTYPE)
isDefaultDType = false;
else
isDefaultDType = true;
if(zeroData){
if (zeroData) {
unitNum = 0;
return false;
}
if(isSparse){
if (isSparse) {
/*
for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrix as an example,
for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j;
The data array is
---------
......@@ -1555,23 +1555,23 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
(1, 0, 5)
where the first number (2) indicates the number of elements.
*/
int num = int(unitNum * denseRatio + 1);
int tupleSize = sizeof(int)+sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num);
if(filledData){
int * d = NULL;
int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize * (num);
if(mem == NULL){
if (filledData) {
int* d = NULL;
if (mem == NULL) {
d = new int[size];
memset(d, 0, size);
}
else{
else {
d = (int*)mem->Alloc(mem->devID, size);
}
if(d == NULL)
if (d == NULL)
return false;
#if !defined(UNSAFE_BUT_FAST_MEM)
......@@ -1581,11 +1581,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
}
return true;
}
else{
if(filledData){
else {
if (filledData) {
/* allocate the new one */
if(mem == NULL){
data = XMemAlloc(devID, unitNum * unitSize);
if (mem == NULL) {
data = XMemAlloc(devID, unitNum * unitSize);
#if defined(UNSAFE_BUT_FAST_MEM)
XMemSet(devID, data, 0, unitNum * unitSize);
#endif
......@@ -1593,28 +1593,28 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
else
data = (void*)mem->Alloc(mem->devID, unitNum * unitSize);
if(data == NULL)
if (data == NULL)
return false;
}
#if !defined(UNSAFE_BUT_FAST_MEM)
if(data != NULL)
if (data != NULL)
XMem::SetZero(data, unitNum * unitSize, mem);
#endif
return true;
}
}
/*
/*
resize a tensor by another
>> myTensor - tensor for reference
*/
bool XTensor::Resize(const XTensor * myTensor)
bool XTensor::Resize(const XTensor* myTensor)
{
denseRatio = myTensor->denseRatio;
TENSOR_DATA_TYPE myDataType = myTensor->dataType;
if(myDataType != DEFAULT_DTYPE)
if (myDataType != DEFAULT_DTYPE)
isDefaultDType = false;
else
isDefaultDType = true;
......@@ -1622,7 +1622,7 @@ bool XTensor::Resize(const XTensor * myTensor)
return Resize(myTensor->order, myTensor->dimSize, myDataType, denseRatio);
}
/*
/*
binary search to find an element in a sparse tensor
>> key - for search
>> value - value for return
......@@ -1630,54 +1630,54 @@ binary search to find an element in a sparse tensor
it is the previous one if there is no hit
<< return - found it or not?
*/
bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
bool XTensor::BinarySearch(int key, DTYPE& value, void*& position) const
{
CheckNTErrors((isSparse), "A sparse tensor is required!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type.");
int * d = (int*)data;
int* d = (int*)data;
if(key < 0 || *d == 0){
if (key < 0 || *d == 0) {
value = 0;
position = NULL;
return false;
}
int low = 0;
int high = *d - 1;
int low = 0;
int high = *d - 1;
int last = -1;
bool ok = false;
int * k = NULL;
int* k = NULL;
int headSize = sizeof(int);
int tupleSize = sizeof(int)+sizeof(DTYPE);
char * p = (char*)data + headSize;
int tupleSize = sizeof(int) + sizeof(DTYPE);
char* p = (char*)data + headSize;
while (low <= high){
int mid = low + (high-low)/2;
while (low <= high) {
int mid = low + (high - low) / 2;
k = (int*)(p + tupleSize * mid);
if (*k == key){
if (*k == key) {
ok = true;
high = mid -1;
high = mid - 1;
break;
}
else if(*k > key){
high = mid -1;
}
else{
low = mid +1;
else if (*k > key) {
high = mid - 1;
}
else {
low = mid + 1;
last = mid;
}
}
}
if(ok){
DTYPE * p = (DTYPE*)((char*)k + sizeof(int));
if (ok) {
DTYPE* p = (DTYPE*)((char*)k + sizeof(int));
value = *p;
position = k;
return true;
}
else{
else {
value = 0;
if(last == -1)
if (last == -1)
position = NULL;
else
position = (char*)data + headSize + tupleSize * last;
......@@ -1685,20 +1685,20 @@ bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
}
}
/*
dump data to a file
/*
dump data to a file
>> file - where to domp the data
>> label - label of the tensor
>> n - number of items to dump
>> beg - the first item id
>> verbose - verbose level
*/
void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, const int verbose)
void XTensor::Dump(FILE* file, const char* label, const int n, const int beg, const int verbose)
{
if (verbose > verboseLevel)
return;
void * d = data;
void* d = data;
bool isNewData = false;
#ifdef USE_CUDA
......@@ -1716,7 +1716,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
num *= dimSize[i];
num = int(num * denseRatio + 1);
int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num);
int size = sizeof(int) + tupleSize * (num);
d = new char[size];
memset(d, 0, size);
......@@ -1730,8 +1730,8 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
if (label != NULL)
fprintf(file, "%s ", label);
if(isInit){
if (isInit) {
fprintf(file, "order=%d dimsize=", order);
for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]);
......@@ -1739,21 +1739,21 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, ",");
}
}
else{
else {
fprintf(file, "order=-1 dimsize=-1");
}
fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio);
if(!isInit){
if (!isInit) {
fprintf(file, "NULL");
}
if (!isSparse) {
if (dataType == DEFAULT_DTYPE) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){
for (int i = beg; i < end; i++) {
DTYPE f = ((DTYPE*)d)[i];
if(i == beg)
if (i == beg)
fprintf(file, "%e", f);
else
fprintf(file, " %e", f);
......@@ -1762,9 +1762,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
}
else if (dataType == X_INT) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){
for (int i = beg; i < end; i++) {
int f = ((int*)d)[i];
if(i == beg)
if (i == beg)
fprintf(file, "%d", f);
else
fprintf(file, " %d", f);
......@@ -1795,7 +1795,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
}
}
/*
/*
dump data to a file
>> tensor - the tensor for dumping
>> file - where to domp the data
......@@ -1804,15 +1804,15 @@ dump data to a file
>> beg - the first item id
>> verbose - verbose level
*/
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose)
void XTensor::Dump(const XTensor* tensor, FILE* file, const char* label, const int n, const int beg, const int verbose)
{
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a);
a.Dump(file, label, n, beg, verbose);
}
/*
dump data to a binary file
/*
dump data to a binary file
>> file - where to dump the data
*/
void XTensor::BinaryDump(FILE* file)
......@@ -1831,12 +1831,12 @@ void XTensor::BinaryDump(FILE* file)
}
}
/*
/*
read data from a file
>> file - where to load the data
>> label - label of the tensor
*/
void XTensor::Read(FILE * file, const char * label)
void XTensor::Read(FILE* file, const char* label)
{
char typeName[32] = "";
char dimSizeName[128] = "";
......@@ -1855,12 +1855,12 @@ void XTensor::Read(FILE * file, const char * label)
fgetc(file);
if (fscanf(file, "order=%d dimsize=%s dtype=%s dense=%f",
&dimNum, dimSizeName, typeName, &dRatio) < 4) {
&dimNum, dimSizeName, typeName, &dRatio) < 4) {
ShowNTErrors("Incorrect format when reading the tensor!");
}
char c;
do {
c = fgetc(file);
} while (c != '\n' && c != EOF);
......@@ -1869,7 +1869,7 @@ void XTensor::Read(FILE * file, const char * label)
int o = 0;
bool sameSize = true;
char * p = dimSizeName;
char* p = dimSizeName;
while (*p != 0) {
while (*p == ' ' || *p == '\t')
p++;
......@@ -1893,14 +1893,14 @@ void XTensor::Read(FILE * file, const char * label)
if (!sameSize || dRatio > denseRatio || GetDataType(typeName) != dataType)
Resize(dimNum, dims, GetDataType(typeName), dRatio);
void * dataBuf = XMemAlloc(-1, GetDataSizeInChar());
void * dataBackup = data;
void* dataBuf = XMemAlloc(-1, GetDataSizeInChar());
void* dataBackup = data;
data = dataBuf;
if (!isSparse) {
if (dataType == DEFAULT_DTYPE) {
for (int i = 0; i < unitNum; i++) {
DTYPE * f = ((DTYPE*)data) + i;
DTYPE* f = ((DTYPE*)data) + i;
if (fscanf(file, "%e", f) < 1) {
ShowNTErrors("Incorrect tensor format!");
}
......@@ -1943,23 +1943,23 @@ void XTensor::Read(FILE * file, const char * label)
delete[](char*)dataBuf;
}
/*
/*
read data from a binary file
>>> file - the file stream pointer
>>> offset - the distance from the start to this tensor
*/
void XTensor::BinaryRead(FILE* file, size_t offset)
{
fseek(file, offset, 0);
//fseek(file, offset, 0);
switch (dataType) {
case X_INT: {
int * d = new int[unitNum];
int* d = new int[unitNum];
fread(d, sizeof(int), unitNum, file);
SetData(d, unitNum);
delete[] d;
}
default: {
float * d = new float[unitNum];
float* d = new float[unitNum];
fread(d, sizeof(float), unitNum, file);
SetData(d, unitNum);
delete[] d;
......@@ -1971,7 +1971,7 @@ void XTensor::BinaryRead(FILE* file, size_t offset)
flush the data to the target device
>> targetMem - memory pool on the target device
*/
void XTensor::FlushToMem(XMem * targetMem)
void XTensor::FlushToMem(XMem* targetMem)
{
if (targetMem == NULL)
return;
......@@ -1984,7 +1984,7 @@ void XTensor::FlushToMem(XMem * targetMem)
CudaCPUToGPUFlush(&l, targetMem->devID, targetMem);
}
else if (mem != targetMem) {
void * tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
data = tmpData;
mem = targetMem;
......@@ -2008,29 +2008,29 @@ void XTensor::FlushToMem(XMem * targetMem)
}
/*
allocate the memory space of the tensor (in the global memory)
allocate the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process
>> myMem - the memory pool we are using
>> useBuf - indicates whether we use the buffer in the memory pool
*/
void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
void XTensor::AllocateData(XTensor* tensor, XMem* myMem, bool useBuf)
{
if(tensor == NULL)
if (tensor == NULL)
return;
if(myMem == NULL){
if(tensor->data != NULL)
if (myMem == NULL) {
if (tensor->data != NULL)
FreeData(tensor, NULL, false);
tensor->data = XMemAlloc(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = true;
}
else{
else {
CheckNTErrors((tensor->data == NULL), "Cannot renew the space for the tensor");
if(useBuf){
if (useBuf) {
tensor->data = myMem->AllocBuf(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = false;
}
else{
else {
tensor->data = myMem->AllocGlobal(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = true;
}
......@@ -2039,22 +2039,22 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
tensor->signature = 0;
}
/*
free the memory space of the tensor (in the global memory)
/*
free the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process
>> myMem - the memory pool we are using
>> useBuf - indicates whether we use the buffer in the memory pool
*/
void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
void XTensor::FreeData(XTensor* tensor, XMem* myMem, bool useBuf)
{
if(tensor == NULL)
if (tensor == NULL)
return;
if(myMem == NULL){
if (myMem == NULL) {
XMemFree(tensor->devID, tensor->data);
}
else{
if(tensor->isInGlobalMem)
else {
if (tensor->isInGlobalMem)
myMem->ReleaseGlobal(tensor->devID, tensor->data);
else
myMem->ReleaseBuf(tensor->devID, tensor->GetDataSizeInChar());
......@@ -2065,27 +2065,27 @@ void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
}
/* overloading of the plus-sign */
XTensor operator+ (const DTYPE shift, const XTensor &tensor)
XTensor operator+ (const DTYPE shift, const XTensor& tensor)
{
return ScaleAndShift(tensor, 1, shift);
}
/* overloading of the minus-sign */
XTensor operator- (const DTYPE shift, const XTensor &tensor)
XTensor operator- (const DTYPE shift, const XTensor& tensor)
{
return ScaleAndShift(tensor, 1, -shift);
}
/* overloading of the multiply-sign */
XTensor operator* (const DTYPE scale, const XTensor &tensor)
XTensor operator* (const DTYPE scale, const XTensor& tensor)
{
return ScaleAndShift(tensor, scale, 0);
}
/* overloading of the division-sign */
XTensor operator/ (const DTYPE scale, const XTensor &tensor)
XTensor operator/ (const DTYPE scale, const XTensor& tensor)
{
return ScaleAndShift(tensor, (DTYPE)1/scale, 0);
return ScaleAndShift(tensor, (DTYPE)1 / scale, 0);
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -86,7 +86,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim)
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip)+j * vecBufLength); \
} \
for (int j = 1; j < strideNum / 32; j++) { \
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength); \
const DTYPE* ptr = (DTYPE*)(ip + j * 4 * vecBufLength); \
vecBuf[0] = vecBuf[0]._vectorOp(VectorBuffer::loadu(ptr + 0 * vecBufLength)); \
vecBuf[1] = vecBuf[1]._vectorOp(VectorBuffer::loadu(ptr + 1 * vecBufLength)); \
vecBuf[2] = vecBuf[2]._vectorOp(VectorBuffer::loadu(ptr + 2 * vecBufLength)); \
......@@ -106,7 +106,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim)
else { \
/* data is separated */ \
for(int i = 0; i < blockNum; i++){ \
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){ \
for(int j = 0; j < stride / 32; j++){ \
DTYPE * ip = (DTYPE*)input->data + blockSize * i; \
DTYPE * op = (DTYPE*)output->data + stride * i; \
VectorBuffer vecBuf[4]; \
......
......@@ -42,7 +42,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
int num = input->dimSize[dim];
_ReduceSum(input, output, dim);
_ScaleAndShiftMe(output, (DTYPE)1/num, 0);
_ScaleAndShiftMe(output, 1.0F/(DTYPE)(num), 0);
}
/*
......
......@@ -105,7 +105,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias);
}
for(int j = 1; j < strideNum / 32; j++){
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength);
const DTYPE* ptr = (DTYPE*)(ip + (j * 4) * vecBufLength);
vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias);
vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias);
......@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} else{
//data is separated
for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){
for(int j = 0; j < stride / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
......@@ -133,8 +133,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
}
VectorBuffer vecBuf[4];
for(int k = 0; k < 4; k++){
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + j * 32 / sizeof(DTYPE));
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + k * 32 / sizeof(DTYPE));
}
for(int k = 1; k < strideNum; k++){
DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论