Commit 99097e41 by huchi

add support for greedy search

parent bfa6fc90
@@ -19,6 +19,10 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
+//#define CRTDBG_MAP_ALLOC
+//#include <stdlib.h>
+//#include <crtdbg.h>
#include <stdio.h>
#include "./network/XNet.h"
#include "./tensor/XUtility.h"
@@ -27,9 +31,7 @@
#include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/Transformer.h"
-//#define CRTDBG_MAP_ALLOC
-//#include <stdlib.h>
-//#include <crtdbg.h>
using namespace nts;
using namespace fnnlm;
@@ -37,19 +39,10 @@ using namespace transformer;
int main( int argc, const char ** argv )
{
-//_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
-//_CrtSetBreakAlloc(2708);
+/*_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
+_CrtSetBreakAlloc(2708);*/
TransformerMain(argc - 1, argv + 1);
-/*XTensor x;
-InitTensor2D(&x, 2, 2);
-float d[]{ 1,2,3,4 };
-x.SetData(d, 4);
-XTensor y;
-y = ReduceSum(x, 0);
-y.Dump(stderr);*/
//_CrtDumpMemoryLeaks();
return 0;
...
@@ -34,7 +34,7 @@ T2TAttention::T2TAttention()
nhead = -1;
dk = -1;
dv = -1;
d = -1;
isMasked = false;
ignored = 0;
}
@@ -62,7 +62,7 @@ void T2TAttention::InitModel(int argc, char** argv,
float minmax = 0;
-LoadParamInt(argc, argv, "nhead", &nhead, 8);
+LoadParamInt(argc, argv, "nhead", &nhead, 4);
LoadParamInt(argc, argv, "d", &dk, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &dv, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
@@ -70,15 +70,15 @@ void T2TAttention::InitModel(int argc, char** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
-InitTensor2D(&wq, d, d, X_FLOAT, devID);
-InitTensor1D(&bq, d, X_FLOAT, devID);
-InitTensor2D(&wk, d, d, X_FLOAT, devID);
-InitTensor1D(&bk, d, X_FLOAT, devID);
-InitTensor2D(&wv, d, d, X_FLOAT, devID);
-InitTensor1D(&bv, d, X_FLOAT, devID);
-InitTensor2D(&rp_embedding_k, max_relative_position * 2 + 1, d/nhead, X_FLOAT, devID);
-InitTensor2D(&wa, d, d, X_FLOAT, devID);
-InitTensor1D(&ba, d, X_FLOAT, devID);
+InitTensor2DV2(&wq, d, d, X_FLOAT, devID);
+InitTensor1DV2(&bq, d, X_FLOAT, devID);
+InitTensor2DV2(&wk, d, d, X_FLOAT, devID);
+InitTensor1DV2(&bk, d, X_FLOAT, devID);
+InitTensor2DV2(&wv, d, d, X_FLOAT, devID);
+InitTensor1DV2(&bv, d, X_FLOAT, devID);
+InitTensor2DV2(&rp_embedding_k, max_relative_position * 2 + 1, d/nhead, X_FLOAT, devID);
+InitTensor2DV2(&wo, d, d, X_FLOAT, devID);
+InitTensor1DV2(&bo, d, X_FLOAT, devID);
}
/*
@@ -94,24 +94,27 @@ make the network
>> cacheType - which type that cache is
<< return - multi-attention result
*/
-XTensor T2TAttention::Make( XTensor& k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, Cache* cache, int cacheType)
+XTensor T2TAttention::Make(XTensor& k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, Cache* cache, int cacheType)
{
const bool isEnc = (!cache) ? true : false;
/* linear transformation before self-attention */
XTensor q2, k2, v2;
-q2 = MatrixMul(q, X_NOTRANS, wq, X_TRANS) + bq;
+q2 = MatrixMul(q, wq) + bq;
if (!cache) {
/* self attention for encoder layers */
-k2 = MatrixMul(k, X_NOTRANS, wk, X_TRANS) + bk;
-v2 = MatrixMul(v, X_NOTRANS, wv, X_TRANS) + bv;
+k2 = MatrixMul(k, wk) + bk;
+v2 = MatrixMul(v, wv) + bv;
return MakeRPRAttention(k2, q2, v2, mask, isTraining, isEnc);
}
else {
if (cacheType == SELF_ATT) {
-k2 = MatrixMul(k, X_NOTRANS, wk, X_TRANS) + bk;
-v2 = MatrixMul(v, X_NOTRANS, wv, X_TRANS) + bv;
+k2 = MatrixMul(k, wk) + bk;
+v2 = MatrixMul(v, wv) + bv;
/* if hit, we only concat the cache with the new token */
if (!cache->miss) {
@@ -121,12 +124,13 @@ XTensor T2TAttention::Make( XTensor& k, XTensor& q, XTensor& v, XTensor* mask,
cache->key = k2;
cache->value = v2;
cache->miss = false;
return MakeRPRAttention(cache->key, q2, cache->value, mask, isTraining, isEnc);
}
else if (cacheType == EN_DE_ATT) {
if (cache->miss) {
-cache->key = MatrixMul(k, X_NOTRANS, wk, X_TRANS) + bk;
-cache->value = MatrixMul(v, X_NOTRANS, wv, X_TRANS) + bv;
+cache->key = MatrixMul(k, wk) + bk;
+cache->value = MatrixMul(v, wv) + bv;
cache->miss = false;
}
return MakeAttention(cache->key, q2, cache->value, mask, isTraining, isEnc);
@@ -134,50 +138,49 @@ XTensor T2TAttention::Make( XTensor& k, XTensor& q, XTensor& v, XTensor* mask,
CheckNTErrors(0, "invalid cache type");
}
}
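Note: in the SELF_ATT branch above, the comment "if hit, we only concat the cache with the new token" is what makes step-by-step greedy search cheap. After the first decoding step only the newest token's key/value are projected and appended to cache->key / cache->value; the EN_DE_ATT branch projects the encoder output once and reuses it. A minimal, self-contained sketch of that caching pattern (std::vector stands in for the XTensor cache here; all names and values are illustrative and not part of this commit):

    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<float> cachedKeys;          /* plays the role of cache->key */
        bool miss = true;                       /* plays the role of cache->miss */
        for (int step = 0; step < 3; step++) {
            float newKey = 0.1F * step;         /* "projected" key of the newest token only */
            if (miss) {
                cachedKeys.assign(1, newKey);   /* first step fills the cache */
                miss = false;
            }
            else {
                cachedKeys.push_back(newKey);   /* later steps only concatenate */
            }
            printf("step %d: %zu cached keys\n", step, cachedKeys.size());
        }
        return 0;
    }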
/*
make the attention network given keys, queries and values (after linear transformation)
>> k - keys. It might be of size B * L * H
where B = batch size, L = sequence length,
and H = vector size of each position
>> q - queries
>> v - values
>> mask - as it is
>> isTraining - indicates whether the model is used for training
*/
-XTensor T2TAttention::MakeAttention(XTensor &k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, bool is_encoder)
+XTensor T2TAttention::MakeAttention(XTensor& k, XTensor& q, XTensor& v, XTensor* mask, bool isTraining, bool is_encoder)
{
XTensor kheads;
XTensor qheads;
XTensor vheads;
/* multi head */
kheads = Split(k, k.order - 1, nhead);
qheads = Split(q, q.order - 1, nhead);
vheads = Split(v, v.order - 1, nhead);
XTensor att;
XTensor dot;
XTensor scalar;
/* scalar = softmax(Q * K^T / sqrt(dk)) * V */
dot = BMMul(qheads, X_NOTRANS, kheads, X_TRANS);
-/*if (isMasked && mask) {
-_SumMe(&dot, mask);
-}*/
+/*if (isMasked && mask)
+_SumMe(&dot, mask);*/
dot = Linear(dot, 1.0F / (float)sqrt((float)dk / nhead));
scalar = Softmax(dot, -1);
-/*if(isTraining && dropoutP > 0)
-scalar = Dropout(scalar, dropoutP);*/
+if(isTraining && dropoutP > 0)
+scalar = Dropout(scalar, dropoutP);
att = BMMul(scalar, vheads);
/* concatenate the heads */
-return MulAndShift(Merge(att, att.order - 1), X_NOTRANS, wa, X_TRANS, ba);
+return MulAndShift(Merge(att, att.order - 1), wo, bo);
}
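For reference, the "scalar = softmax(Q * K^T / sqrt(dk)) * V" comment above corresponds to a per-row softmax over scaled scores, where dk/nhead is the per-head key size after Split. A small standalone sketch of that arithmetic on one made-up row of scores (the sizes and numbers are invented):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const int lenKV = 4;
        const float dkPerHead = 64.0F;                 /* dk / nhead */
        float score[lenKV] = { 3.0F, 1.0F, 0.5F, -2.0F };
        float sum = 0;
        for (int j = 0; j < lenKV; j++) {
            score[j] = std::exp(score[j] / std::sqrt(dkPerHead));
            sum += score[j];
        }
        for (int j = 0; j < lenKV; j++)
            printf("%f\n", score[j] / sum);            /* attention weight over key j */
        return 0;
    }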
/*
@@ -215,34 +218,32 @@ XTensor T2TAttention::MakeRPRAttention(XTensor& k, XTensor& q, XTensor& v, XTens
InitTensor4DV2(&dot, nhead, batch_size, len_q, len_kv, X_FLOAT, q.devID);
/* generate the relative emb index (L_q, L_kv) */
-GetRPEmbedding(&emb_matrix, len_q, len_kv, max_relative_position, q.devID,is_encoder);
+GetRPEmbedding(&emb_matrix, len_q, len_kv, max_relative_position, q.devID, is_encoder);
/* generate the relative key from the rp_embedding_k (L_q, L_kv, H/K) */
_Gather(&rp_embedding_k, &relative_key, &emb_matrix);
/* RPR dot product (K, B, L_q, L_kv)*/
-qheads = qheads / float(nhead);
RPDotProduct(&qheads, &kheads, &relative_key, &dot, true);
/*if (isMasked && mask)
_SumMe(&dot, mask);*/
/* scale the dot result */
-//dot = Linear(dot, 1.0F / (float)sqrt((float)dk / nhead));
+dot = Linear(dot, 1.0F / (float)sqrt((float)dk / nhead));
/* softmax */
scalar = Softmax(dot, -1);
-/*if (isTraining && dropoutP > 0)
-scalar = Dropout(scalar, dropoutP);*/
+if (isTraining && dropoutP > 0)
+scalar = Dropout(scalar, dropoutP);
/* generate the relative attention output (K, B, L_q, H/K) */
att = BMMul(scalar, vheads);
/* concatenate the heads */
-return MulAndShift(Merge(att, att.order - 1), X_NOTRANS, wa, X_TRANS, ba);
+return MulAndShift(Merge(att, att.order - 1), wo, bo);
}
void T2TAttention::GetRPEmbedding(XTensor* emb_matrix, const int len_q, const int len_kv, const int max_relative_length, const int devID, const bool is_encoder)
@@ -251,10 +252,11 @@ void T2TAttention::GetRPEmbedding(XTensor* emb_matrix, const int len_q, const in
XTensor range;
InitTensor1DV2(&range, len_kv, X_INT, devID);
int* index = new int[len_kv];
// for encoder self-attention which the L_q = L_kv
if (is_encoder)
{
-for (int i = 0; i <len_kv; i++)
+for (int i = 0; i < len_kv; i++)
index[i] = i;
range.SetData(index, len_kv);
XTensor range_2D, range_2D_t;
@@ -267,7 +269,7 @@ void T2TAttention::GetRPEmbedding(XTensor* emb_matrix, const int len_q, const in
// for decoder self-attention which the L_q != L_kv, and L_q is 1
else
{
-for (int i = 0; i <len_kv; i++)
+for (int i = 0; i < len_kv; i++)
index[i] = -len_kv + i + 1;
range.SetData(index, len_kv);
_Unsqueeze(&range, emb_matrix, 0, len_q);
@@ -299,7 +301,6 @@ void T2TAttention::RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* att
XTensor context;
InitTensor4DV2(&context, head_num, batch_size, len_q, last_dim, X_FLOAT, x->devID);
_MatrixMulBatched(x, X_NOTRANS, y, transpose_flag, &context);
-//if (profiler_) profiler_->FinishTimer("RPDotPro-BMM");
// reshape and transpose x to (L_q, K*B, H/K or L_kv)
int merge_dims[] = { head_num * batch_size, len_q, x->dimSize[3] };
@@ -323,5 +324,6 @@ void T2TAttention::RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* att
relative_t.Reshape(4, split_dims);
_Sum(&context, &relative_t, attention);
}
}
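GetRPEmbedding above builds the relative-position index matrix: the encoder branch broadcasts 0..L_kv-1 against the query positions (giving distances j - i), while the decoder branch uses a single row -L_kv+1..0 for the newest token. A standalone sketch of the resulting indices follows; the clipping to [-max_relative_position, max_relative_position] and the shift into rp_embedding_k rows happen in the elided part of the function, so those two steps are assumptions here:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int lenQ = 4, lenKV = 4, maxRelPos = 2;
        for (int i = 0; i < lenQ; i++) {
            for (int j = 0; j < lenKV; j++) {
                /* relative distance j - i, clipped to [-maxRelPos, maxRelPos] (assumed) */
                int rel = std::min(std::max(j - i, -maxRelPos), maxRelPos);
                printf("%2d ", rel + maxRelPos);   /* row index into rp_embedding_k (assumed) */
            }
            printf("\n");
        }
        return 0;
    }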
@@ -90,14 +90,18 @@ public:
/* bias for V */
XTensor bv;
+XTensor wBig;
+XTensor bBig;
/* RPR emb */
XTensor rp_embedding_k;
/* transformation after dot-product attention */
-XTensor wa;
+XTensor wo;
/* bias after dot-product attention */
-XTensor ba;
+XTensor bo;
/* size of transformed Q and K */
int dk;
...
@@ -31,27 +31,27 @@ namespace transformer
/* constructor */
AttDecoder::AttDecoder()
{
-attentions = NULL;
+selfAtt = NULL;
fnns = NULL;
-attLayerNorms = NULL;
-attentionsEnde = NULL;
-attEndeLayerNorms = NULL;
-decodeLayerNorm = NULL;
-selfCache = NULL;
-contextCache = NULL;
+selfAttLayerNorms = NULL;
+enDeAtt = NULL;
+enDeAttLayerNorms = NULL;
+decoderLayerNorm = NULL;
+selfAttCache = NULL;
+enDeAttCache = NULL;
}
/* de-constructor */
AttDecoder::~AttDecoder()
{
-delete[] selfCache;
-delete[] contextCache;
-delete[] attentions;
+delete[] selfAttCache;
+delete[] enDeAttCache;
+delete[] selfAtt;
delete[] fnns;
-delete[] attLayerNorms;
-delete[] attentionsEnde;
-delete[] attEndeLayerNorms;
-delete decodeLayerNorm;
+delete[] selfAttLayerNorms;
+delete[] enDeAtt;
+delete[] enDeAttLayerNorms;
+delete decoderLayerNorm;
}
/*
@@ -71,7 +71,7 @@ void AttDecoder::InitModel(int argc, char ** argv,
devID = myDevID;
ignored = myIgnored;
-LoadParamInt(argc, argv, "nlayer", &nlayer, 3);
+LoadParamInt(argc, argv, "nlayer", &nlayer, 4);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsizetgt", &vSize, 34040);
@@ -83,24 +83,24 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* embedding model */
embedder.InitModel(argc, argv, devID, false);
-attentions = new T2TAttention[nlayer];
+selfAtt = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
-attLayerNorms = new T2TLN[nlayer];
-attentionsEnde = new T2TAttention[nlayer];
-attEndeLayerNorms = new T2TLN[nlayer];
-decodeLayerNorm = new T2TLN;
-selfCache = new Cache[nlayer];
-contextCache = new Cache[nlayer];
+selfAttLayerNorms = new T2TLN[nlayer];
+enDeAtt = new T2TAttention[nlayer];
+enDeAttLayerNorms = new T2TLN[nlayer];
+decoderLayerNorm = new T2TLN;
+selfAttCache = new Cache[nlayer];
+enDeAttCache = new Cache[nlayer];
/* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) {
-attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
+selfAtt[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID);
-attLayerNorms[i].InitModel(argc, argv, myDevID);
-attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
-attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
+selfAttLayerNorms[i].InitModel(argc, argv, myDevID);
+enDeAtt[i].InitModel(argc, argv, true, myIgnored, myDevID);
+enDeAttLayerNorms[i].InitModel(argc, argv, myDevID);
}
-decodeLayerNorm->InitModel(argc, argv, myDevID);
+decoderLayerNorm->InitModel(argc, argv, myDevID);
}
/*
@@ -131,48 +131,38 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, X
XTensor attNorm;
/* layer normalization */
-inputNorm = attLayerNorms[i].Make(x);
-//inputNorm.Dump(stderr, "inputNorm", 10);
+inputNorm = selfAttLayerNorms[i].Make(x);
/******************/
/* self attention */
-att = attentions[i].Make(inputNorm, inputNorm, inputNorm, NULL, isTraining, &selfCache[i], SELF_ATT);
+att = selfAtt[i].Make(inputNorm, inputNorm, inputNorm, NULL, isTraining, &selfAttCache[i], SELF_ATT);
/* dropout */
if(isTraining && dropoutP > 0)
att = Dropout(att, dropoutP);
/* residual connection */
-_SumMe(&att, &x);
-//att.Dump(stderr, "Sum(att, x)", 10);
+att = att + x;
/* layer normalization */
-attNorm = attEndeLayerNorms[i].Make(att);
-//attNorm.Dump(stderr, "attNorm", 10);
+attNorm = enDeAttLayerNorms[i].Make(att);
/* encoder-decoder attention */
-ende = attentionsEnde[i].Make(outputEnc, attNorm, outputEnc, &maskEncDec, isTraining, &contextCache[i], EN_DE_ATT);
-//ende.Dump(stderr, "ende atten", 10);
+ende = enDeAtt[i].Make(outputEnc, attNorm, outputEnc, &maskEncDec, isTraining, &enDeAttCache[i], EN_DE_ATT);
/* dropout */
if(isTraining && dropoutP > 0)
ende = Dropout(ende, dropoutP);
/* residual connection */
-_SumMe(&ende, &att);
-//res.Dump(stderr, "Sum(ende, att)", 10);
+ende = ende + att;
/* fnn */
x = fnns[i].Make(ende, isTraining);
-//x.Dump(stderr, "fnns[i]", 10);
}
-x = decodeLayerNorm->Make(x);
-//x.Dump(stderr, "decodeLayerNorm", 10);
-x.SetName(DECODING_NAME);
+x = decoderLayerNorm->Make(x);
return x;
}
...
@@ -63,13 +63,13 @@ public:
T2TFNN * fnns;
/* attention model of each layer */
-T2TAttention * attentions;
+T2TAttention * selfAtt;
/* layer normalization for attention */
-T2TLN * attLayerNorms;
+T2TLN * selfAttLayerNorms;
/* layer normalization for decoder */
-T2TLN * decodeLayerNorm;
+T2TLN * decoderLayerNorm;
/* input tensor of the encoder */
XTensor * input;
@@ -78,16 +78,16 @@ public:
XTensor * output;
/* encoder-decoder attention model of each layer */
-T2TAttention * attentionsEnde;
+T2TAttention * enDeAtt;
/* layer normalization for encoder-decoder attention */
-T2TLN * attEndeLayerNorms;
+T2TLN * enDeAttLayerNorms;
/* layer cache list */
-Cache* selfCache;
+Cache* selfAttCache;
/* layer cache list */
-Cache* contextCache;
+Cache* enDeAttCache;
public:
/* constructor */
...
@@ -62,7 +62,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "pad", &padIdx, 1);
-InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);
+InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
maxLength = maxLength + 1 + 1;
DTYPE v = 1.0F/(float)sqrt((float)eSize);
@@ -80,7 +80,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
{
-InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
+InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
@@ -113,47 +113,47 @@ make the network
*/
XTensor T2TEmbedder::Make(XTensor &input, int prevLen)
{
-/* assert padding index is 1 */
-CheckNTErrors(input.order > 1, "Wrong input tensor size!");
-CheckNTErrors(input.dimSize[input.order - 1] < maxLength, "The sequence is too long!");
-CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
-CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");
-XTensor wordEmbedding, position, posEmbedding;
-InitTensor(&position, &input);
-int* posData = new int[input.unitNum];
-XTensor inputCPU;
-InitTensorOnCPU(&inputCPU, &input);
-_CopyValues(&input, &inputCPU);
-for (int i = 0; i < inputCPU.GetDim(0); i++) {
-int startNoPad = 2 + prevLen - 1;
-int* p = ((int*)inputCPU.data) + i * inputCPU.GetDim(1);
-for (int j = 0; j < inputCPU.GetDim(1); j++) {
-if (p[j] == 1) {
-posData[i * inputCPU.GetDim(1) + j] = 1;
-}
-else {
-posData[i * inputCPU.GetDim(1) + j] = startNoPad++;
-}
-}
-}
-position.SetData(posData, position.unitNum);
-delete[] posData;
-/* we make positional embeddings first */
-if(true){
-posEmbedding = Gather(posEmbeddingBase, position);
-}
+///* assert padding index is 1 */
+//CheckNTErrors(input.order > 1, "Wrong input tensor size!");
+//CheckNTErrors(input.dimSize[input.order - 1] < maxLength, "The sequence is too long!");
+//CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
+//CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");
+//
+//XTensor wordEmbedding, position, posEmbedding;
+//InitTensor(&position, &input);
+//int* posData = new int[input.unitNum];
+//XTensor inputCPU;
+//InitTensorOnCPU(&inputCPU, &input);
+//_CopyValues(&input, &inputCPU);
+//for (int i = 0; i < inputCPU.GetDim(0); i++) {
+// int startNoPad = 2 + prevLen - 1;
+// int* p = ((int*)inputCPU.data) + i * inputCPU.GetDim(1);
+// for (int j = 0; j < inputCPU.GetDim(1); j++) {
+// if (p[j] == 1) {
+// posData[i * inputCPU.GetDim(1) + j] = 1;
+// }
+// else {
+// posData[i * inputCPU.GetDim(1) + j] = startNoPad++;
+// }
+// }
+//}
+//position.SetData(posData, position.unitNum);
+//delete[] posData;
+///* we make positional embeddings first */
+//if(true){
+// posEmbedding = Gather(posEmbeddingBase, position);
+//}
/* then we make word embeddings */
+XTensor wordEmbedding;
wordEmbedding = Gather(w, input);
wordEmbedding = Linear(wordEmbedding, (float)sqrt((float)eSize));
...
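With the positional-embedding code commented out above, the kept path is just an embedding lookup followed by scaling: Gather(w, input) then Linear(..., sqrt(eSize)). A tiny standalone illustration of that arithmetic (the vocabulary, embedding size and values are made up):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const int vSize = 3, eSize = 2;
        float w[vSize][eSize] = { { 0.1F, 0.2F }, { 0.3F, 0.4F }, { 0.5F, 0.6F } };
        int input[2] = { 2, 0 };                       /* token ids of one sequence */
        float scale = std::sqrt((float)eSize);         /* the Linear factor */
        for (int t = 0; t < 2; t++) {
            for (int e = 0; e < eSize; e++)
                printf("%f ", w[input[t]][e] * scale); /* Gather then scale */
            printf("\n");
        }
        return 0;
    }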
@@ -29,7 +29,7 @@ using namespace nts;
namespace transformer
{
-#define DEFAULT_EMBEDDING_SIZE 512
+#define DEFAULT_EMBEDDING_SIZE 128
/*
embedding (of word at position i):
...
@@ -34,7 +34,7 @@ AttEncoder::AttEncoder()
attentions = NULL;
fnns = NULL;
attLayerNorms = NULL;
-encodeLayerNorm = NULL;
+encoderLayerNorm = NULL;
}
/* de-constructor */
@@ -43,7 +43,7 @@ AttEncoder::~AttEncoder()
delete[] attentions;
delete[] fnns;
delete[] attLayerNorms;
-delete encodeLayerNorm;
+delete encoderLayerNorm;
}
/*
@@ -61,7 +61,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
devID = myDevID;
ignored = myIgnored;
-LoadParamInt(argc, argv, "nlayer", &nlayer, 35);
+LoadParamInt(argc, argv, "nlayer", &nlayer, 20);
LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "esize", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "vsize", &vSize, 34040);
@@ -76,7 +76,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer];
attLayerNorms = new T2TLN[nlayer];
-encodeLayerNorm = new T2TLN;
+encoderLayerNorm = new T2TLN;
/* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){
@@ -84,7 +84,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID);
}
-encodeLayerNorm->InitModel(argc, argv, myDevID);
+encoderLayerNorm->InitModel(argc, argv, myDevID);
}
/*
@@ -123,13 +123,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor *mask, XTensor &maskEncDec, boo
/* fnn */
x = fnns[i].Make(res, isTraining);
}
-x = encodeLayerNorm->Make(x);
-x.SetName(ENCODING_NAME);
-input.SetName(ENCODING_INPUT_NAME);
+x = encoderLayerNorm->Make(x);
return x;
}
...
@@ -93,11 +93,11 @@ public:
/* attention model of each layer */
T2TAttention * attentions;
-/* layer normalization for attention */
+/* layer normalizations for attention */
T2TLN * attLayerNorms;
/* layer normalization for encoder */
-T2TLN * encodeLayerNorm;
+T2TLN * encoderLayerNorm;
/* input tensor of the encoder */
XTensor * input;
...
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,9 +15,9 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include <math.h>
#include "T2TFNN.h"
@@ -32,9 +32,9 @@ namespace transformer
/* constructor */
T2TFNN::T2TFNN()
{
inSize = -1;
outSize = -1;
hSize = -1;
}
/* deconstructor */
@@ -42,28 +42,28 @@ T2TFNN::~T2TFNN()
{
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
>> myDevID - device id
*/
-void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
+void T2TFNN::InitModel(int argc, char** argv, int myDevID)
{
devID = myDevID;
float minmax = 0;
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &outSize, DEFAULT_EMBEDDING_SIZE);
-LoadParamInt(argc, argv, "fnnh", &hSize, outSize * 4);
+LoadParamInt(argc, argv, "fnnh", &hSize, outSize * 8);
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
-InitTensor2DV2(&w1, hSize, inSize, X_FLOAT, devID);
+InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
-InitTensor2DV2(&w2, outSize, hSize, X_FLOAT, devID);
+InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
fnnLayerNorm.InitModel(argc, argv, myDevID);
@@ -78,25 +78,25 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
//b2.SetZeroAll();
}
/*
make the network
y = max(0, x * w1 + b1) * w2 + b2
>> input - the input tensor
>> return - the output tensor
*/
-XTensor T2TFNN::Make(XTensor &input, bool isTraining)
+XTensor T2TFNN::Make(XTensor& input, bool isTraining)
{
XTensor t1;
/* t1 = max(0, x * w1 + b1) */
-t1 = Rectify(MulAndShift(fnnLayerNorm.Make(input), X_NOTRANS, w1, X_TRANS, b1));
-if(isTraining && dropoutP > 0)
+t1 = Rectify(MulAndShift(fnnLayerNorm.Make(input), w1, b1));
+if (isTraining && dropoutP > 0)
t1 = Dropout(t1, dropoutP);
/* result = t1 * w2 + b2 */
XTensor res;
-res = MulAndShift(t1, X_NOTRANS, w2, X_TRANS, b2);
+res = MulAndShift(t1, w2, b2);
_SumMe(&res, &input);
return res;
}
...
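The fnnh default and the w1/w2 shapes change together here: with w1 stored as (inSize, hSize) and w2 as (hSize, outSize), MulAndShift no longer needs the explicit transpose. A standalone sketch of y = max(0, x * w1 + b1) * w2 + b2 under that layout (sizes and values are made up; the residual _SumMe(&res, &input) is left out):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int inSize = 2, hSize = 3, outSize = 2;
        float x[inSize]          = { 1.0F, -1.0F };
        float w1[inSize][hSize]  = { { 0.5F, -0.5F, 1.0F }, { 1.0F, 0.5F, -1.0F } };
        float b1[hSize]          = { 0.0F, 0.1F, 0.0F };
        float w2[hSize][outSize] = { { 1.0F, 0.0F }, { 0.0F, 1.0F }, { 1.0F, 1.0F } };
        float b2[outSize]        = { 0.0F, 0.0F };
        float t1[hSize], y[outSize];
        for (int j = 0; j < hSize; j++) {
            t1[j] = b1[j];
            for (int i = 0; i < inSize; i++)
                t1[j] += x[i] * w1[i][j];            /* x * w1 + b1 */
            t1[j] = std::max(0.0F, t1[j]);           /* Rectify */
        }
        for (int k = 0; k < outSize; k++) {
            y[k] = b2[k];
            for (int j = 0; j < hSize; j++)
                y[k] += t1[j] * w2[j][k];            /* t1 * w2 + b2 */
            printf("%f\n", y[k]);
        }
        return 0;
    }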
@@ -53,8 +53,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID)
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
-InitTensor1D(&w, d, X_FLOAT, devID);
-InitTensor1D(&b, d, X_FLOAT, devID);
+InitTensor1DV2(&w, d, X_FLOAT, devID);
+InitTensor1DV2(&b, d, X_FLOAT, devID);
}
/*
@@ -78,7 +78,7 @@ XTensor T2TLN::Make(XTensor &input)
mean = ReduceMean(x, x.order - 1);
/* \sigma = (sum_i (x_i - \mu)^2)/m */
-variance = ReduceVariance(x, x.order - 1, mean);
+variance = ReduceVariance(x, x.order - 1, mean) + 1e-5F;
/* standard = sqrt(variance) */
standard = Power(variance, 0.5F);
@@ -92,7 +92,7 @@ XTensor T2TLN::Make(XTensor &input)
xn = (x - meanFilled) / standardFilled;
/* result = x' * w + b */
return xn * w + b;
}
}
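The only functional change above is the 1e-5 added to the variance, which keeps the subsequent division stable when a row has zero variance. A standalone sketch of the layer-norm arithmetic as written in this function (w, b and the input values are made up):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const int d = 4;
        float x[d] = { 1.0F, 2.0F, 3.0F, 4.0F };
        float w[d] = { 1.0F, 1.0F, 1.0F, 1.0F };    /* per-dimension gain */
        float b[d] = { 0.0F, 0.0F, 0.0F, 0.0F };    /* per-dimension bias */
        float mean = 0, variance = 0;
        for (int i = 0; i < d; i++) mean += x[i] / d;
        for (int i = 0; i < d; i++) variance += (x[i] - mean) * (x[i] - mean) / d;
        float standard = std::sqrt(variance + 1e-5F);
        for (int i = 0; i < d; i++)
            printf("%f\n", (x[i] - mean) / standard * w[i] + b[i]);
        return 0;
    }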
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,15 +15,16 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/
#include "T2TModel.h"
#include "T2TUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../tensor/XUtility.h"
+#include <cstdint>
namespace transformer
{
@@ -49,22 +50,22 @@ T2TModel::~T2TModel()
delete outputLayer;
}
/*
initialize the model
>> argc - number of arguments
>> argv - list of pointers to the arguments
*/
-void T2TModel::InitModel(int argc, char ** argv)
+void T2TModel::InitModel(int argc, char** argv)
{
LoadParamInt(argc, argv, "dev", &devID, -1);
LoadParamBool(argc, argv, "mt", &isMT, false);
LoadParamBool(argc, argv, "lm", &isLM, !isMT);
-LoadParamInt(argc, argv, "nhead", &nhead, 8);
+LoadParamInt(argc, argv, "nhead", &nhead, 4);
encoder->InitModel(argc, argv, true, 0, devID);
outputLayer->InitModel(argc, argv, devID);
-if(isMT)
+if (isMT)
decoder->InitModel(argc, argv, true, 0, devID);
TensorList params(10);
@@ -76,21 +77,21 @@ void T2TModel::InitModel(int argc, char ** argv)
}
}
/*
make the encoding network
>> input - input tensor
>> mask - the mask for positions that are/not involved in computation
>> isTraining - indicates whether we are training the model
<< return - encoding result
*/
-XTensor T2TModel::MakeEncoder(XTensor &input, XTensor *mask, bool isTraining)
+XTensor T2TModel::MakeEncoder(XTensor& input, XTensor* mask, bool isTraining)
{
XTensor nothing;
return encoder->Make(input, mask, nothing, isTraining);
}
/*
make the decoding network
>> inputDec - input tensor of the decoder
>> outputEnc - output tensor of the encoder
@@ -100,22 +101,22 @@ make the decoding network
>> isTraining - indicates whether we are training the model
<< return - encoding result
*/
-XTensor T2TModel::MakeDecoder(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, XTensor &maskEncDec, bool isTraining)
+XTensor T2TModel::MakeDecoder(XTensor& inputDec, XTensor& outputEnc, XTensor* mask, XTensor& maskEncDec, bool isTraining)
{
return decoder->Make(inputDec, outputEnc, mask, maskEncDec, isTraining);
}
/*
make the network for language modeling (with the output softmax layer)
>> input - input tensor
>> output - output tensor (distribution)
>> padding - padding of the sequences
>> isTraining - indicates whether the model is for training
*/
-void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining)
+void T2TModel::MakeLM(XTensor& input, XTensor& output, XTensor& padding, bool isTraining)
{
XTensor encoding;
/* generate mask to see "previous" words only */
//int len = input.GetDim(input.order - 2);
//int * dims = new int[input.order + 1];
@@ -126,30 +127,30 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
//XTensor mask(input.order + 1, dims, X_FLOAT, 1.0F, input.devID, input.mem);
int len = input.GetDim(input.order - 1);
-int * dims = new int[input.order + 2];
-for(int i = 0; i < input.order; i++)
+int* dims = new int[input.order + 2];
+for (int i = 0; i < input.order; i++)
dims[i + 1] = input.GetDim(i);
dims[0] = nhead;
dims[input.order + 1] = len;
XTensor mask;
-InitTensor(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
+InitTensorV2(&mask, input.order + 2, dims, X_FLOAT, 1.0F, padding.devID);
/* a upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
a given sequence. */
_SetDataLowTri(&mask, 1e9F, 0);
_ScaleAndShiftMe(&mask, 1.0F, -1e9F);
-int * dimsPadding = new int[padding.order + 2];
-for(int i = 0; i < padding.order - 1; i++)
+int* dimsPadding = new int[padding.order + 2];
+for (int i = 0; i < padding.order - 1; i++)
dimsPadding[i] = padding.GetDim(i);
dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1);
-XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
+XTensor* padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.devID);
-for(int i = 0; i < padding2->order; i++)
+for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
@@ -169,13 +170,13 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
delete[] dims;
delete[] dimsPadding;
//DelTensorBuf(padding3);
DelTensorBuf(padding2);
}
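The mask built in MakeLM above is additive: judging from the code comment, _SetDataLowTri(&mask, 1e9F, 0) writes 1e9 on and below the diagonal, and _ScaleAndShiftMe(&mask, 1.0F, -1e9F) then shifts everything by -1e9, so visible positions end up at 0 and future positions at -1e9 before the softmax. A standalone sketch of those values (the length is made up):

    #include <cstdio>

    int main()
    {
        const int len = 4;
        for (int i = 0; i < len; i++) {
            for (int j = 0; j < len; j++) {
                /* 1e9 on/below the diagonal, 0 above; then scale by 1 and shift by -1e9 */
                float v = (j <= i ? 1e9F : 0.0F) * 1.0F - 1e9F;
                printf("%12.1f ", v);
            }
            printf("\n");
        }
        return 0;
    }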
/*
make the network for machine translation (with the output softmax layer)
>> inputEnc - input tensor of the encoder
>> inputDec - input tensor of the decoder
>> output - output tensor (distribution)
@@ -183,7 +184,7 @@ make the network for machine translation (with the output softmax layer)
>> paddingDec - padding of the sequences (on the decoder side)
>> isTraining - indicates whether the model is for training
*/
-void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, XTensor &paddingDec, bool isTraining)
+void T2TModel::MakeMT(XTensor& inputEnc, XTensor& inputDec, XTensor& output, XTensor& paddingEnc, XTensor& paddingDec, bool isTraining)
{
XTensor encoding;
XTensor decoding;
@@ -193,7 +194,7 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
/* encoder mask */
MakeMTMaskEnc(inputEnc, paddingEnc, maskEnc);
/* decoder mask */
MakeMTMaskDec(inputEnc, inputDec, paddingEnc, paddingDec, maskDec, maskEncDec, 0);
@@ -204,8 +205,8 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
outputLayer->Make(decoding, output);
}
/*
make the mask for training MT models
>> inputEnc - input of the encoder
>> inputDec - input of the decoder
>> paddingEnc - padding of the encoder input
@@ -214,18 +215,18 @@ make the mask for training MT models
>> maksDec - mask of the decoder self-attention
>> maksEncDec - mask of the decoder enc-dec attention
*/
-void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
-XTensor &paddingEnc, XTensor &paddingDec,
-XTensor &maskEnc, XTensor &maskDec, XTensor &maskEncDec)
+void T2TModel::MakeMTMask(XTensor& inputEnc, XTensor& inputDec,
+XTensor& paddingEnc, XTensor& paddingDec,
+XTensor& maskEnc, XTensor& maskDec, XTensor& maskEncDec)
{
int len = inputDec.GetDim(inputDec.order - 1);
-int * dims = new int[inputDec.order + 2];
-for(int i = 0; i < inputDec.order; i++)
+int* dims = new int[inputDec.order + 2];
+for (int i = 0; i < inputDec.order; i++)
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
-InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
+InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
a given sequence. */
@@ -234,11 +235,10 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
-InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
-XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
-paddingEnc.devID);
-XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
+InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID);
+XTensor* maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
+XTensor* maskEncDecTMPDec = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
@@ -248,21 +248,21 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
DelTensorBuf(maskEncDecTMPEnc);
/* padding on the source side */
-int * dimsPadding = new int[paddingEnc.order + 2];
+int* dimsPadding = new int[paddingEnc.order + 2];
for (int i = 0; i < paddingEnc.order - 1; i++)
dimsPadding[i] = paddingEnc.GetDim(i);
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
-XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
+XTensor* padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
-XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
+XTensor* padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
@@ -270,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
-InitTensor(&maskEnc, padding3);
+InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
@@ -282,49 +282,47 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
DelTensorBuf(padding3);
DelTensorBuf(padding2);
}
/*
make the mask of the encoder
>> inputEnc - input of the encoder
>> paddingEnc - padding of the encoder input
>> maskEnc - mask of the encoder self-attention
*/
-void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &maskEnc)
+void T2TModel::MakeMTMaskEnc(XTensor& inputEnc, XTensor& paddingEnc, XTensor& maskEnc)
{
/* padding on the source side */
-int * dimsPadding = new int[paddingEnc.order + 2];
+int* dimsPadding = new int[paddingEnc.order + 2];
for (int i = 0; i < paddingEnc.order - 1; i++)
dimsPadding[i] = paddingEnc.GetDim(i);
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
-XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
-paddingEnc.devID);
+XTensor* padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
-XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
-paddingEnc.devID);
+XTensor* padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType, paddingEnc.devID);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
_Unsqueeze(padding2, padding3, 0, nhead);
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
-InitTensor(&maskEnc, padding3);
+InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
_Sum(&maskEnc, padding3, &maskEnc);
DelTensorBuf(padding3);
DelTensorBuf(padding2);
delete[] dimsPadding;
}
/*
make the mask of the decoder
>> inputEnc - input of the encoder
@@ -334,34 +332,33 @@ make the mask of the decoder
>> maksDec - mask of the decoder self-attention
>> maksEncDec - mask of the decoder enc-dec attention
*/
-void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
-XTensor &paddingEnc, XTensor &paddingDec,
-XTensor &maskDec, XTensor &maskEncDec, int incDim)
+void T2TModel::MakeMTMaskDec(XTensor& inputEnc, XTensor& inputDec,
+XTensor& paddingEnc, XTensor& paddingDec,
+XTensor& maskDec, XTensor& maskEncDec, int incDim)
{
int len = inputDec.GetDim(inputDec.order - 1);
-int * dims = new int[inputDec.order + 2];
-for(int i = 0; i < inputDec.order; i++)
+int* dims = new int[inputDec.order + 2];
+for (int i = 0; i < inputDec.order; i++)
dims[i + 1] = inputDec.GetDim(i);
//dims[inputDec.order] += incDim;
dims[0] = nhead;
dims[inputDec.order + 1] = len;
//InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec);
/* An upper triangular matrix where the cells of the upper triangular are set to -1e-9.
This matrix can be used to block the attention to current or following words in
a given sequence. */
//_SetDataLowTri(&maskDec, 1e9F, 0);
//_ScaleAndShiftMe(&maskDec, 1.0F, -1e9F);
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
-InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
-XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
-paddingEnc.devID);
-XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
+InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID);
+XTensor* maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
+XTensor* maskEncDecTMPDec = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
//paddingEnc.Dump(stderr, "paddingenc:");
@@ -374,21 +371,21 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
_Unsqueeze(maskEncDecTMPEnc, &maskEncDec, 0, dims[0]);
//maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:");
DelTensorBuf(maskEncDecTMPDec);
DelTensorBuf(maskEncDecTMPEnc);
delete[] dims;
}
/* /*
get parameter matrics get parameter matrics
>> list - the list that keeps the parameter matrics >> list - the list that keeps the parameter matrics
*/ */
void T2TModel::GetParams(TensorList &list) void T2TModel::GetParams(TensorList& list)
{ {
list.Clear(); list.Clear();
/* encoder parameters */ /* encoder parameters */
for(int i = 0; i < encoder->nlayer; i++){ for (int i = 0; i < encoder->nlayer; i++) {
list.Add(&encoder->attentions[i].wq); list.Add(&encoder->attentions[i].wq);
list.Add(&encoder->attentions[i].wk); list.Add(&encoder->attentions[i].wk);
list.Add(&encoder->attentions[i].wv); list.Add(&encoder->attentions[i].wv);
...@@ -396,8 +393,8 @@ void T2TModel::GetParams(TensorList &list) ...@@ -396,8 +393,8 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&encoder->attentions[i].bk); list.Add(&encoder->attentions[i].bk);
list.Add(&encoder->attentions[i].bv); list.Add(&encoder->attentions[i].bv);
list.Add(&encoder->attentions[i].rp_embedding_k); list.Add(&encoder->attentions[i].rp_embedding_k);
list.Add(&encoder->attentions[i].wa); list.Add(&encoder->attentions[i].wo);
list.Add(&encoder->attentions[i].ba); list.Add(&encoder->attentions[i].bo);
list.Add(&encoder->fnns[i].w1); list.Add(&encoder->fnns[i].w1);
list.Add(&encoder->fnns[i].b1); list.Add(&encoder->fnns[i].b1);
list.Add(&encoder->fnns[i].w2); list.Add(&encoder->fnns[i].w2);
...@@ -407,33 +404,33 @@ void T2TModel::GetParams(TensorList &list) ...@@ -407,33 +404,33 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&encoder->fnns[i].fnnLayerNorm.w); list.Add(&encoder->fnns[i].fnnLayerNorm.w);
list.Add(&encoder->fnns[i].fnnLayerNorm.b); list.Add(&encoder->fnns[i].fnnLayerNorm.b);
} }
list.Add(&encoder->encodeLayerNorm->w); list.Add(&encoder->encoderLayerNorm->w);
list.Add(&encoder->encodeLayerNorm->b); list.Add(&encoder->encoderLayerNorm->b);
/* decoder parameters */ /* decoder parameters */
if(isMT){ if (isMT) {
for(int i = 0; i < decoder->nlayer; i++){ for (int i = 0; i < decoder->nlayer; i++) {
list.Add(&decoder->attentions[i].wq); list.Add(&decoder->selfAtt[i].wq);
list.Add(&decoder->attentions[i].wk); list.Add(&decoder->selfAtt[i].wk);
list.Add(&decoder->attentions[i].wv); list.Add(&decoder->selfAtt[i].wv);
list.Add(&decoder->attentions[i].bq); list.Add(&decoder->selfAtt[i].bq);
list.Add(&decoder->attentions[i].bk); list.Add(&decoder->selfAtt[i].bk);
list.Add(&decoder->attentions[i].bv); list.Add(&decoder->selfAtt[i].bv);
list.Add(&decoder->attentions[i].rp_embedding_k); list.Add(&decoder->selfAtt[i].rp_embedding_k);
list.Add(&decoder->attentions[i].wa); list.Add(&decoder->selfAtt[i].wo);
list.Add(&decoder->attentions[i].ba); list.Add(&decoder->selfAtt[i].bo);
list.Add(&decoder->attLayerNorms[i].w); list.Add(&decoder->selfAttLayerNorms[i].w);
list.Add(&decoder->attLayerNorms[i].b); list.Add(&decoder->selfAttLayerNorms[i].b);
list.Add(&decoder->attentionsEnde[i].wq); list.Add(&decoder->enDeAtt[i].wq);
list.Add(&decoder->attentionsEnde[i].wk); list.Add(&decoder->enDeAtt[i].wk);
list.Add(&decoder->attentionsEnde[i].wv); list.Add(&decoder->enDeAtt[i].wv);
list.Add(&decoder->attentionsEnde[i].bq); list.Add(&decoder->enDeAtt[i].bq);
list.Add(&decoder->attentionsEnde[i].bk); list.Add(&decoder->enDeAtt[i].bk);
list.Add(&decoder->attentionsEnde[i].bv); list.Add(&decoder->enDeAtt[i].bv);
list.Add(&decoder->attentionsEnde[i].wa); list.Add(&decoder->enDeAtt[i].wo);
list.Add(&decoder->attentionsEnde[i].ba); list.Add(&decoder->enDeAtt[i].bo);
list.Add(&decoder->attEndeLayerNorms[i].w); list.Add(&decoder->enDeAttLayerNorms[i].w);
list.Add(&decoder->attEndeLayerNorms[i].b); list.Add(&decoder->enDeAttLayerNorms[i].b);
list.Add(&decoder->fnns[i].w1); list.Add(&decoder->fnns[i].w1);
list.Add(&decoder->fnns[i].b1); list.Add(&decoder->fnns[i].b1);
list.Add(&decoder->fnns[i].w2); list.Add(&decoder->fnns[i].w2);
...@@ -441,8 +438,8 @@ void T2TModel::GetParams(TensorList &list) ...@@ -441,8 +438,8 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&decoder->fnns[i].fnnLayerNorm.w); list.Add(&decoder->fnns[i].fnnLayerNorm.w);
list.Add(&decoder->fnns[i].fnnLayerNorm.b); list.Add(&decoder->fnns[i].fnnLayerNorm.b);
} }
list.Add(&decoder->decodeLayerNorm->w); list.Add(&decoder->decoderLayerNorm->w);
list.Add(&decoder->decodeLayerNorm->b); list.Add(&decoder->decoderLayerNorm->b);
} }
/* shared embeddings */ /* shared embeddings */
...@@ -452,23 +449,23 @@ void T2TModel::GetParams(TensorList &list) ...@@ -452,23 +449,23 @@ void T2TModel::GetParams(TensorList &list)
} }
/* /*
dump the parameters dump the parameters
>> fn - where to keep the model >> fn - where to keep the model
>> model - the model >> model - the model
*/ */
void T2TModel::Dump(const char * fn) void T2TModel::Dump(const char* fn)
{ {
double startT = GetClockSec(); double startT = GetClockSec();
FILE * file = fopen(fn, "wb"); FILE* file = fopen(fn, "wb");
CheckNTErrors(file, "Cannot open the model file"); CheckNTErrors(file, "Cannot open the model file");
TensorList params(100); TensorList params(100);
GetParams(params); GetParams(params);
for(int i = 0; i < params.count; i++){ for (int i = 0; i < params.count; i++) {
XTensor * p = (XTensor*)params.Get(i); XTensor* p = (XTensor*)params.Get(i);
p->Dump(file, "param:"); p->Dump(file, "param:");
} }
...@@ -480,38 +477,37 @@ void T2TModel::Dump(const char * fn) ...@@ -480,38 +477,37 @@ void T2TModel::Dump(const char * fn)
} }
/* read the parameters */ /* read the parameters */
void T2TModel::Read(const char * fn) void T2TModel::Read(const char* fn)
{ {
double startT = GetClockSec(); double startT = GetClockSec();
FILE * file = fopen(fn, "rb"); FILE* file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot open the model file"); CheckNTErrors(file, "Cannot open the model file");
TensorList params(100); TensorList params(100);
GetParams(params); GetParams(params);
for(int i = 0; i < params.count; i++){
XTensor * p = (XTensor*)params.Get(i);
FastRead(p, file);
// p->Read(file, "");
}
//uint64_t* offsets = new uint64_t[params.Size()];
///* number of parameter */
//uint64_t param_number;
//fread(&param_number, sizeof(param_number), 1, file);
//CheckNTErrors(param_number == params.Size(), "parameter number not matched");
///* parameter offsets */
//fread(offsets, sizeof(offsets[0]), params.Size(), file);
///* parameter values */
//for (int i = 0; i < params.Size(); i++)
// params[i]->BinaryRead(file, offsets[i]);
//delete[] offsets;
for (int i = 0; i < params.Size(); i++)
params[i]->BinaryRead(file, 0);
fclose(file); fclose(file);
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT1(0, stderr, "[INFO] model loaded (took %.1fs)\n", elapsed); XPRINT1(0, stderr, "[INFO] model loaded (took %.1fs)\n", elapsed);
} }
void FastRead(XTensor* x, FILE* f) {
float * dataBuf = new float[x->unitNum];
fread(dataBuf, sizeof(char), sizeof(float) * x->unitNum, f);
x->SetData(dataBuf, x->unitNum);
delete[] dataBuf;
}
} }
\ No newline at end of file
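The commented-out block in Read() above hints at a self-describing checkpoint layout (a parameter count, per-parameter offsets, then raw values), while the active code simply calls BinaryRead(file, 0) on each parameter in order. Below is a rough, purely illustrative sketch of such an offset-based layout; the function name and the exact byte layout are assumptions, not the format the library actually writes:

#include <cstdint>
#include <cstdio>
#include <vector>

// hypothetical layout: [uint64 paramNum][paramNum x uint64 offsets][raw float data ...]
void WriteToyCheckpoint(const char* fn, const std::vector<std::vector<float>>& params)
{
    FILE* f = fopen(fn, "wb");
    uint64_t n = params.size();
    fwrite(&n, sizeof(n), 1, f);

    // each offset points to where that parameter's raw values start
    uint64_t offset = sizeof(uint64_t) * (1 + n);
    for (const auto& p : params) {
        fwrite(&offset, sizeof(offset), 1, f);
        offset += sizeof(float) * p.size();
    }
    for (const auto& p : params)
        fwrite(p.data(), sizeof(float), p.size(), f);
    fclose(f);
}

int main()
{
    WriteToyCheckpoint("toy.ckpt", { {1.0F, 2.0F}, {3.0F, 4.0F, 5.0F} });
    return 0;
}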
...@@ -103,7 +103,7 @@ public: ...@@ -103,7 +103,7 @@ public:
/* read the parameters */ /* read the parameters */
void Read(const char * fn); void Read(const char * fn);
}; };
void FastRead(XTensor* x, FILE* f);
} }
#endif #endif
...@@ -56,13 +56,11 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID) ...@@ -56,13 +56,11 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
LoadParamInt(argc, argv, "vsizetgt", &vSize, -1); LoadParamInt(argc, argv, "vsizetgt", &vSize, -1);
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID); InitTensor2DV2(&w, vSize, hSize, X_FLOAT, devID);
} }
/* /*
make the network (redefined output tensor) make the network (redefined output tensor)
>> input - input tensor >> input - input tensor
...@@ -72,9 +70,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output) ...@@ -72,9 +70,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{ {
XTensor &x = input; XTensor &x = input;
output = LogSoftmax(MMul(x, X_NOTRANS, w, X_NOTRANS), -1); output = LogSoftmax(MMul(x, X_NOTRANS, w, X_TRANS), -1);
output.SetName(OUTPUT_NAME);
} }
} }
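With the output weight now stored as [vSize, hSize] and multiplied with X_TRANS, Make() still maps a hidden vector of size d to vocabulary log-probabilities. A small plain-C++ sketch of the same computation on toy sizes (logits[v] = dot(h, w[v]) followed by a log-softmax); all values here are made up:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    const int hSize = 4, vSize = 3;
    std::vector<float> h = { 0.1F, -0.2F, 0.3F, 0.05F };
    // W stored row-major as [vSize][hSize], matching MMul(x, X_NOTRANS, w, X_TRANS)
    std::vector<std::vector<float>> w(vSize, std::vector<float>(hSize, 0.01F));
    w[1][2] = 0.5F;   // arbitrary toy value

    std::vector<float> logits(vSize, 0.0F);
    for (int v = 0; v < vSize; v++)
        for (int i = 0; i < hSize; i++)
            logits[v] += h[i] * w[v][i];

    // numerically stable log-softmax: logit - max - log(sum(exp(logit - max)))
    float maxv = logits[0];
    for (float x : logits) maxv = std::max(maxv, x);
    float sum = 0.0F;
    for (float x : logits) sum += std::exp(x - maxv);
    for (int v = 0; v < vSize; v++)
        printf("logP[%d] = %f\n", v, logits[v] - maxv - std::log(sum));
    return 0;
}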
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/ */
#include "T2TPredictor.h" #include "T2TPredictor.h"
#include "../../tensor/core/CHeader.h" #include "../../tensor/core/CHeader.h"
...@@ -38,24 +38,24 @@ T2TStateBundle::T2TStateBundle() ...@@ -38,24 +38,24 @@ T2TStateBundle::T2TStateBundle()
/* de-constructor */ /* de-constructor */
T2TStateBundle::~T2TStateBundle() T2TStateBundle::~T2TStateBundle()
{ {
if(states != NULL) if (states != NULL)
delete[] states; delete[] states;
} }
/* /*
create states create states
>> num - number of states >> num - number of states
*/ */
void T2TStateBundle::MakeStates(int num) void T2TStateBundle::MakeStates(int num)
{ {
CheckNTErrors(num > 0, "invalid number"); CheckNTErrors(num > 0, "invalid number");
if(states != NULL) if (states != NULL)
delete[] states; delete[] states;
states = new T2TState[num]; states = new T2TState[num];
for(int i = 0; i < num; i++){ for (int i = 0; i < num; i++) {
states[i].prediction = -1; states[i].prediction = -1;
states[i].pid = T2T_PID_EMPTY; states[i].pid = T2T_PID_EMPTY;
states[i].isEnd = false; states[i].isEnd = false;
...@@ -74,7 +74,7 @@ void T2TStateBundle::MakeStates(int num) ...@@ -74,7 +74,7 @@ void T2TStateBundle::MakeStates(int num)
/* constructor */ /* constructor */
T2TPredictor::T2TPredictor() T2TPredictor::T2TPredictor()
{ {
startSymbol = -1; startSymbol = 2;
} }
/* de-constructor */ /* de-constructor */
...@@ -82,37 +82,44 @@ T2TPredictor::~T2TPredictor() ...@@ -82,37 +82,44 @@ T2TPredictor::~T2TPredictor()
{ {
} }
/* /*
create an initial state create an initial state
>> model - the t2t model >> model - the t2t model
>> top - the top-most layer of the network >> top - the top-most layer of the network
>> input - input of the network >> input - input of the network
>> beamSize - beam size >> beamSize - beam size
>> state - the state to be initialized >> state - the state to be initialized
*/ */
void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state) void T2TPredictor::Create(T2TModel* model, XTensor* top, const XTensor* input, int beamSize, T2TStateBundle* state)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
for (int i = 0; i < input->order - 1; i++) for (int i = 0; i < input->order - 1; i++)
dims[i] = input->GetDim(i); dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize; dims[input->order - 1] = beamSize;
InitTensor(&state->probPath, input->order, dims, X_FLOAT, input->devID); InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID);
InitTensor(&state->nstep, input->order, dims, X_FLOAT, input->devID); InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID);
InitTensor(&state->endMark, input->order, dims, X_INT, input->devID); InitTensorV2(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID);
float* data = new float[state->probPath.unitNum]; /*float* data = new float[state->probPath.unitNum];
for (int i = 0; i < state->probPath.unitNum; ++i) { for (int i = 0; i < state->probPath.unitNum; ++i) {
data[i] = -1e20F; data[i] = -1e20F;
if (i % beamSize == 0) if (i % beamSize == 0)
data[i] = 0; data[i] = 0;
} }
state->probPath.SetData(data, state->probPath.unitNum); state->probPath.SetData(data, state->probPath.unitNum);
delete[] data;*/
SetDataFixed(state->probPath, -1e9F);
for (int i = 0; i < state->probPath.unitNum; ++i) {
if (i % beamSize == 0)
state->probPath.Set(0.0F, i);
}
state->nstep.SetZeroAll(); state->nstep.SetZeroAll();
state->endMark.SetZeroAll(); state->endMark.SetZeroAll();
delete[] data;
state->stateNum = 0; state->stateNum = 0;
} }
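The Create() initialization above leaves exactly one live hypothesis per beam at step 0: slot 0 of each beam gets path score 0 and the other beamSize - 1 slots get -1e9, so duplicated start hypotheses cannot crowd out real candidates in the first top-k. A tiny sketch of the resulting pattern (sizes are arbitrary):

#include <cstdio>
#include <vector>

int main()
{
    const int batchSize = 2, beamSize = 3;
    std::vector<float> probPath(batchSize * beamSize);

    // same pattern as Create(): 0 for the first slot of each beam, -1e9 otherwise
    for (size_t i = 0; i < probPath.size(); i++)
        probPath[i] = (i % beamSize == 0) ? 0.0F : -1e9F;

    for (size_t i = 0; i < probPath.size(); i++)
        printf("slot %zu: %g\n", i, probPath[i]);
    return 0;
}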
...@@ -125,15 +132,15 @@ void T2TPredictor::SetStartSymbol(int symbol) ...@@ -125,15 +132,15 @@ void T2TPredictor::SetStartSymbol(int symbol)
startSymbol = symbol; startSymbol = symbol;
} }
/* /*
read a state read a state
>> model - the t2t model that keeps the network created so far >> model - the t2t model that keeps the network created so far
>> state - a set of states. It keeps >> state - a set of states. It keeps
1) hypotheses (states) 1) hypotheses (states)
2) probabilities of hypotheses 2) probabilities of hypotheses
3) parts of the network for expanding toward the next state 3) parts of the network for expanding toward the next state
*/ */
void T2TPredictor::Read(T2TModel * model, T2TStateBundle * state) void T2TPredictor::Read(T2TModel* model, T2TStateBundle* state)
{ {
m = model; m = model;
s = state; s = state;
...@@ -147,8 +154,7 @@ predict the next state ...@@ -147,8 +154,7 @@ predict the next state
>> paddingEnc - padding of the encoder >> paddingEnc - padding of the encoder
>>> isStart - is the start or not >>> isStart - is the start or not
*/ */
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, void T2TPredictor::Predict(T2TStateBundle* next, XTensor* encoding, XTensor* inputEnc, XTensor* paddingEnc, bool isStart)
XTensor * inputEnc, XTensor * paddingEnc, bool isStart)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
...@@ -157,42 +163,43 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -157,42 +163,43 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
/* the first token */ /* the first token */
XTensor first; XTensor first;
CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!"); CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!");
for(int i = 0; i < inputEnc->order - 1; i++) for (int i = 0; i < inputEnc->order - 1; i++)
dims[i] = inputEnc->GetDim(i); dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1; dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, inputEnc->devID); InitTensorV2(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID);
SetDataFixedInt(first, startSymbol); SetDataFixedInt(first, startSymbol);
/* add a new word into the input sequence of the decoder side */ /* add a new word into the input sequence of the decoder side */
if (isStart) { if (isStart) {
inputDec = Identity(first); inputDec = Identity(first);
} }
else{ else {
/* only pass one step to the decoder */ /* only pass one step to the decoder */
inputDec = GetLastPrediction(s); inputDec = GetLastPrediction(s);
inputDec.SetDevice(inputEnc->devID); inputDec.SetDevice(inputEnc->devID);
} }
/* prediction probabilities */ /* prediction probabilities */
XTensor &output = next->prob; XTensor& output = next->prob;
XTensor decoding; XTensor decoding;
for(int i = 0; i < inputDec.order - 1; i++) for (int i = 0; i < inputDec.order - 1; i++)
dims[i] = inputDec.GetDim(i); dims[i] = inputDec.GetDim(i);
dims[inputDec.order - 1] = inputDec.GetDim(-1); dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec; XTensor paddingDec;
InitTensor(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID); InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, 1.0F, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1); SetDataFixedInt(paddingDec, 1);
XTensor maskDec; XTensor maskDec;
XTensor maskEncDec; XTensor maskEncDec;
/* decoder mask */ /* decoder mask */
m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec, 0); //m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec, 0);
/* make the decoding network */ /* make the decoding network */
decoding = m->decoder->Make(inputDec, *encoding, NULL, maskEncDec, false); decoding = m->decoder->Make(inputDec, *encoding, NULL, maskEncDec, false);
...@@ -203,38 +210,38 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -203,38 +210,38 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
m->outputLayer->Make(decoding, output); m->outputLayer->Make(decoding, output);
} }
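Note that Predict() feeds only the previous step's prediction back into the decoder (via GetLastPrediction) rather than the full prefix. With a beam of size 1 this reduces to plain greedy search, which is what this commit adds; a schematic sketch of that loop is below, with the decoder-plus-output step abstracted as a callable and every name hypothetical:

#include <algorithm>
#include <cstdio>
#include <functional>
#include <vector>

// greedy decoding sketch: at each step take the argmax of the next-token
// log-probabilities, append it, and stop at the end symbol or the length limit
std::vector<int> GreedyDecode(
    const std::function<std::vector<float>(const std::vector<int>&)>& step,
    int startSymbol, int endSymbol, int maxLength)
{
    std::vector<int> hyp = { startSymbol };
    for (int t = 0; t < maxLength; t++) {
        std::vector<float> logProb = step(hyp);
        int next = int(std::max_element(logProb.begin(), logProb.end()) - logProb.begin());
        hyp.push_back(next);
        if (next == endSymbol)
            break;
    }
    return hyp;
}

int main()
{
    // dummy "model": prefers token 3 for two steps, then the end symbol 2
    auto step = [](const std::vector<int>& hyp) {
        std::vector<float> p(5, -10.0F);
        p[hyp.size() < 3 ? 3 : 2] = 0.0F;
        return p;
    };
    for (int tok : GreedyDecode(step, /*start*/2, /*end*/2, /*maxLen*/8))
        printf("%d ", tok);
    printf("\n");
    return 0;
}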
/* /*
generate paths up to the states of the current step generate paths up to the states of the current step
>> state - state bundle of the current step >> state - state bundle of the current step
*/ */
XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state) XTensor T2TPredictor::GeneratePaths(T2TStateBundle* state)
{ {
CheckNTErrors(state->stateNum >= 0, "Illegal state!"); CheckNTErrors(state->stateNum >= 0, "Illegal state!");
int distance = -1; int distance = -1;
for(int i = 0; i < state->stateNum; i++){ for (int i = 0; i < state->stateNum; i++) {
T2TState * cur = state->states + i; T2TState* cur = state->states + i;
int nsteps = 0; int nsteps = 0;
while(cur != NULL){ while (cur != NULL) {
nsteps++; nsteps++;
cur = cur->last; cur = cur->last;
} }
if(nsteps > distance) if (nsteps > distance)
distance = nsteps; distance = nsteps;
} }
XTensor path; XTensor path;
InitTensor2D(&path, state->stateNum, distance, X_INT); InitTensor2DV2(&path, state->stateNum, distance, X_INT);
path.SetZeroAll(); path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){ for (int i = 0; i < state->stateNum; i++) {
T2TState * cur = state->states + i; T2TState* cur = state->states + i;
int nsteps = 0; int nsteps = 0;
while(cur != NULL){ while (cur != NULL) {
nsteps++; nsteps++;
path.Set2DInt(cur->prediction, i, distance - nsteps); path.Set2DInt(cur->prediction, i, distance - nsteps);
cur = cur->last; cur = cur->last;
...@@ -253,7 +260,7 @@ XTensor T2TPredictor::GetLastPrediction(T2TStateBundle* state) ...@@ -253,7 +260,7 @@ XTensor T2TPredictor::GetLastPrediction(T2TStateBundle* state)
CheckNTErrors(state->stateNum >= 0, "Illegal state!"); CheckNTErrors(state->stateNum >= 0, "Illegal state!");
XTensor lastPred; XTensor lastPred;
InitTensor2D(&lastPred, state->stateNum, 1, X_INT); InitTensor2DV2(&lastPred, state->stateNum, 1, X_INT);
for (int i = 0; i < state->stateNum; i++) { for (int i = 0; i < state->stateNum; i++) {
T2TState* cur = state->states + i; T2TState* cur = state->states + i;
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start! * This is the first source file I create in 2019 - new start!
*/ */
#ifndef __T2TPREDICTOR_H__ #ifndef __T2TPREDICTOR_H__
#define __T2TPREDICTOR_H__ #define __T2TPREDICTOR_H__
...@@ -39,8 +39,8 @@ public: ...@@ -39,8 +39,8 @@ public:
/* we assume that the prediction is an integer */ /* we assume that the prediction is an integer */
int prediction; int prediction;
/* id of the problem. One can regard it as the sentence id when we /* id of the problem. One can regard it as the sentence id when we
translate a number of sentences in the batched manner. The hypothesis translate a number of sentences in the batched manner. The hypothesis
is empty if id = -1 */ is empty if id = -1 */
int pid; int pid;
...@@ -66,7 +66,7 @@ public: ...@@ -66,7 +66,7 @@ public:
int nstep; int nstep;
/* pointer to the previous state */ /* pointer to the previous state */
T2TState * last; T2TState* last;
}; };
/* a bundle of states */ /* a bundle of states */
...@@ -75,7 +75,7 @@ class T2TStateBundle ...@@ -75,7 +75,7 @@ class T2TStateBundle
public: public:
/* predictions */ /* predictions */
XTensor prediction; XTensor prediction;
/* id of the previous state that generates the current one */ /* id of the previous state that generates the current one */
XTensor preID; XTensor preID;
...@@ -95,7 +95,7 @@ public: ...@@ -95,7 +95,7 @@ public:
XTensor nstep; XTensor nstep;
/* list of states */ /* list of states */
T2TState * states; T2TState* states;
/* number of states */ /* number of states */
int stateNum; int stateNum;
...@@ -114,19 +114,19 @@ public: ...@@ -114,19 +114,19 @@ public:
void MakeStates(int num); void MakeStates(int num);
}; };
/* The predictor reads the current state and then predicts the next. /* The predictor reads the current state and then predicts the next.
It is exactly the same procedure as MT inference - It is exactly the same procedure as MT inference -
we take the states of the previous words and then generate the next word. we take the states of the previous words and then generate the next word.
Here, a state can be regarded as the representation of words (word Here, a state can be regarded as the representation of words (word
indices, hidden states, embeddings, etc.). */ indices, hidden states, embeddings, etc.). */
class T2TPredictor class T2TPredictor
{ {
private: private:
/* pointer to the transformer model */ /* pointer to the transformer model */
T2TModel * m; T2TModel* m;
/* current state */ /* current state */
T2TStateBundle * s; T2TStateBundle* s;
/* start symbol */ /* start symbol */
int startSymbol; int startSymbol;
...@@ -139,19 +139,19 @@ public: ...@@ -139,19 +139,19 @@ public:
~T2TPredictor(); ~T2TPredictor();
/* create an initial state */ /* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state); void Create(T2TModel* model, XTensor* top, const XTensor* input, int beamSize, T2TStateBundle* state);
/* set the start symbol */ /* set the start symbol */
void SetStartSymbol(int symbol); void SetStartSymbol(int symbol);
/* read a state */ /* read a state */
void Read(T2TModel * model, T2TStateBundle * state); void Read(T2TModel* model, T2TStateBundle* state);
/* predict the next state */ /* predict the next state */
void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc, bool isStart); void Predict(T2TStateBundle* next, XTensor* encoding, XTensor* inputEnc, XTensor* paddingEnc, bool isStart);
/* generate paths up to the states of the current step */ /* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state); XTensor GeneratePaths(T2TStateBundle* state);
/* get the predictions of the previous step */ /* get the predictions of the previous step */
XTensor GetLastPrediction(T2TStateBundle* state); XTensor GetLastPrediction(T2TStateBundle* state);
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/ */
#include "T2TSearch.h" #include "T2TSearch.h"
#include "T2TUtility.h" #include "T2TUtility.h"
...@@ -27,7 +27,7 @@ using namespace nts; ...@@ -27,7 +27,7 @@ using namespace nts;
namespace transformer namespace transformer
{ {
/* constructor */ /* constructor */
T2TSearch::T2TSearch() T2TSearch::T2TSearch()
{ {
...@@ -38,15 +38,15 @@ T2TSearch::T2TSearch() ...@@ -38,15 +38,15 @@ T2TSearch::T2TSearch()
endSymbolNum = 0; endSymbolNum = 0;
fullHypos = NULL; fullHypos = NULL;
endSymbols = new int[32]; endSymbols = new int[32];
startSymbol = -1; startSymbol = 2;
} }
/* de-constructor */ /* de-constructor */
T2TSearch::~T2TSearch() T2TSearch::~T2TSearch()
{ {
if(fullHypos != NULL) if (fullHypos != NULL)
delete[] fullHypos; delete[] fullHypos;
if(endSymbols != NULL) if (endSymbols != NULL)
delete[] endSymbols; delete[] endSymbols;
} }
...@@ -55,7 +55,7 @@ initialize the model ...@@ -55,7 +55,7 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
*/ */
void T2TSearch::Init(int argc, char ** argv) void T2TSearch::Init(int argc, char** argv)
{ {
LoadParamInt(argc, argv, "beamsize", &beamSize, 1); LoadParamInt(argc, argv, "beamsize", &beamSize, 1);
LoadParamInt(argc, argv, "batchsize", &batchSize, 1); LoadParamInt(argc, argv, "batchsize", &batchSize, 1);
...@@ -63,18 +63,18 @@ void T2TSearch::Init(int argc, char ** argv) ...@@ -63,18 +63,18 @@ void T2TSearch::Init(int argc, char ** argv)
LoadParamInt(argc, argv, "endid", endSymbols, 2); LoadParamInt(argc, argv, "endid", endSymbols, 2);
LoadParamInt(argc, argv, "startid", &startSymbol, 2); LoadParamInt(argc, argv, "startid", &startSymbol, 2);
if(endSymbols[0] >= 0) if (endSymbols[0] >= 0)
endSymbolNum = 1; endSymbolNum = 1;
} }
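Init() reads the search hyper-parameters from the command line, and beamsize defaults to 1. Since a beam of width 1 keeps only the single best continuation at every step, the default configuration is exactly the greedy search this commit adds; larger values switch the same code path to beam search. A hedged example of the relevant arguments (the "-name value" spelling is an assumption about how LoadParamInt matches names):

// hypothetical argument list for greedy decoding; all flag spellings are assumptions
const char* searchArgs[] = { "-beamsize", "1", "-batchsize", "1", "-startid", "2", "-endid", "2" };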
/* /*
search for the most promising states search for the most promising states
>> model - the transformer model >> model - the transformer model
>> input - input of the model >> input - input of the model
>> padding - padding of the input >> padding - padding of the input
>> output - output that represents the sequences as rows >> output - output that represents the sequences as rows
*/ */
void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output) void T2TSearch::Search(T2TModel* model, XTensor* input, XTensor* padding, XTensor* output)
{ {
T2TPredictor predictor; T2TPredictor predictor;
XTensor maskEnc; XTensor maskEnc;
...@@ -86,11 +86,11 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -86,11 +86,11 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
CheckNTErrors(endSymbolNum > 0, "The search class is not initialized!"); CheckNTErrors(endSymbolNum > 0, "The search class is not initialized!");
CheckNTErrors(startSymbol >= 0, "The search class is not initialized!"); CheckNTErrors(startSymbol >= 0, "The search class is not initialized!");
Prepare(input->unitNum/input->GetDim(-1), beamSize); Prepare(input->unitNum / input->GetDim(-1), beamSize);
/* encoder mask */ /* encoder mask */
model->MakeMTMaskEnc(*input, *padding, maskEnc); //model->MakeMTMaskEnc(*input, *padding, maskEnc);
/* make the encoding network */ /* make the encoding network */
encoding = model->MakeEncoder(*input, &maskEnc, false); encoding = model->MakeEncoder(*input, &maskEnc, false);
...@@ -101,11 +101,11 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -101,11 +101,11 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
encodingBeam.ReshapeMerged(encodingBeam.order - 4); encodingBeam.ReshapeMerged(encodingBeam.order - 4);
inputBeam.ReshapeMerged(inputBeam.order - 3); inputBeam.ReshapeMerged(inputBeam.order - 3);
paddingBeam.ReshapeMerged(paddingBeam.order - 3); paddingBeam.ReshapeMerged(paddingBeam.order - 3);
/* max output-length = 2 * source-length */ /* max output-length = 2 * source-length */
maxLength = input->GetDim(-1) * 2; maxLength = input->GetDim(-1) * 2;
CheckNTErrors(maxLength > 0, "no max length specified!"); CheckNTErrors(maxLength > 0, "no max length specified!");
T2TStateBundle* states = new T2TStateBundle[maxLength + 1]; T2TStateBundle* states = new T2TStateBundle[maxLength + 1];
T2TStateBundle* first = states; T2TStateBundle* first = states;
T2TStateBundle* cur; T2TStateBundle* cur;
...@@ -118,7 +118,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -118,7 +118,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
first->isStart = true; first->isStart = true;
/* generate the sequence from left to right */ /* generate the sequence from left to right */
for(int i = 0 ; i < maxLength; i++){ for (int i = 0; i < maxLength; i++) {
cur = states + i; cur = states + i;
next = states + i + 1; next = states + i + 1;
...@@ -126,7 +126,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -126,7 +126,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
predictor.Read(model, cur); predictor.Read(model, cur);
/* predict the next state */ /* predict the next state */
predictor.Predict(next, &encodingBeam, &inputBeam, &paddingBeam, i==0); predictor.Predict(next, &encodingBeam, &inputBeam, &paddingBeam, i == 0);
/* compute the model score (given the prediction probability) */ /* compute the model score (given the prediction probability) */
Score(cur, next); Score(cur, next);
...@@ -143,13 +143,13 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -143,13 +143,13 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
/* fill the heap with incomplete hypotheses if necessary */ /* fill the heap with incomplete hypotheses if necessary */
FillHeap(next); FillHeap(next);
Dump(output); Dump(output);
delete[] states; delete[] states;
} }
/* /*
prepare for search prepare for search
>> batchSize - size of the batch >> batchSize - size of the batch
>> beamSize - size of the beam >> beamSize - size of the beam
...@@ -168,102 +168,100 @@ void T2TSearch::Prepare(int myBatchSize, int myBeamSize) ...@@ -168,102 +168,100 @@ void T2TSearch::Prepare(int myBatchSize, int myBeamSize)
fullHypos[i].Init(beamSize); fullHypos[i].Init(beamSize);
} }
/* /*
compute the model score for each hypothesis compute the model score for each hypothesis
>> prev - the beam of the previous state >> prev - the beam of the previous state
>> beam - the beam that keeps a number of states >> beam - the beam that keeps a number of states
*/ */
void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) void T2TSearch::Score(T2TStateBundle* prev, T2TStateBundle* beam)
{ {
XTensor &score = beam->modelScore; XTensor& score = beam->modelScore;
XTensor &prob = beam->prob; XTensor& prob = beam->prob;
XTensor &probPath = beam->probPath; XTensor& probPath = beam->probPath;
XTensor &probPathPrev = prev->probPath; XTensor& probPathPrev = prev->probPath;
XTensor &lenPrev = prev->nstep; XTensor& lenPrev = prev->nstep;
XTensor &len = beam->nstep; XTensor& len = beam->nstep;
XTensor lp; XTensor lp;
XTensor mask; XTensor mask;
int order = prob.order; int order = prob.order;
int outputSize = prob.GetDim(-1); int outputSize = prob.GetDim(-1);
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
for(int i = 0; i < order; i++) for (int i = 0; i < order; i++)
dims[i] = prob.GetDim(i); dims[i] = prob.GetDim(i);
InitTensor(&score, &prob); InitTensorV2(&score, &prob);
InitTensor(&probPath, &prob); InitTensorV2(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize); prob.Reshape(prob.unitNum / outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize); score.Reshape(score.unitNum / outputSize, outputSize);
probPath.Reshape(score.unitNum / outputSize, outputSize); probPath.Reshape(score.unitNum / outputSize, outputSize);
probPathPrev.Reshape(probPathPrev.unitNum); probPathPrev.Reshape(probPathPrev.unitNum);
/* the log-scale probability of the entire sequence */ /* the log-scale probability of the entire sequence */
_SumDim(&prob, &probPathPrev, &probPath, 0); _SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev); InitTensorV2(&len, &lenPrev);
InitTensor(&lp, &lenPrev); InitTensorV2(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F); _ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
/* the GNMT-like length penalty */ /* the GNMT-like length penalty */
//lp = T2TLengthPenalizer::GNMT(len, alpha); lp = T2TLengthPenalizer::GNMT(len, alpha);
//lp.Reshape(lp.unitNum); lp.Reshape(lp.unitNum);
/* score = log-prob/lp */ /* score = log-prob/lp */
//_DivDim(&probPath, &lp, &score, 0); _DivDim(&probPath, &lp, &score, 0);
if (prev->isStart) { if (prev->isStart) {
XTensor firstMask = MakeFirstMask(beam); XTensor firstMask;
firstMask = MakeFirstMask(beam);
firstMask.Reshape(firstMask.unitNum); firstMask.Reshape(firstMask.unitNum);
/* mask the hypotheses in the beam except the first one */ /* mask the hypotheses in the beam except the first one */
_SumDim(&score, &firstMask, &score, 0); _SumDim(&score, &firstMask, &score, 0);
} }
InitTensor(&mask, prev->endMark.order, prev->endMark.dimSize, X_FLOAT, prev->endMark.devID); InitTensorV2(&mask, prev->endMark.order, prev->endMark.dimSize, X_FLOAT, 1.0F, prev->endMark.devID);
mask.SetZeroAll(); mask.SetZeroAll();
_SetDataFixedCond(&mask, &prev->endMark, -1e9F); _SetDataFixedCond(&mask, &prev->endMark, -1e9F);
mask.Reshape(mask.unitNum); mask.Reshape(mask.unitNum);
/* mask the completed hypotheses so that they cannot /* mask the completed hypotheses so that they cannot
be involved in further sorting and beam search. */ be involved in further sorting and beam search. */
_SumDim(&score, &mask, &score, 0); _SumDim(&score, &mask, &score, 0);
prob.Reshape(order, dims); prob.Reshape(order, dims);
score.Reshape(order, dims); score.Reshape(order, dims);
probPath.Reshape(order, dims); probPath.Reshape(order, dims);
probPathPrev.Reshape(order - 1, dims); probPathPrev.Reshape(order - 1, dims);
lp.Reshape(order - 1, dims); lp.Reshape(order - 1, dims);
mask.Reshape(order -1 , dims); mask.Reshape(order - 1, dims);
} }
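Score() divides the accumulated log-probability of each path by a GNMT-style length penalty so that longer finished hypotheses are not unfairly penalized. Assuming T2TLengthPenalizer::GNMT follows the usual Wu et al. (2016) definition lp = ((5 + len) / 6)^alpha, a worked example looks like this:

#include <cmath>
#include <cstdio>

int main()
{
    // assumed GNMT penalty: lp(len) = ((5 + len) / 6)^alpha
    const float alpha = 0.6F;
    const float logProb[] = { -4.0F, -7.5F };   // toy path log-probabilities
    const int   len[]     = { 5, 10 };

    for (int i = 0; i < 2; i++) {
        float lp = std::pow((5.0F + len[i]) / 6.0F, alpha);
        printf("len=%2d  logP=%5.2f  lp=%4.2f  score=%5.2f\n",
               len[i], logProb[i], lp, logProb[i] / lp);
    }
    return 0;
}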
/* /*
generate tokens for the next state via beam pruning generate tokens for the next state via beam pruning
>> beam - the beam that keeps a number of states >> beam - the beam that keeps a number of states
*/ */
void T2TSearch::Generate(T2TStateBundle * beam) void T2TSearch::Generate(T2TStateBundle* beam)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
int dimsBeam[MAX_TENSOR_DIM_NUM]; int dimsBeam[MAX_TENSOR_DIM_NUM];
int dimsTopK[MAX_TENSOR_DIM_NUM]; int dimsTopK[MAX_TENSOR_DIM_NUM];
XTensor scoreTopK; XTensor scoreTopK;
XTensor &score = beam->modelScore; XTensor& score = beam->modelScore;
XTensor &index = beam->prediction; XTensor& index = beam->prediction;
XTensor &preID = beam->preID; XTensor& preID = beam->preID;
XTensor &probPath = beam->probPath; XTensor& probPath = beam->probPath;
XTensor &prob = beam->prob; XTensor& prob = beam->prob;
int order = score.order; int order = score.order;
CheckNTErrors(order >= 3, "The tensor must be of order 3 or larger."); CheckNTErrors(order >= 3, "The tensor must be of order 3 or larger.");
CheckNTErrors(score.GetDim(order - 3) % beamSize == 0, "Wrong dimension size!"); CheckNTErrors(score.GetDim(order - 3) % beamSize == 0, "Wrong dimension size!");
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
dims[i] = score.GetDim(i); dims[i] = score.GetDim(i);
dimsBeam[i] = score.GetDim(i); dimsBeam[i] = score.GetDim(i);
...@@ -277,15 +275,15 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -277,15 +275,15 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsBeam[order - 1] *= beamSize; dimsBeam[order - 1] *= beamSize;
dimsTopK[order - 3] = dimsBeam[order - 3]; dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize; dimsTopK[order - 1] = beamSize;
InitTensor(&scoreTopK, order, dimsTopK, score.dataType, score.devID); InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType, 1.0F, score.devID);
InitTensor(&index, order, dimsTopK, X_INT, score.devID); InitTensorV2(&index, order, dimsTopK, X_INT, 1.0F, score.devID);
InitTensor(&preID, order, dimsTopK, X_INT, -1); InitTensorV2(&preID, order, dimsTopK, X_INT, 1.0F, -1);
/* mask the first and the padding id */ /* mask the first and the padding id */
int dimMask[]{ score.GetDim(-1) }; int dimMask[]{ score.GetDim(-1) };
XTensor mask; XTensor mask;
InitTensor(&mask, 1, dimMask, X_FLOAT, -1); InitTensorV2(&mask, 1, dimMask, X_FLOAT, 1.0F, -1);
mask.SetZeroAll(); mask.SetZeroAll();
mask.Set1D(-1e20F, 0); mask.Set1D(-1e20F, 0);
mask.Set1D(-1e20F, 1); mask.Set1D(-1e20F, 1);
...@@ -293,21 +291,21 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -293,21 +291,21 @@ void T2TSearch::Generate(T2TStateBundle * beam)
//_SumDim(&score, &mask, 2); //_SumDim(&score, &mask, 2);
score.Reshape(order, dimsBeam); score.Reshape(order, dimsBeam);
/* keep the most promising candidates in the beam */ /* keep the most promising candidates in the beam */
/* TODO: check this line */ /* TODO: check this line */
TopK(score, scoreTopK, index, -1, beamSize); TopK(score, scoreTopK, index, -1, beamSize);
CopyValues(index, preID); CopyValues(index, preID);
/* "preID" represents the id (or the offset) of the previous state used to make the current /* "preID" represents the id (or the offset) of the previous state used to make the current
hypothesis. Note that we reshape the "score" tensor into a matrix where each hypothesis. Note that we reshape the "score" tensor into a matrix where each
row means a previous state. The column number is size-of-beam \times vocab-size. We, row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of the therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */ previous state for each hypothesis in the top-k list. */
DescaleMe(preID, sizeVocab); DescaleMe(preID, sizeVocab);
/* Then, we do something similar to "preID". For the top-k predictions, we need /* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */ in the vocabulary by dividing it with vocab-size and computing the remainder. */
ModMe(index, sizeVocab); ModMe(index, sizeVocab);
...@@ -315,7 +313,7 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -315,7 +313,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims); score.Reshape(order, dims);
/* we keep the top-k scores */ /* we keep the top-k scores */
InitTensor(&score, &scoreTopK); InitTensorV2(&score, &scoreTopK);
CopyValues(scoreTopK, score); CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */ /* CPU data (TODO: remove GPU->CPU data copy!!!) */
...@@ -334,9 +332,9 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -334,9 +332,9 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* sequence probability of top-k candidates */ /* sequence probability of top-k candidates */
XTensor probPathTopK; XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK); InitTensorV2(&probPathTopK, &scoreTopK);
XTensor probTopK; XTensor probTopK;
InitTensor(&probTopK, &scoreTopK); InitTensorV2(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) { for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i); dims[i] = probPath.GetDim(i);
...@@ -362,38 +360,38 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -362,38 +360,38 @@ void T2TSearch::Generate(T2TStateBundle * beam)
prob = probTopK; prob = probTopK;
} }
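As the comments in Generate() explain, TopK runs over a matrix whose rows are previous states and whose columns enumerate beamSize x vocab-size continuations, so each flat index encodes both the parent hypothesis (DescaleMe, i.e. integer division) and the predicted word (ModMe, i.e. remainder). A small worked example of that arithmetic with a toy vocabulary size:

#include <cstdio>

int main()
{
    const int vocabSize = 1000;                   // toy vocabulary size
    const int topkIndex[] = { 17, 1042, 2999 };   // flat indices returned by TopK

    for (int idx : topkIndex) {
        int prevState = idx / vocabSize;  // which hypothesis in the beam is extended (DescaleMe)
        int wordId    = idx % vocabSize;  // which vocabulary entry is predicted   (ModMe)
        printf("index %4d -> previous state %d, word id %d\n", idx, prevState, wordId);
    }
    return 0;
}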
/* /*
expand the search graph expand the search graph
>> beam - the beam that keeps a number of states >> beam - the beam that keeps a number of states
*/ */
void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) void T2TSearch::Expand(T2TStateBundle* prev, T2TStateBundle* beam)
{ {
CheckNTErrors(beam->prediction.unitNum == beam->preID.unitNum, "A problem occurs in the beam!"); CheckNTErrors(beam->prediction.unitNum == beam->preID.unitNum, "A problem occurs in the beam!");
beam->MakeStates(beam->prediction.unitNum); beam->MakeStates(beam->prediction.unitNum);
T2TState * states = beam->states; T2TState* states = beam->states;
XTensor & idRef = beam->preID; XTensor& idRef = beam->preID;
XTensor & modelScoreRef = beam->modelScore; XTensor& modelScoreRef = beam->modelScore;
XTensor & probRef = beam->prob; XTensor& probRef = beam->prob;
XTensor & probPathRef = beam->probPath; XTensor& probPathRef = beam->probPath;
XTensor & predictionRef = beam->prediction; XTensor& predictionRef = beam->prediction;
XTensor & endMark = beam->endMark; XTensor& endMark = beam->endMark;
XTensor id; XTensor id;
XTensor modelScore; XTensor modelScore;
XTensor prob; XTensor prob;
XTensor probPath; XTensor probPath;
XTensor prediction; XTensor prediction;
XTensor endMarkCPU; XTensor endMarkCPU;
InitTensorOnCPU(&id, &idRef); InitTensorOnCPU(&id, &idRef);
InitTensorOnCPU(&modelScore, &modelScoreRef); InitTensorOnCPU(&modelScore, &modelScoreRef);
InitTensorOnCPU(&prob, &probRef); InitTensorOnCPU(&prob, &probRef);
InitTensorOnCPU(&probPath, &probPathRef); InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef); InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef); InitTensorOnCPU(&endMarkCPU, &predictionRef);
InitTensor(&endMark, &predictionRef); InitTensorV2(&endMark, &predictionRef);
/* we copy the data to CPU because the frequent access to GPU is slow /* we copy the data to CPU because the frequent access to GPU is slow
and we can speed-up the process by doing the job on CPU. */ and we can speed-up the process by doing the job on CPU. */
CopyValues(idRef, id); CopyValues(idRef, id);
...@@ -403,19 +401,19 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -403,19 +401,19 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
CopyValues(predictionRef, prediction); CopyValues(predictionRef, prediction);
CheckNTErrors(beam->stateNum == id.unitNum, "Errors occur in counting!"); CheckNTErrors(beam->stateNum == id.unitNum, "Errors occur in counting!");
/* Related variables are kept on the states of the graph. All these are /* Related variables are kept on the states of the graph. All these are
maintained on CPUs to ease the implementation of frequent access and maintained on CPUs to ease the implementation of frequent access and
modification of the states. An alternative is to do this on GPUs but modification of the states. An alternative is to do this on GPUs but
it needs much more coding work and the speed-up is not obvious. */ it needs much more coding work and the speed-up is not obvious. */
for(int i = 0; i < beam->stateNum; i += beamSize){ for (int i = 0; i < beam->stateNum; i += beamSize) {
for (int j = 0; j < beamSize; j++) { for (int j = 0; j < beamSize; j++) {
int k = i + j; int k = i + j;
T2TState & state = states[k]; T2TState& state = states[k];
int offset = id.GetInt(k); int offset = id.GetInt(k);
int pid = i / beamSize; int pid = i / beamSize;
T2TState * last = prev->states + pid * beamSize + offset; T2TState* last = prev->states + pid * beamSize + offset;
CheckNTErrors(offset >= 0, "Wrong state index!"); CheckNTErrors(offset >= 0, "Wrong state index!");
...@@ -457,48 +455,48 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -457,48 +455,48 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
CopyValues(endMarkCPU, endMark); CopyValues(endMarkCPU, endMark);
} }
/* /*
collect hypotheses with ending symbols. Given a beam of hypotheses, collect hypotheses with ending symbols. Given a beam of hypotheses,
we remove the finished hypotheses and keep them in a heap. we remove the finished hypotheses and keep them in a heap.
>> beam - the beam that keeps a number of states >> beam - the beam that keeps a number of states
*/ */
void T2TSearch::Collect(T2TStateBundle * beam) void T2TSearch::Collect(T2TStateBundle* beam)
{ {
T2TState * states = beam->states; T2TState* states = beam->states;
for (int i = 0; i < beam->stateNum; i++) { for (int i = 0; i < beam->stateNum; i++) {
T2TState & state = states[i]; T2TState& state = states[i];
CheckNTErrors(state.pid >= 0 && state.pid < batchSize, CheckNTErrors(state.pid >= 0 && state.pid < batchSize,
"Invalid sample id!"); "Invalid sample id!");
/* check if this is the first end symbol. It is false /* check if this is the first end symbol. It is false
if there have been end symbols in previously generated words. */ if there have been end symbols in previously generated words. */
bool isCompleted = state.isCompleted && (state.last == NULL || !state.last->isCompleted); bool isCompleted = state.isCompleted && (state.last == NULL || !state.last->isCompleted);
/* we push the hypothesis into the heap when it is completed */ /* we push the hypothesis into the heap when it is completed */
if(state.isEnd != 0) if (state.isEnd != 0)
fullHypos[state.pid].Push(HeapNode<float>(&state, state.modelScore)); fullHypos[state.pid].Push(HeapNode<float>(&state, state.modelScore));
} }
} }
/* /*
fill the hypothesis heap with incomplete hypotheses fill the hypothesis heap with incomplete hypotheses
>> beam - the beam that keeps a number of states (final) >> beam - the beam that keeps a number of states (final)
*/ */
void T2TSearch::FillHeap(T2TStateBundle * beam) void T2TSearch::FillHeap(T2TStateBundle* beam)
{ {
bool * emptyFlags = new bool[batchSize]; bool* emptyFlags = new bool[batchSize];
for (int i = 0; i < batchSize; i++) for (int i = 0; i < batchSize; i++)
emptyFlags[i] = (fullHypos[i].Count() == 0); emptyFlags[i] = (fullHypos[i].Count() == 0);
T2TState * states = beam->states; T2TState* states = beam->states;
for (int i = 0; i < beam->stateNum; i++) { for (int i = 0; i < beam->stateNum; i++) {
T2TState & state = states[i]; T2TState& state = states[i];
CheckNTErrors(state.pid >= 0 && state.pid < batchSize, CheckNTErrors(state.pid >= 0 && state.pid < batchSize,
"Invalid sample id!"); "Invalid sample id!");
/* we push the incomplete hypothesis into the heap */ /* we push the incomplete hypothesis into the heap */
if (emptyFlags[state.pid] && state.isEnd == 0) if (emptyFlags[state.pid] && state.isEnd == 0)
...@@ -508,32 +506,32 @@ void T2TSearch::FillHeap(T2TStateBundle * beam) ...@@ -508,32 +506,32 @@ void T2TSearch::FillHeap(T2TStateBundle * beam)
delete[] emptyFlags; delete[] emptyFlags;
} }
/* /*
save the output sequences in a tensor save the output sequences in a tensor
>> output - output sequences (for return) >> output - output sequences (for return)
*/ */
void T2TSearch::Dump(XTensor * output) void T2TSearch::Dump(XTensor* output)
{ {
int dims[3] = {batchSize, beamSize, maxLength}; int dims[3] = { batchSize, beamSize, maxLength };
int * words = new int[maxLength]; int* words = new int[maxLength];
InitTensor(output, 3, dims, X_INT); InitTensorV2(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1); SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */ /* heap for an input sentence in the batch */
for(int h = 0; h < batchSize; h++){ for (int h = 0; h < batchSize; h++) {
XHeap<MIN_HEAP, float> &heap = fullHypos[h]; XHeap<MIN_HEAP, float>& heap = fullHypos[h];
/* for each output in the beam */ /* for each output in the beam */
for(int i = 0; i < beamSize && heap.Count() > 0; i++){ for (int i = 0; i < beamSize && heap.Count() > 0; i++) {
T2TState * state = (T2TState *)heap.Pop().index; T2TState* state = (T2TState*)heap.Pop().index;
int count = 0; int count = 0;
bool isCompleted = true; bool isCompleted = true;
/* we track the state from the end to the beginning */ /* we track the state from the end to the beginning */
while(state != NULL){ while (state != NULL) {
if (!state->isCompleted) if (!state->isCompleted)
isCompleted = false; isCompleted = false;
if (isCompleted) if (isCompleted)
...@@ -544,7 +542,7 @@ void T2TSearch::Dump(XTensor * output) ...@@ -544,7 +542,7 @@ void T2TSearch::Dump(XTensor * output)
} }
/* dump the sentence to the output tensor */ /* dump the sentence to the output tensor */
for(int w = 0; w < count; w++) for (int w = 0; w < count; w++)
output->Set3DInt(words[count - w - 1], h, beamSize - i - 1, w); output->Set3DInt(words[count - w - 1], h, beamSize - i - 1, w);
} }
} }
...@@ -552,38 +550,38 @@ void T2TSearch::Dump(XTensor * output) ...@@ -552,38 +550,38 @@ void T2TSearch::Dump(XTensor * output)
delete[] words; delete[] words;
} }
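Dump() recovers each output sentence by walking a finished hypothesis backwards through the last pointers, counting the steps, and then writing the tokens out left to right. A minimal sketch of that back-trace on a hand-built chain (the struct and values here are illustrative only):

#include <cstdio>
#include <vector>

struct ToyState { int prediction; ToyState* last; };

int main()
{
    // chain built backwards: 2 -> 7 -> 9 -> 2, each state points to its predecessor
    ToyState s0 = { 2, nullptr };
    ToyState s1 = { 7, &s0 };
    ToyState s2 = { 9, &s1 };
    ToyState s3 = { 2, &s2 };

    // collect tokens from the end of the hypothesis back to its start
    std::vector<int> words;
    for (ToyState* cur = &s3; cur != nullptr; cur = cur->last)
        words.push_back(cur->prediction);

    // tokens were collected end-to-start, so emit them in reverse order
    for (int i = int(words.size()) - 1; i >= 0; i--)
        printf("%d ", words[i]);
    printf("\n");
    return 0;
}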
/* /*
check if the token is an end symbol check if the token is an end symbol
>> token - token to be checked >> token - token to be checked
*/ */
bool T2TSearch::IsEnd(int token) bool T2TSearch::IsEnd(int token)
{ {
CheckNTErrors(endSymbolNum > 0, "No end symbol?"); CheckNTErrors(endSymbolNum > 0, "No end symbol?");
for(int i = 0; i < endSymbolNum; i++){ for (int i = 0; i < endSymbolNum; i++) {
if(endSymbols[i] == token) if (endSymbols[i] == token)
return true; return true;
} }
return false; return false;
} }
/* /*
set end symbols for search set end symbols for search
>> tokens - end symbols >> tokens - end symbols
>> tokenNum - number of the end symbols >> tokenNum - number of the end symbols
*/ */
void T2TSearch::SetEnd(const int * tokens, const int tokenNum) void T2TSearch::SetEnd(const int* tokens, const int tokenNum)
{ {
if(endSymbols != NULL) if (endSymbols != NULL)
delete[] endSymbols; delete[] endSymbols;
if(tokenNum <= 0) if (tokenNum <= 0)
return; return;
/* we may have multiple end symbols */ /* we may have multiple end symbols */
endSymbols = new int[tokenNum]; endSymbols = new int[tokenNum];
for(int i = 0; i < tokenNum; i++) for (int i = 0; i < tokenNum; i++)
endSymbols[i] = tokens[i]; endSymbols[i] = tokens[i];
endSymbolNum = tokenNum; endSymbolNum = tokenNum;
} }
...@@ -592,9 +590,9 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum) ...@@ -592,9 +590,9 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum)
make a mask to prevent duplicated entries in beam expansion for the first position make a mask to prevent duplicated entries in beam expansion for the first position
>> beam - the beam that keeps the searching states >> beam - the beam that keeps the searching states
*/ */
XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam) XTensor T2TSearch::MakeFirstMask(T2TStateBundle* beam)
{ {
XTensor &prob = beam->prob; XTensor& prob = beam->prob;
XTensor mask; XTensor mask;
int order = prob.order; int order = prob.order;
...@@ -602,7 +600,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam) ...@@ -602,7 +600,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++) for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i); dims[i] = prob.GetDim(i);
InitTensor(&mask, order - 1, dims, X_FLOAT); InitTensorV2(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll(); mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) { for (int i = 0; i < mask.unitNum; i++) {
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/ */
#ifndef __T2TSEARCH_H__ #ifndef __T2TSEARCH_H__
#define __T2TSEARCH_H__ #define __T2TSEARCH_H__
...@@ -40,10 +40,10 @@ private: ...@@ -40,10 +40,10 @@ private:
/* predictor */ /* predictor */
T2TPredictor predictor; T2TPredictor predictor;
/* max length of the generated sequence */ /* max length of the generated sequence */
int maxLength; int maxLength;
/* beam size */ /* beam size */
int beamSize; int beamSize;
...@@ -51,10 +51,10 @@ private: ...@@ -51,10 +51,10 @@ private:
int batchSize; int batchSize;
/* we keep the final hypotheses in a heap for each sentence in the batch. */ /* we keep the final hypotheses in a heap for each sentence in the batch. */
XHeap<MIN_HEAP, float> * fullHypos; XHeap<MIN_HEAP, float>* fullHypos;
/* array of the end symbols */ /* array of the end symbols */
int * endSymbols; int* endSymbols;
/* number of the end symbols */ /* number of the end symbols */
int endSymbolNum; int endSymbolNum;
...@@ -68,42 +68,42 @@ public: ...@@ -68,42 +68,42 @@ public:
/* de-constructor */ /* de-constructor */
~T2TSearch(); ~T2TSearch();
/* initialize the model */ /* initialize the model */
void Init(int argc, char ** argv); void Init(int argc, char** argv);
/* search for the most promising states */ /* search for the most promising states */
void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output); void Search(T2TModel* model, XTensor* input, XTensor* padding, XTensor* output);
/* preparation */ /* preparation */
void Prepare(int myBatchSize,int myBeamSize); void Prepare(int myBatchSize, int myBeamSize);
/* compute the model score for each hypothesis */ /* compute the model score for each hypothesis */
void Score(T2TStateBundle * prev, T2TStateBundle * beam); void Score(T2TStateBundle* prev, T2TStateBundle* beam);
/* generate token indices via beam pruning */ /* generate token indices via beam pruning */
void Generate(T2TStateBundle * beam); void Generate(T2TStateBundle* beam);
/* expand the search graph */ /* expand the search graph */
void Expand(T2TStateBundle * prev, T2TStateBundle * beam); void Expand(T2TStateBundle* prev, T2TStateBundle* beam);
/* collect hypotheses with ending symbol */ /* collect hypotheses with ending symbol */
void Collect(T2TStateBundle * beam); void Collect(T2TStateBundle* beam);
/* fill the hypothesis heap with incomplete hypotheses */ /* fill the hypothesis heap with incomplete hypotheses */
void FillHeap(T2TStateBundle * beam); void FillHeap(T2TStateBundle* beam);
/* save the output sequences in a tensor */ /* save the output sequences in a tensor */
void Dump(XTensor * output); void Dump(XTensor* output);
/* check if the token is an end symbol */ /* check if the token is an end symbol */
bool IsEnd(int token); bool IsEnd(int token);
/* set end symbols for search */ /* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum); void SetEnd(const int* tokens, const int tokenNum);
/* make a mask to prevent duplicated entries in beam expansion for the first position */ /* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam); XTensor MakeFirstMask(T2TStateBundle* beam);
}; };
} }
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/ */
#include <math.h> #include <math.h>
#include "T2TUtility.h" #include "T2TUtility.h"
...@@ -44,23 +44,23 @@ T2TTester::~T2TTester() ...@@ -44,23 +44,23 @@ T2TTester::~T2TTester()
} }
/* initialize the model */ /* initialize the model */
void T2TTester::Init(int argc, char ** argv) void T2TTester::Init(int argc, char** argv)
{ {
LoadParamInt(argc, argv, "vsize", &vSize, 34040); LoadParamInt(argc, argv, "vsize", &vSize, 34040);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize); LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1); LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1);
LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, true); LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, true);
seacher.Init(argc, argv); seacher.Init(argc, argv);
} }
/* /*
test the model test the model
>> fn - test data file >> fn - test data file
>> ofn - output data file >> ofn - output data file
>> model - model that is trained >> model - model that is trained
*/ */
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
{ {
int wc = 0; int wc = 0;
int wordCount = 0; int wordCount = 0;
...@@ -86,7 +86,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -86,7 +86,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
int* seqs = new int[MILLION]; int* seqs = new int[MILLION];
batchLoader.Init(fn); batchLoader.Init(fn);
int count = 0; int count = 0;
while (!batchLoader.IsEmpty()) while (!batchLoader.IsEmpty())
...@@ -94,23 +94,23 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -94,23 +94,23 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
count++; count++;
wordCount = 0; wordCount = 0;
for (int i = 0; i < model->decoder->nlayer; ++i) { for (int i = 0; i < model->decoder->nlayer; ++i) {
model->decoder->selfCache[i].miss = true; model->decoder->selfAttCache[i].miss = true;
model->decoder->contextCache[i].miss = true; model->decoder->enDeAttCache[i].miss = true;
} }
vector<int> indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID); vector<int> indices = batchLoader.LoadBatch(&batchEnc, &paddingEnc, sentBatch, devID);
XTensor output; XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output); seacher.Search(model, &batchEnc, &paddingEnc, &output);
output.Dump(stderr);
for (int i = 0; i < indices.size(); ++i) { for (int i = 0; i < indices.size(); ++i) {
Result res; Result res;
XTensor sent, srcIdx, tgtIdx; XTensor sent, srcIdx, tgtIdx;
InitTensor1D(&srcIdx, 1, X_INT, output.devID); InitTensor1DV2(&srcIdx, 1, X_INT, output.devID);
int idx[]{i}; int idx[]{ i };
srcIdx.SetData(idx, 1); srcIdx.SetData(idx, 1);
InitTensor(&tgtIdx, &srcIdx); InitTensorV2(&tgtIdx, &srcIdx);
SetAscendingOrder(tgtIdx, 0); SetAscendingOrder(tgtIdx, 0);
sent = CopyIndexed(output, 0, srcIdx, tgtIdx); sent = CopyIndexed(output, 0, srcIdx, tgtIdx);
...@@ -127,9 +127,9 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -127,9 +127,9 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
if (batchCount % 1 == 0) { if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n", "[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount); elapsed, sentCount, wordCount);
} }
} }
...@@ -138,11 +138,11 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -138,11 +138,11 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
for (auto res : batchLoader.resBuffer) { for (auto res : batchLoader.resBuffer) {
Dump(ofile, &res.values); Dump(ofile, &res.values);
} }
fclose(ofile); fclose(ofile);
delete[] seqs; delete[] seqs;
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d)\n", elapsed, wordCountTotal, sentCount); XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, sent=%d)\n", elapsed, wordCountTotal, sentCount);
...@@ -153,7 +153,7 @@ dump the result into the file ...@@ -153,7 +153,7 @@ dump the result into the file
>> file - data file >> file - data file
>> output - output tensor >> output - output tensor
*/ */
void T2TTester::Dump(FILE * file, XTensor * output) void T2TTester::Dump(FILE* file, XTensor* output)
{ {
int seqLength = output->GetDim(-1); int seqLength = output->GetDim(-1);
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :) * A week with no trips :)
*/ */
#ifndef __T2TTESTER_H__ #ifndef __T2TTESTER_H__
#define __T2TTESTER_H__ #define __T2TTESTER_H__
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
/* batch size for sentences */ /* batch size for sentences */
int sentBatch; int sentBatch;
/* for batching */ /* for batching */
DataSet batchLoader; DataSet batchLoader;
...@@ -56,13 +56,13 @@ public: ...@@ -56,13 +56,13 @@ public:
~T2TTester(); ~T2TTester();
/* initialize the model */ /* initialize the model */
void Init(int argc, char ** argv); void Init(int argc, char** argv);
/* test the model */ /* test the model */
void Test(const char * fn, const char * ofn, T2TModel * model); void Test(const char* fn, const char* ofn, T2TModel* model);
/* dump the result into the file */ /* dump the result into the file */
void Dump(FILE * file, XTensor * output); void Dump(FILE* file, XTensor* output);
}; };
} }
......
...@@ -38,7 +38,7 @@ namespace transformer ...@@ -38,7 +38,7 @@ namespace transformer
{ {
/* entrance of the program */ /* entrance of the program */
int TransformerMain(int argc, const char ** argv); int TransformerMain(int argc, const char** argv);
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "XList.h" #include "XList.h"
#include "XGlobal.h" #include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts { namespace nts {
...@@ -363,6 +364,8 @@ template struct TensorListBase<long>; ...@@ -363,6 +364,8 @@ template struct TensorListBase<long>;
template struct TensorListBase<float>; template struct TensorListBase<float>;
template struct TensorListBase<short>; template struct TensorListBase<short>;
template struct TensorListBase<XTensor*>; template struct TensorListBase<XTensor*>;
template struct TensorListBase<uint64_t>;
template struct TensorListBase<void*>; template struct TensorListBase<void*>;
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
...@@ -26,6 +26,8 @@ ...@@ -26,6 +26,8 @@
#include "XMem.h" #include "XMem.h"
#include "XGlobal.h" #include "XGlobal.h"
#include <cstdint>
#ifndef __TensorList_H__ #ifndef __TensorList_H__
#define __TensorList_H__ #define __TensorList_H__
...@@ -118,7 +120,14 @@ public: ...@@ -118,7 +120,14 @@ public:
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
T& operator[] (int i) { return GetItem(i); }; T& operator[] (int i) {
CheckNTErrors(i >= -count && i < count, "Index of a list item is out of scope!");
CheckNTErrors(count > 0, "Cannot index an item in an empty list!");
if (i < 0)
return items[count + i];
else
return items[i];
};
T& Get(int i) { return GetItem(i); }; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); }; void Set(int i, T item) { SetItem(i, item); };
}; };
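/* Illustrative sketch (plain C++, independent of TensorListBase): the operator[]
   above now accepts negative indices that count back from the end of the list,
   so list[-1] is the last element, guarded by the CheckNTErrors bounds checks.
   The same mapping as a standalone helper: */
#include <cassert>
#include <vector>

template <typename T>
T& At(std::vector<T>& v, int i)
{
    int count = (int)v.size();
    assert(count > 0 && i >= -count && i < count);
    /* negative indices wrap around: -1 maps to count - 1, -count maps to 0 */
    return i < 0 ? v[count + i] : v[i];
}
/* e.g. for v = {1, 2, 3}: At(v, 0) == 1, At(v, -1) == 3, At(v, -3) == 1 */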
...@@ -132,7 +141,7 @@ typedef TensorListBase<char*> StrList; ...@@ -132,7 +141,7 @@ typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList; typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList; typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList; typedef TensorListBase<short> ShortList;
typedef TensorListBase<uint64_t> UInt64List;
typedef TensorListBase<XTensor*> TensorList; typedef TensorListBase<XTensor*> TensorList;
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -15,16 +15,16 @@ ...@@ -15,16 +15,16 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* *
* implementation of tensors used in this work. It is the basis of XMatrix * implementation of tensors used in this work. It is the basis of XMatrix
* and XVector * and XVector
* *
* *
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes * $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
* *
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
#ifdef USE_CUDA #ifdef USE_CUDA
// the CUDA stuff // the CUDA stuff
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
...@@ -64,7 +64,7 @@ ...@@ -64,7 +64,7 @@
#endif #endif
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts {
int tensorIDGlobal = 0; int tensorIDGlobal = 0;
MUTEX_HANDLE tensorMutex; MUTEX_HANDLE tensorMutex;
...@@ -73,11 +73,11 @@ XTensor NULLTensor; ...@@ -73,11 +73,11 @@ XTensor NULLTensor;
/* generate a tensor id */ /* generate a tensor id */
int MakeTensorID() int MakeTensorID()
{ {
if(tensorIDGlobal == 0) if (tensorIDGlobal == 0)
MUTEX_INIT(tensorMutex); MUTEX_INIT(tensorMutex);
MUTEX_LOCK(tensorMutex); MUTEX_LOCK(tensorMutex);
int id = tensorIDGlobal++; int id = tensorIDGlobal++;
MUTEX_UNLOCK(tensorMutex); MUTEX_UNLOCK(tensorMutex);
return id; return id;
...@@ -91,13 +91,13 @@ XTensor::XTensor() ...@@ -91,13 +91,13 @@ XTensor::XTensor()
id = MakeTensorID(); id = MakeTensorID();
isDefaultDType = true; isDefaultDType = true;
isInGlobalMem = false; isInGlobalMem = false;
isInit = false; isInit = false;
isTmp = false; isTmp = false;
} }
/* constructor */ /* constructor */
XTensor::XTensor(const XTensor * reference) XTensor::XTensor(const XTensor* reference)
{ {
Init(); Init();
SetDataPointer(); SetDataPointer();
...@@ -106,13 +106,13 @@ XTensor::XTensor(const XTensor * reference) ...@@ -106,13 +106,13 @@ XTensor::XTensor(const XTensor * reference)
InitTensorV2(this, reference); InitTensorV2(this, reference);
} }
/* /*
constructor constructor
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDevID - device id >> myDevID - device id
>> myMem - memory pool used to allocating the data array >> myMem - memory pool used to allocating the data array
*/ */
XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem) XTensor::XTensor(const int myOrder, int myDevID, XMem* myMem)
{ {
CheckNTErrors((myOrder >= 0), "Illegal tensor order!"); CheckNTErrors((myOrder >= 0), "Illegal tensor order!");
...@@ -125,8 +125,8 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem) ...@@ -125,8 +125,8 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
devID = myMem == NULL ? myDevID : myMem->devID; devID = myMem == NULL ? myDevID : myMem->devID;
} }
/* /*
constructor constructor
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - size of each dimension >> myDimSize - size of each dimension
>> myDataType - unit size (e.g., int, float, and double) >> myDataType - unit size (e.g., int, float, and double)
...@@ -134,8 +134,8 @@ constructor ...@@ -134,8 +134,8 @@ constructor
>> myDevID - device id >> myDevID - device id
>> myMem - memory pool used to allocating the data array >> myMem - memory pool used to allocating the data array
*/ */
XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType, XTensor::XTensor(const int myOrder, const int* myDimSize, const TENSOR_DATA_TYPE myDataType,
const float myDenseRatio, int myDevID, XMem * myMem) const float myDenseRatio, int myDevID, XMem* myMem)
{ {
Init(); Init();
SetDataPointer(); SetDataPointer();
...@@ -145,12 +145,12 @@ XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYP ...@@ -145,12 +145,12 @@ XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYP
mem = myMem; mem = myMem;
devID = myMem != NULL ? myMem->devID : myDevID; devID = myMem != NULL ? myMem->devID : myDevID;
if(order >= 0) if (order >= 0)
Resize(myOrder, myDimSize, myDataType, myDenseRatio); Resize(myOrder, myDimSize, myDataType, myDenseRatio);
} }
/* copy constructor */ /* copy constructor */
XTensor::XTensor(const XTensor &reference) XTensor::XTensor(const XTensor& reference)
{ {
Init(); Init();
SetDataPointer(); SetDataPointer();
...@@ -158,13 +158,13 @@ XTensor::XTensor(const XTensor &reference) ...@@ -158,13 +158,13 @@ XTensor::XTensor(const XTensor &reference)
ShallowCopy(reference); ShallowCopy(reference);
data = NULL; data = NULL;
dataHost = NULL; dataHost = NULL;
if(reference.isTmp){ if (reference.isTmp) {
devID = reference.devID; devID = reference.devID;
mem = reference.mem; mem = reference.mem;
data = reference.data; data = reference.data;
signature = reference.signature; signature = reference.signature;
/* what we really want to do is "reference.data = NULL;" /* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset "reference.data" As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in here. So we save the ADDRESS of "reference.data" in
...@@ -172,26 +172,26 @@ XTensor::XTensor(const XTensor &reference) ...@@ -172,26 +172,26 @@ XTensor::XTensor(const XTensor &reference)
This is VERY tricky and there might be better solutions :) */ This is VERY tricky and there might be better solutions :) */
*reference.dataP = NULL; *reference.dataP = NULL;
} }
else{ else {
devID = reference.devID; devID = reference.devID;
mem = reference.mem; mem = reference.mem;
InitTensorV2(this, &reference); InitTensorV2(this, &reference);
_CopyValues(&reference, this); _CopyValues(&reference, this);
} }
if(reference.isTmp) if (reference.isTmp)
XLink::Replace(&reference, this); XLink::Replace(&reference, this);
else{ else {
CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!"); CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
XLink::CopyIncoming(&reference, this); XLink::CopyIncoming(&reference, this);
} }
isInit = true; isInit = true;
isTmp = reference.isTmp; isTmp = reference.isTmp;
} }
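/* Illustrative sketch (not from the change itself; assumes devID -1 selects the
   CPU and uses InitTensor2DV2, the 2-D analogue of the V2 initializers used in
   this change): when the source is a temporary (isTmp), the constructor above
   takes over the temporary's data buffer and clears it through the saved dataP
   address so nothing is freed twice; a non-temporary source gets a full deep
   copy via InitTensorV2 and _CopyValues. In user code both paths look the same: */
void CopyConstructorSketch()
{
    XTensor a;
    InitTensor2DV2(&a, 2, 2, X_FLOAT, -1);
    a.SetDataRand(-1.0F, 1.0F);
    XTensor b(a);            /* deep copy: a keeps its own buffer           */
    XTensor c(a + 1.0F);     /* the temporary result's buffer is moved to c */
}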
/* copy constructor (with right value reference) */ /* copy constructor (with right value reference) */
XTensor::XTensor(const XTensor &&reference) XTensor::XTensor(const XTensor&& reference)
{ {
Init(); Init();
SetDataPointer(); SetDataPointer();
...@@ -199,12 +199,12 @@ XTensor::XTensor(const XTensor &&reference) ...@@ -199,12 +199,12 @@ XTensor::XTensor(const XTensor &&reference)
ShallowCopy(reference); ShallowCopy(reference);
data = NULL; data = NULL;
dataHost = NULL; dataHost = NULL;
devID = reference.devID; devID = reference.devID;
mem = reference.mem; mem = reference.mem;
data = reference.data; data = reference.data;
signature = reference.signature; signature = reference.signature;
/* what we really want to do is "reference.data = NULL;" /* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset "reference.data" As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in here. So we save the ADDRESS of "reference.data" in
...@@ -215,7 +215,7 @@ XTensor::XTensor(const XTensor &&reference) ...@@ -215,7 +215,7 @@ XTensor::XTensor(const XTensor &&reference)
XLink::Replace(&reference, this); XLink::Replace(&reference, this);
isInit = true; isInit = true;
isTmp = reference.isTmp; isTmp = reference.isTmp;
} }
/* destructor */ /* destructor */
...@@ -225,30 +225,30 @@ XTensor::~XTensor() ...@@ -225,30 +225,30 @@ XTensor::~XTensor()
the connectivity of the graph. To avoid memory the connectivity of the graph. To avoid memory
leaks, we release the data of the new tensor leaks, we release the data of the new tensor
when its parent is deleted (see ClearIncoming). */ when its parent is deleted (see ClearIncoming). */
if(outgo.tailNum > 0){ if (outgo.tailNum > 0) {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, dimSize, order * sizeof(int)); memcpy(dims, dimSize, order * sizeof(int));
dims[0] = -dims[0]; dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem); XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMPFlag(); newTensor->SetTMPFlag();
newTensor->data = data; newTensor->data = data;
data = NULL; data = NULL;
XLink::Replace(this, newTensor); XLink::Replace(this, newTensor);
} }
XLink::ClearOutgoing(this); XLink::ClearOutgoing(this);
XLink::ClearIncoming(this); XLink::ClearIncoming(this);
DestroyData(); DestroyData();
if(grad != NULL) if (grad != NULL)
delete grad; delete grad;
} }
/* set the name of the tensor */ /* set the name of the tensor */
void XTensor::SetName(const char * myName) void XTensor::SetName(const char* myName)
{ {
strcpy(name, myName); strcpy(name, myName);
} }
...@@ -277,10 +277,10 @@ void XTensor::Init() ...@@ -277,10 +277,10 @@ void XTensor::Init()
isInGlobalMem = false; isInGlobalMem = false;
memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM); memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
isInit = false; isInit = false;
isTmp = false; isTmp = false;
isGrad = false; isGrad = false;
isVar = false; isVar = false;
enableGrad = true; enableGrad = X_ENABLE_GRAD;
visitMark = 0; visitMark = 0;
grad = NULL; grad = NULL;
} }
...@@ -288,26 +288,26 @@ void XTensor::Init() ...@@ -288,26 +288,26 @@ void XTensor::Init()
/* delete data arrays */ /* delete data arrays */
void XTensor::DestroyData() void XTensor::DestroyData()
{ {
if(data != NULL && mem == NULL && !isShared) if (data != NULL && mem == NULL && !isShared)
XMemFree(devID, data); XMemFree(devID, data);
else if(data != NULL && isInGlobalMem) else if (data != NULL && isInGlobalMem)
FreeData(this, mem); FreeData(this, mem);
else if(data != NULL) else if (data != NULL)
mem->Release(data, GetDataSizeInChar(), signature); mem->Release(data, GetDataSizeInChar(), signature);
data = NULL; data = NULL;
if(dataHost != NULL) if (dataHost != NULL)
delete[] (char*)dataHost; delete[](char*)dataHost;
dataHost = NULL; dataHost = NULL;
} }
/* /*
shallow copy of the tensor shallow copy of the tensor
Note that we do not copy data array here Note that we do not copy data array here
>> tensor - the source tensor >> tensor - the source tensor
*/ */
void XTensor::ShallowCopy(const XTensor &tensor) void XTensor::ShallowCopy(const XTensor& tensor)
{ {
strcpy(name, tensor.name); strcpy(name, tensor.name);
order = tensor.order; order = tensor.order;
...@@ -318,7 +318,7 @@ void XTensor::ShallowCopy(const XTensor &tensor) ...@@ -318,7 +318,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
unitNum = tensor.unitNum; unitNum = tensor.unitNum;
isSparse = tensor.isSparse; isSparse = tensor.isSparse;
unitNumNonZero = tensor.unitNumNonZero; unitNumNonZero = tensor.unitNumNonZero;
denseRatio = tensor.denseRatio; denseRatio = tensor.denseRatio;
isShared = tensor.isShared; isShared = tensor.isShared;
isDefaultDType = tensor.isDefaultDType; isDefaultDType = tensor.isDefaultDType;
isInGlobalMem = tensor.isInGlobalMem; isInGlobalMem = tensor.isInGlobalMem;
...@@ -330,17 +330,17 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -330,17 +330,17 @@ XTensor& XTensor::operator= (const XTensor& tensor)
{ {
/* we must make a hard copy of the tensor if it is the input /* we must make a hard copy of the tensor if it is the input
of another node. */ of another node. */
if(outgo.tailNum > 0){ if (outgo.tailNum > 0) {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, dimSize, order * sizeof(int)); memcpy(dims, dimSize, order * sizeof(int));
dims[0] = -dims[0]; dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem); XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMPFlag(); newTensor->SetTMPFlag();
newTensor->data = data; newTensor->data = data;
newTensor->dataHost = dataHost; newTensor->dataHost = dataHost;
newTensor->signature = tensor.signature; newTensor->signature = tensor.signature;
XLink::Replace(this, newTensor); XLink::Replace(this, newTensor);
XLink::ClearOutgoing(this); XLink::ClearOutgoing(this);
XLink::ClearIncoming(this); XLink::ClearIncoming(this);
...@@ -350,35 +350,35 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -350,35 +350,35 @@ XTensor& XTensor::operator= (const XTensor& tensor)
dataHost = NULL; dataHost = NULL;
} }
if(false && !tensor.isTmp){ if (false && !tensor.isTmp) {
/* NOTE: this might lead to additional data copy by Mac LLVM compilers */ /* NOTE: this might lead to additional data copy by Mac LLVM compilers */
/* we make an identity transformation here */ /* we make an identity transformation here */
if(outgo.tailNum > 0) if (outgo.tailNum > 0)
XLink::ClearOutgoing(this); XLink::ClearOutgoing(this);
XLink::ClearIncoming(this); XLink::ClearIncoming(this);
if(!_IsSameShaped(this, &tensor)) if (!_IsSameShaped(this, &tensor))
Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio); Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio);
_Identity(&tensor, this); _Identity(&tensor, this);
XLink::MakeLink(&tensor, NULL, this, FUNC_IDENTITY); XLink::MakeLink(&tensor, NULL, this, FUNC_IDENTITY);
} }
else{ else {
/* hard copy of the data array */ /* hard copy of the data array */
int size = unitNum * unitSize; int size = unitNum * unitSize;
if( isInit && !isSparse && !tensor.isSparse && if (isInit && !isSparse && !tensor.isSparse &&
size == tensor.unitNum * tensor.unitSize && size == tensor.unitNum * tensor.unitSize &&
((devID < 0 && tensor.devID < 0) && devID == tensor.devID) && ((devID < 0 && tensor.devID < 0) && devID == tensor.devID) &&
data != NULL) data != NULL)
{ {
XMemCopy(data, devID, tensor.data, tensor.devID, size); XMemCopy(data, devID, tensor.data, tensor.devID, size);
if(dataHost != NULL && tensor.dataHost != NULL) if (dataHost != NULL && tensor.dataHost != NULL)
XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size); XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
} }
else{ else {
DestroyData(); DestroyData();
if(!isInit){ if (!isInit) {
devID = tensor.devID; devID = tensor.devID;
mem = tensor.mem; mem = tensor.mem;
} }
...@@ -391,7 +391,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -391,7 +391,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
ShallowCopy(tensor); ShallowCopy(tensor);
isInit = true; isInit = true;
isTmp = false; isTmp = false;
CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!"); CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
...@@ -407,17 +407,17 @@ XTensor& XTensor::operator= (const XTensor&& tensor) ...@@ -407,17 +407,17 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
{ {
/* we must make a hard copy of the tensor if it is the input /* we must make a hard copy of the tensor if it is the input
of another node. */ of another node. */
if(outgo.tailNum > 0){ if (outgo.tailNum > 0) {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, dimSize, order * sizeof(int)); memcpy(dims, dimSize, order * sizeof(int));
dims[0] = -dims[0]; dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem); XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMPFlag(); newTensor->SetTMPFlag();
newTensor->data = data; newTensor->data = data;
newTensor->dataHost = dataHost; newTensor->dataHost = dataHost;
newTensor->signature = tensor.signature; newTensor->signature = tensor.signature;
XLink::Replace(this, newTensor); XLink::Replace(this, newTensor);
XLink::ClearOutgoing(this); XLink::ClearOutgoing(this);
XLink::ClearIncoming(this); XLink::ClearIncoming(this);
...@@ -426,17 +426,17 @@ XTensor& XTensor::operator= (const XTensor&& tensor) ...@@ -426,17 +426,17 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
data = NULL; data = NULL;
dataHost = NULL; dataHost = NULL;
} }
DestroyData(); DestroyData();
ShallowCopy(tensor); ShallowCopy(tensor);
isInit = true; isInit = true;
devID = tensor.devID; devID = tensor.devID;
mem = tensor.mem; mem = tensor.mem;
data = tensor.data; data = tensor.data;
signature = tensor.signature; signature = tensor.signature;
/* what we really want to do is "reference.data = NULL;" /* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset "reference.data" As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in here. So we save the ADDRESS of "reference.data" in
...@@ -456,7 +456,7 @@ XTensor XTensor::operator+ (const XTensor& tensor) const ...@@ -456,7 +456,7 @@ XTensor XTensor::operator+ (const XTensor& tensor) const
} }
/* overloading of the plus-sign */ /* overloading of the plus-sign */
XTensor XTensor::operator+ (const DTYPE shift) const XTensor XTensor::operator+ (const DTYPE shift) const
{ {
return ScaleAndShift(*this, 1, shift); return ScaleAndShift(*this, 1, shift);
} }
...@@ -500,10 +500,10 @@ XTensor XTensor::operator/ (const XTensor& tensor) const ...@@ -500,10 +500,10 @@ XTensor XTensor::operator/ (const XTensor& tensor) const
/* overloading of the division-sign */ /* overloading of the division-sign */
XTensor XTensor::operator/ (const DTYPE scale) const XTensor XTensor::operator/ (const DTYPE scale) const
{ {
return ScaleAndShift(*this, (DTYPE)1/scale, 0); return ScaleAndShift(*this, (DTYPE)1 / scale, 0);
} }
/* /*
linear transformation b = a * \scale + \shift linear transformation b = a * \scale + \shift
>> scale - the slope >> scale - the slope
>> shift - the intercept >> shift - the intercept
...@@ -513,12 +513,12 @@ XTensor XTensor::Lin(DTYPE scale, DTYPE shift) const ...@@ -513,12 +513,12 @@ XTensor XTensor::Lin(DTYPE scale, DTYPE shift) const
return Linear(*this, scale, shift); return Linear(*this, scale, shift);
} }
/* /*
relocate the data on the target device relocate the data on the target device
>> myDevId - target device id >> myDevId - target device id
>> myMem - memory pool on the target device >> myMem - memory pool on the target device
*/ */
void XTensor::SetDevice(int myDevId, XMem * myMem) void XTensor::SetDevice(int myDevId, XMem* myMem)
{ {
if (myMem == NULL) { if (myMem == NULL) {
myMem = GMems.GetMem(myDevId); myMem = GMems.GetMem(myDevId);
...@@ -527,9 +527,9 @@ void XTensor::SetDevice(int myDevId, XMem * myMem) ...@@ -527,9 +527,9 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
isInGlobalMem = false; isInGlobalMem = false;
} }
bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim) bool XTensor::IsReduceShaped(const XTensor* a, const XTensor* b, int dim)
{ {
if(a == NULL || b == NULL) if (a == NULL || b == NULL)
return false; return false;
if ((a->order - 1) != b->order) if ((a->order - 1) != b->order)
...@@ -541,61 +541,61 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim) ...@@ -541,61 +541,61 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
return false; return false;
} }
else if (i >= dim) { else if (i >= dim) {
if (a->dimSize[i+1] != b->dimSize[i]) if (a->dimSize[i + 1] != b->dimSize[i])
return false; return false;
} }
} }
if(a->dataType != b->dataType) if (a->dataType != b->dataType)
return false; return false;
if(a->denseRatio != b->denseRatio) if (a->denseRatio != b->denseRatio)
return false; return false;
if(a->isSparse != b->isSparse) if (a->isSparse != b->isSparse)
return false; return false;
return true; return true;
} }
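/* Worked example for the check above: IsReduceShaped(a, b, dim) requires that b
   have exactly a's shape with dimension `dim` removed, plus matching data type,
   density ratio and sparsity. For a of shape {4, 3, 2} and dim = 1, b must have
   shape {4, 2}, i.e. the shape a reduction over dimension 1 would produce. */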
/* /*
set the size of each dimension set the size of each dimension
>> myDimSize - size of each dimension >> myDimSize - size of each dimension
*/ */
void XTensor::SetDim(int * myDimSize) void XTensor::SetDim(int* myDimSize)
{ {
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
dimSize[i] = myDimSize[i]; dimSize[i] = myDimSize[i];
} }
} }
/* /*
get the size of a given dimension get the size of a given dimension
>> dim - the given dim we are looking at >> dim - the given dim we are looking at
*/ */
int XTensor::GetDim(const int dim) const int XTensor::GetDim(const int dim) const
{ {
CheckNTErrors(dim < order, "dimension is out of range!"); CheckNTErrors(dim < order, "dimension is out of range!");
CheckNTErrors(dim >= -order, "dimension is out of range!"); CheckNTErrors(dim >= -order, "dimension is out of range!");
int d = dim; int d = dim;
if(dim < 0) if (dim < 0)
d = order + dim; d = order + dim;
return dimSize[d]; return dimSize[d];
} }
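/* Note on the convention above: GetDim accepts negative indices in
   [-order, order), counted from the last dimension. For a tensor of shape
   {4, 3, 2}, GetDim(-1) == 2 and GetDim(0) == 4, matching the
   output->GetDim(-1) call in T2TTester::Dump earlier in this change. */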
/* /*
reshape the tensor reshape the tensor
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - size of each dimension >> myDimSize - size of each dimension
*/ */
void XTensor::Reshape(const int myOrder, const int * myDimSize) void XTensor::Reshape(const int myOrder, const int* myDimSize)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
int num = 1; int num = 1;
for(int i = 0; i < myOrder; i++){ for (int i = 0; i < myOrder; i++) {
num *= myDimSize[i]; num *= myDimSize[i];
dims[i] = abs(myDimSize[i]); dims[i] = abs(myDimSize[i]);
} }
...@@ -606,7 +606,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize) ...@@ -606,7 +606,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize)
memcpy(dimSize, dims, sizeof(int) * order); memcpy(dimSize, dims, sizeof(int) * order);
} }
/* /*
reshape the tensor into a vector reshape the tensor into a vector
>> num - number of elements >> num - number of elements
*/ */
...@@ -616,14 +616,14 @@ void XTensor::Reshape(const int num) ...@@ -616,14 +616,14 @@ void XTensor::Reshape(const int num)
Reshape(1, &dim); Reshape(1, &dim);
} }
/* /*
reshape the tensor into a matrix reshape the tensor into a matrix
>> rowNum - number of rows >> rowNum - number of rows
>> colNum - number of columns >> colNum - number of columns
*/ */
void XTensor::Reshape(const int rowNum, const int colNum) void XTensor::Reshape(const int rowNum, const int colNum)
{ {
int dims[2] = {rowNum, colNum}; int dims[2] = { rowNum, colNum };
Reshape(2, dims); Reshape(2, dims);
} }
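/* Minimal usage sketch (not from the change itself; assumes devID -1 selects the
   CPU and the InitTensor2DV2 helper): Reshape only reinterprets the dimension
   sizes, so the element count must stay the same, e.g. a 2x6 matrix can be
   viewed as 3x4 or as a 12-element vector. */
void ReshapeSketch()
{
    XTensor t;
    InitTensor2DV2(&t, 2, 6, X_FLOAT, -1);   /* 12 elements */
    t.Reshape(3, 4);                          /* viewed as a 3x4 matrix */
    t.Reshape(12);                            /* viewed as a 12-element vector */
}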
...@@ -663,7 +663,7 @@ XTensor XTensor::TypeAs(const XTensor input) ...@@ -663,7 +663,7 @@ XTensor XTensor::TypeAs(const XTensor input)
/* get the number of items in the data array */ /* get the number of items in the data array */
int XTensor::GetSize() const int XTensor::GetSize() const
{ {
if(isSparse) if (isSparse)
return unitNumNonZero; return unitNumNonZero;
else else
return unitNum; return unitNum;
...@@ -672,39 +672,39 @@ int XTensor::GetSize() const ...@@ -672,39 +672,39 @@ int XTensor::GetSize() const
/* get the size of the memory space used */ /* get the size of the memory space used */
int XTensor::GetDataSizeInChar() const int XTensor::GetDataSizeInChar() const
{ {
if(isSparse){ if (isSparse) {
int num = int(unitNum * denseRatio + 1); int num = int(unitNum * denseRatio + 1);
int tupleSize = sizeof(int)+sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num); int size = sizeof(int) + tupleSize * (num);
return size; return size;
} }
else{ else {
return unitNum * unitSize; return unitNum * unitSize;
} }
} }
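/* Worked example for the sparse branch above: with unitNum = 1000 and
   denseRatio = 0.1, num = int(1000 * 0.1 + 1) = 101 tuples are budgeted; each
   tuple stores an int key plus a DTYPE value, so with 4-byte int and float the
   size is sizeof(int) + (sizeof(int) + sizeof(DTYPE)) * 101 = 4 + 8 * 101 = 812
   bytes. A dense tensor simply takes unitNum * unitSize bytes. */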
/* /*
get unit size in terms of "dataType" get unit size in terms of "dataType"
>> myDataType - type of unit >> myDataType - type of unit
<< return - unit size << return - unit size
*/ */
int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) const int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) const
{ {
if(myDataType == X_INT) if (myDataType == X_INT)
return sizeof(int); return sizeof(int);
else if(myDataType == X_FLOAT) else if (myDataType == X_FLOAT)
return sizeof(float); return sizeof(float);
else if(myDataType == X_DOUBLE) else if (myDataType == X_DOUBLE)
return sizeof(double); return sizeof(double);
else if(myDataType == X_INT8) else if (myDataType == X_INT8)
return 1; return 1;
else if(myDataType == X_FLOAT16) else if (myDataType == X_FLOAT16)
return 2; return 2;
return sizeof(float); return sizeof(float);
} }
/* /*
get offset (2D) get offset (2D)
>> row - index of dimension 0 >> row - index of dimension 0
>> col - index of dimension 1 >> col - index of dimension 1
*/ */
...@@ -717,8 +717,8 @@ MTYPE XTensor::GetOffset2D(int row, int col) const ...@@ -717,8 +717,8 @@ MTYPE XTensor::GetOffset2D(int row, int col) const
return row * dimSize[1] + col; return row * dimSize[1] + col;
} }
/* /*
get offset (3D) get offset (3D)
>> d0 - index of dimension 0 >> d0 - index of dimension 0
>> d1 - index of dimension 1 >> d1 - index of dimension 1
>> d2 - index of dimension 2 >> d2 - index of dimension 2
...@@ -733,49 +733,49 @@ MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const ...@@ -733,49 +733,49 @@ MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const
return (d0 * dimSize[1] + d1) * dimSize[2] + d2; return (d0 * dimSize[1] + d1) * dimSize[2] + d2;
} }
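/* Worked example for the two offset helpers: they return plain row-major
   positions in the flat data array. For dimSize = {4, 3, 2},
   GetOffset3D(2, 1, 1) = (2 * 3 + 1) * 2 + 1 = 15, and GetOffset2D(row, col)
   follows the same pattern with row * dimSize[1] + col. */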
/* /*
set all entries of the tensor to 0 set all entries of the tensor to 0
>> stream - stream for the job pipeline >> stream - stream for the job pipeline
*/ */
void XTensor::SetZeroAll(XStream * stream) void XTensor::SetZeroAll(XStream* stream)
{ {
if(data == NULL) if (data == NULL)
return; return;
if(isSparse){ if (isSparse) {
if(devID >= 0){ if (devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
int size = sizeof(int) + (sizeof(int)+sizeof(DTYPE)) * unitNumNonZero; int size = sizeof(int) + (sizeof(int) + sizeof(DTYPE)) * unitNumNonZero;
int devIDBackup = 0; int devIDBackup = 0;
cudaGetDevice(&devIDBackup); cudaGetDevice(&devIDBackup);
cudaSetDevice(devID); cudaSetDevice(devID);
if(stream == NULL) if (stream == NULL)
cudaMemset(data, 0, size); cudaMemset(data, 0, size);
else else
cudaMemsetAsync(data, 0, size, stream->stream); cudaMemsetAsync(data, 0, size, stream->stream);
cudaSetDevice(devIDBackup); cudaSetDevice(devIDBackup);
#endif #endif
} }
else else
*(int*)data = 0; *(int*)data = 0;
unitNumNonZero = 0; unitNumNonZero = 0;
} }
else{ else {
if(devID >= 0){ if (devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
int devIDBackup = 0; int devIDBackup = 0;
cudaGetDevice(&devIDBackup); cudaGetDevice(&devIDBackup);
cudaSetDevice(devID); cudaSetDevice(devID);
if(stream == NULL) if (stream == NULL)
cudaMemset(data, 0, unitNum * unitSize); cudaMemset(data, 0, unitNum * unitSize);
else else
cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream); cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream);
cudaSetDevice(devIDBackup); cudaSetDevice(devIDBackup);
#endif #endif
} }
...@@ -784,14 +784,14 @@ void XTensor::SetZeroAll(XStream * stream) ...@@ -784,14 +784,14 @@ void XTensor::SetZeroAll(XStream * stream)
} }
} }
/* set the tensor with a data array /* set the tensor with a data array
>> d - input data. it must be on CPU >> d - input data. it must be on CPU
>> num - number of data items >> num - number of data items
>> beg - where we start the data copy in the data array of the tensor >> beg - where we start the data copy in the data array of the tensor
*/ */
void XTensor::SetData(const void * d, int num, int beg) void XTensor::SetData(const void* d, int num, int beg)
{ {
if (data == NULL || d ==NULL) if (data == NULL || d == NULL)
return; return;
CheckNTErrors(!isSparse, "TODO"); CheckNTErrors(!isSparse, "TODO");
...@@ -816,7 +816,7 @@ void XTensor::Range(DTYPE lower, DTYPE upper, DTYPE step) ...@@ -816,7 +816,7 @@ void XTensor::Range(DTYPE lower, DTYPE upper, DTYPE step)
_SetDataRange(this, lower, upper, step); _SetDataRange(this, lower, upper, step);
} }
/* /*
set the tensor items by a uniform distribution in range [lower, upper] set the tensor items by a uniform distribution in range [lower, upper]
>> lower - lower value of the range >> lower - lower value of the range
>> upper - upper value of the range >> upper - upper value of the range
...@@ -830,7 +830,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper) ...@@ -830,7 +830,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
// srand((unsigned)time(0)); // srand((unsigned)time(0));
DTYPE variance = upper - lower; DTYPE variance = upper - lower;
void * d = NULL; void* d = NULL;
if (dataType == X_FLOAT) { if (dataType == X_FLOAT) {
d = new float[unitNum]; d = new float[unitNum];
for (int i = 0; i < unitNum; i++) { for (int i = 0; i < unitNum; i++) {
...@@ -849,12 +849,12 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper) ...@@ -849,12 +849,12 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
} }
SetData(d, unitNum); SetData(d, unitNum);
if (dataType == X_FLOAT) { if (dataType == X_FLOAT) {
delete[] (float*)d; delete[](float*)d;
} }
else { else {
delete[] (double*)d; delete[](double*)d;
} }
} }
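/* Minimal usage sketch (not from the change itself; assumes devID -1 selects the
   CPU and the InitTensor2DV2 helper): both random initializers build the values
   on the host and push them with SetData, so they work for CPU and GPU tensors
   alike. */
void RandomInitSketch()
{
    XTensor w;
    InitTensor2DV2(&w, 4, 4, X_FLOAT, -1);
    w.SetDataRand(-0.1F, 0.1F);     /* uniform values in [-0.1, 0.1] */
    w.SetDataRandn(0.0F, 0.02F);    /* normal values, mean 0, stddev 0.02 */
}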
...@@ -868,12 +868,12 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation) ...@@ -868,12 +868,12 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation)
double z; double z;
double pi = 3.141592654; double pi = 3.141592654;
if (phase == 0){ if (phase == 0) {
u = (rand() + 1.0) / (RAND_MAX + 1.0); u = (rand() + 1.0) / (RAND_MAX + 1.0);
v = (rand() + 1.0) / (RAND_MAX + 1.0); v = (rand() + 1.0) / (RAND_MAX + 1.0);
z = sqrt(-2.0 * log(u))* sin(2.0 * pi * v); z = sqrt(-2.0 * log(u)) * sin(2.0 * pi * v);
} }
else{ else {
z = sqrt(-2.0 * log(u)) * cos(2.0 * pi * v); z = sqrt(-2.0 * log(u)) * cos(2.0 * pi * v);
} }
...@@ -881,7 +881,7 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation) ...@@ -881,7 +881,7 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation)
return mean + (z * standardDeviation); return mean + (z * standardDeviation);
} }
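/* GaussRand above is the Box-Muller transform: two uniforms u, v in (0, 1] give
   two independent standard normals sqrt(-2 ln u) * sin(2 pi v) and
   sqrt(-2 ln u) * cos(2 pi v), returned on alternating calls via the phase flag.
   A self-contained sketch that draws a fresh pair on every call: */
#include <cmath>
#include <cstdlib>

double GaussRandOnce(double mean, double sd)
{
    double u = (rand() + 1.0) / (RAND_MAX + 1.0);   /* in (0, 1], avoids log(0) */
    double v = (rand() + 1.0) / (RAND_MAX + 1.0);
    double z = sqrt(-2.0 * log(u)) * sin(2.0 * 3.141592654 * v);
    return mean + z * sd;
}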
/* /*
set the tensor items by a normal distribution set the tensor items by a normal distribution
>> mean - mean or expectation of the distribution >> mean - mean or expectation of the distribution
>> standardDeviation - standard deviation of the distribution >> standardDeviation - standard deviation of the distribution
...@@ -894,7 +894,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation) ...@@ -894,7 +894,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
return; return;
// srand((unsigned)time(0)); // srand((unsigned)time(0));
void * d = NULL; void* d = NULL;
if (dataType == X_FLOAT) { if (dataType == X_FLOAT) {
d = new float[unitNum]; d = new float[unitNum];
for (int i = 0; i < unitNum; i++) { for (int i = 0; i < unitNum; i++) {
...@@ -914,31 +914,31 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation) ...@@ -914,31 +914,31 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
SetData(d, unitNum); SetData(d, unitNum);
if (dataType == X_FLOAT) { if (dataType == X_FLOAT) {
delete[] (float*)d; delete[](float*)d;
} }
else { else {
delete[] (double*)d; delete[](double*)d;
} }
} }
/* /*
set tensor items with an array of offsets set tensor items with an array of offsets
>> offsets - offset for each data item >> offsets - offset for each data item
>> value - value for the data items >> value - value for the data items
>> num - number of the data items >> num - number of the data items
*/ */
void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num) void XTensor::SetDataBatched(MTYPE* offsets, DTYPE value, int num)
{ {
_SetDataWithOffset(this, offsets, value, num); _SetDataWithOffset(this, offsets, value, num);
} }
/* /*
set tensor items with an array of values set tensor items with an array of values
>> offsets - offset for each data item >> offsets - offset for each data item
>> values - value for each data item >> values - value for each data item
>> num - number of the data items >> num - number of the data items
*/ */
void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num) void XTensor::SetDataBatchedWithValues(MTYPE* offsets, void* values, int num)
{ {
_SetDataWithOffsetAndValue(this, offsets, values, num); _SetDataWithOffsetAndValue(this, offsets, values, num);
} }
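/* Minimal usage sketch (not from the change itself; assumes devID -1 selects the
   CPU and the InitTensor2DV2 helper): the batched setters take flat offsets into
   the data array, such as those returned by GetOffset2D/GetOffset3D, and write
   one shared value, or one value per offset, in a single call. */
void BatchedSetSketch()
{
    XTensor t;
    InitTensor2DV2(&t, 2, 3, X_FLOAT, -1);
    t.SetZeroAll(NULL);
    MTYPE offsets[2] = { t.GetOffset2D(0, 1), t.GetOffset2D(1, 2) };
    t.SetDataBatched(offsets, 1.0F, 2);        /* t(0,1) = t(1,2) = 1.0 */
}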
...@@ -949,8 +949,8 @@ void XTensor::SetDataPointer() ...@@ -949,8 +949,8 @@ void XTensor::SetDataPointer()
dataP = &data; dataP = &data;
} }
/* /*
get the value of a cell with the index get the value of a cell with the index
>> index - index of each dimension >> index - index of each dimension
>> size - size of the index >> size - size of the index
<< return - cell value << return - cell value
...@@ -961,7 +961,7 @@ DTYPE XTensor::Get(int index[], int size) const ...@@ -961,7 +961,7 @@ DTYPE XTensor::Get(int index[], int size) const
return ToCPU(devID, GetCell(index, size)); return ToCPU(devID, GetCell(index, size));
} }
/* /*
get the value of a cell with its offset get the value of a cell with its offset
>> offset - offset in the array >> offset - offset in the array
...@@ -973,37 +973,37 @@ DTYPE XTensor::Get(int offset) const ...@@ -973,37 +973,37 @@ DTYPE XTensor::Get(int offset) const
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!"); CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!"); CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset)."); CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset).");
DTYPE * address = (DTYPE*)data + offset; DTYPE* address = (DTYPE*)data + offset;
return ToCPU(devID, address); return ToCPU(devID, address);
} }
/* /*
get the pointer to a cell get the pointer to a cell
>> index - index of each dimension >> index - index of each dimension
>> size - size of index >> size - size of index
<< return - pointer to the cell << return - pointer to the cell
*/ */
void * XTensor::GetCell(int index[], int size) const void* XTensor::GetCell(int index[], int size) const
{ {
CheckNTErrors((size == order), "Illegal index!"); CheckNTErrors((size == order), "Illegal index!");
int offset = index[0]; int offset = index[0];
for(int i = 1; i < size; ++i){ for (int i = 1; i < size; ++i) {
CheckNTErrors((index[i] < dimSize[i]), "Index is out of range!"); CheckNTErrors((index[i] < dimSize[i]), "Index is out of range!");
offset = offset * dimSize[i] + index[i]; offset = offset * dimSize[i] + index[i];
} }
if(isSparse){ if (isSparse) {
DTYPE value; DTYPE value;
void * p; void* p;
if(BinarySearch(offset, value, p)) if (BinarySearch(offset, value, p))
return (char*)p + sizeof(int); return (char*)p + sizeof(int);
else else
return NULL; return NULL;
} }
else{ else {
return ((char*)data) + offset * unitSize; return ((char*)data) + offset * unitSize;
} }
} }
...@@ -1017,8 +1017,8 @@ DTYPE XTensor::Get0D() const ...@@ -1017,8 +1017,8 @@ DTYPE XTensor::Get0D() const
CheckNTErrors((order == 0), "Cannot get a 0d cell for a tensor whose order is not 0!"); CheckNTErrors((order == 0), "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[1] = {0}; int dims[1] = { 0 };
void * value = GetCell(dims, 0); void* value = GetCell(dims, 0);
return ToCPU(devID, value); return ToCPU(devID, value);
} }
...@@ -1033,14 +1033,14 @@ DTYPE XTensor::Get1D(int i) const ...@@ -1033,14 +1033,14 @@ DTYPE XTensor::Get1D(int i) const
CheckNTErrors((order == 1), "Cannot get a 1d cell for a tensor whose order is not 1!"); CheckNTErrors((order == 1), "Cannot get a 1d cell for a tensor whose order is not 1!");
CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!"); CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[1] = {i}; int dims[1] = { i };
void * value = GetCell(dims, 1); void* value = GetCell(dims, 1);
return ToCPU(devID, value); return ToCPU(devID, value);
} }
/* /*
get the value of a cell in a 2d tensor in default type get the value of a cell in a 2d tensor in default type
>> ni - row index >> ni - row index
>> mi - column index >> mi - column index
...@@ -1053,14 +1053,14 @@ DTYPE XTensor::Get2D(int ni, int mi) const ...@@ -1053,14 +1053,14 @@ DTYPE XTensor::Get2D(int ni, int mi) const
CheckNTErrors((mi >= 0 && mi < dimSize[1]), "dimension 1 is out of range!"); CheckNTErrors((mi >= 0 && mi < dimSize[1]), "dimension 1 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[2] = {ni, mi}; int dims[2] = { ni, mi };
void * value = GetCell(dims, 2); void* value = GetCell(dims, 2);
return ToCPU(devID, value); return ToCPU(devID, value);
} }
/* /*
get the value of a cell in a 3d tensor get the value of a cell in a 3d tensor
>> d0 - index of dimension 0 >> d0 - index of dimension 0
>> d1 - index of dimension 1 >> d1 - index of dimension 1
>> d2 - index of dimension 2 >> d2 - index of dimension 2
...@@ -1073,12 +1073,12 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2) const ...@@ -1073,12 +1073,12 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2) const
CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!"); CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");
int dims[3] = {d0, d1, d2}; int dims[3] = { d0, d1, d2 };
void * value = GetCell(dims, 3); void* value = GetCell(dims, 3);
return ToCPU(devID, value); return ToCPU(devID, value);
} }
/* /*
get the int value of a cell by its offset get the int value of a cell by its offset
>> offset - offset of the item >> offset - offset of the item
...@@ -1089,9 +1089,9 @@ int XTensor::GetInt(int offset) const ...@@ -1089,9 +1089,9 @@ int XTensor::GetInt(int offset) const
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!"); CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!"); CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset)."); CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset).");
int * address = (int*)data + offset; int* address = (int*)data + offset;
return ToCPUInt(devID, address); return ToCPUInt(devID, address);
} }
...@@ -1104,8 +1104,8 @@ int XTensor::Get0DInt() const ...@@ -1104,8 +1104,8 @@ int XTensor::Get0DInt() const
CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!"); CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors(dataType == X_INT, "The tensor is not in int type."); CheckNTErrors(dataType == X_INT, "The tensor is not in int type.");
int dims[1] = {0}; int dims[1] = { 0 };
void * value = GetCell(dims, 0); void* value = GetCell(dims, 0);
return ToCPUInt(devID, value); return ToCPUInt(devID, value);
} }
...@@ -1120,33 +1120,33 @@ int XTensor::Get1DInt(int i) const ...@@ -1120,33 +1120,33 @@ int XTensor::Get1DInt(int i) const
CheckNTErrors(order == 1, "Cannot get a 1d cell for a tensor whose order is not 1!"); CheckNTErrors(order == 1, "Cannot get a 1d cell for a tensor whose order is not 1!");
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in int type."); CheckNTErrors(dataType == X_INT, "The tensor is not in int type.");
int dims[1] = {i}; int dims[1] = { i };
void * value = GetCell(dims, 1); void* value = GetCell(dims, 1);
return ToCPUInt(devID, value); return ToCPUInt(devID, value);
} }
/* /*
get the value of a cell in a 2d tensor in int type get the value of a cell in a 2d tensor in int type
>> ni - row index >> ni - row index
>> mi - column index >> mi - column index
<< return - value of cell(ni, mi) in int << return - value of cell(ni, mi) in int
*/ */
int XTensor::Get2DInt(int ni, int mi) const int XTensor::Get2DInt(int ni, int mi) const
{ {
CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors(ni >= 0 && ni < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(ni >= 0 && ni < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in default type."); CheckNTErrors(dataType == X_INT, "The tensor is not in default type.");
int dims[2] = {ni, mi}; int dims[2] = { ni, mi };
void * value = GetCell(dims, 2); void* value = GetCell(dims, 2);
return ToCPUInt(devID, value); return ToCPUInt(devID, value);
} }
/* /*
get the value of a cell in a 3d tensor in int type get the value of a cell in a 3d tensor in int type
>> d0 - index of dimension 0 >> d0 - index of dimension 0
>> d1 - index of dimension 1 >> d1 - index of dimension 1
...@@ -1161,14 +1161,14 @@ int XTensor::Get3DInt(int d0, int d1, int d2) const ...@@ -1161,14 +1161,14 @@ int XTensor::Get3DInt(int d0, int d1, int d2) const
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in default type."); CheckNTErrors(dataType == X_INT, "The tensor is not in default type.");
int dims[3] = {d0, d1, d2}; int dims[3] = { d0, d1, d2 };
void * value = GetCell(dims, 3); void* value = GetCell(dims, 3);
return ToCPUInt(devID, value); return ToCPUInt(devID, value);
} }
/* /*
get the value of a cell in the sparse tensor get the value of a cell in the sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor >> i - i-th tuple in the tuple list of the sparse tensor
<< return - value of the tuple << return - value of the tuple
*/ */
...@@ -1177,14 +1177,14 @@ DTYPE XTensor::GetInSparse(int i) const ...@@ -1177,14 +1177,14 @@ DTYPE XTensor::GetInSparse(int i) const
CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!"); CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
char * d = (char*)data + sizeof(int); char* d = (char*)data + sizeof(int);
DTYPE * value = (DTYPE*)(d + (sizeof(int) + sizeof(DTYPE)) * i + sizeof(int)); DTYPE* value = (DTYPE*)(d + (sizeof(int) + sizeof(DTYPE)) * i + sizeof(int));
return ToCPU(devID, value); return ToCPU(devID, value);
} }
/* /*
get the key value of a tuple in a sparse tensor get the key value of a tuple in a sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor >> i - i-th tuple in the tuple list of the sparse tensor
<< return - key of the tuple << return - key of the tuple
*/ */
...@@ -1193,14 +1193,14 @@ int XTensor::GetKeyInSparse(int i) const ...@@ -1193,14 +1193,14 @@ int XTensor::GetKeyInSparse(int i) const
CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!"); CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
char * d = (char*)data + sizeof(int); char* d = (char*)data + sizeof(int);
int * key = (int*)(d + (sizeof(int) + sizeof(DTYPE)) * i); int* key = (int*)(d + (sizeof(int) + sizeof(DTYPE)) * i);
return ToCPUInt(devID, key); return ToCPUInt(devID, key);
} }
/* /*
set the value of a cell set the value of a cell
>> value - the value to set >> value - the value to set
>> index - index of the cell for each dimension >> index - index of the cell for each dimension
>> size - size of the index >> size - size of the index
...@@ -1222,7 +1222,7 @@ bool XTensor::Set(DTYPE value, int offset) ...@@ -1222,7 +1222,7 @@ bool XTensor::Set(DTYPE value, int offset)
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!"); CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!"); CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
DTYPE * d = (DTYPE*)data + offset; DTYPE* d = (DTYPE*)data + offset;
return SetToDevice(devID, d, value); return SetToDevice(devID, d, value);
} }
...@@ -1237,13 +1237,13 @@ bool XTensor::Set0D(DTYPE value) ...@@ -1237,13 +1237,13 @@ bool XTensor::Set0D(DTYPE value)
CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!"); CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[1] = {0}; int dims[1] = { 0 };
return SetToDevice(devID, GetCell(dims, 0), value); return SetToDevice(devID, GetCell(dims, 0), value);
} }
/* /*
set the value of a cell in a 1d tensor set the value of a cell in a 1d tensor
>> value - the value to set >> value - the value to set
>> i - item offset >> i - item offset
<< return - succeeded or not << return - succeeded or not
...@@ -1254,12 +1254,12 @@ bool XTensor::Set1D(DTYPE value, int i) ...@@ -1254,12 +1254,12 @@ bool XTensor::Set1D(DTYPE value, int i)
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[1] = {i}; int dims[1] = { i };
return SetToDevice(devID, GetCell(dims, 1), value); return SetToDevice(devID, GetCell(dims, 1), value);
} }
/* /*
set the value of a cell in a 2d tensor in default type set the value of a cell in a 2d tensor in default type
>> value - the value to set >> value - the value to set
>> ni - row index >> ni - row index
...@@ -1273,12 +1273,12 @@ bool XTensor::Set2D(DTYPE value, int ni, int mi) ...@@ -1273,12 +1273,12 @@ bool XTensor::Set2D(DTYPE value, int ni, int mi)
CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[2] = {ni, mi}; int dims[2] = { ni, mi };
return SetToDevice(devID, GetCell(dims, 2), value); return SetToDevice(devID, GetCell(dims, 2), value);
} }
/* /*
set the value of a cell in a 3d tensor in default type set the value of a cell in a 3d tensor in default type
>> value - the value to set >> value - the value to set
>> d0 - index of dimension 0 >> d0 - index of dimension 0
...@@ -1294,11 +1294,11 @@ bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2) ...@@ -1294,11 +1294,11 @@ bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
int dims[3] = {d0, d1, d2}; int dims[3] = { d0, d1, d2 };
return SetToDevice(devID, GetCell(dims, 3), value); return SetToDevice(devID, GetCell(dims, 3), value);
} }
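/* Minimal usage sketch (not from the change itself; assumes devID -1 selects the
   CPU and the InitTensor2DV2 helper): the SetXD and GetXD pairs read or write a
   single cell through SetToDevice/ToCPU, so they suit tests and debugging rather
   than bulk initialization. */
void CellAccessSketch()
{
    XTensor m;
    InitTensor2DV2(&m, 2, 3, X_FLOAT, -1);
    m.SetZeroAll(NULL);
    m.Set2D(3.5F, 1, 2);           /* write row 1, column 2 */
    DTYPE v = m.Get2D(1, 2);       /* v == 3.5F */
}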
/* /*
set the integer value of a cell by its offset set the integer value of a cell by its offset
>> value - the value to set for the item >> value - the value to set for the item
...@@ -1308,15 +1308,15 @@ bool XTensor::SetInt(int value, int offset) ...@@ -1308,15 +1308,15 @@ bool XTensor::SetInt(int value, int offset)
{ {
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!"); CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!"); CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
int * d = (int*)data + offset; int* d = (int*)data + offset;
return SetToDeviceInt(devID, d, value); return SetToDeviceInt(devID, d, value);
} }
/* /*
set the integer value of a cell set the integer value of a cell
>> value - the value to set >> value - the value to set
>> index - index of the cell for each dimension >> index - index of the cell for each dimension
>> size - size of the index >> size - size of the index
...@@ -1339,13 +1339,13 @@ bool XTensor::Set0DInt(int value) ...@@ -1339,13 +1339,13 @@ bool XTensor::Set0DInt(int value)
CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!"); CheckNTErrors(order == 0, "Cannot get a 0d cell for a tensor whose order is not 0!");
CheckNTErrors(dataType == X_INT, "The tensor is not in integer type."); CheckNTErrors(dataType == X_INT, "The tensor is not in integer type.");
int dims[1] = {0}; int dims[1] = { 0 };
return SetToDeviceInt(devID, GetCell(dims, 0), value); return SetToDeviceInt(devID, GetCell(dims, 0), value);
} }
/* /*
set the integer value of a cell in a 1d tensor set the integer value of a cell in a 1d tensor
>> value - the value to set >> value - the value to set
>> i - item offset >> i - item offset
<< return - succeeded or not << return - succeeded or not
...@@ -1356,12 +1356,12 @@ bool XTensor::Set1DInt(int value, int i) ...@@ -1356,12 +1356,12 @@ bool XTensor::Set1DInt(int value, int i)
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in integer type."); CheckNTErrors(dataType == X_INT, "The tensor is not in integer type.");
int dims[1] = {i}; int dims[1] = { i };
return SetToDeviceInt(devID, GetCell(dims, 1), value); return SetToDeviceInt(devID, GetCell(dims, 1), value);
} }
/* /*
set the integer value of a cell in a 2d tensor in default type set the integer value of a cell in a 2d tensor in default type
>> value - the value to set >> value - the value to set
>> ni - row index >> ni - row index
...@@ -1375,12 +1375,12 @@ bool XTensor::Set2DInt(int value, int ni, int mi) ...@@ -1375,12 +1375,12 @@ bool XTensor::Set2DInt(int value, int ni, int mi)
CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(mi >= 0 && mi < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(dataType == X_INT, "The tensor is not in integer type."); CheckNTErrors(dataType == X_INT, "The tensor is not in integer type.");
int dims[2] = {ni, mi}; int dims[2] = { ni, mi };
return SetToDeviceInt(devID, GetCell(dims, 2), value); return SetToDeviceInt(devID, GetCell(dims, 2), value);
} }
/* /*
set the integer value of a cell in a 3d tensor in default type set the integer value of a cell in a 3d tensor in default type
>> value - the value to set >> value - the value to set
>> d0 - index of dimension 0 >> d0 - index of dimension 0
...@@ -1396,36 +1396,36 @@ bool XTensor::Set3DInt(int value, int d0, int d1, int d2) ...@@ -1396,36 +1396,36 @@ bool XTensor::Set3DInt(int value, int d0, int d1, int d2)
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
CheckNTErrors((dataType == X_INT), "The tensor is not in integer type."); CheckNTErrors((dataType == X_INT), "The tensor is not in integer type.");
int dims[3] = {d0, d1, d2}; int dims[3] = { d0, d1, d2 };
return SetToDeviceInt(devID, GetCell(dims, 3), value); return SetToDeviceInt(devID, GetCell(dims, 3), value);
} }
/* /*
increase the value of a cell in a 2d tensor increase the value of a cell in a 2d tensor
>> value - the value to add >> value - the value to add
>> ni - row index >> ni - row index
>> mi - column index >> mi - column index
<< return - succeeded or not << return - succeeded or not
*/ */
bool XTensor::Add2D(DTYPE value, int ni, int mi) bool XTensor::Add2D(DTYPE value, int ni, int mi)
{ {
CheckNTErrors(ni >= 0 && ni < dimSize[0], "the row index is out of range!"); CheckNTErrors(ni >= 0 && ni < dimSize[0], "the row index is out of range!");
CheckNTErrors(mi >= 0 && mi < dimSize[1], "the column index is out of range!"); CheckNTErrors(mi >= 0 && mi < dimSize[1], "the column index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
CheckNTErrors(isSparse == false, "TODO!"); CheckNTErrors(isSparse == false, "TODO!");
if(devID < 0){ if (devID < 0) {
DTYPE * p = (DTYPE*)data + ni * dimSize[1] + mi; DTYPE* p = (DTYPE*)data + ni * dimSize[1] + mi;
CheckNTErrors((p != NULL), "No data array is found!"); CheckNTErrors((p != NULL), "No data array is found!");
*p = *p + value; *p = *p + value;
return true; return true;
} }
else{ else {
int dims[2] = {ni, mi}; int dims[2] = { ni, mi };
return SetToDevice(devID, GetCell(dims, 2), Get2D(ni, mi) + value); return SetToDevice(devID, GetCell(dims, 2), Get2D(ni, mi) + value);
} }
} }
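Add2D is the read-modify-write counterpart of the setters: on the host it bumps the value through a raw pointer, on a device it re-reads the cell with Get2D and writes the sum back with SetToDevice. A small sketch under the same assumptions as above (host tensor, default float type):

    // sketch: accumulate into one cell of a 2 x 2 DEFAULT_DTYPE tensor on the host
    XTensor m;
    InitTensor2DV2(&m, 2, 2, X_FLOAT, -1);
    m.SetZeroAll();
    m.Add2D(0.5F, 1, 1);            // cell (1,1) becomes 0.5
    m.Add2D(0.5F, 1, 1);            // cell (1,1) becomes 1.0
    DTYPE v = m.Get2D(1, 1);        // read back, expected 1.0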
...@@ -1433,31 +1433,31 @@ increase the value of a cell in a 2d tensor ...@@ -1433,31 +1433,31 @@ increase the value of a cell in a 2d tensor
/* get the number of non-zero elements (in a sparse tensor) */ /* get the number of non-zero elements (in a sparse tensor) */
int XTensor::GetNonzeroSize() const int XTensor::GetNonzeroSize() const
{ {
if(!isSparse){ if (!isSparse) {
XPRINT(1, stderr, "WARNING! Counting non-zero elements in a dense tensor might be slow!\n"); XPRINT(1, stderr, "WARNING! Counting non-zero elements in a dense tensor might be slow!\n");
CheckNTErrors(devID < 0, "TODO"); CheckNTErrors(devID < 0, "TODO");
if(dataType == DEFAULT_DTYPE){ if (dataType == DEFAULT_DTYPE) {
int count = 0; int count = 0;
for(int i = 0; i < unitNum; i++){ for (int i = 0; i < unitNum; i++) {
DTYPE value = *(DTYPE*)((char*)data + i * sizeof(DTYPE)); DTYPE value = *(DTYPE*)((char*)data + i * sizeof(DTYPE));
if(value == 0) if (value == 0)
count++; count++;
} }
return count; return count;
} }
else{ else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
return -1; return -1;
} }
} }
else{ else {
/* return the head of the tuple list */ /* return the head of the tuple list */
return unitNumNonZero; return unitNumNonZero;
} }
} }
/* /*
set the tensor as "temporary" set the tensor as "temporary"
>> myIsTMP - the flag >> myIsTMP - the flag
*/ */
void XTensor::SetTMPFlag(bool myIsTmp) void XTensor::SetTMPFlag(bool myIsTmp)
...@@ -1465,8 +1465,8 @@ void XTensor::SetTMPFlag(bool myIsTmp) ...@@ -1465,8 +1465,8 @@ void XTensor::SetTMPFlag(bool myIsTmp)
isTmp = myIsTmp; isTmp = myIsTmp;
} }
/* /*
set the tensor as "keep-gradient" set the tensor as "keep-gradient"
>> myIsGrad - the flag >> myIsGrad - the flag
*/ */
void XTensor::SetGradFlag(bool myIsGrad) void XTensor::SetGradFlag(bool myIsGrad)
...@@ -1474,18 +1474,18 @@ void XTensor::SetGradFlag(bool myIsGrad) ...@@ -1474,18 +1474,18 @@ void XTensor::SetGradFlag(bool myIsGrad)
isGrad = myIsGrad; isGrad = myIsGrad;
} }
/* /*
set the tensor as "variable" set the tensor as "variable"
>> myIsVar - the flag >> myIsVar - the flag
*/ */
void XTensor::SetVarFlag(bool myIsVar) void XTensor::SetVarFlag(bool myIsVar)
{ {
isVar = myIsVar; isVar = myIsVar;
if(isVar) if (isVar)
SetGradFlag(true); SetGradFlag(true);
} }
/* /*
resize a tensor with a specified tensor size resize a tensor with a specified tensor size
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - the size of each dimension >> myDimSize - the size of each dimension
...@@ -1493,11 +1493,11 @@ resize a tensor with a specified tensor size ...@@ -1493,11 +1493,11 @@ resize a tensor with a specified tensor size
>> myDenseRatio - how often an element has non-zero value >> myDenseRatio - how often an element has non-zero value
<< return - succeeded or not << return - succeeded or not
*/ */
bool XTensor::Resize(const int myOrder, const int * myDimSize, bool XTensor::Resize(const int myOrder, const int* myDimSize,
const TENSOR_DATA_TYPE myDataType, const float myDenseRatio) const TENSOR_DATA_TYPE myDataType, const float myDenseRatio)
{ {
/* free old mem */ /* free old mem */
if(data != NULL){ if (data != NULL) {
if (mem == NULL) if (mem == NULL)
XMemFree(devID, data); XMemFree(devID, data);
else else
...@@ -1505,7 +1505,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1505,7 +1505,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
} }
signature = mem != NULL ? mem->GetSignature() : 0; signature = mem != NULL ? mem->GetSignature() : 0;
order = myOrder; order = myOrder;
unitNum = 1; unitNum = 1;
unitNumNonZero = 0; unitNumNonZero = 0;
...@@ -1513,11 +1513,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1513,11 +1513,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
bool filledData = true; bool filledData = true;
bool zeroData = false; bool zeroData = false;
for(int i = 0; i < order; i++){ for (int i = 0; i < order; i++) {
dimSize[i] = abs(myDimSize[i]); dimSize[i] = abs(myDimSize[i]);
if(myDimSize[i] < 0) if (myDimSize[i] < 0)
filledData = false; filledData = false;
if(myDimSize[i] == 0) if (myDimSize[i] == 0)
zeroData = true; zeroData = true;
unitNum *= dimSize[i]; unitNum *= dimSize[i];
} }
...@@ -1528,20 +1528,20 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1528,20 +1528,20 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
dataType = myDataType; dataType = myDataType;
unitSize = GetUnitSize(dataType); unitSize = GetUnitSize(dataType);
if(myDataType != DEFAULT_DTYPE) if (myDataType != DEFAULT_DTYPE)
isDefaultDType = false; isDefaultDType = false;
else else
isDefaultDType = true; isDefaultDType = true;
if(zeroData){ if (zeroData) {
unitNum = 0; unitNum = 0;
return false; return false;
} }
if(isSparse){ if (isSparse) {
/* /*
for sparse matrices, we use a list of tuple (key, value), for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrix as an example, ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j; we have key = m * i + j;
The data array is The data array is
--------- ---------
...@@ -1555,23 +1555,23 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1555,23 +1555,23 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
(1, 0, 5) (1, 0, 5)
where the first number (2) indicates the number of elements. where the first number (2) indicates the number of elements.
*/ */
int num = int(unitNum * denseRatio + 1); int num = int(unitNum * denseRatio + 1);
int tupleSize = sizeof(int)+sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num); int size = sizeof(int) + tupleSize * (num);
if(filledData){
int * d = NULL;
if(mem == NULL){ if (filledData) {
int* d = NULL;
if (mem == NULL) {
d = new int[size]; d = new int[size];
memset(d, 0, size); memset(d, 0, size);
} }
else{ else {
d = (int*)mem->Alloc(mem->devID, size); d = (int*)mem->Alloc(mem->devID, size);
} }
if(d == NULL) if (d == NULL)
return false; return false;
#if !defined(UNSAFE_BUT_FAST_MEM) #if !defined(UNSAFE_BUT_FAST_MEM)
...@@ -1581,11 +1581,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1581,11 +1581,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
} }
return true; return true;
} }
else{ else {
if(filledData){ if (filledData) {
/* allocate the new one */ /* allocate the new one */
if(mem == NULL){ if (mem == NULL) {
data = XMemAlloc(devID, unitNum * unitSize); data = XMemAlloc(devID, unitNum * unitSize);
#if defined(UNSAFE_BUT_FAST_MEM) #if defined(UNSAFE_BUT_FAST_MEM)
XMemSet(devID, data, 0, unitNum * unitSize); XMemSet(devID, data, 0, unitNum * unitSize);
#endif #endif
...@@ -1593,28 +1593,28 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1593,28 +1593,28 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
else else
data = (void*)mem->Alloc(mem->devID, unitNum * unitSize); data = (void*)mem->Alloc(mem->devID, unitNum * unitSize);
if(data == NULL) if (data == NULL)
return false; return false;
} }
#if !defined(UNSAFE_BUT_FAST_MEM) #if !defined(UNSAFE_BUT_FAST_MEM)
if(data != NULL) if (data != NULL)
XMem::SetZero(data, unitNum * unitSize, mem); XMem::SetZero(data, unitNum * unitSize, mem);
#endif #endif
return true; return true;
} }
} }
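For the sparse branch of Resize, the reserved buffer is a 4-byte element counter followed by num (key, value) tuples, with num = unitNum * denseRatio + 1. A back-of-the-envelope check of that size computation (hypothetical worked example assuming 32-bit int and 32-bit DTYPE, not library code):

    int   unitNum    = 2 * 3;                            // a 2 x 3 matrix
    float denseRatio = 0.5F;
    int   num        = int(unitNum * denseRatio + 1);    // 4 tuples reserved
    int   tupleSize  = sizeof(int) + sizeof(float);      // key + value = 8 bytes
    int   size       = sizeof(int) + tupleSize * num;    // 4-byte header + 32 = 36 bytes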
/* /*
resize a tensor by another resize a tensor by another
>> myTensor - tensor for reference >> myTensor - tensor for reference
*/ */
bool XTensor::Resize(const XTensor * myTensor) bool XTensor::Resize(const XTensor* myTensor)
{ {
denseRatio = myTensor->denseRatio; denseRatio = myTensor->denseRatio;
TENSOR_DATA_TYPE myDataType = myTensor->dataType; TENSOR_DATA_TYPE myDataType = myTensor->dataType;
if(myDataType != DEFAULT_DTYPE) if (myDataType != DEFAULT_DTYPE)
isDefaultDType = false; isDefaultDType = false;
else else
isDefaultDType = true; isDefaultDType = true;
...@@ -1622,7 +1622,7 @@ bool XTensor::Resize(const XTensor * myTensor) ...@@ -1622,7 +1622,7 @@ bool XTensor::Resize(const XTensor * myTensor)
return Resize(myTensor->order, myTensor->dimSize, myDataType, denseRatio); return Resize(myTensor->order, myTensor->dimSize, myDataType, denseRatio);
} }
/* /*
binary search to find an element in a sparse tensor binary search to find an element in a sparse tensor
>> key - for search >> key - for search
>> value - value for return >> value - value for return
...@@ -1630,54 +1630,54 @@ binary search to find an element in a sparse tensor ...@@ -1630,54 +1630,54 @@ binary search to find an element in a sparse tensor
it is the previous one if there is no hit it is the previous one if there is no hit
<< return - found it or not? << return - found it or not?
*/ */
bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const bool XTensor::BinarySearch(int key, DTYPE& value, void*& position) const
{ {
CheckNTErrors((isSparse), "A sparse tensor is required!"); CheckNTErrors((isSparse), "A sparse tensor is required!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type.");
int * d = (int*)data; int* d = (int*)data;
if(key < 0 || *d == 0){ if (key < 0 || *d == 0) {
value = 0; value = 0;
position = NULL; position = NULL;
return false; return false;
} }
int low = 0; int low = 0;
int high = *d - 1; int high = *d - 1;
int last = -1; int last = -1;
bool ok = false; bool ok = false;
int * k = NULL; int* k = NULL;
int headSize = sizeof(int); int headSize = sizeof(int);
int tupleSize = sizeof(int)+sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
char * p = (char*)data + headSize; char* p = (char*)data + headSize;
while (low <= high){ while (low <= high) {
int mid = low + (high-low)/2; int mid = low + (high - low) / 2;
k = (int*)(p + tupleSize * mid); k = (int*)(p + tupleSize * mid);
if (*k == key){ if (*k == key) {
ok = true; ok = true;
high = mid -1; high = mid - 1;
break; break;
}
else if(*k > key){
high = mid -1;
} }
else{ else if (*k > key) {
low = mid +1; high = mid - 1;
}
else {
low = mid + 1;
last = mid; last = mid;
} }
} }
if(ok){ if (ok) {
DTYPE * p = (DTYPE*)((char*)k + sizeof(int)); DTYPE* p = (DTYPE*)((char*)k + sizeof(int));
value = *p; value = *p;
position = k; position = k;
return true; return true;
} }
else{ else {
value = 0; value = 0;
if(last == -1) if (last == -1)
position = NULL; position = NULL;
else else
position = (char*)data + headSize + tupleSize * last; position = (char*)data + headSize + tupleSize * last;
...@@ -1685,20 +1685,20 @@ bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const ...@@ -1685,20 +1685,20 @@ bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
} }
} }
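BinarySearch walks that same header-plus-tuples layout: it reads the element count from the leading int, bisects the sorted keys, returns the value on a hit, and otherwise reports the position of the closest smaller key. A stand-alone sketch of the lookup against a plain buffer (hypothetical helper, not part of the library) shows the pointer arithmetic:

    // buffer layout assumed: [int count][int key0][float val0][int key1][float val1]...
    #include <cstring>
    bool FindSparseCell(const char* buf, int key, float& value)
    {
        int count;
        std::memcpy(&count, buf, sizeof(int));
        const char* p = buf + sizeof(int);
        const int tupleSize = sizeof(int) + sizeof(float);
        int low = 0, high = count - 1;
        while (low <= high) {
            int mid = low + (high - low) / 2;
            int k;
            std::memcpy(&k, p + tupleSize * mid, sizeof(int));
            if (k == key) {
                std::memcpy(&value, p + tupleSize * mid + sizeof(int), sizeof(float));
                return true;
            }
            else if (k > key)
                high = mid - 1;
            else
                low = mid + 1;
        }
        return false;
    }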
/* /*
dump data to a file dump data to a file
>> file - where to dump the data >> file - where to dump the data
>> label - label of the tensor >> label - label of the tensor
>> n - number of items to dump >> n - number of items to dump
>> beg - the first item id >> beg - the first item id
>> verbose - verbose level >> verbose - verbose level
*/ */
void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, const int verbose) void XTensor::Dump(FILE* file, const char* label, const int n, const int beg, const int verbose)
{ {
if (verbose > verboseLevel) if (verbose > verboseLevel)
return; return;
void * d = data; void* d = data;
bool isNewData = false; bool isNewData = false;
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -1716,7 +1716,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1716,7 +1716,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
num *= dimSize[i]; num *= dimSize[i];
num = int(num * denseRatio + 1); num = int(num * denseRatio + 1);
int tupleSize = sizeof(int) + sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num); int size = sizeof(int) + tupleSize * (num);
d = new char[size]; d = new char[size];
memset(d, 0, size); memset(d, 0, size);
...@@ -1730,8 +1730,8 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1730,8 +1730,8 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
if (label != NULL) if (label != NULL)
fprintf(file, "%s ", label); fprintf(file, "%s ", label);
if(isInit){ if (isInit) {
fprintf(file, "order=%d dimsize=", order); fprintf(file, "order=%d dimsize=", order);
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]); fprintf(file, "%d", dimSize[i]);
...@@ -1739,21 +1739,21 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1739,21 +1739,21 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, ","); fprintf(file, ",");
} }
} }
else{ else {
fprintf(file, "order=-1 dimsize=-1"); fprintf(file, "order=-1 dimsize=-1");
} }
fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio); fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio);
if(!isInit){ if (!isInit) {
fprintf(file, "NULL"); fprintf(file, "NULL");
} }
if (!isSparse) { if (!isSparse) {
if (dataType == DEFAULT_DTYPE) { if (dataType == DEFAULT_DTYPE) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum); int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){ for (int i = beg; i < end; i++) {
DTYPE f = ((DTYPE*)d)[i]; DTYPE f = ((DTYPE*)d)[i];
if(i == beg) if (i == beg)
fprintf(file, "%e", f); fprintf(file, "%e", f);
else else
fprintf(file, " %e", f); fprintf(file, " %e", f);
...@@ -1762,9 +1762,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1762,9 +1762,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
} }
else if (dataType == X_INT) { else if (dataType == X_INT) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum); int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){ for (int i = beg; i < end; i++) {
int f = ((int*)d)[i]; int f = ((int*)d)[i];
if(i == beg) if (i == beg)
fprintf(file, "%d", f); fprintf(file, "%d", f);
else else
fprintf(file, " %d", f); fprintf(file, " %d", f);
...@@ -1795,7 +1795,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1795,7 +1795,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
} }
} }
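Putting the fprintf calls above together, the text produced by Dump is a one-line header followed by the values in order. For a dense 2 x 2 float tensor holding 1..4 the output would plausibly be (assuming GetDataTypeName prints the enum name and no label is passed):

    order=2 dimsize=2,2 dtype=X_FLOAT dense=1.000000
    1.000000e+00 2.000000e+00 3.000000e+00 4.000000e+00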
/* /*
dump data to a file dump data to a file
>> tensor - the tensor for dumping >> tensor - the tensor for dumping
>> file - where to dump the data >> file - where to dump the data
...@@ -1804,15 +1804,15 @@ dump data to a file ...@@ -1804,15 +1804,15 @@ dump data to a file
>> beg - the first item id >> beg - the first item id
>> verbose - verbose level >> verbose - verbose level
*/ */
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose) void XTensor::Dump(const XTensor* tensor, FILE* file, const char* label, const int n, const int beg, const int verbose)
{ {
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem); XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a); _CopyValues(tensor, &a);
a.Dump(file, label, n, beg, verbose); a.Dump(file, label, n, beg, verbose);
} }
/* /*
dump data to a binary file dump data to a binary file
>> file - where to dump the data >> file - where to dump the data
*/ */
void XTensor::BinaryDump(FILE* file) void XTensor::BinaryDump(FILE* file)
...@@ -1831,12 +1831,12 @@ void XTensor::BinaryDump(FILE* file) ...@@ -1831,12 +1831,12 @@ void XTensor::BinaryDump(FILE* file)
} }
} }
/* /*
read data from a file read data from a file
>> file - where to load the data >> file - where to load the data
>> label - label of the tensor >> label - label of the tensor
*/ */
void XTensor::Read(FILE * file, const char * label) void XTensor::Read(FILE* file, const char* label)
{ {
char typeName[32] = ""; char typeName[32] = "";
char dimSizeName[128] = ""; char dimSizeName[128] = "";
...@@ -1855,12 +1855,12 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1855,12 +1855,12 @@ void XTensor::Read(FILE * file, const char * label)
fgetc(file); fgetc(file);
if (fscanf(file, "order=%d dimsize=%s dtype=%s dense=%f", if (fscanf(file, "order=%d dimsize=%s dtype=%s dense=%f",
&dimNum, dimSizeName, typeName, &dRatio) < 4) { &dimNum, dimSizeName, typeName, &dRatio) < 4) {
ShowNTErrors("Incorrect format when reading the tensor!"); ShowNTErrors("Incorrect format when reading the tensor!");
} }
char c; char c;
do { do {
c = fgetc(file); c = fgetc(file);
} while (c != '\n' && c != EOF); } while (c != '\n' && c != EOF);
...@@ -1869,7 +1869,7 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1869,7 +1869,7 @@ void XTensor::Read(FILE * file, const char * label)
int o = 0; int o = 0;
bool sameSize = true; bool sameSize = true;
char * p = dimSizeName; char* p = dimSizeName;
while (*p != 0) { while (*p != 0) {
while (*p == ' ' || *p == '\t') while (*p == ' ' || *p == '\t')
p++; p++;
...@@ -1893,14 +1893,14 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1893,14 +1893,14 @@ void XTensor::Read(FILE * file, const char * label)
if (!sameSize || dRatio > denseRatio || GetDataType(typeName) != dataType) if (!sameSize || dRatio > denseRatio || GetDataType(typeName) != dataType)
Resize(dimNum, dims, GetDataType(typeName), dRatio); Resize(dimNum, dims, GetDataType(typeName), dRatio);
void * dataBuf = XMemAlloc(-1, GetDataSizeInChar()); void* dataBuf = XMemAlloc(-1, GetDataSizeInChar());
void * dataBackup = data; void* dataBackup = data;
data = dataBuf; data = dataBuf;
if (!isSparse) { if (!isSparse) {
if (dataType == DEFAULT_DTYPE) { if (dataType == DEFAULT_DTYPE) {
for (int i = 0; i < unitNum; i++) { for (int i = 0; i < unitNum; i++) {
DTYPE * f = ((DTYPE*)data) + i; DTYPE* f = ((DTYPE*)data) + i;
if (fscanf(file, "%e", f) < 1) { if (fscanf(file, "%e", f) < 1) {
ShowNTErrors("Incorrect tensor format!"); ShowNTErrors("Incorrect tensor format!");
} }
...@@ -1943,23 +1943,23 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1943,23 +1943,23 @@ void XTensor::Read(FILE * file, const char * label)
delete[](char*)dataBuf; delete[](char*)dataBuf;
} }
/* /*
read data from a binary file read data from a binary file
>> file - the file stream pointer >> file - the file stream pointer
>> offset - the distance from the start to this tensor >> offset - the distance from the start to this tensor
*/ */
void XTensor::BinaryRead(FILE* file, size_t offset) void XTensor::BinaryRead(FILE* file, size_t offset)
{ {
fseek(file, offset, 0); //fseek(file, offset, 0);
switch (dataType) { switch (dataType) {
case X_INT: { case X_INT: {
int * d = new int[unitNum]; int* d = new int[unitNum];
fread(d, sizeof(int), unitNum, file); fread(d, sizeof(int), unitNum, file);
SetData(d, unitNum); SetData(d, unitNum);
delete[] d; delete[] d;
} }
default: { default: {
float * d = new float[unitNum]; float* d = new float[unitNum];
fread(d, sizeof(float), unitNum, file); fread(d, sizeof(float), unitNum, file);
SetData(d, unitNum); SetData(d, unitNum);
delete[] d; delete[] d;
...@@ -1971,7 +1971,7 @@ void XTensor::BinaryRead(FILE* file, size_t offset) ...@@ -1971,7 +1971,7 @@ void XTensor::BinaryRead(FILE* file, size_t offset)
flush the data to the target device flush the data to the target device
>> targetMem - memory pool on the target device >> targetMem - memory pool on the target device
*/ */
void XTensor::FlushToMem(XMem * targetMem) void XTensor::FlushToMem(XMem* targetMem)
{ {
if (targetMem == NULL) if (targetMem == NULL)
return; return;
...@@ -1984,7 +1984,7 @@ void XTensor::FlushToMem(XMem * targetMem) ...@@ -1984,7 +1984,7 @@ void XTensor::FlushToMem(XMem * targetMem)
CudaCPUToGPUFlush(&l, targetMem->devID, targetMem); CudaCPUToGPUFlush(&l, targetMem->devID, targetMem);
} }
else if (mem != targetMem) { else if (mem != targetMem) {
void * tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar()); void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar()); XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
data = tmpData; data = tmpData;
mem = targetMem; mem = targetMem;
...@@ -2008,29 +2008,29 @@ void XTensor::FlushToMem(XMem * targetMem) ...@@ -2008,29 +2008,29 @@ void XTensor::FlushToMem(XMem * targetMem)
} }
/* /*
allocate the memory space of the tensor (in the global memory) allocate the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process >> tensor - the tensor we intend to process
>> myMem - the memory pool we are using >> myMem - the memory pool we are using
>> useBuf - indicates whether we use the buffer in the memory pool >> useBuf - indicates whether we use the buffer in the memory pool
*/ */
void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf) void XTensor::AllocateData(XTensor* tensor, XMem* myMem, bool useBuf)
{ {
if(tensor == NULL) if (tensor == NULL)
return; return;
if(myMem == NULL){ if (myMem == NULL) {
if(tensor->data != NULL) if (tensor->data != NULL)
FreeData(tensor, NULL, false); FreeData(tensor, NULL, false);
tensor->data = XMemAlloc(tensor->devID, tensor->GetDataSizeInChar()); tensor->data = XMemAlloc(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = true; tensor->isInGlobalMem = true;
} }
else{ else {
CheckNTErrors((tensor->data == NULL), "Cannot renew the space for the tensor"); CheckNTErrors((tensor->data == NULL), "Cannot renew the space for the tensor");
if(useBuf){ if (useBuf) {
tensor->data = myMem->AllocBuf(tensor->devID, tensor->GetDataSizeInChar()); tensor->data = myMem->AllocBuf(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = false; tensor->isInGlobalMem = false;
} }
else{ else {
tensor->data = myMem->AllocGlobal(tensor->devID, tensor->GetDataSizeInChar()); tensor->data = myMem->AllocGlobal(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = true; tensor->isInGlobalMem = true;
} }
...@@ -2039,22 +2039,22 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf) ...@@ -2039,22 +2039,22 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
tensor->signature = 0; tensor->signature = 0;
} }
/* /*
free the memory space of the tensor (in the global memory) free the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process >> tensor - the tensor we intend to process
>> myMem - the memory pool we are using >> myMem - the memory pool we are using
>> useBuf - indicates whether we use the buffer in the memory pool >> useBuf - indicates whether we use the buffer in the memory pool
*/ */
void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf) void XTensor::FreeData(XTensor* tensor, XMem* myMem, bool useBuf)
{ {
if(tensor == NULL) if (tensor == NULL)
return; return;
if(myMem == NULL){ if (myMem == NULL) {
XMemFree(tensor->devID, tensor->data); XMemFree(tensor->devID, tensor->data);
} }
else{ else {
if(tensor->isInGlobalMem) if (tensor->isInGlobalMem)
myMem->ReleaseGlobal(tensor->devID, tensor->data); myMem->ReleaseGlobal(tensor->devID, tensor->data);
else else
myMem->ReleaseBuf(tensor->devID, tensor->GetDataSizeInChar()); myMem->ReleaseBuf(tensor->devID, tensor->GetDataSizeInChar());
...@@ -2065,27 +2065,27 @@ void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf) ...@@ -2065,27 +2065,27 @@ void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
} }
/* overloading of the plus-sign */ /* overloading of the plus-sign */
XTensor operator+ (const DTYPE shift, const XTensor &tensor) XTensor operator+ (const DTYPE shift, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, 1, shift); return ScaleAndShift(tensor, 1, shift);
} }
/* overloading of the minus-sign */ /* overloading of the minus-sign */
XTensor operator- (const DTYPE shift, const XTensor &tensor) XTensor operator- (const DTYPE shift, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, 1, -shift); return ScaleAndShift(tensor, 1, -shift);
} }
/* overloading of the multiply-sign */ /* overloading of the multiply-sign */
XTensor operator* (const DTYPE scale, const XTensor &tensor) XTensor operator* (const DTYPE scale, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, scale, 0); return ScaleAndShift(tensor, scale, 0);
} }
/* overloading of the division-sign */ /* overloading of the division-sign */
XTensor operator/ (const DTYPE scale, const XTensor &tensor) XTensor operator/ (const DTYPE scale, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, (DTYPE)1/scale, 0); return ScaleAndShift(tensor, (DTYPE)1 / scale, 0);
} }
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
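The four scalar-on-the-left overloads all delegate to ScaleAndShift, which also means the division overload scales by 1/scale rather than computing an element-wise reciprocal: 2.0F / x yields x / 2, not 2 / x. A short usage sketch (host tensor, values filled with SetData as elsewhere in this file):

    // sketch: scalar-on-the-left arithmetic, all routed through ScaleAndShift
    XTensor x;
    InitTensor2DV2(&x, 2, 2, X_FLOAT, -1);
    float d[]{ 1.0F, 2.0F, 3.0F, 4.0F };
    x.SetData(d, 4);
    XTensor a = 2.0F + x;    // ScaleAndShift(x, 1, 2)   -> element-wise x + 2
    XTensor b = 3.0F * x;    // ScaleAndShift(x, 3, 0)   -> element-wise 3 * x
    XTensor c = 2.0F / x;    // ScaleAndShift(x, 0.5, 0) -> element-wise x / 2 (see note above)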
...@@ -86,7 +86,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim) ...@@ -86,7 +86,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim)
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip)+j * vecBufLength); \ vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip)+j * vecBufLength); \
} \ } \
for (int j = 1; j < strideNum / 32; j++) { \ for (int j = 1; j < strideNum / 32; j++) { \
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength); \ const DTYPE* ptr = (DTYPE*)(ip + j * 4 * vecBufLength); \
vecBuf[0] = vecBuf[0]._vectorOp(VectorBuffer::loadu(ptr + 0 * vecBufLength)); \ vecBuf[0] = vecBuf[0]._vectorOp(VectorBuffer::loadu(ptr + 0 * vecBufLength)); \
vecBuf[1] = vecBuf[1]._vectorOp(VectorBuffer::loadu(ptr + 1 * vecBufLength)); \ vecBuf[1] = vecBuf[1]._vectorOp(VectorBuffer::loadu(ptr + 1 * vecBufLength)); \
vecBuf[2] = vecBuf[2]._vectorOp(VectorBuffer::loadu(ptr + 2 * vecBufLength)); \ vecBuf[2] = vecBuf[2]._vectorOp(VectorBuffer::loadu(ptr + 2 * vecBufLength)); \
...@@ -106,7 +106,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim) ...@@ -106,7 +106,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim)
else { \ else { \
/* data is separated */ \ /* data is separated */ \
for(int i = 0; i < blockNum; i++){ \ for(int i = 0; i < blockNum; i++){ \
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){ \ for(int j = 0; j < stride / 32; j++){ \
DTYPE * ip = (DTYPE*)input->data + blockSize * i; \ DTYPE * ip = (DTYPE*)input->data + blockSize * i; \
DTYPE * op = (DTYPE*)output->data + stride * i; \ DTYPE * op = (DTYPE*)output->data + stride * i; \
VectorBuffer vecBuf[4]; \ VectorBuffer vecBuf[4]; \
......
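The first change in the macro above replaces ip + j * vecBufLength with ip + j * 4 * vecBufLength: each iteration of the j loop consumes four VectorBuffer loads of vecBufLength elements each, so the read pointer has to advance by a whole group of 4 * vecBufLength elements; likewise the loop bound in the separated branch must come from stride, the width actually processed, rather than from the size of the last dimension. A plain scalar model of the corrected indexing (hypothetical helper, no SIMD):

    // scalar model of the corrected contiguous reduction: the row holds strideNum
    // elements, read in groups of 4 * vecBufLength, and the group pointer advances
    // by a whole group per iteration
    void ReduceRowScalar(const float* ip, int strideNum, int vecBufLength, float* out)
    {
        const int groupLen = 4 * vecBufLength;
        for (int t = 0; t < groupLen; t++)
            out[t] = ip[t];                             // group 0 initializes the accumulators
        for (int j = 1; j < strideNum / groupLen; j++) {
            const float* ptr = ip + j * groupLen;       // the corrected stride: 4 * vecBufLength per group
            for (int t = 0; t < groupLen; t++)
                out[t] += ptr[t];
        }
    }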
...@@ -42,7 +42,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim) ...@@ -42,7 +42,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
int num = input->dimSize[dim]; int num = input->dimSize[dim];
_ReduceSum(input, output, dim); _ReduceSum(input, output, dim);
_ScaleAndShiftMe(output, (DTYPE)1/num, 0); _ScaleAndShiftMe(output, 1.0F/(DTYPE)(num), 0);
} }
/* /*
......
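_ReduceMean is just a reduce-sum followed by an in-place scale; the change only spells the 1/num factor as an explicit single-precision division. The same result through the public API (a minimal sketch, assuming the two-argument ReduceSum(input, dim) wrapper and the ScaleAndShift used by the operator overloads above):

    // sketch: mean over dimension 0 of a 2 x 2 tensor, i.e. sum then scale by 1/num
    XTensor x;
    InitTensor2DV2(&x, 2, 2, X_FLOAT, -1);
    float d[]{ 1.0F, 2.0F, 3.0F, 4.0F };
    x.SetData(d, 4);
    XTensor sum  = ReduceSum(x, 0);                   // column sums: {4, 6}
    XTensor mean = ScaleAndShift(sum, 1.0F / 2, 0);   // column means: {2, 3}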
...@@ -105,7 +105,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -105,7 +105,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias); vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias);
} }
for(int j = 1; j < strideNum / 32; j++){ for(int j = 1; j < strideNum / 32; j++){
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength); const DTYPE* ptr = (DTYPE*)(ip + (j * 4) * vecBufLength);
vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias); vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias); vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias);
vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias); vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias);
...@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} else{ } else{
//data is separated //data is separated
for(int i = 0; i < blockNum; i++){ for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){ for(int j = 0; j < stride / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i; DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i; DTYPE * op = (DTYPE*)output->data + stride * i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL; DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
...@@ -133,8 +133,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -133,8 +133,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} }
VectorBuffer vecBuf[4]; VectorBuffer vecBuf[4];
for(int k = 0; k < 4; k++){ for(int k = 0; k < 4; k++){
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + j * 32 / sizeof(DTYPE)); vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + k * 32 / sizeof(DTYPE));
} }
for(int k = 1; k < strideNum; k++){ for(int k = 1; k < strideNum; k++){
DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength; DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
......
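The last hunk fixes the bias offset in the separated-data branch: the four vector buffers cover four consecutive 32-byte chunks inside the same group of columns, so the shift pointer must advance with k (the buffer index), exactly like the data offset (j * 4 + k) * 32 / sizeof(DTYPE), not with j. A quick worked check of the offsets (assuming 4-byte DTYPE, so 32 / sizeof(DTYPE) = 8 elements per load; hypothetical illustration, not library code):

    // for group j = 0, buffers k = 0..3 load elements 0..7, 8..15, 16..23, 24..31
    const int elemsPerVec = 32 / sizeof(float);             // 8
    for (int k = 0; k < 4; k++) {
        int dataOffset = (0 * 4 + k) * elemsPerVec;         // 0, 8, 16, 24
        int biasOffset = k * elemsPerVec;                   // 0, 8, 16, 24 (follows k, not j)
    }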