Commit 90ffc411 by ltb

DEBUG HELP

parent 9d7cb741
......@@ -15,7 +15,7 @@
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
*/
......@@ -28,50 +28,170 @@
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
void BackwardTest();
void TransposeTest();
void SumDimTest();
void ReadFP16Test();
void TestConvert();
void TestMatrixbatched();
using namespace nts;
using namespace fnnlm;
using namespace transformer;
int main(int argc, const char ** argv)
{
//_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
//_CrtSetBreakAlloc(2708);
//ReadFP16Test();
//return 0;
//TestConvert();
//if(argc > 1 && !strcmp(argv[1], "-test"))
// Test();
//else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
// FNNLMMain(argc - 1, argv + 1);
//else if(argc > 1 && !strcmp(argv[1], "-t2t"))
// TransformerMain(argc - 1, argv + 1);
//else{
// fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
// fprintf(stderr, "neural networks in an easy way. \n\n");
// fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
// fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
// fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
//}
if(argc > 1 && !strcmp(argv[1], "-test"))
Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-t2t"))
TransformerMain(argc - 1, argv + 1);
else{
fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
//_CrtDumpMemoryLeaks();
//TestConvert();
//TestMatrixbatched();
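/* quick check of the Range() helper added in XTensor.cpp below: the 10.5 literal is
truncated to the int parameter 10, so with (start, end, step) = (10, 3, -2) the helper
computes ceil(|3 - 10| / 2) = 4 values and the 4-element tensor should hold 10, 8, 6, 4 */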
XTensor a;
InitTensor1DV2(&a, 4, X_INT,0);
Range(&a, 10.5, 3, -2);
a.Dump(stderr, "a:");
return 0;
}
void TestMatrixbatched() {
XTensor a;
XTensor b;
XTensor halfa;
XTensor halfb;
InitTensor4DV2(&a, 16, 8, 62, 64, X_FLOAT, 0);
InitTensor4DV2(&b, 16, 8, 64, 64, X_FLOAT, 0);
a.SetDataRand(-1.0F, 1.0F);
b.SetDataRand(-1.0F, 1.0F);
InitTensor4DV2(&halfa, 16, 8, 62, 64, X_FLOAT16, 0);
InitTensor4DV2(&halfb, 16, 8, 64, 64, X_FLOAT16, 0);
halfa = ConvertDataType(a, X_FLOAT16);
halfb = ConvertDataType(b, X_FLOAT16);
int repeat = 100000;
int avg_count = 10;
double avg_time1 = 0;
double avg_time2 = 0;
for (int t = 0; t < avg_count; t++) {
double start_float = GetClockSec();
for (int i = 0; i < repeat; i++) {
MatrixMulBatched(&a, X_NOTRANS, &b, X_TRANS);
}
double elapsed_float = GetClockSec() - start_float;
avg_time1 += elapsed_float;
}
printf("elapsed_float=%.6fs\n", avg_time1 /avg_count);
for (int t = 0; t < avg_count; t++) {
double start = GetClockSec();
for (int i = 0; i < repeat; i++) {
MatrixMulBatched(&halfa, X_NOTRANS, &halfb, X_TRANS);
}
double elapsed = GetClockSec() - start;
avg_time2 += elapsed;
}
printf("elapsed_float16=%.6fs\n", avg_time2 /avg_count);
}
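/* note: if MatrixMulBatched launches its CUDA kernels asynchronously, the loops above mostly
measure launch overhead; synchronizing the device before reading GetClockSec() would make the
float vs. float16 comparison more reliable */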
void TestConvert() {
XTensor a;
XTensor halfa;
XTensor b;
XTensor halfb;
int devId = 7;
InitTensor3DV2(&a, 32, 16, 16646, X_FLOAT, devId);
InitTensor3DV2(&halfa, 32, 16, 16646, X_FLOAT16, devId);
InitTensor3DV2(&b, 32, 16, 16646, X_FLOAT, devId);
InitTensor3DV2(&halfb, 32, 16, 16646, X_FLOAT16, devId);
a.SetDataRand(-1.0F, 1.0F);
//b.SetDataRand(-1.0F, 1.0F);
int avg_count = 10;
double avg_time = 0.0;
double avg_time2 = 0.0;
int repeat = 1000;
//halfa = ConvertDataType(&a, X_FLOAT16);
//for (int t = 0; t < avg_count; t++) {
// double start_float = GetClockSec();
// for (int i = 0; i < repeat; i++) {
// //b = ConvertDataType(&halfa, X_FLOAT);
// _ConvertDataType(&halfa, &b);
// }
// double elapsed_float = GetClockSec() - start_float;
// avg_time += elapsed_float;
//}
//printf("ConvertDataType to float=%.6fs\n", avg_time / avg_count);
//for (int t = 0; t < avg_count; t++) {
// double start_float = GetClockSec();
// for (int i = 0; i < repeat; i++) {
// _ConvertDataType(&a,&halfa);
// }
// double elapsed_float = GetClockSec() - start_float;
// avg_time += elapsed_float;
//}
for (int t = 0; t < avg_count; t++) {
double start = GetClockSec();
for (int i = 0; i < repeat; i++) {
//halfb = ConvertDataType(&b, X_FLOAT16);
_ConvertDataType(&a, &halfa);
}
double elapsed = GetClockSec() - start;
avg_time2 += elapsed;
}
printf("ConvertDataType to float16 =%.6fs\n", avg_time2 / avg_count);
//printf("_ConvertDataType=%.6fs\n", avg_time/avg_count);
//float *data = new float[1];
//*data = -6.631311e+04 ;
//XTensor a;
//InitTensor2D(&a, 1, 1, X_FLOAT, 0);
//a.SetData(data,1);
//XTensor b;
//InitTensor2D(&b, 1, 1, X_FLOAT16, 0);
//_ConvertDataType(&a, &b);
//a.Dump(stderr, "a:");
//b.Dump(&b,stderr, "b:");
//_CrtDumpMemoryLeaks();
}
void ReadFP16Test()
{
XTensor a;
InitTensor2D(&a, 2, 3,X_FLOAT, 0);
InitTensor2D(&a, 2, 3, X_FLOAT, 0);
a.SetDataRand(1.0, 5.0);
XTensor halfA;
halfA = ConvertDataType(a, X_FLOAT16);
......@@ -151,7 +271,7 @@ void TransposeTest()
int nnn = GDevs.nGPU;
InitTensor3D(&x, B, N, H, X_FLOAT, 0);
InitTensor4D(&y, K, B, N, H/K, X_FLOAT, 0);
InitTensor4D(&y, K, B, N, H / K, X_FLOAT, 0);
InitTensor3D(&z, B, N, H, X_FLOAT, 0);
cudaEvent_t ctime0;
......@@ -175,7 +295,7 @@ void TransposeTest()
cudaEventRecord(ctime0, 0);
double time0 = GetClock();
for(int i = 0; i < loops; i++)
for (int i = 0; i < loops; i++)
_Split(&x, &y, 2, K);
double time1 = GetClock();
......@@ -186,7 +306,7 @@ void TransposeTest()
cudaEventRecord(ctime2, 0);
double time2 = GetClock();
for(int i = 0; i < loops; i++)
for (int i = 0; i < loops; i++)
_Merge(&y, &x, 3);
double time3 = GetClock();
......@@ -197,7 +317,7 @@ void TransposeTest()
cudaEventRecord(ctime4, 0);
double time4 = GetClock();
for(int i = 0; i < loops; i++)
for (int i = 0; i < loops; i++)
_Sum(&x, &z, &x);
double time5 = GetClock();
......@@ -230,11 +350,11 @@ void SumDimTest()
DTYPE * data = new DTYPE[x.unitNum];
for(int i = 0; i < x.unitNum; i++)
for (int i = 0; i < x.unitNum; i++)
data[i] = (DTYPE)i;
x.SetData(data, x.unitNum);
for(int i = 0; i < y.unitNum; i++)
for (int i = 0; i < y.unitNum; i++)
data[i] = -(DTYPE)i;
y.SetData(data, y.unitNum);
......
......@@ -15,7 +15,7 @@
* limitations under the License.
*/
/*
* backward computation for data operation
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-26
*/
......@@ -27,46 +27,46 @@
#include "../tensor/core/CHeader.h"
#include "../tensor/core/getandset/SetData.h"
namespace nts {
/* compute dE/dx of a node */
void XDataGrad::MakeGrad(XTensor * node, bool isEfficent)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if (operID == GETANDSET_CONVERTDATATYPE)
GradConvertDataType(node, isEfficent);
else if (operID == GETANDSET_INDEXTOONEHOT)
GradIndexToOnehot(node, isEfficent);
else if (operID == GETANDSET_ONEHOTTOINDEX)
GradOnehotToIndex(node, isEfficent);
else {
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a data operation */
bool XDataGrad::IsDataOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & DATA_BASE) != 0;
}
/*
gradient computation for convert datatype
for
b = convertdatatype(a)
we have
dE/da = convertdatatype(dE/db)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradConvertDataType(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for ConvertDataType!");
......@@ -75,25 +75,61 @@ void XDataGrad::GradConvertDataType(XTensor * node, bool isEfficent)
XNoder::MakeGrad(input);
XTensor * tmp = NewTensorBuf(input->grad, input->devID, input->mem);
//if (node->dataType == X_FLOAT) {
// FILE * Convert_grad_0 = fopen("Convert_grad_0", "wb");
// node->grad->Dump(node->grad, Convert_grad_0, "Convert_grad_0");
// fclose(Convert_grad_0);
//}
//if (node->dataType == X_FLOAT16) {
// FILE * Convert_grad_00 = fopen("Convert_grad_00", "wb");
// node->grad->Dump(node->grad, Convert_grad_00, "Convert_grad_00");
// fclose(Convert_grad_00);
//}
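/* dE/da is the upstream gradient dE/db cast back to the input's data type:
convert node->grad into tmp, then accumulate tmp into input->grad */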
_ConvertDataType(node->grad, tmp);
//if (node->dataType == X_FLOAT) {
// FILE *Convert_grad_1 = fopen("Convert_grad_1", "wb");
// tmp->Dump(tmp, Convert_grad_1, "Convert_grad_1");
// fclose(Convert_grad_1);
//}
//if (node->dataType == X_FLOAT16) {
// FILE *Convert_grad_10 = fopen("Convert_grad_10", "wb");
// tmp->Dump(tmp, Convert_grad_10, "Convert_grad_10");
// fclose(Convert_grad_10);
//}
_SumMe(input->grad, tmp);
//if (node->dataType == X_FLOAT) {
// FILE *Convert_grad_2 = fopen("Convert_grad_2", "wb");
// input->grad->Dump(input->grad, Convert_grad_2, "Convert_grad_2");
// fclose(Convert_grad_2);
//}
//if (node->dataType == X_FLOAT16) {
// FILE *Convert_grad_20 = fopen("Convert_grad_20", "wb");
// input->grad->Dump(input->grad, Convert_grad_20, "Convert_grad_20");
// fclose(Convert_grad_20);
//}
DelTensorBuf(tmp);
node->visitMark = NODE_FINISHED;
}
/*
gradient computation for OnehotToIndex
for
b = OnehotToIndex(a)
we have
dE/da = IndexToOnehot(b)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradOnehotToIndex(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for IndexToOnehot!");
......@@ -102,20 +138,20 @@ void XDataGrad::GradOnehotToIndex(XTensor * node, bool isEfficent)
XNoder::MakeGrad(input);
node->visitMark = NODE_FINISHED;
}
/*
gradient computation for IndexToOnehot
for
b = IndexToOnehot(a)
we have
dE/da = IndexToOnehot(b)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XDataGrad::GradIndexToOnehot(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for IndexToOnehot!");
......@@ -124,6 +160,6 @@ void XDataGrad::GradIndexToOnehot(XTensor * node, bool isEfficent)
XNoder::MakeGrad(input);
node->visitMark = NODE_FINISHED;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -41,7 +41,7 @@ bool XNoder::IsLeaf(XTensor * node)
{
if(node == NULL)
return false;
// weight
if(node->income.tailNum == 0)
return true;
else
......
......@@ -35,7 +35,7 @@
namespace fnnlm
{
int step = 0;
#define MAX_NAME_LENGTH 1024
#define MAX_LINE_LENGTH_HERE 1024 * 32
......@@ -63,7 +63,7 @@ void Clear(FNNModel &model, bool isNodeGrad);
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model);
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model);
void Train(const char * train, bool isShuffled, FNNModel &model);
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad);
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad,int step);
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs = NULL);
void Dump(const char * fn, FNNModel &model);
void Read(const char * fn, FNNModel &model);
......@@ -77,7 +77,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
FNNModel &model, FNNModel &grad, FNNNet &net);
void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model);
void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model);
void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &model, int step);
/*
entry of the program
......@@ -463,7 +463,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
strcpy(name, train);
int epoch = 0;
//int step = 0;
int wordCount = 0;
int wordCountTotal = 0;
int ngramNum = 1;
......@@ -533,7 +533,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
/* update model parameters */
Update(model, grad, learningRate, false, step);
/* get probabilities */
float prob = GetProb(output, gold);
......@@ -547,8 +547,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* this is implemented by gather function */
//ForwardAutoDiff(ngrams, ngramNum, output, model);
//if (step == 114) {
// exit(1);
//}
ForwardAutoDiffLin(ngrams, ngramNum, output, model, step);
//XNet net;
//net.ShowNetwork(stdout, &output);
......@@ -564,22 +568,53 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
//}
/* this is implemented by multiply function */
lossTensor = CrossEntropy(output, gold);
//FILE* fOut1 = fopen("test3", "a");
//fprintf(fOut1, "step=%d ", step);
//lossTensor.Dump(&lossTensor, fOut1, "lossTensor:");
//fclose(fOut1);
//fflush(fOut1);
//if (step >112) {
// output.Dump(&output, stderr, "output:", 20);
//}
int stepTmp = step + 1;
if (step > 680 && step < 685) {
char op[MAX_NAME_LENGTH];
sprintf(op, "output-%d", step);
FILE *out = fopen(op, "wb");
output.Dump(&output, out, "output:");
fflush(out);
fclose(out);
char gd[MAX_NAME_LENGTH];
sprintf(gd, "gold-%d", step);
FILE *golds = fopen(gd, "wb");
gold.Dump(&gold, golds, "gold:");
fflush(golds);
fclose(golds);
char lossTen[MAX_NAME_LENGTH];
sprintf(lossTen, "lossTensor-%d", step);
FILE *loss = fopen(lossTen, "wb");
lossTensor.Dump(&lossTensor, loss, "loss:");
fflush(loss);
fclose(loss);
}
//if (step > 110 && step < 116) {
// char lt[MAX_NAME_LENGTH];
// sprintf(lt, "lossTensor-%d", step);
// FILE *loss = fopen(lt, "wb");
// lossTensor.Dump(&lossTensor, loss, "loss:");
// fclose(loss);
//}
/* automatic differentiation */
autoDiffer.Backward(lossTensor);
/* update model parameters */
Update(model, grad, learningRate, true, step);
//if (step > 680 && step < 688) {
// char aw[MAX_NAME_LENGTH];
// sprintf(aw, "wf-%d", step);
// FILE *file = fopen(aw, "wb");
// model.embeddingW.Dump(file, aw);
//}
/* get probabilities */
float prob = ReduceSumAll(lossTensor);
loss += prob;
......@@ -625,7 +660,7 @@ update the model parameters using the delta rule
>> epsilon - learning rate
>> isNodeGrad - indicates whether the gradient is associated with the node
*/
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad,int step)
{
TensorList paraList(10);
TensorList gradList(10);
......@@ -648,7 +683,6 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
gradList.Add(&grad.hiddenW[i]);
gradList.Add(&grad.hiddenB[i]);
}
gradList.Add(&grad.embeddingW);
}
else{
......@@ -663,21 +697,29 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
gradList.Add(model.embeddingW.grad);
}
//FILE* fOut1 = fopen("test-2", "a");
for (int i = 0; i < paraList.count; i++) {
XTensor * para = (XTensor*)paraList.GetItem(i);
XTensor * paraGrad = (XTensor*)gradList.GetItem(i);
//fprintf(fOut1, "id=%d ", para->id);
//para->Dump(para, fOut1, "para:", 50);
//paraGrad->Dump(paraGrad, fOut1, "paraGrad:", 50);
if (step > 680 && step < 685) {
char embeddingW_grad[MAX_NAME_LENGTH];
sprintf(embeddingW_grad, "embeddingW_grad-%d", step);
FILE *ewg = fopen(embeddingW_grad, "wb");
model.embeddingW.grad->Dump(model.embeddingW.grad, ewg, "ewg");
fclose(ewg);
char outputW[MAX_NAME_LENGTH];
sprintf(outputW, "outputW_grad-%d", step);
FILE * owg = fopen(outputW, "wb");
model.outputW.grad->Dump(model.outputW.grad, owg, "outputW_grad:");
fclose(owg);
}
/* the delta rule */
_Sum(para, paraGrad, para, -epsilon);
}
//fprintf(fOut1, "\n");
//fclose(fOut1);
//fflush(fOut1);
}
/*
......@@ -1161,7 +1203,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
output = Softmax(MMul(hidden, model.outputW) + model.outputB, 1);
}
void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &model)
void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &model, int xstep)
{
int n = model.n;
int depth = model.hDepth;
......@@ -1184,6 +1226,7 @@ void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &mo
InitTensor1DV2(&words, size, X_INT, model.devID);
words.SetData(index, size);
/*words.Dump(&words, stderr, "words", 40);*/
/*test for Gather float16 datatype backward*/
//XTensor embeddingW16;
//XTensor embeddingBig16;
......@@ -1192,6 +1235,21 @@ void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &mo
//embeddingBig = ConvertDataType(embeddingBig16, X_FLOAT);
embeddingBig = Gather(model.embeddingW, words);
//embeddingBig.Dump(&embeddingBig, stderr, "embeddingBig", 20);
//if (step > 112 && step < 118) {
// char e[MAX_NAME_LENGTH];
// sprintf(e, "ew-%d", step);
// FILE *ew = fopen(e, "wb");
// model.embeddingW.Dump(ew, "ew");
// fclose(ew);
//}
//float words_reduce = ReduceSumAll(words);
//float emW_reduce = ReduceSumAll(model.embeddingW);
//float embed = ReduceSumAll(embeddingBig);
//fprintf(stderr, "words_reduce:%f emW_reduce:%f embed:%f\n", words_reduce,emW_reduce,embed);
delete[] index;
......@@ -1267,8 +1325,31 @@ void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &mo
XTensor softmaxBefore16;
XTensor output16;
softmaxBefore = MMul(hidden, model.outputW) + model.outputB;
//float softmaxReduce = ReduceSumAll(softmaxBefore);
//fprintf(stderr,"softmaxReduce:%f\n", softmaxReduce);
softmaxBefore16 = ConvertDataType(softmaxBefore, X_FLOAT16);
//softmaxBefore.Dump(&softmaxBefore, stderr, "softmaxBefore", 20);
output16 = Softmax(softmaxBefore16, 1);
if (step > 680 && step < 685) {
char sm[MAX_NAME_LENGTH];
sprintf(sm,"output16-%d", step);
char smb[MAX_NAME_LENGTH];
sprintf(smb, "softmaxBefore16-%d", step);
FILE *softb = fopen(smb, "wb");
softmaxBefore16.Dump(&softmaxBefore16, softb, "softmaxBefore16:");
fclose(softb);
FILE *soft = fopen(sm, "wb");
output16.Dump(&output16, soft, "output16:");
fclose(soft);
}
output = ConvertDataType(output16, X_FLOAT);
//output = Softmax(MMul(hidden, model.outputW) + model.outputB, 1);
......
......@@ -38,7 +38,7 @@ using namespace nts;
namespace fnnlm
{
extern int step;
#define _EXIT_(x)// exit(x)
#define CheckErrors(x, msg) { if(!(x)) { fprintf(stderr, "Error! calling '%s' (%s line %d): %s\n", #x, __FILENAME__, __LINE__, msg); _EXIT_(1); } }
#define ShowErrors(msg) { { fprintf(stderr, "Error! (%s line %d): %s\n", __FILENAME__, __LINE__, msg); _EXIT_(1); } }
......
......@@ -1912,6 +1912,11 @@ void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, cons
_ConvertDataType(tensor, &a);
a.Dump(file, label, n, beg, verbose);
}
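/* added branch: integer tensors are copied into a temporary tensor and dumped from there,
mirroring the X_FLOAT16 case above; presumably needed for the X_INT Range() debug output in Main.cpp */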
else if (tensor->dataType == X_INT) {
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a);
a.Dump(file, label, n, beg, verbose);
}
else
{
ShowNTErrors("TO DO!");
......@@ -2951,4 +2956,30 @@ void DelTensorBuf(XTensor * tensor)
delete tensor;
}
void Range(XTensor * tensor, int start, int end, int step)
{
if (tensor == NULL)
return;
/* get the length of tensor */
int length = tensor->GetDim(0);
/* compute the true length according to the (start, end, step) */
int a = abs(end - start);
int freq = ceil(1.0 * a / abs(step));
/* init an integer array to store the sequence */
int* index = new int[freq];
for (int i = 0; i < freq; i++)
index[i] = start + i * step;
CheckNTErrors((length == freq), "the length of the tensor is not matched");
/* set the data from the array */
tensor->SetData(index, freq);
delete[] index;
}
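/* a small worked example of the helper above (not part of the commit): for a 1D X_INT tensor
of length 4, Range(&t, 0, 10, 3) computes freq = ceil(10 / 3) = 4 and fills the tensor with
0, 3, 6, 9; the first-dimension length must already equal freq, and the data is written as
raw ints, so the helper is presumably intended for X_INT tensors */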
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -600,6 +600,8 @@ void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
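/* fill a 1D tensor with the integer sequence start, start + step, ... (stopping before end);
the tensor length must match the number of generated values */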
void Range(XTensor * tensor, int start, int end, int step);
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
......@@ -23,6 +23,7 @@
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "ConvertDataType.cuh"
#include "../../core/math/Clip.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -156,6 +157,7 @@ convert data type (cuda code)
*/
void _CudaConvertDataType(const XTensor * input, XTensor * output)
{
if (input->dataType == output->dataType)
return;
......@@ -170,12 +172,16 @@ void _CudaConvertDataType(const XTensor * input, XTensor * output)
int devIDBackup;
ProtectCudaDev(input->devID, devIDBackup);
if(input->dataType == X_FLOAT && output->dataType == X_INT)
KernelFloatToInt<<<blocks, threads>>>((float*)input->data, (int*)output->data, input->unitNum);
else if(input->dataType == X_INT && output->dataType == X_FLOAT)
KernelIntToFloat<<<blocks, threads>>>((int*)input->data, (float*)output->data, input->unitNum);
else if(input->dataType == X_FLOAT && output->dataType == X_FLOAT16)
KernelFloatToFloat16<<<blocks, threads>>>((float*)input->data, (__half*)output->data, input->unitNum);
if (input->dataType == X_FLOAT && output->dataType == X_INT)
KernelFloatToInt << <blocks, threads >> > ((float*)input->data, (int*)output->data, input->unitNum);
else if (input->dataType == X_INT && output->dataType == X_FLOAT)
KernelIntToFloat << <blocks, threads >> > ((int*)input->data, (float*)output->data, input->unitNum);
else if (input->dataType == X_FLOAT && output->dataType == X_FLOAT16) {
XTensor *temp = NewTensor(input);
_Clip(input, temp, -60000.0F, 60000.0F);
KernelFloatToFloat16<<<blocks, threads>>>((float*)temp->data, (__half*)output->data, input->unitNum);
delete temp;
}
else if(input->dataType == X_FLOAT16 && output->dataType == X_FLOAT)
KernelFloat16ToFloat<<<blocks, threads>>>((__half*)input->data, (float*)output->data, input->unitNum);
else if (input->dataType == X_FLOAT && output->dataType == X_INT8)
......
......@@ -29,6 +29,9 @@
#include "../core/arithmetic/Sum.h"
#include "../XDevice.h"
#include "../XUtility.h"
#include "../../sample/fnnlm/FNNLM.h"
using namespace fnnlm;
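/* debug-only dependency: pulls in the global training-step counter `step` defined in the
FNNLM sample so the dumps below can be gated on the step range being inspected */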
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -382,21 +385,77 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* make a matrix to keep \beta */
XTensor * beta = NewTensor(y->order - 1, dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
int a = 680;
int b = 685;
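/* dump the intermediate tensors of the softmax backward pass for training steps 681-684,
the same step range dumped elsewhere in this commit */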
if (step > a && step < b) {
char softmax_dedys[1024];
sprintf(softmax_dedys, "softmax_dedy-%d", step);
FILE *softmax_dedy = fopen(softmax_dedys, "wb");
dedy->Dump(dedy, softmax_dedy, "softmax_dedy:");
fclose(softmax_dedy);
}
/* \beta = \sum_i (dE/dy_i * y_i) */
_Multiply(dedy, y, ytmp, 0, 0);
if (step > a && step < b) {
char softmax_ytmp1[1024];
sprintf(softmax_ytmp1, "softmax_ytmp1-%d", step);
FILE *ytemp_1 = fopen(softmax_ytmp1, "wb");
ytmp->Dump(ytmp, ytemp_1, "ytemp-1:");
fclose(ytemp_1);
}
_ReduceSum(ytmp, beta, leadDim);
if (step > a && step < b) {
char softmax_betas[1024];
sprintf(softmax_betas, "softmax_ytmp1-%d", step);
FILE *softmax_beta = fopen(softmax_betas, "wb");
beta->Dump(beta, softmax_beta, "beta:");
fclose(softmax_beta);
}
/* ytmp = dE/dy_j - \beta */
_Unsqueeze(beta, ytmp, leadDim, y->dimSize[leadDim]);
if (step > a && step < b) {
char softmax_ytmp2[1024];
sprintf(softmax_ytmp2, "softmax_ytmp2-%d", step);
FILE *ytmp_2 = fopen(softmax_ytmp2, "wb");
ytmp->Dump(ytmp, ytmp_2, "ytmp-2:");
fclose(ytmp_2);
}
_Sum(dedy, ytmp, ytmp, -1.0F);
if (step > a && step < b) {
char softmax_ytmp3[1024];
sprintf(softmax_ytmp3, "softmax_ytmp3-%d", step);
FILE *ytmp_3 = fopen(softmax_ytmp3, "wb");
ytmp->Dump(ytmp, ytmp_3, "ytmp-3:");
fclose(ytmp_3);
}
/* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
_Multiply(y, ytmp, dedx, 0, 0);
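/* sanity check of the steps above: with y = softmax(s), dy_i/ds_j = y_i * (delta_ij - y_j),
so dE/ds_j = sum_i dE/dy_i * y_i * (delta_ij - y_j) = y_j * (dE/dy_j - beta), which is exactly
the multiply / reduce / unsqueeze / sum / multiply sequence computed here */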
delete[] dimSize;
delete ytmp;
delete beta;
//FILE *file = fopen("softmax_dedx", "wb");
//dedx->Dump(dedx, file, "softmax_dedx:");
//fclose(file);
}
else{
ShowNTErrors("TODO!");
......
......@@ -30,6 +30,8 @@
#include "../core/math/ScaleAndShift.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceSumAll.h"
#include "../../sample/fnnlm/FNNLM.h"
using namespace fnnlm;
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -65,6 +67,14 @@ void _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * inter = NewTensor(output);
_Log(output, inter);
if (step > 680 && step < 685) {
char log[1024];
sprintf(log, "loginter-%d", step);
FILE *loginter = fopen(log, "wb");
inter->Dump(inter, loginter, "loginter:");
fflush(loginter);
fclose(loginter);
}
_MultiplyMe(inter, gold);
if(weight != NULL)
......
......@@ -35,6 +35,9 @@
#include "../core/reduce/ReduceSumAll.h"
#include "../core/shape/Transpose.h"
#include "../core/shape/Unsqueeze.h"
#include "../../sample/fnnlm/FNNLM.h"
using namespace fnnlm;
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -194,6 +197,16 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
delete[] paddingDims;
delete[] dims;
}
if (step > 680 && step < 685) {
char output_grad[1024];
sprintf(output_grad, "output_grad-%d", step);
FILE * file = fopen(output_grad, "wb");
dedy->Dump(dedy, file, "output_grad:");
fclose(file);
}
//if(padding != NULL) {
// XTensor * tmp = NewTensor(padding);
......