Commit c22e2e31 by liyinqiao

Merge code with the Yuhao branch and update the size of the memory pool.

parent 823abb4f
@@ -41,9 +41,6 @@ using namespace nts;
 void SmallTest();
 void TransposeTest();
-void LittleTest();
-void T2TTest();
-void T2TTest2();
 void PowerTest();

 int main( int argc, const char ** argv )
@@ -168,127 +165,5 @@ void TransposeTest()
     delete[] data;
 }
-
-void LittleTest()
-{
-    int a = 5000;
-    int b = 100000;
-    int c = a*b;
-    printf("%d\n", c);
-    exit(1);
-}
-
-void T2TTest()
-{
-    XTensor * input;
-    XTensor * weight;
-    XTensor * output;
-    XTensor * gold;
-    XTensor * dedy;
-    XTensor * dedx;
-    XTensor * dedxTmp;
-    XTensor * dedw;
-    XTensor * padding;
-
-    DTYPE loss;
-
-    int * dimSize = new int[2];
-    dimSize[0] = 256;
-    dimSize[1] = 10001;
-
-    int * dimSize2 = new int[3];
-    dimSize2[0] = 2;
-    dimSize2[1] = 31;
-    dimSize2[2] = 256;
-
-    int * dimSize3 = new int[3];
-    dimSize3[0] = 2;
-    dimSize3[1] = 31;
-    dimSize3[2] = 10001;
-
-    int * dimSize4 = new int[2];
-    dimSize4[0] = 2;
-    dimSize4[1] = 31;
-
-    input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
-    weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
-    dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
-    gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
-    output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
-    dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
-    dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
-    dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
-    padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);
-
-    //weight = NewTensor(2, dimSize);
-    //dedw = NewTensor(2, dimSize);
-    //input = NewTensor(3, dimSize2);
-    //gold = NewTensor(3, dimSize3);
-    //output = NewTensor(3, dimSize3);
-    //dedy = NewTensor(3, dimSize3);
-    //dedx = NewTensor(3, dimSize3);
-    //dedxTmp = NewTensor(3, dimSize3);
-    //padding = NewTensor(2, dimSize4);
-
-    myRead(input, "x.txt", "x");
-    myRead(weight, "w.txt", "w");
-    myRead(gold, "gold.txt", "gold");
-    myRead(padding, "padding.txt", "padding");
-
-    XTensor inter;
-    inter = MMul(*input, *weight);
-    _Softmax(&inter, output, 2);
-
-    //_LogMe(output);
-    loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
-    printf("loss: %f\n", loss);
-
-    _CrossEntropyBackward(dedy, output, gold, NULL);
-    //_CrossEntropyBackward(dedy, output, gold, NULL, padding);
-
-    myDump(dedy, "dedy.txt", "dedy");
-
-    _SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
-    _Sub(output, gold, dedxTmp);
-
-    myDump(dedx, "dedx.txt", "dedx");
-    dedx->Dump(stderr, "dedx", 200);
-    dedxTmp->Dump(stderr, "dedxTmp", 200);
-
-    input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
-    dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));
-
-    _MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);
-
-    myDump(dedw, "dedw.txt", "dedw");
-}
-
-void T2TTest2()
-{
-    int dimSize[3];
-    dimSize[0] = 161;
-    dimSize[1] = 47;
-    dimSize[2] = 10001;
-    XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
-    //XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
-
-    //myRead(probs, "probs.txt", " ");
-    _SetDataFixedFloat(probs, 1.0F);
-
-    probs->Reshape(1, probs->unitNum);
-
-    DTYPE sum = _ReduceSumAll(probs);
-    printf("%e\n", sum);
-
-    //XTensor tmp;
-    //tmp = IsNonZero(*probs);
-    //DTYPE nonZeroNum = ReduceSumAll(tmp);
-    //printf("%f\n", nonZeroNum);
-    //
-    //DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
-    //printf("%e\n", gpu);
-}
@@ -39,7 +39,7 @@ where a is a tensor and b is a row vector
 */
 template <class T, bool betaFired>
 __global__
-void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
+void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
 {
     __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];

     int col = blockDim.x * blockIdx.x + threadIdx.x;
@@ -75,7 +75,7 @@ where a is a tensor and b is a column vector
 */
 template <class T, bool betaFired>
 __global__
-void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
+void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
 {
     __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
@@ -78,7 +78,7 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
     else {
         if (!a->isSparse && !b->isSparse) {
             CheckNTErrors(!c->isSparse, "Illegal use of sparse tensor in addition!");
-            if (a->dataType == DEFAULT_DTYPE &&
+            if (a->dataType == DEFAULT_DTYPE &&
                 b->dataType == DEFAULT_DTYPE &&
                 c->dataType == DEFAULT_DTYPE)
@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 /*
 tensor summation of a tensor and a row vector
 c = a + b * \beta
-where a is a tensor and b is a row vector
+where a is a tensor and b is a row vector
 >> a - pointer to the data array of a
 >> b - pointer to the data array of b
@@ -209,4 +209,4 @@ XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
     return onehot;
 }

-} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
+} // namespace nts(NiuTrans.Tensor)
@@ -153,4 +153,4 @@ void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
 #endif // USE_CUDA

-} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
+} // namespace nts(NiuTrans.Tensor)
@@ -45,8 +45,6 @@ void _Select(const XTensor * a, XTensor * c, int* index, int dim)
     int stride = 1;
     for (int i = dim + 1; i < a->order; i++)
         stride *= a->dimSize[i];
-
-    printf("\n%d %d\n", a->order - dim - 1,stride);
     int copyTimes = 1;
     for (int i = 0; i < dim; i++)
     {
@@ -94,12 +92,46 @@ void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim)
 }

 /*
+c = select(a)
+>> a - input tensor
+>> index - the selected index
+>> dim - the dimension along which we do the job
+<< return - the result of the generated tensor with selected data
 */
-/*XTensor Select(const XTensor &a, int* index, int dim)
+XTensor Select(const XTensor &a, XTensor &index, int dim)
 {
-}*/
+    int order = a.order;
+    int * dimSize = new int[order];
+
+    CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
+
+    for (int i = 0; i < a.order; i++) {
+        if (i == dim) {
+            dimSize[i] = index.dimSize[0];
+        }
+        else
+            dimSize[i] = a.dimSize[i];
+    }
+
+    float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
+    XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
+    c.SetTMPFlag();
+
+    /* call _Select function */
+    _Select(&a, &c, &index, dim);
+
+    /* tensor connection */
+    if (a.enableGrad) {
+        XLink::MakeLink(&a, &index, &c, GETANDSET_SELECT);
+        XLink::AddParamToHeadInt(&c, dim);
+    }
+
+    /* destroy variables */
+    delete[] dimSize;
+
+    return c;
+}

 /*
 generate a tensor with selected data in range [low,high] along the given dimension
@@ -192,10 +224,12 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
     _SelectRange(&a, &c, dim, low, high);

     /* tensor connection */
-    XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
-    XLink::AddParamToHeadInt(&c, dim);
-    XLink::AddParamToHeadInt(&c, low);
-    XLink::AddParamToHeadInt(&c, high);
+    if (a.enableGrad) {
+        XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
+        XLink::AddParamToHeadInt(&c, dim);
+        XLink::AddParamToHeadInt(&c, low);
+        XLink::AddParamToHeadInt(&c, high);
+    }

     /* destroy variables */
     delete[] dimSize;
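Note on the new Select interface above: a minimal usage sketch, not part of the commit; the tensor shapes and the Set1DInt setter are illustrative assumptions.

    /* select rows 0 and 2 along dimension 0 of a (4, 5) tensor */
    int aDims[2] = {4, 5};
    int iDims[1] = {2};
    XTensor * a = NewTensor(2, aDims, X_FLOAT, 1.0F, -1);    /* devID -1: CPU */
    XTensor * index = NewTensor(1, iDims, X_INT, 1.0F, -1);
    index->Set1DInt(0, 0);  /* hypothetical setter; fills index with {0, 2} */
    index->Set1DInt(2, 1);
    XTensor c = Select(*a, *index, 0);  /* c has shape (2, 5): dimension dim is
                                           resized to index.dimSize[0] */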
@@ -36,7 +36,7 @@ void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim);
 /*
 generate a tensor with selected data c = select(a) (return an XTensor structure)
 make a new tensor to keep the result and return it
 */
-XTensor Select(const XTensor &a, XTensor &indexCPU);
+XTensor Select(const XTensor &a, XTensor &index, int dim);

 /*
 generate a tensor with selected data in range [low,high] along the given dimension
@@ -78,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
         _cudaFuncName(a, b, num); \
         return; \
     } \
-    CheckNTErrors((_IsSameShaped(a, b)), \
+    CheckNTErrors((_IsSameShaped(a, b)), \
                   "Input tensors should have the same data type!"); \
     if (a->dataType == X_INT) { \
         int * d = (int*)a->data; \
@@ -113,7 +113,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
     if (a->devID >= 0) { \
         ShowNTErrors("No GPU devices support!") \
     } \
-    CheckNTErrors((_IsSameShaped(a, b)), \
+    CheckNTErrors((_IsSameShaped(a, b)), \
                   "Input tensors should have the same data type!"); \
     if (a->dataType == X_INT) { \
         int * d = (int*)a->data; \
@@ -170,8 +170,8 @@ XTensor funcName(const XTensor &a, T num)
     _funcName(&a, &b, num); \
     if(a.enableGrad){ \
         XLink::MakeLink(&a, NULL, &b, operationId); \
+        XLink::AddParamToHead(&b, num); \
     } \
-    XLink::AddParamToHead(&b, num); \
     return b; \
 } \
 template XTensor funcName<int>(const XTensor&, int); \
@@ -182,8 +182,8 @@ template XTensor funcName<double>(const XTensor&, double);
 template<class T> \
 void funcName(const XTensor &a, XTensor &b, T num) \
 { \
-    if (!b.isInit || !IsSameShaped(a, b)) { \
-        InitTensorV2(&b, &a); \
+    if (!b.isInit || !IsSameShaped(a, b)) { \
+        InitTensorV2(&b, &a); \
     } \
     _funcName(&a, &b, num); \
     if (a.enableGrad) { \
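Note on the @@ -170,8 +170,8 @@ hunk above: XLink::AddParamToHead is moved inside the enableGrad guard, so the scalar parameter is only recorded when a backward link is actually created for b. An illustrative expansion of the fixed macro body, assuming funcName = Scale and operationId = MATH_SCALE (placeholder values, not taken from this diff):

    _Scale(&a, &b, num);
    if (a.enableGrad) {
        XLink::MakeLink(&a, NULL, &b, MATH_SCALE);
        XLink::AddParamToHead(&b, num);  /* now guarded together with the link */
    }
    return b;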
@@ -37,7 +37,7 @@ set each entry to its clip value (CUDA Kernel)
 >> size - size of the data array
 */
 __global__
-void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
+void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -33,28 +33,6 @@ gather indexed sub-tensors

 >> s - the source tensor
 >> t - the target tensor
->> dim - the leading dimension to define "sub-tensors"
-       e.g., for a tensor of size (3, 2, 4) and dim = 2,
-       we have 4 sub-tensors of size (3, 2)
->> srcIndex - index of the source sub-tensors
->> indexSize - length of srcIndex (and tgtIndex)
-*/
-void _Gather(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize)
-{
-    int * tgtIndex = new int[indexSize];
-    for(int i = 0; i < indexSize; i++)
-        tgtIndex[i] = i;
-
-    _CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, 1);
-
-    delete[] tgtIndex;
-}
-
-/*
-gather indexed sub-tensors
-
->> s - the source tensor
->> t - the target tensor
 >> srcIndex - index of the source sub-tensors
 >> dim - the leading dimension to define "sub-tensors"
        e.g., for a tensor of size (3, 2, 4) and dim = 2,
@@ -143,7 +121,10 @@ XTensor Gather(XTensor &s, XTensor &index)
     _Gather(&s, &t, &index);

     /* tensor connection */
-    XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
+    if (s.enableGrad)
+    {
+        XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
+    }

     if(index.order > 1) {
         int * dims = new int[index.order + 1];
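Note: with the int*-based _Gather overload removed above, gathering goes through the tensor-indexed path only. A minimal usage sketch of the high-level Gather, whose link creation is now guarded by s.enableGrad; the shapes and devID are illustrative assumptions:

    int sDims[2] = {4, 8};
    int iDims[1] = {2};
    XTensor * s = NewTensor(2, sDims, X_FLOAT, 1.0F, -1);    /* devID -1: CPU */
    XTensor * index = NewTensor(1, iDims, X_INT, 1.0F, -1);
    /* ... fill s with data and index with the sub-tensor ids to gather ... */
    XTensor t = Gather(*s, *index);  /* t collects the indexed sub-tensors of s
                                        along the leading dimension */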
@@ -75,7 +75,6 @@ gather indexed sub-tensors (cuda version)
 >> stride - stride of a data block
 >> strideNum - strideNum of a data block
 >> blockNum - block size of data
-
 */
 __global__
 void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int stride, int strideNum, int blockNum)
@@ -27,9 +27,6 @@

 namespace nts { // namespace nts(NiuTrans.Tensor)

-/* gather selected sub-tensors */
-void _Gather(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize);
-
 /* gather selected sub-tensors */
 void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex);

 /* gather selected sub-tensors according to the dimension */
@@ -272,4 +272,4 @@ void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index)
     }
 }

-} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
+} // namespace nts(NiuTrans.Tensor)
@@ -416,4 +416,4 @@ void _CudaSpreadForGather(XTensor * source, XTensor * collection, XTensor * srcI
 #endif // USE_CUDA

-} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
+} // namespace nts(NiuTrans.Tensor)
@@ -203,8 +203,11 @@ XTensor funcName(const XTensor & input, int dim)
     funcOp(&input, &output, dim); \
     \
     /* tensor connection */ \
-    XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); \
-    XLink::AddParamToHeadInt(&output, dim); \
+    if(input.enableGrad) \
+    { \
+        XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); \
+        XLink::AddParamToHeadInt(&output, dim); \
+    } \
     \
     /* destroy variables */ \
     delete[] dimSize; \
@@ -742,7 +742,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
                     strideNum, blockNum, sp, power, isExp);
         }
     }
-    else if (stride != 1 && stride * blockNum > 4096){
+    else if (stride != 1 && stride * blockNum > 4096) {
         //GDevs->GetGridAndBlockSize2D(devID, stride * blockNum, strideNum, MAX_INT, cudaGridSize, cudaBlockSize);
         //unsigned int* goutput = (unsigned int *)input->data;
         //convert2uintV2 << <dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1]) >> > ((float*)input->data, goutput, stride, strideNum, blockNum, strideNum*blockNum*stride);
@@ -20,7 +20,7 @@
 */

 #include "VectorBuffer.h"
-//#include "math.h"
+#include "math.h"

 namespace nts {

 /* data size for each buffer */
 int VectorBuffer::size()
@@ -172,7 +172,6 @@ VectorBuffer VectorBuffer::maxData(const VectorBuffer &a) {
 VectorBuffer VectorBuffer::minData(const VectorBuffer &a) {
     for (int i = 0; i != a.size(); i++) {
         this->values[i] = MIN(a[i], this->values[i]);
-        printf("runhere");
     }
     return *this;
 }
@@ -19,7 +19,6 @@
 * $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
 */

-//#include <cstring>
 #include "../../XGlobal.h"

 namespace nts {
@@ -828,7 +828,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
     else if (k < 22) workerNum = 128;
     else if (k < 44) workerNum = 64;
     else workerNum = 32;
-
+
     int cudaGrids[3];
     int cudaBlocks[3];
@@ -74,7 +74,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
     int stride = 1;
     int blockSize = 1;
     int blockNum = 1;
-    for (int i = leadDim + 1; i < y->order; i++)
+    for (int i = leadDim + 1; i < x->order; i++)
         stride *= y->dimSize[i];
     blockSize = stride * dimensionSize;
     blockNum = y->unitNum / blockSize;
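Note on the loop-bound fix above: x and y are same-shaped in _LogSoftmax, so iterating to x->order yields the same stride as y->order; the change makes the bound consistent with the tensor whose dimensions drive the loop. A self-contained sketch of the stride/block arithmetic, with an illustrative (2, 3, 4) shape and leadDim = 1:

    #include <cstdio>

    int main()
    {
        int dimSize[3] = {2, 3, 4};                /* illustrative shape */
        int order = 3, leadDim = 1;
        int dimensionSize = dimSize[leadDim];
        int unitNum = 2 * 3 * 4;

        int stride = 1;
        for (int i = leadDim + 1; i < order; i++)  /* the fixed loop bound */
            stride *= dimSize[i];                  /* stride = 4 */

        int blockSize = stride * dimensionSize;    /* 4 * 3 = 12 */
        int blockNum = unitNum / blockSize;        /* 24 / 12 = 2 */
        printf("stride=%d blockSize=%d blockNum=%d\n", stride, blockSize, blockNum);
        return 0;
    }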
@@ -74,7 +74,7 @@ bool Test()
     wrong = !TestSumDim() || wrong;
     wrong = !TestTan() || wrong;
     wrong = !TestTranspose() || wrong;
-    //wrong = !TestTopK() || wrong;
+    wrong = !TestTopK() || wrong;
     wrong = !TestUnsqueeze() || wrong;
     wrong = !TestXMem() || wrong;