Commit 58181c8d by liyinqiao

1. Clean up the code;

2. Merge with Xiao branch.
parent d1714e17
...@@ -166,6 +166,8 @@ int T2TBatchLoader::LoadBuf(FILE * file, bool isSorted, int step) ...@@ -166,6 +166,8 @@ int T2TBatchLoader::LoadBuf(FILE * file, bool isSorted, int step)
if(wordCount >= bufSize - MAX_SEQUENCE_LENGTH) if(wordCount >= bufSize - MAX_SEQUENCE_LENGTH)
break; break;
CheckNTErrors(seqCount % step == 0, "Wrong number of sequences!");
} }
nseqBuf = seqCount; nseqBuf = seqCount;
......
...@@ -293,10 +293,10 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -293,10 +293,10 @@ void T2TSearch::Generate(T2TStateBundle * beam)
CopyValues(index, preID); CopyValues(index, preID);
/* "preID" represents the id (or the offset) of previous state used to make the current /* "preID" represents the id (or the offset) of the previous state used to make the current
hypothesis. Note that we reshape the "score" tensor into a matrix where each hypothesis. Note that we reshape the "score" tensor into a matrix where each
row means a previous state. The column number is size-of-beam * vocab-size. We, row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */ previous state for each hypothesis in the top-k list. */
Descale(preID, sizeVocab); Descale(preID, sizeVocab);
......
...@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID) ...@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID); cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
#else #else
...@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID) ...@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
SetFastFlags(); SetFastFlags();
} }
/* switch to a get current dev */ /* get the id of the current GPU device */
int XDevice::GetGPUDevice() int XDevice::GetGPUDevice()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice() ...@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID); cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
...@@ -248,7 +250,7 @@ void XDevice::SetFastFlags() ...@@ -248,7 +250,7 @@ void XDevice::SetFastFlags()
#endif #endif
} }
/* reset cuda flag for more efficient cuda execution (all devices) */ /* reset the cuda flag for more efficient cuda execution (all devices) */
void XDevice::SetFastFlagsAllDevices() void XDevice::SetFastFlagsAllDevices()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -274,7 +276,7 @@ XDevManager::~XDevManager() ...@@ -274,7 +276,7 @@ XDevManager::~XDevManager()
} }
/* initialize it and get the CPU and GPU information */ /* initialization */
void XDevManager::Init() void XDevManager::Init()
{ {
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
...@@ -318,7 +320,7 @@ void XDevManager::Clear() ...@@ -318,7 +320,7 @@ void XDevManager::Clear()
#ifdef USE_CUDA #ifdef USE_CUDA
/* get the handle of GPU */ /* get the handle of a given GPU */
cublasHandle_t * XDevManager::GetCudaHandle(const int devID) cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -326,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID) ...@@ -326,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
return GPUs[devID].GetCublasHandle(); return GPUs[devID].GetCublasHandle();
} }
/* get the stream of cuda */ /* get the stream of a given GPU */
cudaStream_t * XDevManager::GetCudaStream(const int devID) cudaStream_t * XDevManager::GetCudaStream(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -523,7 +525,7 @@ get device ids for the given device information ...@@ -523,7 +525,7 @@ get device ids for the given device information
devInfo = "0:CPU-1 1:GPU-0 2:CPU-1" devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
means that the first device is CPU, the second device means that the first device is CPU, the second device
is GPU-0, the third device is CPU. is GPU-0, the third device is CPU.
>> devIDs - device sequence specified by devInfo >> devIDs - device IDs specified by devInfo
<< return - number of devices << return - number of devices
*/ */
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
...@@ -565,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) ...@@ -565,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
return devCount; return devCount;
} }
/* show id sequence */ /* show device IDs */
void XDevManager::ShowDeviceIDs(char * devInfo, char * msg) void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
{ {
msg[0] = 0; msg[0] = 0;
......
...@@ -63,7 +63,7 @@ constructor ...@@ -63,7 +63,7 @@ constructor
>> myMode - mode of running the memory pool >> myMode - mode of running the memory pool
UNI_FREE: free all the space at the end of using the memory pool UNI_FREE: free all the space at the end of using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block >> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks >> myBlockNum - number of memory blocks
>> myBufSize - size of buffer >> myBufSize - size of buffer
*/ */
...@@ -108,7 +108,7 @@ initialize it ...@@ -108,7 +108,7 @@ initialize it
>> myMode - mode of running the memory pool >> myMode - mode of running the memory pool
UNI_FREE: free all the space at the end of using the memory pool UNI_FREE: free all the space at the end of using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block >> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks >> myBlockNum - number of memory blocks
>> myBufSize - size of buffer >> myBufSize - size of buffer
*/ */
...@@ -222,8 +222,8 @@ void XMem::Free(int myDevID, void * mem) ...@@ -222,8 +222,8 @@ void XMem::Free(int myDevID, void * mem)
} }
/* /*
get signature get the signature
<< return - return the signature << return - the signature
*/ */
MTYPE XMem::GetSignature() MTYPE XMem::GetSignature()
{ {
...@@ -231,7 +231,7 @@ MTYPE XMem::GetSignature() ...@@ -231,7 +231,7 @@ MTYPE XMem::GetSignature()
} }
/* /*
use string as the name of the memory pool set the name of the memory pool
>> myName - name of the memory pool >> myName - name of the memory pool
*/ */
void XMem::SetName(const char * myName) void XMem::SetName(const char * myName)
...@@ -264,7 +264,7 @@ void XMem::SetDevice(int myDevID) ...@@ -264,7 +264,7 @@ void XMem::SetDevice(int myDevID)
} }
/* /*
switch to the device (with fast cuda execution mode) we want to work switch to the device (with fast cuda execution mode) we intend to work on
>> myDevID - device id(-1: CPU memory, >=0: GPU device ID) >> myDevID - device id(-1: CPU memory, >=0: GPU device ID)
*/ */
void XMem::SetDeviceFast(int myDevID) void XMem::SetDeviceFast(int myDevID)
...@@ -280,7 +280,7 @@ void XMem::SetDeviceFast(int myDevID) ...@@ -280,7 +280,7 @@ void XMem::SetDeviceFast(int myDevID)
} }
/* /*
run in static mode run in the static mode
>> myIsStatic - specify if the memory allocation is static >> myIsStatic - specify if the memory allocation is static
*/ */
void XMem::SetStaticMode(bool myIsStatic) void XMem::SetStaticMode(bool myIsStatic)
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* This is an implementation of queue. Actually we intend to use it to maintain
* a priority job list
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-04-05
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "XQueue.h"
#include "XDevice.h"
#include "XList.h"
#include "XUtility.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
/**************************************
job item used in queues
*/
/* constructor: allocate the argument list of the node and leave the job pointer unset */
JobQueueNode::JobQueueNode()
{
    args = new TensorList(1);
    job = NULL;
}
/* destructor: release the argument list allocated by the constructor */
JobQueueNode::~JobQueueNode()
{
delete args;
}
/**************************************
This class provides standard utilities of Queue.
*/
/*
constructor
>> mySize - capacity of the queue, i.e., the number of item slots to allocate
*/
XQueue::XQueue(int mySize)
{
    /* value-initialize the slots so that every entry starts as a null pointer.
       This replaces an explicit memset() call, for which this file includes
       no declaring header. */
    queue = new void*[mySize]();
    size = mySize;

    /* the queue is a circular buffer that starts empty */
    itemCount = 0;
    head = 0;
    tail = 0;

    /* job-queue state: no consumer is running and no job is pending */
    isJobQueue = false;
    jobDequeuerArgs = new TensorList(1);
    jobDequeuerBreak = false;
    runningJobCount = 0;

    /* job streams are created on demand by MakeJobStreams() */
    jobStream = NULL;
    jobStream1 = NULL;
    jobStream2 = NULL;

    MUTEX_INIT(enqueueMutex);
    MUTEX_INIT(dequeueMutex);
    COND_INIT(queueCond);
    MUTEX_INIT(jobQueueMutex);
}
/* destructor: release the job streams, the consumer arguments, the item slots
   and the synchronization objects */
XQueue::~XQueue()
{
    /* job streams (deleting a NULL pointer is a no-op) */
    delete jobStream2;
    delete jobStream1;
    delete jobStream;

    delete jobDequeuerArgs;
    delete[] queue;

    /* NOTE(review): the job consumer is NOT stopped here - the call to
       StopJobConsumer() for job queues is disabled. Presumably the owner
       has to stop the consumer before destroying the queue; confirm. */

    MUTEX_DELE(enqueueMutex);
    MUTEX_DELE(dequeueMutex);
    COND_DELE(queueCond);
    MUTEX_DELE(jobQueueMutex);
}
/*
append an item to the tail of the queue
The capacity is fixed; adding an item to a full queue is reported as an error.
>> item - the item we intend to add into the queue
*/
void XQueue::Enqueue(void * item)
{
    MUTEX_LOCK(enqueueMutex);
    MUTEX_LOCK(dequeueMutex);

    CheckNTErrors((itemCount < size), "Put too many items into the queue!");

    /* store the item and move the tail forward circularly */
    queue[tail] = item;
    itemCount++;
    tail = (tail + 1) % size;

    /* signal the condition that Dequeue() waits on when the queue is empty */
    COND_SIGNAL(queueCond);

    MUTEX_UNLOCK(dequeueMutex);
    MUTEX_UNLOCK(enqueueMutex);
}
/*
fetch an item from the head of the queue
The call waits on the queue condition for as long as the queue is empty.
<< return - the head item of the queue
*/
void * XQueue::Dequeue()
{
MUTEX_LOCK(dequeueMutex);
/* wait until Enqueue() has added an item; the counter is re-checked after every wake-up */
while(itemCount == 0)
{
/* NOTE(review): on WIN32 the mutex is released by hand before the wait and
   re-acquired after it - presumably COND_WAIT does not release the mutex
   itself on that platform; confirm against the definition of the macro */
#ifdef WIN32
MUTEX_UNLOCK(dequeueMutex);
#endif
COND_WAIT(queueCond, dequeueMutex);
#ifdef WIN32
MUTEX_LOCK(dequeueMutex);
#endif
}
/* take the head item and move the head forward circularly */
void * r = queue[head];
head = (head + 1) % size;
itemCount--;
MUTEX_UNLOCK(dequeueMutex);
return r;
}
/*
check whether the queue holds no item
Note that the item counter is read without taking any of the queue mutexes.
<< return - true if the queue is empty, false otherwise
*/
bool XQueue::IsEmpty()
{
    if(itemCount == 0)
        return true;

    return false;
}
/* wait until the queue is empty */
void XQueue::WaitForEmptyJobQueue()
{
while(runningJobCount > 0){
XSleep(10);
}
if(jobStream != NULL){
CheckNTErrors((jobStream->IsFinished()), "None fineished jobs remain");
jobStream->Clear();
}
if(jobStream1 != NULL){
CheckNTErrors((jobStream1->IsFinished()), "None fineished jobs remain");
jobStream1->Clear();
}
if(jobStream2 != NULL){
CheckNTErrors((jobStream2->IsFinished()), "None fineished jobs remain");
jobStream2->Clear();
}
}
/* table in which entry i holds the value i; RunJobConsumer() passes the address
   of an entry to the consumer thread, which reads the device id through it */
int devids[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
/* device id handed to the consumer thread when the requested id is negative */
int cpuid = -1;
/*
run job consumer (in another thread)
>> jobDevID - id of the device for running the jobs
*/
void XQueue::RunJobConsumer(int jobDevID)
{
CheckNTErrors((jobDevID < 16), "device id is out of scope!");
isJobQueue = true;
jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);
jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs;
jobDequeuer.Start();
jobDequeuer.LetItGo();
}
/*
stop the job consumer
The break flag is raised first and an empty job (NULL function, NULL arguments)
is enqueued afterwards. DequeueJobs() tests the flag right after every
Dequeue() call, so the extra item gives a consumer that waits on an empty
queue something to dequeue and lets it leave its loop.
*/
void XQueue::StopJobConsumer()
{
jobDequeuerBreak = true;
/* NOTE(review): presumably gives a job that is being processed some time to finish - confirm */
XSleep(10);
EnqueueJob(NULL, NULL);
/* NOTE(review): presumably ends/joins the consumer thread - confirm against the thread class */
jobDequeuer.End();
isJobQueue = false;
}
/* add a job item to process */
void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{
MUTEX_LOCK(jobQueueMutex);
runningJobCount++;
MUTEX_UNLOCK(jobQueueMutex);
JobQueueNode * node = new JobQueueNode();
node->job = job;
if(jobArgs != NULL)
node->args->AddList(jobArgs);
Enqueue(node);
}
/*
job item consumer (the function run by the consumer thread)
It dequeues job nodes one by one and runs them until the break flag of the
queue is set.
>> args - a list of two items: the queue (XQueue*) and a pointer to the
          id of the device on which the jobs run (int*)
*/
void XQueue::DequeueJobs(TensorList * args)
{
    CheckNTErrors((args->count == 2), "Illegal arguments!");

    XQueue * q = (XQueue*)args->GetItem(0);
    int devID = *(int*)args->GetItem(1);

    /* switch to the requested GPU and remember the device to go back to */
    int devIDBackup = XDevice::GetGPUDevice();

    if(devID >= 0)
        XDevice::SetGPUDevice(devID);

    while(1){
        JobQueueNode * node = (JobQueueNode*)q->Dequeue();

        /* the flag is tested after Dequeue() returns, so the node in hand
           (normally the empty job enqueued by StopJobConsumer) is not run */
        bool stop = q->GetJobBreak();

        if(!stop){
            CheckNTErrors((node != NULL), "Illegal job!");

            /* process a job */
            ((TFunction)node->job)(node->args);
        }

        /* release the node and its share of the job counter on BOTH paths.
           Leaving the loop without doing so leaked the node and kept
           runningJobCount above zero for good, which makes
           WaitForEmptyJobQueue() wait forever. */
        delete node;

        MUTEX_LOCK(q->jobQueueMutex);
        q->runningJobCount--;
        MUTEX_UNLOCK(q->jobQueueMutex);

        if(stop)
            break;
    }

    /* restore the device that was active when the consumer started */
    if(devID >= 0)
        XDevice::SetGPUDevice(devIDBackup);
}
/*
get the break flag
<< return - the flag that StopJobConsumer() sets and that the consumer loop
            in DequeueJobs() tests to decide whether to quit
*/
bool XQueue::GetJobBreak()
{
return jobDequeuerBreak;
}
/*
get a job stream
>> n - index of the stream (0, 1 or 2); any other value is reported as an error
<< return - the n-th job stream (NULL if it has not been created)
*/
XStream * XQueue::GetJobStream(int n)
{
    switch(n){
    case 0:
        return jobStream;
    case 1:
        return jobStream1;
    case 2:
        return jobStream2;
    default:
        ShowNTErrors("invalid stream id!");
        break;
    }

    return NULL;
}
/* make job streams */
void XQueue::MakeJobStreams(int devID, int devID1, int devID2)
{
if(devID != INVALID_DEVICE_ID)
jobStream = new XStream(0, devID);
if(devID1 != INVALID_DEVICE_ID)
jobStream1 = new XStream(0, devID1);
if(devID2 != INVALID_DEVICE_ID)
jobStream2 = new XStream(0, devID2);
}
} /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -81,11 +81,7 @@ int MakeTensorID() ...@@ -81,11 +81,7 @@ int MakeTensorID()
return id; return id;
} }
/* /* constructor */
constructor
>> myOrder - order of the tensor
>> myMem - memory pool used to allocating the data array
*/
XTensor::XTensor() XTensor::XTensor()
{ {
Init(); Init();
...@@ -130,9 +126,9 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem) ...@@ -130,9 +126,9 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
/* /*
constructor constructor
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - the size of each dimension >> myDimSize - size of each dimension
>> myDataType - unit size (e.g., int, float, and double) >> myDataType - unit size (e.g., int, float, and double)
>> myDenseRatio - how often an element has non-zero value >> myDenseRatio - how often an element has a non-zero value
>> myDevID - device id >> myDevID - device id
>> myMem - memory pool used to allocating the data array >> myMem - memory pool used to allocating the data array
*/ */
...@@ -168,10 +164,10 @@ XTensor::XTensor(const XTensor &reference) ...@@ -168,10 +164,10 @@ XTensor::XTensor(const XTensor &reference)
signature = reference.signature; signature = reference.signature;
/* what we really want to do is "reference.data = NULL;" /* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset reference.data As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of reference.data in here. So we save the ADDRESS of "reference.data" in
reference.dataP, and do this work by updating "*reference.dataP". "reference.dataP", and do this work by updating "*reference.dataP".
This is VERY tricky and might not be the best solution :) */ This is VERY tricky and there might be better solutions :) */
*reference.dataP = NULL; *reference.dataP = NULL;
} }
else{ else{
...@@ -208,10 +204,10 @@ XTensor::XTensor(const XTensor &&reference) ...@@ -208,10 +204,10 @@ XTensor::XTensor(const XTensor &&reference)
signature = reference.signature; signature = reference.signature;
/* what we really want to do is "reference.data = NULL;" /* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset reference.data As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of reference.data in here. So we save the ADDRESS of "reference.data" in
reference.dataP, and do this work by updating "*reference.dataP". "reference.dataP", and do this work by updating "*reference.dataP".
This is VERY tricky and might not be the best solution :) */ This is VERY tricky and there might be better solutions :) */
*reference.dataP = NULL; *reference.dataP = NULL;
XLink::Replace(&reference, this); XLink::Replace(&reference, this);
...@@ -305,7 +301,7 @@ void XTensor::DestroyData() ...@@ -305,7 +301,7 @@ void XTensor::DestroyData()
} }
/* /*
shallow copy of tensor shallow copy of the tensor
Note that we do not copy data array here Note that we do not copy data array here
>> tensor - the source tensor >> tensor - the source tensor
*/ */
...@@ -353,7 +349,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -353,7 +349,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
} }
if(false && !tensor.isTmp){ if(false && !tensor.isTmp){
/* NOTE: this might lead to additional data copy on Mac machines */ /* NOTE: this might lead to additional data copy by Mac LLVM compilers */
/* we make an identity transformation here */ /* we make an identity transformation here */
if(outgo.tailNum > 0) if(outgo.tailNum > 0)
...@@ -440,10 +436,10 @@ XTensor& XTensor::operator= (const XTensor&& tensor) ...@@ -440,10 +436,10 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
signature = tensor.signature; signature = tensor.signature;
/* what we really want to do is "reference.data = NULL;" /* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset reference.data As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of reference.data in here. So we save the ADDRESS of "reference.data" in
reference.dataP, and do this work by updating "*reference.dataP". "reference.dataP", and do this work by updating "*reference.dataP".
This is VERY tricky and might not be the best solution :) */ This is VERY tricky and there might be better solutions :) */
*tensor.dataP = NULL; *tensor.dataP = NULL;
XLink::Replace(&tensor, this); XLink::Replace(&tensor, this);
...@@ -526,7 +522,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem) ...@@ -526,7 +522,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
} }
/* /*
judge whether the two matrices are in the same type and size check whether the two matrices are in the same type and size
>> a - input tensor >> a - input tensor
>> b - anther tensor to compare with >> b - anther tensor to compare with
<< return - whether the two input tensors are identical << return - whether the two input tensors are identical
...@@ -556,6 +552,18 @@ bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b) ...@@ -556,6 +552,18 @@ bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b)
return true; return true;
} }
/*
check whether the three matrices are in the same type and size
>> a - input tensor
>> b - another tensor to compare with
>> c - a third tensor to compare with
<< return - whether both pairwise checks (a against b, and a against c) succeed
*/
bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
{
return IsSameShaped(a, b) && IsSameShaped(a, c);
}
bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim) bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
{ {
if (a == NULL || b == NULL) if (a == NULL || b == NULL)
...@@ -588,18 +596,6 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim) ...@@ -588,18 +596,6 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
} }
/* /*
judge whether the three matrices are in the same type and size
>> a - input tensor
>> b - anther tensor to compare with
>> c - a tensor again
<< return - whether the two input tensors are identical
*/
bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
{
return IsSameShaped(a, b) && IsSameShaped(a, c);
}
/*
set the size of each dimension set the size of each dimension
>> myDimSize - size of each dimension >> myDimSize - size of each dimension
*/ */
...@@ -630,7 +626,7 @@ int XTensor::GetDim(const int dim) const ...@@ -630,7 +626,7 @@ int XTensor::GetDim(const int dim) const
/* /*
reshape the tensor reshape the tensor
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - the size of each dimension >> myDimSize - size of each dimension
*/ */
void XTensor::Reshape(const int myOrder, const int * myDimSize) void XTensor::Reshape(const int myOrder, const int * myDimSize)
{ {
...@@ -652,7 +648,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize) ...@@ -652,7 +648,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize)
} }
/* /*
reshape the tensor to a vector reshape the tensor into a vector
>> num - number of elements >> num - number of elements
*/ */
void XTensor::Reshape(const int num) void XTensor::Reshape(const int num)
...@@ -662,7 +658,7 @@ void XTensor::Reshape(const int num) ...@@ -662,7 +658,7 @@ void XTensor::Reshape(const int num)
} }
/* /*
reshape the tensor to a matrix reshape the tensor into a matrix
>> rowNum - number of rows >> rowNum - number of rows
>> colNum - number of columns >> colNum - number of columns
*/ */
...@@ -708,7 +704,7 @@ int XTensor::GetSize() const ...@@ -708,7 +704,7 @@ int XTensor::GetSize() const
return unitNum; return unitNum;
} }
/* get size of the memory used */ /* get the size of the memory space used */
int XTensor::GetDataSizeInChar() int XTensor::GetDataSizeInChar()
{ {
if(isSparse){ if(isSparse){
...@@ -826,7 +822,7 @@ void XTensor::SetZeroAll(XStream * stream) ...@@ -826,7 +822,7 @@ void XTensor::SetZeroAll(XStream * stream)
/* set the tensor with an data array /* set the tensor with an data array
>> d - input data. it must be on CPU >> d - input data. it must be on CPU
>> num - number of data items >> num - number of data items
>> beg - where we start this in the data array of the tensor >> beg - where we start the data copy in the data array of the tensor
*/ */
void XTensor::SetData(const void * d, int num, int beg) void XTensor::SetData(const void * d, int num, int beg)
{ {
...@@ -846,7 +842,7 @@ set the tensor items by a uniform distribution in range [lower, upper] ...@@ -846,7 +842,7 @@ set the tensor items by a uniform distribution in range [lower, upper]
*/ */
void XTensor::SetDataRand(DTYPE lower, DTYPE upper) void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
{ {
// TODO: cuda code!!!!!!! // TODO: GPU code!!!!!!!
if (data == NULL) if (data == NULL)
return; return;
...@@ -884,7 +880,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper) ...@@ -884,7 +880,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
/* a gauss distribution (Box-Muller method) */ /* a gauss distribution (Box-Muller method) */
double GaussRand(DTYPE mean, DTYPE standardDeviation) double GaussRand(DTYPE mean, DTYPE standardDeviation)
{ {
// TODO: cuda code!!!!!!! // TODO: GPU code!!!!!!!
static double u, v; static double u, v;
static int phase = 0; static int phase = 0;
...@@ -947,7 +943,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation) ...@@ -947,7 +943,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
/* /*
set tensor items with an array of offsets set tensor items with an array of offsets
>> offsets - offset for each data item >> offsets - offset for each data item
>> value - value for data items >> value - value for the data items
>> num - number of the data items >> num - number of the data items
*/ */
void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num) void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num)
...@@ -967,7 +963,7 @@ void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num) ...@@ -967,7 +963,7 @@ void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
} }
/* check whether the data array is the same as the answer /* check whether the data array is the same as the answer
>> d - input data. it must be on CPU >> d - input data (it must be on CPUs)
>> num - number of data items >> num - number of data items
>> beg - where we start this in the data array of the tensor >> beg - where we start this in the data array of the tensor
*/ */
...@@ -1001,7 +997,7 @@ void XTensor::SetDataPointer() ...@@ -1001,7 +997,7 @@ void XTensor::SetDataPointer()
dataP = &data; dataP = &data;
} }
/* compare two number */ /* compare two numbers */
bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError) bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
{ {
if(a == b) if(a == b)
...@@ -1014,7 +1010,7 @@ bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError) ...@@ -1014,7 +1010,7 @@ bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
return (fabs((a - b) / a) < relError) ? true : false; return (fabs((a - b) / a) < relError) ? true : false;
} }
/* check whether the data array is the same as the answer */ /* check whether the data array is the same as the "answer" */
bool XTensor::CheckData(const void * d, int num, float tolerance, int beg) bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
{ {
if (data == NULL || d == NULL) if (data == NULL || d == NULL)
...@@ -1088,7 +1084,7 @@ void XTensor::SetAscendingOrder(int dim) ...@@ -1088,7 +1084,7 @@ void XTensor::SetAscendingOrder(int dim)
/* /*
get the value of a cell with the index get the value of a cell with the index
>> index - index of each dimension >> index - index of each dimension
>> size - size of index >> size - size of the index
<< return - cell value << return - cell value
*/ */
DTYPE XTensor::Get(int index[], int size) DTYPE XTensor::Get(int index[], int size)
...@@ -1099,7 +1095,7 @@ DTYPE XTensor::Get(int index[], int size) ...@@ -1099,7 +1095,7 @@ DTYPE XTensor::Get(int index[], int size)
} }
/* /*
get the value of a cell with the offset get the value of a cell with its offset
>> offset - offset in the array >> offset - offset in the array
<< return - cell value << return - cell value
*/ */
...@@ -1689,7 +1685,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1689,7 +1685,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
} }
/* /*
resize a tensor by another one resize a tensor by another
>> myTensor - tensor for reference >> myTensor - tensor for reference
*/ */
bool XTensor::Resize(const XTensor * myTensor) bool XTensor::Resize(const XTensor * myTensor)
...@@ -1711,7 +1707,7 @@ binary search to find an element in a sparse tensor ...@@ -1711,7 +1707,7 @@ binary search to find an element in a sparse tensor
>> value - value for return >> value - value for return
>> position - the position of the tuple. >> position - the position of the tuple.
it is the previous one if there is no hit it is the previous one if there is no hit
<< return - find it or not? << return - found it or not?
*/ */
bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
{ {
...@@ -1880,10 +1876,10 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1880,10 +1876,10 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
/* /*
dump data to a file dump data to a file
>> tensor - tensor whose data is dumped >> tensor - the tensor for dumping
>> file - where to domp the data >> file - where to domp the data
>> label - label of the tensor >> label - label of the tensor
>> n - number of items to dump >> n - number of the items to dump
>> beg - the first item id >> beg - the first item id
>> verbose - verbose level >> verbose - verbose level
*/ */
...@@ -2050,7 +2046,7 @@ void XTensor::FlushToMem(XMem * targetMem) ...@@ -2050,7 +2046,7 @@ void XTensor::FlushToMem(XMem * targetMem)
allocate the memory space of the tensor (in the global memory) allocate the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process >> tensor - the tensor we intend to process
>> myMem - the memory pool we are using >> myMem - the memory pool we are using
>> useBuf - use the buffer in the memory pool >> useBuf - indicates whether we use the buffer in the memory pool
*/ */
void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf) void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
{ {
...@@ -2082,7 +2078,7 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf) ...@@ -2082,7 +2078,7 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
free the memory space of the tensor (in the global memory) free the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process >> tensor - the tensor we intend to process
>> myMem - the memory pool we are using >> myMem - the memory pool we are using
>> useBuf - use the buffer in the memory pool >> useBuf - indicates whether we use the buffer in the memory pool
*/ */
void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf) void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
{ {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论