XDevice.h 5.3 KB
Newer Older
linye committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northestern University. 
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 *
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2016-06-23
 *
 */

#ifndef __XDEVICE_H__
#define __XDEVICE_H__

#include "XThread.h"
#include "XStream.h"

#ifdef USE_CUDA

/* the CUDA stuff */
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuda.h>

#endif

/* the nts (NiuTrans.Tensor) namespace */
namespace nts{

/* size (in chars) of the fixed buffers that hold a device name */
#define MAX_LENGTH_OF_DEVICE_NAME 64

/* capacity of the CPU device table */
#define MAX_CPU_NUM 16

/* capacity of the GPU device table */
#define MAX_GPU_NUM 16

/*
total number of device slots (CPUs + GPUs).
The expansion is parenthesized so that the macro behaves as a single value
inside larger expressions, e.g., MAX_DEVICE_NUM * 2.
*/
#define MAX_DEVICE_NUM (MAX_CPU_NUM + MAX_GPU_NUM)

/*
sentinel device id; parenthesized because it is a negative literal.
NOTE(review): presumably means "no valid device" - confirm against the users of this macro.
*/
#define INVALID_DEVICE_ID (-1000)

/*
special device id.
NOTE(review): presumably stands for "the GPU that is currently active" - confirm against the users of this macro.
*/
#define CURRENT_GPU 1000

//#define CUDA_UVA 1 // Unified Virtual Address Space of Cuda

/*
a class that records the basic information of each GPU/CPU device,
e.g., the memory limit and the warp size of a GPU, etc.
*/
class XDevice
{
public:
    /* 
    device id 
    <0:  CPU memory
    >=0: GPU device ID
    */
    int devID;

    /*
    size of the memory of the device
    NOTE(review): the unit is not visible in this header, and the field is a
    plain int - confirm the unit and the value range in the implementation
    */
    int memSize;

    /* warp size of an (NVIDIA) GPU */
    int GPUWarpSize;

    /* indicates whether this device object has been initialized */
    bool isInitialized;

    /* 
    max grid size (or number of blocks) of an (NVIDIA) GPU 
    NOTE: the grid size is given along three dimensions (x, y, z)
    */
    int GPUMaxGridSize[3];

    /*
    max block size (or number of threads per block) of an (NVIDIA) GPU 
    NOTE: the block size is given along three dimensions (x, y, z)
    */
    int GPUMaxBlockSize[3];

    /* max thread number that is supported */
    int GPUMaxThreadNum;

    /* max (and optimal) thread number for a block */
    int GPUMaxThreadNumPerBlock;

    /* name of the device (fixed-size buffer of MAX_LENGTH_OF_DEVICE_NAME chars) */
    char name[MAX_LENGTH_OF_DEVICE_NAME];

    /*
    a second name buffer of the device
    NOTE(review): how it differs from "name" is not visible in this header - confirm
    */
    char name2[MAX_LENGTH_OF_DEVICE_NAME];

    /* specify whether Unified Virtual Address Space (UVA) is supported */
    bool isUVASupported;

    /* default stream for the device */
    XStream * stream;
    
#ifdef USE_CUDA
    /* mutex for the cublas handle (GPU only) */
    MUTEX_HANDLE cublasMutex;

    /* handle used for cublas */
    cublasHandle_t cublasHandle;

    /* specify whether the cublas handle has been initialized */
    bool isHandleReady;
#endif


public:
    /* constructor */
    XDevice();

    /* destructor */
    ~XDevice();

    /*
    initialize it and get the device information
    >> myDevID - id of the device (see "devID" for the meaning of the value)
    */
    void Init(int myDevID);

    /* clear it */
    void Clear();

#ifdef USE_CUDA
    /* get the cublas handle (returned as a pointer) */
    cublasHandle_t * GetCublasHandle();

    /* get the cuda stream (returned as a pointer) */
    cudaStream_t * GetCudaStream();
#endif

    /*
    switch to a device
    >> devID - id of the GPU device to switch to
    */
    static
    void SetGPUDevice(int devID);

    /*
    switch to a device (with fast GPU execution mode)
    >> devID - id of the GPU device to switch to
    */
    static
    void SetGPUDeviceFast(int devID);

    /* get the id of the current GPU device */
    static
    int GetGPUDevice();

    /* reset cuda flag for more efficient cuda execution */
    static
    void SetFastFlags();

    /* reset cuda flag for more efficient cuda execution (all devices) */
    static
    void SetFastFlagsAllDevices();
};

/*
a class for the management of devices
*/
class XDevManager
{
public:
    /* CPU device information (at most MAX_CPU_NUM entries) */
    XDevice CPUs[MAX_CPU_NUM];

    /* number of CPUs */
    int nCPU;

    /* GPU device information (at most MAX_GPU_NUM entries) */
    XDevice GPUs[MAX_GPU_NUM];

    /* number of GPUs */
    int nGPU;

public:
    /* constructor */
    XDevManager();

    /* destructor */
    ~XDevManager();

    /* initialize it and get the CPU and GPU information */
    void Init();

    /* clear it */
    void Clear();

#ifdef USE_CUDA
    /*
    get the cublas handle of a GPU
    >> devID - id of the device
    */
    cublasHandle_t * GetCudaHandle(const int devID);

    /*
    get the cuda stream of a GPU
    >> devID - id of the device
    */
    cudaStream_t * GetCudaStream(const int devID);
#endif

    /*
    get the grid and block sizes that give the max potential
    >> devID - id of the device
    >> n - size of the job (NOTE(review): presumably the number of items to process - confirm)
    >> gridSize - output: the grid size
    >> blockSize - output: the block size
    NOTE(review): the meaning of the returned int is not visible in this header - confirm
    */
    int GetCudaThread(const int devID, const int n, int * gridSize, int * blockSize);

    /*
    get the grid and block sizes that give the max potential (2-dimension assignment)
    >> devID - id of the device
    >> n - size of the job along one dimension (TODO confirm which one)
    >> m - size of the job along the other dimension (TODO confirm which one)
    >> nLimit - a limit related to n (NOTE(review): exact semantics are not visible here - confirm)
    >> gridSize - output: the grid size
    >> blockSize - output: the block size
    NOTE(review): the meaning of the returned int is not visible in this header - confirm
    */
    int GetCudaThread2D(const int devID, const int n, const int m, int nLimit, int * gridSize, int * blockSize);

    /*
    get device ids for the given device information
    >> devInfo - device information in string (format is defined by the implementation)
    >> devIDs - output: the device ids
    NOTE(review): the return value is presumably the number of ids written - confirm
    */
    int GetDeviceIDs(char * devInfo, int * devIDs);

    /*
    show the id sequence
    >> devInfo - device information in string
    >> msg - a message buffer (NOTE(review): whether it is read or written is not visible here - confirm)
    */
    void ShowDeviceIDs(char * devInfo, char * msg);

    /* show device information */
    void ShowDevInfo();

    /*
    get the device information in string
    >> devID - id of the device
    NOTE(review): ownership of the returned string is not visible in this header - confirm
    */
    char * GetDevString(int devID);
};

/*
the global device manager.
This is only a declaration (extern); the object is defined outside this header.
*/
extern XDevManager GDevs;

/*
save the current cuda device and switch to another one if necessary
>> devID - id of the device we want to work on
>> devIDBackup - an int lvalue that receives the id of the device that
                 is current at the time of the call
The arguments are parenthesized so that expression arguments (e.g., "a & b")
are compared as a whole. The macro expands to a brace-enclosed block (not a
do-while), so call sites with or without a trailing ';' both compile; as a
consequence it should not be used as the unbraced body of an "if" that is
followed by an "else".
NOTE: the return codes of cudaGetDevice/cudaSetDevice are not checked here.
*/
#define ProtectCudaDev(devID, devIDBackup) \
{ \
    cudaGetDevice(&(devIDBackup)); \
    if ((devIDBackup) != (devID)) \
        cudaSetDevice(devID); \
}

/*
switch back to the device saved in devIDBackup if it differs from devID
>> devID - id of the device we have been working on
>> devIDBackup - id of the device to restore (e.g., the value filled in by ProtectCudaDev)
The arguments are parenthesized so that expression arguments are compared as
a whole. The macro expands to a brace-enclosed block (not a do-while), so call
sites with or without a trailing ';' both compile.
NOTE: the return code of cudaSetDevice is not checked here.
*/
#define BacktoCudaDev(devID, devIDBackup) \
{ \
    if ((devIDBackup) != (devID)) \
        cudaSetDevice(devIDBackup); \
}

} /* end of the nts (NiuTrans.Tensor) namespace */

#endif