Commit 4268885c authored by dangqingqing

Remove the global status variables in CHECK_CUDA and CHECK_CUDNN and fix a compiler bug.

ISSUE=4586769

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1409 1ad973e4-5ce8-4261-8a94-b56d1f490c56
Parent: b72beee4
@@ -175,15 +175,16 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
 } /* namespace dynload */
 /**
- * Check build-in cudnn function using glog and it also
+ * Check build-in cudnn function using glog and it **does not**
  * support << operator for more details error info.
  */
-cudnnStatus_t g_cudnnStat;
-#define CHECK_CUDNN(cudnn_func) \
-  g_cudnnStat = cudnn_func; \
-  CHECK_EQ(CUDNN_STATUS_SUCCESS, g_cudnnStat) \
-      << "Cudnn Error: " \
-      << dynload::cudnnGetErrorString(g_cudnnStat) << ". " \
+#define CHECK_CUDNN(cudnnFunc) \
+  do { \
+    cudnnStatus_t cudnnStat = cudnnFunc; \
+    CHECK_EQ(CUDNN_STATUS_SUCCESS, cudnnStat) \
+        << "Cudnn Error: " \
+        << dynload::cudnnGetErrorString(cudnnStat); \
+  } while (0)
 bool g_is_libcudnn_init = false;
 int g_cudnn_lib_version = 0;
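The core of this hunk is the `do { ... } while (0)` idiom: the status variable becomes local to each macro expansion instead of a shared global, and the macro expands to exactly one statement, so the dangling `\` left after the trailing `<< ". "` in the old version disappears (likely the "compiler bug" the commit message refers to). Below is a minimal sketch of the same idiom with hypothetical names and plain C++ error handling standing in for glog's CHECK_EQ; it is not code from the Paddle tree.

```cpp
// Minimal sketch of a do { ... } while (0) status-checking macro with a
// local status variable. FakeStatus/CHECK_FAKE/DoWork are illustrative only.
#include <cstdio>
#include <cstdlib>

// Stand-in for a CUDA-style status type and error-string lookup.
enum FakeStatus { kSuccess = 0, kFailure = 1 };
const char* FakeGetErrorString(FakeStatus s) {
  return s == kSuccess ? "success" : "generic failure";
}

#define CHECK_FAKE(call)                                                  \
  do {                                                                    \
    FakeStatus stat_ = (call); /* local, not a shared global */           \
    if (stat_ != kSuccess) {                                              \
      std::fprintf(stderr, "Error: %s\n", FakeGetErrorString(stat_));     \
      std::abort();                                                       \
    }                                                                     \
  } while (0)

FakeStatus DoWork(bool ok) { return ok ? kSuccess : kFailure; }

int main() {
  bool enabled = true;
  // Because the macro expands to a single statement, this brace-less
  // if/else compiles and behaves as expected; a multi-statement macro
  // without the do/while wrapper would not.
  if (enabled)
    CHECK_FAKE(DoWork(true));
  else
    std::fprintf(stderr, "skipped\n");
  return 0;
}
```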
@@ -178,14 +178,15 @@ int g_cuda_lib_version = 0;
 #define HPPL_GPU_MEMORY_SIZE (256*4)
 /**
- * Check build-in cuda function using glog and it also
+ * Check build-in cuda function using glog and it **does not**
  * support << operator for more details error info.
  */
-cudaError_t cudaStat;
-#define CHECK_CUDA(cuda_func) \
-  cudaStat = cuda_func; \
-  CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \
-      << dynload::cudaGetErrorString(cudaStat) << ". " \
+#define CHECK_CUDA(cudaFunc) \
+  do { \
+    cudaError_t cudaStat = cudaFunc; \
+    CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \
+        << dynload::cudaGetErrorString(cudaStat); \
+  } while (0)
 /**
  * @brief thread resource.
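Call sites do not change with the new macro. The sketch below shows how such a wrapper is typically used against real CUDA runtime calls; it redefines the macro locally and uses cudaGetErrorString directly instead of Paddle's dynload wrapper, so it is illustrative rather than code from this repository.

```cpp
// Illustrative usage of a locally defined CHECK_CUDA-style macro with the
// CUDA runtime API. Each expansion gets its own cudaError_t, so concurrent
// threads no longer race on a shared global status variable.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

#define CHECK_CUDA(cudaFunc)                                        \
  do {                                                              \
    cudaError_t cudaStat = (cudaFunc);                              \
    if (cudaStat != cudaSuccess) {                                  \
      std::fprintf(stderr, "Cuda Error: %s\n",                      \
                   cudaGetErrorString(cudaStat));                   \
      std::abort();                                                 \
    }                                                               \
  } while (0)

int main() {
  const size_t n = 1024;
  float* dev = nullptr;
  CHECK_CUDA(cudaMalloc(&dev, n * sizeof(float)));    // allocate device memory
  CHECK_CUDA(cudaMemset(dev, 0, n * sizeof(float)));  // zero it
  CHECK_CUDA(cudaFree(dev));                          // release it
  return 0;
}
```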
@@ -29,7 +29,7 @@ namespace paddle {
  * parameters servers.
  */
-// TODO:(yanfei)
+// TODO(yanfei):
 // I think that the biggest feature of rdma is packet lossless control
 // feature instead of high bandwiths, zero copy and gpu-direct rdma in
 // theroy.
@@ -164,7 +164,7 @@ protected:
   static const std::string kElasticAverage;
 };
-// TODO:(yanfei)
+// TODO(yanfei):
 // do parameters level synchronization Optimization at pserver end with
 // ConcurrentRemoteParameterUpdater to get more parallelization, at last
 // to really hide pserver latency in backward computation.
@@ -241,7 +241,7 @@ private:
   bool oneBatchFinished_;
 };
-// TODO:(yanfei)
+// TODO(yanfei):
 // merge sparse updater with dense updater, and could help to reduce
 // the synchronization between sparse and dense udpater. it could also
 // reduce the threads for managing all connections.
@@ -188,7 +188,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config,
   bool gpuData = FLAGS_use_gpu && (!FLAGS_parallel_nn) &&
-                 (!IGradientMachineMode::dataMustInCpu(mode_,
+                 (!IGradientMachineMode::dataMustInCpu(mode_,
                   FLAGS_trainer_count));
   dataProvider_ = dataProvider;