From d962c2a997c86c004e4781238d384c8cc078171c Mon Sep 17 00:00:00 2001 From: qijun Date: Fri, 28 Jul 2017 16:22:12 +0800 Subject: [PATCH] fix bug in CUDADeviceContext --- cmake/flags.cmake | 2 +- paddle/platform/device_context.cc | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 34fd3488930..ef31c252038 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -153,7 +153,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF) # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. # So, don't set these flags here. -LIST(APPEND CUDA_NVCC_FLAGS -std=c++11) +LIST(APPEND CUDA_NVCC_FLAGS -std=c++11 --default-stream per-thread) LIST(APPEND CUDA_NVCC_FLAGS --use_fast_math) if(CMAKE_BUILD_TYPE STREQUAL "Debug") diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 44afb5d4ee6..5218d89d543 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -44,7 +44,19 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device() const { CUDADeviceContext::CUDADeviceContext(GPUPlace place) : place_(place) { SetDeviceId(place_.device); PADDLE_ENFORCE(cudaStreamCreate(&stream_)); - eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_)); + // TODO (qijun) Pass a created cuda stream to Eigen::CudaStreamDevice directly + // here will cause segment fault. We must implement a class derived from + // Eigen::StreamInterface, and reinitialize it with a cuda stream and a gpu id + // later. Please refer to the implementation of class EigenCudaStreamDevice + // in TensorFlow. + // + // We find that CUDA 7 introduces a new option, the per-thread default stream, + // that has two effects. Please refer to https://devblogs.nvidia.com/ + // parallelforall/gpu-pro-tip-cuda-7-streams-simplify-concurrency/ + // + // So, we decide to use default stream and add –default-stream per-thread nvcc + // flag. Than, two threads with two CUDADeviceContexts will run parallelly. + eigen_stream_.reset(new Eigen::CudaStreamDevice()); eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); } -- GitLab