diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 8035d93bfec75b20a54c5af0521ab724cafba8ca..9cc4233e43267472d405c3e4e617f0782e1430ea 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1,7 +1,7 @@ add_subdirectory(detail) cc_library(memory SRCS memory.cc) -cc_library(memcpy SRCS memcpy.cc DEPS device_context) +cc_library(memcpy SRCS memcpy.cc) cc_library(paddle_memory DEPS diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index c1ad60d16064753ef50c4d90bef4fcc88cb1a310..acfc0639736beb82df41b851664e7bcd079b5eb1 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -16,5 +16,8 @@ ELSE() set(GPU_CTX_DEPS) ENDIF() -cc_library(device_context SRCS device_context.cc DEPS memory place eigen3 ${GPU_CTX_DEPS}) +# memcpy deoends on device_context, here add deps individually for +# avoiding cycle dependencies +cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator + system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index dc345bdd57ed2d7560a8027f70df3ef1d1cfbb97..f92c15ae450e94de44d27e77763e791e6bae4426 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -57,7 +57,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { } void* allocate(size_t num_bytes) const override { - paddle::memory::Alloc(place_, num_bytes); + return paddle::memory::Alloc(place_, num_bytes); } void deallocate(void* buffer) const override { @@ -86,7 +86,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { GPUPlace place_; const cudaStream_t* stream_; // not owned; const cudaDeviceProp* device_prop_; // not owned; - mutable char* scratch_; + mutable void* scratch_; mutable unsigned int* semaphore_; }; @@ -145,7 +145,7 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() { if (!cudnn_handle_) { SetDeviceId(place_.device); PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); - PADDLE_ENFORCE(dynload::cudnnSetStream(cudnnHandle_t, stream_)); + PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_)); } return cudnn_handle_; } @@ -160,7 +160,7 @@ curandGenerator_t CUDADeviceContext::curand_generator() { PADDLE_ENFORCE( dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_)); - PADDLE_ENFORCE(dynload::curandSetStream(curandGenerator_t, stream_)); + PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_)); } return curand_generator_; } diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index b68e177c0a62d8855c7ea2a40baa6ebef16183ff..c5042ae33e47e04521e59e0d91ddd8d4efffe50a 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -52,6 +52,7 @@ class CPUDeviceContext : public DeviceContext { }; #ifndef PADDLE_ONLY_CPU +class EigenCudaStreamDevice; class CUDADeviceContext : public DeviceContext { public: @@ -92,7 +93,7 @@ class CUDADeviceContext : public DeviceContext { uint64_t seed_; // clang-format off - cudaStream_t stream_{nullptr} + cudaStream_t stream_{nullptr}; cudnnHandle_t cudnn_handle_{nullptr}; cublasHandle_t cublas_handle_{nullptr}; curandGenerator_t curand_generator_{nullptr};