diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 7a198aec6cf12c92cb24a8e560508d06db5e1dcf..4e34e8d02cc676235a607c94c08ddae213de3a90 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,14 +1,8 @@ add_subdirectory(dynload) -nv_test(cuda_test SRCS cuda_test.cu DEPS dyload_cuda) +nv_test(cuda_test SRCS cuda_test.cu DEPS dynload_cuda) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) -IF(WITH_GPU) - set(GPU_CTX_DEPS dyload_cuda dynamic_loader ) -ELSE() - set(GPU_CTX_DEPS) -ENDIF() -cc_library(device_context SRCS device_context.cc DEPS place eigen3 ${GPU_CTX_DEPS}) -nv_test(device_context_test SRCS device_context_test.cc DEPS device_context glog gflags) +nv_test(device_context_test SRCS device_context_test.cc DEPS place eigen3 dynload_cuda) diff --git a/paddle/platform/cuda_device_context.h b/paddle/platform/cuda_device_context.h new file mode 100644 index 0000000000000000000000000000000000000000..e0d79631c57596810d25b2f790fb6b3a20b5e062 --- /dev/null +++ b/paddle/platform/cuda_device_context.h @@ -0,0 +1,148 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/enforce.h" +#include "paddle/platform/cuda.h" +#include "paddle/platform/dynload/cublas.h" +#include "paddle/platform/dynload/cudnn.h" +#include "paddle/platform/dynload/curand.h" +#define EIGEN_USE_GPU +#include "paddle/platform/place.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace platform { + +class GPUPlaceGuard { + public: + explicit GPUPlaceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) { + if (previous_ != new_place) { + paddle::platform::SetDeviceId(new_place.device); + } + } + + ~GPUPlaceGuard() { paddle::platform::SetDeviceId(previous_.device); } + + private: + GPUPlace previous_; +}; + +class CUDADeviceContext : public DeviceContext { + public: + explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { + GPUPlaceGuard guard(gpu_place_); + paddle::platform::throw_on_error(cudaStreamCreate(&stream_), + "cudaStreamCreate failed"); + eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); + eigen_device_ = new Eigen::GpuDevice(eigen_stream_); + } + + void Wait() { + paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), + "cudaStreamSynchronize failed"); + } + + cudaStream_t stream() { return stream_; } + + Eigen::GpuDevice eigen_device() { return *eigen_device_; } + + cublasHandle_t cublas_handle() { + if (!blas_handle_) { + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) == + CUBLAS_STATUS_SUCCESS, + "cublasCreate failed"); + PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream( + blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, + "cublasSetStream failed"); + } + return blas_handle_; + } + + cudnnHandle_t cudnn_handle() { + if (!dnn_handle_) { + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) == + CUDNN_STATUS_SUCCESS, + "cudnnCreate failed"); + PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream( + dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, + "cudnnSetStream failed"); + } + return dnn_handle_; + } + + curandGenerator_t curand_generator() { + if (!rand_generator_) { + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( + &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == + CURAND_STATUS_SUCCESS, + "curandCreateGenerator failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( + rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, + "curandSetPseudoRandomGeneratorSeed failed"); + PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream( + rand_generator_, stream_) == CURAND_STATUS_SUCCESS, + "curandSetStream failed"); + } + return rand_generator_; + } + + ~CUDADeviceContext() { + Wait(); + if (blas_handle_) { + PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) == + CUBLAS_STATUS_SUCCESS, + "cublasDestroy failed"); + } + + if (dnn_handle_) { + PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) == + CUDNN_STATUS_SUCCESS, + "cudnnDestroy failed"); + } + + if (rand_generator_) { + PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator( + rand_generator_) == CURAND_STATUS_SUCCESS, + "curandDestroyGenerator failed"); + } + + delete eigen_stream_; + delete eigen_device_; + + paddle::platform::throw_on_error(cudaStreamDestroy(stream_), + "cudaStreamDestroy failed"); + } + + private: + GPUPlace gpu_place_; + cudaStream_t stream_; + + Eigen::CudaStreamDevice* eigen_stream_; + Eigen::GpuDevice* eigen_device_; + + cublasHandle_t blas_handle_{nullptr}; + + cudnnHandle_t dnn_handle_{nullptr}; + + int random_seed_; + curandGenerator_t rand_generator_{nullptr}; +}; +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc deleted file mode 100644 index a2dea2ed1e11817c23dd2dc55a578d8fbd21ecb2..0000000000000000000000000000000000000000 --- a/paddle/platform/device_context.cc +++ /dev/null @@ -1,13 +0,0 @@ -#include - -namespace paddle { -namespace platform { -namespace dynload { -namespace dummy { -// Make DeviceContext A library. -int DUMMY_VAR_FOR_DEV_CTX = 0; - -} // namespace dummy -} // namespace dynload -} // namespace platform -} // namespace paddle \ No newline at end of file diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 160eb4e12060b36c4fefba499d4e83b9aab92848..f30c14712627301dfcf990b528fbcda6f337dc9b 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -13,16 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - #include "paddle/framework/enforce.h" -#ifndef PADDLE_ONLY_CPU -#include "paddle/platform/cuda.h" -#include "paddle/platform/dynload/cublas.h" -#include "paddle/platform/dynload/cudnn.h" -#include "paddle/platform/dynload/curand.h" -#define EIGEN_USE_GPU -#endif -#include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" namespace paddle { @@ -33,128 +24,18 @@ class DeviceContext { virtual ~DeviceContext() {} }; -class CPUDeviceContext : public DeviceContext {}; - -#ifndef PADDLE_ONLY_CPU - -class GPUPlaceGuard { +class CPUDeviceContext : public DeviceContext { public: - explicit GPUPlaceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) { - if (previous_ != new_place) { - paddle::platform::SetDeviceId(new_place.device); + Eigen::DefaultDevice eigen_handle() { + if (!eigen_handle_) { + eigen_handle_ = new Eigen::DefaultDevice(); } + return *eigen_handle_; } - ~GPUPlaceGuard() { paddle::platform::SetDeviceId(previous_.device); } - private: - GPUPlace previous_; + Eigen::DefaultDevice* eigen_handle_{nullptr}; }; -class CUDADeviceContext : public DeviceContext { - public: - explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { - GPUPlaceGuard guard(gpu_place_); - paddle::platform::throw_on_error(cudaStreamCreate(&stream_), - "cudaStreamCreate failed"); - eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); - eigen_device_ = new Eigen::GpuDevice(eigen_stream_); - } - - void Wait() { - paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), - "cudaStreamSynchronize failed"); - } - - cudaStream_t stream() { return stream_; } - - Eigen::GpuDevice eigen_device() { return *eigen_device_; } - - cublasHandle_t cublas_handle() { - if (!blas_handle_) { - GPUPlaceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) == - CUBLAS_STATUS_SUCCESS, - "cublasCreate failed"); - PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream( - blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, - "cublasSetStream failed"); - } - return blas_handle_; - } - - cudnnHandle_t cudnn_handle() { - if (!dnn_handle_) { - GPUPlaceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) == - CUDNN_STATUS_SUCCESS, - "cudnnCreate failed"); - PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream( - dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, - "cudnnSetStream failed"); - } - return dnn_handle_; - } - - curandGenerator_t curand_generator() { - if (!rand_generator_) { - GPUPlaceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( - &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == - CURAND_STATUS_SUCCESS, - "curandCreateGenerator failed"); - PADDLE_ENFORCE( - paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( - rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, - "curandSetPseudoRandomGeneratorSeed failed"); - PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream( - rand_generator_, stream_) == CURAND_STATUS_SUCCESS, - "curandSetStream failed"); - } - return rand_generator_; - } - - ~CUDADeviceContext() { - Wait(); - if (blas_handle_) { - PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) == - CUBLAS_STATUS_SUCCESS, - "cublasDestroy failed"); - } - - if (dnn_handle_) { - PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) == - CUDNN_STATUS_SUCCESS, - "cudnnDestroy failed"); - } - - if (rand_generator_) { - PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator( - rand_generator_) == CURAND_STATUS_SUCCESS, - "curandDestroyGenerator failed"); - } - - delete eigen_stream_; - delete eigen_device_; - - paddle::platform::throw_on_error(cudaStreamDestroy(stream_), - "cudaStreamDestroy failed"); - } - - private: - GPUPlace gpu_place_; - cudaStream_t stream_; - - Eigen::CudaStreamDevice* eigen_stream_; - Eigen::GpuDevice* eigen_device_; - - cublasHandle_t blas_handle_{nullptr}; - - cudnnHandle_t dnn_handle_{nullptr}; - - int random_seed_; - curandGenerator_t rand_generator_{nullptr}; -}; -#endif } // namespace platform } // namespace paddle diff --git a/paddle/platform/device_context_test.cc b/paddle/platform/device_context_test.cc index 61be4a307dbf073be7dff4564183240834cc7df6..cc81e9e789f3ebdf14f6ea2aa0706c52eed26385 100644 --- a/paddle/platform/device_context_test.cc +++ b/paddle/platform/device_context_test.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/platform/device_context.h" #include "gtest/gtest.h" +#include "paddle/platform/cuda_device_context.h" TEST(CUDADeviceContext, Init) { int count = paddle::platform::GetDeviceCount(); diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index 4a8866b3d364542f315978859e96290c6f067f6f..d205ead84598e04eea523be32139959a02e0dd83 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) -nv_library(dyload_cuda SRCS cublas.cc cudnn.cc curand.cc) +nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc)