diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index cc6b52e9271ff00e91f2ca172815c543eb99261d..ebacd5d6dc850ebde5212bba39aea4dde8e4eb03 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -4,3 +4,5 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) + +nv_test(device_context_test SRCS device_context_test.cc DEPS dynamic_loader place eigen3 glog gflags) diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h new file mode 100644 index 0000000000000000000000000000000000000000..fcef0a5e3058f1c9d54f9e06a54a09286e2454fd --- /dev/null +++ b/paddle/platform/device_context.h @@ -0,0 +1,159 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/enforce.h" +#ifndef PADDLE_ONLY_CPU +#include "paddle/platform/cuda.h" +#include "paddle/platform/dynload/cublas.h" +#include "paddle/platform/dynload/cudnn.h" +#include "paddle/platform/dynload/curand.h" +#define EIGEN_USE_GPU +#endif +#include "paddle/platform/place.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace platform { + +class DeviceContext { + public: + virtual ~DeviceContext() {} +}; + +class CPUDeviceContext : public DeviceContext {}; + +#ifndef PADDLE_ONLY_CPU +class GPUPlaceGuard { + public: + explicit GPUPlaceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) { + if (previous_ != new_place) { + paddle::platform::SetDeviceId(new_place.device); + } + } + + ~GPUPlaceGuard() { paddle::platform::SetDeviceId(previous_.device); } + + private: + GPUPlace previous_; +}; + +class CUDADeviceContext : public DeviceContext { + public: + explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { + GPUPlaceGuard guard(gpu_place_); + paddle::platform::throw_on_error(cudaStreamCreate(&stream_), + "cudaStreamCreate failed"); + eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); + eigen_device_ = new Eigen::GpuDevice(eigen_stream_); + } + + void Wait() { + paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), + "cudaStreamSynchronize failed"); + } + + cudaStream_t stream() { return stream_; } + + Eigen::GpuDevice eigen_device() { return *eigen_device_; } + + cublasHandle_t cublas_handle() { + if (!blas_handle_) { + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) == + CUBLAS_STATUS_SUCCESS, + "cublasCreate failed"); + PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream( + blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, + "cublasSetStream failed"); + } + return blas_handle_; + } + + cudnnHandle_t cudnn_handle() { + if (!dnn_handle_) { + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) == + CUDNN_STATUS_SUCCESS, + "cudnnCreate failed"); + PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream( + dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, + "cudnnSetStream failed"); + } + return dnn_handle_; + } + + curandGenerator_t curand_generator() { + if (!rand_generator_) { + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( + &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == + CURAND_STATUS_SUCCESS, + "curandCreateGenerator failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( + rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, + "curandSetPseudoRandomGeneratorSeed failed"); + PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream( + rand_generator_, stream_) == CURAND_STATUS_SUCCESS, + "curandSetStream failed"); + } + return rand_generator_; + } + + ~CUDADeviceContext() { + Wait(); + if (blas_handle_) { + PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) == + CUBLAS_STATUS_SUCCESS, + "cublasDestroy failed"); + } + + if (dnn_handle_) { + PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) == + CUDNN_STATUS_SUCCESS, + "cudnnDestroy failed"); + } + + if (rand_generator_) { + PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator( + rand_generator_) == CURAND_STATUS_SUCCESS, + "curandDestroyGenerator failed"); + } + + delete eigen_stream_; + delete eigen_device_; + + paddle::platform::throw_on_error(cudaStreamDestroy(stream_), + "cudaStreamDestroy failed"); + } + + private: + GPUPlace gpu_place_; + cudaStream_t stream_; + + Eigen::CudaStreamDevice* eigen_stream_; + Eigen::GpuDevice* eigen_device_; + + cublasHandle_t blas_handle_{nullptr}; + + cudnnHandle_t dnn_handle_{nullptr}; + + int random_seed_; + curandGenerator_t rand_generator_{nullptr}; +}; +#endif +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/device_context_test.cc b/paddle/platform/device_context_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..61be4a307dbf073be7dff4564183240834cc7df6 --- /dev/null +++ b/paddle/platform/device_context_test.cc @@ -0,0 +1,33 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/device_context.h" +#include "gtest/gtest.h" + +TEST(CUDADeviceContext, Init) { + int count = paddle::platform::GetDeviceCount(); + for (int i = 0; i < count; i++) { + paddle::platform::CUDADeviceContext* device_context = + new paddle::platform::CUDADeviceContext(i); + Eigen::GpuDevice gpu_device = device_context->eigen_device(); + ASSERT_NE(nullptr, gpu_device.stream()); + cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); + ASSERT_NE(nullptr, cudnn_handle); + cublasHandle_t cublas_handle = device_context->cublas_handle(); + ASSERT_NE(nullptr, cublas_handle); + curandGenerator_t curand_handle = device_context->curand_generator(); + ASSERT_NE(nullptr, curand_handle); + delete device_context; + } +}