From 524f6e9b36bc348b2e428b05b50fc6d60f173279 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 29 Sep 2018 13:38:06 +0800 Subject: [PATCH] Refine code --- paddle/fluid/memory/allocation/CMakeLists.txt | 5 ++- .../memory/allocation/allocator_facade.cc | 4 +- .../fluid/memory/allocation/cuda_allocator.cc | 25 ++--------- ...st.cu => selected_rows_functor_test.cu.cc} | 3 +- paddle/fluid/platform/CMakeLists.txt | 1 + paddle/fluid/platform/cuda_device_guard.cc | 22 +++++++++ paddle/fluid/platform/cuda_device_guard.h | 45 +++++++++++++++++++ 7 files changed, 79 insertions(+), 26 deletions(-) rename paddle/fluid/operators/math/{selected_rows_functor_test.cu => selected_rows_functor_test.cu.cc} (99%) create mode 100644 paddle/fluid/platform/cuda_device_guard.cc create mode 100644 paddle/fluid/platform/cuda_device_guard.h diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index a932b16440..3c972368b6 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(allocator SRCS allocator.cc DEPS place) cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator) cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) -nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator gpu_info) +nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard) if (WITH_GPU) nv_test(best_fit_allocator_test @@ -40,4 +40,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS locked_allocator best_fit_allocator naive_managed_allocator - aligned_allocator) + aligned_allocator + cuda_device_guard) diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index fc508e75f1..48b5f45d77 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -21,6 +21,7 @@ #include "paddle/fluid/memory/allocation/cpu_allocator.h" #include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/allocation/naive_managed_allocator.h" +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/place.h" #ifdef PADDLE_WITH_CUDA @@ -45,6 +46,7 @@ class AllocatorFacadePrivate { } AllocatorFacadePrivate() { + std::cout << "Init Allocator Facade" << std::endl; InitCPUAllocator(); InitCUDAAllocator(); } @@ -60,10 +62,10 @@ class AllocatorFacadePrivate { void InitCUDAAllocator() { #ifdef PADDLE_WITH_CUDA for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) { + platform::CUDADeviceGuard guard(dev_id); auto cuda_allocator = NaiveManagedAllocator::Create(std::unique_ptr( new CUDAAllocator(platform::CUDAPlace(dev_id)))); - auto allocation = cuda_allocator->Allocate(platform::GpuMaxChunkSize()); auto allocator = NaiveManagedAllocator::Create(std::unique_ptr( new LockedAllocator(std::unique_ptr( diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 14e0868332..bf9aced57f 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -16,34 +16,14 @@ #include #include #include +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/gpu_info.h" namespace paddle { namespace memory { namespace allocation { - -class CUDADeviceGuard { - public: - explicit CUDADeviceGuard(int dev_id) { - int prev_id = platform::GetCurrentDeviceId(); - if (prev_id != dev_id) { - prev_id_ = prev_id; - platform::SetDeviceId(dev_id); - } - } - - ~CUDADeviceGuard() { - if (prev_id_ != -1) { - platform::SetDeviceId(prev_id_); - } - } - - private: - int prev_id_{-1}; -}; - std::unique_ptr CUDAAllocator::Allocate(size_t size, Attr attr) { - CUDADeviceGuard guard(place_.device); + platform::CUDADeviceGuard guard(place_.device); void* ptr; auto status = cudaMalloc(&ptr, size); if (UNLIKELY(status != cudaSuccess)) { @@ -57,6 +37,7 @@ std::unique_ptr CUDAAllocator::Allocate(size_t size, Attr attr) { } void CUDAAllocator::Free(Allocation* allocation) { + platform::CUDADeviceGuard guard(place_.device); auto* cuda_allocation = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(cuda_allocation); PADDLE_ENFORCE_EQ(boost::get(cuda_allocation->place()), diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cu b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc similarity index 99% rename from paddle/fluid/operators/math/selected_rows_functor_test.cu rename to paddle/fluid/operators/math/selected_rows_functor_test.cu.cc index 5fc50aba25..cfb4055d09 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cu +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/math/selected_rows_functor.h" #include #include "gtest/gtest.h" #include "paddle/fluid/operators/math/math_function.h" -#include "paddle/fluid/operators/math/selected_rows_functor.h" TEST(selected_rows_functor, gpu_add) { paddle::platform::CUDAPlace gpu_place(0); @@ -38,6 +38,7 @@ TEST(selected_rows_functor, gpu_add) { {static_cast(rows1.size()), row_numel}), gpu_place); functor(ctx, in1_value, 1.0); + PADDLE_ENFORCE(cudaDeviceSynchronize()); std::vector rows2{0, 5, 7, 9}; std::unique_ptr selected_rows2{ diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 5af8af640e..0d0613e1a4 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -73,3 +73,4 @@ cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) IF(WITH_GPU) nv_test(cuda_helper_test SRCS cuda_helper_test.cu) ENDIF() +nv_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info) diff --git a/paddle/fluid/platform/cuda_device_guard.cc b/paddle/fluid/platform/cuda_device_guard.cc new file mode 100644 index 0000000000..8582ec9f60 --- /dev/null +++ b/paddle/fluid/platform/cuda_device_guard.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/platform/cuda_device_guard.h" + +namespace paddle { +namespace platform { +// Even this source file does not contains any code, it is better to keep this +// source file for cmake dependency. +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/cuda_device_guard.h b/paddle/fluid/platform/cuda_device_guard.h new file mode 100644 index 0000000000..a85ebf4b81 --- /dev/null +++ b/paddle/fluid/platform/cuda_device_guard.h @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/platform/gpu_info.h" + +namespace paddle { +namespace platform { + +class CUDADeviceGuard { + public: + explicit inline CUDADeviceGuard(int dev_id) { + int prev_id = platform::GetCurrentDeviceId(); + if (prev_id != dev_id) { + prev_id_ = prev_id; + platform::SetDeviceId(dev_id); + } + } + + inline ~CUDADeviceGuard() { + if (prev_id_ != -1) { + platform::SetDeviceId(prev_id_); + } + } + + CUDADeviceGuard(const CUDADeviceGuard& o) = delete; + CUDADeviceGuard& operator=(const CUDADeviceGuard& o) = delete; + + private: + int prev_id_{-1}; +}; + +} // namespace platform +} // namespace paddle -- GitLab