diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 3c972368b615a931e5629df48a3f5fff650cd670..937b26f807545cc8cf844787fc9e94c18499ba77 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -42,3 +42,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS naive_managed_allocator aligned_allocator cuda_device_guard) + +nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) diff --git a/paddle/fluid/memory/allocation/allocation_and_eigen_test.cu b/paddle/fluid/memory/allocation/allocation_and_eigen_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..e4d690c296cfe9aa273c9b94688b44ef62bf5e97 --- /dev/null +++ b/paddle/fluid/memory/allocation/allocation_and_eigen_test.cu @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/for_range.h" +#include "unsupported/Eigen/CXX11/Tensor" +struct FillZero { + public: + float* ptr_; + + __device__ void operator()(size_t i) { ptr_[i] = 0.0f; } +}; + +namespace paddle { +TEST(Eigen, main) { + framework::Tensor tensor; + platform::CUDAPlace gpu(0); + float* ptr = tensor.mutable_data({10, 10}, gpu); + auto& dev_ctx = *reinterpret_cast( + platform::DeviceContextPool::Instance().Get(gpu)); + PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * 100)); + + platform::ForRange for_range(dev_ctx, 100); + for_range(FillZero{ptr}); + dev_ctx.Wait(); + + auto eigen_vec = framework::EigenVector::Flatten(tensor); + auto& eigen_dev = *dev_ctx.eigen_device(); + eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f); +} +} // namespace paddle diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 48b5f45d7761b452c6010e29fc30c2657d0296a7..bfd5f959faca56168cd0acfd31d23cdf7cbbb965 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -46,7 +46,6 @@ class AllocatorFacadePrivate { } AllocatorFacadePrivate() { - std::cout << "Init Allocator Facade" << std::endl; InitCPUAllocator(); InitCUDAAllocator(); } diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index bf9aced57fe88267eb00168808b1f930eb456080..7b477c53ea2ead54073658a015ca7dc5a41309eb 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -31,7 +31,6 @@ std::unique_ptr CUDAAllocator::Allocate(size_t size, Attr attr) { "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, status, cudaGetErrorString(status))); } - return std::unique_ptr( new CUDAAllocation(ptr, size, platform::Place(place_))); } diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 91101356436c26171eaca2fe01dfd4d937e71717..0f7ce471f0f40edb3b34c4fdad9c2dbdc278505e 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -72,7 +72,7 @@ cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) if(WITH_GPU) nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) - nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function) + nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) endif() cc_test(concat_test SRCS concat_test.cc DEPS concat) cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 0b97f5123a8dcf01da9a50acb1a1fad399a42c84..7d6c3412ce31f798354159ab41569f7b1bfcbfc6 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -9,11 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device_context.h" - #include #include #include #include +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/memory/memory.h" #ifdef PADDLE_WITH_CUDA @@ -205,7 +205,7 @@ class CudnnHolder { CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place), cudnn_holder_(nullptr) { - SetDeviceId(place_.device); + CUDADeviceGuard guard(place_.device); compute_capability = GetCUDAComputeCapability(place_.device); multi_process = GetCUDAMultiProcessors(place_.device); max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device); diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 4c99f4be321160caf0ee2f89a655bdfb933408e3..25a693ab95f1b360a532527e7838dca0b952f294 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/cpu_info.h" +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" @@ -64,7 +65,7 @@ void InitP2P(std::vector devices) { LOG(WARNING) << "Cannot enable P2P access from " << devices[i] << " to " << devices[j]; } else { - cudaSetDevice(devices[i]); + platform::CUDADeviceGuard guard(devices[i]); cudaDeviceEnablePeerAccess(devices[j], 0); } }