提交 8e3fdc6e 编写于 作者: Y Yu Yang

Fix SetDevice on init

上级 524f6e9b
...@@ -42,3 +42,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS ...@@ -42,3 +42,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
naive_managed_allocator naive_managed_allocator
aligned_allocator aligned_allocator
cuda_device_guard) cuda_device_guard)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"
struct FillZero {
public:
float* ptr_;
__device__ void operator()(size_t i) { ptr_[i] = 0.0f; }
};
namespace paddle {
TEST(Eigen, main) {
framework::Tensor tensor;
platform::CUDAPlace gpu(0);
float* ptr = tensor.mutable_data<float>({10, 10}, gpu);
auto& dev_ctx = *reinterpret_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(gpu));
PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * 100));
platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx, 100);
for_range(FillZero{ptr});
dev_ctx.Wait();
auto eigen_vec = framework::EigenVector<float>::Flatten(tensor);
auto& eigen_dev = *dev_ctx.eigen_device();
eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f);
}
} // namespace paddle
...@@ -46,7 +46,6 @@ class AllocatorFacadePrivate { ...@@ -46,7 +46,6 @@ class AllocatorFacadePrivate {
} }
AllocatorFacadePrivate() { AllocatorFacadePrivate() {
std::cout << "Init Allocator Facade" << std::endl;
InitCPUAllocator(); InitCPUAllocator();
InitCUDAAllocator(); InitCUDAAllocator();
} }
......
...@@ -31,7 +31,6 @@ std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) { ...@@ -31,7 +31,6 @@ std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status))); status, cudaGetErrorString(status)));
} }
return std::unique_ptr<Allocation>( return std::unique_ptr<Allocation>(
new CUDAAllocation(ptr, size, platform::Place(place_))); new CUDAAllocation(ptr, size, platform::Place(place_)));
} }
......
...@@ -72,7 +72,7 @@ cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) ...@@ -72,7 +72,7 @@ cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col)
cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)
if(WITH_GPU) if(WITH_GPU)
nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function)
nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function)
endif() endif()
cc_test(concat_test SRCS concat_test.cc DEPS concat) cc_test(concat_test SRCS concat_test.cc DEPS concat)
cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info) cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
...@@ -9,11 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,11 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include <set> #include <set>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -205,7 +205,7 @@ class CudnnHolder { ...@@ -205,7 +205,7 @@ class CudnnHolder {
CUDADeviceContext::CUDADeviceContext(CUDAPlace place) CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
: place_(place), cudnn_holder_(nullptr) { : place_(place), cudnn_holder_(nullptr) {
SetDeviceId(place_.device); CUDADeviceGuard guard(place_.device);
compute_capability = GetCUDAComputeCapability(place_.device); compute_capability = GetCUDAComputeCapability(place_.device);
multi_process = GetCUDAMultiProcessors(place_.device); multi_process = GetCUDAMultiProcessors(place_.device);
max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device); max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
......
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
...@@ -64,7 +65,7 @@ void InitP2P(std::vector<int> devices) { ...@@ -64,7 +65,7 @@ void InitP2P(std::vector<int> devices) {
LOG(WARNING) << "Cannot enable P2P access from " << devices[i] LOG(WARNING) << "Cannot enable P2P access from " << devices[i]
<< " to " << devices[j]; << " to " << devices[j];
} else { } else {
cudaSetDevice(devices[i]); platform::CUDADeviceGuard guard(devices[i]);
cudaDeviceEnablePeerAccess(devices[j], 0); cudaDeviceEnablePeerAccess(devices[j], 0);
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册