提交 8e3fdc6e 编写于 作者: Y Yu Yang

Fix SetDevice on init

上级 524f6e9b
......@@ -42,3 +42,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
naive_managed_allocator
aligned_allocator
cuda_device_guard)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"
struct FillZero {
public:
float* ptr_;
__device__ void operator()(size_t i) { ptr_[i] = 0.0f; }
};
namespace paddle {
TEST(Eigen, main) {
framework::Tensor tensor;
platform::CUDAPlace gpu(0);
float* ptr = tensor.mutable_data<float>({10, 10}, gpu);
auto& dev_ctx = *reinterpret_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(gpu));
PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * 100));
platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx, 100);
for_range(FillZero{ptr});
dev_ctx.Wait();
auto eigen_vec = framework::EigenVector<float>::Flatten(tensor);
auto& eigen_dev = *dev_ctx.eigen_device();
eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f);
}
} // namespace paddle
......@@ -46,7 +46,6 @@ class AllocatorFacadePrivate {
}
AllocatorFacadePrivate() {
std::cout << "Init Allocator Facade" << std::endl;
InitCPUAllocator();
InitCUDAAllocator();
}
......
......@@ -31,7 +31,6 @@ std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status)));
}
return std::unique_ptr<Allocation>(
new CUDAAllocation(ptr, size, platform::Place(place_)));
}
......
......@@ -72,7 +72,7 @@ cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col)
cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)
if(WITH_GPU)
nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function)
nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function)
nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function)
endif()
cc_test(concat_test SRCS concat_test.cc DEPS concat)
cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
......@@ -9,11 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#include <set>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/memory/memory.h"
#ifdef PADDLE_WITH_CUDA
......@@ -205,7 +205,7 @@ class CudnnHolder {
CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
: place_(place), cudnn_holder_(nullptr) {
SetDeviceId(place_.device);
CUDADeviceGuard guard(place_.device);
compute_capability = GetCUDAComputeCapability(place_.device);
multi_process = GetCUDAMultiProcessors(place_.device);
max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
......@@ -64,7 +65,7 @@ void InitP2P(std::vector<int> devices) {
LOG(WARNING) << "Cannot enable P2P access from " << devices[i]
<< " to " << devices[j];
} else {
cudaSetDevice(devices[i]);
platform::CUDADeviceGuard guard(devices[i]);
cudaDeviceEnablePeerAccess(devices[j], 0);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册