Unverified commit 03972d5a, authored by Chen Weihang, committed by GitHub

Fix incompatible error for custom op Placetype (#43749)

* fix incompatible error

* remove default constructor

* add macro

* fix cpu make error

* add DefaultGPUPlace api
Parent bafd8dec
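For context, a minimal usage sketch (not part of this commit's diff) of how a custom operator checks its input place after this change. The file name, the `my_forward` function, and the `paddle/extension.h` include are illustrative assumptions based on Paddle's usual custom-op setup:

// my_relu_op.cu (hypothetical file) -- minimal sketch, assuming the custom-op
// extension header and a CUDA/HIP build.
#include "paddle/extension.h"

std::vector<paddle::Tensor> my_forward(const paddle::Tensor& x) {
  // Before this commit, converting the deprecated paddle::PlaceType::kGPU to a
  // Place produced device id 0 regardless of the current device; the new
  // paddle::DefaultGPUPlace() resolves to the current device id.
  PD_CHECK(x.place() == paddle::DefaultGPUPlace());
  auto out = paddle::empty_like(x);
  // ... launch the CUDA kernel that fills `out` ...
  return {out};
}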
@@ -39,24 +39,6 @@ limitations under the License. */
namespace paddle {
namespace experimental {
-namespace detail {
-static Place GetCorrectPlaceByPlaceType(const Place &place_type) {
-  auto alloc_type = place_type.GetType();
-  switch (alloc_type) {
-    case AllocationType::CPU:
-      return place_type;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    case AllocationType::GPU:
-      return phi::Place(AllocationType::GPU,
-                        phi::backends::gpu::GetCurrentDeviceId());
-#endif
-    default:
-      PADDLE_THROW(phi::errors::Unavailable(
-          "The PlaceType is a legacy design, only supports CPU and GPU, "
-          "and will not support other place types in the future."));
-  }
-}
-}  // namespace detail
/////// Tensor Methods ////////
@@ -78,7 +60,7 @@ Tensor::Tensor(const Place &place) {
      "Reason: A legal tensor cannot be constructed only based on "
      "the `place`, and datatype, shape, layout, etc. is also "
      "required.";
-  DefaultAllocator alloc(detail::GetCorrectPlaceByPlaceType(place));
+  DefaultAllocator alloc(place);
  impl_ = std::move(std::make_shared<phi::DenseTensor>(
      &alloc,
      std::move(phi::DenseTensorMeta(
@@ -94,7 +76,7 @@ Tensor::Tensor(const Place &place, const std::vector<int64_t> &shape) {
      "Reason: A legal tensor cannot be constructed only based on "
      "the `place` and `shape`, and datatype, layout, etc. is also "
      "required.";
-  DefaultAllocator alloc(detail::GetCorrectPlaceByPlaceType(place));
+  DefaultAllocator alloc(place);
  impl_ = std::move(std::make_shared<phi::DenseTensor>(
      &alloc,
      std::move(phi::DenseTensorMeta(phi::DataType::FLOAT32,
......
-cc_library(phi_place SRCS place.cc)
+if(WITH_GPU)
+  nv_library(
+    phi_place
+    SRCS place.cc
+    DEPS phi_gpu_info)
+elseif(WITH_ROCM)
+  hip_library(
+    phi_place
+    SRCS place.cc
+    DEPS phi_gpu_info)
+else()
+  cc_library(phi_place SRCS place.cc)
+endif()
cc_library(
  scalar
  SRCS scalar.cc
......
@@ -20,6 +20,7 @@ limitations under the License. */
#include "glog/logging.h"
#include "paddle/phi/api/ext/exception.h"
+#include "paddle/phi/backends/gpu/gpu_info.h"
namespace phi {
@@ -109,14 +110,32 @@ uint32_t Place::Hash::operator()(const Place &place) const {
  return hash_value;
}
+namespace detail {
+static int8_t GetCorrectDeviceIdByPlaceType(
+    const paddle::PlaceType &place_type) {
+  switch (place_type) {
+    case paddle::PlaceType::kCPU:
+      return 0;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    case paddle::PlaceType::kGPU:
+      return phi::backends::gpu::GetCurrentDeviceId();
+#endif
+    default:
+      PD_THROW(
+          "The PlaceType is a legacy design, only supports CPU and GPU, "
+          "and will not support other place types in the future.");
+  }
+}
+}  // namespace detail
Place::Place(paddle::PlaceType type)
-    : device(0),
+    : device(detail::GetCorrectDeviceIdByPlaceType(type)),
      alloc_type_(static_cast<AllocationType>(type)),
      device_type_id_(GetOrRegisterGlobalDeviceTypeId("")) {
  LOG_FIRST_N(WARNING, 1)
      << "The `paddle::PlaceType::kCPU/kGPU` is deprecated since version "
         "2.3, and will be removed in version 2.4! Please use "
-         "`paddle::CPUPlace()/GPUPlace()` to represent the place type.";
+         "`paddle::CPUPlace()/DefaultGPUPlace()` to represent the place type.";
}
}  // namespace phi
@@ -139,4 +158,13 @@ bool operator==(PlaceType place_type, const Place &place) {
  return static_cast<AllocationType>(place_type) == place.GetType();
}
+GPUPlace DefaultGPUPlace() {
+  return GPUPlace(
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      phi::backends::gpu::GetCurrentDeviceId());
+#else
+      0);
+#endif
+}
}  // namespace paddle
@@ -256,4 +256,6 @@ enum class PlaceType {
PADDLE_API bool operator==(const Place& place, PlaceType place_type);
PADDLE_API bool operator==(PlaceType place_type, const Place& place);
+PADDLE_API GPUPlace DefaultGPUPlace();
}  // namespace paddle
@@ -55,6 +55,8 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
  CHECK_GPU_INPUT(x);
  auto out = paddle::empty_like(x);
+  PD_CHECK(x.place() == paddle::DefaultGPUPlace());
+
  int64_t numel = x.numel();
  int64_t block = 512;
  int64_t grid = (numel + block - 1) / block;
@@ -75,6 +77,8 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
  CHECK_GPU_INPUT(grad_out);
  auto grad_x = paddle::empty_like(x);
+  PD_CHECK(x.place() == paddle::DefaultGPUPlace());
+
  int64_t numel = out.numel();
  int64_t block = 512;
  int64_t grid = (numel + block - 1) / block;
@@ -101,12 +105,12 @@ std::vector<paddle::Tensor> relu_cuda_double_backward(
  int64_t grid = (numel + block - 1) / block;
  PD_DISPATCH_FLOATING_AND_HALF_TYPES(
      out.type(), "relu_cuda_double_backward_kernel", ([&] {
-        relu_cuda_double_backward_kernel<
-            data_t><<<grid, block, 0, out.stream()>>>(
+        relu_cuda_double_backward_kernel<data_t>
+            <<<grid, block, 0, out.stream()>>>(
                out.data<data_t>(),
                ddx.data<data_t>(),
                ddout.mutable_data<data_t>(out.place()),
                numel);
      }));
  std::cout << "Debug info: run relu gpu double backward success." << std::endl;
......
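A second small sketch (also not from the commit) of what the place.cc change implies for the deprecated PlaceType conversion, assuming a CUDA or HIP build and that phi::Place exposes GetDeviceId():

// Hedged sketch: expected behavior of the fixed Place(paddle::PlaceType)
// constructor alongside the new DefaultGPUPlace() helper.
#include <cassert>
#include "paddle/phi/common/place.h"

void check_default_gpu_place() {
  // The deprecated PlaceType still converts to a Place, but its device id is
  // now taken from the current device instead of being hard-coded to 0.
  phi::Place legacy(paddle::PlaceType::kGPU);
  paddle::GPUPlace current = paddle::DefaultGPUPlace();
  // On a GPU build both should name the same device (assumption: GetDeviceId()
  // is part of Place's public interface).
  assert(legacy.GetDeviceId() == current.GetDeviceId());
}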