Unverified commit b532315d, authored by W wuyefeilin, committed by GitHub

[Phi] Move elementwise_floordiv and elementwise_pow to phi (#40993)

* mv floordiv to phi

* mv elementwise_pow to phi

* fix as review
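In outline, the commit deletes the fluid ElementwiseFloorDivKernel / ElementwisePowKernel classes and their REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL registrations, re-implements both ops (plus elementwise_pow_grad) as phi kernels registered through PD_REGISTER_KERNEL, and wires the old op names to the new kernels via argument-mapping functions. A minimal usage sketch of the functional wrappers this diff adds (the Example function, the tensor arguments, and the header path are illustrative assumptions, not part of the commit):

// Hypothetical caller of the phi helpers added below
// (FloorDivide / ElementwisePow); header path assumed.
#include "paddle/phi/kernels/elementwise_kernel.h"

void Example(const phi::CPUContext& dev_ctx,
             const phi::DenseTensor& ix, const phi::DenseTensor& iy,   // int64 tensors
             const phi::DenseTensor& fx, const phi::DenseTensor& fy) { // float tensors
  // Each helper runs ElementwiseInferMeta on the output and then
  // dispatches to the corresponding *_raw kernel with axis = -1.
  phi::DenseTensor fd = phi::FloorDivide<int64_t>(dev_ctx, ix, iy);   // ix // iy
  phi::DenseTensor pw = phi::ElementwisePow<float>(dev_ctx, fx, fy);  // fx ** fy
  (void)fd;
  (void)pw;
}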
Parent 59765362
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_floordiv_op.h"
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
@@ -63,12 +61,6 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(elementwise_floordiv, ops::ElementwiseOp,
ops::ElementwiseFloorDivOpMaker);
REGISTER_OP_CPU_KERNEL(
elementwise_floordiv,
ops::ElementwiseFloorDivKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwiseFloorDivKernel<paddle::platform::CPUDeviceContext,
int64_t>);
REGISTER_OP_VERSION(elementwise_floordiv)
.AddCheckpoint(
R"ROC(Register elementwise_floordiv for adding the attribute of Scale_y)ROC",
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_floordiv_op.h"
namespace paddle {
namespace operators {
template <typename T>
class ElementwiseFloorDivKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
std::vector<const framework::Tensor*> ins;
std::vector<framework::Tensor*> outs;
const auto& cuda_ctx =
ctx.template device_context<platform::CUDADeviceContext>();
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T,
T>(
cuda_ctx, ins, &outs, axis, FloorDivFunctor<T>());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
elementwise_floordiv,
ops::ElementwiseFloorDivKernel<plat::CUDADeviceContext, int>,
ops::ElementwiseFloorDivKernel<plat::CUDADeviceContext, int64_t>);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
void elementwise_floor_div(const framework::ExecutionContext &ctx,
const framework::Tensor *x,
const framework::Tensor *y, framework::Tensor *z) {
int axis = ctx.Attr<int>("axis");
auto x_dims = x->dims();
auto y_dims = y->dims();
if (x_dims.size() >= y_dims.size()) {
ElementwiseComputeEx<FloorDivFunctor<T>, DeviceContext, T>(
ctx, x, y, axis, FloorDivFunctor<T>(), z);
} else {
ElementwiseComputeEx<InverseFloorDivFunctor<T>, DeviceContext, T>(
ctx, x, y, axis, InverseFloorDivFunctor<T>(), z);
}
}
template <typename DeviceContext, typename T>
class ElementwiseFloorDivKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *x = ctx.Input<framework::LoDTensor>("X");
auto *y = ctx.Input<framework::LoDTensor>("Y");
auto *z = ctx.Output<framework::LoDTensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
// dtype of x and y is int64 or int32
elementwise_floor_div<DeviceContext, T>(ctx, x, y, z);
}
};
} // namespace operators
} // namespace paddle
@@ -49,23 +49,6 @@ using DivFunctor = phi::funcs::DivideFunctor<T>;
template <typename T>
using InverseDivFunctor = phi::funcs::InverseDivideFunctor<T>;
// Floor Divide
template <typename T>
struct FloorDivFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(a / b));
}
};
template <typename T>
struct InverseFloorDivFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(b / a));
}
};
#undef DIV_ERROR_INFO
// Maximum
......
@@ -9,8 +9,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_pow_op.h"
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
@@ -70,19 +68,6 @@ REGISTER_OPERATOR(elementwise_pow, ops::ElementwiseOp,
ops::ElementwisePowOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(elementwise_pow_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_pow,
ops::ElementwisePowKernel<paddle::platform::CPUDeviceContext, float>,
ops::ElementwisePowKernel<paddle::platform::CPUDeviceContext, double>,
ops::ElementwisePowKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwisePowKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_pow_grad,
ops::ElementwisePowGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ElementwisePowGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ElementwisePowGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ElementwisePowGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_VERSION(elementwise_pow)
.AddCheckpoint(
R"ROC(Register elementwise_pow for adding the attribute of Scale_y)ROC",
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_pow_op.h"
namespace ops = paddle::operators;
namespace paddle {
namespace operators {
template <typename T>
class ElementwisePowKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
std::vector<const framework::Tensor*> ins;
std::vector<framework::Tensor*> outs;
const auto& cuda_ctx =
ctx.template device_context<platform::CUDADeviceContext>();
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T,
T>(cuda_ctx, ins, &outs,
axis, PowFunctor<T>());
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_CUDA_KERNEL(
elementwise_pow,
ops::ElementwisePowKernel<paddle::platform::CUDADeviceContext, float>,
ops::ElementwisePowKernel<paddle::platform::CUDADeviceContext, double>,
ops::ElementwisePowKernel<paddle::platform::CUDADeviceContext, int>,
ops::ElementwisePowKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
elementwise_pow_grad,
ops::ElementwisePowGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ElementwisePowGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ElementwisePowGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::ElementwisePowGradKernel<paddle::platform::CUDADeviceContext,
int64_t>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cmath>
#include <type_traits>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace paddle {
namespace operators {
template <typename T>
struct PowFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
// TODO(wujionghao): A potential speed improvement is supporting different
// types in C++.
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
// On CUDAPlace, std::pow(3, 1) calls pow(float, float), and
// it may return a float number like 2.99..., which is floored to 2
// when cast to int by default, which is wrong.
// Use llrint to round it to the nearest integer, which is 3.
if (std::is_integral<T>::value) {
return std::llrint(
std::pow(static_cast<double>(a), static_cast<double>(b)));
}
#endif
return std::pow(a, b);
}
};
template <typename DeviceContext, typename T>
class ElementwisePowKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using Tensor = framework::LoDTensor;
auto* x = ctx.Input<Tensor>("X");
PADDLE_ENFORCE_EQ(x != nullptr, true,
platform::errors::NotFound(
"Cannot get input Variable X, Variable name = %s",
ctx.InputName("X")));
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
int axis = ctx.Attr<int>("axis");
ElementwiseComputeEx<PowFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
PowFunctor<T>(), z);
}
};
template <typename T>
struct PowGradDX {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
if (std::is_integral<T>::value) {
return dout * y *
std::pow(static_cast<double>(x), static_cast<double>(y - 1));
}
#endif
return dout * y * std::pow(x, y - 1);
}
};
template <typename T, typename Enable = void>
struct PowGradDY {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
if (std::is_integral<T>::value) {
return dout * std::log(static_cast<double>(x)) *
std::pow(static_cast<double>(x), static_cast<double>(y));
}
#endif
return dout * std::log(x) * std::pow(x, y);
}
};
template <typename DeviceContext, typename T>
class ElementwisePowGradKernel : public ElemwiseGradKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElemwiseGradKernel<T>::Compute(ctx);
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* out = dout;
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
int axis = ctx.Attr<int>("axis");
ElemwiseGradCompute<DeviceContext, T, PowGradDX<T>, PowGradDY<T>>(
ctx, *x, *y, *out, *dout, axis, dx, dy, PowGradDX<T>(), PowGradDY<T>());
}
};
} // namespace operators
} // namespace paddle
@@ -16,7 +16,6 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/elementwise/elementwise_pow_op.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle { namespace paddle {
......
@@ -323,3 +323,11 @@ PD_REGISTER_KERNEL(minimum_grad,
int,
int64_t,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(elementwise_pow_grad,
CPU,
ALL_LAYOUT,
phi::ElementwisePowGradKernel,
float,
double,
int,
int64_t) {}
@@ -113,6 +113,36 @@ void ModuloRawKernel(const Context& dev_ctx,
}
}
template <typename T, typename Context>
void FloorDivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
funcs::ElementwiseCompute<funcs::FloorDivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::FloorDivideFunctor<T>(), out);
} else {
funcs::ElementwiseCompute<funcs::InverseFloorDivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::InverseFloorDivideFunctor<T>(), out);
}
}
template <typename T, typename Context>
void ElementwisePowRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
funcs::ElementwiseCompute<funcs::ElementwisePowFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::ElementwisePowFunctor<T>(), out);
}
// Create the definition of Add
DEFINE_CPU_ELEMENTWISE_OP(Add)
@@ -207,3 +237,17 @@ PD_REGISTER_KERNEL(modulo_raw,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(floor_divide_raw,
CPU,
ALL_LAYOUT,
phi::FloorDivideRawKernel,
int,
int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow_raw,
CPU,
ALL_LAYOUT,
phi::ElementwisePowRawKernel,
float,
double,
int,
int64_t) {}
@@ -159,4 +159,13 @@ void MinimumGradKernel(const Context& dev_ctx,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void ElementwisePowGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
} // namespace phi
@@ -81,6 +81,25 @@ void ModuloKernel(const Context& dev_ctx,
int axis = -1;
ModuloRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
FloorDivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
ElementwisePowRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
using complex64 = ::phi::dtype::complex<float>;
@@ -151,6 +170,16 @@ PD_REGISTER_KERNEL(minimum,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(
modulo, CPU, ALL_LAYOUT, phi::ModuloKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(
floor_divide, CPU, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
CPU,
ALL_LAYOUT,
phi::ElementwisePowKernel,
float,
double,
int,
int64_t) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -226,4 +255,14 @@ PD_REGISTER_KERNEL(minimum,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(
modulo, GPU, ALL_LAYOUT, phi::ModuloKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(
floor_divide, GPU, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
GPU,
ALL_LAYOUT,
phi::ElementwisePowKernel,
float,
double,
int,
int64_t) {}
#endif
@@ -124,6 +124,32 @@ void ModuloKernel(const Context& dev_ctx,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void FloorDivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void ElementwisePowRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
@@ -200,4 +226,27 @@ DenseTensor Modulo(const Context& dev_ctx,
ModuloKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
template <typename T, typename Context>
DenseTensor FloorDivide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
FloorDivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
template <typename T, typename Context>
DenseTensor ElementwisePow(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
ElementwisePowKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
} // namespace phi
@@ -538,5 +538,40 @@ struct InverseModuloFunctor<
return res;
}
};
template <typename T>
struct FloorDivideFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(a / b));
}
};
template <typename T>
struct InverseFloorDivideFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(b / a));
}
};
template <typename T>
struct ElementwisePowFunctor {
inline HOSTDEVICE T operator()(const T a, const T b) const {
// TODO(wujionghao): A potential speed improvement is supporting different
// types in C++.
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
// On CUDAPlace, std::pow(3, 1) calls pow(float, float), and
// it may return a float number like 2.99..., which is floored to 2
// when cast to int by default, which is wrong.
// Use llrint to round it to the nearest integer, which is 3.
if (std::is_integral<T>::value) {
return std::llrint(
std::pow(static_cast<double>(a), static_cast<double>(b)));
}
#endif
return std::pow(a, b);
}
};
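A small host-side illustration of the rounding hazard the functor comment above guards against (a standalone demo, not part of the commit; on a CPU build std::pow(3, 1) is typically exact, so the near-integer value below is an assumption standing in for the device pow result):

#include <cmath>
#include <cstdio>

int main() {
  // A device pow(float, float) may land just below the exact result,
  // e.g. 2.9999997, which a plain integer cast truncates to 2.
  double approx = 2.9999997;                 // assumed device pow result
  int truncated = static_cast<int>(approx);  // 2: the wrong answer
  long long rounded = std::llrint(approx);   // 3: what the functor does
  std::printf("truncated=%d rounded=%lld\n", truncated, rounded);
  return 0;
}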
} // namespace funcs
} // namespace phi
@@ -382,3 +382,11 @@ PD_REGISTER_KERNEL(minimum_grad,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(elementwise_pow_grad,
GPU,
ALL_LAYOUT,
phi::ElementwisePowGradKernel,
float,
double,
int,
int64_t) {}
@@ -55,6 +55,10 @@ DEFINE_CUDA_ELEMENTWISE_OP(Maximum)
DEFINE_CUDA_ELEMENTWISE_OP(Minimum)
// Create the definition of Modulo
DEFINE_CUDA_ELEMENTWISE_OP(Modulo)
// Create the definition of FloorDivide
DEFINE_CUDA_ELEMENTWISE_OP(FloorDivide)
// Create the definition of Pow
DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow)
} // namespace phi
@@ -148,3 +152,17 @@ PD_REGISTER_KERNEL(modulo_raw,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(floor_divide_raw,
GPU,
ALL_LAYOUT,
phi::FloorDivideRawKernel,
int,
int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow_raw,
GPU,
ALL_LAYOUT,
phi::ElementwisePowRawKernel,
float,
double,
int,
int64_t) {}
@@ -666,4 +666,44 @@ struct MinGradDy {
return dout * static_cast<T>(x >= y);
}
};
template <typename T>
struct PowGradDX {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
if (std::is_integral<T>::value) {
return dout * y *
std::pow(static_cast<double>(x), static_cast<double>(y - 1));
}
#endif
return dout * y * std::pow(x, y - 1);
}
};
template <typename T, typename Enable = void>
struct PowGradDY {
HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
if (std::is_integral<T>::value) {
return dout * std::log(static_cast<double>(x)) *
std::pow(static_cast<double>(x), static_cast<double>(y));
}
#endif
return dout * std::log(x) * std::pow(x, y);
}
};
template <typename T, typename Context>
void ElementwisePowGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
phi::funcs::ElemwiseGradCompute<Context, T, PowGradDX<T>, PowGradDY<T>>(
dev_ctx, x, y, dout, dout, axis, dx, dy, PowGradDX<T>(), PowGradDY<T>());
}
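For reference, PowGradDX and PowGradDY above apply the chain rule to the standard analytic derivatives of z = x^y: dz/dx = y * x^(y-1) and dz/dy = x^y * ln(x), each scaled by the incoming gradient dout. The integral-T branches evaluate the same expressions in double precision to sidestep the device pow rounding issue described in the functor comment.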
} // namespace phi
@@ -82,6 +82,24 @@ KernelSignature ElementwiseModOpArgumentMapping(
return KernelSignature("modulo_raw", {"X", "Y"}, {"axis"}, {"Out"});
}
KernelSignature ElementwiseFloorDivOpArgumentMapping(
const ArgumentMappingContext& ctx) {
int axis = paddle::any_cast<int>(ctx.Attr("axis"));
if (axis == -1) {
return KernelSignature("floor_divide", {"X", "Y"}, {}, {"Out"});
}
return KernelSignature("floor_divide_raw", {"X", "Y"}, {"axis"}, {"Out"});
}
KernelSignature ElementwisePowOpArgumentMapping(
const ArgumentMappingContext& ctx) {
int axis = paddle::any_cast<int>(ctx.Attr("axis"));
if (axis == -1) {
return KernelSignature("elementwise_pow", {"X", "Y"}, {}, {"Out"});
}
return KernelSignature("elementwise_pow_raw", {"X", "Y"}, {"axis"}, {"Out"});
}
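Both mappings follow the raw/non-raw convention used by the other elementwise ops in this file: with the default axis of -1, the attribute is dropped and the plain kernel is chosen; any explicit axis routes to the *_raw variant that still takes it. A schematic of that selection (ChooseKernel is a hypothetical illustration, not Paddle API):

#include <string>

// Hypothetical sketch of the axis-based signature selection above.
std::string ChooseKernel(const std::string& base, int axis) {
  // axis == -1 is the default broadcast behavior, so the attribute can
  // be omitted and the non-raw kernel (which fixes axis = -1) used.
  return axis == -1 ? base : base + "_raw";
}

// ChooseKernel("floor_divide", -1)   -> "floor_divide"
// ChooseKernel("elementwise_pow", 0) -> "elementwise_pow_raw"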
KernelSignature ElementwiseAddGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("add_grad",
@@ -200,6 +218,13 @@ KernelSignature ElementwiseMinGradOpArgumentMapping(
{"axis"},
{GradVarName("X"), GradVarName("Y")});
}
KernelSignature ElementwisePowGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("elementwise_pow_grad",
{"X", "Y", GradVarName("Out")},
{"axis"},
{GradVarName("X"), GradVarName("Y")});
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(elementwise_add, add);
@@ -209,6 +234,7 @@ PD_REGISTER_BASE_KERNEL_NAME(elementwise_div, divide);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_max, maximum);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_min, minimum);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_mod, modulo);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_floordiv, floor_divide);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad, add_grad);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad_grad, add_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_add_triple_grad, add_triple_grad);
@@ -240,6 +266,10 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_min,
phi::ElementwiseMinOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_mod,
phi::ElementwiseModOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_floordiv,
phi::ElementwiseFloorDivOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_pow,
phi::ElementwisePowOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_add_grad,
phi::ElementwiseAddGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_add_grad_grad,
@@ -272,3 +302,5 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_max_grad,
phi::ElementwiseMaxGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_min_grad,
phi::ElementwiseMinGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_pow_grad,
phi::ElementwisePowGradOpArgumentMapping);