From 941308c2e3975df8e6e9784ff1ae87339f01cee5 Mon Sep 17 00:00:00 2001
From: limingshu <61349199+JamesLim-sy@users.noreply.github.com>
Date: Fri, 4 Jun 2021 08:31:57 +0800
Subject: [PATCH] Reimplement logical functors with the new optimized
 elementwise function (#33089)

---
 .../fluid/operators/controlflow/logical_op.cu | 72 ++++++++++++++++---
 .../elementwise/elementwise_max_op.cu         |  1 -
 .../elementwise/elementwise_min_op.cu         |  1 -
 .../elementwise/elementwise_mul_op.cu         | 43 +----------
 .../elementwise/elementwise_op_broadcast.cu.h |  6 ++
 .../elementwise/elementwise_op_function.h     | 55 ++++++++++++--
 6 files changed, 120 insertions(+), 58 deletions(-)

diff --git a/paddle/fluid/operators/controlflow/logical_op.cu b/paddle/fluid/operators/controlflow/logical_op.cu
index 7ca54b488b..6cbcd516e0 100644
--- a/paddle/fluid/operators/controlflow/logical_op.cu
+++ b/paddle/fluid/operators/controlflow/logical_op.cu
@@ -13,12 +13,68 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/controlflow/logical_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
 
-REGISTER_BINARY_LOGICAL_KERNEL(logical_and, CUDA,
-                               paddle::operators::LogicalAndFunctor);
-REGISTER_BINARY_LOGICAL_KERNEL(logical_or, CUDA,
-                               paddle::operators::LogicalOrFunctor);
-REGISTER_UNARY_LOGICAL_KERNEL(logical_not, CUDA,
-                              paddle::operators::LogicalNotFunctor);
-REGISTER_BINARY_LOGICAL_KERNEL(logical_xor, CUDA,
-                               paddle::operators::LogicalXorFunctor);
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+namespace paddle {
+namespace operators {
+
+#define LOGICAL_BINARY_FUNCTOR(func_name, op)         \
+  template <typename T>                               \
+  struct func_name {                                  \
+    using ELEMENT_TYPE = T;                           \
+    HOSTDEVICE bool operator()(const T* args) const { \
+      return args[0] op args[1];                      \
+    }                                                 \
+  };
+
+LOGICAL_BINARY_FUNCTOR(CudaOrFunctor, ||)
+LOGICAL_BINARY_FUNCTOR(CudaAndFunctor, &&)
+LOGICAL_BINARY_FUNCTOR(CudaXorFunctor, ^)
+#undef LOGICAL_BINARY_FUNCTOR
+
+template <typename T>
+struct CudaNotFunctor {
+  using ELEMENT_TYPE = T;
+  HOSTDEVICE bool operator()(const T* args) const { return !args[0]; }
+};
+
+template <typename Functor>
+class BinaryLogicalOpKernel<platform::CUDADeviceContext, Functor>
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using InT = typename Functor::ELEMENT_TYPE;
+  using OutT = bool;
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto functor = Functor();
+    std::vector<const framework::Tensor*> ins;
+    std::vector<framework::Tensor*> outs;
+    const auto& cuda_ctx =
+        ctx.template device_context<platform::CUDADeviceContext>();
+    int axis = PackTensorsIntoVector<OutT>(ctx, &ins, &outs);
+
+    if (ins.size() == 1) {
+      LaunchElementwiseCudaKernel<ElementwiseType::kUnary, InT, OutT>(
+          cuda_ctx, ins, &outs, axis, functor);
+    } else {
+      LaunchElementwiseCudaKernel<ElementwiseType::kBinary, InT, OutT>(
+          cuda_ctx, ins, &outs, axis, functor);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+#define REGISTER_LOGICAL_CUDA_KERNEL(op_name, func) \
+  REGISTER_OP_CUDA_KERNEL(                          \
+      op_name,                                      \
+      ops::BinaryLogicalOpKernel<plat::CUDADeviceContext, ops::func<bool>>);
+
+REGISTER_LOGICAL_CUDA_KERNEL(logical_or, CudaOrFunctor)
+REGISTER_LOGICAL_CUDA_KERNEL(logical_and, CudaAndFunctor)
+REGISTER_LOGICAL_CUDA_KERNEL(logical_xor, CudaXorFunctor)
+REGISTER_LOGICAL_CUDA_KERNEL(logical_not, CudaNotFunctor)
+#undef REGISTER_LOGICAL_CUDA_KERNEL
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cu b/paddle/fluid/operators/elementwise/elementwise_max_op.cu
index 483b21d07f..d4b5d98d5b 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cu
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.cu.h"
 
 namespace ops = paddle::operators;
diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cu b/paddle/fluid/operators/elementwise/elementwise_min_op.cu
index 88faaf257a..4a99f7e367 100644
--- a/paddle/fluid/operators/elementwise/elementwise_min_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cu
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.cu.h"
 
 namespace ops = paddle::operators;
diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
index 973f2305cc..adcc18f837 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
@@ -36,52 +36,13 @@ class ElementwiseMulKernel<platform::CUDADeviceContext, T>
     : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    int axis = -1;
-    auto x_var = ctx.InputVar("X");
-    PADDLE_ENFORCE_NOT_NULL(
-        x_var, platform::errors::InvalidArgument(
-                   "Cannot get input Variable X, Variable name = %s.",
-                   ctx.InputName("X")));
-    auto* y = ctx.Input<framework::LoDTensor>("Y");
-
-    framework::Tensor x, *z;
+    framework::Tensor x_for_selectedrows;
     std::vector<const framework::Tensor*> ins;
     std::vector<framework::Tensor*> outs;
     const auto& cuda_ctx =
         ctx.template device_context<platform::CUDADeviceContext>();
 
-    if (x_var->IsType<framework::LoDTensor>()) {
-      x = x_var->Get<framework::LoDTensor>();
-      z = ctx.Output<framework::LoDTensor>("Out");
-      axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
-    } else if (x_var->IsType<framework::SelectedRows>()) {
-      PADDLE_ENFORCE_EQ(y->dims().size() == 1 && y->dims()[0] == 1, true,
-                        platform::errors::InvalidArgument(
-                            "For elementwise_op, if X is Sparse, Y must be "
-                            "scalar. But reveived the size of Y = %s.",
-                            y->dims().size()));
-      auto& x_sele = x_var->Get<framework::SelectedRows>();
-      auto out_sele = ctx.Output<framework::SelectedRows>("Out");
-      x = x_sele.value();
-      out_sele->set_rows(x_sele.rows());
-      out_sele->set_height(x_sele.height());
-      out_sele->mutable_value()->Resize(x_sele.value().dims());
-      out_sele->mutable_value()->mutable_data(ctx.GetPlace(), x.type());
-      z = ctx.Output<framework::SelectedRows>("Out")->mutable_value();
-      z->mutable_data<T>(ctx.GetPlace());
-      outs.emplace_back(z);
-      ins.emplace_back(&x);
-      ins.emplace_back(y);
-
-      axis = ctx.HasAttr("axis") ? ctx.Attr<int>("axis") : -1;
-      axis = axis == -1 ? std::abs(y->dims().size() - x.dims().size()) : axis;
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "X's type[%s] is not supported by elementwise_op. X's type should be "
-          "LoDTensor or SelectedRows.",
-          framework::ToTypeName(x_var->Type())));
-    }
-
+    int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs, &x_for_selectedrows);
     LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
         cuda_ctx, ins, &outs, axis, CudaMulFunctor<T>());
   }
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h b/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h
index 74216d6a9d..541ff9aacf 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h
@@ -509,15 +509,21 @@ void LaunchElementwiseCudaKernel(
     const platform::CUDADeviceContext &cuda_ctx,
     const std::vector<const framework::Tensor *> &ins,
     std::vector<framework::Tensor *> *outs, int axis, Functor func) {
+  std::vector<int> dims_size;
   bool no_broadcast_flag = true;
   for (auto *in : ins) {
     no_broadcast_flag = ins[0]->dims() == in->dims();
+    dims_size.emplace_back(in->dims().size());
   }
 
   if (no_broadcast_flag) {
     LaunchSameDimsElementwiseCudaKernel<ET, InT, OutT>(cuda_ctx, ins, outs,
                                                        func);
   } else {
+    axis = axis == -1
+               ? *std::max_element(dims_size.begin(), dims_size.end()) -
+                     *std::min_element(dims_size.begin(), dims_size.end())
+               : axis;
     LaunchBroadcastElementwiseCudaKernel<ET, InT, OutT>(cuda_ctx, ins, outs,
                                                         axis, func);
   }
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h
index d19c75eaf3..d09e777670 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -61,25 +61,66 @@ namespace paddle {
 namespace operators {
 
 /*
-* To pack the input and output tnesors into vector for
-* LaunchElementwiseCudaKernel
+* Pack input and output tensors into their respective vectors with
+* consideration of variable X's class type.
+* In this helper function, input variable X may be either a LoDTensor
+* or a SelectedRows class type. If X is of SelectedRows type, a valid
+* pointer x_for_selectedrows is expected to be passed in from the op
+* kernel so that the address of the LoDTensor created inside this
+* function can be obtained.
 */
 template <typename OutT>
 int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
                           std::vector<const framework::Tensor *> *ins,
-                          std::vector<framework::Tensor *> *outs) {
+                          std::vector<framework::Tensor *> *outs,
+                          framework::Tensor *x_for_selectedrows = nullptr) {
   int axis = -1;
-  auto *x = ctx.Input<framework::LoDTensor>("X");
+  auto x_var = ctx.InputVar("X");
+  PADDLE_ENFORCE_NOT_NULL(
+      x_var, platform::errors::InvalidArgument(
+                 "Unable to get input Variable X, Variable name is %s.\n",
+                 ctx.InputName("X")));
   auto *y = ctx.Input<framework::LoDTensor>("Y");
-  auto *z = ctx.Output<framework::LoDTensor>("Out");
+  framework::Tensor *z;
+
+  if (x_var->IsType<framework::LoDTensor>()) {
+    auto *x = ctx.Input<framework::LoDTensor>("X");
+    z = ctx.Output<framework::LoDTensor>("Out");
+    ins->emplace_back(x);
+  } else if (x_var->IsType<framework::SelectedRows>()) {
+    PADDLE_ENFORCE_EQ(y->dims().size() == 1 && y->dims()[0] == 1, true,
+                      platform::errors::InvalidArgument(
+                          "For elementwise_op, if X is Sparse, Y must be "
+                          "scalar. But received the size of Y = %d.",
+                          y->dims().size()));
+    PADDLE_ENFORCE_NOT_NULL(
+        x_for_selectedrows,
+        platform::errors::InvalidArgument(
+            "The parameter x_for_selectedrows is expected to "
+            "be valid when input variable X's class type is "
+            "SelectedRows.\n"));
+    auto &x_sele = x_var->Get<framework::SelectedRows>();
+    auto out_sele = ctx.Output<framework::SelectedRows>("Out");
+    *x_for_selectedrows = x_sele.value();
+    out_sele->set_rows(x_sele.rows());
+    out_sele->set_height(x_sele.height());
+    out_sele->mutable_value()->Resize(x_sele.value().dims());
+    out_sele->mutable_value()->mutable_data(ctx.GetPlace(),
+                                            x_for_selectedrows->type());
+    z = ctx.Output<framework::SelectedRows>("Out")->mutable_value();
+    ins->emplace_back(x_for_selectedrows);
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "X's type[%s] is not supported by elementwise_op. X's type should be "
+        "LoDTensor or SelectedRows.",
+        framework::ToTypeName(x_var->Type())));
+  }
   z->mutable_data<OutT>(ctx.GetPlace());
   outs->emplace_back(z);
-  ins->emplace_back(x);
 
   if (y != nullptr) {
     ins->emplace_back(y);
     axis = ctx.HasAttr("axis") ? ctx.Attr<int>("axis") : -1;
-    axis = axis == -1 ? std::abs(y->dims().size() - x->dims().size()) : axis;
  }
  return axis;
 }
-- 
GitLab
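
Usage note (not part of the patch): the sketch below illustrates how the
pattern this patch introduces is meant to be consumed by other CUDA
elementwise kernels. It is a minimal, hypothetical example: the names
CudaAddFunctor and ExampleAddKernel are invented for illustration, and the
snippet assumes it is compiled inside the Paddle source tree so the headers
and types used above are available. It follows the same shape as the
BinaryLogicalOpKernel added in logical_op.cu: a functor that reads its
operands from a packed argument array, PackTensorsIntoVector to gather
X/Y/Out and the broadcast axis, and LaunchElementwiseCudaKernel to dispatch
with an arity tag.

#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"

namespace paddle {
namespace operators {

// Hypothetical binary functor in the CudaAndFunctor/CudaOrFunctor style
// above: both operands arrive through one packed pointer.
template <typename T>
struct CudaAddFunctor {
  using ELEMENT_TYPE = T;
  HOSTDEVICE T operator()(const T* args) const { return args[0] + args[1]; }
};

// Hypothetical kernel showing the intended call sequence.
template <typename T>
class ExampleAddKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    std::vector<const framework::Tensor*> ins;
    std::vector<framework::Tensor*> outs;
    const auto& cuda_ctx =
        ctx.template device_context<platform::CUDADeviceContext>();
    // Packs X (LoDTensor or SelectedRows) and Y into ins, allocates Out
    // into outs, and returns the broadcast axis attribute (or -1).
    int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
    // kBinary because two inputs were packed; the launcher itself picks
    // the same-dims fast path or the broadcast path at runtime.
    LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
        cuda_ctx, ins, &outs, axis, CudaAddFunctor<T>());
  }
};

}  // namespace operators
}  // namespace paddle

Registration would then follow the REGISTER_OP_CUDA_KERNEL pattern shown in
logical_op.cu.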