From 04025237ea5856bc517f96b39a7f5c10006d838a Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Mon, 27 Mar 2023 15:25:24 +0800 Subject: [PATCH] [CustomOP Inplace] Automap inplace dtype and shape, support vector output (#52114) * [CustomOP Inplace] Automap inplace dtype and shape, prepare for vector output * delete dtype,shape func of multi_inplace op * [CustomOP Inplace] Automap inplace dtype and shape, support vector output --- paddle/fluid/framework/custom_operator.cc | 466 ++++++++++++------ paddle/fluid/pybind/eager_functions.cc | 1 + paddle/phi/api/ext/op_meta_info.h | 27 +- paddle/phi/api/lib/op_meta_info.cc | 4 + .../fluid/tests/custom_op/custom_inplace.cc | 123 +++-- .../tests/custom_op/test_custom_inplace.py | 222 ++++++++- 6 files changed, 640 insertions(+), 203 deletions(-) diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index b5e7818a7bf..70df9a34ed7 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -268,15 +268,15 @@ static void RunKernelFunc( for (size_t i = 0; i < outputs.size(); ++i) { auto out_name = outputs[i]; if (detail::IsDuplicableVar(out_name)) { - PADDLE_ENFORCE(i == 0UL && outputs.size() == 1UL, - platform::errors::PreconditionNotMet( - "If custom operator's outputs contains `paddle::Vec(" - ")` type, " - "it only can hold one output.")); + PADDLE_ENFORCE( + !inplace_map.empty() || (i == 0UL && outputs.size() == 1UL), + phi::errors::PreconditionNotMet( + "If custom operator's outputs contains `paddle::Vec()` type " + "without setting InplaceMap, it only can hold one output.")); auto vec_out = ctx.MultiOutput(out_name); PADDLE_ENFORCE_NE(vec_out.empty(), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output vector (%s) is empty.", out_name)); std::vector custom_vec_out; for (size_t j = 0; j < vec_out.size(); ++j) { @@ -359,11 +359,67 @@ static void RunKernelFunc( } } -static void 
RunInferShapeFunc(framework::InferShapeContext* ctx, - const paddle::InferShapeFunc& func, - const std::vector& inputs, - const std::vector& outputs, - const std::vector& attrs) { +static void RunDefaultInferShapeFunc( + framework::InferShapeContext* ctx, + const std::vector& inputs, + const std::vector& outputs, + const std::unordered_map& inplace_map) { + if (inplace_map.empty()) { // general case, assure single input and output + PADDLE_ENFORCE_EQ( + inputs.size(), + 1UL, + phi::errors::Unavailable( + "Your custom operator contains multiple inputs. " + "We only allow a custom operator that contains only one input " + "and only one output without setting the InferShapeFn. " + "At this time, the input shape will be directly set to " + "the output shape.\n" + "Please set the InferShapeFn of custom " + "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); + PADDLE_ENFORCE_EQ( + outputs.size(), + 1UL, + phi::errors::Unavailable( + "Your custom operator contains multiple outputs. " + "We only allow a custom operator that contains only one input " + "and only one output without setting the InferShapeFn. " + "At this time, the input shape will be directly set to " + "the output shape.\n" + "Please set the InferShapeFn of custom " + "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); + + VLOG(3) << "Custom Operator: Default InferShape - share ddim."; + ctx->ShareDim(inputs[0], outputs[0]); + } else { // inplace case + PADDLE_ENFORCE_EQ( + inplace_map.size(), + outputs.size(), + phi::errors::Unavailable( + "Your custom operator uses `SetInplaceMap` without setting the " + "InferShapeFn. However, `Outputs` size = %d does not match the " + "`InplaceMap` size = %d. 
Please check `SetInplaceMap` again or set " + "the InferShapeFn of custom operator by " + "`.SetInferShapeFn(PD_INFER_SHAPE(...)`)", + outputs.size(), + inplace_map.size())); + for (auto const& pair : inplace_map) { + if (detail::IsDuplicableVar(pair.first)) { + ctx->SetOutputsDim(pair.second, ctx->GetInputsDim(pair.first)); + } else { + ctx->ShareDim(pair.first, pair.second); + } + } + } +} + +static void RunInferShapeFunc( + framework::InferShapeContext* ctx, + const paddle::InferShapeFunc& func, + const std::vector& inputs, + const std::vector& outputs, + const std::vector& attrs, + const std::unordered_map& inplace_map, + const std::unordered_map& inplace_reverse_map) { std::vector> input_shapes; std::vector>> vec_input_shapes; @@ -450,22 +506,220 @@ static void RunInferShapeFunc(framework::InferShapeContext* ctx, VLOG(3) << "Custom Operator: InferShape - calc output ddim."; auto output_shapes = func(input_shapes, vec_input_shapes, custom_attrs); + if (inplace_map.empty()) { + PADDLE_ENFORCE_EQ(outputs.size(), + output_shapes.size(), + phi::errors::InvalidArgument( + "Your custom operator has set the InferShapeFn. " + "However, `Outputs` size = %d does not match the " + "returned vector size of InferShapeFn = %d. Please " + "check InferShapeFn again.", + outputs.size(), + output_shapes.size())); + } else { + PADDLE_ENFORCE_EQ( + outputs.size(), + output_shapes.size() + inplace_map.size(), + phi::errors::InvalidArgument( + "Your custom operator uses `SetInplaceMap` and sets the " + "InferShapeFn. However, `Outputs` size = %d does not match the " + "`InplaceMap size + InferShapeFn output size` = %d. 
Please check " + "InplaceMap and InferShapeFn again", + outputs.size(), + output_shapes.size() + inplace_map.size())); + } - VLOG(3) << "Custom Operator: InferShape - set output ddim."; + VLOG(3) + << "Custom Operator: InferShape - set output ddim: inplace_map.size() = " + << inplace_map.size() + << ", output_shapes.size() = " << output_shapes.size(); + size_t output_shape_idx = 0; for (size_t i = 0; i < outputs.size(); ++i) { auto out_name = outputs[i]; if (detail::IsDuplicableVar(out_name)) { - std::vector vec_ddim; - vec_ddim.reserve(output_shapes.size()); - std::transform(output_shapes.begin(), - output_shapes.end(), - std::back_inserter(vec_ddim), - [&](const std::vector& shape) -> DDim { - return phi::make_ddim(shape); - }); - ctx->SetOutputsDim(out_name, vec_ddim); + PADDLE_ENFORCE( + inplace_reverse_map.find(out_name) != inplace_reverse_map.end(), + phi::errors::InvalidArgument( + "Custom operator only supports `paddle::Vec(...)` inputs and " + "cannot support `paddle::Vec(...)` output without setting " + "InplaceMap. 
If you have to use `paddle::Vec(...)` output, " + "please indicate it by setting InplaceMap manully.")); + auto in_name = inplace_reverse_map.at(out_name); + ctx->SetOutputsDim(out_name, ctx->GetInputsDim(in_name)); } else { - ctx->SetOutputDim(out_name, phi::make_ddim(output_shapes[i])); + if (inplace_reverse_map.find(out_name) != inplace_reverse_map.end()) { + // Share dims between inplace inputs and outputs + ctx->ShareDim(inplace_reverse_map.at(out_name), out_name); + } else { + // Set output dims by the output of InferShapeFn + ctx->SetOutputDim(out_name, + phi::make_ddim(output_shapes[output_shape_idx++])); + } + } + } +} + +static void RunDefaultInferDtypeFunc( + framework::InferVarTypeContext* ctx, + const std::vector& inputs, + const std::vector& outputs, + const std::unordered_map& inplace_map) { + if (inplace_map.empty()) { // general case, assure single input and output + PADDLE_ENFORCE_EQ( + inputs.size(), + 1UL, + platform::errors::Unavailable( + "Your custom operator contains multiple inputs. " + "We only allow a custom operator that contains only one input " + "and only one output without setting the InferDtypeFn. " + "At this time, the input dtype will be directly set to " + "the output dtype.\n" + "Please set the InferDtypeFn of custom " + "operator by `.SetInferDtypeFn(PD_INFER_DTYPE(...))`")); + PADDLE_ENFORCE_EQ( + outputs.size(), + 1UL, + platform::errors::Unavailable( + "Your custom operator contains multiple outputs. " + "We only allow a custom operator that contains only one input " + "and only one output without setting the InferDtypeFn. 
" + "At this time, the input dtype will be directly set to " + "the output dtype.\n" + "Please set the InferDtypeFn of custom " + "operator by `.SetInferDtypeFn(PD_INFER_DTYPE(...))`")); + + VLOG(3) << "Custom Operator: InferDtype - share dtype."; + auto dtype = ctx->GetInputDataType(inputs[0]); + ctx->SetOutputDataType(outputs[0], dtype); + } else { // inplace case + PADDLE_ENFORCE_EQ( + inplace_map.size(), + outputs.size(), + phi::errors::Unavailable( + "Your custom operator uses `SetInplaceMap` without setting the " + "InferDtypeFn. However, `Outputs` size = %d does not match the " + "`InplaceMap` size = %d. Please check `SetInplaceMap` again or set " + "the InferDtypeFn of custom operator by " + "`.SetInferDtypeFn(PD_INFER_DTYPE(...))`", + outputs.size(), + inplace_map.size())); + for (auto const& pair : inplace_map) { + VLOG(3) << "Custom Operator: InferDtype - inplace dtype: " << pair.first + << "->" << pair.second; + if (detail::IsDuplicableVar(pair.first)) { + size_t size = ctx->InputSize(pair.first); + for (size_t i = 0; i < size; ++i) { + auto dtype = ctx->GetInputDataType(pair.first, i); + ctx->SetOutputDataType(pair.second, dtype, i); + } + } else { + auto dtype = ctx->GetInputDataType(pair.first); + ctx->SetOutputDataType(pair.second, dtype); + } + } + } +} + +static void RunInferDtypeFunc( + framework::InferVarTypeContext* ctx, + const paddle::InferDtypeFunc& func, + const std::vector& inputs, + const std::vector& outputs, + const std::unordered_map& inplace_map, + const std::unordered_map& inplace_reverse_map) { + std::vector input_dtypes; + std::vector> vec_input_dtypes; + + VLOG(3) << "Custom Operator: InferDtype - get input dtype."; + for (auto& in_name : inputs) { + if (detail::IsDuplicableVar(in_name)) { + std::vector vec_custom_dtype; + if (ctx->HasInput(in_name)) { // general inputs + for (size_t i = 0; i < ctx->InputSize(in_name); ++i) { + auto dtype = ctx->GetInputDataType(in_name, i); + vec_custom_dtype.emplace_back( + 
paddle::framework::TransToPhiDataType(dtype)); + } + } else { // optional inputs, `vec_custom_dtype` is empty + PADDLE_ENFORCE( + detail::IsOptionalVar(in_name), + phi::errors::NotFound("Your custom operator's InferDtypeFn " + "cannot find input parameter `%s`", + in_name)); + VLOG(3) << "Custom Operator: InferDtypeFn's vector input " << in_name + << " is optional dtype with None input"; + } + vec_input_dtypes.emplace_back(vec_custom_dtype); + } else { + if (ctx->HasInput(in_name)) { // general inputs + auto dtype = ctx->GetInputDataType(in_name); + input_dtypes.emplace_back(paddle::framework::TransToPhiDataType(dtype)); + } else { // optional inputs + PADDLE_ENFORCE( + detail::IsOptionalVar(in_name), + phi::errors::NotFound("Your custom operator's InferDtypeFn " + "cannot find input parameter `%s`", + in_name)); + input_dtypes.emplace_back(DataType::UNDEFINED); + VLOG(3) << "Custom Operator: InferDtypeFn's input " << in_name + << " is optional dtype with None input"; + } + } + } + + VLOG(3) << "Custom Operator: InferDtype - infer output dtype."; + auto output_dtypes = func(input_dtypes, vec_input_dtypes); + if (inplace_map.empty()) { + PADDLE_ENFORCE_EQ(outputs.size(), + output_dtypes.size(), + phi::errors::InvalidArgument( + "Your custom operator has set the InferDtypeFn. " + "However, `Outputs` size = %d does not match the " + "returned vector size of InferDtypeFn = %d. Please " + "check InferDtypeFn again.", + outputs.size(), + output_dtypes.size())); + } else { + PADDLE_ENFORCE_EQ( + outputs.size(), + output_dtypes.size() + inplace_map.size(), + phi::errors::InvalidArgument( + "Your custom operator uses `SetInplaceMap` and sets the " + "InferDtypeFn. However, `Outputs` size = %d does not match the " + "`InplaceMap size + InferDtypeFn output size` = %d. 
Please check " + "InplaceMap and InferDtypeFn again", + outputs.size(), + output_dtypes.size() + inplace_map.size())); + } + + VLOG(3) + << "Custom Operator: InferDtype - set output dtype: inplace_map.size() = " + << inplace_map.size() + << ", output_dtypes.size() = " << output_dtypes.size(); + size_t output_dtype_idx = 0; + for (size_t i = 0; i < outputs.size(); ++i) { + auto out_name = outputs[i]; + if (detail::IsDuplicableVar(out_name)) { + PADDLE_ENFORCE( + inplace_reverse_map.find(out_name) != inplace_reverse_map.end(), + phi::errors::InvalidArgument( + "Custom operator only supports `paddle::Vec(...)` inputs and " + "cannot support `paddle::Vec(...)` output without setting " + "InplaceMap. If you have to use `paddle::Vec(...)` output, " + "please indicate it by setting InplaceMap manully.")); + auto in_name = inplace_reverse_map.at(out_name); + ctx->SetOutputDataTypes(out_name, ctx->GetInputDataTypes(in_name)); + } else { + if (inplace_reverse_map.find(out_name) != inplace_reverse_map.end()) { + auto in_name = inplace_reverse_map.at(out_name); + // Share dtype between inplace inputs and outputs + ctx->SetOutputDataType(out_name, ctx->GetInputDataType(in_name)); + } else { + // Set output dtype by the output of InferDtypeFn + ctx->SetOutputDataType(out_name, + paddle::framework::TransToProtoVarType( + output_dtypes[output_dtype_idx++])); + } } } } @@ -822,6 +1076,8 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, auto& op_outputs = OpMetaInfoHelper::GetOutputs(base_op_meta); auto& op_attrs = OpMetaInfoHelper::GetAttrs(base_op_meta); auto& op_inplace_map = OpMetaInfoHelper::GetInplaceMap(base_op_meta); + auto& op_inplace_reverse_map = + OpMetaInfoHelper::GetInplaceReverseMap(base_op_meta); auto& kernel_fn = OpMetaInfoHelper::GetKernelFn(base_op_meta); auto& infer_shape_func = OpMetaInfoHelper::GetInferShapeFn(base_op_meta); auto& infer_dtype_func = OpMetaInfoHelper::GetInferDtypeFn(base_op_meta); @@ -873,134 +1129,47 @@ void 
RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, // InferShape if (infer_shape_func == nullptr) { // use default InferShape - info.infer_shape_ = [op_inputs, op_outputs](InferShapeContext* ctx) { - PADDLE_ENFORCE_EQ( - op_inputs.size(), - 1UL, - platform::errors::Unavailable( - "Your custom operator contains multiple inputs. " - "We only allow a custom operator that contains only one input " - "and only one output without setting the InferShapeFn. " - "At this time, the input shape will be directly set to " - "the output shape.\n" - "Please set the InferShapeFn of custom " - "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); - PADDLE_ENFORCE_EQ( - op_outputs.size(), - 1UL, - platform::errors::Unavailable( - "Your custom operator contains multiple outputs. " - "We only allow a custom operator that contains only one input " - "and only one output without setting the InferShapeFn. " - "At this time, the input shape will be directly set to " - "the output shape.\n" - "Please set the InferShapeFn of custom " - "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))")); - - VLOG(3) << "Custom Operator: Default InferShape - share ddim."; - ctx->ShareDim(op_inputs[0], op_outputs[0]); - }; + info.infer_shape_ = + [op_inputs, op_outputs, op_inplace_map](InferShapeContext* ctx) { + RunDefaultInferShapeFunc(ctx, op_inputs, op_outputs, op_inplace_map); + }; } else { - info.infer_shape_ = [op_inputs, op_outputs, op_attrs, infer_shape_func]( - InferShapeContext* ctx) { - RunInferShapeFunc(ctx, infer_shape_func, op_inputs, op_outputs, op_attrs); + info.infer_shape_ = [op_inputs, + op_outputs, + op_attrs, + op_inplace_map, + op_inplace_reverse_map, + infer_shape_func](InferShapeContext* ctx) { + RunInferShapeFunc(ctx, + infer_shape_func, + op_inputs, + op_outputs, + op_attrs, + op_inplace_map, + op_inplace_reverse_map); }; } // Infer Dtype if (infer_dtype_func == nullptr) { // use default InferDtype - info.infer_var_type_ = [op_inputs, op_outputs](InferVarTypeContext* 
ctx) { - PADDLE_ENFORCE_EQ( - op_inputs.size(), - 1UL, - platform::errors::Unavailable( - "Your custom operator contains multiple inputs. " - "We only allow a custom operator that contains only one input " - "and only one output without setting the InferDtypeFn. " - "At this time, the input dtype will be directly set to " - "the output dtype.\n" - "Please set the InferDtypeFn of custom " - "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))")); - PADDLE_ENFORCE_EQ( - op_outputs.size(), - 1UL, - platform::errors::Unavailable( - "Your custom operator contains multiple outputs. " - "We only allow a custom operator that contains only one input " - "and only one output without setting the InferDtypeFn. " - "At this time, the input dtype will be directly set to " - "the output dtype.\n" - "Please set the InferDtypeFn of custom " - "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))")); - - VLOG(3) << "Custom Operator: InferDtype - share dtype."; - auto dtype = ctx->GetInputDataType(op_inputs[0]); - ctx->SetOutputDataType(op_outputs[0], dtype); - }; - } else { info.infer_var_type_ = - [op_inputs, op_outputs, infer_dtype_func](InferVarTypeContext* ctx) { - std::vector input_dtypes; - std::vector> vec_input_dtypes; - - VLOG(3) << "Custom Operator: InferDtype - get input dtype."; - for (auto& in_name : op_inputs) { - if (detail::IsDuplicableVar(in_name)) { - std::vector vec_custom_dtype; - if (ctx->HasInput(in_name)) { // general inputs - for (size_t i = 0; i < ctx->InputSize(in_name); ++i) { - auto dtype = ctx->GetInputDataType(in_name, i); - vec_custom_dtype.emplace_back( - paddle::framework::TransToPhiDataType(dtype)); - } - } else { // optional inputs, `vec_custom_dtype` is empty - PADDLE_ENFORCE( - detail::IsOptionalVar(in_name), - phi::errors::NotFound("Your custom operator's InferDtypeFn " - "cannot find input parameter `%s`", - in_name)); - VLOG(3) << "Custom Operator: InferDtypeFn's vector input " - << in_name << " is optional dtype with None input"; - } - 
vec_input_dtypes.emplace_back(vec_custom_dtype); - } else { - if (ctx->HasInput(in_name)) { // general inputs - auto dtype = ctx->GetInputDataType(in_name); - input_dtypes.emplace_back( - paddle::framework::TransToPhiDataType(dtype)); - } else { // optional inputs - PADDLE_ENFORCE( - detail::IsOptionalVar(in_name), - phi::errors::NotFound("Your custom operator's InferDtypeFn " - "cannot find input parameter `%s`", - in_name)); - input_dtypes.emplace_back(DataType::UNDEFINED); - VLOG(3) << "Custom Operator: InferDtypeFn's input " << in_name - << " is optional dtype with None input"; - } - } - } - - VLOG(3) << "Custom Operator: InferDtype - infer output dtype."; - auto output_dtypes = infer_dtype_func(input_dtypes, vec_input_dtypes); - - VLOG(3) << "Custom Operator: InferDtype - set output dtype."; - for (size_t i = 0; i < op_outputs.size(); ++i) { - auto out_name = op_outputs[i]; - if (detail::IsDuplicableVar(out_name)) { - for (size_t j = 0; j < output_dtypes.size(); ++j) { - auto dtype = - paddle::framework::TransToProtoVarType(output_dtypes[i]); - ctx->SetOutputDataType(out_name, dtype, j); - } - } else { - ctx->SetOutputDataType( - out_name, - paddle::framework::TransToProtoVarType(output_dtypes[i])); - } - } + [op_inputs, op_outputs, op_inplace_map](InferVarTypeContext* ctx) { + RunDefaultInferDtypeFunc(ctx, op_inputs, op_outputs, op_inplace_map); }; + } else { + info.infer_var_type_ = [op_inputs, + op_outputs, + op_inplace_map, + op_inplace_reverse_map, + infer_dtype_func](InferVarTypeContext* ctx) { + RunInferDtypeFunc(ctx, + infer_dtype_func, + op_inputs, + op_outputs, + op_inplace_map, + op_inplace_reverse_map); + }; } // Kernel func @@ -1022,6 +1191,8 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, auto& grad_op_outputs = OpMetaInfoHelper::GetOutputs(cur_grad_op); auto& grad_op_attrs = OpMetaInfoHelper::GetAttrs(cur_grad_op); auto& grad_op_inplace_map = OpMetaInfoHelper::GetInplaceMap(cur_grad_op); + auto& grad_op_inplace_reverse_map 
= + OpMetaInfoHelper::GetInplaceReverseMap(cur_grad_op); auto& grad_kernel_fn = OpMetaInfoHelper::GetKernelFn(cur_grad_op); auto& grad_infer_shape_fn = OpMetaInfoHelper::GetInferShapeFn(cur_grad_op); @@ -1092,6 +1263,13 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, return new CustomOperator(type, inputs, outputs, attrs); }; + // Inplace + if (!grad_op_inplace_map.empty()) { + grad_info.infer_inplace_ = [grad_op_inplace_map](bool use_cuda) { + return grad_op_inplace_map; + }; + } + // Grad InferShape if (grad_infer_shape_fn == nullptr) { grad_info.infer_shape_ = [grad_op_inputs, @@ -1135,12 +1313,16 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, grad_info.infer_shape_ = [grad_op_inputs, grad_op_outputs, grad_op_attrs, + grad_op_inplace_map, + grad_op_inplace_reverse_map, grad_infer_shape_fn](InferShapeContext* ctx) { RunInferShapeFunc(ctx, grad_infer_shape_fn, grad_op_inputs, grad_op_outputs, - grad_op_attrs); + grad_op_attrs, + grad_op_inplace_map, + grad_op_inplace_reverse_map); }; } diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 7475ab7321d..47a06885809 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -518,6 +518,7 @@ static PyObject* eager_api_run_custom_op(PyObject* self, "sure you registered your op first and try again. 
", op_type)); VLOG(7) << "Run Kernel of Custom Op: " << op_type; + // TODO(HongyuJia): Optimize Attrs Cast naming and implementation std::vector res_attrs = CastAttrsToTargetType( ctx.Attrs(), paddle::OpMetaInfoHelper::GetAttrs(meta_info_map.at(op_type)[0])); diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h index c9c94ceb112..1fb34481b1b 100644 --- a/paddle/phi/api/ext/op_meta_info.h +++ b/paddle/phi/api/ext/op_meta_info.h @@ -196,6 +196,7 @@ struct KernelFuncImpl { template struct ComputeCallHelper; + // Handle args for general Tensor input case template struct ComputeCallHelper { template @@ -209,6 +210,7 @@ struct KernelFuncImpl { } }; + // Handle args for optional Tensor input case template struct ComputeCallHelper&, Tail...> { template @@ -228,6 +230,7 @@ struct KernelFuncImpl { } }; + // Handle args for general vector input case template struct ComputeCallHelper&, Tail...> { template @@ -241,6 +244,7 @@ struct KernelFuncImpl { } }; + // Handle args for optional vector input case template struct ComputeCallHelper>&, Tail...> { @@ -293,6 +297,7 @@ struct KernelFuncImpl { // Used to be compatible with 2.3 released internal inplace interface, not // recommended + // Handle args for compatible inplace case template struct ComputeCallHelper { template @@ -310,6 +315,7 @@ struct KernelFuncImpl { // recommended // TODO(chenweihang): What is the appropriate output form? // std::vector*? or std::vector? or std::vector* + // Handle args for compatible inplace case template struct ComputeCallHelper, Tail...> { template @@ -323,7 +329,7 @@ struct KernelFuncImpl { } }; - // Handle Tensor& for inplace case + // Handle args for inplace Tensor case template struct ComputeCallHelper { template @@ -337,6 +343,20 @@ struct KernelFuncImpl { } }; + // Handle args for inplace vector case + template + struct ComputeCallHelper&, Tail...> { + template + static void Compute(CustomOpKernelContext* ctx, PreviousArgs&... 
pargs) { + auto& range = ctx->InputRangeAt(in_idx); + auto arg = ctx->InputsBetween(range.first, range.second); + ComputeCallHelper< + Tail...>::template Compute(ctx, + pargs..., + arg); + } + }; + template struct ComputeReturnHelper; @@ -739,6 +759,7 @@ class PADDLE_API OpMetaInfo { std::vector outputs_; std::vector attrs_; std::unordered_map inplace_map_; + std::unordered_map inplace_reverse_map_; // 2. func info KernelFunc kernel_fn_{nullptr}; InferShapeFunc infer_shape_fn_{nullptr}; @@ -767,6 +788,10 @@ class OpMetaInfoHelper { const paddle::OpMetaInfo& info) { return info.inplace_map_; } + static const std::unordered_map& + GetInplaceReverseMap(const paddle::OpMetaInfo& info) { + return info.inplace_reverse_map_; + } static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) { return info.kernel_fn_; } diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc index 35b8267873e..eef082f18b1 100644 --- a/paddle/phi/api/lib/op_meta_info.cc +++ b/paddle/phi/api/lib/op_meta_info.cc @@ -134,6 +134,7 @@ const std::pair& CustomOpKernelContext::OutputRangeAt( // handle inplace mechanism // Find out non-inplace output tensors. 
+// TODO(HongyuJia): Add cache for inplace_tensor_map_ to optimize performance void CustomOpKernelContext::MapPlainOutputs( const std::vector& inputs, const std::vector& outputs, @@ -215,6 +216,9 @@ OpMetaInfo& OpMetaInfo::SetInplaceMap( std::unordered_map&& inplace_map) { inplace_map_ = std::forward>(inplace_map); + for (const auto& pair : inplace_map_) { + inplace_reverse_map_[pair.second] = pair.first; + } return *this; } OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) { diff --git a/python/paddle/fluid/tests/custom_op/custom_inplace.cc b/python/paddle/fluid/tests/custom_op/custom_inplace.cc index 9e426c2dfd4..fbbe10b513e 100644 --- a/python/paddle/fluid/tests/custom_op/custom_inplace.cc +++ b/python/paddle/fluid/tests/custom_op/custom_inplace.cc @@ -19,18 +19,18 @@ #include "paddle/extension.h" template -void add_forward_kernel(data_t* x_data, const data_t* y_data, int64_t numel) { +void add_data_pointer(const data_t* x_data, data_t* out_data, int64_t numel) { for (size_t i = 0; i < numel; ++i) { - x_data[i] += y_data[i]; + out_data[i] += x_data[i]; } } template -void add_backward_kernel(data_t* y_grad_data, - const data_t* out_grad_data, +void assign_data_pointer(const data_t* x_data, + data_t* out_data, int64_t numel) { for (size_t i = 0; i < numel; ++i) { - y_grad_data[i] = out_grad_data[i]; + out_data[i] = x_data[i]; } } @@ -54,21 +54,10 @@ void relu_backward_kernel(const data_t* out_data, void AddForward(paddle::Tensor& x, const paddle::Tensor& y) { // NOLINT PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); - PD_DISPATCH_FLOATING_TYPES(x.type(), "AddForward", ([&] { - add_forward_kernel(x.data(), - y.data(), - x.size()); - })); -} - -std::vector AddInferDtype(const paddle::DataType& x_dtype, - const paddle::DataType& y_dtype) { - return {x_dtype}; -} - -std::vector> AddInferShape( - const std::vector& x_shape, const std::vector& y_shape) { - return {x_shape}; + PD_DISPATCH_FLOATING_TYPES( + x.type(), "AddForward", ([&] { + 
add_data_pointer(y.data(), x.data(), x.size()); + })); } std::vector AddBackward(const paddle::Tensor& x, @@ -81,8 +70,8 @@ std::vector AddBackward(const paddle::Tensor& x, PD_DISPATCH_FLOATING_TYPES( out_grad.type(), "AddBackward", ([&] { - add_backward_kernel( - y_grad.data(), out_grad.data(), out_grad.size()); + assign_data_pointer( + out_grad.data(), y_grad.data(), out_grad.size()); })); return {y_grad}; @@ -92,9 +81,7 @@ PD_BUILD_OP(custom_add) .Inputs({"X", "Y"}) .Outputs({"Out"}) .SetInplaceMap({{"X", "Out"}}) - .SetKernelFn(PD_KERNEL(AddForward)) - .SetInferShapeFn(PD_INFER_SHAPE(AddInferShape)) - .SetInferDtypeFn(PD_INFER_DTYPE(AddInferDtype)); + .SetKernelFn(PD_KERNEL(AddForward)); PD_BUILD_GRAD_OP(custom_add) .Inputs({"X", "Y", paddle::Grad("Out")}) @@ -102,6 +89,58 @@ PD_BUILD_GRAD_OP(custom_add) .SetInplaceMap({{paddle::Grad("Out"), paddle::Grad("X")}}) .SetKernelFn(PD_KERNEL(AddBackward)); +// out[i] = x[i] + y +void AddVectorForward(std::vector& x, // NOLINT + const paddle::Tensor& y) { + PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor."); + + PD_DISPATCH_FLOATING_TYPES(y.type(), "AddVectorForward", ([&] { + for (size_t i = 0; i < x.size(); ++i) { + add_data_pointer(y.data(), + x[i].data(), + y.size()); + } + })); +} + +// dout[i] / dx[i] = out_grad[i] (do not need any code, inplace automatically) +// dout / dy = out_grad[0] + ... + out_grad[n - 1] +std::vector AddVectorBackward( + const std::vector& x, + const paddle::Tensor& y, + std::vector& out_grad) { // NOLINT + PD_CHECK(x[0].place() == paddle::PlaceType::kCPU, + "x[0] must be a CPU Tensor."); + PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor."); + PD_CHECK(x.size() == out_grad.size(), + "x must have the same size as out_grad."); + + paddle::Tensor y_grad = paddle::zeros(y.shape(), y.dtype(), y.place()); + + PD_DISPATCH_FLOATING_TYPES( + y.type(), "AddVectorBackward", ([&] { + // y_grad = out_grad[0] + ... 
+ out_grad[n - 1] + for (size_t i = 0; i < out_grad.size(); ++i) { + add_data_pointer( + out_grad[i].data(), y_grad.data(), y_grad.size()); + } + })); + return {y_grad}; +} + +PD_BUILD_OP(custom_add_vec) + .Inputs({paddle::Vec("X"), "Y"}) + .Outputs({paddle::Vec("Out")}) + .SetInplaceMap({{paddle::Vec("X"), paddle::Vec("Out")}}) + .SetKernelFn(PD_KERNEL(AddVectorForward)); + +PD_BUILD_GRAD_OP(custom_add_vec) + .Inputs({paddle::Vec("X"), "Y", paddle::Grad(paddle::Vec("Out"))}) + .Outputs({paddle::Grad(paddle::Vec("X")), paddle::Grad("Y")}) + .SetInplaceMap({{paddle::Grad(paddle::Vec("Out")), + paddle::Grad(paddle::Vec("X"))}}) + .SetKernelFn(PD_KERNEL(AddVectorBackward)); + void MultiInplaceForward(paddle::Tensor& x, // NOLINT const paddle::Tensor& y, paddle::Tensor& a, // NOLINT @@ -111,29 +150,11 @@ void MultiInplaceForward(paddle::Tensor& x, // NOLINT PD_DISPATCH_FLOATING_TYPES( x.type(), "MultiInplaceForward", ([&] { - add_forward_kernel( - x.data(), y.data(), x.size()); - add_forward_kernel( - a.data(), b.data(), a.size()); + add_data_pointer(y.data(), x.data(), x.size()); + add_data_pointer(b.data(), a.data(), a.size()); })); } -std::vector MultiInplaceInferDtype( - const paddle::DataType& x_dtype, - const paddle::DataType& y_dtype, - const paddle::DataType& a_dtype, - const paddle::DataType& b_dtype) { - return {x_dtype, a_dtype}; -} - -std::vector> MultiInplaceInferShape( - const std::vector& x_shape, - const std::vector& y_shape, - const std::vector& a_shape, - const std::vector& b_shape) { - return {x_shape, a_shape}; -} - std::vector MultiInplaceBackward( const paddle::Tensor& x, const paddle::Tensor& y, @@ -151,11 +172,11 @@ std::vector MultiInplaceBackward( PD_DISPATCH_FLOATING_TYPES( outxy_grad.type(), "MultiInplaceBackward", ([&] { - add_backward_kernel(y_grad.data(), - outxy_grad.data(), + assign_data_pointer(outxy_grad.data(), + y_grad.data(), outxy_grad.size()); - add_backward_kernel(b_grad.data(), - outab_grad.data(), + 
# Temporarily assemble custom python API
import paddle.fluid.core as core
from paddle.fluid.core import CustomOpKernelContext
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.layer_helper import LayerHelper


def custom_add_vec(x_vector, y):
    """Hand-assembled Python API for the ``custom_add_vec`` operator.

    Args:
        x_vector (list[Tensor]): inputs bound to the operator's "X@VECTOR" slot.
        y (Tensor): tensor added to every element of ``x_vector`` ("Y" slot).

    Returns:
        list[Tensor]: the operator's "Out@VECTOR" outputs (unwrapped to a
        single list because there is exactly one output name).
    """
    # prepare inputs and outputs
    attrs = {}
    outs = {}
    out_names = ["Out@VECTOR"]

    # The output variables' dtype uses the default value 'float32';
    # the actual dtype of each output is inferred at runtime.
    if in_dygraph_mode():
        ctx = CustomOpKernelContext()
        # `tensor` instead of `input` to avoid shadowing the builtin.
        for tensor in [x_vector, y]:
            ctx.add_inputs(tensor)
        for out_name in out_names:
            # one placeholder eager Tensor per element of the input vector
            outs[out_name] = [core.eager.Tensor() for _ in range(len(x_vector))]
            ctx.add_outputs(outs[out_name])
        core.eager._run_custom_op(ctx, "custom_add_vec", True)
    else:
        ins = {}
        for key, value in {"X@VECTOR": x_vector, "Y": y}.items():
            # handle optional inputs
            if value is not None:
                ins[key] = value
        helper = LayerHelper("custom_add_vec", **locals())
        for out_name in out_names:
            outs[out_name] = [
                helper.create_variable(dtype='float32')
                for _ in range(len(x_vector))
            ]

        helper.append_op(
            type="custom_add_vec", inputs=ins, outputs=outs, attrs=attrs
        )

    res = [outs[out_name] for out_name in out_names]

    return res[0] if len(res) == 1 else res


# Set custom python API manually
custom_inplace.custom_add_vec = custom_add_vec


def inplace_dynamic_add_vector(phi_func, device, dtype, np_inputs, np_y):
    """Run vector inplace-add in dygraph mode.

    Args:
        phi_func (bool): True -> custom ``custom_add_vec`` op;
            False -> native ``Tensor.add_`` baseline.
        device (str): paddle device string.
        dtype (str): tensor dtype.
        np_inputs (list[np.ndarray]): numpy inputs for the vector slot.
        np_y (np.ndarray): numpy value for ``y``.

    Returns:
        tuple: (x, y, out, x_grad, y_grad) as numpy arrays; the vector-valued
        entries are concatenated along axis 0.
    """
    paddle.set_device(device)
    # NOTE(review): inputs are created with stop_gradient=True yet their
    # .grad is read after backward() — presumably the inplace op resets
    # stop_gradient on the leaf tensors; confirm against the custom-op runtime.
    inputs = [
        paddle.to_tensor(np_input, dtype=dtype, stop_gradient=True)
        for np_input in np_inputs
    ]
    y = paddle.to_tensor(np_y, dtype=dtype, stop_gradient=False)
    if phi_func:
        out = custom_inplace.custom_add_vec(inputs, y)
    else:
        out = [x.add_(y) for x in inputs]

    mean_out = paddle.mean(paddle.concat(out))
    mean_out.backward()
    return (
        np.concatenate([tensor.numpy() for tensor in inputs]),
        y.numpy(),
        np.concatenate([o.numpy() for o in out]),
        np.concatenate([tensor.grad.numpy() for tensor in inputs]),
        y.grad.numpy(),
    )


def inplace_static_add_vector(phi_func, device, dtype, np_inputs, np_y):
    """Run vector inplace-add in static-graph mode.

    Args:
        phi_func (bool): True -> custom ``custom_add_vec`` op;
            False -> ``paddle.add`` baseline.
        device (str): paddle device string.
        dtype (str): tensor dtype.
        np_inputs (list[np.ndarray]): two numpy inputs feeding x1/x2.
        np_y (np.ndarray): numpy value for ``y``.

    Returns:
        tuple: ([out0, out1], [x1_grad, x2_grad], y_grad, [out0_grad, out1_grad]).
    """
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x1 = static.data(
                name="x1", shape=[None, np_inputs[0].shape[1]], dtype=dtype
            )
            x2 = static.data(
                name="x2", shape=[None, np_inputs[1].shape[1]], dtype=dtype
            )
            y = static.data(name="y", shape=[None, np_y.shape[1]], dtype=dtype)
            x1.stop_gradient = False
            x2.stop_gradient = False
            y.stop_gradient = False
            if phi_func:
                out = custom_inplace.custom_add_vec([x1, x2], y)
            else:
                out = [paddle.add(x1, y), paddle.add(x2, y)]
            mean_out = paddle.mean(paddle.concat(out))
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            (
                out0_v,
                out1_v,
                x1_grad_v,
                x2_grad_v,
                y_grad_v,
                out0_grad_v,
                out1_grad_v,
            ) = exe.run(
                static.default_main_program(),
                feed={
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype),
                    "y": np_y.astype(dtype),
                },
                fetch_list=[
                    out[0].name,
                    out[1].name,
                    x1.name + "@GRAD",
                    x2.name + "@GRAD",
                    y.name + "@GRAD",
                    out[0].name + "@GRAD",
                    out[1].name + "@GRAD",
                ],
            )
    paddle.disable_static()
    return (
        [out0_v, out1_v],
        [x1_grad_v, x2_grad_v],
        y_grad_v,
        [out0_grad_v, out1_grad_v],
    )
2)).astype("float32") self.np_b = np.random.random((3, 2)).astype("float32") + self.np_inputs = [ + np.random.random((3, 2)).astype("float32"), + np.random.random((3, 2)).astype("float32"), + ] def check_output(self, out, pd_out, name): np.testing.assert_array_equal( @@ -354,7 +488,79 @@ class TestCustomInplaceJit(unittest.TestCase): self.check_output(phi_x_grad, pd_x_grad, "x_grad") self.check_output(phi_y_grad, pd_y_grad, "y_grad") - def test_static_multiple_inplace_relu(self): + def test_static_add_vector(self): + for device in self.devices: + for dtype in self.dtypes: + ( + pd_out, + pd_x_grad, + pd_y_grad, + pd_out_grad, + ) = inplace_static_add_vector( + True, + device, + dtype, + self.np_inputs, + self.np_y, + ) + ( + phi_out, + phi_x_grad, + phi_y_grad, + phi_out_grad, + ) = inplace_static_add_vector( + False, + device, + dtype, + self.np_inputs, + self.np_y, + ) + + self.check_output(phi_out, pd_out, "out") + self.check_output(phi_x_grad, pd_x_grad, "x_grad") + self.check_output(phi_y_grad, pd_y_grad, "y_grad") + self.check_output(phi_out_grad, pd_out_grad, "out_grad") + + def test_dynamic_add_vector(self): + for device in self.devices: + for dtype in self.dtypes: + ( + pd_x, + pd_y, + pd_out, + pd_x_grad, + pd_y_grad, + ) = inplace_dynamic_add_vector( + True, + device, + dtype, + self.np_inputs, + self.np_y, + ) + ( + phi_x, + phi_y, + phi_out, + phi_x_grad, + phi_y_grad, + ) = inplace_dynamic_add_vector( + False, + device, + dtype, + self.np_inputs, + self.np_y, + ) + + self.check_output(phi_x, phi_out, "inplace_phi_x") + self.check_output(pd_x, pd_out, "inplace_pd_x") + + self.check_output(phi_x, pd_x, "x") + self.check_output(phi_y, pd_y, "y") + self.check_output(phi_out, pd_out, "out") + self.check_output(phi_x_grad, pd_x_grad, "x_grad") + self.check_output(phi_y_grad, pd_y_grad, "y_grad") + + def test_static_relu_net(self): for device in self.devices: for dtype in self.dtypes: ( @@ -363,7 +569,7 @@ class TestCustomInplaceJit(unittest.TestCase): 
pd_out, pd_x_grad, pd_y_grad, - ) = inplace_static_relu( + ) = inplace_static_relu_net( paddle.nn.functional.relu, device, dtype, @@ -377,7 +583,7 @@ class TestCustomInplaceJit(unittest.TestCase): phi_out, phi_x_grad, phi_y_grad, - ) = inplace_static_relu( + ) = inplace_static_relu_net( custom_inplace.custom_relu_inplace, device, dtype, @@ -391,7 +597,7 @@ class TestCustomInplaceJit(unittest.TestCase): self.check_output_allclose(phi_x_grad, pd_x_grad, "x_grad") self.check_output_allclose(phi_y_grad, pd_y_grad, "y_grad") - def test_dynamic_multiple_inplace_relu(self): + def test_dynamic_relu_net(self): for device in self.devices: for dtype in self.dtypes: ( @@ -400,7 +606,7 @@ class TestCustomInplaceJit(unittest.TestCase): pd_out, pd_x_grad, pd_y_grad, - ) = inplace_dynamic_relu( + ) = inplace_dynamic_relu_net( False, device, dtype, @@ -414,7 +620,7 @@ class TestCustomInplaceJit(unittest.TestCase): phi_out, phi_x_grad, phi_y_grad, - ) = inplace_dynamic_relu( + ) = inplace_dynamic_relu_net( True, device, dtype, -- GitLab