diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index 63dbf152b8115686eef188969130ff35950563e7..6695cf5027f6de8ff25229c7eb86d47c86b073cf 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -236,7 +236,8 @@ RunCustomOpNode::operator()(paddle::small_vector, VLOG(7) << "Run Kernel of Grad Custom Op: " << op_type_ << "_grad"; // handle inplace map - ctx.MapPlainOutputs(grad_inputs_name, grad_outputs_names, grad_inplace_map); + ctx.UpdatePlainOutputs( + grad_inputs_name, grad_outputs_names, grad_inplace_map); (*paddle::OpMetaInfoHelper::GetKernelFn(kernel_map.at(op_type_)[1]))(&ctx); ctx.AssignInplaceOutputs(); @@ -443,7 +444,8 @@ RunCustomOpDoubleGradNode::operator()( VLOG(7) << "Run Kernel of Grad Custom Op: " << name(); // handle inplace map - ctx.MapPlainOutputs(grad_inputs_name, grad_outputs_names, grad_inplace_map); + ctx.UpdatePlainOutputs( + grad_inputs_name, grad_outputs_names, grad_inplace_map); (*paddle::OpMetaInfoHelper::GetKernelFn(kernel_map.at(op_type_)[2]))(&ctx); ctx.AssignInplaceOutputs(); diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index 641674695ca8c5a909ad449ef3d3186a8974330a..8435e825531eef156c5971185dcac32225faa9fa 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -28,6 +28,7 @@ limitations under the License. 
*/ #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/convert_utils.h" +#include "paddle/fluid/framework/custom_operator_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/phi_utils.h" @@ -52,87 +53,6 @@ DECLARE_string(tensor_operants_mode); namespace paddle { namespace framework { -namespace detail { - -// dynamic lib load func -template -static T* DynLoad(void* handle, std::string name) { - T* func = reinterpret_cast(dlsym(handle, name.c_str())); -#if !defined(_WIN32) - auto errorno = dlerror(); -#else - auto errorno = GetLastError(); -#endif // !_WIN32 - PADDLE_ENFORCE_NOT_NULL( - func, - platform::errors::NotFound( - "Failed to load dynamic operator library, error message(%s).", - errorno)); - return func; -} - -inline static bool IsDuplicableVar(const std::string& var_name) { - std::string suffix = kTensorVectorSuffix; - return var_name.rfind(suffix) != std::string::npos; -} - -inline static bool IsOptionalVar(const std::string& var_name) { - std::string suffix = kOptionalSuffix; - return var_name.rfind(suffix) != std::string::npos; -} - -inline static std::string NoGrad(const std::string& var_name, - bool is_double_grad = false) { - std::string suffix = kGradVarSuffix; - std::string new_out_suffix = kDoubleGradNewOutSuffix; - std::string tmp_var_name(var_name); - if (is_double_grad && - (tmp_var_name.rfind(new_out_suffix) != std::string::npos)) { - tmp_var_name = tmp_var_name.substr( - 0, tmp_var_name.size() - /*kDoubleGradNewOutSuffix length*/ 4); - } - return tmp_var_name.substr(0, tmp_var_name.size() - kGradVarSuffixSize); -} - -inline static bool IsGradVar(const std::string& var_name, bool is_double_grad) { - std::string suffix = kGradVarSuffix; - if (!is_double_grad) { - return var_name.rfind(suffix) != std::string::npos; - } else { - // for double grad cases, the X@GRAD is not a grad var, 
X@GRAD@GRAD is a - // grad var, here we remove a @GRAD suffix - return NoGrad(var_name).rfind(suffix) != std::string::npos; - } -} - -inline static bool IsMemberOf(const std::vector& vec, - const std::string& name) { - return std::find(vec.cbegin(), vec.cend(), name) != vec.cend(); -} - -static std::vector ParseAttrStr(const std::string& attr) { - auto split_pos = attr.find_first_of(":"); - PADDLE_ENFORCE_NE(split_pos, - std::string::npos, - platform::errors::InvalidArgument( - "Invalid attribute string format. Attribute string " - "format is `:`.")); - - std::vector rlt; - // 1. name - rlt.emplace_back(string::trim_spaces(attr.substr(0, split_pos))); - // 2. type - rlt.emplace_back(string::trim_spaces(attr.substr(split_pos + 1))); - - VLOG(3) << "attr name: " << rlt[0] << ", attr type str: " << rlt[1]; - - return rlt; -} - -} // namespace detail - -////////////////// Kernel Define //////////////////// - // custom op kernel call function define static void RunKernelFunc( const framework::ExecutionContext& ctx, @@ -355,7 +275,7 @@ static void RunKernelFunc( } // handle inplace map - kernel_ctx.MapPlainOutputs(inputs, outputs, inplace_map); + kernel_ctx.UpdatePlainOutputs(inputs, outputs, inplace_map); func(&kernel_ctx); kernel_ctx.AssignInplaceOutputs(); diff --git a/paddle/fluid/framework/custom_operator_utils.h b/paddle/fluid/framework/custom_operator_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..678e0f5db3194dec396af5078b69ed99da22f56a --- /dev/null +++ b/paddle/fluid/framework/custom_operator_utils.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/string/string_helper.h" +#include "paddle/phi/api/ext/op_meta_info.h" + +namespace paddle { +namespace framework { + +namespace detail { + +// dynamic lib load func +template +static T* DynLoad(void* handle, std::string name) { + T* func = reinterpret_cast(dlsym(handle, name.c_str())); +#if !defined(_WIN32) + auto errorno = dlerror(); +#else + auto errorno = GetLastError(); +#endif // !_WIN32 + PADDLE_ENFORCE_NOT_NULL( + func, + platform::errors::NotFound( + "Failed to load dynamic operator library, error message(%s).", + errorno)); + return func; +} + +inline static bool IsDuplicableVar(const std::string& var_name) { + std::string suffix = kTensorVectorSuffix; + return var_name.rfind(suffix) != std::string::npos; +} + +inline static bool IsOptionalVar(const std::string& var_name) { + std::string suffix = kOptionalSuffix; + return var_name.rfind(suffix) != std::string::npos; +} + +inline static std::string NoGrad(const std::string& var_name, + bool is_double_grad = false) { + std::string suffix = kGradVarSuffix; + std::string new_out_suffix = kDoubleGradNewOutSuffix; + std::string tmp_var_name(var_name); + if (is_double_grad && + (tmp_var_name.rfind(new_out_suffix) != std::string::npos)) { + tmp_var_name = tmp_var_name.substr( + 0, tmp_var_name.size() - /*kDoubleGradNewOutSuffix length*/ 4); + } + return tmp_var_name.substr(0, tmp_var_name.size() - kGradVarSuffixSize); +} + +inline static bool IsGradVar(const std::string& var_name, bool is_double_grad) { + std::string 
suffix = kGradVarSuffix; + if (!is_double_grad) { + return var_name.rfind(suffix) != std::string::npos; + } else { + // for double grad cases, the X@GRAD is not a grad var, X@GRAD@GRAD is a + // grad var, here we remove a @GRAD suffix + return NoGrad(var_name).rfind(suffix) != std::string::npos; + } +} + +inline static bool IsMemberOf(const std::vector& vec, + const std::string& name) { + return std::find(vec.cbegin(), vec.cend(), name) != vec.cend(); +} + +static std::vector ParseAttrStr(const std::string& attr) { + auto split_pos = attr.find_first_of(":"); + PADDLE_ENFORCE_NE(split_pos, + std::string::npos, + platform::errors::InvalidArgument( + "Invalid attribute string format. Attribute string " + "format is `:`.")); + + std::vector rlt; + // 1. name + rlt.emplace_back(string::trim_spaces(attr.substr(0, split_pos))); + // 2. type + rlt.emplace_back(string::trim_spaces(attr.substr(split_pos + 1))); + + VLOG(3) << "attr name: " << rlt[0] << ", attr type str: " << rlt[1]; + + return rlt; +} + +} // namespace detail +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 0d8b297bf40329701afdbeae20939f8455ab0272..8df301520ec50cc378be340013f804bf4d2dcb73 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -33,6 +33,7 @@ typedef SSIZE_T ssize_t; #include "paddle/fluid/eager/utils.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/custom_operator.h" +#include "paddle/fluid/framework/custom_operator_utils.h" #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/memory/allocation/allocator.h" @@ -43,6 +44,7 @@ typedef SSIZE_T ssize_t; #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/fluid/pybind/exception.h" +#include "paddle/fluid/pybind/op_function_common.h" #include 
"paddle/fluid/pybind/tensor_py.h" #include "paddle/phi/api/ext/op_meta_info.h" #include "paddle/phi/api/lib/utils/allocator.h" @@ -424,55 +426,6 @@ static void ConstructFwdAndBwdMap( } } -static std::vector CastAttrsToTargetType( - const std::vector& src, - const std::vector& attrs_names) { - std::vector res; - PADDLE_ENFORCE_EQ(src.size(), - attrs_names.size(), - paddle::platform::errors::InvalidArgument( - "We Expected same size of attrs and attrs_name list, " - "if u got this error indicate your custom op setting " - "%s attrs, but you just give %s", - attrs_names.size(), - src.size())); - for (size_t i = 0; i < src.size(); i++) { - size_t end = attrs_names[i].find(": "); - std::string type_name = attrs_names[i].substr(end + 2); - if (type_name == "int") { - if (src[i].type() == typeid(bool)) { - res.emplace_back(static_cast(paddle::any_cast(src[i]))); - } else if (src[i].type() == typeid(int)) { - res.emplace_back(src[i]); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Your No. %s attrs should only can be bool or int32, other type is " - "forbidden for now but we got %s. Check your code first please", - i, - src[i].type().name())); - } - } else if (type_name == "int64_t") { - if (src[i].type() == typeid(bool)) { - res.emplace_back(static_cast(paddle::any_cast(src[i]))); - } else if (src[i].type() == typeid(int)) { - res.emplace_back(static_cast(paddle::any_cast(src[i]))); - } else if (src[i].type() == typeid(int64_t)) { - res.emplace_back(src[i]); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Your No. %s attrs should only can be bool or int32 or int64_t, " - "other type is forbidden for now but we got %s. 
Check your code " - "first please", - i, - src[i].type().name())); - } - } else { - res.emplace_back(src[i]); - } - } - return res; -} - static PyObject* eager_api_jit_function_call(PyObject* self, PyObject* args, PyObject* kwargs) { @@ -534,6 +487,25 @@ static PyObject* eager_api__get_custom_operator_inplace_reverse_idx( EAGER_CATCH_AND_THROW_RETURN_NULL } +// This function copies from function `EmptyTensorInitializer` with default +// parameters +static Tensor InitializedEmptyTensor() { + auto ddims = phi::make_ddim({0}); + auto tensor = paddle::Tensor(); + tensor.set_name( + egr::Controller::Instance().GenerateUniqueName("generated_tensor")); + auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor); + autograd_meta->SetPersistable(false); + std::shared_ptr dense_tensor = nullptr; + std::shared_ptr allocation_ptr = nullptr; + dense_tensor = std::make_shared( + allocation_ptr, phi::DenseTensorMeta(phi::DataType::FLOAT32, ddims)); + tensor.set_impl(dense_tensor); + autograd_meta->SetGradNode( + std::make_shared(autograd_meta)); + return tensor; +} + static PyObject* eager_api_run_custom_op(PyObject* self, PyObject* args, PyObject* kwargs) { @@ -545,14 +517,11 @@ static PyObject* eager_api_run_custom_op(PyObject* self, VLOG(4) << "Initialize phi tensor operants successfully"; } - paddle::CustomOpKernelContext ctx = - CastPyArg2CustomOpKernelContext(PyTuple_GET_ITEM(args, 0), 0); - std::string op_type = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 1), 1); - bool trace_backward = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2); + std::string op_type = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 0), 0); + VLOG(7) << "Get things from python for Custom Op: " << op_type; + paddle::CustomOpKernelContext ctx; { eager_gil_scoped_release guard; - VLOG(7) << "Get things for python for Custom Op: " << op_type - << ", trace_backward is: " << trace_backward; auto meta_info_map = egr::Controller::Instance().GetOpMetaInfoMap(); PADDLE_ENFORCE_NE( 
meta_info_map.find(op_type), @@ -562,40 +531,138 @@ static PyObject* eager_api_run_custom_op(PyObject* self, "created by LoadOpMetaInfoAndRegisterOp, please make " "sure you registered your op first and try again. ", op_type)); - VLOG(7) << "Run Kernel of Custom Op: " << op_type; - // TODO(HongyuJia): Optimize Attrs Cast naming and implementation - std::vector res_attrs = CastAttrsToTargetType( - ctx.Attrs(), - paddle::OpMetaInfoHelper::GetAttrs(meta_info_map.at(op_type)[0])); - ctx.EmplaceBackAttrs(res_attrs); const auto& vec_map = meta_info_map.at(op_type); - - const auto& inputs = - paddle::OpMetaInfoHelper::GetInputs(meta_info_map.at(op_type)[0]); - const auto& outputs = - paddle::OpMetaInfoHelper::GetOutputs(meta_info_map.at(op_type)[0]); + const auto& inputs = paddle::OpMetaInfoHelper::GetInputs(vec_map[0]); + const auto& attrs = paddle::OpMetaInfoHelper::GetAttrs(vec_map[0]); + const auto& outputs = paddle::OpMetaInfoHelper::GetOutputs(vec_map[0]); const auto& inplace_map = - paddle::OpMetaInfoHelper::GetInplaceMap(meta_info_map.at(op_type)[0]); + paddle::OpMetaInfoHelper::GetInplaceMap(vec_map[0]); + for (size_t i = 0; i < inputs.size(); ++i) { + const auto& input = inputs.at(i); + // Parse op_type first, so that use i + 1 + PyObject* obj = PyTuple_GET_ITEM(args, i + 1); + // Emplace Py_None from python, this means optional inputs passed to C++, + // use one un-initialized tensor to indicate both Tensor and + // vector inputs. + if (obj == Py_None) { + VLOG(7) << "Custom operator add input " << input + << " to CustomOpKernelContext. Add un-initialized tensor " + "because the optional input is None"; + ctx.EmplaceBackInput(std::move(paddle::Tensor())); + continue; + } + if (paddle::framework::detail::IsDuplicableVar(input)) { + ctx.EmplaceBackInputs(std::move(CastPyArg2VectorOfTensor(obj, i + 1))); + VLOG(7) << "Custom operator add input " << input + << " to CustomOpKernelContext. 
Add vector size = " + << ctx.InputRangeAt(i).second - ctx.InputRangeAt(i).first; + } else { + ctx.EmplaceBackInput(std::move(CastPyArg2Tensor(obj, i + 1))); + VLOG(7) << "Custom operator add input " << input + << " to CustomOpKernelContext. Add Tensor for general case."; + } + } + // Parse op_type and inputs first, so that use 1 + inputs.size() + i + int attr_start_idx = 1 + inputs.size(); + for (size_t i = 0; i < attrs.size(); ++i) { + const auto& attr = attrs.at(i); + std::vector attr_name_and_type = + paddle::framework::detail::ParseAttrStr(attr); + auto attr_type_str = attr_name_and_type[1]; + VLOG(7) << "Custom operator add attrs " << attr_name_and_type[0] + << " to CustomOpKernelContext. Attribute type = " + << attr_type_str; + PyObject* obj = PyTuple_GET_ITEM(args, attr_start_idx + i); + if (attr_type_str == "bool") { + ctx.EmplaceBackAttr(CastPyArg2AttrBoolean(obj, attr_start_idx + i)); + } else if (attr_type_str == "int") { + ctx.EmplaceBackAttr(CastPyArg2AttrInt(obj, attr_start_idx + i)); + } else if (attr_type_str == "float") { + ctx.EmplaceBackAttr(CastPyArg2AttrFloat(obj, attr_start_idx + i)); + } else if (attr_type_str == "int64_t") { + ctx.EmplaceBackAttr(CastPyArg2Long(obj, op_type, attr_start_idx + i)); + } else if (attr_type_str == "std::string") { + ctx.EmplaceBackAttr(CastPyArg2AttrString(obj, attr_start_idx + i)); + } else if (attr_type_str == "std::vector") { + ctx.EmplaceBackAttr(CastPyArg2VectorOfInt(obj, attr_start_idx + i)); + } else if (attr_type_str == "std::vector") { + ctx.EmplaceBackAttr(CastPyArg2VectorOfFloat(obj, attr_start_idx + i)); + } else if (attr_type_str == "std::vector") { + ctx.EmplaceBackAttr(CastPyArg2Longs(obj, op_type, attr_start_idx + i)); + } else if (attr_type_str == "std::vector") { + ctx.EmplaceBackAttr(CastPyArg2VectorOfString(obj, attr_start_idx + i)); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported `%s` type value as custom attribute now. 
" + "Supported data types include `bool`, `int`, `float`, " + "`int64_t`, `std::string`, `std::vector`, " + "`std::vector`, `std::vector`, " + "`std::vector`, Please check whether " + "the attribute data type and data type string are matched.", + attr_type_str)); + } + } + ctx.ConstructInplaceIndex(inputs, outputs, inplace_map); + const auto& inplace_reverse_idx_map = ctx.GetInplaceReverseIndexMap(); + for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) { + const auto& output = outputs.at(out_idx); + // inplace special case + if (inplace_reverse_idx_map.find(out_idx) != + inplace_reverse_idx_map.end()) { + size_t in_idx = inplace_reverse_idx_map.at(out_idx); + const auto& input_range = ctx.InputRangeAt(in_idx); + const auto& input_tensor = ctx.InputAt(input_range.first); + // inplace optional [Tensor or vector], un-initialized tensor. + if (paddle::framework::detail::IsOptionalVar(output) && + !input_tensor.initialized()) { + VLOG(7) << "Custom operator add output " << output + << " to CustomOpKernelContext. Add un-initialized tensor " + "because the inplace optional input is None"; + ctx.EmplaceBackOutput(std::move(paddle::Tensor())); + continue; + } + /// inplace vector, initialized tensor. + if (paddle::framework::detail::IsDuplicableVar(output)) { + std::vector empty_tensors; + size_t vector_size = input_range.second - input_range.first; + empty_tensors.resize(vector_size); + for (size_t i = 0; i < vector_size; ++i) { + empty_tensors[i] = InitializedEmptyTensor(); + } + VLOG(7) << "Custom operator add output " << output + << " to CustomOpKernelContext. Add vector size = " + << empty_tensors.size(); + ctx.EmplaceBackOutputs(std::move(empty_tensors)); + continue; + } + } + VLOG(7) << "Custom operator add output " << output + << " to CustomOpKernelContext. Add initialized Tensor because " + "using general or inplace mechanism"; + // general Tensor or inplace Tensor, initialized tensor. 
+ ctx.EmplaceBackOutput(std::move(InitializedEmptyTensor())); + } + // handle inplace map - ctx.MapPlainOutputs(inputs, outputs, inplace_map); + ctx.UpdatePlainOutputs(inputs, outputs, inplace_map); + VLOG(7) << "Run Kernel of Custom Op: " << op_type; (*paddle::OpMetaInfoHelper::GetKernelFn(vec_map[0]))(&ctx); ctx.AssignInplaceOutputs(); // handle optional None output when construct backward graph for (size_t i = 0; i < ctx.OutputRange().size(); i++) { if (ctx.OutputRangeAt(i).first + 1 == ctx.OutputRangeAt(i).second) { - size_t idx = ctx.OutputRangeAt(i).first; - paddle::Tensor* out_tensor = ctx.MutableOutputAt(idx); + paddle::Tensor* out_tensor = + ctx.MutableOutputAt(ctx.OutputRangeAt(i).first); if (!out_tensor->initialized()) { PADDLE_ENFORCE( - outputs.at(idx).find(paddle::kOptionalSuffix) != - std::string::npos, + paddle::framework::detail::IsOptionalVar(outputs.at(i)), phi::errors::InvalidArgument( "Custom operator's %d-th output is not initialized. " "Please check your implementation again. If you are " "using inplace optional output, then you must use " "`paddle::Optional` to decorate this output", - idx)); + i)); // We can also consider using `autograd_meta` to tolerant nullptr. 
out_tensor->set_autograd_meta(std::make_shared()); } @@ -603,45 +670,37 @@ static PyObject* eager_api_run_custom_op(PyObject* self, } VLOG(7) << "Get AutogradMeta for inputs and outputs for Custom Op"; - std::vector> ins_auto_grad_metas; - std::vector> outs_auto_grad_metas; - VLOG(7) << "We got slot num of ins is: " << ctx.InputRange().size(); - ins_auto_grad_metas.resize(ctx.InputRange().size()); - VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size(); - outs_auto_grad_metas.resize(ctx.OutputRange().size()); - - for (size_t i = 0; i < ctx.InputRange().size(); i++) { - ins_auto_grad_metas[i] = - egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween( - ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).second)); - } - for (size_t i = 0; i < ctx.OutputRange().size(); i++) { - outs_auto_grad_metas[i] = - egr::EagerUtils::unsafe_autograd_meta(ctx.OutputsBetweeen( - ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second)); - } + size_t slot_ins_num = ctx.InputRange().size(); + size_t slot_outs_num = ctx.OutputRange().size(); + VLOG(7) << "We got slot num of ins is: " << slot_ins_num; + VLOG(7) << "We got slot num of outs is: " << slot_outs_num; + std::vector ins_auto_grad_metas = + egr::EagerUtils::nullable_autograd_meta(*ctx.AllMutableInput()); + std::vector outs_auto_grad_metas = + egr::EagerUtils::unsafe_autograd_meta(*ctx.AllMutableOutput()); + bool require_any_grad = false; - for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) { + bool trace_backward = true; + for (size_t i = 0; i < ins_auto_grad_metas.size(); ++i) { require_any_grad = require_any_grad || egr::EagerUtils::ComputeRequireGrad( - trace_backward, &(ins_auto_grad_metas[i])); + trace_backward, ins_auto_grad_metas[i]); } // handle inplace map - for (size_t i = 0; i < ctx.InputRange().size(); i++) { - if (inplace_map.find(inputs[i]) != inplace_map.end()) { - size_t input_size = - ctx.InputRangeAt(i).second - ctx.InputRangeAt(i).first; - size_t start_idx = ctx.InputRangeAt(i).first; - 
for (size_t j = 0; j < input_size; j++) { - egr::EagerUtils::CheckInplace(ctx.InputAt(start_idx + j), - ins_auto_grad_metas[i][j], - require_any_grad); - if (ctx.MutableInputAt(start_idx + j).defined()) { + if (!inplace_map.empty()) { + for (size_t i = 0; i < ctx.InputRange().size(); i++) { + if (inplace_map.find(inputs[i]) == inplace_map.end()) { + continue; + } + const auto& input_pair = ctx.InputRangeAt(i); + for (size_t j = input_pair.first; j < input_pair.second; j++) { + egr::EagerUtils::CheckInplace( + ctx.InputAt(j), ins_auto_grad_metas[j], require_any_grad); + if (ctx.MutableInputAt(j).defined()) { // Bump Inplace Version - ctx.MutableInputAt(start_idx + j).bump_inplace_version(); - VLOG(3) << "Custom operator: Tensor(" - << ctx.InputAt(start_idx + j).name() + ctx.MutableInputAt(j).bump_inplace_version(); + VLOG(3) << "Custom operator: Tensor(" << ctx.InputAt(j).name() << ") uses Inplace Strategy."; } } @@ -651,45 +710,50 @@ static PyObject* eager_api_run_custom_op(PyObject* self, if (require_any_grad && (vec_map.size() > 1)) { VLOG(6) << " Construct Grad for Custom Op: " << op_type; ConstructFwdAndBwdMap(vec_map, op_type); - for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) { - egr::EagerUtils::PassStopGradient(false, &(outs_auto_grad_metas[i])); + for (size_t i = 0; i < outs_auto_grad_metas.size(); ++i) { + egr::EagerUtils::PassStopGradient(false, outs_auto_grad_metas[i]); } // Note(HongyuJia): In dygraph eager mode, CheckInplace makes sure leaf // nodes set stop_gradient=True. However, dygraph mode can also outputs // lead nodes' gradients (For example, we can get x.grad after x.add_(y)). // To be consistent with dygraph mode, we have to PassStopGradient for all // inplaced ins_auto_grad_metas. 
- std::unordered_map inplace_tensor_map = - ctx.GetInplaceTensorMap(); - for (auto pair : inplace_tensor_map) { - egr::EagerUtils::PassStopGradient(false, - &(ins_auto_grad_metas[pair.first])); + const auto& inplace_index_map = ctx.GetInplaceIndexMap(); + for (auto pair : inplace_index_map) { + const auto& size_pair = ctx.InputRangeAt(pair.first); + for (size_t i = size_pair.first; i < size_pair.second; ++i) { + egr::EagerUtils::PassStopGradient(false, ins_auto_grad_metas[i]); + } } auto grad_node = std::make_shared( - outs_auto_grad_metas.size(), ins_auto_grad_metas.size(), op_type); - auto slot_map = + slot_outs_num, slot_ins_num, op_type); + const auto& slot_map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type); + // Prepare Grad outputs size_t no_grad_cnt = 0; - for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) { + for (size_t i = 0; i < slot_ins_num; i++) { const std::vector& in_tensors = ctx.InputsBetween( ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).second); if (slot_map[0][0].find(i) != slot_map[0][0].end()) { - grad_node->SetGradOutMeta(in_tensors, slot_map[0][0][i]); + grad_node->SetGradOutMeta(in_tensors, slot_map[0][0].at(i)); } else { - grad_node->SetGradOutMeta( - in_tensors, ins_auto_grad_metas.size() - 1 - no_grad_cnt); + grad_node->SetGradOutMeta(in_tensors, slot_ins_num - 1 - no_grad_cnt); no_grad_cnt++; } } // Prepare Grad inputs with grad of fwd outputs - for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) { - const std::vector& out_tensors = ctx.OutputsBetweeen( - ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second); - - egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i); - egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node); + for (size_t i = 0; i < slot_outs_num; i++) { + const auto& size_pair = ctx.OutputRangeAt(i); + const std::vector& out_tensors = + ctx.OutputsBetweeen(size_pair.first, size_pair.second); + for (size_t j = size_pair.first; j < size_pair.second; j++) { + // 
SetOutRankWithSlot: slot_id = i, rank = j - size_pair.first + outs_auto_grad_metas[j]->SetSingleOutRankWithSlot( + i, j - size_pair.first); + egr::EagerUtils::SetHistory(outs_auto_grad_metas[j], grad_node); + } grad_node->SetGradInMeta(out_tensors, i); } @@ -713,9 +777,8 @@ static PyObject* eager_api_run_custom_op(PyObject* self, ctx.InputRangeAt(it->first).second)); } - auto attrs_names = - paddle::OpMetaInfoHelper::GetAttrs(meta_info_map.at(op_type)[1]); - std::vector attrs(attrs_names.size()); + const std::vector& res_attrs = ctx.Attrs(); + std::vector attrs(res_attrs.size()); // Prepare attrs for Grad node for (auto it = slot_map[0][4].begin(); it != slot_map[0][4].end(); it++) { VLOG(7) << "Prepare fwd attrs: " << it->first @@ -725,7 +788,7 @@ static PyObject* eager_api_run_custom_op(PyObject* self, grad_node->SetAttrs(attrs); } } - RETURN_PY_NONE + return ToPyObject(*ctx.AllMutableOutput()); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 1a177f59adba22b70ea38695e965a13c3fa9cea9..f2d1c396617b126470ce970df76d0ac9b7563360 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -56,7 +56,6 @@ extern PyTypeObject* g_cudapinnedplace_pytype; extern PyTypeObject* g_customplace_pytype; extern PyTypeObject* g_framework_tensor_pytype; extern PyTypeObject* g_framework_lodtensorarray_pytype; -extern PyTypeObject* g_custom_op_kernel_ctx_pytype; extern PyTypeObject* g_jit_function_pytype; int TensorDtype2NumpyDtype(phi::DataType dtype) { @@ -432,6 +431,54 @@ std::vector CastPyArg2VectorOfSize_t(PyObject* obj, size_t arg_pos) { return result; } +std::vector CastPyArg2VectorOfFloat(PyObject* obj, size_t arg_pos) { + std::vector result; + if (PyList_Check(obj)) { + Py_ssize_t len = PyList_Size(obj); + PyObject* item = nullptr; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyList_GetItem(obj, i); + if (PyObject_CheckFloatOrConvertToFloat(&item)) { + 
result.emplace_back(static_cast(PyFloat_AsDouble(item))); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "list of float, but got %s at pos %d", + arg_pos + 1, + reinterpret_cast(item->ob_type)->tp_name, + i)); + } + } + } else if (PyTuple_Check(obj)) { + Py_ssize_t len = PyTuple_Size(obj); + PyObject* item = nullptr; + for (Py_ssize_t i = 0; i < len; i++) { + item = PyTuple_GET_ITEM(obj, i); + if (PyObject_CheckFloatOrConvertToFloat(&item)) { + result.emplace_back(static_cast(PyFloat_AsDouble(item))); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "list of float, but got %s at pos %d", + arg_pos + 1, + reinterpret_cast(item->ob_type)->tp_name, + i)); + } + } + } else if (obj == Py_None) { + return {}; + } else if (PyObject_CheckFloatOrConvertToFloat(&obj)) { + return {static_cast(PyFloat_AsDouble(obj))}; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "list of float, but got %s", + arg_pos + 1, + reinterpret_cast(obj->ob_type)->tp_name)); + } + return result; +} + std::vector> CastPyArg2VectorOfVectorOfSize_t( PyObject* obj, size_t arg_pos) { std::vector> result; @@ -602,19 +649,6 @@ std::vector CastPyArg2VectorOfString(PyObject* obj, } } -paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj, - ssize_t arg_pos) { - if (PyObject_IsInstance( - obj, reinterpret_cast(g_custom_op_kernel_ctx_pytype))) { - return ::pybind11::handle(obj).cast(); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "argument (position %d) must be CustomOpKernelContext, " - "but got %s", - arg_pos + 1, - reinterpret_cast(obj->ob_type)->tp_name)); - } -} PyObject* ToPyObject(bool value) { if (value) { Py_INCREF(Py_True); diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 555489c2ff242fd0e1181b7b46a3c73391cd1d76..dcf71ec0819395331ed8b59aee00af9f34d629bb 100644 --- 
a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -57,8 +57,6 @@ int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos); size_t CastPyArg2AttrSize_t(PyObject* obj, ssize_t arg_pos); float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos); std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos); -paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj, - ssize_t arg_pos); std::shared_ptr CastPyArg2VarBase(PyObject* obj, ssize_t arg_pos); std::vector CastPyArg2VectorOfTensor(PyObject* obj, @@ -70,6 +68,7 @@ std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, std::vector CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos); std::vector CastPyArg2VectorOfInt64(PyObject* obj, size_t arg_pos); std::vector CastPyArg2VectorOfSize_t(PyObject* obj, size_t arg_pos); +std::vector CastPyArg2VectorOfFloat(PyObject* obj, size_t arg_pos); std::vector> CastPyArg2VectorOfVectorOfSize_t( PyObject* obj, size_t arg_pos); framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj, diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc index 8a53863c7983001c73f2b354d3326db836bfc374..86dea0d145afea8d240e8177977f92842ea05146 100644 --- a/paddle/fluid/pybind/op_function_common.cc +++ b/paddle/fluid/pybind/op_function_common.cc @@ -464,7 +464,7 @@ std::vector CastPyArg2Longs(PyObject* obj, for (Py_ssize_t i = 0; i < len; i++) { item = PyList_GetItem(obj, i); if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); + value.emplace_back((int64_t)PyLong_AsLongLong(item)); } else { PADDLE_THROW(platform::errors::InvalidArgument( "%s(): argument (position %d) must be " @@ -481,7 +481,7 @@ std::vector CastPyArg2Longs(PyObject* obj, for (Py_ssize_t i = 0; i < len; i++) { item = PyTuple_GetItem(obj, i); if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); + value.emplace_back((int64_t)PyLong_AsLongLong(item)); } else { 
PADDLE_THROW(platform::errors::InvalidArgument( "%s(): argument (position %d) must be " @@ -498,7 +498,7 @@ std::vector CastPyArg2Longs(PyObject* obj, for (Py_ssize_t i = 0; i < len; i++) { item = PySequence_GetItem(obj, i); if (PyObject_CheckLongOrToLong(&item)) { - value.emplace_back(PyLong_AsLong(item)); + value.emplace_back((int64_t)PyLong_AsLongLong(item)); } else { PADDLE_THROW(platform::errors::InvalidArgument( "%s(): argument (position %d) must be " @@ -512,7 +512,7 @@ std::vector CastPyArg2Longs(PyObject* obj, } else if (obj == Py_None) { return {}; } else if (PyObject_CheckLongOrToLong(&obj)) { - return {static_cast(PyLong_AsLong(obj))}; + return {(int64_t)PyLong_AsLongLong(obj)}; } else { PADDLE_THROW(platform::errors::InvalidArgument( "%s(): argument (position %d) must be " diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 1583102865fc69a51e93e1068a46d8bd0779208a..65aa609e34fde1a05a505d7e5b270442c74f7c2d 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1013,70 +1013,6 @@ PYBIND11_MODULE(libpaddle, m) { m.def("_promote_types_if_complex_exists", &paddle::framework::PromoteTypesIfComplexExists); - py::class_ custom_op_kernel_ctx( - m, "CustomOpKernelContext", R"DOC()DOC"); - g_custom_op_kernel_ctx_pytype = - reinterpret_cast(custom_op_kernel_ctx.ptr()); - custom_op_kernel_ctx.def(py::init<>()) - .def("add_inputs", - [](paddle::CustomOpKernelContext &self, const py::handle &input) { - PyObject *obj = input.ptr(); - if (PyList_Check(obj) || PyTuple_Check(obj)) { - self.EmplaceBackInputs( - std::move(CastPyArg2VectorOfTensor(obj, 1))); - } else if (obj == Py_None) { - // Check optional Tensor, use one un-initialized tensor to - // indicate both Tensor and vector inputs - self.EmplaceBackInput(std::move(paddle::Tensor())); - } else { - self.EmplaceBackInput(std::move(CastPyArg2Tensor(obj, 1))); - } - }) - .def("add_outputs", - [](paddle::CustomOpKernelContext &self, py::handle &outputs) { - 
PyObject *obj = outputs.ptr(); - if (PyList_Check(obj) || PyTuple_Check(obj)) { - self.EmplaceBackOutputs( - std::move(CastPyArg2VectorOfTensor(obj, 1))); - } else { - self.EmplaceBackOutput(std::move(CastPyArg2Tensor(obj, 1))); - } - }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, bool attr) { - self.EmplaceBackAttr(attr); - }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, int attr) { - self.EmplaceBackAttr(attr); - }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, float attr) { - self.EmplaceBackAttr(attr); - }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, int64_t attr) { - self.EmplaceBackAttr(attr); - }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, const std::string &attr) { - self.EmplaceBackAttr(attr); - }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, - const std::vector &attr) { self.EmplaceBackAttr(attr); }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, - const std::vector &attr) { self.EmplaceBackAttr(attr); }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, - const std::vector &attr) { self.EmplaceBackAttr(attr); }) - .def("add_attr", - [](paddle::CustomOpKernelContext &self, - const std::vector &attr) { - self.EmplaceBackAttr(attr); - }); - py::class_(m, "Variable", R"DOC(Variable Class. All parameter, weight, gradient are variables in Paddle. 
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h index 4a9a10a53aa9061a55981006fa3bbe3fdc8b69cd..07a47ed1df6327016504a4ec9a8de3246110dc46 100644 --- a/paddle/phi/api/ext/op_meta_info.h +++ b/paddle/phi/api/ext/op_meta_info.h @@ -119,6 +119,7 @@ class PADDLE_API CustomOpKernelContext { const Tensor& InputAt(size_t idx) const; std::vector InputsBetween(size_t start, size_t end) const; Tensor& MutableInputAt(size_t idx); + std::vector* AllMutableInput(); paddle::optional OptionalInputAt(size_t idx); paddle::optional> OptionalInputsBetween(size_t start, size_t end); @@ -144,13 +145,18 @@ class PADDLE_API CustomOpKernelContext { } // handle inplace map - void MapPlainOutputs( + void ConstructInplaceIndex( + const std::vector& inputs, + const std::vector& outputs, + const std::unordered_map& inplace_map); + void UpdatePlainOutputs( const std::vector& inputs, const std::vector& outputs, const std::unordered_map& inplace_map); void AssignInplaceOutputs(); std::vector* AllMutablePlainOutput(); - std::unordered_map GetInplaceTensorMap(); + std::unordered_map GetInplaceIndexMap(); + std::unordered_map GetInplaceReverseIndexMap(); private: // TODO(chenweihang): replaced be SmallVector @@ -159,7 +165,10 @@ class PADDLE_API CustomOpKernelContext { std::vector attrs_; // handle inplace map std::vector plain_outputs_; - std::unordered_map inplace_tensor_map_; + // {input: output} + std::unordered_map inplace_idx_map_; + // {output: input} + std::unordered_map inplace_reverse_idx_map_; std::vector> input_range_; std::vector> output_range_; diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc index bdc46a4e0e7cad6653e0273ca003e215cb7a3a01..0af2c96521c9f1d84d2091610f8110123861245e 100644 --- a/paddle/phi/api/lib/op_meta_info.cc +++ b/paddle/phi/api/lib/op_meta_info.cc @@ -103,6 +103,10 @@ Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) { return inputs_.at(idx); } +std::vector* 
CustomOpKernelContext::AllMutableInput() { + return &inputs_; + } + paddle::optional CustomOpKernelContext::OptionalInputAt(size_t idx) { if (!inputs_.at(idx).is_initialized()) { return paddle::none; } @@ -156,13 +160,15 @@ const std::pair& CustomOpKernelContext::OutputRangeAt( return output_range_.at(idx); } -// handle inplace mechanism -// Find out non-inplace output tensors. -// TODO(HongyuJia): Add cache for inplace_tensor_map_ to optimize performance -void CustomOpKernelContext::MapPlainOutputs( +void CustomOpKernelContext::ConstructInplaceIndex( const std::vector& inputs, const std::vector& outputs, const std::unordered_map& inplace_map) { + // Cache inplace indices. + if (inplace_map.empty() || !inplace_idx_map_.empty()) { + VLOG(4) << "Custom operator ConstructInplaceIndex no need to recompute."; + return; + } for (size_t in_idx = 0; in_idx < inputs.size(); ++in_idx) { auto& input = inputs[in_idx]; if (inplace_map.find(input) == inplace_map.end()) { @@ -175,15 +181,26 @@ void CustomOpKernelContext::MapPlainOutputs( "the input of `Inplace` again and make " "sure you registered your op accurately. ", input)); - inplace_tensor_map_[in_idx] = distance(outputs.begin(), out_iter); + size_t out_idx = distance(outputs.begin(), out_iter); + inplace_idx_map_[in_idx] = out_idx; + inplace_reverse_idx_map_[out_idx] = in_idx; + } + VLOG(4) << "Custom operator update inplace input-output map successfully."; +} + +// Find out non-inplace output tensors. +void CustomOpKernelContext::UpdatePlainOutputs( + const std::vector& inputs, + const std::vector& outputs, + const std::unordered_map& inplace_map) { + // Cache plain outputs vector. 
+ if (!plain_outputs_.empty()) { + VLOG(4) << "Custom operator UpdatePlainOutputs no need to recompute."; + return; } + ConstructInplaceIndex(inputs, outputs, inplace_map); for (size_t i = 0; i < outputs.size(); ++i) { - if (std::any_of( - inplace_tensor_map_.begin(), - inplace_tensor_map_.end(), - [i](std::unordered_map::const_reference pair) { - return pair.second == i; - })) { + if (inplace_reverse_idx_map_.find(i) != inplace_reverse_idx_map_.end()) { continue; } size_t output_start_idx = output_range_[i].first; @@ -192,11 +209,12 @@ void CustomOpKernelContext::MapPlainOutputs( plain_outputs_.push_back(&outputs_[idx]); } } - VLOG(4) << "Custom opertor update inplace input-output map successfully."; + VLOG(4) << "Custom operator update plain outputs map successfully."; } + // Assign input tensor to inplace output tensors. void CustomOpKernelContext::AssignInplaceOutputs() { - for (auto pair : inplace_tensor_map_) { + for (auto pair : inplace_idx_map_) { size_t in_start_idx = input_range_[pair.first].first; size_t in_end_idx = input_range_[pair.first].second; size_t out_start_idx = output_range_[pair.second].first; @@ -213,15 +231,21 @@ void CustomOpKernelContext::AssignInplaceOutputs() { } VLOG(4) << "Custom opertor update inplace input-output tensor " "successfully. 
Update map size = " - << inplace_tensor_map_.size(); + << inplace_idx_map_.size(); } } + std::vector* CustomOpKernelContext::AllMutablePlainOutput() { return &plain_outputs_; } + +std::unordered_map CustomOpKernelContext::GetInplaceIndexMap() { + return inplace_idx_map_; +} + std::unordered_map -CustomOpKernelContext::GetInplaceTensorMap() { - return inplace_tensor_map_; +CustomOpKernelContext::GetInplaceReverseIndexMap() { + return inplace_reverse_idx_map_; } ////////////////////// Op Meta Info ////////////////////// diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 8958c6bc7ac1f599c30b69bc669ef14b47312dcd..e78cc85f73ca0a51d4926d0427fb2f541e8a1014 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -1042,7 +1042,9 @@ def _gen_output_content( # ' ' * tab space * tab number indent = ' ' * 4 * 2 inplace_idx = {v: k for k, v in inplace_reverse_idx.items()} - dynamic_content = "" + dynamic_content = f""" +{indent}res = [] +{indent}start_idx = 0""" static_content = f""" {indent}ins = {{}} {indent}ins_map = {ins_map} @@ -1065,10 +1067,11 @@ def _gen_output_content( lower_in_names = in_names[in_idx].split("@")[0].lower() dynamic_content += f""" {indent}if {lower_in_names} is not None: -{indent} outs['{out_name}'] = [core.eager.Tensor() for _ in range(len({lower_in_names}))] +{indent} res.append(outs[start_idx: start_idx + len({lower_in_names})]) +{indent} start_idx += len({lower_in_names}) {indent}else: -{indent} outs['{out_name}'] = core.eager.Tensor() -{indent}ctx.add_outputs(outs['{out_name}'])""" +{indent} res.append(None) +{indent} start_idx += 1""" static_content += f""" {indent}if {lower_in_names} is not None: {indent} outs['{out_name}'] = [helper.create_variable(dtype='float32') for _ in range(len({lower_in_names}))]""" @@ -1077,8 +1080,8 @@ def _gen_output_content( ): # inplace vector output case lower_in_names = 
in_names[in_idx].split("@")[0].lower() dynamic_content += f""" -{indent}outs['{out_name}'] = [core.eager.Tensor() for _ in range(len({lower_in_names}))] -{indent}ctx.add_outputs(outs['{out_name}'])""" +{indent}res.append(outs[start_idx: start_idx + len({lower_in_names})]) +{indent}start_idx += len({lower_in_names})""" static_content += f""" {indent}outs['{out_name}'] = [helper.create_variable(dtype='float32') for _ in range(len({lower_in_names}))]""" elif ( @@ -1086,21 +1089,22 @@ def _gen_output_content( ): # inplace optional Tensor output case, handle inplace None input lower_in_names = in_names[in_idx].split("@")[0].lower() dynamic_content += f""" -{indent}outs['{out_name}'] = core.eager.Tensor() -{indent}ctx.add_outputs(outs['{out_name}'])""" +{indent}if {lower_in_names} is not None: +{indent} res.append(outs[start_idx]) +{indent}else: +{indent} res.append(None) +{indent}start_idx += 1""" static_content += f""" {indent}if {lower_in_names} is not None: {indent} outs['{out_name}'] = helper.create_variable(dtype='float32')""" else: # general/inplace Tensor output case dynamic_content += f""" -{indent}outs['{out_name}'] = core.eager.Tensor() -{indent}ctx.add_outputs(outs['{out_name}'])""" +{indent}res.append(outs[start_idx]) +{indent}start_idx += 1""" static_content += f""" {indent}outs['{out_name}'] = helper.create_variable(dtype='float32')""" dynamic_content += f""" -{indent}core.eager._run_custom_op(ctx, "{op_name}", True) -{indent}res = [outs[out_name] if isinstance(outs[out_name], list) or outs[out_name]._is_initialized() else None for out_name in outs_list] {indent}return res[0] if len(res)==1 else res""" static_content += f""" @@ -1134,7 +1138,7 @@ def _custom_api_content(op_name): API_TEMPLATE = textwrap.dedent( """ import paddle.fluid.core as core - from paddle.fluid.core import Tensor, CustomOpKernelContext + from paddle.fluid.core import Tensor from paddle.fluid.framework import _dygraph_tracer, in_dygraph_mode from paddle.fluid.layer_helper import 
LayerHelper @@ -1146,11 +1150,7 @@ def _custom_api_content(op_name): # The output variable's dtype use default value 'float32', # and the actual dtype of output variable will be inferred in runtime. if in_dygraph_mode(): - ctx = CustomOpKernelContext() - for i in {in_names}: - ctx.add_inputs(i) - for j in {attr_names}: - ctx.add_attr(j) + outs = core.eager._run_custom_op("{op_name}", {params_list}) {dynamic_content} else: {static_content}