Unverified commit fc02b1e6, authored by HongyuJia, committed by GitHub

[CustomOP Inplace] Automap inplace dtype and shape, prepare for vector<Tensor> output (#52214)

* [CustomOP Inplace] Automap inplace dtype and shape, prepare for vector<Tensor> output

* delete dtype,shape func of multi_inplace op

* [CustomOP Inplace] Automap inplace dtype and shape, support vector<Tensor> output

* [CustomOP Inplace] Auto-generate python API for inplace vector<Tensor> output
Parent 225f1af2
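For orientation, the end-to-end effect of this change can be sketched as follows. This is an illustration only: it assumes a C++ custom operator `custom_add_vec` registered with a `std::vector<Tensor>` input `X@VECTOR`, a `Tensor` input `Y`, a `std::vector<Tensor>` output `Out@VECTOR`, and an inplace pair mapping `X@VECTOR` to `Out@VECTOR`, mirroring the test file at the bottom of this diff; the source path is hypothetical.

    import numpy as np
    import paddle
    from paddle.utils.cpp_extension import load

    # Loading the extension now auto-generates the Python API for each
    # registered op, including vector<Tensor> outputs and inplace handling,
    # so the hand-written wrapper removed from the test file is unnecessary.
    custom_inplace = load(
        name="custom_inplace",
        sources=["custom_inplace.cc"],  # hypothetical source file
        verbose=True,
    )

    x_vector = [paddle.ones([4, 8], dtype="float32") for _ in range(3)]
    y = paddle.ones([4, 8], dtype="float32")
    out = custom_inplace.custom_add_vec(x_vector, y)  # list of 3 Tensors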
@@ -22,6 +22,7 @@ typedef SSIZE_T ssize_t;
 #endif
 #include <string>
 #include <unordered_map>
+#include <vector>
 
 #include "paddle/fluid/eager/accumulation/accumulation_node.h"
@@ -50,6 +51,7 @@ typedef SSIZE_T ssize_t;
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
+#include "paddle/utils/string/string_helper.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
@@ -489,6 +491,49 @@ static PyObject* eager_api_jit_function_call(PyObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* eager_api__get_custom_operator_inplace_reverse_idx(
+    PyObject* self, PyObject* args, PyObject* kwargs) {
+  EAGER_TRY
+  std::string op_type = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 0), 0);
+  auto meta_info_map = egr::Controller::Instance().GetOpMetaInfoMap();
+  PADDLE_ENFORCE_NE(meta_info_map.find(op_type),
+                    meta_info_map.end(),
+                    paddle::platform::errors::NotFound(
+                        "Can't find %s in Eager OpMetaInfoMap which should be "
+                        "created by LoadOpMetaInfoAndRegisterOp, please make "
+                        "sure you registered your op first and try again. ",
+                        op_type));
+
+  const auto& inputs =
+      paddle::OpMetaInfoHelper::GetInputs(meta_info_map.at(op_type)[0]);
+  const auto& outputs =
+      paddle::OpMetaInfoHelper::GetOutputs(meta_info_map.at(op_type)[0]);
+  const auto& inplace_map =
+      paddle::OpMetaInfoHelper::GetInplaceMap(meta_info_map.at(op_type)[0]);
+  VLOG(7) << "Custom operator " << op_type
+          << " get InplaceMap for python, inplace map size = "
+          << inplace_map.size();
+
+  std::unordered_map<int, int> inplace_idx_map;
+  for (size_t in_idx = 0; in_idx < inputs.size(); ++in_idx) {
+    auto& input = inputs[in_idx];
+    if (inplace_map.find(input) == inplace_map.end()) {
+      continue;
+    }
+    auto out_iter = find(outputs.begin(), outputs.end(), inplace_map.at(input));
+    PADDLE_ENFORCE(
+        out_iter != outputs.end(),
+        phi::errors::NotFound("Can't find the mapped value of %s, please check "
+                              "the input of `Inplace` again and make "
+                              "sure you registered your op accurately. ",
+                              input));
+    inplace_idx_map[distance(outputs.begin(), out_iter)] = in_idx;
+  }
+
+  return ToPyObject(inplace_idx_map);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* eager_api_run_custom_op(PyObject* self,
                                          PyObject* args,
                                          PyObject* kwargs) {
@@ -1133,6 +1178,11 @@ PyMethodDef variable_functions[] = {
      (PyCFunction)(void (*)(void))eager_api_run_partial_grad,
      METH_VARARGS | METH_KEYWORDS,
      NULL},
+    {"_get_custom_operator_inplace_map",
+     (PyCFunction)(void (*)(
+         void))eager_api__get_custom_operator_inplace_reverse_idx,
+     METH_VARARGS | METH_KEYWORDS,
+     NULL},
     {"_run_custom_op",
      (PyCFunction)(void (*)(void))eager_api_run_custom_op,
      METH_VARARGS | METH_KEYWORDS,
......
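The binding registered above surfaces in Python as `core.eager._get_custom_operator_inplace_map` and returns the reverse index map `{output_index: input_index}`. A minimal usage sketch, assuming the `custom_add_vec` operator described earlier, whose only output is inplace with its first input:

    from paddle.fluid import core

    idx_map = core.eager._get_custom_operator_inplace_map("custom_add_vec")
    # Illustrative result: {0: 0}, i.e. output 0 reuses the buffer of input 0.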
@@ -810,6 +810,27 @@ PyObject* ToPyObject(const void* value) {
       platform::errors::Fatal("ToPyObject do not support void* with value."));
 }
 
+PyObject* ToPyObject(const std::unordered_map<int, int>& value) {
+  PyObject* dict = PyDict_New();
+  for (const auto& map_iter : value) {
+    // Convert Key
+    PyObject* key = ToPyObject(map_iter.first);
+    // Convert Value
+    PyObject* value = ToPyObject(map_iter.second);
+    if (!key || !value) {
+      PADDLE_THROW(
+          platform::errors::Fatal("Unable to convert int to PyObject"));
+    }
+    if (PyDict_SetItem(dict, key, value) != 0) {
+      PADDLE_THROW(
+          platform::errors::Fatal("Unable to set key:value for py_dict"));
+    }
+  }
+  return dict;
+}
+
 PyObject* ToPyObject(
     const std::unordered_map<std::string, std::vector<std::string>>& value) {
   PyObject* dict = PyDict_New();
......
@@ -110,6 +110,7 @@ PyObject* ToPyObject(const phi::SelectedRows* value);
 PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype);
 PyObject* ToPyObject(const paddle::framework::proto::VarType& type);
 PyObject* ToPyObject(const void* value);
+PyObject* ToPyObject(const std::unordered_map<int, int>& value);
 PyObject* ToPyObject(
     const std::unordered_map<std::string, std::vector<std::string>>& value);
 PyObject* ToPyObject(const paddle::framework::Vocab& value);
......
@@ -950,12 +950,12 @@ def parse_op_info(op_name):
     op_proto = OpProtoHolder.instance().get_op_proto(op_name)
 
     in_names = [x.name for x in op_proto.inputs]
-    out_names = [x.name for x in op_proto.outputs]
     attr_names = [
         x.name for x in op_proto.attrs if x.name not in DEFAULT_OP_ATTR_NAMES
     ]
-    return in_names, out_names, attr_names
+    out_names = [x.name for x in op_proto.outputs]
+    return in_names, attr_names, out_names
 
 
 def _import_module_from_library(module_name, build_directory, verbose=False):
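Note the new unpacking order from `parse_op_info`: (inputs, attrs, outputs). A toy call against the illustrative `custom_add_vec` operator:

    in_names, attr_names, out_names = parse_op_info("custom_add_vec")
    # Illustrative values:
    #   in_names   == ['X@VECTOR', 'Y']
    #   attr_names == []
    #   out_names  == ['Out@VECTOR']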
@@ -1038,16 +1038,47 @@ def _generate_python_module(
     return custom_module
 
 
+def _gen_output_content(in_names, out_names, inplace_reverse_idx):
+    # ' ' * tab space * tab number
+    indent = ' ' * 4 * 2
+    dynamic_content = ""
+    static_content = ""
+    for out_idx, out_name in enumerate(out_names):
+        in_idx = -1
+        if out_idx in inplace_reverse_idx:
+            in_idx = inplace_reverse_idx[out_idx]
+        if in_idx != -1 and "@VECTOR" in in_names[in_idx]:
+            lower_in_names = in_names[in_idx].split("@")[0].lower()
+            dynamic_content += f"""
+{indent}outs['{out_name}'] = [core.eager.Tensor() for _ in range(len({lower_in_names}))]
+{indent}ctx.add_outputs(outs['{out_name}'])"""
+            static_content += f"""
+{indent}outs['{out_name}'] = [helper.create_variable(dtype='float32') for _ in range(len({lower_in_names}))]"""
+        else:
+            dynamic_content += f"""
+{indent}outs['{out_name}'] = core.eager.Tensor()
+{indent}ctx.add_outputs(outs['{out_name}'])"""
+            static_content += f"""
+{indent}outs['{out_name}'] = helper.create_variable(dtype='float32')"""
+
+    return dynamic_content, static_content
+
+
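A worked example of what `_gen_output_content` emits for a vector<Tensor> inplace output, assuming `in_names=['X@VECTOR', 'Y']`, `out_names=['Out@VECTOR']`, and `inplace_reverse_idx={0: 0}`:

    dynamic_content, static_content = _gen_output_content(
        ['X@VECTOR', 'Y'], ['Out@VECTOR'], {0: 0}
    )
    # dynamic_content renders (with the 8-space indent baked in) to:
    #         outs['Out@VECTOR'] = [core.eager.Tensor() for _ in range(len(x))]
    #         ctx.add_outputs(outs['Out@VECTOR'])
    # static_content renders to:
    #         outs['Out@VECTOR'] = [helper.create_variable(dtype='float32') for _ in range(len(x))]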
 def _custom_api_content(op_name):
     (
-        params_str,
-        ins_str,
-        attrs_str,
-        outs_str,
+        params_list,
+        ins_map,
+        attrs_map,
+        outs_list,
         in_names,
-        attrs_names,
+        attr_names,
+        out_names,
+        inplace_reverse_idx,
     ) = _get_api_inputs_str(op_name)
-    lower_in_names = [p.split("@")[0].lower() for p in in_names]
+    dynamic_content, static_content = _gen_output_content(
+        in_names, out_names, inplace_reverse_idx
+    )
+    lower_in_list = [p.split("@")[0].lower() for p in in_names]
 
     API_TEMPLATE = textwrap.dedent(
         """
         import paddle.fluid.core as core
@@ -1055,11 +1086,10 @@ def _custom_api_content(op_name):
         from paddle.fluid.framework import _dygraph_tracer, in_dygraph_mode
         from paddle.fluid.layer_helper import LayerHelper
 
-        def {op_name}({inputs}):
+        def {op_name}({params_list}):
             # prepare inputs and outputs
-            attrs = {attrs}
             outs = {{}}
-            out_names = {out_names}
+            outs_list = {outs_list}
 
             # The output variable's dtype use default value 'float32',
             # and the actual dtype of output variable will be inferred in runtime.
@@ -1069,23 +1099,19 @@ def _custom_api_content(op_name):
                     ctx.add_inputs(i)
                 for j in {attr_names}:
                     ctx.add_attr(j)
-                for out_name in out_names:
-                    outs[out_name] = core.eager.Tensor()
-                    ctx.add_outputs(outs[out_name])
+                {dynamic_content}
                 core.eager._run_custom_op(ctx, "{op_name}", True)
             else:
                 ins = {{}}
-                for key, value in dict({ins}).items():
+                for key, value in dict({ins_map}).items():
                     # handle optional inputs
                     if value is not None:
                         ins[key] = value
                 helper = LayerHelper("{op_name}", **locals())
-                for out_name in out_names:
-                    outs[out_name] = helper.create_variable(dtype='float32')
-                helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)
+                {static_content}
+                helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs={attrs_map})
 
-            res = [outs[out_name] for out_name in out_names]
+            res = [outs[out_name] for out_name in outs_list]
 
             return res[0] if len(res)==1 else res
         """
@@ -1094,13 +1120,15 @@ def _custom_api_content(op_name):
     # generate python api file
     api_content = API_TEMPLATE.format(
         op_name=op_name,
-        inputs=params_str,
-        ins=ins_str,
-        attrs=attrs_str,
+        params_list=params_list,
+        ins_map=ins_map,
+        attrs_map=attrs_map,
         # "[x, y, z]"
-        in_names="[" + ",".join(lower_in_names) + "]",
-        attr_names="[" + ",".join(attrs_names) + "]",
-        out_names=outs_str,
+        in_names="[" + ",".join(lower_in_list) + "]",
+        attr_names="[" + ",".join(attr_names) + "]",
+        outs_list=outs_list,
+        dynamic_content=dynamic_content,
+        static_content=static_content,
     )
 
     return api_content
@@ -1132,30 +1160,42 @@ def _get_api_inputs_str(op_name):
     """
     Returns string of api parameters and inputs dict.
     """
-    in_names, out_names, attr_names = parse_op_info(op_name)
+    in_names, attr_names, out_names = parse_op_info(op_name)
     # e.g: x, y, z
     param_names = in_names + attr_names
     # NOTE(chenweihang): we add suffix `@VECTOR` for std::vector<Tensor> input,
     # but the string contains `@` cannot used as argument name, so we split
     # input name by `@`, and only use first substr as argument
-    params_str = ','.join([p.split("@")[0].lower() for p in param_names])
+    params_list = ','.join([p.split("@")[0].lower() for p in param_names])
     # e.g: {'X': x, 'Y': y, 'Z': z}
-    ins_str = "{%s}" % ','.join(
+    ins_map = "{%s}" % ','.join(
         [
             "'{}' : {}".format(in_name, in_name.split("@")[0].lower())
             for in_name in in_names
         ]
     )
     # e.g: {'num': n}
-    attrs_str = "{%s}" % ",".join(
+    attrs_map = "{%s}" % ",".join(
         [
             "'{}' : {}".format(attr_name, attr_name.split("@")[0].lower())
             for attr_name in attr_names
         ]
    )
     # e.g: ['Out', 'Index']
-    outs_str = "[%s]" % ','.join(["'{}'".format(name) for name in out_names])
-    return params_str, ins_str, attrs_str, outs_str, in_names, attr_names
+    outs_list = "[%s]" % ','.join(["'{}'".format(name) for name in out_names])
+    inplace_reverse_idx = core.eager._get_custom_operator_inplace_map(op_name)
+
+    return (
+        params_list,
+        ins_map,
+        attrs_map,
+        outs_list,
+        in_names,
+        attr_names,
+        out_names,
+        inplace_reverse_idx,
+    )
 
 
 def _write_setup_file(
......
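For reference, the enlarged tuple returned by `_get_api_inputs_str` above, evaluated for the same illustrative operator (string-typed values are shown exactly as the generator would produce them):

    (
        params_list,          # 'x_vector,y'
        ins_map,              # "{'X@VECTOR' : x_vector,'Y' : y}"
        attrs_map,            # '{}'
        outs_list,            # "['Out@VECTOR']"
        in_names,             # ['X@VECTOR', 'Y']
        attr_names,           # []
        out_names,            # ['Out@VECTOR']
        inplace_reverse_idx,  # {0: 0}
    ) = _get_api_inputs_str("custom_add_vec")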
@@ -40,54 +40,6 @@ custom_inplace = load(
     verbose=True,
 )
 
-# Temporarily assemble custom python API
-from paddle.fluid import core
-from paddle.fluid.core import CustomOpKernelContext
-from paddle.fluid.framework import in_dygraph_mode
-from paddle.fluid.layer_helper import LayerHelper
-
-
-def custom_add_vec(x_vector, y):
-    # prepare inputs and outputs
-    attrs = {}
-    outs = {}
-    out_names = ["Out@VECTOR"]
-
-    # The output variable's dtype use default value 'float32',
-    # and the actual dtype of output variable will be inferred in runtime.
-    if in_dygraph_mode():
-        ctx = CustomOpKernelContext()
-        for i in [x_vector, y]:
-            ctx.add_inputs(i)
-        for out_name in out_names:
-            outs[out_name] = [core.eager.Tensor() for _ in range(len(x_vector))]
-            ctx.add_outputs(outs[out_name])
-        core.eager._run_custom_op(ctx, "custom_add_vec", True)
-    else:
-        ins = {}
-        for key, value in dict({"X@VECTOR": x_vector, "Y": y}).items():
-            # handle optional inputs
-            if value is not None:
-                ins[key] = value
-        helper = LayerHelper("custom_add_vec", **locals())
-        for out_name in out_names:
-            outs[out_name] = [
-                helper.create_variable(dtype='float32')
-                for _ in range(len(x_vector))
-            ]
-        helper.append_op(
-            type="custom_add_vec", inputs=ins, outputs=outs, attrs=attrs
-        )
-
-    res = [outs[out_name] for out_name in out_names]
-
-    return res[0] if len(res) == 1 else res
-
-
-# Set custom python API manually
-custom_inplace.custom_add_vec = custom_add_vec
-
-
 def inplace_dynamic_add(phi_func, device, dtype, np_x, np_y):
     paddle.set_device(device)
......