/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// disable numpy compile error

#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#endif

#include <Python.h>

#include <string>
#include <vector>

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/custom_operator/custom_operator_node.h"
#include "paddle/fluid/eager/saved_tensors_hooks.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/fluid/framework/python_headers.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/api/lib/utils/tensor_utils.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/pybind/cuda_streams_py.h"
#endif

namespace paddle {
namespace pybind {

namespace py = ::pybind11;

extern PyTypeObject* p_tensor_type;
extern PyTypeObject* g_multidevicefeedreader_pytype;
extern PyTypeObject* g_orderedmultidevicefeedreader_pytype;

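// Computes the total number of elements in a numpy array, i.e. the product
// of its dimensions.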
size_t PyArray_Size_(PyObject* numpy_data) {
  size_t res = 1;
  auto dims = pybind11::detail::array_proxy(numpy_data)->dimensions;
  auto nd = pybind11::detail::array_proxy(numpy_data)->nd;
  while (nd--) {
    res *= (*dims++);
  }
  return res;
}

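// Wraps the underlying buffer of a numpy array as a phi::Allocation on
// CPUPlace. The PyObject is INCREF'ed for the lifetime of the allocation and
// DECREF'ed (with the GIL held) when the allocation is destroyed.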
class EagerNumpyAllocation : public phi::Allocation {
 public:
  explicit EagerNumpyAllocation(PyObject* numpy_data, phi::DataType dtype)
      : Allocation(
            static_cast<void*>(pybind11::detail::array_proxy(numpy_data)->data),
            framework::DataTypeSize(dtype) * PyArray_Size_(numpy_data),
            paddle::platform::CPUPlace()),
        arr_(numpy_data) {
    PADDLE_ENFORCE_NOT_NULL(
        arr_,
        platform::errors::InvalidArgument("The underlying PyObject pointer of "
                                          "numpy array cannot be nullptr"));
    PADDLE_ENFORCE_NE(
        arr_,
        Py_None,
        platform::errors::PreconditionNotMet(
            "The underlying PyObject pointer of numpy array cannot be None"));
    Py_INCREF(arr_);
  }
  ~EagerNumpyAllocation() override {
    py::gil_scoped_acquire gil;
    Py_DECREF(arr_);
  }

 private:
  PyObject* arr_;
};

static PyObject* eager_api_scale(PyObject* self,
                                 PyObject* args,
                                 PyObject* kwargs) {
  EAGER_TRY
  // TODO(jiabin): Sync Tensor and Variable here when we support
  paddle::experimental::Tensor ret = egr::scale(
      reinterpret_cast<TensorObject*>(PyTuple_GET_ITEM(args, 0))->tensor,
      CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 1), 1),
      CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 2), 2),
      CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3),
      CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4));
  return ToPyObject(ret);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* eager_api_run_backward(PyObject* self,
                                        PyObject* args,
                                        PyObject* kwargs) {
  EAGER_TRY
  auto tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);
  auto grad_tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 1), 1);
  {
    eager_gil_scoped_release guard;
    egr::Backward(tensors,
                  grad_tensors,
                  CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2));
  }
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* eager_api_run_partial_grad(PyObject* self,
                                            PyObject* args,
                                            PyObject* kwargs) {
  EAGER_TRY
  auto tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);
  auto inputs = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 1), 1);
  auto grad_tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 2), 2);
  auto retain_graph = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);
  auto create_graph = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4);
  auto only_inputs = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 5), 5);
  auto allow_unused = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 6), 6);
  auto no_grad_vars = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 7), 7);
  std::vector<paddle::experimental::Tensor> result;
  {
    eager_gil_scoped_release guard;
    result = egr::Grad(tensors,
                       inputs,
                       grad_tensors,
                       retain_graph,
                       create_graph,
                       only_inputs,
                       allow_unused,
                       no_grad_vars);
  }
  VLOG(1) << " in eager_api_run_partial_grad, after running egr::Grad";
  return ToPyObject(result, true /* return_py_none_if_not_initialize */);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* eager_api_tensor_copy(PyObject* self,
                                       PyObject* args,
                                       PyObject* kwargs) {
  EAGER_TRY
  paddle::experimental::Tensor& src =
      reinterpret_cast<TensorObject*>(PyTuple_GET_ITEM(args, 0))->tensor;
  paddle::experimental::Tensor& dst =
      reinterpret_cast<TensorObject*>(PyTuple_GET_ITEM(args, 1))->tensor;
  auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 2), 2);
  bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);

  dst = src.copy_to(place, blocking);
  egr::EagerUtils::autograd_meta(&dst)->SetStopGradient(
      egr::EagerUtils::autograd_meta(&(src))->StopGradient());
  egr::EagerUtils::autograd_meta(&dst)->SetPersistable(
      egr::EagerUtils::autograd_meta(&(src))->Persistable());
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* eager_api_read_next_tensor_list(PyObject* self,
                                                 PyObject* args,
                                                 PyObject* kwargs) {
  EAGER_TRY
  auto tensor_base_list =
      CastPyArg2VectorOfTensorBase(PyTuple_GET_ITEM(args, 0), 0);
  std::vector<paddle::experimental::Tensor> tensor_list;
  {
    eager_gil_scoped_release guard;
    tensor_list.reserve(tensor_base_list.size());
    auto func = [](framework::Tensor& tensor_base) {
      paddle::experimental::Tensor tensor(
          egr::Controller::Instance().GenerateUniqueName());
      auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor);
      autograd_meta->SetPersistable(false);
      autograd_meta->SetStopGradient(true);
      tensor.set_impl(std::make_shared<phi::DenseTensor>(tensor_base));
      return tensor;
    };
    for (auto& tensor_base : tensor_base_list) {
      tensor_list.emplace_back(func(tensor_base));
    }
  }
  return ToPyObject(tensor_list);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

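// Builds the CustomEdgesSlotMap entry for a custom operator. The five maps
// record how backward slots line up with forward slots:
//   [0] forward input slot  -> grad output slot (matched via "<input>@GRAD")
//   [1] forward output slot -> grad input slot  (grads of forward outputs)
//   [2] forward output slot -> grad input slot  (forward outputs reused)
//   [3] forward input slot  -> grad input slot  (forward inputs reused)
//   [4] forward attr index  -> grad attr index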
static void ConstructFwdAndBwdMap(
    const std::vector<paddle::OpMetaInfo>& vec_map,
    const std::string& op_type) {
  auto& in_out_map = egr::Controller::Instance().GetCustomEdgesSlotMap();
  if (in_out_map.find(op_type) != in_out_map.end()) {
    VLOG(7) << "Find Exist CustomEdgesSlotMap Skip >>>> ";
    return;
  } else {
    VLOG(7) << "Construct CustomEdgesSlotMap ";
    auto inputs_names =
        paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[0]);
    auto outputs_names =
        paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[0]);
    auto attrs_names =
        paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[0]);
    auto grad_outputs_names =
        paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[1]);
    auto grad_inputs_names =
        paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[1]);
    auto grad_attrs_names =
        paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[1]);
    std::vector<std::unordered_map<int, int>> res(5);

    in_out_map.insert({op_type, {res}});
    // Prepare pos map for grad_outputs
    VLOG(7) << "Prepare pos map for grad_outputs";
    PADDLE_ENFORCE_LE(
        grad_outputs_names.size(),
        inputs_names.size(),
        paddle::platform::errors::InvalidArgument(
            "Grad outputs num should be less than or equal to forward "
            "inputs num."));
    for (size_t i = 0; i < grad_outputs_names.size(); i++) {
      size_t end = grad_outputs_names[i].find("@GRAD");
      PADDLE_ENFORCE_NE(
          end,
          std::string::npos,
          paddle::platform::errors::NotFound(
              "All grad outputs should be grad vars, but we got %s, which is "
              "not a grad var. Please check your op and change it to fit the "
              "rule.",
              grad_outputs_names[i]));
      for (size_t j = 0; j < inputs_names.size(); j++) {
        if (grad_outputs_names[i].substr(0, end) == inputs_names[j]) {
          VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                  << " inputs: " << inputs_names[j] << " related to No." << i
                  << " grad_outputs: " << grad_outputs_names[i];
          in_out_map[op_type][0][0][j] = i;
        }
      }
    }
    // Prepare pos map for grad_inputs
    for (size_t i = 0; i < grad_inputs_names.size(); i++) {
      size_t end = grad_inputs_names[i].find("@GRAD");
      if (end != std::string::npos) {
        for (size_t j = 0; j < outputs_names.size(); j++) {
          if (grad_inputs_names[i].substr(0, end) == outputs_names[j]) {
            VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                    << " outputs: " << outputs_names[j] << " related to No."
                    << i << " grad_inputs's grad: " << grad_inputs_names[i];
            in_out_map[op_type][0][1][j] = i;
          }
        }
      } else {
        if (std::find(outputs_names.begin(),
                      outputs_names.end(),
                      grad_inputs_names[i]) != outputs_names.end()) {
          for (size_t j = 0; j < outputs_names.size(); j++) {
            if (grad_inputs_names[i] == outputs_names[j]) {
              VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                      << " outputs: " << outputs_names[j] << " related to No."
                      << i
                      << " grad_inputs fwd outputs: " << grad_inputs_names[i];
              in_out_map[op_type][0][2][j] = i;
            }
          }
        } else {
          for (size_t j = 0; j < inputs_names.size(); j++) {
            if (grad_inputs_names[i] == inputs_names[j]) {
              VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                      << " inputs: " << inputs_names[j] << " related to No."
                      << i
                      << " grad_inputs fwd inputs: " << grad_inputs_names[i];
              in_out_map[op_type][0][3][j] = i;
            }
          }
        }
      }
    }

    // Prepare pos map for grad attrs_
    for (size_t i = 0; i < grad_attrs_names.size(); i++) {
      auto end = std::find(
          attrs_names.begin(), attrs_names.end(), grad_attrs_names[i]);
      PADDLE_ENFORCE_NE(end,
                        attrs_names.end(),
                        paddle::platform::errors::NotFound(
                            "All grad attrs should be one of the forward "
                            "attrs, but we got %s, which is not one of them. "
                            "Please check your op and change it to fit the "
                            "rule.",
                            grad_attrs_names[i]));
      for (size_t j = 0; j < attrs_names.size(); j++) {
        if (grad_attrs_names[i] == attrs_names[j]) {
          VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                  << " attrs: " << attrs_names[j] << " related to No." << i
                  << " grad_attrs: " << grad_attrs_names[i];
          in_out_map[op_type][0][4][j] = i;
        }
      }
    }
  }
}

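// Each entry of attrs_names has the form "name: type". This helper parses the
// declared type and widens the parsed Python attribute accordingly (bool ->
// int, bool/int -> int64_t); other types are passed through unchanged.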
static std::vector<paddle::any> CastAttrsToTragetType(
    const std::vector<paddle::any>& src,
    const std::vector<std::string>& attrs_names) {
  std::vector<paddle::any> res;
  PADDLE_ENFORCE_EQ(src.size(),
                    attrs_names.size(),
                    paddle::platform::errors::InvalidArgument(
                        "Expected attrs and attrs_names to have the same "
                        "size; your custom op declares %s attrs, but only "
                        "%s were given.",
                        attrs_names.size(),
                        src.size()));
  for (size_t i = 0; i < src.size(); i++) {
    size_t end = attrs_names[i].find(": ");
    std::string type_name = attrs_names[i].substr(end + 2);
    if (type_name == "int") {
      if (src[i].type() == typeid(bool)) {
        res.emplace_back(static_cast<int>(paddle::any_cast<bool>(src[i])));
      } else if (src[i].type() == typeid(int)) {
        res.emplace_back(src[i]);
      } else {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "Your No. %s attr should be bool or int32; other types are "
            "forbidden for now, but we got %s. Please check your code first.",
            i,
            src[i].type().name()));
      }
    } else if (type_name == "int64_t") {
      if (src[i].type() == typeid(bool)) {
        res.emplace_back(static_cast<int64_t>(paddle::any_cast<bool>(src[i])));
      } else if (src[i].type() == typeid(int)) {
        res.emplace_back(static_cast<int64_t>(paddle::any_cast<int>(src[i])));
      } else if (src[i].type() == typeid(int64_t)) {
        res.emplace_back(src[i]);
      } else {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "Your No. %s attr should be bool, int32, or int64_t; other "
            "types are forbidden for now, but we got %s. Please check your "
            "code first.",
            i,
            src[i].type().name()));
      }
    } else {
      res.emplace_back(src[i]);
    }
  }
  return res;
}

static PyObject* eager_api_jit_function_call(PyObject* self,
                                             PyObject* args,
                                             PyObject* kwargs) {
  EAGER_TRY

  std::shared_ptr<jit::Function> function =
      CastPyArg2JitFunction(PyTuple_GET_ITEM(args, 0), 0);
  std::vector<paddle::experimental::Tensor> ins =
      CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 1), 1);
  std::vector<paddle::experimental::Tensor> outs = (*function)(ins);
  return ToPyObject(outs);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

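// Runs the forward kernel of a registered custom operator. If any input
// requires grad and a backward kernel exists, a RunCustomOpNode is created
// and wired up through the CustomEdgesSlotMap: grad in/out metas are set,
// the needed forward inputs/outputs are captured as tensor wrappers, and the
// matching attributes are forwarded to the grad node.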
static PyObject* eager_api_run_costum_op(PyObject* self,
                                         PyObject* args,
                                         PyObject* kwargs) {
  EAGER_TRY
  paddle::CustomOpKernelContext ctx =
      CastPyArg2CustomOpKernelContext(PyTuple_GET_ITEM(args, 0), 0);
  std::string op_type = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 1), 1);
  bool trace_backward = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2);
  VLOG(7) << "Get things for python for Custom Op: " << op_type
          << ", trace_backward is: " << trace_backward;
  auto meta_info_map = egr::Controller::Instance().GetOpMetaInfoMap();
  PADDLE_ENFORCE_NE(meta_info_map.find(op_type),
                    meta_info_map.end(),
                    paddle::platform::errors::NotFound(
                        "Can't find %s in Eager OpMetaInfoMap which should be "
                        "created by LoadOpMetaInfoAndRegisterOp, please make "
                        "sure you registered your op first and try again. ",
                        op_type));
  VLOG(7) << "Run Kernel of Custom Op: " << op_type;
  std::vector<paddle::any> res_attrs =
      CastAttrsToTragetType(ctx.Attrs(),
                            paddle::framework::OpMetaInfoHelper::GetAttrs(
                                meta_info_map.at(op_type)[0]));
  ctx.EmplaceBackAttrs(res_attrs);
  const auto& vec_map = meta_info_map.at(op_type);
  (*paddle::framework::OpMetaInfoHelper::GetKernelFn(vec_map[0]))(&ctx);

  VLOG(7) << "Get AutogradMeta for inputs and outputs for Custom Op";
  std::vector<std::vector<egr::AutogradMeta*>> ins_auto_grad_metas;
  std::vector<std::vector<egr::AutogradMeta*>> outs_auto_grad_metas;
  VLOG(7) << "We got slot num of ins is: " << ctx.InputRange().size();
  ins_auto_grad_metas.resize(ctx.InputRange().size());
  VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size();
  outs_auto_grad_metas.resize(ctx.OutputRange().size());

  for (size_t i = 0; i < ctx.InputRange().size(); i++) {
    ins_auto_grad_metas[i] =
        egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween(
            ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).second));
  }
  for (size_t i = 0; i < ctx.OutputRange().size(); i++) {
    outs_auto_grad_metas[i] =
        egr::EagerUtils::unsafe_autograd_meta(ctx.OutputsBetweeen(
            ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second));
  }
  bool require_any_grad = false;
  for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
    require_any_grad =
        require_any_grad || egr::EagerUtils::ComputeRequireGrad(
                                trace_backward, &(ins_auto_grad_metas[i]));
  }
  if (require_any_grad && (vec_map.size() > 1)) {
    VLOG(6) << " Construct Grad for Custom Op: " << op_type;
    ConstructFwdAndBwdMap(vec_map, op_type);
    for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
      egr::EagerUtils::PassStopGradient(false, &(outs_auto_grad_metas[i]));
    }
    auto grad_node = std::make_shared<egr::RunCustomOpNode>(
        outs_auto_grad_metas.size(), ins_auto_grad_metas.size(), op_type);
    auto slot_map =
        egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type);
    // Prepare Grad outputs
    size_t no_grad_cnt = 0;
    for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
      const std::vector<paddle::experimental::Tensor>& in_tensors =
          ctx.InputsBetween(ctx.InputRangeAt(i).first,
                            ctx.InputRangeAt(i).second);

      if (slot_map[0][0].find(i) != slot_map[0][0].end()) {
        grad_node->SetGradOutMeta(in_tensors, slot_map[0][0][i]);
      } else {
        grad_node->SetGradOutMeta(in_tensors,
                                  ins_auto_grad_metas.size() - 1 - no_grad_cnt);
        no_grad_cnt++;
      }
    }
    // Prepare Grad inputs with grad of fwd outputs
    for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
      const std::vector<paddle::experimental::Tensor>& out_tensors =
          ctx.OutputsBetweeen(ctx.OutputRangeAt(i).first,
                              ctx.OutputRangeAt(i).second);

      egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i);
      egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node);
      grad_node->SetGradInMeta(out_tensors, i);
      egr::EagerUtils::CheckAndRetainGrad(out_tensors);
    }

    // Prepare Grad inputs with fwd outputs
    for (auto it = slot_map[0][2].begin(); it != slot_map[0][2].end(); it++) {
      VLOG(7) << "Prepare fwd_outs: " << it->first
              << " to grad_inputs: " << it->second;
      grad_node->fwd_outs[it->second] =
          egr::RunCustomOpNode::ConstructTensorWrapper(
              ctx.OutputsBetweeen(ctx.OutputRangeAt(it->first).first,
                                  ctx.OutputRangeAt(it->first).second));
    }

    // Prepare Grad inputs with fwd inputs
    for (auto it = slot_map[0][3].begin(); it != slot_map[0][3].end(); it++) {
      VLOG(7) << "Prepare fwd_ins: " << it->first
              << " to grad_inputs: " << it->second;
      grad_node->fwd_ins[it->second] =
          egr::RunCustomOpNode::ConstructTensorWrapper(
              ctx.InputsBetween(ctx.InputRangeAt(it->first).first,
                                ctx.InputRangeAt(it->first).second));
    }

    auto attrs_names = paddle::framework::OpMetaInfoHelper::GetAttrs(
        meta_info_map.at(op_type)[1]);
    std::vector<paddle::any> attrs(attrs_names.size());
    // Prepare attrs for Grad node
    for (auto it = slot_map[0][4].begin(); it != slot_map[0][4].end(); it++) {
      VLOG(7) << "Prepare fwd attrs: " << it->first
              << " to grad_attrs: " << it->second;
      attrs[it->second] = res_attrs[it->first];
    }
    grad_node->SetAttrs(attrs);
  }
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

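// Builds a SparseCooTensor from dense non-zero indices and elements tensors
// plus a dense shape, wraps it in an eager Tensor, and attaches a
// GradNodeAccumulation when the tensor has no grad node yet.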
static PyObject* eager_api_sparse_coo_tensor(PyObject* self,
                                             PyObject* args,
                                             PyObject* kwargs) {
  EAGER_TRY
  auto non_zero_indices = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0);
  auto non_zero_elements = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 1), 1);
  auto dense_shape = CastPyArg2VectorOfInt(PyTuple_GET_ITEM(args, 2), 2);
  auto stop_gradient = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);
  PADDLE_ENFORCE(non_zero_indices.is_dense_tensor(),
                 paddle::platform::errors::Fatal(
                     "the non-zero indices must be a DenseTensor."));
  PADDLE_ENFORCE(non_zero_elements.is_dense_tensor(),
                 paddle::platform::errors::Fatal(
                     "the non-zero elements must be a DenseTensor."));
  auto dense_indices =
      std::dynamic_pointer_cast<phi::DenseTensor>(non_zero_indices.impl());
  auto dense_elements =
      std::dynamic_pointer_cast<phi::DenseTensor>(non_zero_elements.impl());
  // TODO(zhangkaihuo): After creating SparseCooTensor, call coalesced() to sort
  // and merge duplicate indices
  std::shared_ptr<phi::SparseCooTensor> coo_tensor =
      std::make_shared<phi::SparseCooTensor>(
          *dense_indices, *dense_elements, phi::make_ddim(dense_shape));
  paddle::experimental::Tensor tensor;
  tensor.set_impl(coo_tensor);
  auto name =
      egr::Controller::Instance().GenerateUniqueName("generated_tensor");
  tensor.set_name(name);
  auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor);
  autograd_meta->SetStopGradient(static_cast<bool>(stop_gradient));
  if (!autograd_meta->GetMutableGradNode()) {
    VLOG(3) << "Tensor(" << name
541
            << ") doesn't have GradNode, add GradNodeAccumulation to it.";
542 543 544 545 546 547 548
    autograd_meta->SetGradNode(
        std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
  }
  return ToPyObject(tensor);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

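// Builds a SparseCsrTensor from dense compressed-row, column, and elements
// tensors plus a dense shape, analogous to eager_api_sparse_coo_tensor above.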
static PyObject* eager_api_sparse_csr_tensor(PyObject* self,
                                             PyObject* args,
                                             PyObject* kwargs) {
  EAGER_TRY
  auto non_zero_crows = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0);
  auto non_zero_cols = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 1), 1);
  auto non_zero_elements = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 2), 2);
  auto dense_shape = CastPyArg2VectorOfInt(PyTuple_GET_ITEM(args, 3), 3);
  auto stop_gradient = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4);
  PADDLE_ENFORCE(non_zero_crows.is_dense_tensor(),
                 paddle::platform::errors::Fatal(
                     "the compressed non-zero rows must be a DenseTensor."));
  PADDLE_ENFORCE(non_zero_cols.is_dense_tensor(),
                 paddle::platform::errors::Fatal(
                     "the non-zero cols must be a DenseTensor."));
  PADDLE_ENFORCE(non_zero_elements.is_dense_tensor(),
                 paddle::platform::errors::Fatal(
                     "the non-zero elements must be a DenseTensor."));

  auto dense_crows =
      std::dynamic_pointer_cast<phi::DenseTensor>(non_zero_crows.impl());
  auto dense_cols =
      std::dynamic_pointer_cast<phi::DenseTensor>(non_zero_cols.impl());
  auto dense_elements =
      std::dynamic_pointer_cast<phi::DenseTensor>(non_zero_elements.impl());
  std::shared_ptr<phi::SparseCsrTensor> csr_tensor =
      std::make_shared<phi::SparseCsrTensor>(*dense_crows,
                                             *dense_cols,
                                             *dense_elements,
                                             phi::make_ddim(dense_shape));
  paddle::experimental::Tensor tensor;
  tensor.set_impl(csr_tensor);
  auto name =
      egr::Controller::Instance().GenerateUniqueName("generated_tensor");
  tensor.set_name(name);
  auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor);
  autograd_meta->SetStopGradient(static_cast<bool>(stop_gradient));
  if (!autograd_meta->GetMutableGradNode()) {
    VLOG(3) << "Tensor(" << name
            << ") doesn't have GradNode, add GradNodeAccumulation to it.";
    autograd_meta->SetGradNode(
        std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
  }
  return ToPyObject(tensor);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

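// Registers Python pack/unpack hooks for saved tensors. The hooks are only
// installed when grad computation is enabled.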
static PyObject* eager_api_register_saved_tensors_hooks(PyObject* self,
                                                        PyObject* args,
                                                        PyObject* kwargs) {
  EAGER_TRY
  if (egr::Controller::Instance().HasGrad()) {
    auto pack_hook = PyTuple_GET_ITEM(args, 0);
    auto unpack_hook = PyTuple_GET_ITEM(args, 1);
    egr::SavedTensorsHooks::GetInstance().SetHooks(pack_hook, unpack_hook);
  }
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* eager_api_reset_saved_tensors_hooks(PyObject* self,
                                                     PyObject* args,
                                                     PyObject* kwargs) {
  EAGER_TRY
  egr::SavedTensorsHooks::GetInstance().ResetHooks();
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

#if defined(PADDLE_WITH_CUDA)
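// Asynchronously reads rows of a CUDA-pinned `src` tensor into a GPU `dst`
// tensor on the current CUDA stream: if `offset`/`count` are non-empty, the
// corresponding contiguous row chunks are copied first, and the rows selected
// by `index` are then gathered into the pinned `buffer` and appended after
// them.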
static PyObject* eager_api_async_read(PyObject* self,
                                      PyObject* args,
                                      PyObject* kwargs) {
  EAGER_TRY
  auto& src = GetTensorFromArgs("async_read", "src", args, 0, false);
  auto& dst = GetTensorFromArgs("async_read", "dst", args, 1, false);
  auto& index = GetTensorFromArgs("async_read", "index", args, 2, false);
  auto& buffer = GetTensorFromArgs("async_read", "buffer", args, 3, false);
  auto& offset = GetTensorFromArgs("async_read", "offset", args, 4, false);
  auto& count = GetTensorFromArgs("async_read", "count", args, 5, false);
  PADDLE_ENFORCE_EQ(
      src.is_gpu_pinned(),
      true,
      platform::errors::InvalidArgument("Required `src` device should be "
                                        "CUDAPinnedPlace, but received %d.",
                                        src.place()));
  PADDLE_ENFORCE_EQ(
      dst.is_gpu(),
      true,
      platform::errors::InvalidArgument(
          "Required `dst` device should be CUDAPlace, but received %d.",
          dst.place()));
  PADDLE_ENFORCE_EQ(
      index.is_cpu(),
      true,
      platform::errors::InvalidArgument(
          "Required `index` device should be CPUPlace, but received %d.",
          index.place()));
  PADDLE_ENFORCE_EQ(buffer.is_gpu_pinned(),
                    true,
                    platform::errors::InvalidArgument(
                        "Required `buffer` device should be CUDAPinnedPlace, "
                        "but received %d.",
                        buffer.place()));
  PADDLE_ENFORCE_EQ(
      offset.is_cpu(),
      true,
      platform::errors::InvalidArgument(
          "Required `offset` device should be CPUPlace, but received %d.",
          offset.place()));
  PADDLE_ENFORCE_EQ(
      count.is_cpu(),
      true,
      platform::errors::InvalidArgument(
          "Required `count` device should be CPUPlace, but received %d.",
          count.place()));

  auto& src_tensor = src;
  auto* dst_tensor = &dst;
  auto& index_tensor = index;
  auto* buffer_tensor = &buffer;
  auto& offset_tensor = offset;
  auto& count_tensor = count;
  auto* dst_data = dst_tensor->mutable_data<float>(dst.place());
  const auto& deviceId = paddle::platform::GetCurrentDeviceId();

  PADDLE_ENFORCE_EQ(src_tensor.dims().size(),
                    dst_tensor->dims().size(),
                    platform::errors::InvalidArgument(
                        "`src` and `dst` should have the same tensor shape, "
                        "except for the first dimension."));
  PADDLE_ENFORCE_EQ(src_tensor.dims().size(),
                    buffer_tensor->dims().size(),
                    platform::errors::InvalidArgument(
                        "`src` and `buffer` should have the same tensor shape, "
                        "except for the first dimension."));
  for (int i = 1; i < src_tensor.dims().size(); i++) {
    PADDLE_ENFORCE_EQ(src_tensor.dims()[i],
                      dst_tensor->dims()[i],
                      platform::errors::InvalidArgument(
                          "`src` and `dst` should have the same tensor shape, "
                          "except for the first dimension."));
    PADDLE_ENFORCE_EQ(
        src_tensor.dims()[i],
        buffer_tensor->dims()[i],
        platform::errors::InvalidArgument(
            "`src` and `buffer` should have the same tensor shape, "
            "except for the first dimension."));
  }
  PADDLE_ENFORCE_EQ(index_tensor.dims().size(),
                    1,
                    platform::errors::InvalidArgument(
                        "`index` tensor should be one-dimensional."));

  auto stream = paddle::platform::get_current_stream(deviceId)->raw_stream();

  int64_t numel = 0;  // total copy length
  int64_t copy_flag = offset_tensor.dims()[0];
  int64_t size = src_tensor.numel() / src_tensor.dims()[0];

  if (copy_flag != 0) {
    PADDLE_ENFORCE_EQ(offset_tensor.dims().size(),
                      1,
                      platform::errors::InvalidArgument(
                          "`offset` tensor should be one-dimensional."));
    PADDLE_ENFORCE_EQ(count_tensor.dims().size(),
                      1,
                      platform::errors::InvalidArgument(
                          "`count` tensor should be one-dimensional."));
    PADDLE_ENFORCE_EQ(offset_tensor.numel(),
                      count_tensor.numel(),
                      platform::errors::InvalidArgument(
                          "`offset` and `count` tensor size mismatch."));
    auto* offset_data = offset_tensor.data<int64_t>();
    auto* count_data = count_tensor.data<int64_t>();
    for (int64_t i = 0; i < count_tensor.numel(); i++) {
      numel += count_data[i];
    }
    PADDLE_ENFORCE_LE(
        numel + index_tensor.numel(),
        buffer_tensor->dims()[0],
        platform::errors::InvalidArgument("Buffer tensor size is too small."));
    PADDLE_ENFORCE_LE(
        numel + index_tensor.numel(),
        dst_tensor->dims()[0],
        platform::errors::InvalidArgument("Target tensor size is too small."));

    int64_t src_offset, dst_offset = 0, c;
    auto* src_data = src_tensor.data<float>();
    for (int64_t i = 0; i < offset_tensor.numel(); i++) {
      src_offset = offset_data[i], c = count_data[i];
      PADDLE_ENFORCE_LE(
          src_offset + c,
          src_tensor.dims()[0],
          platform::errors::InvalidArgument("Invalid offset or count index."));
      PADDLE_ENFORCE_LE(
          dst_offset + c,
          dst_tensor->dims()[0],
          platform::errors::InvalidArgument("Invalid offset or count index."));
      cudaMemcpyAsync(dst_data + (dst_offset * size),
                      src_data + (src_offset * size),
                      c * size * sizeof(float),
                      cudaMemcpyHostToDevice,
                      stream);
      dst_offset += c;
    }
  } else {
    PADDLE_ENFORCE_LE(
        index_tensor.numel(),
        buffer_tensor->dims()[0],
        platform::errors::InvalidArgument("Buffer tensor size is too small."));
  }

  // Select the index data to the buffer
  auto index_select = [](const paddle::experimental::Tensor& src_tensor,
                         const paddle::experimental::Tensor& index_tensor,
                         paddle::experimental::Tensor* buffer_tensor) {
    auto* src_data = src_tensor.data<float>();
    auto* index_data = index_tensor.data<int64_t>();
    auto* buffer_data = buffer_tensor->data<float>();
    const int& slice_size = src_tensor.numel() / src_tensor.dims()[0];
    const int& copy_bytes = slice_size * sizeof(float);
    int64_t c = 0;
    for (int64_t i = 0; i < index_tensor.numel(); i++) {
      std::memcpy(buffer_data + c * slice_size,
                  src_data + index_data[i] * slice_size,
                  copy_bytes);
      c += 1;
    }
  };
  index_select(src_tensor, index_tensor, buffer_tensor);

  // Copy the data to device memory
  cudaMemcpyAsync(dst_data + (numel * size),
                  buffer_tensor->data<float>(),
                  index_tensor.numel() * size * sizeof(float),
                  cudaMemcpyHostToDevice,
                  stream);
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

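// Asynchronously copies consecutive row chunks of a GPU `src` tensor into a
// CUDA-pinned `dst` tensor at the row offsets given by `offset`, with chunk
// sizes given by `count`, on the current CUDA stream.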
static PyObject* eager_api_async_write(PyObject* self,
                                       PyObject* args,
                                       PyObject* kwargs) {
  EAGER_TRY
  auto& src = GetTensorFromArgs("async_write", "src", args, 0, false);
  auto& dst = GetTensorFromArgs("async_write", "dst", args, 1, false);
  auto& offset = GetTensorFromArgs("async_write", "offset", args, 2, false);
  auto& count = GetTensorFromArgs("async_write", "count", args, 3, false);
  PADDLE_ENFORCE_EQ(
      src.is_gpu(),
      true,
      platform::errors::InvalidArgument(
          "Required `src` device should be CUDAPlace, but received %d. ",
          src.place()));
  PADDLE_ENFORCE_EQ(dst.is_gpu_pinned(),
                    true,
                    platform::errors::InvalidArgument(
                        "Required `dst` device should be CUDAPinnedPlace, "
                        "but received %d. ",
                        dst.place()));
  PADDLE_ENFORCE_EQ(
      offset.is_cpu(),
      true,
      platform::errors::InvalidArgument("Required `offset` device should "
                                        "be CPUPlace, but received %d. ",
                                        offset.place()));
  PADDLE_ENFORCE_EQ(
      count.is_cpu(),
      true,
      platform::errors::InvalidArgument(
          "Required `count` device should be CPUPlace, but received %d. ",
          count.place()));

  // TODO(daisiming): In future, add index as arguments following
  // async_read.
  auto& src_tensor = src;
  auto* dst_tensor = &dst;
  auto& offset_tensor = offset;
  auto& count_tensor = count;
  const auto& deviceId = paddle::platform::GetCurrentDeviceId();

  PADDLE_ENFORCE_EQ(offset_tensor.dims().size(),
                    1,
                    platform::errors::InvalidArgument(
                        "`offset` tensor should be one-dimensional."));
  PADDLE_ENFORCE_EQ(count_tensor.dims().size(),
                    1,
                    platform::errors::InvalidArgument(
                        "`count` tensor should be one-dimensional."));
  PADDLE_ENFORCE_EQ(offset_tensor.numel(),
                    count_tensor.numel(),
                    platform::errors::InvalidArgument(
                        "`offset` and `count` tensor size mismatch."));
  PADDLE_ENFORCE_EQ(src_tensor.dims().size(),
                    dst_tensor->dims().size(),
                    platform::errors::InvalidArgument(
                        "`src` and `dst` should have the same tensor shape, "
                        "except for the first dimension."));
  for (int i = 1; i < src_tensor.dims().size(); i++) {
    PADDLE_ENFORCE_EQ(src_tensor.dims()[i],
                      dst_tensor->dims()[i],
                      platform::errors::InvalidArgument(
                          "`src` and `dst` should have the same tensor shape, "
                          "except for the first dimension."));
  }

  auto stream = paddle::platform::get_current_stream(deviceId)->raw_stream();

  int64_t size = src_tensor.numel() / src_tensor.dims()[0];
  auto* src_data = src_tensor.data<float>();
  auto* dst_data = dst_tensor->data<float>();
  const int64_t* offset_data = offset_tensor.data<int64_t>();
  const int64_t* count_data = count_tensor.data<int64_t>();
  int64_t src_offset = 0, dst_offset, c;
  for (int64_t i = 0; i < offset_tensor.numel(); i++) {
    dst_offset = offset_data[i], c = count_data[i];
    PADDLE_ENFORCE_LE(
        src_offset + c,
        src_tensor.dims()[0],
        platform::errors::InvalidArgument("Invalid offset or count index"));
    PADDLE_ENFORCE_LE(
        dst_offset + c,
        dst_tensor->dims()[0],
        platform::errors::InvalidArgument("Invalid offset or count index"));
    cudaMemcpyAsync(dst_data + (dst_offset * size),
                    src_data + (src_offset * size),
                    c * size * sizeof(float),
                    cudaMemcpyDeviceToHost,
                    stream);
    src_offset += c;
  }
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

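// Creates a tensor backed by CUDA unified virtual addressing (UVA) memory
// from a numpy array, dispatching on the array's dtype; an optional second
// argument selects the device id.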
static PyObject* eager_api_to_uva_tensor(PyObject* self,
                                         PyObject* args,
                                         PyObject* kwargs) {
  EAGER_TRY
  VLOG(4) << "Running in eager_api_to_uva_tensor.";
  auto new_tensor = std::shared_ptr<paddle::experimental::Tensor>(
      new paddle::experimental::Tensor(
          egr::Controller::Instance().GenerateUniqueName()));
  PyObject* obj = PyTuple_GET_ITEM(args, 0);
  auto array = py::cast<py::array>(py::handle(obj));

  Py_ssize_t args_num = PyTuple_Size(args);
  int64_t device_id = 0;
  if (args_num > 1) {
    PyObject* Py_device_id = PyTuple_GET_ITEM(args, 1);
    if (Py_device_id) {
      device_id = CastPyArg2AttrLong(Py_device_id, 1);
    }
  }

  if (py::isinstance<py::array_t<int32_t>>(array)) {
    SetUVATensorFromPyArray<int32_t>(new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<int64_t>>(array)) {
    SetUVATensorFromPyArray<int64_t>(new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<float>>(array)) {
    SetUVATensorFromPyArray<float>(new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<double>>(array)) {
    SetUVATensorFromPyArray<double>(new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<int8_t>>(array)) {
    SetUVATensorFromPyArray<int8_t>(new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<int16_t>>(array)) {
    SetUVATensorFromPyArray<int16_t>(new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<paddle::platform::float16>>(array)) {
    SetUVATensorFromPyArray<paddle::platform::float16>(
        new_tensor, array, device_id);
  } else if (py::isinstance<py::array_t<bool>>(array)) {
    SetUVATensorFromPyArray<bool>(new_tensor, array, device_id);
  } else {
    // obj may be any type, obj.cast<py::array>() may be failed,
    // then the array.dtype will be string of unknown meaning.
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Input object type error or incompatible array data type. "
        "tensor.set() supports array with bool, float16, float32, "
        "float64, int8, int16, int32, int64, "
        "please check your input or input array data type."));
  }

  return ToPyObject(*(new_tensor.get()));
  EAGER_CATCH_AND_THROW_RETURN_NULL
}
#endif

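// Registers a Python callable to run as a final hook once the whole backward
// pass has finished.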
static PyObject* eager_api__add_backward_final_hook(PyObject* self,
                                                    PyObject* args,
                                                    PyObject* kwargs) {
  EAGER_TRY
  PyObject* hook_func = PyTuple_GET_ITEM(args, 0);
  egr::Controller::Instance().RegisterBackwardFinalHook(
      std::make_shared<PyVoidHook>(hook_func));
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

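// Method table exposed to Python; registered on the eager module by
// BindFunctions() below.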
PyMethodDef variable_functions[] = {
    // TODO(jiabin): Remove scale when we have final state tests
    {"scale",
     (PyCFunction)(void (*)(void))eager_api_scale,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"_add_backward_final_hook",
     (PyCFunction)(void (*)(void))eager_api__add_backward_final_hook,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"run_backward",
     (PyCFunction)(void (*)(void))eager_api_run_backward,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"run_partial_grad",
     (PyCFunction)(void (*)(void))eager_api_run_partial_grad,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"_run_custom_op",
     (PyCFunction)(void (*)(void))eager_api_run_costum_op,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"tensor_copy",
     (PyCFunction)(void (*)(void))eager_api_tensor_copy,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"read_next_tensor_list",
     (PyCFunction)(void (*)(void))eager_api_read_next_tensor_list,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"jit_function_call",
     (PyCFunction)(void (*)(void))eager_api_jit_function_call,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    /**sparse functions**/
    {"sparse_coo_tensor",
     (PyCFunction)(void (*)(void))eager_api_sparse_coo_tensor,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"sparse_csr_tensor",
     (PyCFunction)(void (*)(void))eager_api_sparse_csr_tensor,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"register_saved_tensors_hooks",
     (PyCFunction)(void (*)(void))eager_api_register_saved_tensors_hooks,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"reset_saved_tensors_hooks",
     (PyCFunction)(void (*)(void))eager_api_reset_saved_tensors_hooks,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
/**sparse functions**/
#if defined(PADDLE_WITH_CUDA)
    {"async_read",
     (PyCFunction)(void (*)(void))eager_api_async_read,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"async_write",
     (PyCFunction)(void (*)(void))eager_api_async_write,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"to_uva_tensor",
     (PyCFunction)(void (*)(void))eager_api_to_uva_tensor,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
#endif
    {NULL, NULL, 0, NULL}};

void BindFunctions(PyObject* module) {
  if (PyModule_AddFunctions(module, variable_functions) < 0) {
    PADDLE_THROW(platform::errors::Fatal(
        "Init Paddle error in BindFunctions(PyModule_AddFunctions)."));
    return;
  }
}

}  // namespace pybind
}  // namespace paddle