未验证 提交 227fa408 编写于 作者: J Jiabin Yang 提交者: GitHub

Support custom op and paddle.autograd.backward in eager (#40423)

* eager, test=develop

* fix bug, test=develop

* eager, test=develop

* merge legacy to fluid

* eager, test=develop

* eager, test=develop

* Refactor TensorAdd func by template and remove gradient_accumulation in eager

* Remove needless target name

* eager, test=develop

* eager, test=develop

* Use overload instead of template

* Remove legacy code

* Remove legacy code

* selectedrows, test=develop

* Remove DataType test

* eager, test=develop

* eager, test=develop

* support gan, test=develop

* Using Tensor directly instead of using EagerTensor

* support gradient_accumulation

* make test_imperative_lod_tensor_to_selected_rows longer

* make test_imperative_lod_tensor_to_selected_rows longer

* refine code

* ptb, test=develop

* Rename all EagerTensor to Tensor

* Rename some EagerTensor to Tensor

* rename EagerTensor to EagerVariable

* eager, test=develop

* eager, test=develop

* eager, test=develop

* eager, test=develop

* add more test

* eager, test=develop

* Support copiable selected rows and merge develop

* save load, eager, test=develop

* save load, eager, test=develop

* refine, test=develop

* remove useless _set_value method

* refine, test=develop

* refine, test=develop

* revert static_runner, test=develop

* EagerTensor to Tensor, test=develop

* refine, test=develop

* refine, test=develop

* clear grad, test=develop

* merge, develop

* merge, develop

* merge, test=develop

* merge, test=develop

* Support quant and part of slice

* support legacy static save

* extend slim tests time

* remove imperative on inference

* remove imperative on inference

* merge develop

* fix typo

* fix typo

* split slice related code into 2 part for imperative and eager

* split slice from inference

* split slice from inference

* fix test_tensor_register_hook

* support custom op in eager mode

* fix inference deps error

* split eager utils from custom operator

* fix type match

* fix typo
Co-authored-by: NWang Huan <wanghuan29@baidu.com>
Co-authored-by: NWeilong Wu <veyron_wu@163.com>
Co-authored-by: Nwanghuancoder <wanghuancoder@163.com>
上级 250e254f
set(eager_deps phi_api hook_utils tensor_utils utils global_utils backward phi_tensor tracer layer autograd_meta grad_node_info grad_tensor_holder accumulation_node)
set(eager_deps phi_api hook_utils tensor_utils utils global_utils backward phi_tensor tracer layer autograd_meta grad_node_info grad_tensor_holder accumulation_node custom_operator_node)
set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy)
set(generated_deps final_dygraph_function final_dygraph_node dygraph_function dygraph_node)
......@@ -9,6 +10,8 @@ endif()
add_subdirectory(api)
add_subdirectory(accumulation)
add_subdirectory(custom_operator)
cc_library(grad_node_info SRCS grad_node_info.cc DEPS phi_api phi_tensor)
cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator)
......
......@@ -18,7 +18,7 @@
#include <atomic>
#include <memory>
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/ext/op_meta_info.h"
namespace egr {
class UniqueNameGenerator {
......@@ -70,6 +70,21 @@ class Controller {
void SetInEagerMode(bool in_eager_mode) { in_eager_mode_ = in_eager_mode; }
const std::unordered_map<std::string, std::vector<paddle::OpMetaInfo>>&
GetOpMetaInfoMap() {
return op_meta_info_map_;
}
void MergeOpMetaInfoMap(const std::unordered_map<
std::string, std::vector<paddle::OpMetaInfo>>& map) {
op_meta_info_map_.insert(map.begin(), map.end());
}
std::unordered_map<std::string, std::vector<std::unordered_map<int, int>>>&
GetCustomEdgesSlotMap() {
return custom_edges_slot_map_;
}
private:
Controller() = default;
static Controller* controller_;
......@@ -77,6 +92,11 @@ class Controller {
new paddle::imperative::Tracer()};
// TODO(jiabin): remove when we don't need imperative.
bool in_eager_mode_{false};
std::unordered_map<std::string, std::vector<paddle::OpMetaInfo>>
op_meta_info_map_;
/* op_type : {{grad_outputs}, {grad_inputs}, {input}, {output}, {attrs}}*/
std::unordered_map<std::string, std::vector<std::unordered_map<int, int>>>
custom_edges_slot_map_;
DISABLE_COPY_AND_ASSIGN(Controller);
};
......
......@@ -112,7 +112,8 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
// Prepare GradTensorHolder
if (!node_input_buffers_dict.count(grad_node)) {
VLOG(6) << "Create Value for grad input tensor " << i;
VLOG(6) << "Create Value for grad input tensor " << i
<< " of grad node: " << grad_node->name();
node_input_buffers_dict[grad_node] =
std::make_unique<GradTensorHolder>(grad_node->InputMeta());
}
......@@ -158,19 +159,23 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
VLOG(6) << "Run Backward";
while (!queue.empty()) {
GradNodeBase* node = queue.front();
queue.pop();
if (queue.size() > 1 && node_in_degree_map[node] != 0) {
queue.pop();
continue;
}
queue.pop();
// Run node: This is where Hook happens
PADDLE_ENFORCE(
node_input_buffers_dict.count(node),
paddle::platform::errors::Fatal(
"Unable to find next node in the InputBuufer"
"Unable to find next node in the GradTensorHolder \n"
"Trying to run Node without configuring its GradTensorHolder"));
std::unique_ptr<GradTensorHolder> node_input_buffer =
std::move(node_input_buffers_dict[node]);
VLOG(6) << "Run Backward Kernel with input_buffer";
VLOG(6) << "Run Backward Kernel with GradTensorHolder";
// Run Pre Backward Node and get outputs
std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
(*node)(node_input_buffer->Buffers());
......@@ -215,9 +220,8 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
if ((!grad_output_tensor.defined() ||
!grad_output_tensor.initialized())) {
VLOG(6)
<< "We get grad_output_tensor with slot: " << i << ", rank: " << j
<< " as uninitialized or undefined in both tensor and variable";
VLOG(6) << "We get grad_output_tensor with slot: " << i
<< ", rank: " << j << " as uninitialized or undefined tensor";
}
VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i
<< ", rank: " << j
......@@ -228,6 +232,8 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
const auto& input_meta = next_node->InputMeta();
auto grad_tensor_holder =
std::make_unique<GradTensorHolder>(input_meta);
VLOG(6) << "Construct GradTensorHolder for grad node: "
<< next_node->name();
node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
}
VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
......@@ -237,10 +243,12 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
// Update queue
node_in_degree_map[next_node]--;
PADDLE_ENFORCE(node_in_degree_map[next_node] >= 0,
PADDLE_ENFORCE(
node_in_degree_map[next_node] >= 0,
paddle::platform::errors::Fatal(
"Detected in-degree value smaller than zero."
"Node's in-degree cannot be negative"));
"Detected in-degree value smaller than zero. For Node: %s"
"Node's in-degree cannot be negative",
next_node->name()));
if (node_in_degree_map[next_node] == 0) {
queue.emplace(std::move(next_node));
}
......
cc_library(custom_operator_node SRCS custom_operator_node.cc DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/custom_operator/custom_operator_node.h"
#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/core/dense_tensor.h"
namespace egr {
// Backward functor of a custom operator. Assembles the grad kernel's inputs
// from the incoming gradients plus the forward tensors saved on this node,
// allocates placeholder output gradient tensors, then runs the
// user-registered grad kernel (slot [1] of the op's meta-info vector).
std::vector<std::vector<paddle::experimental::Tensor>> RunCustomOpNode::
operator()(
    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
  paddle::CustomOpKernelContext ctx;
  auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
      egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
  auto grad_outputs_names = paddle::framework::OpMetaInfoHelper::GetOutputs(
      egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
  // NOTE(perf): take const references here. The original code copied the
  // whole edges-slot map and the whole op meta-info map on every backward
  // call of every custom op.
  const auto& map =
      egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
  const auto& kernel_map = egr::Controller::Instance().GetOpMetaInfoMap();
  std::vector<std::vector<paddle::experimental::Tensor>> tmp_ins(
      grad_inputs_name.size());
  VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
          << ", whose grad_inputs_name size is: " << grad_inputs_name.size();
  // map[1]: incoming-grad slot -> position in the grad kernel's input list.
  for (size_t i = 0; i < grads.size(); i++) {
    if (map[1].find(i) != map[1].end()) {
      VLOG(7) << "Insert grad: " << i << " to grad_inputs: " << map[1].at(i);
      tmp_ins[map[1].at(i)] = grads[i];
    }
  }
  // Recover forward outputs/inputs saved as TensorWrappers into the grad
  // kernel's inputs. Iterate by reference to avoid copying the wrapper
  // vectors on each step.
  for (auto& it : fwd_outs) {
    VLOG(7) << "Insert fwd_outs to grad_inputs: " << it.first;
    tmp_ins[it.first] = RunCustomOpNode::Recover(&(it.second));
  }
  for (auto& it : fwd_ins) {
    VLOG(7) << "Insert fwd_ins to grad_inputs: " << it.first;
    tmp_ins[it.first] = RunCustomOpNode::Recover(&(it.second));
  }
  VLOG(6) << "Prepare Grad inputs";
  for (const auto& in : tmp_ins) {
    ctx.EmplaceBackInputs(in);
  }
  VLOG(6) << "Prepare Grad attrs";
  ctx.EmplaceBackAttrs(attrs_);
  std::vector<std::vector<paddle::experimental::Tensor>> outs(
      GetEdges().size());
  std::vector<std::vector<paddle::experimental::Tensor>> tmp_outs(
      grad_outputs_names.size());
  VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
  // map[0]: forward-input slot -> position in the grad kernel's output list.
  // Slots absent from map[0] produce no gradient.
  for (size_t i = 0; i < GetEdges().size(); i++) {
    if (map[0].find(i) != map[0].end()) {
      VLOG(7) << "Insert grad outputs: " << i
              << " with size: " << GetEdges()[i].size()
              << " to tmp_outputs: " << map[0].at(i);
      for (size_t j = 0; j < GetEdges()[i].size(); j++) {
        outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */
                             std::make_shared<phi::DenseTensor>(
                                 phi::DataType::UNDEFINED),
                             egr::Controller::Instance().GenerateUniqueName(
                                 "custom_tmp_grad"));
      }
      tmp_outs[map[0].at(i)] = outs[i];
    }
  }
  for (size_t i = 0; i < tmp_outs.size(); i++) {
    VLOG(7) << "Prepare grad outputs size: " << tmp_outs[i].size();
    ctx.EmplaceBackOutputs(tmp_outs[i]);
  }
  VLOG(7) << "Run Kernel of Grad Custom Op: " << op_type_;
  (*paddle::framework::OpMetaInfoHelper::GetKernelFn(
      kernel_map.at(op_type_)[1]))(&ctx);
  return outs;
}
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/tensor_wrapper.h"
#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/utils/any.h"
namespace egr {
// GradNodeBase implementation for user-defined (custom) operators.
// Stores the forward tensors the grad kernel will need (as TensorWrappers)
// and replays the registered backward kernel when the autograd engine
// reaches this node.
class RunCustomOpNode : public GradNodeBase {
 public:
  // Constructor: configure fwd input tensors to grad node
  explicit RunCustomOpNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num,
                           const std::string& op_type)
      : GradNodeBase(bwd_in_slot_num, bwd_out_slot_num), op_type_(op_type) {
    VLOG(6) << "Construct RunCustomOpNode for op: " << op_type;
  }

  ~RunCustomOpNode() override {
    VLOG(6) << "Destruct RunCustomOpNode for op: " << op_type_;
  }

  // Functor: perform backward computations
  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
      override;

  std::string name() {
    return paddle::string::Sprintf("RunCustomOpNode: %s_grad", op_type_);
  }

  // Wrap forward-pass tensors so they can be kept alive on this node.
  static std::vector<egr::TensorWrapper> ConstructTensorWrapper(
      const std::vector<paddle::experimental::Tensor>& fwd_var) {
    std::vector<egr::TensorWrapper> wrapped;
    wrapped.reserve(fwd_var.size());
    for (const auto& tensor : fwd_var) {
      wrapped.emplace_back(tensor);
    }
    return wrapped;
  }

  // Unwrap previously stored TensorWrappers back into Tensors.
  static std::vector<paddle::experimental::Tensor> Recover(
      std::vector<egr::TensorWrapper>* fwd_var) {
    std::vector<paddle::experimental::Tensor> recovered;
    recovered.reserve(fwd_var->size());
    for (auto& wrapper : *fwd_var) {
      recovered.emplace_back(wrapper.recover(nullptr));
    }
    return recovered;
  }

  void SetAttrs(const std::vector<paddle::any>& attr) { attrs_ = attr; }

 public:
  // Saved forward outputs / inputs, keyed by the grad-input slot they feed.
  std::unordered_map<int, std::vector<egr::TensorWrapper>> fwd_outs;
  std::unordered_map<int, std::vector<egr::TensorWrapper>> fwd_ins;
  std::unordered_map<int, int> grads2grad_in_map;

 private:
  std::vector<paddle::any> attrs_;
  std::string op_type_{""};
};
} // namespace egr
......@@ -25,7 +25,7 @@
#include "glog/logging.h"
/**
* Implementation of GradNodeBase, Edge and InputBuffer.
* Implementation of GradNodeBase, Edge and GradTensorHolder.
**/
namespace egr {
......
......@@ -440,6 +440,7 @@ message(STATUS "branch: ${PADDLE_BRANCH}")
configure_file(commit.h.in commit.h)
cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper phi_tensor op_meta_info phi_api)
#cc_binary(test_executor SRCS test_executor.cc DEPS executor op_registry ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} )
#cc_binary(new_executor SRCS new_exec_test.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
......
......@@ -25,6 +25,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
......@@ -946,15 +947,16 @@ void RegisterOperatorWithMetaInfoMap(
////////////////////// User APIs ///////////////////////
// load op api
void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name) {
const std::unordered_map<std::string, std::vector<OpMetaInfo>>&
LoadOpMetaInfoAndRegisterOp(const std::string& dso_name) {
void* handle = paddle::platform::dynload::GetOpDsoHandle(dso_name);
VLOG(3) << "load custom_op lib: " << dso_name;
typedef OpMetaInfoMap& get_op_meta_info_map_t();
auto* get_op_meta_info_map =
detail::DynLoad<get_op_meta_info_map_t>(handle, "PD_GetOpMetaInfoMap");
auto& op_meta_info_map = get_op_meta_info_map();
RegisterOperatorWithMetaInfoMap(op_meta_info_map, handle);
return op_meta_info_map.GetMap();
}
} // namespace framework
......
......@@ -20,9 +20,9 @@ limitations under the License. */
namespace paddle {
namespace framework {
// Load custom op api: register op after user compiled
void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name);
const std::unordered_map<std::string, std::vector<OpMetaInfo>>&
LoadOpMetaInfoAndRegisterOp(const std::string& dso_name);
// Register custom op api: register op directly
void RegisterOperatorWithMetaInfoMap(
......@@ -31,6 +31,5 @@ void RegisterOperatorWithMetaInfoMap(
// Interface for selective register custom op.
void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
void* dso_handle = nullptr);
} // namespace framework
} // namespace paddle
......@@ -351,7 +351,7 @@ if(WITH_PYTHON)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_library(paddle_eager
SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc
DEPS eager_api autograd_meta backward grad_node_info phi op_function_common final_dygraph_function final_dygraph_node dygraph_function dygraph_node accumulation_node global_utils utils python)
DEPS eager_api autograd_meta backward grad_node_info phi op_function_common final_dygraph_function final_dygraph_node dygraph_function dygraph_node accumulation_node global_utils utils python custom_operator custom_operator_node)
add_dependencies(paddle_eager eager_codegen)
add_dependencies(paddle_eager eager_op_function_generator_cmd)
list(APPEND PYBIND_DEPS paddle_eager)
......
......@@ -21,21 +21,25 @@ limitations under the License. */
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/custom_operator/custom_operator_node.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/api/lib/utils/storage.h"
#include "paddle/phi/api/lib/utils/tensor_utils.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
namespace paddle {
namespace pybind {
......@@ -168,7 +172,276 @@ static PyObject* eager_api_read_next_tensor_list(PyObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
// Builds (once per op_type) the CustomEdgesSlotMap entry that relates the
// forward op signature (vec_map[0]) to the grad op signature (vec_map[1]).
// The entry is a vector of 5 position maps (see the Controller comment:
// {{grad_outputs}, {grad_inputs}, {input}, {output}, {attrs}}):
//   [0] fwd-input index  -> grad-output index   ("X"      -> "X@GRAD")
//   [1] fwd-output index -> grad-input index    ("Out"    -> "Out@GRAD")
//   [2] fwd-output index -> grad-input index    (fwd output reused as-is)
//   [3] fwd-input index  -> grad-input index    (fwd input reused as-is)
//   [4] fwd-attr index   -> grad-attr index
static void ConstructFwdAndBwdMap(
    const std::vector<paddle::OpMetaInfo>& vec_map,
    const std::string& op_type) {
  auto& in_out_map = egr::Controller::Instance().GetCustomEdgesSlotMap();
  // Already constructed for this op type — nothing to do.
  if (in_out_map.find(op_type) != in_out_map.end()) {
    VLOG(7) << "Find Exist CustomEdgesSlotMap Skip >>>> ";
    return;
  } else {
    VLOG(7) << "Construct CustomEdgesSlotMap ";
    auto inputs_names =
        paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[0]);
    auto outputs_names =
        paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[0]);
    auto attrs_names =
        paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[0]);
    auto grad_outputs_names =
        paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[1]);
    auto grad_inputs_names =
        paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[1]);
    auto grad_attrs_names =
        paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[1]);
    // Five empty position maps, filled in below.
    std::vector<std::unordered_map<int, int>> res(5);
    in_out_map.insert({op_type, res});
    // Prepare pos map for grad_outputs
    VLOG(7) << "Prepare pos map for grad_outputs";
    PADDLE_ENFORCE_LE(
        grad_outputs_names.size(), inputs_names.size(),
        paddle::platform::errors::InvalidArgument(
            "Grad outputs num should be less equal than forward inputs num."));
    // Every grad output must be named "<fwd_input>@GRAD"; map the forward
    // input position j to the grad output position i.
    for (size_t i = 0; i < grad_outputs_names.size(); i++) {
      size_t end = grad_outputs_names[i].find("@GRAD");
      PADDLE_ENFORCE_NE(
          end, std::string::npos,
          paddle::platform::errors::NotFound(
              "All Grad outputs should be grad and we got %s is not grad var, "
              "please check your op and change to fit the rule.",
              grad_outputs_names[i]));
      for (size_t j = 0; j < inputs_names.size(); j++) {
        if (grad_outputs_names[i].substr(0, end) == inputs_names[j]) {
          VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                  << " inputs: " << inputs_names[j] << " related to No." << i
                  << " grad_outputs: " << grad_outputs_names[i];
          in_out_map[op_type][0][j] = i;
        }
      }
    }
    // Prepare pos map for grad_inputs
    // A grad input is either "<fwd_output>@GRAD" (slot map [1]), a forward
    // output reused directly (slot map [2]), or a forward input reused
    // directly (slot map [3]).
    for (size_t i = 0; i < grad_inputs_names.size(); i++) {
      size_t end = grad_inputs_names[i].find("@GRAD");
      if (end != std::string::npos) {
        // Case 1: gradient of a forward output.
        for (size_t j = 0; j < outputs_names.size(); j++) {
          if (grad_inputs_names[i].substr(0, end) == outputs_names[j]) {
            VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                    << " outputs: " << outputs_names[j] << " related to No."
                    << i << " grad_inputs's grad: " << grad_inputs_names[i];
            in_out_map[op_type][1][j] = i;
          }
        }
      } else {
        if (std::find(outputs_names.begin(), outputs_names.end(),
                      grad_inputs_names[i]) != outputs_names.end()) {
          // Case 2: forward output passed through to the grad kernel.
          for (size_t j = 0; j < outputs_names.size(); j++) {
            if (grad_inputs_names[i] == outputs_names[j]) {
              VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                      << " outputs: " << outputs_names[j] << " related to No."
                      << i
                      << " grad_inputs fwd outputs: " << grad_inputs_names[i];
              in_out_map[op_type][2][j] = i;
            }
          }
        } else {
          // Case 3: forward input passed through to the grad kernel.
          for (size_t j = 0; j < inputs_names.size(); j++) {
            if (grad_inputs_names[i] == inputs_names[j]) {
              VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                      << " inputs: " << inputs_names[j] << " related to No."
                      << i
                      << " grad_inputs fwd inputs: " << grad_inputs_names[i];
              in_out_map[op_type][3][j] = i;
            }
          }
        }
      }
    }
    // Prepare pos map for grad attrs_
    // Every grad attr must also be a forward attr; map forward attr position
    // j to grad attr position i.
    for (size_t i = 0; i < grad_attrs_names.size(); i++) {
      auto end = std::find(attrs_names.begin(), attrs_names.end(),
                           grad_attrs_names[i]);
      PADDLE_ENFORCE_NE(end, attrs_names.end(),
                        paddle::platform::errors::NotFound(
                            "All Grad attrs should be one of forward attrs and "
                            "we got %s is not one of them, please check your "
                            "op and change to fit the rule.",
                            grad_attrs_names[i]));
      for (size_t j = 0; j < attrs_names.size(); j++) {
        if (grad_attrs_names[i] == attrs_names[j]) {
          VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                  << " attrs: " << attrs_names[j] << " related to No." << i
                  << " grad_attrs: " << grad_attrs_names[i];
          in_out_map[op_type][4][j] = i;
        }
      }
    }
  }
}
// Casts forward attribute values to the C++ types declared in the op's attr
// signature (strings of the form "name: type"). Python may deliver a value
// narrower than the declared type, so bool->int and bool/int->int64_t
// widenings are performed here; any other declared type passes through
// unchanged. (Name kept as-is — callers in this file use it.)
static std::vector<paddle::any> CastAttrsToTragetType(
    const std::vector<paddle::any>& src,
    const std::vector<std::string>& attrs_names) {
  std::vector<paddle::any> res;
  PADDLE_ENFORCE_EQ(src.size(), attrs_names.size(),
                    paddle::platform::errors::InvalidArgument(
                        "We Expected same size of attrs and attrs_name list, "
                        "if u got this error indicate your custom op setting "
                        "%s attrs, but you just give %s",
                        attrs_names.size(), src.size()));
  for (size_t i = 0; i < src.size(); i++) {
    // NOTE(review): assumes every attr name is "name: type" — confirm
    // against OpMetaInfoHelper::GetAttrs.
    size_t end = attrs_names[i].find(": ");
    // BUG FIX: the substring length was previously computed from the
    // *vector* size (attrs_names.size()) instead of the string length,
    // which could truncate the parsed type name when the op has many
    // attrs. The intent is simply "everything after ': '".
    std::string type_name = attrs_names[i].substr(end + 2);
    if (type_name == "int") {
      if (src[i].type() == typeid(bool)) {
        res.emplace_back(static_cast<int>(paddle::any_cast<bool>(src[i])));
      } else if (src[i].type() == typeid(int)) {
        res.emplace_back(src[i]);
      } else {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "Your No. %s attrs should only can be bool or int32, other type is "
            "forbidden for now but we got %s. Check your code first please",
            i, src[i].type().name()));
      }
    } else if (type_name == "int64_t") {
      if (src[i].type() == typeid(bool)) {
        res.emplace_back(static_cast<int64_t>(paddle::any_cast<bool>(src[i])));
      } else if (src[i].type() == typeid(int)) {
        res.emplace_back(static_cast<int64_t>(paddle::any_cast<int>(src[i])));
      } else if (src[i].type() == typeid(int64_t)) {
        res.emplace_back(src[i]);
      } else {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "Your No. %s attrs should only can be bool or int32 or int64_t, "
            "other type is forbidden for now but we got %s. Check your code "
            "first please",
            i, src[i].type().name()));
      }
    } else {
      res.emplace_back(src[i]);
    }
  }
  return res;
}
// Python entry point for running a custom operator in eager mode.
// args: (CustomOpKernelContext, op_type: str, trace_backward: bool).
// Runs the forward kernel, and — if any input requires grad — wires up a
// RunCustomOpNode so backward can replay the registered grad kernel.
// Returns Py_None. (Name kept as-is — it is registered in PyMethodDef.)
static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
                                         PyObject* kwargs) {
  EAGER_TRY
  paddle::CustomOpKernelContext ctx =
      CastPyArg2CustomOpKernelContext(PyTuple_GET_ITEM(args, 0), 0);
  std::string op_type = CastPyArg2AttrString(PyTuple_GET_ITEM(args, 1), 1);
  bool trace_backward = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2);
  VLOG(7) << "Get things for python for Custom Op: " << op_type
          << ", trace_backward is: " << trace_backward;
  // NOTE(perf): const reference — the original code copied the entire op
  // meta-info map on every custom-op invocation.
  const auto& meta_info_map = egr::Controller::Instance().GetOpMetaInfoMap();
  PADDLE_ENFORCE_NE(meta_info_map.find(op_type), meta_info_map.end(),
                    paddle::platform::errors::NotFound(
                        "Can't find %s in Eager OpMetaInfoMap which should be "
                        "created by LoadOpMetaInfoAndRegisterOp, please make "
                        "sure you registered your op first and try again. ",
                        op_type));
  VLOG(7) << "Run Kernel of Custom Op: " << op_type;
  // Widen attr values coming from Python to the types declared by the
  // forward op's attr signature (vec_map[0]).
  std::vector<paddle::any> res_attrs = CastAttrsToTragetType(
      ctx.Attrs(), paddle::framework::OpMetaInfoHelper::GetAttrs(
                       meta_info_map.at(op_type)[0]));
  ctx.EmplaceBackAttrs(res_attrs);
  const auto& vec_map = meta_info_map.at(op_type);
  // Run the user-registered forward kernel.
  (*paddle::framework::OpMetaInfoHelper::GetKernelFn(vec_map[0]))(&ctx);

  VLOG(7) << "Get AutogradMeta for inputs and outputs for Custom Op";
  std::vector<std::vector<egr::AutogradMeta*>> ins_auto_grad_metas;
  std::vector<std::vector<egr::AutogradMeta*>> outs_auto_grad_metas;
  VLOG(7) << "We got slot num of ins is: " << ctx.InputRange().size();
  ins_auto_grad_metas.resize(ctx.InputRange().size());
  VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size();
  outs_auto_grad_metas.resize(ctx.OutputRange().size());
  for (size_t i = 0; i < ctx.InputRange().size(); i++) {
    ins_auto_grad_metas[i] =
        egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween(
            ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).second));
  }
  for (size_t i = 0; i < ctx.OutputRange().size(); i++) {
    outs_auto_grad_metas[i] =
        egr::EagerUtils::unsafe_autograd_meta(ctx.OutputsBetweeen(
            ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second));
  }
  // Grad node is only needed when tracing backward and at least one input
  // requires grad.
  bool require_any_grad = false;
  for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
    require_any_grad =
        require_any_grad || egr::EagerUtils::ComputeRequireGrad(
                                trace_backward, &(ins_auto_grad_metas[i]));
  }
  if (require_any_grad) {
    VLOG(6) << " Construct Grad for Custom Op: " << op_type;
    ConstructFwdAndBwdMap(vec_map, op_type);
    for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
      egr::EagerUtils::PassStopGradient(false, &(outs_auto_grad_metas[i]));
    }
    auto grad_node = std::make_shared<egr::RunCustomOpNode>(
        outs_auto_grad_metas.size(), ins_auto_grad_metas.size(), op_type);
    // NOTE(perf): const reference — the original copied the slot map
    // (a vector of five unordered_maps) here.
    const auto& slot_map =
        egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type);
    // Prepare Grad outputs
    size_t no_grad_cnt = 0;
    for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
      if (slot_map[0].find(i) != slot_map[0].end()) {
        grad_node->SetGradOutMeta(&ins_auto_grad_metas[i], slot_map[0].at(i));
        grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0].at(i));
      } else {
        // Inputs with no matching grad output are packed into the tail
        // slots in reverse order.
        grad_node->SetGradOutMeta(&ins_auto_grad_metas[i],
                                  ins_auto_grad_metas.size() - 1 - no_grad_cnt);
        grad_node->AddEdges(&ins_auto_grad_metas[i],
                            ins_auto_grad_metas.size() - 1 - no_grad_cnt);
        no_grad_cnt++;
      }
    }
    // Prepare Grad inputs with grad of fwd outputs
    for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
      egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i);
      egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node);
      grad_node->SetGradInMeta(&(outs_auto_grad_metas[i]), i);
      egr::EagerUtils::CheckAndRetainGrad(ctx.OutputsBetweeen(
          ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second));
    }
    // Prepare Grad inputs with fwd outputs (slot_map[2]: fwd outputs the
    // grad kernel consumes directly).
    for (auto it = slot_map[2].begin(); it != slot_map[2].end(); it++) {
      VLOG(7) << "Prepare fwd_outs: " << it->first
              << " to grad_inputs: " << it->second;
      grad_node->fwd_outs[it->second] =
          egr::RunCustomOpNode::ConstructTensorWrapper(
              ctx.OutputsBetweeen(ctx.OutputRangeAt(it->first).first,
                                  ctx.OutputRangeAt(it->first).second));
    }
    // Prepare Grad inputs with fwd inputs (slot_map[3]: fwd inputs the
    // grad kernel consumes directly).
    for (auto it = slot_map[3].begin(); it != slot_map[3].end(); it++) {
      VLOG(7) << "Prepare fwd_ins: " << it->first
              << " to grad_inputs: " << it->second;
      grad_node->fwd_ins[it->second] =
          egr::RunCustomOpNode::ConstructTensorWrapper(
              ctx.InputsBetween(ctx.InputRangeAt(it->first).first,
                                ctx.InputRangeAt(it->first).second));
    }
    auto attrs_names = paddle::framework::OpMetaInfoHelper::GetAttrs(
        meta_info_map.at(op_type)[1]);
    std::vector<paddle::any> attrs(attrs_names.size());
    // Prepare attrs for Grad node (slot_map[4]: fwd attr -> grad attr).
    for (auto it = slot_map[4].begin(); it != slot_map[4].end(); it++) {
      VLOG(7) << "Prepare fwd attrs: " << it->first
              << " to grad_attrs: " << it->second;
      attrs[it->second] = res_attrs[it->first];
    }
    grad_node->SetAttrs(attrs);
  }
  Py_INCREF(Py_None);
  return Py_None;
  EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyMethodDef variable_functions[] = {
// TODO(jiabin): Remove scale when we have final state tests
{"scale", (PyCFunction)(void (*)(void))eager_api_scale,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_expected_place",
......@@ -179,6 +452,8 @@ PyMethodDef variable_functions[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"run_backward", (PyCFunction)(void (*)(void))eager_api_run_backward,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_run_custom_op", (PyCFunction)(void (*)(void))eager_api_run_costum_op,
METH_VARARGS | METH_KEYWORDS, NULL},
{"tensor_copy", (PyCFunction)(void (*)(void))eager_api_tensor_copy,
METH_VARARGS | METH_KEYWORDS, NULL},
{"read_next_tensor_list",
......
......@@ -72,7 +72,7 @@ PyObject* tensor_properties_get_grad(TensorObject* self, void* closure) {
EAGER_TRY
VLOG(6) << "Get grad for tensor: " << self->tensor.name();
auto meta = egr::EagerUtils::nullable_autograd_meta(self->tensor);
if (meta) {
if (meta && meta->Grad().initialized()) {
return ToPyObject(meta->Grad());
} else {
Py_INCREF(Py_None);
......
......@@ -27,10 +27,10 @@ limitations under the License. */
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/op_function_common.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
namespace paddle {
namespace pybind {
......@@ -46,6 +46,7 @@ extern PyTypeObject* g_npuplace_pytype;
extern PyTypeObject* g_cudapinnedplace_pytype;
extern PyTypeObject* g_framework_tensor_pytype;
extern PyTypeObject* g_framework_lodtensorarray_pytype;
extern PyTypeObject* g_custom_op_kernel_ctx_pytype;
int TensorDtype2NumpyDtype(phi::DataType dtype) {
switch (dtype) {
......@@ -184,7 +185,7 @@ paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos) {
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"argument (position %d) must be "
"EagerVariable, but got %s",
"Tensor, but got %s",
arg_pos + 1, reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
}
}
......@@ -319,7 +320,7 @@ framework::Tensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) {
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"argument (position %d) must be "
"EagerVariable, but got %s",
"DenseTensor, but got %s",
arg_pos + 1, reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
}
}
......@@ -391,6 +392,19 @@ paddle::framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
return dtype;
}
// Casts a Python object to paddle::CustomOpKernelContext.
// Throws InvalidArgument when the object is not an instance of the
// registered CustomOpKernelContext pybind type.
paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj,
                                                              ssize_t arg_pos) {
  if (PyObject_IsInstance(
          obj, reinterpret_cast<PyObject*>(g_custom_op_kernel_ctx_pytype))) {
    return ::pybind11::handle(obj).cast<paddle::CustomOpKernelContext>();
  } else {
    // BUG FIX: the error message was copy-pasted from the Place cast and
    // listed Place types; report the actually expected type.
    PADDLE_THROW(platform::errors::InvalidArgument(
        "argument (position %d) must be "
        "CustomOpKernelContext, "
        "but got %s",
        arg_pos + 1, reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
  }
}
PyObject* ToPyObject(bool value) {
if (value) {
Py_INCREF(Py_True);
......@@ -928,6 +942,5 @@ paddle::experimental::DataType CastPyArg2DataType(PyObject* obj,
framework::proto::VarType::Type type = CastPyArg2ProtoType(obj, arg_pos);
return framework::TransToPhiDataType(type);
}
} // namespace pybind
} // namespace paddle
......@@ -20,10 +20,10 @@ limitations under the License. */
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
namespace paddle {
class CustomOpKernelContext;
namespace framework {
class Scope;
}
namespace pybind {
typedef struct {
......@@ -40,6 +40,8 @@ int CastPyArg2AttrInt(PyObject* obj, ssize_t arg_pos);
int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos);
float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos);
std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos);
paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj,
ssize_t arg_pos);
paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos);
std::shared_ptr<imperative::VarBase> CastPyArg2VarBase(PyObject* obj,
ssize_t arg_pos);
......@@ -52,6 +54,7 @@ std::vector<framework::LoDTensor> CastPyArg2VectorOfTensorBase(PyObject* obj,
std::vector<int> CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos);
framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
ssize_t arg_pos);
PyObject* ToPyObject(int value);
PyObject* ToPyObject(bool value);
PyObject* ToPyObject(int64_t value);
......@@ -138,6 +141,7 @@ std::vector<paddle::experimental::Tensor*> GetTensorPtrListFromArgs(
ssize_t arg_idx, bool dispensable = false);
// end of Slice related methods
std::vector<paddle::framework::Scope*> GetScopePtrListFromArgs(
const std::string& op_type, const std::string& arg_name, PyObject* args,
ssize_t arg_idx, bool dispensable);
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/pybind/exception.h"
#include "paddle/phi/api/ext/exception.h"
namespace paddle {
namespace pybind {
......@@ -122,6 +122,8 @@ void ThrowExceptionToPython(std::exception_ptr p) {
PyErr_SetString(EnforceNotMetException, e.what());
break;
}
} catch (const paddle::PD_Exception& e) {
PyErr_SetString(PyExc_OSError, e.what());
}
}
} // namespace pybind
......
......@@ -164,6 +164,9 @@ limitations under the License. */
#include "paddle/fluid/pybind/fleet_py.h"
#endif
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "pybind11/stl.h"
DECLARE_bool(use_mkldnn);
......@@ -187,6 +190,7 @@ PyTypeObject *g_cudapinnedplace_pytype = nullptr;
PyTypeObject *g_mluplace_pytype = nullptr;
PyTypeObject *g_framework_tensor_pytype = nullptr;
PyTypeObject *g_framework_lodtensorarray_pytype = nullptr;
PyTypeObject *g_custom_op_kernel_ctx_pytype = nullptr;
bool IsCompiledWithCUDA() {
#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
......@@ -757,6 +761,57 @@ PYBIND11_MODULE(core_noavx, m) {
m.def("_promote_types_if_complex_exists",
&paddle::framework::PromoteTypesIfComplexExists);
// Expose CustomOpKernelContext to Python so that eager-mode custom operator
// invocations can stage inputs, outputs and attributes from the Python side
// before the kernel runs.
py::class_<paddle::CustomOpKernelContext> custom_op_kernel_ctx(
    m, "CustomOpKernelContext", R"DOC()DOC");
// Cache the Python type object; CastPyArg2CustomOpKernelContext relies on it
// for its PyObject_IsInstance check when converting arguments back to C++.
g_custom_op_kernel_ctx_pytype =
    reinterpret_cast<PyTypeObject *>(custom_op_kernel_ctx.ptr());
custom_op_kernel_ctx.def(py::init<>())
    // add_inputs accepts either a single Tensor or a list/tuple of Tensors.
    .def("add_inputs",
         [](paddle::CustomOpKernelContext &self, const py::handle &input) {
           PyObject *obj = input.ptr();
           if (PyList_Check(obj) || PyTuple_Check(obj)) {
             self.EmplaceBackInputs(
                 std::move(CastPyArg2VectorOfTensor(obj, 1)));
           } else {
             self.EmplaceBackInput(std::move(CastPyArg2Tensor(obj, 1)));
           }
         })
    // add_outputs follows the same convention as add_inputs.
    .def("add_outputs",
         [](paddle::CustomOpKernelContext &self, py::handle &outputs) {
           PyObject *obj = outputs.ptr();
           if (PyList_Check(obj) || PyTuple_Check(obj)) {
             self.EmplaceBackOutputs(
                 std::move(CastPyArg2VectorOfTensor(obj, 1)));
           } else {
             self.EmplaceBackOutput(std::move(CastPyArg2Tensor(obj, 1)));
           }
         })
    // One add_attr overload per supported attribute type; pybind11 picks the
    // overload based on the Python argument's runtime type.
    .def("add_attr", [](paddle::CustomOpKernelContext &self,
                        bool attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr", [](paddle::CustomOpKernelContext &self,
                        int attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr", [](paddle::CustomOpKernelContext &self,
                        float attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr", [](paddle::CustomOpKernelContext &self,
                        int64_t attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr",
         [](paddle::CustomOpKernelContext &self, const std::string &attr) {
           self.EmplaceBackAttr(attr);
         })
    .def("add_attr",
         [](paddle::CustomOpKernelContext &self,
            const std::vector<int> &attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr",
         [](paddle::CustomOpKernelContext &self,
            const std::vector<float> &attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr",
         [](paddle::CustomOpKernelContext &self,
            const std::vector<int64_t> &attr) { self.EmplaceBackAttr(attr); })
    .def("add_attr", [](paddle::CustomOpKernelContext &self,
                        const std::vector<std::string> &attr) {
      self.EmplaceBackAttr(attr);
    });
py::class_<framework::Tensor> framework_tensor(m, "Tensor",
py::buffer_protocol());
g_framework_tensor_pytype =
......@@ -2827,10 +2882,11 @@ All parameter, weight, gradient are variables in Paddle.
m.def("init_gflags", framework::InitGflags);
m.def("init_glog", framework::InitGLOG);
m.def("load_op_meta_info_and_register_op",
framework::LoadOpMetaInfoAndRegisterOp);
m.def("load_op_meta_info_and_register_op", [](const std::string dso_name) {
egr::Controller::Instance().MergeOpMetaInfoMap(
framework::LoadOpMetaInfoAndRegisterOp(dso_name));
});
m.def("init_devices", []() { framework::InitDevices(); });
m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
m.def("is_compiled_with_ascend", IsCompiledWithAscend);
m.def("is_compiled_with_rocm", IsCompiledWithROCM);
......
......@@ -86,19 +86,28 @@ class PADDLE_API CustomOpKernelContext {
CustomOpKernelContext() = default;
void EmplaceBackInput(Tensor&& input);
void EmplaceBackInputs(std::vector<Tensor>&& inputs);
void EmplaceBackInputs(const std::vector<Tensor>& inputs);
void EmplaceBackOutput(Tensor&& output);
void EmplaceBackOutputs(std::vector<Tensor>&& outputs);
void EmplaceBackOutputs(const std::vector<Tensor>& outputs);
void EmplaceBackAttr(paddle::any attr);
void EmplaceBackAttrs(const std::vector<paddle::any>& attrs) {
attrs_ = std::move(attrs);
}
const std::pair<size_t, size_t>& InputRangeAt(size_t idx) const;
const std::pair<size_t, size_t>& OutputRangeAt(size_t idx) const;
const Tensor& InputAt(size_t idx) const;
std::vector<Tensor> InputsBetween(size_t start, size_t end) const;
const std::vector<paddle::any>& Attrs() const { return attrs_; }
const std::vector<std::pair<size_t, size_t>>& InputRange() {
return input_range_;
}
const std::vector<std::pair<size_t, size_t>>& OutputRange() {
return output_range_;
}
Tensor* MutableOutputAt(size_t idx);
std::vector<Tensor*> MutableOutputBetweeen(size_t start, size_t end);
std::vector<Tensor> OutputsBetweeen(size_t start, size_t end);
std::vector<Tensor>* AllMutableOutput();
template <typename AttrType>
......@@ -552,7 +561,6 @@ class PADDLE_API OpMetaInfo {
std::vector<std::string> inputs_;
std::vector<std::string> outputs_;
std::vector<std::string> attrs_;
// 2. func info
KernelFunc kernel_fn_{nullptr};
InferShapeFunc infer_shape_fn_{nullptr};
......
......@@ -51,7 +51,8 @@ void CustomOpKernelContext::EmplaceBackInput(Tensor&& input) {
input_range_.emplace_back(std::make_pair(index, index + 1));
}
void CustomOpKernelContext::EmplaceBackInputs(std::vector<Tensor>&& inputs) {
void CustomOpKernelContext::EmplaceBackInputs(
const std::vector<Tensor>& inputs) {
size_t index = inputs_.size();
input_range_.emplace_back(std::make_pair(index, index + inputs.size()));
inputs_.insert(inputs_.end(),
......@@ -65,7 +66,8 @@ void CustomOpKernelContext::EmplaceBackOutput(Tensor&& output) {
output_range_.emplace_back(std::make_pair(index, index + 1));
}
void CustomOpKernelContext::EmplaceBackOutputs(std::vector<Tensor>&& outputs) {
void CustomOpKernelContext::EmplaceBackOutputs(
const std::vector<Tensor>& outputs) {
size_t index = outputs_.size();
output_range_.emplace_back(std::make_pair(index, index + outputs.size()));
outputs_.insert(outputs_.end(),
......@@ -75,6 +77,8 @@ void CustomOpKernelContext::EmplaceBackOutputs(std::vector<Tensor>&& outputs) {
// Appends one type-erased attribute value to the context's attribute list.
void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) {
  attrs_.emplace_back(std::move(attr));
  // Log the slot index and the stored runtime type for debugging.
  const size_t pos = attrs_.size() - 1;
  VLOG(7) << "attrs_ No." << pos
          << " has value of type: " << attrs_[pos].type().name();
}
const Tensor& CustomOpKernelContext::InputAt(size_t idx) const {
......@@ -102,6 +106,15 @@ std::vector<Tensor*> CustomOpKernelContext::MutableOutputBetweeen(size_t start,
return rlt;
}
// Returns copies of the output tensors in the half-open index range
// [start, end). The underlying outputs_ storage is not modified.
//
// NOTE: the misspelling "Betweeen" is kept because it is part of the
// declared public interface (see the header's MutableOutputBetweeen /
// OutputsBetweeen declarations) and callers depend on it.
std::vector<Tensor> CustomOpKernelContext::OutputsBetweeen(size_t start,
                                                           size_t end) {
  std::vector<Tensor> rlt;
  // Reserve up front to avoid repeated reallocation while copying; guard
  // against size_t underflow when the caller passes start > end (the loop
  // below would simply not execute in that case).
  if (end > start) {
    rlt.reserve(end - start);
  }
  for (size_t i = start; i < end; ++i) {
    rlt.emplace_back(outputs_.at(i));
  }
  return rlt;
}
// Exposes the whole output vector for in-place mutation. The returned
// pointer refers to context-owned storage and stays valid only as long as
// this CustomOpKernelContext is alive and no further outputs are emplaced.
std::vector<Tensor>* CustomOpKernelContext::AllMutableOutput() {
  return &outputs_;
}
......
......@@ -111,8 +111,8 @@ void Tensor::reshape(const std::vector<int64_t> &shape) {
"touching underlying data, this requires the total size of "
"the tensor to remain constant.";
if (is_dense_tensor()) {
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->set_meta(
phi::DenseTensorMeta(dtype(), phi::make_ddim(shape)));
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->Resize(
phi::make_ddim(shape));
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"Only support reshape operation on DenseTensor now."));
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/phi/common/scalar_array.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/tensor_base.h"
......
......@@ -81,15 +81,14 @@ def backward(tensors, grad_tensors=None, retain_graph=False):
if isinstance(in_out_list, (list, tuple)):
assert len(in_out_list) > 0, "{} connot be empyt".format(name)
for each_var in in_out_list:
assert isinstance(
each_var, paddle.
Tensor), "Elements of {} must be paddle.Tensor".format(name)
assert isinstance(each_var, (
paddle.Tensor, core.eager.Tensor
)), "Elements of {} must be paddle.Tensor".format(name)
return in_out_list
else:
assert isinstance(
in_out_list,
paddle.Tensor), "{} must be Tensor or list of Tensor".format(
name)
assert isinstance(in_out_list, (
paddle.Tensor, core.eager.Tensor
)), "{} must be Tensor or list of Tensor".format(name)
return [in_out_list]
tensors = check_tensors(tensors, "tensors")
......@@ -105,8 +104,11 @@ def backward(tensors, grad_tensors=None, retain_graph=False):
for each_tensor in grad_tensors:
if each_tensor is not None:
assert isinstance(
each_tensor, paddle.Tensor
each_tensor, (paddle.Tensor, core.eager.Tensor)
), "The argument 'grad_tensors' of paddle.autograd.backward is invalid, it can be 'None', 'paddle.Tensor' or 'list[None/paddle.Tensor]'."
else:
if core._in_eager_mode():
grad_tensors = []
else:
grad_tensors = [None] * len(tensors)
......@@ -116,5 +118,8 @@ def backward(tensors, grad_tensors=None, retain_graph=False):
assert isinstance(retain_graph, bool), "retain_graph must be True or False"
if core._in_eager_mode():
core.eager.run_backward(tensors, grad_tensors, retain_graph)
else:
core.dygraph_run_backward(tensors, grad_tensors, retain_graph,
framework._dygraph_tracer())
......@@ -311,7 +311,7 @@ def monkey_patch_varbase():
"""
if core._in_eager_mode():
if not self.grad._is_initialized():
if self.grad is None:
return None
# TODO(wanghuancoder) support SELECTED_ROWS
return self.grad.numpy()
......
......@@ -153,6 +153,7 @@ PD_BUILD_GRAD_OP(custom_relu_no_x_in_backward)
.SetInferShapeFn(PD_INFER_SHAPE(ReluBackwardWithoutXInferShape));
void relu_cpu_forward_out(const paddle::Tensor& x, paddle::Tensor* out) {
out->reshape(x.shape());
PD_DISPATCH_FLOATING_TYPES(
x.type(), "relu_cpu_forward", ([&] {
relu_cpu_forward_kernel<data_t>(
......@@ -164,6 +165,7 @@ void relu_cpu_backward_out(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out,
paddle::Tensor* grad_x) {
grad_x->reshape(x.shape());
PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
relu_cpu_backward_kernel<data_t>(
grad_out.data<data_t>(),
......
......@@ -94,6 +94,7 @@ void relu_cuda_forward_out(const paddle::Tensor& x, paddle::Tensor* out) {
int numel = x.size();
int block = 512;
int grid = (numel + block - 1) / block;
out->reshape(x.shape());
PD_DISPATCH_FLOATING_AND_HALF_TYPES(
x.type(), "relu_cuda_forward_kernel", ([&] {
relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
......@@ -108,6 +109,7 @@ void relu_cuda_backward_out(const paddle::Tensor& x,
int numel = out.size();
int block = 512;
int grid = (numel + block - 1) / block;
grad_x->reshape(x.shape());
PD_DISPATCH_FLOATING_AND_HALF_TYPES(
out.type(), "relu_cuda_backward_kernel", ([&] {
relu_cuda_backward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
......
......@@ -20,6 +20,7 @@ import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
......@@ -53,7 +54,7 @@ class TestJitCustomAttrs(unittest.TestCase):
self.int64_vec_attr = [10000000000, 10000000000, 10000000000]
self.str_vec_attr = ["StrAttr", "StrAttr", "StrAttr"]
def test_attr_value(self):
def func_attr_value(self):
x = paddle.ones([2, 2], dtype='float32')
x.stop_gradient = False
out = custom_attrs.attr_test(
......@@ -65,7 +66,12 @@ class TestJitCustomAttrs(unittest.TestCase):
self.assertTrue(np.array_equal(x.numpy(), out.numpy()))
def test_const_attr_value(self):
def test_attr_value(self):
    # Run the attribute-value check twice: once inside the
    # _test_eager_guard() context and once outside it, so both
    # execution modes are exercised.
    with _test_eager_guard():
        self.func_attr_value()
    self.func_attr_value()
def func_const_attr_value(self):
x = paddle.ones([2, 2], dtype='float32')
x.stop_gradient = False
out = custom_attrs.const_attr_test(
......@@ -77,6 +83,11 @@ class TestJitCustomAttrs(unittest.TestCase):
self.assertTrue(np.array_equal(x.numpy(), out.numpy()))
def test_const_attr_value(self):
    # Run the const-attribute check under _test_eager_guard() and then
    # again in the default mode.
    with _test_eager_guard():
        self.func_const_attr_value()
    self.func_const_attr_value()
if __name__ == '__main__':
unittest.main()
......@@ -21,6 +21,7 @@ import paddle.static as static
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
......@@ -116,7 +117,7 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
"custom op {}: {},\n paddle api {}: {}".format(name, out, name,
pd_out))
def test_dynamic(self):
def func_dynamic(self):
for dtype in self.dtypes:
for axis in self.axises:
out, grad_inputs = concat_dynamic(custom_ops.custom_concat,
......@@ -128,6 +129,11 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
for x_grad, pd_x_grad in zip(grad_inputs, pd_grad_inputs):
self.check_output(x_grad, pd_x_grad, "x_grad")
def test_dynamic(self):
    # Run the dynamic-graph concat check once inside _test_eager_guard()
    # and once outside it.
    with _test_eager_guard():
        self.func_dynamic()
    self.func_dynamic()
def test_static(self):
for dtype in self.dtypes:
for axis in self.axises:
......@@ -140,7 +146,7 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
self.check_output(x1_grad, pd_x1_grad, "x1_grad")
self.check_output(x2_grad, pd_x2_grad, "x2_grad")
def test_dynamic_with_attr(self):
def func_dynamic_with_attr(self):
for dtype in self.dtypes:
for axis in self.axises:
out, grad_inputs = concat_dynamic(
......@@ -153,6 +159,11 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
for x_grad, pd_x_grad in zip(grad_inputs, pd_grad_inputs):
self.check_output(x_grad, pd_x_grad, "x_grad")
def test_dynamic_with_attr(self):
    # Run the attributed dynamic-graph check under _test_eager_guard()
    # and then again in the default mode.
    with _test_eager_guard():
        self.func_dynamic_with_attr()
    self.func_dynamic_with_attr()
def test_static_with_attr(self):
for dtype in self.dtypes:
for axis in self.axises:
......
......@@ -21,6 +21,7 @@ import paddle.static as static
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.fluid.framework import _test_eager_guard
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
......@@ -116,11 +117,16 @@ class TestCustomConjJit(unittest.TestCase):
self.check_output(out, pd_out, "out")
self.check_output(x_grad, pd_x_grad, "x's grad")
def test_dynamic(self):
def func_dynamic(self):
for dtype in self.dtypes:
np_input = np.random.random(self.shape).astype(dtype)
self.run_dynamic(dtype, np_input)
def test_dynamic(self):
    # Run the conj dynamic-graph check once inside _test_eager_guard()
    # and once outside it.
    with _test_eager_guard():
        self.func_dynamic()
    self.func_dynamic()
def test_static(self):
for dtype in self.dtypes:
np_input = np.random.random(self.shape).astype(dtype)
......
......@@ -22,6 +22,7 @@ import paddle.nn.functional as F
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
......@@ -94,7 +95,7 @@ class TestCustomLinearJit(unittest.TestCase):
self.np_bias)
self.check_output(pten_out, pd_out, "pten_out")
def test_dynamic(self):
def func_dynamic(self):
for dtype in self.dtypes:
pten_out = linear_dynamic(custom_ops.pten_linear, dtype, self.np_x,
self.np_weight, self.np_bias)
......@@ -102,6 +103,11 @@ class TestCustomLinearJit(unittest.TestCase):
self.np_bias)
self.check_output(pten_out, pd_out, "pten_out")
def test_dynamic(self):
    # Run the linear dynamic-graph check once inside _test_eager_guard()
    # and once outside it.
    with _test_eager_guard():
        self.func_dynamic()
    self.func_dynamic()
if __name__ == "__main__":
unittest.main()
......@@ -68,12 +68,6 @@ class TestCustomRawReluOp(unittest.TestCase):
self.assertTrue(custom_raw_relu_op is not None)
return custom_raw_relu_op(x)
def test_dygraph(self):
    # Compare the custom raw relu against paddle.nn.ReLU on the same
    # random input; the two must produce identical results.
    x = paddle.to_tensor(np.random.uniform(low=-1.0, high=1.0, size=[2, 3]))
    y1 = self.custom_raw_relu(x)
    y2 = paddle.nn.ReLU()(x)
    self.assertTrue(np.array_equal(y1.numpy(), y2.numpy()))
def test_static(self):
paddle.enable_static()
shape = [2, 3]
......
......@@ -22,6 +22,7 @@ from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_MAC
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
......@@ -98,7 +99,7 @@ class TestDygraphModel(unittest.TestCase):
self.x_spec = paddle.static.InputSpec(
shape=[None, self.in_dim], dtype='float32', name='x')
def test_train_eval(self):
def func_train_eval(self):
for device in self.devices:
# set device
paddle.set_device(device)
......@@ -106,27 +107,35 @@ class TestDygraphModel(unittest.TestCase):
# for train
origin_relu_train_out = self.train_model(use_custom_op=False)
custom_relu_train_out = self.train_model(use_custom_op=True)
# Enable this branch once dy2stat (to_static) is supported in eager mode
if not _in_eager_mode():
custom_relu_dy2stat_train_out = self.train_model(
use_custom_op=True, dy2stat=True) # for to_static
self.assertTrue(
np.array_equal(origin_relu_train_out, custom_relu_train_out))
self.assertTrue(
np.array_equal(origin_relu_train_out,
custom_relu_dy2stat_train_out))
self.assertTrue(
np.array_equal(origin_relu_train_out, custom_relu_train_out))
# for eval
origin_relu_eval_out = self.eval_model(use_custom_op=False)
custom_relu_eval_out = self.eval_model(use_custom_op=True)
if not _in_eager_mode():
custom_relu_dy2stat_eval_out = self.eval_model(
use_custom_op=True, dy2stat=True) # for to_static
self.assertTrue(
np.array_equal(origin_relu_eval_out, custom_relu_eval_out))
self.assertTrue(
np.array_equal(origin_relu_eval_out,
custom_relu_dy2stat_eval_out))
self.assertTrue(
np.array_equal(origin_relu_eval_out, custom_relu_eval_out))
def test_train_eval(self):
    # Run the train/eval comparison once inside _test_eager_guard() and
    # once outside it.
    with _test_eager_guard():
        self.func_train_eval()
    self.func_train_eval()
def train_model(self, use_custom_op=False, dy2stat=False):
# reset random seed
paddle.seed(self.seed)
......
......@@ -20,7 +20,7 @@ from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS, IS_MAC
from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format(
......@@ -75,7 +75,7 @@ class TestJITLoad(unittest.TestCase):
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
def test_dynamic(self):
def func_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
if device == 'cpu' and dtype == 'float16':
......@@ -95,8 +95,14 @@ class TestJITLoad(unittest.TestCase):
"custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad))
def test_exception(self):
def test_dynamic(self):
    # Run the JIT-loaded custom relu check once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_dynamic()
    self.func_dynamic()
def func_exception(self):
caught_exception = False
# if not _in_eager_mode():
try:
x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
custom_relu_dynamic(custom_module.custom_relu, 'cpu', 'int32', x)
......@@ -114,11 +120,11 @@ class TestJITLoad(unittest.TestCase):
"python/paddle/fluid/tests/custom_op/custom_relu_op.cc" in
str(e))
self.assertTrue(caught_exception)
caught_exception = False
# MAC-CI don't support GPU
if IS_MAC:
return
# if not _in_eager_mode():
try:
x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'int32', x)
......@@ -132,6 +138,11 @@ class TestJITLoad(unittest.TestCase):
str(e))
self.assertTrue(caught_exception)
def test_exception(self):
    # Verify the dtype-error path raises in both modes: once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_exception()
    self.func_exception()
def test_load_multiple_module(self):
custom_module = load(
name='custom_conj_jit',
......
......@@ -21,6 +21,7 @@ import paddle.static as static
import subprocess
import numpy as np
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.framework import _test_eager_guard
def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
......@@ -216,7 +217,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
"custom op out: {},\n paddle api out: {}".format(
out, pd_out))
def test_dynamic(self):
def func_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
if device == 'cpu' and dtype == 'float16':
......@@ -236,6 +237,11 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
"custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad))
def test_dynamic(self):
    # Run the setup-installed custom relu check once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_dynamic()
    self.func_dynamic()
def test_static_save_and_load_inference_model(self):
paddle.enable_static()
np_data = np.random.random((1, 1, 28, 28)).astype("float32")
......
......@@ -20,6 +20,7 @@ import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
......@@ -39,7 +40,7 @@ custom_ops = load(
class TestCustomSimpleSliceJit(unittest.TestCase):
def test_slice_output(self):
def func_slice_output(self):
np_x = np.random.random((5, 2)).astype("float32")
x = paddle.to_tensor(np_x)
custom_op_out = custom_ops.custom_simple_slice(x, 2, 3)
......@@ -48,6 +49,11 @@ class TestCustomSimpleSliceJit(unittest.TestCase):
np.array_equal(custom_op_out, np_out),
"custom op: {},\n numpy: {}".format(np_out, custom_op_out.numpy()))
def test_slice_output(self):
    # Run the slice-output check once inside _test_eager_guard() and
    # once outside it.
    with _test_eager_guard():
        self.func_slice_output()
    self.func_slice_output()
if __name__ == "__main__":
unittest.main()
......@@ -19,7 +19,7 @@ import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_cc_args
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.framework import _test_eager_guard
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory())
......@@ -39,7 +39,7 @@ class TestJitDispatch(unittest.TestCase):
def setUp(self):
    # Dispatch tests run on CPU only.
    paddle.set_device('cpu')
def run_dispatch_test(self, func, dtype):
def run_dispatch_test_impl(self, func, dtype):
np_x = np.ones([2, 2]).astype(dtype)
x = paddle.to_tensor(np_x)
out = func(x)
......@@ -50,6 +50,11 @@ class TestJitDispatch(unittest.TestCase):
np.array_equal(np_x, np_out),
"custom op x: {},\n custom op out: {}".format(np_x, np_out))
def run_dispatch_test(self, func, dtype):
    # Run the dispatch check for the given op and dtype twice: once
    # inside the _test_eager_guard() context and once outside it.
    with _test_eager_guard():
        self.run_dispatch_test_impl(func, dtype)
    self.run_dispatch_test_impl(func, dtype)
def test_dispatch_integer(self):
dtypes = ["int32", "int64", "int8", "uint8", "int16"]
for dtype in dtypes:
......
......@@ -22,7 +22,7 @@ from paddle.utils.cpp_extension import load
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args
from paddle.fluid.framework import _test_eager_guard
# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory())
......@@ -84,7 +84,7 @@ class TestMultiOutputDtypes(unittest.TestCase):
self.check_multi_outputs(res)
paddle.disable_static()
def test_dynamic(self):
def func_dynamic(self):
for device in self.devices:
for dtype in self.dtypes:
paddle.set_device(device)
......@@ -95,6 +95,11 @@ class TestMultiOutputDtypes(unittest.TestCase):
self.assertTrue(len(outs) == 3)
self.check_multi_outputs(outs, True)
def test_dynamic(self):
    # Run the multi-output dtype check once inside _test_eager_guard()
    # and once outside it.
    with _test_eager_guard():
        self.func_dynamic()
    self.func_dynamic()
if __name__ == '__main__':
unittest.main()
......@@ -20,6 +20,7 @@ import numpy as np
import paddle
import paddle.fluid.dygraph as dg
from op_test import OpTest
from paddle.fluid.framework import _test_eager_guard
class TestTensorBackward(unittest.TestCase):
......@@ -29,7 +30,7 @@ class TestTensorBackward(unittest.TestCase):
if paddle.is_compiled_with_cuda():
self._places.append(paddle.CUDAPlace(0))
def test_tensor_backward(self):
def func_tensor_backward(self):
for dtype in self._dtypes:
x = np.random.random([2, 100]).astype(dtype)
y = np.random.random([100, 2]).astype(dtype)
......@@ -48,6 +49,11 @@ class TestTensorBackward(unittest.TestCase):
self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
def test_tensor_backward(self):
    # Run the Tensor.backward check once inside _test_eager_guard() and
    # once outside it.
    with _test_eager_guard():
        self.func_tensor_backward()
    self.func_tensor_backward()
class TestBackwardAPI(unittest.TestCase):
def setUp(self):
......@@ -56,7 +62,7 @@ class TestBackwardAPI(unittest.TestCase):
if paddle.is_compiled_with_cuda():
self._places.append(paddle.CUDAPlace(0))
def test_backward_api(self):
def func_backward_api(self):
for dtype in self._dtypes:
x = np.random.random([2, 2]).astype(dtype)
y = np.random.random([2, 2]).astype(dtype)
......@@ -78,7 +84,12 @@ class TestBackwardAPI(unittest.TestCase):
self.assertTrue(
np.allclose(x_grad * 2, x_tensor.grad.numpy()))
def test_backward_single_tensor(self):
def test_backward_api(self):
    # Run the paddle.autograd.backward check once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_backward_api()
    self.func_backward_api()
def func_backward_single_tensor(self):
for dtype in self._dtypes:
x = np.random.random([2, 2]).astype(dtype)
y = np.random.random([2, 2]).astype(dtype)
......@@ -97,7 +108,12 @@ class TestBackwardAPI(unittest.TestCase):
self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
def test_backward_none_grad_tensor(self):
def test_backward_single_tensor(self):
    # Run the single-tensor backward check once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_backward_single_tensor()
    self.func_backward_single_tensor()
def func_backward_none_grad_tensor(self):
for dtype in self._dtypes:
x = np.random.random([2, 2]).astype(dtype)
y = np.random.random([2, 2]).astype(dtype)
......@@ -115,7 +131,12 @@ class TestBackwardAPI(unittest.TestCase):
self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
def test_backward_accumulator_with_init_grad(self):
def test_backward_none_grad_tensor(self):
    # Run the None-grad-tensor backward check once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_backward_none_grad_tensor()
    self.func_backward_none_grad_tensor()
def func_backward_accumulator_with_init_grad(self):
for dtype in self._dtypes:
x = np.random.random([10, ]).astype(dtype)
y_grad = np.random.random([10, ]).astype(dtype)
......@@ -134,11 +155,14 @@ class TestBackwardAPI(unittest.TestCase):
y = x**2
z = x**3
x_grad = 2 * x_tensor * (
y_grad_tensor + 3 * y_tensor * y_tensor * z_grad_tensor)
x_grad = 2 * x * (y_grad + 3 * y * y * z_grad)
self.assertTrue(
np.allclose(x_grad.numpy(), x_tensor.grad.numpy()))
self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
def test_backward_accumulator_with_init_grad(self):
    # Run the accumulator-with-initial-grad backward check once inside
    # _test_eager_guard() and once outside it.
    with _test_eager_guard():
        self.func_backward_accumulator_with_init_grad()
    self.func_backward_accumulator_with_init_grad()
if __name__ == '__main__':
......
......@@ -50,7 +50,7 @@ class EagerScaleTestCase(unittest.TestCase):
data_eager.retain_grads()
out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True)
self.assertFalse(data_eager.grad._is_initialized())
self.assertIsNone(data_eager.grad)
out_eager.backward(grad_eager, False)
self.assertTrue(data_eager.grad._is_initialized())
self.assertTrue(np.array_equal(data_eager.grad.numpy(), input_data))
......@@ -72,7 +72,7 @@ class EagerScaleTestCase(unittest.TestCase):
data_eager.retain_grads()
out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True)
self.assertFalse(data_eager.grad._is_initialized())
self.assertIsNone(data_eager.grad)
with self.assertRaisesRegexp(
AssertionError,
"The type of grad_tensor must be paddle.Tensor"):
......
......@@ -158,6 +158,7 @@
param : [x]
kernel :
func : scale, scale_sr
inplace : (x -> out)
- api : sign
args : (Tensor x)
......
......@@ -146,6 +146,9 @@ def custom_write_stub(resource, pyfile):
import types
import paddle
cur_dir = os.path.dirname(os.path.abspath(__file__))
so_path = os.path.join(cur_dir, "{resource}")
def inject_ext_module(module_name, api_names):
if module_name in sys.modules:
return sys.modules[module_name]
......@@ -157,9 +160,6 @@ def custom_write_stub(resource, pyfile):
return new_module
def __bootstrap__():
cur_dir = os.path.dirname(os.path.abspath(__file__))
so_path = os.path.join(cur_dir, "{resource}")
assert os.path.exists(so_path)
# load custom op shared library with abs path
......@@ -169,6 +169,7 @@ def custom_write_stub(resource, pyfile):
__bootstrap__()
{custom_api}
""").lstrip()
# Parse registering op information
......@@ -900,7 +901,7 @@ def _generate_python_module(module_name,
# delete the temp file before exit python process
atexit.register(lambda: remove_if_exit(api_file))
# write into .py file with RWLock
# write into .py file with RWLock
api_content = [_custom_api_content(op_name) for op_name in op_names]
with open(api_file, 'w') as f:
f.write('\n\n'.join(api_content))
......@@ -911,11 +912,13 @@ def _generate_python_module(module_name,
def _custom_api_content(op_name):
params_str, ins_str, attrs_str, outs_str = _get_api_inputs_str(op_name)
params_str, ins_str, attrs_str, outs_str, in_names, attrs_names = _get_api_inputs_str(
op_name)
lower_in_names = [p.split("@")[0].lower() for p in in_names]
API_TEMPLATE = textwrap.dedent("""
from paddle.fluid.core import VarBase
from paddle.fluid.framework import in_dygraph_mode, _dygraph_tracer
import paddle.fluid.core as core
from paddle.fluid.core import VarBase, CustomOpKernelContext
from paddle.fluid.framework import in_dygraph_mode, _dygraph_tracer, _in_eager_mode
from paddle.fluid.layer_helper import LayerHelper
def {op_name}({inputs}):
......@@ -928,6 +931,17 @@ def _custom_api_content(op_name):
# The output variable's dtype use default value 'float32',
# and the actual dtype of output variable will be inferred in runtime.
if in_dygraph_mode():
if _in_eager_mode():
ctx = CustomOpKernelContext()
for i in {in_names}:
ctx.add_inputs(i)
for j in {attr_names}:
ctx.add_attr(j)
for out_name in out_names:
outs[out_name] = core.eager.Tensor()
ctx.add_outputs(outs[out_name])
core.eager._run_custom_op(ctx, "{op_name}", True)
else:
for out_name in out_names:
outs[out_name] = VarBase()
_dygraph_tracer().trace_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)
......@@ -949,6 +963,9 @@ def _custom_api_content(op_name):
inputs=params_str,
ins=ins_str,
attrs=attrs_str,
# rendered as a Python list literal, e.g. "[x, y, z]"
in_names="[" + ",".join(lower_in_names) + "]",
attr_names="[" + ",".join(attrs_names) + "]",
out_names=outs_str)
return api_content
......@@ -996,7 +1013,7 @@ def _get_api_inputs_str(op_name):
])
# e.g: ['Out', 'Index']
outs_str = "[%s]" % ','.join(["'{}'".format(name) for name in out_names])
return params_str, ins_str, attrs_str, outs_str
return params_str, ins_str, attrs_str, outs_str, in_names, attr_names
def _write_setup_file(name,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册