Unverified commit da47544c, authored by Jiabin Yang, committed by GitHub

Support slim eager (#39874)

* eager, test=develop

* fix bug, test=develop

* eager, test=develop

* merge legacy to fluid

* eager, test=develop

* eager, test=develop

* Refactor TensorAdd func by template and remove gradient_accumulation in eager

* Remove needless target name

* eager, test=develop

* eager, test=develop

* Use overload instead of template

* Remove legacy code

* Remove legacy code

* selectedrows, test=develop

* Remove DataType test

* eager, test=develop

* eager, test=develop

* support gan, test=develop

* Using Tensor directly instead of using EagerTensor

* support gradient_accumulation

* make test_imperative_lod_tensor_to_selected_rows longer

* make test_imperative_lod_tensor_to_selected_rows longer

* refine code

* ptb, test=develop

* Rename all EagerTensor to Tensor

* Rename some EagerTensor to Tensor

* rename EagerTensor to EagerVariable

* eager, test=develop

* eager, test=develop

* eager, test=develop

* eager, test=develop

* add more test

* eager, test=develop

* Support copiable selected rows and merge develop

* save load, eager, test=develop

* save load, eager, test=develop

* refine, test=develop

* remove useless _set_value method

* refine, test=develop

* refine, test=develop

* revert static_runner, test=develop

* EagerTensor to Tensor, test=develop

* refine, test=develop

* refine, test=develop

* clear grad, test=develop

* merge, develop

* merge, develop

* merge, test=develop

* merge, test=develop

* Support quant and part of slice

* support legacy static save

* extend slim tests time

* remove imperative on inference

* remove imperative on inference

* merge develop

* fix typo

* fix typo

* split slice related code into 2 part for imperative and eager

* split slice from inference

* split slice from inference

* fix test_tensor_register_hook
Co-authored-by: Wang Huan <wanghuan29@baidu.com>
Co-authored-by: Weilong Wu <veyron_wu@163.com>
Co-authored-by: wanghuancoder <wanghuancoder@163.com>
Parent commit: d9884e20
@@ -24,11 +24,14 @@ class GradNodeAccumulation : public GradNodeBase {
 public:
  // Constructor: configure fwd input tensors to grad node
  explicit GradNodeAccumulation(AutogradMeta* meta) : GradNodeBase(1, 1) {
+    VLOG(6) << "Construct GradNodeAccumulation";
    weak_grad_ = meta->WeakGrad();
    SetDefaultGradInOutMeta();
  }

-  ~GradNodeAccumulation() override = default;
+  ~GradNodeAccumulation() override {
+    VLOG(6) << "Destruct GradNodeAccumulation";
+  }

  // Functor: perform backward computations
  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
......
@@ -46,7 +46,7 @@ class GradNodeScale : public GradNodeBase {
      const std::vector<paddle::experimental::Tensor>& tensors);
  void SetAttributes_scale(float scale);
+  std::string name() override { return ""; }
  // Members: define fwd input tensors
  // For Scale there is no fwd input tensor needed
 private:
......
@@ -996,6 +996,29 @@ static std::string GenerateGradNodeCreationContent(
  // then generate: "egr::AutogradMeta* p_autograd_out =
  // egr::EagerUtils::autograd_meta("op_proto->outputs()[0].name()")"
  std::string get_autograd_meta_str = "  // Prepare Autograd Meta \n";
+  // If single output slotname and not duplicable,
+  // then generate: "egr::AutogradMeta* p_autograd_out =
+  // egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")"
+  for (const proto::OpProto::Var& output : out_vars) {
+    const std::string& output_name = output.name();
+    const std::string& output_autograd_name = "p_autograd_" + output_name;
+    if (output.duplicable()) {
+      const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
+          "  std::vector<egr::AutogradMeta*> %s = "
+          "egr::EagerUtils::autograd_meta(&%s);\n";
+      get_autograd_meta_str += paddle::string::Sprintf(
+          GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
+    } else {
+      const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
+          "  egr::AutogradMeta* %s = "
+          "egr::EagerUtils::autograd_meta(&%s);\n";
+      get_autograd_meta_str += paddle::string::Sprintf(
+          GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
+    }
+  }
+  VLOG(6) << "Generated outputs autograd_meta";
  for (const proto::OpProto::Var& input : in_vars) {
    const std::string& input_name = input.name();
    const std::string& input_autograd_name = "p_autograd_" + input_name;
@@ -1024,31 +1047,6 @@ static std::string GenerateGradNodeCreationContent(
  }
  VLOG(6) << "Generated inputs autograd_meta";
-  // If single output slotname and not duplicable,
-  // then generate: "egr::AutogradMeta* p_autograd_out =
-  // egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")"
-  for (const proto::OpProto::Var& output : out_vars) {
-    const std::string& output_name = output.name();
-    const std::string& output_autograd_name = "p_autograd_" + output_name;
-    // Skip Intermediate Tensor
-    if (output.duplicable()) {
-      const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
-          "  std::vector<egr::AutogradMeta*> %s = "
-          "egr::EagerUtils::autograd_meta(&%s);\n";
-      get_autograd_meta_str += paddle::string::Sprintf(
-          GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
-    } else {
-      const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
-          "  egr::AutogradMeta* %s = "
-          "egr::EagerUtils::autograd_meta(&%s);\n";
-      get_autograd_meta_str += paddle::string::Sprintf(
-          GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
-    }
-  }
-  VLOG(6) << "Generated outputs autograd_meta";
  std::string prepare_autograd_meta_str = "";
  prepare_autograd_meta_str += get_autograd_meta_str;
  prepare_autograd_meta_str += "\n";
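For a hypothetical non-duplicable output named `Out`, the single-slot template above expands to `egr::AutogradMeta* p_autograd_Out = egr::EagerUtils::autograd_meta(&Out);`; a duplicable output yields the `std::vector<egr::AutogradMeta*>` form instead. The block itself is unchanged apart from being moved ahead of the input loop, so output metas are now prepared before input metas.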
@@ -1204,11 +1202,12 @@ static std::string GenerateGradNodeCreationContent(
      "  %s"
      "  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(%s);\n"
      "  if(require_any_grad) {\n"
+      "    VLOG(6) << \" Construct Grad for %s \"; \n"
      "    egr::EagerUtils::PassStopGradient(%s);\n"
      "%s\n  }";
  std::string grad_node_creation_body_str = paddle::string::Sprintf(
      GRAD_NODE_CREATION_TEMPLATE, prepare_autograd_meta_str,
-      compute_require_grad_args, pass_stop_gradient_args,
+      compute_require_grad_args, op_type, pass_stop_gradient_args,
      grad_node_creation_str);

  return grad_node_creation_body_str;
@@ -2083,22 +2082,24 @@ static std::string GenerateGradNodeHeaderContents(
  const char* GRAD_NODE_TEMPLATE =
      "class GradNode%s : public egr::GradNodeBase {\n"
      " public:\n"
-      "  GradNode%s() : egr::GradNodeBase() {}\n"
+      "  GradNode%s() : egr::GradNodeBase() { VLOG(7) << \" Construct "
+      "GradNode%s \"; }\n"
      "  GradNode%s(size_t bwd_in_slot_num, size_t bwd_out_slot_num) : "
-      "egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}\n"
-      "  ~GradNode%s() override = default;\n"
+      "egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) { VLOG(7) << \" "
+      "Construct GradNode%s \"; }\n"
+      "  ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n"
      "\n"
      "  virtual std::vector<std::vector<paddle::experimental::Tensor>> "
      "operator()(const "
      "std::vector<std::vector<paddle::experimental::Tensor>>& grads) "
      "override;\n"
      "\n"
+      "  std::string name() override { return \" GradNode%s \"; } \n "
+      "\n"
      "  // SetX, SetY, ...\n"
      "%s\n"
      "  // SetAttrMap\n"
      "%s\n"
-      "  std::string name() { return \"GradNode%s\"; }\n"
-      "\n"
      " private:\n"
      "   // TensorWrappers\n"
      "%s\n"
@@ -2195,8 +2196,8 @@ static std::string GenerateGradNodeHeaderContents(
  VLOG(6) << "Generated TensorWrapper";
  std::string grad_node_str = paddle::string::Sprintf(
-      GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type,
-      set_tensor_wrappers_str, set_attr_map_str, op_type,
+      GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type, op_type, op_type,
+      op_type, set_tensor_wrappers_str, set_attr_map_str,
      tensor_wrapper_members_str, attr_members_str);

  return grad_node_str;
......
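As an illustration with a hypothetical `op_type` of `scale`, the revised template generates a `GradNodescale` class whose constructors log `Construct GradNodescale` at `VLOG(7)`, whose destructor logs `Destruct GradNodescale` at `VLOG(6)`, and which now overrides the virtual `name()` from `GradNodeBase` so backward-graph traces can report the concrete node type.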
@@ -538,7 +538,7 @@ class {} : public egr::GradNodeBase {{
  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
      const std::vector<std::vector<paddle::experimental::Tensor>>& grads) override;
+  std::string name() override {{ return \" {} \"; }}
  // SetTensorWrapperX, SetTensorWrapperY, ...
  {}
  // SetAttributes
@@ -553,8 +553,9 @@ class {} : public egr::GradNodeBase {{
"""
    node_declaration_str = NODE_DECLARATION_TEMPLATE.format(
-        grad_node_name, grad_node_name, grad_node_name, grad_node_name,
-        set_tensor_wrapper_methods_str, set_attribute_methods_str,
-        tensor_wrapper_members_str, attribute_members_str)
+        grad_node_name, grad_node_name, grad_node_name, grad_node_name,
+        grad_node_name, set_tensor_wrapper_methods_str,
+        set_attribute_methods_str, tensor_wrapper_members_str,
+        attribute_members_str)

    return node_declaration_str
......
@@ -48,12 +48,16 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
    }
    visited.insert(node);

+    PADDLE_ENFORCE_NOT_NULL(
+        node,
+        paddle::platform::errors::Fatal(
+            "We got null node when we traverse the backward graph, and this "
+            "should not happened please check your code and contact us."));
    // Find and append next nodes
    const std::vector<std::vector<Edge>>& edges = node->GetEdges();
    for (const auto& edge_list : edges) {
      for (const Edge& edge : edge_list) {
        GradNodeBase* next_node = edge.GetMutableGradNode().get();
        // Next node could be nullptr if it is leaf tensor with no
        // AccumulationNode attached
        // Or it could also originated from dispensable inputs
@@ -67,7 +71,6 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
      }
    }
  }
  return node_in_degree_map;
}
......
@@ -30,6 +30,7 @@
namespace egr {

GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
+  VLOG(6) << "Construct GradNodeBase";
  bwd_in_meta_.resize(bwd_in_slot_num);
  bwd_out_meta_.resize(bwd_out_slot_num);
  // adj_edges has the same num as backward outputs
@@ -49,11 +50,15 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
    // its pre-ops
    if (meta && !meta->StopGradient()) {
      auto node = meta->GetMutableGradNode();
-      if (node) {
+      if (node && node.get()) {
+        VLOG(6) << "Add Edges for slot: " << slot_id
+                << " which is: " << meta->GetMutableGradNode()->name();
        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                         meta->OutRankInfo());
      } else {
        meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
+        VLOG(6) << "Add Edges for slot: " << slot_id
+                << " which is: " << meta->GetMutableGradNode()->name();
        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                         meta->OutRankInfo());
      }
@@ -70,7 +75,7 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
                        "inputs's slot num."));
  if (meta && !meta->StopGradient()) {
    auto node = meta->GetMutableGradNode();
-    if (node) {
+    if (node && node.get()) {
      VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
              << this->name() << " to " << meta->GetMutableGradNode()->name();
      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
......
@@ -76,10 +76,10 @@ class GradSlotMeta {
class GradNodeBase {
 public:
-  GradNodeBase() = default;
+  GradNodeBase() { VLOG(6) << "Construct GradNodeBase"; }
  GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num);
  // TODO(jiabin): Should we have other constructor here?
-  virtual ~GradNodeBase() = default;
+  virtual ~GradNodeBase() { VLOG(6) << "Destruct GradNodeBase"; }

  /**
   * operator() designed to contian the real backward execution logic, it should
......
@@ -30,6 +30,7 @@ class GradTestNode : public egr::GradNodeBase {
  GradTestNode(float val, int in_num, int out_num)
      : GradNodeBase(in_num, out_num), val_(val) {}
  GradTestNode() : GradNodeBase() { val_ = 1.0; }
+  std::string name() override { return "GradTestNode"; }
  std::vector<std::vector<paddle::experimental::Tensor>> operator()(
      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
      override {
......
@@ -122,9 +122,10 @@ paddle::experimental::Tensor* EagerUtils::mutable_grad(
void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
                            const std::shared_ptr<GradNodeBase>& grad_node) {
  for (const auto& autograd_meta : *autograd_metas) {
-    if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
-      VLOG(6) << "Warning: Reseting GradNodeAccumulation for leaf tensor is "
-                 "detected";
+    if (autograd_meta->GradNode()) {
+      VLOG(7) << "Should not set grad node twice, original node is:"
+              << autograd_meta->GradNode()->name()
+              << "current is: " << grad_node->name();
    }
    autograd_meta->SetGradNode(grad_node);
  }
@@ -132,11 +133,11 @@ void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
void EagerUtils::SetHistory(AutogradMeta* autograd_meta,
                            const std::shared_ptr<GradNodeBase>& grad_node) {
-  if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
-    VLOG(6)
-        << "Warning: Reseting GradNodeAccumulation for leaf tensor is detected";
+  if (autograd_meta->GradNode()) {
+    VLOG(7) << "Should not set grad node twice, original node is:"
+            << autograd_meta->GradNode()->name()
+            << "current is: " << grad_node->name();
  }
  autograd_meta->SetGradNode(grad_node);
}
......
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
+#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
@@ -30,10 +31,12 @@ limitations under the License. */
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/exception.h"
+#include "paddle/fluid/pybind/slice_utils.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace pybind {
@@ -119,6 +122,29 @@ extern void InitTensorWithNumpyValue(TensorObject* self,
extern PyTypeObject* p_tensor_type;

+Py_ssize_t GetSliceIndexFromPyObject(PyObject* obj) {
+  if (PyObject_IsInstance(obj, reinterpret_cast<PyObject*>(p_tensor_type))) {
+    VLOG(6) << "Call GetSliceIndexFromTensor in Eager";
+    paddle::experimental::Tensor tensor = CastPyArg2Tensor(obj, 0);
+    PADDLE_ENFORCE_EQ(
+        tensor.initialized(), true,
+        paddle::platform::errors::InvalidArgument(
+            "We can only support initialized tensor in slice, however we got "
+            "uninitialized tensor %s, please check your code.",
+            tensor.name()));
+    return GetSliceIndexFromTensor((*static_cast<phi::DenseTensor*>(
+        CastPyArg2Tensor(obj, 0).impl().get())));
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "We should only get paddle::experimental::Tensor or VarBase in this "
+        "method, when you reach this means we got another type index."));
+  }
+}
+
+bool PyCheckTensor(PyObject* obj) {
+  return PyObject_IsInstance(obj, reinterpret_cast<PyObject*>(p_tensor_type));
+}
+
static PyObject* tensor_method_numpy(TensorObject* self, PyObject* args,
                                     PyObject* kwargs) {
  EAGER_TRY
@@ -468,16 +494,111 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self,
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

-// NOTE(wuweilong): Set value and not change self's original place
-static PyObject* tensor_method_set_value(TensorObject* self, PyObject* args,
-                                         PyObject* kwargs) {
+static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
+                                                  PyObject* args,
+                                                  PyObject* kwargs) {
  EAGER_TRY
-  VLOG(4) << "Value " << self->tensor.name();
-  pybind11::object numpy_value =
-      pybind11::object(pybind11::handle(PyTuple_GET_ITEM(args, 0)), true);
-  InitTensorWithNumpyValue(self, numpy_value, false);
-  Py_INCREF(Py_None);
-  return Py_None;
+  PyObject* _index = PyTuple_GET_ITEM(args, 0);
+  VLOG(4) << "Call _getitem_index_not_tensor";
+  std::vector<int> slice_axes, slice_starts, slice_ends, slice_strides,
+      decrease_axis, none_axes, infer_flags, list_select_idxs;
+  // if index is a list, list_select_flag will be true
+  bool list_select_flag = false;
+  PADDLE_ENFORCE_EQ(
+      self->tensor.is_initialized(), true,
+      platform::errors::InvalidArgument(
+          "tensor %s has not been initialized, we can only slice initialized "
+          "tensor please init it first with numpy or other tensor.",
+          self->tensor.name()));
+  auto tensor = static_cast<phi::DenseTensor*>(self->tensor.impl().get());
+  ParseIndexingSlice(tensor, _index, &slice_axes, &slice_starts, &slice_ends,
+                     &slice_strides, &decrease_axis, &none_axes, &infer_flags,
+                     &list_select_idxs, &list_select_flag);
+
+  auto out = slice_axes.empty() && !list_select_flag
+                 ? self->tensor
+                 : paddle::experimental::Tensor(
+                       egr::Controller::Instance().GenerateUniqueName());
+
+  if (!slice_axes.empty()) {
+    framework::AttributeMap attrs = {{"axes", slice_axes},
+                                     {"starts", slice_starts},
+                                     {"ends", slice_ends},
+                                     {"infer_flags", infer_flags},
+                                     {"decrease_axis", decrease_axis}};
+    std::string op_type = "slice";
+    for (auto stride : slice_strides) {
+      if (stride != 1) {
+        op_type = "strided_slice";
+        attrs.insert({"strides", slice_strides});
+        attrs.erase("decrease_axis");
+        break;
+      }
+    }
+    if (op_type == "slice") {
+      out = slice_dygraph_function(self->tensor, paddle::experimental::Tensor(),
+                                   paddle::experimental::Tensor(),
+                                   std::move(attrs));
+    } else if (op_type == "strided_slice") {
+      out = strided_slice_dygraph_function(self->tensor, attrs);
+    } else {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Slice is only support slice and strided_slice, but we got %s which "
+          "is impossible, please check your code first or contact us by "
+          "issue. ",
+          op_type));
+    }
+  }
+
+  if (!none_axes.empty()) {
+    // Deal with cases when all axes are decreased.
+    // After slice, the shape of out is [1], which should have been
+    // [], but Paddle doesn't support scalar.
+    // In order to ensure the correctness of the final shape of out,
+    // one dimension of out needs to be decreased.
+    // For example:
+    //    # x.shape: (2,3,4)
+    //    out = x[0, 1, 1, None] # out.shape : (1)
+    if (static_cast<int>(decrease_axis.size()) == tensor->dims().size()) {
+      none_axes.pop_back();
+    }
+    if (!none_axes.empty()) {
+      // Deal with cases that decrease_axes is not empty
+      // For example:
+      //    # x.shape: (2,3,4)
+      //    out = x[0, 0:2, None] # out.shape : (2, 1, 4)
+      for (auto& axis : none_axes) {
+        int len = 0;
+        for (int da : decrease_axis) {
+          if (da < axis) {
+            len++;
+          }
+        }
+        axis -= len;
+      }
+
+      paddle::experimental::Tensor new_out;
+      framework::AttributeMap attrs = {{"axes", none_axes}};
+      new_out = std::get<0>(unsqueeze2_dygraph_function(out, std::move(attrs)));
+      return ToPyObject(new_out);
+    }
+  }
+
+  // the index is a list
+  if (list_select_flag) {
+    auto select_index = paddle::experimental::Tensor(
+        egr::Controller::Instance().GenerateUniqueName());
+    auto idx_tensor = std::make_shared<phi::DenseTensor>();
+    auto* dev_ctx = platform::DeviceContextPool::Instance().Get(
+        egr::Controller::Instance().GetExpectedPlace());
+    paddle::framework::TensorFromVector(list_select_idxs, *dev_ctx,
+                                        idx_tensor.get());
+    framework::AttributeMap attrs = {{"dim", 0}};
+    out = index_select_dygraph_function(self->tensor, select_index,
+                                        std::move(attrs));
+  }
+
+  return ToPyObject(out);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}
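The dispatch above maps Python `__getitem__` indexing onto four existing ops. A minimal Python sketch of the observable behavior (variable names hypothetical; the shape for the `None` case follows the comment in the code):

```python
import paddle

x = paddle.rand([2, 3, 4])

a = x[0, 0:2]        # unit strides -> dispatched to the slice op
b = x[:, ::2]        # a stride != 1 -> dispatched to strided_slice
c = x[0, 0:2, None]  # None inserts an axis via unsqueeze2; c.shape: [2, 1, 4]
d = x[[0, 1]]        # list index -> index_select along dim 0
```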
@@ -602,7 +723,8 @@ PyMethodDef variable_methods[] = {
    {"get_tensor",
     (PyCFunction)(void (*)(void))tensor_method_get_underline_tensor,
     METH_VARARGS | METH_KEYWORDS, NULL},
-    {"_set_value", (PyCFunction)(void (*)(void))tensor_method_set_value,
+    {"_getitem_index_not_tensor",
+     (PyCFunction)(void (*)(void))tensor__getitem_index_not_tensor,
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"_register_grad_hook",
     (PyCFunction)(void (*)(void))tensor_register_grad_hook,
......
@@ -16,8 +16,11 @@ limitations under the License. */
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/fluid/framework/scope_guard.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/operators/py_func_op.h"
+#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
@@ -184,6 +187,11 @@ paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos) {
  }
}

+std::shared_ptr<imperative::VarBase> CastPyArg2VarBase(PyObject* obj,
+                                                       ssize_t arg_pos) {
+  return py::cast<std::shared_ptr<imperative::VarBase>>(obj);
+}
+
std::vector<paddle::experimental::Tensor> CastPyArg2VectorOfTensor(
    PyObject* obj, ssize_t arg_pos) {
  std::vector<paddle::experimental::Tensor> result;
@@ -737,5 +745,6 @@ std::vector<paddle::experimental::Tensor*> GetTensorPtrListFromArgs(
  return result;
}

}  // namespace pybind
}  // namespace paddle
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/phi/core/dense_tensor.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"

namespace paddle {
namespace pybind {
@@ -33,6 +32,8 @@ int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos);
float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos);
std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos);
paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos);
+std::shared_ptr<imperative::VarBase> CastPyArg2VarBase(PyObject* obj,
+                                                       ssize_t arg_pos);
std::vector<paddle::experimental::Tensor> CastPyArg2VectorOfTensor(
    PyObject* obj, ssize_t arg_pos);
platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos);
@@ -112,5 +113,7 @@ std::vector<paddle::experimental::Tensor*> GetTensorPtrListFromArgs(
    const std::string& op_type, const std::string& arg_name, PyObject* args,
    ssize_t arg_idx, bool dispensable = false);

+// end of Slice related methods
+
}  // namespace pybind
}  // namespace paddle
@@ -54,6 +54,7 @@ limitations under the License. */
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/pybind/op_function.h"
#include "paddle/fluid/pybind/pybind_boost_headers.h"
+#include "paddle/fluid/pybind/slice_utils.h"
#include "paddle/fluid/pybind/tensor_py.h"

namespace paddle {
@@ -319,6 +320,23 @@ static std::string GetTypeName(const imperative::VarBase &var) {
  }
}

+Py_ssize_t GetSliceIndexFromPyObject(PyObject *obj) {
+  if (py::isinstance<imperative::VarBase>(obj)) {
+    VLOG(6) << "Call GetSliceIndexFromTensor in Imperative";
+    return GetSliceIndexFromTensor(
+        py::cast<std::shared_ptr<imperative::VarBase>>(obj)
+            ->Var()
+            .Get<framework::LoDTensor>());
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "We should only get paddle::experimental::Tensor or VarBase in this "
+        "method, when you reach this means we got another type index."));
+  }
+}
+
+bool PyCheckTensor(PyObject *obj) {
+  return py::isinstance<imperative::VarBase>(obj);
+}
+
using PyNameVarBaseMap = std::unordered_map<std::string, py::handle>;

// NOTE(zjl): py::handle is a very light wrapper of PyObject *.
@@ -360,18 +378,6 @@ GetVarBaseListFromPyHandle(const py::handle &handle) {
  return result;
}

-static bool IsNumpyType(PyObject *obj) {
-  // It is not a good way to judge the type of obj by its type'name. Maybe using
-  // `PyArray_IsScalar` will be better. However, this interface cannot be used
-  // by including pybind11, and it needs to compile with numpy.
-  auto type_name = std::string(Py_TYPE(obj)->tp_name);
-  return type_name == "numpy.int64" || type_name == "numpy.longlong" ||
-         type_name == "numpy.int32" || type_name == "numpy.int16";
-}
-
-static bool PyCheckTensor(PyObject *obj) {
-  return py::isinstance<imperative::VarBase>(obj);
-}
-
// cast numpy type form S to T, this may allocate new memory
template <class T, class S>
@@ -429,260 +435,6 @@ static imperative::NameVarBaseMap ConvertToNameVarBaseMap(
  return result;
}
-static bool PyCheckInteger(PyObject *obj) {
-#if PY_VERSION_HEX < 0x03000000
-  return (PyLong_Check(obj) || PyInt_Check(obj)) && !PyBool_Check(obj);
-#else
-  return PyLong_Check(obj) && !PyBool_Check(obj);
-#endif
-}
-
-static Py_ssize_t GetSliceIndexFromTensor(
-    const std::shared_ptr<imperative::VarBase> &tensor_index) {
-  const auto &tensor = tensor_index->Var().Get<framework::LoDTensor>();
-  if (tensor.numel() == 1) {
-    if (framework::TransToProtoVarType(tensor.dtype()) ==
-        framework::proto::VarType::INT32) {
-      return static_cast<Py_ssize_t>(operators::GetValue<int32_t>(&tensor));
-    } else if (framework::TransToProtoVarType(tensor.dtype()) ==
-               framework::proto::VarType::INT64) {
-      return static_cast<Py_ssize_t>(operators::GetValue<int64_t>(&tensor));
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Currently, the type of tensor in slice indices only allows "
-          "int32 and int64, please check the type of index tensor."));
-    }
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "Currently, tensor in slice indices only allows 1 element, "
-        "but received %d.",
-        tensor.numel()));
-  }
-}
-
-// NOTE(zhiqiu): Revised version of PySlice_GetIndices. From:
-// https://github.com/python/cpython/blob/8d21aa21f2cbc6d50aab3f420bb23be1d081dac4/Objects/sliceobject.c#L103
-// Original PySlice_GetIndices return wrong result when
-// slice_item contains long int, such as arr[:180L].
-// NOT sure why this happens !!!
-// Besides, PySlice_GetIndices cannot raise error when float in slice item.
-// So, I make a revised version of PySlice_GetIndices, named to
-// _PySlice_GetIndices. Try to use _PySlice_Unpack which is more robust than
-// PySlice_GetIndices in the future.
-static int _PySlice_GetIndices(PySliceObject *r, Py_ssize_t length,
-                               Py_ssize_t *start, Py_ssize_t *stop,
-                               Py_ssize_t *step) {
-  /* XXX support long ints */
-  if (r->step == Py_None) {
-    *step = 1;
-  } else {
-    if (PyCheckInteger(r->step) || IsNumpyType(r->step)) {
-      *step = PyLong_AsLong(r->step);
-    } else if (PyCheckTensor(r->step)) {
-      *step = GetSliceIndexFromTensor(
-          py::cast<std::shared_ptr<imperative::VarBase>>(r->step));
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Currently, slice indices only allows None, integers, "
-          "tensor(int) and numpy(int) in slice item, but received %s.",
-          std::string(Py_TYPE(r->step)->tp_name)));
-    }
-  }
-  if (r->start == Py_None) {
-    *start = *step < 0 ? length - 1 : 0;
-  } else {
-    if (PyCheckInteger(r->start) || IsNumpyType(r->start)) {
-      *start = PyLong_AsLong(r->start);
-    } else if (PyCheckTensor(r->start)) {
-      *start = GetSliceIndexFromTensor(
-          py::cast<std::shared_ptr<imperative::VarBase>>(r->start));
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Currently, slice indices only allows None, integers, "
-          "tensor(int) and numpy(int) in slice item, but received %s.",
-          std::string(Py_TYPE(r->start)->tp_name)));
-    }
-    if (*start < 0) *start += length;
-    *start = std::max(*start, static_cast<Py_ssize_t>(0));
-  }
-  if (r->stop == Py_None) {
-    *stop = *step < 0 ? -1 : length;
-  } else {
-    if (PyCheckInteger(r->stop) || IsNumpyType(r->stop)) {
-      *stop = PyLong_AsLong(r->stop);
-    } else if (PyCheckTensor(r->stop)) {
-      *stop = GetSliceIndexFromTensor(
-          py::cast<std::shared_ptr<imperative::VarBase>>(r->stop));
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Currently, slice indices only allows None, integers, "
-          "tensor(int) and numpy(int) in slice item, but received %s.",
-          std::string(Py_TYPE(r->stop)->tp_name)));
-    }
-    if (0 < *step && *stop < 0) *stop += length;
-    *stop = std::min(*stop, length);
-  }
-  if (*stop > length) return -1;
-  if (*start >= length) return -1;
-  if (*step == 0) return -1;
-  return 0;
-}
-
-static void ParseIndexingSlice(
-    framework::LoDTensor *tensor, PyObject *_index,
-    std::vector<int> *slice_axes, std::vector<int> *slice_starts,
-    std::vector<int> *slice_ends, std::vector<int> *slice_strides,
-    std::vector<int> *decrease_axis, std::vector<int> *none_axes,
-    std::vector<int> *infer_flags, std::vector<int> *list_select_idxs,
-    bool *list_select_flag) {
-  // We allow indexing by Integers, Slices, Ellipsis, None, tuples of those
-  // types, and list of Bool and Integers.
-  // wrap to tuple
-
-  // NOTE(zhiqiu): PyTuple_Pack increases refcount.
-  PyObject *index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index;
-  DEFINE_PADDLE_SCOPE_GUARD([index, _index]() {
-    if (!PyTuple_Check(_index)) {
-      Py_DECREF(index);
-      VLOG(4) << "Call Py_DECREF";
-    }
-  });
-  PADDLE_ENFORCE_EQ(
-      tensor->IsInitialized(), true,
-      platform::errors::InvalidArgument("tensor has not been initialized"));
-  const auto &shape = tensor->dims();
-  const int rank = shape.size();
-  const int size = PyTuple_GET_SIZE(index);
-
-  // specified_dims is the number of dimensions which indexed by Interger,
-  // Slices.
-  int specified_dims = 0;
-  int ell_count = 0;
-  for (int dim = 0; dim < size; ++dim) {
-    PyObject *slice_item = PyTuple_GetItem(index, dim);
-    if (PyCheckInteger(slice_item) || PySlice_Check(slice_item)) {
-      specified_dims++;
-    } else if (slice_item == Py_Ellipsis) {
-      ell_count++;
-    }
-  }
-  PADDLE_ENFORCE_LE(ell_count, 1,
-                    platform::errors::InvalidArgument(
-                        "An index can only have a single ellipsis ('...')"));
-  int none_count = 0;
-  for (int i = 0, dim = 0; i < size; ++i) {
-    PyObject *slice_item = PyTuple_GetItem(index, i);
-
-    infer_flags->push_back(1);
-    int dim_len = shape[dim];
-    if (PyCheckInteger(slice_item) || IsNumpyType(slice_item)) {
-      // integer, PyLong_AsLong supports both int and long
-      int start = static_cast<int>(PyLong_AsLong(slice_item));
-      auto s_t = start;
-      start = start < 0 ? start + dim_len : start;
-      if (start >= dim_len || start < 0) {
-        std::string str_error_message =
-            "The starting index " + std::to_string(s_t) +
-            " of slice is out of bounds in tensor " + std::to_string(dim) +
-            "-th axis, it shound be in the range of [" +
-            std::to_string(-dim_len) + ", " + std::to_string(dim_len) + ")";
-        // py::index_error is corresponding to IndexError in Python
-        // Used to indicate out of bounds access in __getitem__, __setitem__
-        throw py::index_error(str_error_message);
-      }
-      slice_axes->push_back(dim);
-      slice_starts->push_back(start);
-      slice_ends->push_back(start + 1);
-      slice_strides->push_back(1);
-      decrease_axis->push_back(dim);
-      dim++;
-    } else if (PySlice_Check(slice_item)) {
-      // slice item
-      Py_ssize_t start, end, step;
-      PySliceObject *p = reinterpret_cast<PySliceObject *>(slice_item);
-      _PySlice_GetIndices(p, dim_len, &start, &end, &step);
-
-      // :: or : or 0:dim_len:1
-      if (start == 0 && end == dim_len && step == 1) {
-        dim++;
-        continue;
-      }
-      slice_axes->push_back(dim);
-      slice_starts->push_back(start);
-      slice_ends->push_back(end);
-      slice_strides->push_back(step);
-      dim++;
-    } else if (slice_item == Py_Ellipsis) {
-      dim += rank - specified_dims;
-    } else if (slice_item == Py_None) {
-      none_axes->push_back(dim + none_count);
-      none_count++;
-    } else if (PyList_Check(slice_item)) {
-      *list_select_flag = true;
-      PADDLE_ENFORCE_EQ(
-          size, 1,
-          platform::errors::InvalidArgument(
-              "When index contains a list, its length is excepted to 1, "
-              "but received %d",
-              size));
-      bool all_bool = true;
-      int list_size = PyList_GET_SIZE(slice_item);
-      for (int j = 0; j < list_size; ++j) {
-        PyObject *list_item = PyList_GetItem(slice_item, j);
-        if (PyCheckInteger(list_item)) {
-          all_bool = false;
-        } else if (!PyBool_Check(list_item)) {
-          PADDLE_THROW(platform::errors::InvalidArgument(
-              "Only support int or bool in index list."));
-        }
-      }
-      if (all_bool) {
-        PADDLE_ENFORCE_EQ(
-            list_size, shape[0],
-            platform::errors::InvalidArgument(
-                "The dimension of bool index doesn't match indexed array along "
-                "dimension 0, the target dimension is %d, but received %d.",
-                shape[0], list_size));
-
-        for (int j = 0; j < list_size; ++j) {
-          PyObject *list_item = PyList_GetItem(slice_item, j);
-          if (list_item == Py_True) {
-            list_select_idxs->push_back(j);
-          }
-        }
-      } else {
-        for (int j = 0; j < list_size; ++j) {
-          PyObject *list_item = PyList_GetItem(slice_item, j);
-          if (PyCheckInteger(list_item)) {
-            list_select_idxs->push_back(
-                static_cast<int>(PyLong_AsLong(list_item)));
-          } else if (list_item == Py_True) {
-            list_select_idxs->push_back(1);
-          } else {
-            list_select_idxs->push_back(0);
-          }
-        }
-      }
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Currently, Tensor.__indices__() only allows indexing "
-          "by Integers, Slices, Ellipsis, None, tuples of these types "
-          "and list of Bool and Integers, but received "
-          "%s in %dth slice item",
-          std::string(Py_TYPE(slice_item)->tp_name), i + 1));
-    }
-  }
-
-  // valid_index is the number of dimensions exclude None index
-  const int valid_indexs = size - none_axes->size() - ell_count;
-  PADDLE_ENFORCE_EQ(valid_indexs <= rank, true,
-                    platform::errors::InvalidArgument(
-                        "Too many indices (%d) for tensor of dimension %d.",
-                        valid_indexs, rank));
-}
-
template <typename P>
static void VarBaseCopy(std::shared_ptr<imperative::VarBase> &src,  // NOLINT
                        imperative::VarBase &dst,                   // NOLINT
......
@@ -80,6 +80,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/cuda_streams_py.h"
#include "paddle/fluid/pybind/distributed_py.h"
#include "paddle/fluid/pybind/eager.h"
+#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/pybind/io.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/lod_utils.h"
@@ -101,7 +102,6 @@ limitations under the License. */
#include "paddle/fluid/pybind/gloo_context_py.h"
#include "paddle/fluid/pybind/gloo_wrapper_py.h"
#include "paddle/fluid/pybind/heter_wrapper_py.h"
-#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/pybind/inference_api.h"
#include "paddle/fluid/pybind/ir.h"
#include "paddle/fluid/pybind/metrics_py.h"
@@ -527,6 +527,7 @@ PYBIND11_MODULE(core_avx, m) {
PYBIND11_MODULE(core_noavx, m) {
#endif

+  BindImperative(&m);
  BindEager(&m);
  BindCudaStream(&m);
@@ -741,8 +742,6 @@ PYBIND11_MODULE(core_noavx, m) {
  m.def("_promote_types_if_complex_exists",
        &paddle::framework::PromoteTypesIfComplexExists);

-  BindImperative(&m);
-
  py::class_<framework::Tensor> framework_tensor(m, "Tensor",
                                                 py::buffer_protocol());
  g_framework_tensor_pytype =
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <Python.h>

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope_guard.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"

namespace py = pybind11;

namespace paddle {
namespace pybind {

static bool PyCheckTensor(PyObject* obj);
static Py_ssize_t GetSliceIndexFromPyObject(PyObject* obj);
// Slice related methods
static bool PyCheckInteger(PyObject* obj) {
#if PY_VERSION_HEX < 0x03000000
  return (PyLong_Check(obj) || PyInt_Check(obj)) && !PyBool_Check(obj);
#else
  return PyLong_Check(obj) && !PyBool_Check(obj);
#endif
}
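// Note: bool is excluded above because Python's bool is a subclass of int;
// letting True/False through here would treat them as integer indices instead
// of routing them to the boolean-mask list handling in ParseIndexingSlice.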
static bool IsNumpyType(PyObject* obj) {
  // It is not a good way to judge the type of obj by its type'name. Maybe using
  // `PyArray_IsScalar` will be better. However, this interface cannot be used
  // by including pybind11, and it needs to compile with numpy.
  auto type_name = std::string(Py_TYPE(obj)->tp_name);
  return type_name == "numpy.int64" || type_name == "numpy.longlong" ||
         type_name == "numpy.int32" || type_name == "numpy.int16";
}

static Py_ssize_t GetSliceIndexFromTensor(const phi::DenseTensor& tensor) {
  if (tensor.numel() == 1) {
    if (framework::TransToProtoVarType(tensor.type()) ==
        framework::proto::VarType::INT32) {
      return static_cast<Py_ssize_t>(operators::GetValue<int32_t>(&tensor));
    } else if (framework::TransToProtoVarType(tensor.type()) ==
               framework::proto::VarType::INT64) {
      return static_cast<Py_ssize_t>(operators::GetValue<int64_t>(&tensor));
    } else {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Currently, the type of tensor in slice indices only allows "
          "int32 and int64, please check the type of index tensor."));
    }
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Currently, tensor in slice indices only allows 1 element, "
        "but received %d.",
        tensor.numel()));
  }
}

// NOTE(zhiqiu): Revised version of PySlice_GetIndices. From:
// https://github.com/python/cpython/blob/8d21aa21f2cbc6d50aab3f420bb23be1d081dac4/Objects/sliceobject.c#L103
// Original PySlice_GetIndices return wrong result when
// slice_item contains long int, such as arr[:180L].
// NOT sure why this happens !!!
// Besides, PySlice_GetIndices cannot raise error when float in slice item.
// So, I make a revised version of PySlice_GetIndices, named to
// _PySlice_GetIndices. Try to use _PySlice_Unpack which is more robust than
// PySlice_GetIndices in the future.
static int _PySlice_GetIndices(PySliceObject* r, Py_ssize_t length,
                               Py_ssize_t* start, Py_ssize_t* stop,
                               Py_ssize_t* step) {
  /* XXX support long ints */
  if (r->step == Py_None) {
    *step = 1;
  } else {
    if (PyCheckInteger(r->step) || IsNumpyType(r->step)) {
      *step = PyLong_AsLong(r->step);
    } else if (PyCheckTensor(r->step)) {
      *step = GetSliceIndexFromPyObject(r->step);
    } else {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Currently, slice indices only allows None, integers, "
          "tensor(int) and numpy(int) in slice item, but received %s.",
          std::string(Py_TYPE(r->step)->tp_name)));
    }
  }
  if (r->start == Py_None) {
    *start = *step < 0 ? length - 1 : 0;
  } else {
    if (PyCheckInteger(r->start) || IsNumpyType(r->start)) {
      *start = PyLong_AsLong(r->start);
    } else if (PyCheckTensor(r->start)) {
      *start = GetSliceIndexFromPyObject(r->start);
    } else {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Currently, slice indices only allows None, integers, "
          "tensor(int) and numpy(int) in slice item, but received %s.",
          std::string(Py_TYPE(r->start)->tp_name)));
    }
    if (*start < 0) *start += length;
    *start = std::max(*start, static_cast<Py_ssize_t>(0));
  }
  if (r->stop == Py_None) {
    *stop = *step < 0 ? -1 : length;
  } else {
    if (PyCheckInteger(r->stop) || IsNumpyType(r->stop)) {
      *stop = PyLong_AsLong(r->stop);
    } else if (PyCheckTensor(r->stop)) {
      *stop = GetSliceIndexFromPyObject(r->stop);
    } else {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Currently, slice indices only allows None, integers, "
          "tensor(int) and numpy(int) in slice item, but received %s.",
          std::string(Py_TYPE(r->stop)->tp_name)));
    }
    if (0 < *step && *stop < 0) *stop += length;
    *stop = std::min(*stop, length);
  }
  if (*stop > length) return -1;
  if (*start >= length) return -1;
  if (*step == 0) return -1;
  return 0;
}
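// For reference, the slice bounds accepted above cover plain Python ints, the
// numpy integer scalars listed in IsNumpyType, and 1-element int32/int64
// tensors resolved through GetSliceIndexFromPyObject. In Python (names
// hypothetical):
//   start = paddle.to_tensor([2]); stop = np.int64(8)
//   y = x[start:stop]   # behaves like x[2:8]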
static void ParseIndexingSlice(
    framework::LoDTensor* tensor, PyObject* _index,
    std::vector<int>* slice_axes, std::vector<int>* slice_starts,
    std::vector<int>* slice_ends, std::vector<int>* slice_strides,
    std::vector<int>* decrease_axis, std::vector<int>* none_axes,
    std::vector<int>* infer_flags, std::vector<int>* list_select_idxs,
    bool* list_select_flag) {
  // We allow indexing by Integers, Slices, Ellipsis, None, tuples of those
  // types, and list of Bool and Integers.
  // wrap to tuple

  // NOTE(zhiqiu): PyTuple_Pack increases refcount.
  PyObject* index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index;
  DEFINE_PADDLE_SCOPE_GUARD([index, _index]() {
    if (!PyTuple_Check(_index)) {
      Py_DECREF(index);
      VLOG(4) << "Call Py_DECREF";
    }
  });
  PADDLE_ENFORCE_EQ(
      tensor->IsInitialized(), true,
      platform::errors::InvalidArgument("tensor has not been initialized"));
  const auto& shape = tensor->dims();
  const int rank = shape.size();
  const int size = PyTuple_GET_SIZE(index);

  // specified_dims is the number of dimensions which indexed by Interger,
  // Slices.
  int specified_dims = 0;
  int ell_count = 0;
  for (int dim = 0; dim < size; ++dim) {
    PyObject* slice_item = PyTuple_GetItem(index, dim);
    if (PyCheckInteger(slice_item) || PySlice_Check(slice_item)) {
      specified_dims++;
    } else if (slice_item == Py_Ellipsis) {
      ell_count++;
    }
  }
  PADDLE_ENFORCE_LE(ell_count, 1,
                    platform::errors::InvalidArgument(
                        "An index can only have a single ellipsis ('...')"));
  int none_count = 0;
  for (int i = 0, dim = 0; i < size; ++i) {
    PyObject* slice_item = PyTuple_GetItem(index, i);

    infer_flags->push_back(1);
    int dim_len = shape[dim];
    if (PyCheckInteger(slice_item) || IsNumpyType(slice_item)) {
      // integer, PyLong_AsLong supports both int and long
      int start = static_cast<int>(PyLong_AsLong(slice_item));
      auto s_t = start;
      start = start < 0 ? start + dim_len : start;
      if (start >= dim_len || start < 0) {
        std::string str_error_message =
            "The starting index " + std::to_string(s_t) +
            " of slice is out of bounds in tensor " + std::to_string(dim) +
            "-th axis, it shound be in the range of [" +
            std::to_string(-dim_len) + ", " + std::to_string(dim_len) + ")";
        // py::index_error is corresponding to IndexError in Python
        // Used to indicate out of bounds access in __getitem__, __setitem__
        throw py::index_error(str_error_message);
      }
      slice_axes->push_back(dim);
      slice_starts->push_back(start);
      slice_ends->push_back(start + 1);
      slice_strides->push_back(1);
      decrease_axis->push_back(dim);
      dim++;
    } else if (PySlice_Check(slice_item)) {
      // slice item
      Py_ssize_t start, end, step;
      PySliceObject* p = reinterpret_cast<PySliceObject*>(slice_item);
      _PySlice_GetIndices(p, dim_len, &start, &end, &step);

      // :: or : or 0:dim_len:1
      if (start == 0 && end == dim_len && step == 1) {
        dim++;
        continue;
      }
      slice_axes->push_back(dim);
      slice_starts->push_back(start);
      slice_ends->push_back(end);
      slice_strides->push_back(step);
      dim++;
    } else if (slice_item == Py_Ellipsis) {
      dim += rank - specified_dims;
    } else if (slice_item == Py_None) {
      none_axes->push_back(dim + none_count);
      none_count++;
    } else if (PyList_Check(slice_item)) {
      *list_select_flag = true;
      PADDLE_ENFORCE_EQ(
          size, 1,
          platform::errors::InvalidArgument(
              "When index contains a list, its length is excepted to 1, "
              "but received %d",
              size));
      bool all_bool = true;
      int list_size = PyList_GET_SIZE(slice_item);
      for (int j = 0; j < list_size; ++j) {
        PyObject* list_item = PyList_GetItem(slice_item, j);
        if (PyCheckInteger(list_item)) {
          all_bool = false;
        } else if (!PyBool_Check(list_item)) {
          PADDLE_THROW(platform::errors::InvalidArgument(
              "Only support int or bool in index list."));
        }
      }
      if (all_bool) {
        PADDLE_ENFORCE_EQ(
            list_size, shape[0],
            platform::errors::InvalidArgument(
                "The dimension of bool index doesn't match indexed array along "
                "dimension 0, the target dimension is %d, but received %d.",
                shape[0], list_size));

        for (int j = 0; j < list_size; ++j) {
          PyObject* list_item = PyList_GetItem(slice_item, j);
          if (list_item == Py_True) {
            list_select_idxs->push_back(j);
          }
        }
      } else {
        for (int j = 0; j < list_size; ++j) {
          PyObject* list_item = PyList_GetItem(slice_item, j);
          if (PyCheckInteger(list_item)) {
            list_select_idxs->push_back(
                static_cast<int>(PyLong_AsLong(list_item)));
          } else if (list_item == Py_True) {
            list_select_idxs->push_back(1);
          } else {
            list_select_idxs->push_back(0);
          }
        }
      }
    } else {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Currently, Tensor.__indices__() only allows indexing "
          "by Integers, Slices, Ellipsis, None, tuples of these types "
          "and list of Bool and Integers, but received "
          "%s in %dth slice item",
          std::string(Py_TYPE(slice_item)->tp_name), i + 1));
    }
  }

  // valid_index is the number of dimensions exclude None index
  const int valid_indexs = size - none_axes->size() - ell_count;
  PADDLE_ENFORCE_EQ(valid_indexs <= rank, true,
                    platform::errors::InvalidArgument(
                        "Too many indices (%d) for tensor of dimension %d.",
                        valid_indexs, rank));
}

}  // namespace pybind
}  // namespace paddle
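A short Python sketch of the two list-index paths `ParseIndexingSlice` distinguishes (names hypothetical):

```python
import paddle

x = paddle.rand([3, 4])
rows = x[[0, 2]]               # int list: explicit row indices
mask = x[[True, False, True]]  # bool list: length must equal shape[0]; selects rows 0 and 2
```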
@@ -351,10 +351,10 @@ endif()
set_tests_properties(test_graph PROPERTIES TIMEOUT 120)
set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
-set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
-set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
-set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
-set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
+set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 200)
+set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 200)
+set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 200)
+set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 200)
if(LINUX AND WITH_MKLDNN)
    set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)
    set_tests_properties(convert_model2dot_ernie PROPERTIES TIMEOUT 120)
......
@@ -26,7 +26,7 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
-from paddle.fluid.framework import IrGraph
+from paddle.fluid.framework import IrGraph, _test_eager_guard
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
@@ -122,7 +122,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
class TestImperativeOutSclae(unittest.TestCase):
-    def test_out_scale_acc(self):
+    def func_out_scale_acc(self):
        seed = 1000
        lr = 0.001
@@ -166,9 +166,14 @@ class TestImperativeOutSclae(unittest.TestCase):
                loss_list[i] > loss_list[i + 1],
                msg='Failed to do the imperative qat.')

+    def test_out_scale_acc(self):
+        with _test_eager_guard():
+            self.func_out_scale_acc()
+        self.func_out_scale_acc()
+

class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
-    def test_save_quantized_model(self):
+    def func_save_quantized_model(self):
        lr = 0.001
        load_param_path = "test_save_quantized_model/lenet.pdparams"
@@ -206,6 +211,11 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
                loss_list[i] > loss_list[i + 1],
                msg='Failed to do the imperative qat.')

+    def test_save_quantized_model(self):
+        with _test_eager_guard():
+            self.func_save_quantized_model()
+        self.func_save_quantized_model()
+

if __name__ == '__main__':
    unittest.main()
...@@ -29,6 +29,7 @@ import paddle.fluid as fluid ...@@ -29,6 +29,7 @@ import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import * from paddle.fluid.contrib.slim.quantization import *
from paddle.fluid.log_helper import get_logger from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn
from imperative_test_utils import ImperativeLinearBn_hook from imperative_test_utils import ImperativeLinearBn_hook
...@@ -194,7 +195,7 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -194,7 +195,7 @@ class TestImperativePTQ(unittest.TestCase):
break break
return top1_correct_num / total_num return top1_correct_num / total_num
def test_ptq(self): def func_ptq(self):
start_time = time.time() start_time = time.time()
self.set_vars() self.set_vars()
...@@ -244,9 +245,14 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -244,9 +245,14 @@ class TestImperativePTQ(unittest.TestCase):
end_time = time.time() end_time = time.time()
print("total time: %ss \n" % (end_time - start_time)) print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQfuse(TestImperativePTQ): class TestImperativePTQfuse(TestImperativePTQ):
def test_ptq(self): def func_ptq(self):
start_time = time.time() start_time = time.time()
self.set_vars() self.set_vars()
...@@ -305,6 +311,11 @@ class TestImperativePTQfuse(TestImperativePTQ): ...@@ -305,6 +311,11 @@ class TestImperativePTQfuse(TestImperativePTQ):
end_time = time.time() end_time = time.time()
print("total time: %ss \n" % (end_time - start_time)) print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQHist(TestImperativePTQ): class TestImperativePTQHist(TestImperativePTQ):
def set_vars(self): def set_vars(self):
......
...@@ -32,7 +32,7 @@ from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose ...@@ -32,7 +32,7 @@ from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose
from paddle.fluid.log_helper import get_logger from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import QuantizedConv2D, QuantizedConv2DTranspose from paddle.nn.quant.quant_layers import QuantizedConv2D, QuantizedConv2DTranspose
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict, ImperativeLenet from imperative_test_utils import fix_model_dict, ImperativeLenet
paddle.enable_static() paddle.enable_static()
...@@ -55,7 +55,7 @@ class TestImperativeQat(unittest.TestCase): ...@@ -55,7 +55,7 @@ class TestImperativeQat(unittest.TestCase):
self.activation_quantize_type = 'moving_average_abs_max' self.activation_quantize_type = 'moving_average_abs_max'
print('weight_quantize_type', self.weight_quantize_type) print('weight_quantize_type', self.weight_quantize_type)
def test_qat(self): def func_qat(self):
self.set_vars() self.set_vars()
imperative_qat = ImperativeQuantAware( imperative_qat = ImperativeQuantAware(
...@@ -193,6 +193,11 @@ class TestImperativeQat(unittest.TestCase): ...@@ -193,6 +193,11 @@ class TestImperativeQat(unittest.TestCase):
np.allclose(after_save, before_save.numpy()), np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.') msg='Failed to save the inference quantized model.')
def test_qat(self):
with _test_eager_guard():
self.func_qat()
self.func_qat()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -27,7 +27,7 @@ import paddle.fluid as fluid ...@@ -27,7 +27,7 @@ import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict, ImperativeLenet from imperative_test_utils import fix_model_dict, ImperativeLenet
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
......
...@@ -30,7 +30,7 @@ from paddle.fluid.dygraph import Pool2D ...@@ -30,7 +30,7 @@ from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph import Linear
from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose
from paddle.fluid.log_helper import get_logger from paddle.fluid.log_helper import get_logger
from paddle.fluid.framework import _test_eager_guard
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
_logger = get_logger( _logger = get_logger(
...@@ -157,7 +157,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -157,7 +157,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
_logger.info("test act_preprocess") _logger.info("test act_preprocess")
self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT) self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)
def test_quant_aware_training(self): def func_quant_aware_training(self):
imperative_qat = self.imperative_qat imperative_qat = self.imperative_qat
seed = 1 seed = 1
np.random.seed(seed) np.random.seed(seed)
...@@ -243,6 +243,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -243,6 +243,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
train(lenet) train(lenet)
test(lenet) test(lenet)
def test_quant_aware_training(self):
with _test_eager_guard():
self.func_quant_aware_training()
self.func_quant_aware_training()
class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess): class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
def setUp(self): def setUp(self):
......
...@@ -32,6 +32,7 @@ from paddle.fluid.dygraph.nn import Pool2D ...@@ -32,6 +32,7 @@ from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger from paddle.fluid.log_helper import get_logger
from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant
from paddle.fluid.framework import _test_eager_guard
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
...@@ -42,7 +43,8 @@ _logger = get_logger( ...@@ -42,7 +43,8 @@ _logger = get_logger(
class TestImperativeOutSclae(unittest.TestCase): class TestImperativeOutSclae(unittest.TestCase):
def test_out_scale_acc(self): def func_out_scale_acc(self):
paddle.disable_static()
seed = 1000 seed = 1000
lr = 0.1 lr = 0.1
...@@ -125,6 +127,11 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -125,6 +127,11 @@ class TestImperativeOutSclae(unittest.TestCase):
if find_matmul: if find_matmul:
self.assertTrue(matmul_skip_count == 1) self.assertTrue(matmul_skip_count == 1)
def test_out_scale_acc(self):
with _test_eager_guard():
self.func_out_scale_acc()
self.func_out_scale_acc()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -99,18 +99,19 @@ def param_guard(parameters): ...@@ -99,18 +99,19 @@ def param_guard(parameters):
yield yield
def _convert_into_variable(var_base): def _convert_into_variable(tensor):
""" """
Convert Varbase into Variable. Convert Varbase into Variable.
""" """
if isinstance(var_base, core.VarBase): if isinstance(tensor, (core.eager.Tensor, core.VarBase)):
# Check whether has been created before. # Check whether has been created before.
new_var = var_base.block._find_var_recursive(var_base.name) new_var = tensor.block._find_var_recursive(tensor.name)
if new_var is not None: if new_var is not None:
assert isinstance(new_var, framework.Variable) assert isinstance(new_var, framework.Variable)
# Convert ParamBase into Parameter with same attributes in dy2stat. # Convert ParamBase into Parameter with same attributes in dy2stat.
elif isinstance(var_base, framework.ParamBase): elif isinstance(tensor,
new_var = var_base._to_static_var(to_parameter=True) (framework.EagerParamBase, framework.ParamBase)):
new_var = tensor._to_static_var(to_parameter=True)
else: else:
# Note(Aurelius84): Convert VarBase in self._buffers into Variable with # Note(Aurelius84): Convert VarBase in self._buffers into Variable with
# same attributes and set persistable=True to allow saving this var. # same attributes and set persistable=True to allow saving this var.
...@@ -120,13 +121,13 @@ def _convert_into_variable(var_base): ...@@ -120,13 +121,13 @@ def _convert_into_variable(var_base):
# But if its shape is empty while created from `create_variable()`, we consider this buffer # But if its shape is empty while created from `create_variable()`, we consider this buffer
# non-persistable. See case of `drop_state` in lstm api. # non-persistable. See case of `drop_state` in lstm api.
is_persistable = len(var_base.shape) > 0 is_persistable = len(tensor.shape) > 0
new_var = var_base._to_static_var( new_var = tensor._to_static_var(
to_parameter=False, persistable=is_persistable) to_parameter=False, persistable=is_persistable)
return new_var return new_var
else: else:
return var_base return tensor
def enabled(): def enabled():
......
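_convert_into_variable is one of many call sites in this commit where a single-class isinstance check widens into a tuple covering both engines. A hedged sketch of the two checks as standalone helpers, assuming an eager-enabled build (_is_dygraph_tensor and _is_dygraph_param are hypothetical names, not Paddle API):

from paddle.fluid import core, framework

def _is_dygraph_tensor(obj):
    # Legacy dygraph produces core.VarBase; the eager engine produces
    # core.eager.Tensor. dy2stat must convert both into static Variables.
    return isinstance(obj, (core.VarBase, core.eager.Tensor))

def _is_dygraph_param(obj):
    # Parameters split the same way: ParamBase vs. EagerParamBase.
    return isinstance(obj, (framework.ParamBase, framework.EagerParamBase))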
...@@ -61,7 +61,8 @@ class NestSequence(object): ...@@ -61,7 +61,8 @@ class NestSequence(object):
def _get_var_ids(self): def _get_var_ids(self):
var_ids = [] var_ids = []
for idx, var in enumerate(self.__input_list): for idx, var in enumerate(self.__input_list):
if isinstance(var, (framework.Variable, core.VarBase)): if isinstance(var, (framework.Variable, core.VarBase,
core.eager.Tensor)):
var_ids.append(idx) var_ids.append(idx)
return var_ids return var_ids
...@@ -73,7 +74,8 @@ class NestSequence(object): ...@@ -73,7 +74,8 @@ class NestSequence(object):
if need_check: if need_check:
warning_types = set() warning_types = set()
for var in self.__input_list: for var in self.__input_list:
if not isinstance(var, (framework.Variable, core.VarBase)): if not isinstance(var, (framework.Variable, core.VarBase,
core.eager.Tensor)):
warning_types.add(type(var)) warning_types.add(type(var))
if warning_types: if warning_types:
logging_utils.warn( logging_utils.warn(
...@@ -301,10 +303,17 @@ class PartialProgramLayer: ...@@ -301,10 +303,17 @@ class PartialProgramLayer:
for name in block.vars: for name in block.vars:
if "@GRAD" in name: if "@GRAD" in name:
var_desc = block.vars[name].desc var_desc = block.vars[name].desc
var_base = None
if not core._in_eager_mode():
var_base = core.VarBase(var_desc.dtype(), var_base = core.VarBase(var_desc.dtype(),
var_desc.shape(), var_desc.shape(),
var_desc.name(), var_desc.name(),
var_desc.type(), False) var_desc.type(), False)
else:
var_base = core.eager.Tensor(var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(), False)
double_grads.append(var_base) double_grads.append(var_base)
return self._valid_vars(double_grads) return self._valid_vars(double_grads)
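Both tensor classes share the (dtype, shape, name, type, persistable) constructor signature, so the branch on core._in_eager_mode() recurs throughout PartialProgramLayer. A hedged sketch that factors the branch into one helper (_empty_tensor_from_desc is a hypothetical name):

from paddle.fluid import core

def _empty_tensor_from_desc(var_desc, persistable=False):
    # Only the class differs between the two dygraph engines;
    # the argument list is identical.
    if not core._in_eager_mode():
        return core.VarBase(var_desc.dtype(), var_desc.shape(),
                            var_desc.name(), var_desc.type(), persistable)
    return core.eager.Tensor(var_desc.dtype(), var_desc.shape(),
                             var_desc.name(), var_desc.type(), persistable)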
...@@ -386,13 +395,22 @@ class PartialProgramLayer: ...@@ -386,13 +395,22 @@ class PartialProgramLayer:
expected_place = framework._current_expected_place() expected_place = framework._current_expected_place()
for i, value in enumerate(flatten_inputs): for i, value in enumerate(flatten_inputs):
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
var = None
if not core._in_eager_mode():
var = core.VarBase( var = core.VarBase(
value=value, value=value,
name=self._inputs[i].desc.name(), name=self._inputs[i].desc.name(),
persistable=False, persistable=False,
place=expected_place, place=expected_place,
zero_copy=True) zero_copy=True)
elif isinstance(value, core.VarBase): else:
var = core.eager.Tensor(
value=value,
name=self._inputs[i].desc.name(),
persistable=False,
place=expected_place,
zero_copy=True)
elif isinstance(value, (core.VarBase, core.eager.Tensor)):
# NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multiple times # NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multiple times
# into CUDAPlace when it is used as input of multiple Ops, so we move it in advance # into CUDAPlace when it is used as input of multiple Ops, so we move it in advance
# to avoid this problem. # to avoid this problem.
...@@ -411,9 +429,16 @@ class PartialProgramLayer: ...@@ -411,9 +429,16 @@ class PartialProgramLayer:
var = self._outputs[var_id] var = self._outputs[var_id]
assert isinstance(var, framework.Variable) assert isinstance(var, framework.Variable)
var_desc = var.desc var_desc = var.desc
var_base = None
if not core._in_eager_mode():
var_base = core.VarBase(var_desc.dtype(), var_base = core.VarBase(var_desc.dtype(),
var_desc.shape(), var_desc.shape(),
var_desc.name(), var_desc.type(), False) var_desc.name(), var_desc.type(), False)
else:
var_base = core.eager.Tensor(var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(), False)
return var_base return var_base
# Create VarBase to receive output data. # Create VarBase to receive output data.
...@@ -423,9 +448,16 @@ class PartialProgramLayer: ...@@ -423,9 +448,16 @@ class PartialProgramLayer:
def _create_scope_vec(self): def _create_scope_vec(self):
# Hold forward variables # Hold forward variables
tmp_scope_vec = None
if not core._in_eager_mode():
tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [], tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
"program_out_scope", "program_out_scope",
core.VarDesc.VarType.STEP_SCOPES, True) core.VarDesc.VarType.STEP_SCOPES, True)
# TODO(jiabin): Support this later.
# else:
# tmp_scope_vec = core.eager.Tensor(core.VarDesc.VarType.FP32, [],
# "program_out_scope",
# core.VarDesc.VarType.STEP_SCOPES, True)
inner_scope = core.Scope() inner_scope = core.Scope()
tmp_scope_vec.value().set_scope(inner_scope) tmp_scope_vec.value().set_scope(inner_scope)
...@@ -450,7 +482,8 @@ class PartialProgramLayer: ...@@ -450,7 +482,8 @@ class PartialProgramLayer:
return main_program.clone(for_test=True) return main_program.clone(for_test=True)
def _is_no_value(self, var): def _is_no_value(self, var):
if isinstance(var, core.VarBase) and var.shape == [1]: if isinstance(var,
(core.VarBase, core.eager.Tensor)) and var.shape == [1]:
# NOTE: .numpy() will insert MemcpySync operation, it hits performance. # NOTE: .numpy() will insert MemcpySync operation, it hits performance.
if var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM: if var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM:
return True return True
...@@ -460,7 +493,7 @@ class PartialProgramLayer: ...@@ -460,7 +493,7 @@ class PartialProgramLayer:
""" """
Removes invalid value for various-length return statement Removes invalid value for various-length return statement
""" """
if isinstance(out_vars, core.VarBase): if isinstance(out_vars, (core.VarBase, core.eager.Tensor)):
if self._is_no_value(out_vars): if self._is_no_value(out_vars):
return None return None
return out_vars return out_vars
...@@ -527,7 +560,7 @@ class PartialProgramLayer: ...@@ -527,7 +560,7 @@ class PartialProgramLayer:
param_and_buffer_names_set = set() param_and_buffer_names_set = set()
for i, var in enumerate(self._params): for i, var in enumerate(self._params):
# self._params contains parameters and buffers with persistable=True. # self._params contains parameters and buffers with persistable=True.
if not isinstance(var, core.VarBase): if not isinstance(var, (core.VarBase, core.eager.Tensor)):
raise TypeError( raise TypeError(
'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'. 'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'.
format(i, type(var))) format(i, type(var)))
...@@ -559,10 +592,18 @@ def _create_fake_var(): ...@@ -559,10 +592,18 @@ def _create_fake_var():
""" """
Create a fake_var (force on CPU) to handle empty input or output Create a fake_var (force on CPU) to handle empty input or output
""" """
if not core._in_eager_mode():
return [ return [
core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var", core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
core.VarDesc.VarType.RAW, False) core.VarDesc.VarType.RAW, False)
] ]
else:
return []
# TODO(jiabin): Support this later
# return [
# core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var",
# core.VarDesc.VarType.RAW, False)
# ]
def partial_program_from(concrete_program): def partial_program_from(concrete_program):
......
...@@ -25,7 +25,7 @@ import threading ...@@ -25,7 +25,7 @@ import threading
import six import six
import paddle import paddle
from paddle.fluid import core from paddle.fluid import core, dygraph
from paddle.fluid.compiler import BuildStrategy, CompiledProgram, ExecutionStrategy from paddle.fluid.compiler import BuildStrategy, CompiledProgram, ExecutionStrategy
from paddle.fluid.data_feeder import check_type from paddle.fluid.data_feeder import check_type
from paddle.fluid.layers.utils import flatten, pack_sequence_as from paddle.fluid.layers.utils import flatten, pack_sequence_as
...@@ -898,6 +898,7 @@ def save(layer, path, input_spec=None, **configs): ...@@ -898,6 +898,7 @@ def save(layer, path, input_spec=None, **configs):
state_var_dict[var.name] = var state_var_dict[var.name] = var
# 3. share parameters from Layer to scope & record var info # 3. share parameters from Layer to scope & record var info
with dygraph.guard():
for param_or_buffer in concrete_program.parameters: for param_or_buffer in concrete_program.parameters:
# share to scope # share to scope
if param_or_buffer.type == core.VarDesc.VarType.VOCAB: if param_or_buffer.type == core.VarDesc.VarType.VOCAB:
...@@ -915,12 +916,14 @@ def save(layer, path, input_spec=None, **configs): ...@@ -915,12 +916,14 @@ def save(layer, path, input_spec=None, **configs):
if param_or_buffer.name not in extra_var_info: if param_or_buffer.name not in extra_var_info:
extra_info_dict = dict() extra_info_dict = dict()
if param_or_buffer.name in state_names_dict: if param_or_buffer.name in state_names_dict:
extra_info_dict['structured_name'] = state_names_dict[ extra_info_dict[
'structured_name'] = state_names_dict[
param_or_buffer.name] param_or_buffer.name]
extra_info_dict[ extra_info_dict[
'stop_gradient'] = param_or_buffer.stop_gradient 'stop_gradient'] = param_or_buffer.stop_gradient
if isinstance(param_or_buffer, ParamBase): if isinstance(param_or_buffer, ParamBase):
extra_info_dict['trainable'] = param_or_buffer.trainable extra_info_dict[
'trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict extra_var_info[param_or_buffer.name] = extra_info_dict
# 4. build input & output of save_inference_model # 4. build input & output of save_inference_model
......
...@@ -94,7 +94,7 @@ def monkey_patch_varbase(): ...@@ -94,7 +94,7 @@ def monkey_patch_varbase():
# Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() is only available in dygraph. # Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() is only available in dygraph.
# It will fail. So, for properties that differ between dynamic and static graph, do not use getattr(self, attr, None). # It will fail. So, for properties that differ between dynamic and static graph, do not use getattr(self, attr, None).
attr_not_need_keys = ['grad', 'T'] attr_not_need_keys = ['grad', 'T']
if isinstance(self, ParamBase): if isinstance(self, (ParamBase, EagerParamBase)):
attr_kwargs = self.__dict__.copy() attr_kwargs = self.__dict__.copy()
else: else:
attr_names = [] attr_names = []
...@@ -111,7 +111,7 @@ def monkey_patch_varbase(): ...@@ -111,7 +111,7 @@ def monkey_patch_varbase():
attr_kwargs.update(kwargs) attr_kwargs.update(kwargs)
if to_parameter or isinstance(self, ParamBase): if to_parameter or isinstance(self, (ParamBase, EagerParamBase)):
del attr_kwargs['persistable'] del attr_kwargs['persistable']
# NOTE(Aurelius84): All parameters should be placed into global block. # NOTE(Aurelius84): All parameters should be placed into global block.
attr_kwargs['block'] = attr_kwargs['block'].program.global_block() attr_kwargs['block'] = attr_kwargs['block'].program.global_block()
......
...@@ -1821,7 +1821,7 @@ def _pack_loaded_dict(load_obj): ...@@ -1821,7 +1821,7 @@ def _pack_loaded_dict(load_obj):
@static_only @static_only
def _legacy_save(param_dict, model_path, protocol=2): def _legacy_save(param_dict, model_path, protocol=2):
def get_tensor(var): def get_tensor(var):
if isinstance(var, core.VarBase): if isinstance(var, (core.VarBase, core.eager.Tensor)):
return var.numpy() return var.numpy()
elif isinstance(var, core.LoDTensor): elif isinstance(var, core.LoDTensor):
return np.array(var) return np.array(var)
......
...@@ -10148,6 +10148,9 @@ def flatten(x, axis=1, name=None): ...@@ -10148,6 +10148,9 @@ def flatten(x, axis=1, name=None):
check_variable_and_dtype( check_variable_and_dtype(
x, 'x', ['float32', 'float64', 'int8', 'int32', 'int64', 'uint8'], x, 'x', ['float32', 'float64', 'int8', 'int32', 'int64', 'uint8'],
'flatten') 'flatten')
if in_dygraph_mode():
return _C_ops.flatten2(x, 'axis', axis)[0]
helper = LayerHelper('flatten', **locals()) helper = LayerHelper('flatten', **locals())
if not (isinstance(x, Variable)): if not (isinstance(x, Variable)):
......
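With the early return added above, calling flatten under dygraph dispatches straight to the C++ kernel instead of assembling a static-graph op; flatten2 returns (Out, XShape) and the fast path keeps only Out. A hedged usage sketch, assuming a dygraph context:

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.disable_static()
x = paddle.to_tensor(np.random.rand(2, 3, 4).astype('float32'))
out = fluid.layers.flatten(x, axis=1)  # fast path in dygraph mode
print(out.shape)  # [2, 12]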
...@@ -663,6 +663,8 @@ def assign(input, output=None): ...@@ -663,6 +663,8 @@ def assign(input, output=None):
}) })
if is_inplace and in_dygraph_mode(): if is_inplace and in_dygraph_mode():
# TODO(jiabin): Remove this when we support inplace
if not core._in_eager_mode():
output._bump_inplace_version() output._bump_inplace_version()
return output return output
......
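The guard above keeps the inplace-version bump on the legacy path only, since the eager engine does not support it yet (per the TODO). A hedged sketch of the in-place form of assign that this code path serves:

import paddle
import paddle.fluid as fluid

paddle.disable_static()
x = paddle.to_tensor([1.0, 2.0])
y = paddle.zeros([2])
fluid.layers.assign(x, output=y)  # writes into y in place
print(y.numpy())  # [1. 2.]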
...@@ -771,13 +771,13 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): ...@@ -771,13 +771,13 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
self.assertTrue(np.array_equal(egr_tensor.numpy(), ori_arr)) self.assertTrue(np.array_equal(egr_tensor.numpy(), ori_arr))
ori_place = egr_tensor.place ori_place = egr_tensor.place
new_arr = np.random.rand(4, 4, 16, 32).astype('float32') new_arr = np.random.rand(4, 16, 16, 32).astype('float32')
self.assertFalse(np.array_equal(egr_tensor.numpy(), new_arr)) self.assertFalse(np.array_equal(egr_tensor.numpy(), new_arr))
egr_tensor._set_value(new_arr) egr_tensor.set_value(new_arr)
self.assertEqual(egr_tensor.stop_gradient, True) self.assertEqual(egr_tensor.stop_gradient, True)
self.assertTrue(egr_tensor.place._equals(ori_place)) self.assertTrue(egr_tensor.place._equals(ori_place))
self.assertEqual(egr_tensor.shape, [4, 4, 16, 32]) self.assertEqual(egr_tensor.shape, [4, 16, 16, 32])
self.assertTrue(np.array_equal(egr_tensor.numpy(), new_arr)) self.assertTrue(np.array_equal(egr_tensor.numpy(), new_arr))
...@@ -880,7 +880,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase): ...@@ -880,7 +880,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
new_weight = np.ones([1, 3]).astype('float32') new_weight = np.ones([1, 3]).astype('float32')
self.assertFalse(np.array_equal(linear.weight.numpy(), new_weight)) self.assertFalse(np.array_equal(linear.weight.numpy(), new_weight))
linear.weight._set_value(new_weight) linear.weight.set_value(new_weight)
self.assertTrue(np.array_equal(linear.weight.numpy(), new_weight)) self.assertTrue(np.array_equal(linear.weight.numpy(), new_weight))
self.assertTrue(linear.weight.place._equals(ori_place)) self.assertTrue(linear.weight.place._equals(ori_place))
......
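These test edits track the rename of the private _set_value method to the public set_value API, which copies new data into an existing tensor in place while preserving its place and stop_gradient. A hedged usage sketch:

import numpy as np
import paddle

linear = paddle.nn.Linear(3, 1)
new_weight = np.ones([3, 1]).astype('float32')  # must match the weight shape
linear.weight.set_value(new_weight)  # in-place update
assert np.array_equal(linear.weight.numpy(), new_weight)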
...@@ -533,10 +533,6 @@ class TestTensorRegisterHook(unittest.TestCase): ...@@ -533,10 +533,6 @@ class TestTensorRegisterHook(unittest.TestCase):
size=[self.batch_size, self.in_size]).astype('float32') size=[self.batch_size, self.in_size]).astype('float32')
data_t = paddle.to_tensor(data) data_t = paddle.to_tensor(data)
if _in_eager_mode():
with self.assertRaises(TypeError):
out = jit_net(data_t)
else:
with self.assertRaises(AssertionError): with self.assertRaises(AssertionError):
out = jit_net(data_t) out = jit_net(data_t)
......