Unverified commit da47544c authored by Jiabin Yang, committed by GitHub

Support slim eager (#39874)

* eager, test=develop

* fix bug, test=develop

* eager, test=develop

* merge legacy to fluid

* eager, test=develop

* eager, test=develop

* Refactor TensorAdd func by template and remove gradient_accumulation in eager

* Remove needless target name

* eager, test=develop

* eager, test=develop

* Use overload instead of template

* Remove legacy code

* Remove legacy code

* selectedrows, test=develop

* Remove DataType test

* eager, test=develop

* eager, test=develop

* support gan, test=develop

* Using Tensor directly instead of using EagerTensor

* support gradient_accumulation

* make test_imperative_lod_tensor_to_selected_rows longer

* make test_imperative_lod_tensor_to_selected_rows longer

* refine code

* ptb, test=develop

* Rename all EagerTensor to Tensor

* Rename some EagerTensor to Tensor

* rename EagerTensor to EagerVariable

* eager, test=develop

* eager, test=develop

* eager, test=develop

* eager, test=develop

* add more test

* eager, test=develop

* Support copiable selected rows and merge develop

* save load, eager, test=develop

* save load, eager, test=develop

* refine, test=develop

* remove useless _set_value method

* refine, test=develop

* refine, test=develop

* revert static_runner, test=develop

* EagerTensor to Tensor, test=develop

* refine, test=develop

* refine, test=develop

* clear grad, test=develop

* merge, develop

* merge, develop

* merge, test=develop

* merge, test=develop

* Support quant and part of slice

* support legacy static save

* extend slim tests time

* remove imperative on inference

* remove imperative on inference

* merge develop

* fix typo

* fix typo

* split slice related code into 2 part for imperative and eager

* split slice from inference

* split slice from inference

* fix test_tensor_register_hook
Co-authored-by: Wang Huan <wanghuan29@baidu.com>
Co-authored-by: Weilong Wu <veyron_wu@163.com>
Co-authored-by: wanghuancoder <wanghuancoder@163.com>
Parent d9884e20
......@@ -24,11 +24,14 @@ class GradNodeAccumulation : public GradNodeBase {
public:
// Constructor: configure fwd input tensors to grad node
explicit GradNodeAccumulation(AutogradMeta* meta) : GradNodeBase(1, 1) {
VLOG(6) << "Construct GradNodeAccumulation";
weak_grad_ = meta->WeakGrad();
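// Hold only a weak reference to the grad tensor so the accumulation node
// does not extend its lifetime.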
SetDefaultGradInOutMeta();
}
~GradNodeAccumulation() override = default;
~GradNodeAccumulation() override {
VLOG(6) << "Destruct GradNodeAccumulation";
}
// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
......
......@@ -46,7 +46,7 @@ class GradNodeScale : public GradNodeBase {
const std::vector<paddle::experimental::Tensor>& tensors);
void SetAttributes_scale(float scale);
std::string name() override { return ""; }
// Members: define fwd input tensors
// For Scale there is no fwd input tensor needed
private:
......
......@@ -996,6 +996,29 @@ static std::string GenerateGradNodeCreationContent(
// then generate: "egr::AutogradMeta* p_autograd_out =
// egr::EagerUtils::autograd_meta("op_proto->outputs()[0].name()")"
std::string get_autograd_meta_str = " // Prepare Autograd Meta \n";
// If single output slotname and not duplicable,
// then generate: "egr::AutogradMeta* p_autograd_out =
// egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")"
for (const proto::OpProto::Var& output : out_vars) {
const std::string& output_name = output.name();
const std::string& output_autograd_name = "p_autograd_" + output_name;
if (output.duplicable()) {
const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
" std::vector<egr::AutogradMeta*> %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
} else {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
}
}
VLOG(6) << "Generated outputs autograd_meta";
for (const proto::OpProto::Var& input : in_vars) {
const std::string& input_name = input.name();
const std::string& input_autograd_name = "p_autograd_" + input_name;
......@@ -1024,31 +1047,6 @@ static std::string GenerateGradNodeCreationContent(
}
VLOG(6) << "Generated inputs autograd_meta";
// If single output slotname and not duplicable,
// then generate: "egr::AutogradMeta* p_autograd_out =
// egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")"
for (const proto::OpProto::Var& output : out_vars) {
const std::string& output_name = output.name();
const std::string& output_autograd_name = "p_autograd_" + output_name;
// Skip Intermediate Tensor
if (output.duplicable()) {
const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
" std::vector<egr::AutogradMeta*> %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
} else {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
}
}
VLOG(6) << "Generated outputs autograd_meta";
std::string prepare_autograd_meta_str = "";
prepare_autograd_meta_str += get_autograd_meta_str;
prepare_autograd_meta_str += "\n";
......@@ -1204,11 +1202,12 @@ static std::string GenerateGradNodeCreationContent(
" %s"
" bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(%s);\n"
" if(require_any_grad) {\n"
" VLOG(6) << \" Construct Grad for %s \"; \n"
" egr::EagerUtils::PassStopGradient(%s);\n"
"%s\n }";
std::string grad_node_creation_body_str = paddle::string::Sprintf(
GRAD_NODE_CREATION_TEMPLATE, prepare_autograd_meta_str,
compute_require_grad_args, pass_stop_gradient_args,
compute_require_grad_args, op_type, pass_stop_gradient_args,
grad_node_creation_str);
return grad_node_creation_body_str;
......@@ -2083,22 +2082,24 @@ static std::string GenerateGradNodeHeaderContents(
const char* GRAD_NODE_TEMPLATE =
"class GradNode%s : public egr::GradNodeBase {\n"
" public:\n"
" GradNode%s() : egr::GradNodeBase() {}\n"
" GradNode%s() : egr::GradNodeBase() { VLOG(7) << \" Construct "
"GradNode%s \"; }\n"
" GradNode%s(size_t bwd_in_slot_num, size_t bwd_out_slot_num) : "
"egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}\n"
" ~GradNode%s() override = default;\n"
"egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) { VLOG(7) << \" "
"Construct GradNode%s \"; }\n"
" ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n"
"\n"
" virtual std::vector<std::vector<paddle::experimental::Tensor>> "
"operator()(const "
"std::vector<std::vector<paddle::experimental::Tensor>>& grads) "
"override;\n"
"\n"
" std::string name() override { return \" GradNode%s \"; } \n "
"\n"
" // SetX, SetY, ...\n"
"%s\n"
" // SetAttrMap\n"
"%s\n"
" std::string name() { return \"GradNode%s\"; }\n"
"\n"
" private:\n"
" // TensorWrappers\n"
"%s\n"
......@@ -2195,8 +2196,8 @@ static std::string GenerateGradNodeHeaderContents(
VLOG(6) << "Generated TensorWrapper";
std::string grad_node_str = paddle::string::Sprintf(
GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type,
set_tensor_wrappers_str, set_attr_map_str, op_type,
GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type, op_type, op_type,
op_type, op_type, set_tensor_wrappers_str, set_attr_map_str,
tensor_wrapper_members_str, attr_members_str);
return grad_node_str;
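// For illustration (an assumed representative op), with op_type = "scale" the
// template above expands to roughly:
//   class GradNodescale : public egr::GradNodeBase {
//    public:
//     GradNodescale() : egr::GradNodeBase() { VLOG(7) << " Construct GradNodescale "; }
//     ~GradNodescale() override { VLOG(6) << " Destruct GradNodescale "; }
//     std::string name() override { return " GradNodescale "; }
//     ...
//   };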
......
......@@ -538,7 +538,7 @@ class {} : public egr::GradNodeBase {{
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
const std::vector<std::vector<paddle::experimental::Tensor>>& grads) override;
std::string name() override {{ return \" {} \"; }}
// SetTensorWrapperX, SetTensorWrapperY, ...
{}
// SetAttributes
......@@ -553,8 +553,9 @@ class {} : public egr::GradNodeBase {{
"""
node_declaration_str = NODE_DECLARATION_TEMPLATE.format(
grad_node_name, grad_node_name, grad_node_name, grad_node_name,
set_tensor_wrapper_methods_str, set_attribute_methods_str,
tensor_wrapper_members_str, attribute_members_str)
grad_node_name, set_tensor_wrapper_methods_str,
set_attribute_methods_str, tensor_wrapper_members_str,
attribute_members_str)
return node_declaration_str
......
......@@ -48,12 +48,16 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
}
visited.insert(node);
PADDLE_ENFORCE_NOT_NULL(
node,
paddle::platform::errors::Fatal(
"We got null node when we traverse the backward graph, and this "
"should not happened please check your code and contact us."));
// Find and append next nodes
const std::vector<std::vector<Edge>>& edges = node->GetEdges();
for (const auto& edge_list : edges) {
for (const Edge& edge : edge_list) {
GradNodeBase* next_node = edge.GetMutableGradNode().get();
// Next node could be nullptr if it is a leaf tensor with no
// AccumulationNode attached, or it could originate from dispensable inputs
......@@ -67,7 +71,6 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
}
}
}
return node_in_degree_map;
}
......
......@@ -30,6 +30,7 @@
namespace egr {
GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
VLOG(6) << "Construct GradNodeBase";
bwd_in_meta_.resize(bwd_in_slot_num);
bwd_out_meta_.resize(bwd_out_slot_num);
// adj_edges has the same num as backward outputs
......@@ -49,11 +50,15 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
// its pre-ops
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (node) {
if (node && node.get()) {
VLOG(6) << "Add Edges for slot: " << slot_id
<< " which is: " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
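// Leaf tensor without a grad node yet: lazily attach a GradNodeAccumulation
// so gradients can accumulate into it.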
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
VLOG(6) << "Add Edges for slot: " << slot_id
<< " which is: " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
......@@ -70,7 +75,7 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
"inputs's slot num."));
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (node) {
if (node && node.get()) {
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
......
......@@ -76,10 +76,10 @@ class GradSlotMeta {
class GradNodeBase {
public:
GradNodeBase() = default;
GradNodeBase() { VLOG(6) << "Construct GradNodeBase"; }
GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num);
// TODO(jiabin): Should we have other constructor here?
virtual ~GradNodeBase() = default;
virtual ~GradNodeBase() { VLOG(6) << "Destruct GradNodeBase"; }
/**
* operator() is designed to contain the real backward execution logic; it should
......
......@@ -30,6 +30,7 @@ class GradTestNode : public egr::GradNodeBase {
GradTestNode(float val, int in_num, int out_num)
: GradNodeBase(in_num, out_num), val_(val) {}
GradTestNode() : GradNodeBase() { val_ = 1.0; }
std::string name() override { return "GradTestNode"; }
std::vector<std::vector<paddle::experimental::Tensor>> operator()(
const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
override {
......
......@@ -122,9 +122,10 @@ paddle::experimental::Tensor* EagerUtils::mutable_grad(
void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
const std::shared_ptr<GradNodeBase>& grad_node) {
for (const auto& autograd_meta : *autograd_metas) {
if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
VLOG(6) << "Warning: Reseting GradNodeAccumulation for leaf tensor is "
"detected";
if (autograd_meta->GradNode()) {
VLOG(7) << "Should not set grad node twice, original node is:"
<< autograd_meta->GradNode()->name()
<< "current is: " << grad_node->name();
}
autograd_meta->SetGradNode(grad_node);
}
......@@ -132,11 +133,11 @@ void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
void EagerUtils::SetHistory(AutogradMeta* autograd_meta,
const std::shared_ptr<GradNodeBase>& grad_node) {
if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
VLOG(6)
<< "Warning: Resetting GradNodeAccumulation for leaf tensor is detected";
if (autograd_meta->GradNode()) {
VLOG(7) << "Should not set grad node twice, original node is:"
<< autograd_meta->GradNode()->name()
<< "current is: " << grad_node->name();
}
autograd_meta->SetGradNode(grad_node);
}
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
......@@ -30,10 +31,12 @@ limitations under the License. */
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/slice_utils.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
namespace paddle {
namespace pybind {
......@@ -119,6 +122,29 @@ extern void InitTensorWithNumpyValue(TensorObject* self,
extern PyTypeObject* p_tensor_type;
Py_ssize_t GetSliceIndexFromPyObject(PyObject* obj) {
if (PyObject_IsInstance(obj, reinterpret_cast<PyObject*>(p_tensor_type))) {
VLOG(6) << "Call GetSliceIndexFromTensor in Eager";
paddle::experimental::Tensor tensor = CastPyArg2Tensor(obj, 0);
PADDLE_ENFORCE_EQ(
tensor.initialized(), true,
paddle::platform::errors::InvalidArgument(
"We can only support initialized tensor in slice, however we got "
"uninitialized tensor %s, please check your code.",
tensor.name()));
return GetSliceIndexFromTensor((*static_cast<phi::DenseTensor*>(
CastPyArg2Tensor(obj, 0).impl().get())));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"We should only get paddle::experimental::Tensor or VarBase in this "
"method, when you reach this means we got another type index."));
}
}
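// A sketch of the Python usage this path serves (illustrative; assumes eager
// mode is enabled):
//   import paddle
//   x = paddle.arange(12).reshape([3, 4])
//   i = paddle.to_tensor(1)  # 1-element int tensor accepted as a slice bound
//   y = x[i:3]               # bound is read back via GetSliceIndexFromTensor
//   assert y.shape == [2, 4]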
bool PyCheckTensor(PyObject* obj) {
return PyObject_IsInstance(obj, reinterpret_cast<PyObject*>(p_tensor_type));
}
static PyObject* tensor_method_numpy(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
......@@ -468,16 +494,111 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
// NOTE(wuweilong): Set value and not change self's original place
static PyObject* tensor_method_set_value(TensorObject* self, PyObject* args,
PyObject* kwargs) {
static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
EAGER_TRY
VLOG(4) << "Value " << self->tensor.name();
pybind11::object numpy_value =
pybind11::object(pybind11::handle(PyTuple_GET_ITEM(args, 0)), true);
InitTensorWithNumpyValue(self, numpy_value, false);
Py_INCREF(Py_None);
return Py_None;
PyObject* _index = PyTuple_GET_ITEM(args, 0);
VLOG(4) << "Call _getitem_index_not_tensor";
std::vector<int> slice_axes, slice_starts, slice_ends, slice_strides,
decrease_axis, none_axes, infer_flags, list_select_idxs;
// if index is a list, list_select_flag will be true
bool list_select_flag = false;
PADDLE_ENFORCE_EQ(
self->tensor.is_initialized(), true,
platform::errors::InvalidArgument(
"tensor %s has not been initialized, we can only slice initialized "
"tensor please init it first with numpy or other tensor.",
self->tensor.name()));
auto tensor = static_cast<phi::DenseTensor*>(self->tensor.impl().get());
ParseIndexingSlice(tensor, _index, &slice_axes, &slice_starts, &slice_ends,
&slice_strides, &decrease_axis, &none_axes, &infer_flags,
&list_select_idxs, &list_select_flag);
auto out = slice_axes.empty() && !list_select_flag
? self->tensor
: paddle::experimental::Tensor(
egr::Controller::Instance().GenerateUniqueName());
if (!slice_axes.empty()) {
framework::AttributeMap attrs = {{"axes", slice_axes},
{"starts", slice_starts},
{"ends", slice_ends},
{"infer_flags", infer_flags},
{"decrease_axis", decrease_axis}};
std::string op_type = "slice";
for (auto stride : slice_strides) {
if (stride != 1) {
op_type = "strided_slice";
attrs.insert({"strides", slice_strides});
attrs.erase("decrease_axis");
break;
}
}
if (op_type == "slice") {
out = slice_dygraph_function(self->tensor, paddle::experimental::Tensor(),
paddle::experimental::Tensor(),
std::move(attrs));
} else if (op_type == "strided_slice") {
out = strided_slice_dygraph_function(self->tensor, attrs);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Slice is only support slice and strided_slice, but we got %s which "
"is impossible, please check your code first or contact us by "
"issue. ",
op_type));
}
}
if (!none_axes.empty()) {
// Deal with cases when all axes are decreased.
// After slice, the shape of out is [1], which should have been
// [], but Paddle doesn't support scalar.
// In order to ensure the correctness of the final shape of out,
// one dimension of out needs to be decreased.
// For example:
// # x.shape: (2,3,4)
// out = x[0, 1, 1, None] # out.shape : (1)
if (static_cast<int>(decrease_axis.size()) == tensor->dims().size()) {
none_axes.pop_back();
}
if (!none_axes.empty()) {
// Deal with cases that decrease_axes is not empty
// For example:
// # x.shape: (2,3,4)
// out = x[0, 0:2, None] # out.shape : (2, 1, 4)
for (auto& axis : none_axes) {
int len = 0;
for (int da : decrease_axis) {
if (da < axis) {
len++;
}
}
axis -= len;
}
paddle::experimental::Tensor new_out;
framework::AttributeMap attrs = {{"axes", none_axes}};
new_out = std::get<0>(unsqueeze2_dygraph_function(out, std::move(attrs)));
return ToPyObject(new_out);
}
}
// the index is a list
if (list_select_flag) {
auto select_index = paddle::experimental::Tensor(
egr::Controller::Instance().GenerateUniqueName());
auto idx_tensor = std::make_shared<phi::DenseTensor>();
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(
egr::Controller::Instance().GetExpectedPlace());
paddle::framework::TensorFromVector(list_select_idxs, *dev_ctx,
idx_tensor.get());
framework::AttributeMap attrs = {{"dim", 0}};
out = index_select_dygraph_function(self->tensor, select_index,
std::move(attrs));
}
return ToPyObject(out);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
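// Python-side behavior of the dispatch above (illustrative sketch):
//   import paddle
//   x = paddle.rand([2, 3, 4])
//   a = x[0, 0:2]        # unit strides -> slice op; decrease_axis drops dim 0
//   b = x[:, ::2, :]     # any stride != 1 -> strided_slice op
//   c = x[0, 0:2, None]  # None axis -> extra unsqueeze2; c.shape == [2, 1, 4]
//   d = x[[0, 1]]        # list index -> index_select along dim 0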
......@@ -602,7 +723,8 @@ PyMethodDef variable_methods[] = {
{"get_tensor",
(PyCFunction)(void (*)(void))tensor_method_get_underline_tensor,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_value", (PyCFunction)(void (*)(void))tensor_method_set_value,
{"_getitem_index_not_tensor",
(PyCFunction)(void (*)(void))tensor__getitem_index_not_tensor,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_register_grad_hook",
(PyCFunction)(void (*)(void))tensor_register_grad_hook,
......
......@@ -16,8 +16,11 @@ limitations under the License. */
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope_guard.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
......@@ -184,6 +187,11 @@ paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos) {
}
}
std::shared_ptr<imperative::VarBase> CastPyArg2VarBase(PyObject* obj,
ssize_t arg_pos) {
return py::cast<std::shared_ptr<imperative::VarBase>>(obj);
}
std::vector<paddle::experimental::Tensor> CastPyArg2VectorOfTensor(
PyObject* obj, ssize_t arg_pos) {
std::vector<paddle::experimental::Tensor> result;
......@@ -737,5 +745,6 @@ std::vector<paddle::experimental::Tensor*> GetTensorPtrListFromArgs(
return result;
}
} // namespace pybind
} // namespace paddle
......@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/phi/core/dense_tensor.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
namespace paddle {
namespace pybind {
......@@ -33,6 +32,8 @@ int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos);
float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos);
std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos);
paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos);
std::shared_ptr<imperative::VarBase> CastPyArg2VarBase(PyObject* obj,
ssize_t arg_pos);
std::vector<paddle::experimental::Tensor> CastPyArg2VectorOfTensor(
PyObject* obj, ssize_t arg_pos);
platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos);
......@@ -112,5 +113,7 @@ std::vector<paddle::experimental::Tensor*> GetTensorPtrListFromArgs(
const std::string& op_type, const std::string& arg_name, PyObject* args,
ssize_t arg_idx, bool dispensable = false);
// end of Slice related methods
} // namespace pybind
} // namespace paddle
......@@ -54,6 +54,7 @@ limitations under the License. */
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/pybind/op_function.h"
#include "paddle/fluid/pybind/pybind_boost_headers.h"
#include "paddle/fluid/pybind/slice_utils.h"
#include "paddle/fluid/pybind/tensor_py.h"
namespace paddle {
......@@ -319,6 +320,23 @@ static std::string GetTypeName(const imperative::VarBase &var) {
}
}
Py_ssize_t GetSliceIndexFromPyObject(PyObject *obj) {
if (py::isinstance<imperative::VarBase>(obj)) {
VLOG(6) << "Call GetSliceIndexFromTensor in Imperative";
return GetSliceIndexFromTensor(
py::cast<std::shared_ptr<imperative::VarBase>>(obj)
->Var()
.Get<framework::LoDTensor>());
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"We should only get paddle::experimental::Tensor or VarBase in this "
"method, when you reach this means we got another type index."));
}
}
bool PyCheckTensor(PyObject *obj) {
return py::isinstance<imperative::VarBase>(obj);
}
using PyNameVarBaseMap = std::unordered_map<std::string, py::handle>;
// NOTE(zjl): py::handle is a very light wrapper of PyObject *.
......@@ -360,18 +378,6 @@ GetVarBaseListFromPyHandle(const py::handle &handle) {
return result;
}
static bool IsNumpyType(PyObject *obj) {
// It is not a good way to judge the type of obj by its type's name. Maybe
// using `PyArray_IsScalar` would be better. However, that interface cannot be
// used when only pybind11 is included; it requires compiling against numpy.
auto type_name = std::string(Py_TYPE(obj)->tp_name);
return type_name == "numpy.int64" || type_name == "numpy.longlong" ||
type_name == "numpy.int32" || type_name == "numpy.int16";
}
static bool PyCheckTensor(PyObject *obj) {
return py::isinstance<imperative::VarBase>(obj);
}
// cast numpy type form S to T, this may allocate new memory
template <class T, class S>
......@@ -429,260 +435,6 @@ static imperative::NameVarBaseMap ConvertToNameVarBaseMap(
return result;
}
static bool PyCheckInteger(PyObject *obj) {
#if PY_VERSION_HEX < 0x03000000
return (PyLong_Check(obj) || PyInt_Check(obj)) && !PyBool_Check(obj);
#else
return PyLong_Check(obj) && !PyBool_Check(obj);
#endif
}
static Py_ssize_t GetSliceIndexFromTensor(
const std::shared_ptr<imperative::VarBase> &tensor_index) {
const auto &tensor = tensor_index->Var().Get<framework::LoDTensor>();
if (tensor.numel() == 1) {
if (framework::TransToProtoVarType(tensor.dtype()) ==
framework::proto::VarType::INT32) {
return static_cast<Py_ssize_t>(operators::GetValue<int32_t>(&tensor));
} else if (framework::TransToProtoVarType(tensor.dtype()) ==
framework::proto::VarType::INT64) {
return static_cast<Py_ssize_t>(operators::GetValue<int64_t>(&tensor));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, the type of tensor in slice indices only allows "
"int32 and int64, please check the type of index tensor."));
}
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, tensor in slice indices only allows 1 element, "
"but received %d.",
tensor.numel()));
}
}
// NOTE(zhiqiu): Revised version of PySlice_GetIndices. From:
// https://github.com/python/cpython/blob/8d21aa21f2cbc6d50aab3f420bb23be1d081dac4/Objects/sliceobject.c#L103
// The original PySlice_GetIndices returns a wrong result when the
// slice item contains a long int, such as arr[:180L].
// It is not clear why this happens.
// Besides, PySlice_GetIndices cannot raise an error when a float appears in a
// slice item. So this is a revised version of PySlice_GetIndices, named
// _PySlice_GetIndices. Try to use _PySlice_Unpack, which is more robust than
// PySlice_GetIndices, in the future.
static int _PySlice_GetIndices(PySliceObject *r, Py_ssize_t length,
Py_ssize_t *start, Py_ssize_t *stop,
Py_ssize_t *step) {
/* XXX support long ints */
if (r->step == Py_None) {
*step = 1;
} else {
if (PyCheckInteger(r->step) || IsNumpyType(r->step)) {
*step = PyLong_AsLong(r->step);
} else if (PyCheckTensor(r->step)) {
*step = GetSliceIndexFromTensor(
py::cast<std::shared_ptr<imperative::VarBase>>(r->step));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, slice indices only allows None, integers, "
"tensor(int) and numpy(int) in slice item, but received %s.",
std::string(Py_TYPE(r->step)->tp_name)));
}
}
if (r->start == Py_None) {
*start = *step < 0 ? length - 1 : 0;
} else {
if (PyCheckInteger(r->start) || IsNumpyType(r->start)) {
*start = PyLong_AsLong(r->start);
} else if (PyCheckTensor(r->start)) {
*start = GetSliceIndexFromTensor(
py::cast<std::shared_ptr<imperative::VarBase>>(r->start));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, slice indices only allows None, integers, "
"tensor(int) and numpy(int) in slice item, but received %s.",
std::string(Py_TYPE(r->start)->tp_name)));
}
if (*start < 0) *start += length;
*start = std::max(*start, static_cast<Py_ssize_t>(0));
}
if (r->stop == Py_None) {
*stop = *step < 0 ? -1 : length;
} else {
if (PyCheckInteger(r->stop) || IsNumpyType(r->stop)) {
*stop = PyLong_AsLong(r->stop);
} else if (PyCheckTensor(r->stop)) {
*stop = GetSliceIndexFromTensor(
py::cast<std::shared_ptr<imperative::VarBase>>(r->stop));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, slice indices only allows None, integers, "
"tensor(int) and numpy(int) in slice item, but received %s.",
std::string(Py_TYPE(r->stop)->tp_name)));
}
if (0 < *step && *stop < 0) *stop += length;
*stop = std::min(*stop, length);
}
if (*stop > length) return -1;
if (*start >= length) return -1;
if (*step == 0) return -1;
return 0;
}
static void ParseIndexingSlice(
framework::LoDTensor *tensor, PyObject *_index,
std::vector<int> *slice_axes, std::vector<int> *slice_starts,
std::vector<int> *slice_ends, std::vector<int> *slice_strides,
std::vector<int> *decrease_axis, std::vector<int> *none_axes,
std::vector<int> *infer_flags, std::vector<int> *list_select_idxs,
bool *list_select_flag) {
// We allow indexing by Integers, Slices, Ellipsis, None, tuples of those
// types, and list of Bool and Integers.
// wrap to tuple
// NOTE(zhiqiu): PyTuple_Pack increases refcount.
PyObject *index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index;
DEFINE_PADDLE_SCOPE_GUARD([index, _index]() {
if (!PyTuple_Check(_index)) {
Py_DECREF(index);
VLOG(4) << "Call Py_DECREF";
}
});
PADDLE_ENFORCE_EQ(
tensor->IsInitialized(), true,
platform::errors::InvalidArgument("tensor has not been initialized"));
const auto &shape = tensor->dims();
const int rank = shape.size();
const int size = PyTuple_GET_SIZE(index);
// specified_dims is the number of dimensions which are indexed by Integers
// or Slices.
int specified_dims = 0;
int ell_count = 0;
for (int dim = 0; dim < size; ++dim) {
PyObject *slice_item = PyTuple_GetItem(index, dim);
if (PyCheckInteger(slice_item) || PySlice_Check(slice_item)) {
specified_dims++;
} else if (slice_item == Py_Ellipsis) {
ell_count++;
}
}
PADDLE_ENFORCE_LE(ell_count, 1,
platform::errors::InvalidArgument(
"An index can only have a single ellipsis ('...')"));
int none_count = 0;
for (int i = 0, dim = 0; i < size; ++i) {
PyObject *slice_item = PyTuple_GetItem(index, i);
infer_flags->push_back(1);
int dim_len = shape[dim];
if (PyCheckInteger(slice_item) || IsNumpyType(slice_item)) {
// integer, PyLong_AsLong supports both int and long
int start = static_cast<int>(PyLong_AsLong(slice_item));
auto s_t = start;
start = start < 0 ? start + dim_len : start;
if (start >= dim_len || start < 0) {
std::string str_error_message =
"The starting index " + std::to_string(s_t) +
" of slice is out of bounds in tensor " + std::to_string(dim) +
"-th axis, it shound be in the range of [" +
std::to_string(-dim_len) + ", " + std::to_string(dim_len) + ")";
// py::index_error corresponds to IndexError in Python
// Used to indicate out of bounds access in __getitem__, __setitem__
throw py::index_error(str_error_message);
}
slice_axes->push_back(dim);
slice_starts->push_back(start);
slice_ends->push_back(start + 1);
slice_strides->push_back(1);
decrease_axis->push_back(dim);
dim++;
} else if (PySlice_Check(slice_item)) {
// slice item
Py_ssize_t start, end, step;
PySliceObject *p = reinterpret_cast<PySliceObject *>(slice_item);
_PySlice_GetIndices(p, dim_len, &start, &end, &step);
// :: or : or 0:dim_len:1
if (start == 0 && end == dim_len && step == 1) {
dim++;
continue;
}
slice_axes->push_back(dim);
slice_starts->push_back(start);
slice_ends->push_back(end);
slice_strides->push_back(step);
dim++;
} else if (slice_item == Py_Ellipsis) {
dim += rank - specified_dims;
} else if (slice_item == Py_None) {
none_axes->push_back(dim + none_count);
none_count++;
} else if (PyList_Check(slice_item)) {
*list_select_flag = true;
PADDLE_ENFORCE_EQ(
size, 1,
platform::errors::InvalidArgument(
"When index contains a list, its length is excepted to 1, "
"but received %d",
size));
bool all_bool = true;
int list_size = PyList_GET_SIZE(slice_item);
for (int j = 0; j < list_size; ++j) {
PyObject *list_item = PyList_GetItem(slice_item, j);
if (PyCheckInteger(list_item)) {
all_bool = false;
} else if (!PyBool_Check(list_item)) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Only support int or bool in index list."));
}
}
if (all_bool) {
PADDLE_ENFORCE_EQ(
list_size, shape[0],
platform::errors::InvalidArgument(
"The dimension of bool index doesn't match indexed array along "
"dimension 0, the target dimension is %d, but received %d.",
shape[0], list_size));
for (int j = 0; j < list_size; ++j) {
PyObject *list_item = PyList_GetItem(slice_item, j);
if (list_item == Py_True) {
list_select_idxs->push_back(j);
}
}
} else {
for (int j = 0; j < list_size; ++j) {
PyObject *list_item = PyList_GetItem(slice_item, j);
if (PyCheckInteger(list_item)) {
list_select_idxs->push_back(
static_cast<int>(PyLong_AsLong(list_item)));
} else if (list_item == Py_True) {
list_select_idxs->push_back(1);
} else {
list_select_idxs->push_back(0);
}
}
}
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, Tensor.__indices__() only allows indexing "
"by Integers, Slices, Ellipsis, None, tuples of these types "
"and list of Bool and Integers, but received "
"%s in %dth slice item",
std::string(Py_TYPE(slice_item)->tp_name), i + 1));
}
}
// valid_indexs is the number of indices, excluding None and ellipsis entries
const int valid_indexs = size - none_axes->size() - ell_count;
PADDLE_ENFORCE_EQ(valid_indexs <= rank, true,
platform::errors::InvalidArgument(
"Too many indices (%d) for tensor of dimension %d.",
valid_indexs, rank));
}
template <typename P>
static void VarBaseCopy(std::shared_ptr<imperative::VarBase> &src, // NOLINT
imperative::VarBase &dst, // NOLINT
......
......@@ -80,6 +80,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/cuda_streams_py.h"
#include "paddle/fluid/pybind/distributed_py.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/pybind/io.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/lod_utils.h"
......@@ -101,7 +102,6 @@ limitations under the License. */
#include "paddle/fluid/pybind/gloo_context_py.h"
#include "paddle/fluid/pybind/gloo_wrapper_py.h"
#include "paddle/fluid/pybind/heter_wrapper_py.h"
#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/pybind/inference_api.h"
#include "paddle/fluid/pybind/ir.h"
#include "paddle/fluid/pybind/metrics_py.h"
......@@ -527,6 +527,7 @@ PYBIND11_MODULE(core_avx, m) {
PYBIND11_MODULE(core_noavx, m) {
#endif
BindImperative(&m);
BindEager(&m);
BindCudaStream(&m);
......@@ -741,8 +742,6 @@ PYBIND11_MODULE(core_noavx, m) {
m.def("_promote_types_if_complex_exists",
&paddle::framework::PromoteTypesIfComplexExists);
BindImperative(&m);
py::class_<framework::Tensor> framework_tensor(m, "Tensor",
py::buffer_protocol());
g_framework_tensor_pytype =
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Python.h>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope_guard.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
namespace py = pybind11;
namespace paddle {
namespace pybind {
static bool PyCheckTensor(PyObject* obj);
static Py_ssize_t GetSliceIndexFromPyObject(PyObject* obj);
// Slice related methods
static bool PyCheckInteger(PyObject* obj) {
#if PY_VERSION_HEX < 0x03000000
return (PyLong_Check(obj) || PyInt_Check(obj)) && !PyBool_Check(obj);
#else
return PyLong_Check(obj) && !PyBool_Check(obj);
#endif
}
static bool IsNumpyType(PyObject* obj) {
// It is not a good way to judge the type of obj by its type's name. Maybe
// using `PyArray_IsScalar` would be better. However, that interface cannot be
// used when only pybind11 is included; it requires compiling against numpy.
auto type_name = std::string(Py_TYPE(obj)->tp_name);
return type_name == "numpy.int64" || type_name == "numpy.longlong" ||
type_name == "numpy.int32" || type_name == "numpy.int16";
}
static Py_ssize_t GetSliceIndexFromTensor(const phi::DenseTensor& tensor) {
if (tensor.numel() == 1) {
if (framework::TransToProtoVarType(tensor.type()) ==
framework::proto::VarType::INT32) {
return static_cast<Py_ssize_t>(operators::GetValue<int32_t>(&tensor));
} else if (framework::TransToProtoVarType(tensor.type()) ==
framework::proto::VarType::INT64) {
return static_cast<Py_ssize_t>(operators::GetValue<int64_t>(&tensor));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, the type of tensor in slice indices only allows "
"int32 and int64, please check the type of index tensor."));
}
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, tensor in slice indices only allows 1 element, "
"but received %d.",
tensor.numel()));
}
}
// NOTE(zhiqiu): Revised version of PySlice_GetIndices. From:
// https://github.com/python/cpython/blob/8d21aa21f2cbc6d50aab3f420bb23be1d081dac4/Objects/sliceobject.c#L103
// The original PySlice_GetIndices returns a wrong result when the
// slice item contains a long int, such as arr[:180L].
// It is not clear why this happens.
// Besides, PySlice_GetIndices cannot raise an error when a float appears in a
// slice item. So this is a revised version of PySlice_GetIndices, named
// _PySlice_GetIndices. Try to use _PySlice_Unpack, which is more robust than
// PySlice_GetIndices, in the future.
static int _PySlice_GetIndices(PySliceObject* r, Py_ssize_t length,
Py_ssize_t* start, Py_ssize_t* stop,
Py_ssize_t* step) {
/* XXX support long ints */
if (r->step == Py_None) {
*step = 1;
} else {
if (PyCheckInteger(r->step) || IsNumpyType(r->step)) {
*step = PyLong_AsLong(r->step);
} else if (PyCheckTensor(r->step)) {
*step = GetSliceIndexFromPyObject(r->step);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, slice indices only allows None, integers, "
"tensor(int) and numpy(int) in slice item, but received %s.",
std::string(Py_TYPE(r->step)->tp_name)));
}
}
if (r->start == Py_None) {
*start = *step < 0 ? length - 1 : 0;
} else {
if (PyCheckInteger(r->start) || IsNumpyType(r->start)) {
*start = PyLong_AsLong(r->start);
} else if (PyCheckTensor(r->start)) {
*start = GetSliceIndexFromPyObject(r->start);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, slice indices only allows None, integers, "
"tensor(int) and numpy(int) in slice item, but received %s.",
std::string(Py_TYPE(r->start)->tp_name)));
}
if (*start < 0) *start += length;
*start = std::max(*start, static_cast<Py_ssize_t>(0));
}
if (r->stop == Py_None) {
*stop = *step < 0 ? -1 : length;
} else {
if (PyCheckInteger(r->stop) || IsNumpyType(r->stop)) {
*stop = PyLong_AsLong(r->stop);
} else if (PyCheckTensor(r->stop)) {
*stop = GetSliceIndexFromPyObject(r->stop);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, slice indices only allows None, integers, "
"tensor(int) and numpy(int) in slice item, but received %s.",
std::string(Py_TYPE(r->stop)->tp_name)));
}
if (0 < *step && *stop < 0) *stop += length;
*stop = std::min(*stop, length);
}
if (*stop > length) return -1;
if (*start >= length) return -1;
if (*step == 0) return -1;
return 0;
}
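// Illustration (assumes numpy and paddle are importable) of the
// non-plain-int slice bounds handled above:
//   import numpy as np
//   import paddle
//   x = paddle.arange(10)
//   x[:np.int64(8)]          # numpy integer bound -> IsNumpyType path
//   x[:paddle.to_tensor(8)]  # 1-element int tensor -> PyCheckTensor path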
static void ParseIndexingSlice(
framework::LoDTensor* tensor, PyObject* _index,
std::vector<int>* slice_axes, std::vector<int>* slice_starts,
std::vector<int>* slice_ends, std::vector<int>* slice_strides,
std::vector<int>* decrease_axis, std::vector<int>* none_axes,
std::vector<int>* infer_flags, std::vector<int>* list_select_idxs,
bool* list_select_flag) {
// We allow indexing by Integers, Slices, Ellipsis, None, tuples of those
// types, and list of Bool and Integers.
// wrap to tuple
// NOTE(zhiqiu): PyTuple_Pack increases refcount.
PyObject* index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index;
DEFINE_PADDLE_SCOPE_GUARD([index, _index]() {
if (!PyTuple_Check(_index)) {
Py_DECREF(index);
VLOG(4) << "Call Py_DECREF";
}
});
PADDLE_ENFORCE_EQ(
tensor->IsInitialized(), true,
platform::errors::InvalidArgument("tensor has not been initialized"));
const auto& shape = tensor->dims();
const int rank = shape.size();
const int size = PyTuple_GET_SIZE(index);
// specified_dims is the number of dimensions which are indexed by Integers
// or Slices.
int specified_dims = 0;
int ell_count = 0;
for (int dim = 0; dim < size; ++dim) {
PyObject* slice_item = PyTuple_GetItem(index, dim);
if (PyCheckInteger(slice_item) || PySlice_Check(slice_item)) {
specified_dims++;
} else if (slice_item == Py_Ellipsis) {
ell_count++;
}
}
PADDLE_ENFORCE_LE(ell_count, 1,
platform::errors::InvalidArgument(
"An index can only have a single ellipsis ('...')"));
int none_count = 0;
for (int i = 0, dim = 0; i < size; ++i) {
PyObject* slice_item = PyTuple_GetItem(index, i);
infer_flags->push_back(1);
int dim_len = shape[dim];
if (PyCheckInteger(slice_item) || IsNumpyType(slice_item)) {
// integer, PyLong_AsLong supports both int and long
int start = static_cast<int>(PyLong_AsLong(slice_item));
auto s_t = start;
start = start < 0 ? start + dim_len : start;
if (start >= dim_len || start < 0) {
std::string str_error_message =
"The starting index " + std::to_string(s_t) +
" of slice is out of bounds in tensor " + std::to_string(dim) +
"-th axis, it shound be in the range of [" +
std::to_string(-dim_len) + ", " + std::to_string(dim_len) + ")";
// py::index_error corresponds to IndexError in Python
// Used to indicate out of bounds access in __getitem__, __setitem__
throw py::index_error(str_error_message);
}
slice_axes->push_back(dim);
slice_starts->push_back(start);
slice_ends->push_back(start + 1);
slice_strides->push_back(1);
decrease_axis->push_back(dim);
dim++;
} else if (PySlice_Check(slice_item)) {
// slice item
Py_ssize_t start, end, step;
PySliceObject* p = reinterpret_cast<PySliceObject*>(slice_item);
_PySlice_GetIndices(p, dim_len, &start, &end, &step);
// :: or : or 0:dim_len:1
if (start == 0 && end == dim_len && step == 1) {
dim++;
continue;
}
slice_axes->push_back(dim);
slice_starts->push_back(start);
slice_ends->push_back(end);
slice_strides->push_back(step);
dim++;
} else if (slice_item == Py_Ellipsis) {
dim += rank - specified_dims;
} else if (slice_item == Py_None) {
none_axes->push_back(dim + none_count);
none_count++;
} else if (PyList_Check(slice_item)) {
*list_select_flag = true;
PADDLE_ENFORCE_EQ(
size, 1,
platform::errors::InvalidArgument(
"When index contains a list, its length is excepted to 1, "
"but received %d",
size));
bool all_bool = true;
int list_size = PyList_GET_SIZE(slice_item);
for (int j = 0; j < list_size; ++j) {
PyObject* list_item = PyList_GetItem(slice_item, j);
if (PyCheckInteger(list_item)) {
all_bool = false;
} else if (!PyBool_Check(list_item)) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Only support int or bool in index list."));
}
}
if (all_bool) {
PADDLE_ENFORCE_EQ(
list_size, shape[0],
platform::errors::InvalidArgument(
"The dimension of bool index doesn't match indexed array along "
"dimension 0, the target dimension is %d, but received %d.",
shape[0], list_size));
for (int j = 0; j < list_size; ++j) {
PyObject* list_item = PyList_GetItem(slice_item, j);
if (list_item == Py_True) {
list_select_idxs->push_back(j);
}
}
} else {
for (int j = 0; j < list_size; ++j) {
PyObject* list_item = PyList_GetItem(slice_item, j);
if (PyCheckInteger(list_item)) {
list_select_idxs->push_back(
static_cast<int>(PyLong_AsLong(list_item)));
} else if (list_item == Py_True) {
list_select_idxs->push_back(1);
} else {
list_select_idxs->push_back(0);
}
}
}
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Currently, Tensor.__indices__() only allows indexing "
"by Integers, Slices, Ellipsis, None, tuples of these types "
"and list of Bool and Integers, but received "
"%s in %dth slice item",
std::string(Py_TYPE(slice_item)->tp_name), i + 1));
}
}
// valid_indexs is the number of indices, excluding None and ellipsis entries
const int valid_indexs = size - none_axes->size() - ell_count;
PADDLE_ENFORCE_EQ(valid_indexs <= rank, true,
platform::errors::InvalidArgument(
"Too many indices (%d) for tensor of dimension %d.",
valid_indexs, rank));
}
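// Example: indexing a tensor of shape (2, 3, 4) with (0, 0:2, None) makes the
// loop above produce slice_axes={0, 1}, slice_starts={0, 0},
// slice_ends={1, 2}, slice_strides={1, 1}, decrease_axis={0}, none_axes={2}
// and infer_flags={1, 1, 1}.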
} // namespace pybind
} // namespace paddle
......@@ -351,10 +351,10 @@ endif()
set_tests_properties(test_graph PROPERTIES TIMEOUT 120)
set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 200)
set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 200)
if(LINUX AND WITH_MKLDNN)
set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)
set_tests_properties(convert_model2dot_ernie PROPERTIES TIMEOUT 120)
......
......@@ -26,7 +26,7 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.framework import IrGraph, _test_eager_guard
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
......@@ -122,7 +122,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
class TestImperativeOutSclae(unittest.TestCase):
def test_out_scale_acc(self):
def func_out_scale_acc(self):
seed = 1000
lr = 0.001
......@@ -166,9 +166,14 @@ class TestImperativeOutSclae(unittest.TestCase):
loss_list[i] > loss_list[i + 1],
msg='Failed to do the imperative qat.')
def test_out_scale_acc(self):
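# Run the case twice: first in eager mode via _test_eager_guard, then in the
# default (legacy) dygraph mode.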
with _test_eager_guard():
self.func_out_scale_acc()
self.func_out_scale_acc()
class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
def test_save_quantized_model(self):
def func_save_quantized_model(self):
lr = 0.001
load_param_path = "test_save_quantized_model/lenet.pdparams"
......@@ -206,6 +211,11 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
loss_list[i] > loss_list[i + 1],
msg='Failed to do the imperative qat.')
def test_save_quantized_model(self):
with _test_eager_guard():
self.func_save_quantized_model()
self.func_save_quantized_model()
if __name__ == '__main__':
unittest.main()
......@@ -29,6 +29,7 @@ import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import *
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn
from imperative_test_utils import ImperativeLinearBn_hook
......@@ -194,7 +195,7 @@ class TestImperativePTQ(unittest.TestCase):
break
return top1_correct_num / total_num
def test_ptq(self):
def func_ptq(self):
start_time = time.time()
self.set_vars()
......@@ -244,9 +245,14 @@ class TestImperativePTQ(unittest.TestCase):
end_time = time.time()
print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQfuse(TestImperativePTQ):
def test_ptq(self):
def func_ptq(self):
start_time = time.time()
self.set_vars()
......@@ -305,6 +311,11 @@ class TestImperativePTQfuse(TestImperativePTQ):
end_time = time.time()
print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQHist(TestImperativePTQ):
def set_vars(self):
......
......@@ -32,7 +32,7 @@ from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import QuantizedConv2D, QuantizedConv2DTranspose
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict, ImperativeLenet
paddle.enable_static()
......@@ -55,7 +55,7 @@ class TestImperativeQat(unittest.TestCase):
self.activation_quantize_type = 'moving_average_abs_max'
print('weight_quantize_type', self.weight_quantize_type)
def test_qat(self):
def func_qat(self):
self.set_vars()
imperative_qat = ImperativeQuantAware(
......@@ -193,6 +193,11 @@ class TestImperativeQat(unittest.TestCase):
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_qat(self):
with _test_eager_guard():
self.func_qat()
self.func_qat()
if __name__ == '__main__':
unittest.main()
......@@ -27,7 +27,7 @@ import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict, ImperativeLenet
os.environ["CPU_NUM"] = "1"
......
......@@ -30,7 +30,7 @@ from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Linear
from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose
from paddle.fluid.log_helper import get_logger
from paddle.fluid.framework import _test_eager_guard
os.environ["CPU_NUM"] = "1"
_logger = get_logger(
......@@ -157,7 +157,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
_logger.info("test act_preprocess")
self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)
def test_quant_aware_training(self):
def func_quant_aware_training(self):
imperative_qat = self.imperative_qat
seed = 1
np.random.seed(seed)
......@@ -243,6 +243,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
train(lenet)
test(lenet)
def test_quant_aware_training(self):
with _test_eager_guard():
self.func_quant_aware_training()
self.func_quant_aware_training()
class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
def setUp(self):
......
......@@ -32,6 +32,7 @@ from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant
from paddle.fluid.framework import _test_eager_guard
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
......@@ -42,7 +43,8 @@ _logger = get_logger(
class TestImperativeOutSclae(unittest.TestCase):
def test_out_scale_acc(self):
def func_out_scale_acc(self):
paddle.disable_static()
seed = 1000
lr = 0.1
......@@ -125,6 +127,11 @@ class TestImperativeOutSclae(unittest.TestCase):
if find_matmul:
self.assertTrue(matmul_skip_count == 1)
def test_out_scale_acc(self):
with _test_eager_guard():
self.func_out_scale_acc()
self.func_out_scale_acc()
if __name__ == '__main__':
unittest.main()
......@@ -99,18 +99,19 @@ def param_guard(parameters):
yield
def _convert_into_variable(var_base):
def _convert_into_variable(tensor):
"""
Convert a VarBase or eager Tensor into a Variable.
"""
if isinstance(var_base, core.VarBase):
if isinstance(tensor, (core.eager.Tensor, core.VarBase)):
# Check whether has been created before.
new_var = var_base.block._find_var_recursive(var_base.name)
new_var = tensor.block._find_var_recursive(tensor.name)
if new_var is not None:
assert isinstance(new_var, framework.Variable)
# Convert ParamBase into Parameter with same attributes in dy2stat.
elif isinstance(var_base, framework.ParamBase):
new_var = var_base._to_static_var(to_parameter=True)
elif isinstance(tensor,
(framework.EagerParamBase, framework.ParamBase)):
new_var = tensor._to_static_var(to_parameter=True)
else:
# Note(Aurelius84): Convert VarBase in self._buffers into Variable with
# same attributes and set persistable=True to allow saving this var.
......@@ -120,13 +121,13 @@ def _convert_into_variable(var_base):
# But if its shape is empty while created from `create_variable()`, we consider this buffer
# non-persistable. See case of `drop_state` in lstm api.
is_persistable = len(var_base.shape) > 0
is_persistable = len(tensor.shape) > 0
new_var = var_base._to_static_var(
new_var = tensor._to_static_var(
to_parameter=False, persistable=is_persistable)
return new_var
else:
return var_base
return tensor
def enabled():
......
......@@ -61,7 +61,8 @@ class NestSequence(object):
def _get_var_ids(self):
var_ids = []
for idx, var in enumerate(self.__input_list):
if isinstance(var, (framework.Variable, core.VarBase)):
if isinstance(var, (framework.Variable, core.VarBase,
core.eager.Tensor)):
var_ids.append(idx)
return var_ids
......@@ -73,7 +74,8 @@ class NestSequence(object):
if need_check:
warning_types = set()
for var in self.__input_list:
if not isinstance(var, (framework.Variable, core.VarBase)):
if not isinstance(var, (framework.Variable, core.VarBase,
core.eager.Tensor)):
warning_types.add(type(var))
if warning_types:
logging_utils.warn(
......@@ -301,10 +303,17 @@ class PartialProgramLayer:
for name in block.vars:
if "@GRAD" in name:
var_desc = block.vars[name].desc
var_base = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(), False)
var_base = None
if not core._in_eager_mode():
var_base = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(), False)
else:
var_base = core.eager.Tensor(var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(), False)
double_grads.append(var_base)
return self._valid_vars(double_grads)
......@@ -386,13 +395,22 @@ class PartialProgramLayer:
expected_place = framework._current_expected_place()
for i, value in enumerate(flatten_inputs):
if isinstance(value, np.ndarray):
var = core.VarBase(
value=value,
name=self._inputs[i].desc.name(),
persistable=False,
place=expected_place,
zero_copy=True)
elif isinstance(value, core.VarBase):
var = None
if not core._in_eager_mode():
var = core.VarBase(
value=value,
name=self._inputs[i].desc.name(),
persistable=False,
place=expected_place,
zero_copy=True)
else:
var = core.eager.Tensor(
value=value,
name=self._inputs[i].desc.name(),
persistable=False,
place=expected_place,
zero_copy=True)
elif isinstance(value, (core.VarBase, core.eager.Tensor)):
# NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multi times
# into CUDAPlace when it's as input of multi Ops. so we move it in advance
# to avoid this problem.
......@@ -411,9 +429,16 @@ class PartialProgramLayer:
var = self._outputs[var_id]
assert isinstance(var, framework.Variable)
var_desc = var.desc
var_base = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(), var_desc.type(), False)
var_base = None
if not core._in_eager_mode():
var_base = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(), var_desc.type(), False)
else:
var_base = core.eager.Tensor(var_desc.dtype(),
var_desc.shape(),
var_desc.name(),
var_desc.type(), False)
return var_base
# Create VarBase to receive output data.
......@@ -423,12 +448,19 @@ class PartialProgramLayer:
def _create_scope_vec(self):
# Hold forward variables
tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
"program_out_scope",
core.VarDesc.VarType.STEP_SCOPES, True)
inner_scope = core.Scope()
tmp_scope_vec.value().set_scope(inner_scope)
tmp_scope_vec = None
if not core._in_eager_mode():
tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
"program_out_scope",
core.VarDesc.VarType.STEP_SCOPES, True)
# TODO(jiabin): Support this later.
# else:
# tmp_scope_vec = core.eager.Tensor(core.VarDesc.VarType.FP32, [],
# "program_out_scope",
# core.VarDesc.VarType.STEP_SCOPES, True)
inner_scope = core.Scope()
tmp_scope_vec.value().set_scope(inner_scope)
return tmp_scope_vec
def _restore_out(self, out_vars):
......@@ -450,7 +482,8 @@ class PartialProgramLayer:
return main_program.clone(for_test=True)
def _is_no_value(self, var):
if isinstance(var, core.VarBase) and var.shape == [1]:
if isinstance(var,
(core.VarBase, core.eager.Tensor)) and var.shape == [1]:
# NOTE: .numpy() will insert a MemcpySync operation, which hurts performance.
if var.numpy()[0] == RETURN_NO_VALUE_MAGIC_NUM:
return True
......@@ -460,7 +493,7 @@ class PartialProgramLayer:
"""
Removes invalid value for various-length return statement
"""
if isinstance(out_vars, core.VarBase):
if isinstance(out_vars, (core.VarBase, core.eager.Tensor)):
if self._is_no_value(out_vars):
return None
return out_vars
......@@ -527,7 +560,7 @@ class PartialProgramLayer:
param_and_buffer_names_set = set()
for i, var in enumerate(self._params):
# self._params contains parameters and buffers with persistable=True.
if not isinstance(var, core.VarBase):
if not isinstance(var, (core.VarBase, core.eager.Tensor)):
raise TypeError(
'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'.
format(i, type(var)))
......@@ -559,10 +592,18 @@ def _create_fake_var():
"""
Create a fake_var (force on CPU) to handle empty input or output
"""
return [
core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
core.VarDesc.VarType.RAW, False)
]
if not core._in_eager_mode():
return [
core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
core.VarDesc.VarType.RAW, False)
]
else:
return []
# TODO(jiabin): Support this later
# return [
# core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var",
# core.VarDesc.VarType.RAW, False)
# ]
def partial_program_from(concrete_program):
......
......@@ -25,7 +25,7 @@ import threading
import six
import paddle
from paddle.fluid import core
from paddle.fluid import core, dygraph
from paddle.fluid.compiler import BuildStrategy, CompiledProgram, ExecutionStrategy
from paddle.fluid.data_feeder import check_type
from paddle.fluid.layers.utils import flatten, pack_sequence_as
......@@ -898,30 +898,33 @@ def save(layer, path, input_spec=None, **configs):
state_var_dict[var.name] = var
# 3. share parameters from Layer to scope & record var info
for param_or_buffer in concrete_program.parameters:
# share to scope
if param_or_buffer.type == core.VarDesc.VarType.VOCAB:
scr_tensor = param_or_buffer.value().get_map_tensor()
tgt_var = scope.var(param_or_buffer.name)
tgt_var.set_vocab(scr_tensor)
else:
param_or_buffer_tensor = scope.var(
param_or_buffer.name).get_tensor()
#src_tensor = param_or_buffer.value().get_tensor()
src_tensor = state_var_dict[param_or_buffer.name].value(
).get_tensor()
param_or_buffer_tensor._share_data_with(src_tensor)
# record var info
if param_or_buffer.name not in extra_var_info:
extra_info_dict = dict()
if param_or_buffer.name in state_names_dict:
extra_info_dict['structured_name'] = state_names_dict[
param_or_buffer.name]
extra_info_dict[
'stop_gradient'] = param_or_buffer.stop_gradient
if isinstance(param_or_buffer, ParamBase):
extra_info_dict['trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict
with dygraph.guard():
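# Share parameters under a dygraph guard: value()/get_tensor() below need a
# live dygraph context (presumably required once eager tensors are involved).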
for param_or_buffer in concrete_program.parameters:
# share to scope
if param_or_buffer.type == core.VarDesc.VarType.VOCAB:
scr_tensor = param_or_buffer.value().get_map_tensor()
tgt_var = scope.var(param_or_buffer.name)
tgt_var.set_vocab(scr_tensor)
else:
param_or_buffer_tensor = scope.var(
param_or_buffer.name).get_tensor()
#src_tensor = param_or_buffer.value().get_tensor()
src_tensor = state_var_dict[param_or_buffer.name].value(
).get_tensor()
param_or_buffer_tensor._share_data_with(src_tensor)
# record var info
if param_or_buffer.name not in extra_var_info:
extra_info_dict = dict()
if param_or_buffer.name in state_names_dict:
extra_info_dict[
'structured_name'] = state_names_dict[
param_or_buffer.name]
extra_info_dict[
'stop_gradient'] = param_or_buffer.stop_gradient
if isinstance(param_or_buffer, ParamBase):
extra_info_dict[
'trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict
# 4. build input & output of save_infernece_model
# NOTE(chenweihang): [ Get input variables name ]
......
......@@ -94,7 +94,7 @@ def monkey_patch_varbase():
# Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph.
# It will fail. So, for propery that different between dynamic and static graph, should not getattr(self, attr, None).
attr_not_need_keys = ['grad', 'T']
if isinstance(self, ParamBase):
if isinstance(self, (ParamBase, EagerParamBase)):
attr_kwargs = self.__dict__.copy()
else:
attr_names = []
......@@ -111,7 +111,7 @@ def monkey_patch_varbase():
attr_kwargs.update(kwargs)
if to_parameter or isinstance(self, ParamBase):
if to_parameter or isinstance(self, (ParamBase, EagerParamBase)):
del attr_kwargs['persistable']
# NOTE(Aurelius84): All parameters should be placed into global block.
attr_kwargs['block'] = attr_kwargs['block'].program.global_block()
......
......@@ -1821,7 +1821,7 @@ def _pack_loaded_dict(load_obj):
@static_only
def _legacy_save(param_dict, model_path, protocol=2):
def get_tensor(var):
if isinstance(var, core.VarBase):
if isinstance(var, (core.VarBase, core.eager.Tensor)):
return var.numpy()
elif isinstance(var, core.LoDTensor):
return np.array(var)
......
......@@ -10148,6 +10148,9 @@ def flatten(x, axis=1, name=None):
check_variable_and_dtype(
x, 'x', ['float32', 'float64', 'int8', 'int32', 'int64', 'uint8'],
'flatten')
if in_dygraph_mode():
return _C_ops.flatten2(x, 'axis', axis)[0]
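# Illustrative use of this fast path (a sketch, not part of the patch):
#   paddle.disable_static()
#   x = paddle.rand([2, 3, 4])
#   y = fluid.layers.flatten(x, axis=2)  # dygraph now dispatches to _C_ops.flatten2
#   # y.shape == [6, 4]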
helper = LayerHelper('flatten', **locals())
if not (isinstance(x, Variable)):
......
......@@ -663,7 +663,9 @@ def assign(input, output=None):
})
if is_inplace and in_dygraph_mode():
output._bump_inplace_version()
# TODO(jiabin): Remove this when we support inplace
if not core._in_eager_mode():
output._bump_inplace_version()
return output
......
......@@ -771,13 +771,13 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
self.assertTrue(np.array_equal(egr_tensor.numpy(), ori_arr))
ori_place = egr_tensor.place
new_arr = np.random.rand(4, 4, 16, 32).astype('float32')
new_arr = np.random.rand(4, 16, 16, 32).astype('float32')
self.assertFalse(np.array_equal(egr_tensor.numpy(), new_arr))
egr_tensor._set_value(new_arr)
egr_tensor.set_value(new_arr)
self.assertEqual(egr_tensor.stop_gradient, True)
self.assertTrue(egr_tensor.place._equals(ori_place))
self.assertEqual(egr_tensor.shape, [4, 4, 16, 32])
self.assertEqual(egr_tensor.shape, [4, 16, 16, 32])
self.assertTrue(np.array_equal(egr_tensor.numpy(), new_arr))
......@@ -880,7 +880,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
new_weight = np.ones([1, 3]).astype('float32')
self.assertFalse(np.array_equal(linear.weight.numpy(), new_weight))
linear.weight._set_value(new_weight)
linear.weight.set_value(new_weight)
self.assertTrue(np.array_equal(linear.weight.numpy(), new_weight))
self.assertTrue(linear.weight.place._equals(ori_place))
......
......@@ -533,12 +533,8 @@ class TestTensorRegisterHook(unittest.TestCase):
size=[self.batch_size, self.in_size]).astype('float32')
data_t = paddle.to_tensor(data)
if _in_eager_mode():
with self.assertRaises(TypeError):
out = jit_net(data_t)
else:
with self.assertRaises(AssertionError):
out = jit_net(data_t)
with self.assertRaises(AssertionError):
out = jit_net(data_t)
def test_register_hook_in_dy2static_mode(self):
with _test_eager_guard():
......