Unverified commit 4640955c authored by Jiabin Yang, committed by GitHub

support test_auto_prune_partial (#38871)

Parent e7f2bf37
@@ -49,7 +49,6 @@ egr::EagerTensor CreateTensorWithValue(const pten::DDim& ddim,
egr::EagerTensor out = egr::EagerTensor();
out.set_tensor(std::make_shared<paddle::experimental::Tensor>(tensor));
auto meta = EagerUtils::autograd_meta(&out);
if (is_leaf) {
auto accumulation_node = std::make_shared<GradNodeAccumulation>();
meta->SetGradNode(accumulation_node);
......
@@ -181,7 +181,9 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
paddle::platform::errors::Fatal(
"Number of edges should be either empty ( for leaf node "
") or the same as number of output grad tensors"));
") or the same as the number of output grad tensors, but "
"got edges size: %d, grad_output size: %d",
edges.size(), grad_output_tensors.size()));
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
......
@@ -195,7 +195,6 @@ class EagerTensor final {
}
tensor_->copy_(*(src.tensor_.get()), blocking);
}
/* Part 6: Operator overloading */
EagerTensor& operator=(const EagerTensor& x) & {
tensor_ = x.tensor_;
@@ -238,7 +237,7 @@ class EagerTensor final {
// Construct framework::Tensor from egr::EagerTensor
auto tensor_dense =
std::dynamic_pointer_cast<pten::DenseTensor>(tensor_->impl());
if (tensor_dense) {
if (tensor_dense && tensor_dense.get()) {
paddle::experimental::SharesStorage(tensor_dense.get(),
framework_tensor);
} else {
@@ -292,11 +291,10 @@ class EagerTensor final {
template <typename LEGACY_TYPE, typename TYPE>
void SetImplWithLegacyTensor() {
const auto& framework_tensor = var_.Get<LEGACY_TYPE>();
if (this->initialized()) {
if (defined()) {
VLOG(8) << "Sync Var to initialized tensor for: " << name();
paddle::experimental::ReMakePtenDenseTensor(
framework_tensor,
static_cast<pten::DenseTensor*>(this->impl().get()));
framework_tensor, static_cast<pten::DenseTensor*>(impl().get()));
} else {
VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
this->set_impl(std::move(
......
@@ -47,45 +47,15 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
// adj_edges has the same rank as the fwd inputs, and records each input's
// output rank from its pre-ops
if (meta) {
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!meta->StopGradient()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
}
}
void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
size_t slot_id) {
PADDLE_ENFORCE_LT(
slot_id, adj_edges_.size(),
paddle::platform::errors::InvalidArgument(
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
for (const auto& meta : metas) {
// adj_edges has the same rank as the fwd inputs, and records each input's
// output rank from its pre-ops
if (meta) {
auto node = meta->GetMutableGradNode();
if (node) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!meta->StopGradient()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
}
@@ -98,17 +68,16 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
if (meta) {
if (meta && !meta->StopGradient()) {
VLOG(6) << "Add Edges for slot: " << slot_id;
auto node = meta->GetMutableGradNode();
if (node) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!meta->StopGradient()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
......
@@ -106,7 +106,6 @@ class GradNodeBase {
* This one is called slot by slot
* **/
void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
void AddEdges(const std::vector<AutogradMeta*>& metas, size_t slot_id);
void AddEdges(AutogradMeta* meta, size_t slot_id);
/**
......
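The refactor above collapses the three AddEdges overloads into two and makes gradient pruning explicit: an edge is recorded only when the producing AutogradMeta does not have stop_gradient set, and a meta without a grad node falls back to a GradNodeAccumulation (a leaf). The Python sketch below is a toy model of that rule; ToyAutogradMeta and add_edges are illustrative stand-ins, not Paddle's actual C++ API.

# Toy model of the edge-pruning rule (illustrative only):
class ToyAutogradMeta:
    def __init__(self, stop_gradient, grad_node=None, out_rank=(0, 0)):
        self.stop_gradient = stop_gradient
        self.grad_node = grad_node
        self.out_rank = out_rank

def add_edges(adj_edges, slot_id, metas):
    assert slot_id < len(adj_edges), "slot id out of range of adj_edges"
    for meta in metas:
        if meta is None or meta.stop_gradient:
            continue  # pruned: stop-gradient inputs contribute no edge
        if meta.grad_node is None:
            meta.grad_node = "GradNodeAccumulation"  # leaf fallback
        adj_edges[slot_id].append((meta.grad_node, meta.out_rank))

adj_edges = [[], []]
add_edges(adj_edges, 0, [ToyAutogradMeta(stop_gradient=False)])
add_edges(adj_edges, 1, [ToyAutogradMeta(stop_gradient=True)])
print(adj_edges)  # slot 0 holds one edge, slot 1 stays empty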
@@ -56,15 +56,17 @@ TEST(GradNodeInfo, GradNodeBase) {
VLOG(6) << "Test Add Edges";
egr::Edge edge0(grad_test_node1, 1, 2);
auto auto_grad0 = std::make_shared<egr::AutogradMeta>(edge0);
auto_grad0->SetStopGradient(false);
egr::Edge edge1(grad_test_node1, 3, 4);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);
auto_grad1->SetStopGradient(false);
grad_test_node0->AddEdges(auto_grad0.get(), 0);
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
size_t(1));
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
size_t(2));
std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
grad_test_node0->AddEdges(metas, 1);
grad_test_node0->AddEdges(&metas, 1);
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
size_t(3));
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second,
......
@@ -69,9 +69,11 @@ TEST(Backward, SingleNodeEmptyGrad) {
// Connect Node0 -> AccumulationNode via Edge
auto meta = egr::AutogradMeta();
meta.SetStopGradient(false);
meta.SetSingleOutRankWithSlot(0, 0);
meta.SetGradNode(acc_node_ptr);
node0_ptr->AddEdges({&meta}, 0);
std::vector<egr::AutogradMeta*> res = {&meta};
node0_ptr->AddEdges(&res, 0);
}
std::vector<egr::EagerTensor> outs = {target_tensor};
// Run Backward
@@ -130,9 +132,11 @@ TEST(Backward, SingleNodeCustomGrad) {
// Connect Node0 -> AccumulationNode via Edge
auto meta = egr::AutogradMeta();
meta.SetStopGradient(false);
meta.SetSingleOutRankWithSlot(0, 0);
meta.SetGradNode(acc_node_ptr);
node0_ptr->AddEdges({&meta}, 0);
std::vector<egr::AutogradMeta*> res = {&meta};
node0_ptr->AddEdges(&res, 0);
}
// Run Backward
@@ -188,9 +192,11 @@ TEST(Backward, LinearNodes) {
// Connect Node0 -> Node1 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
meta0.SetSingleOutRankWithSlot(0, 0);
meta0.SetGradNode(node1_ptr);
node0_ptr->AddEdges({&meta0}, 0);
std::vector<egr::AutogradMeta*> res0 = {&meta0};
node0_ptr->AddEdges(&res0, 0);
// Connect Tensor and AccumulationNode via AutoGradMeta
auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
@@ -204,9 +210,11 @@ TEST(Backward, LinearNodes) {
// Connect Node1 -> AccumulationNode via Edge
auto meta1 = egr::AutogradMeta();
meta1.SetStopGradient(false);
meta1.SetSingleOutRankWithSlot(0, 0);
meta1.SetGradNode(acc_node_ptr);
node1_ptr->AddEdges({&meta1}, 0);
std::vector<egr::AutogradMeta*> res1 = {&meta1};
node1_ptr->AddEdges(&res1, 0);
}
// Use Empty Grad Tensor
@@ -283,15 +291,19 @@ TEST(Backward, WithAccumulation) {
// Connect Node0 -> Node2 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
meta0.SetSingleOutRankWithSlot(0, 0);
meta0.SetGradNode(node2_ptr);
node0_ptr->AddEdges({&meta0}, 0);
std::vector<egr::AutogradMeta*> res0 = {&meta0};
node0_ptr->AddEdges(&res0, 0);
// Connect Node1 -> Node2 via Edge
auto meta1 = egr::AutogradMeta();
meta1.SetStopGradient(false);
meta1.SetSingleOutRankWithSlot(0, 0);
meta1.SetGradNode(node2_ptr);
node1_ptr->AddEdges({&meta1}, 0);
std::vector<egr::AutogradMeta*> res1 = {&meta1};
node1_ptr->AddEdges(&res1, 0);
// Connect Tensor and AccumulationNode via AutoGradMeta
auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
@@ -305,9 +317,11 @@ TEST(Backward, WithAccumulation) {
// Connect Node2 -> AccumulationNode via Edge
auto meta2 = egr::AutogradMeta();
meta2.SetStopGradient(false);
meta2.SetSingleOutRankWithSlot(0, 0);
meta2.SetGradNode(acc_node_ptr);
node2_ptr->AddEdges({&meta2}, 0);
std::vector<egr::AutogradMeta*> res2 = {&meta2};
node2_ptr->AddEdges(&res2, 0);
}
RunBackward(target_tensors, grad_tensors);
......
@@ -62,8 +62,10 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
auto meta = AutogradMeta();
meta.SetSingleOutRankWithSlot(0, 0);
meta.SetStopGradient(false);
meta.SetGradNode(acc_node_ptr);
scale_node_ptr->AddEdges({&meta}, 0);
std::vector<egr::AutogradMeta*> res = {&meta};
scale_node_ptr->AddEdges(&res, 0);
AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
auto_grad_meta1->SetGradNode(
......
@@ -105,9 +105,11 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
// Connect ScaleNode -> AccumulationNode via Edge
{
auto meta = AutogradMeta();
meta.SetStopGradient(false);
meta.SetSingleOutRankWithSlot(0, 0);
meta.SetGradNode(acc_node_ptr);
scale_node_ptr->AddEdges({&meta}, 0);
std::vector<egr::AutogradMeta*> res = {&meta};
scale_node_ptr->AddEdges(&res, 0);
}
// Retain Grad for leaf tensor1
@@ -180,9 +182,11 @@ TEST(RetainGrad, HookAfterRetainGrad) {
// Connect ScaleNode -> AccumulationNode via Edge
{
auto meta = AutogradMeta();
meta.SetStopGradient(false);
meta.SetSingleOutRankWithSlot(0, 0);
meta.SetGradNode(acc_node_ptr);
scale_node_ptr->AddEdges({&meta}, 0);
std::vector<egr::AutogradMeta*> res = {&meta};
scale_node_ptr->AddEdges(&res, 0);
}
// Retain Grad for leaf tensor1
......
@@ -234,6 +234,44 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* eager_tensor__share_buffer_to(EagerTensorObject* self,
PyObject* args,
PyObject* kwargs) {
EAGER_SYNC_TRY
egr::EagerTensor* src_ptr =
&(reinterpret_cast<EagerTensorObject*>(PyTuple_GET_ITEM(args, 0))
->eager_tensor);
PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true,
platform::errors::InvalidArgument(
"Tensor %s has not been initialized! please initialize "
"src tensor before share_buffer_with to other.",
self->eager_tensor.name()));
src_ptr->set_impl(self->eager_tensor.impl());
Py_INCREF(Py_None);
return Py_None;
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* eager_tensor__is_shared_buffer_with(EagerTensorObject* self,
PyObject* args,
PyObject* kwargs) {
EAGER_SYNC_TRY
egr::EagerTensor src_tensor =
CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0);
PADDLE_ENFORCE_EQ(src_tensor.initialized(), true,
platform::errors::InvalidArgument(
"Tensor %s has not been initialized! please initialize "
"src tensor before share_buffer_with to other.",
src_tensor.name()));
bool res = false;
if (!self->eager_tensor.defined() || !src_tensor.defined()) {
return ToPyObject(res);
}
res = (self->eager_tensor.impl().get() == src_tensor.impl().get());
return ToPyObject(res);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* eager_tensor_method_detach(EagerTensorObject* self,
PyObject* args, PyObject* kwargs) {
EAGER_SYNC_TRY
@@ -278,6 +316,12 @@ PyMethodDef variable_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_is_shared_buffer_to",
(PyCFunction)(void (*)(void))eager_tensor__share_buffer_to,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_share_buffer_with",
(PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with,
METH_VARARGS | METH_KEYWORDS, NULL},
{"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach,
METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}};
......
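For context, the two bindings added above expose buffer sharing to Python: _share_buffer_to makes the argument tensor reuse the caller's allocation, and _is_shared_buffer_with reports whether two tensors point at the same underlying impl. A minimal usage sketch, assuming a Paddle build from this branch with the experimental eager API enabled and following the method names used in the test below:

import numpy as np
import paddle
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    src = paddle.to_tensor(np.ones([2, 3]).astype('float32'),
                           core.VarDesc.VarType.FP32, core.CPUPlace())
    dst = paddle.to_tensor(np.zeros([2, 3]).astype('float32'),
                           core.VarDesc.VarType.FP32, core.CPUPlace())
    src._share_buffer_to(dst)  # dst now aliases src's allocation
    assert dst._is_shared_buffer_with(src)
    assert np.array_equal(dst.numpy(), src.numpy())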
@@ -645,6 +645,33 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
self.assertTrue(tensor3.stop_gradient, True)
self.assertTrue(tensor3.place.is_cpu_place())
def test_share_buffer_to(self):
arr = np.ones([4, 16, 16, 32]).astype('float32')
arr1 = np.zeros([4, 16]).astype('float32')
arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones(
[4, 16, 16, 32]).astype('float32')
tensor = None
tensor2 = None
tensor = paddle.to_tensor(arr, core.VarDesc.VarType.FP32,
core.CPUPlace())
tensor3 = core.eager.EagerTensor()
if core.is_compiled_with_cuda():
tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
core.CUDAPlace(0))
else:
tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
core.CPUPlace())
self.assertTrue(np.array_equal(tensor.numpy(), arr))
self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
tensor2._share_buffer_to(tensor)
self.assertTrue(np.array_equal(tensor.numpy(), arr2))
self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
self.assertTrue(tensor._is_shared_buffer_with(tensor2))
self.assertTrue(tensor2._is_shared_buffer_with(tensor))
tensor._share_buffer_to(tensor3)
self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
self.assertTrue(tensor3._is_shared_buffer_with(tensor))
def test_properties(self):
print("Test_properties")
with _test_eager_guard():
......
@@ -15,6 +15,7 @@
import unittest
import paddle.fluid as fluid
import numpy as np
from paddle.fluid.framework import _test_eager_guard
class AutoPruneLayer0(fluid.Layer):
@@ -145,7 +146,7 @@ class MyLayer2(fluid.Layer):
class TestImperativeAutoPrune(unittest.TestCase):
def test_auto_prune(self):
def func_auto_prune(self):
with fluid.dygraph.guard():
case1 = AutoPruneLayer0(input_size=5)
value1 = np.arange(25).reshape(5, 5).astype("float32")
@@ -157,7 +158,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue(case1.linear2.weight._grad_ivar() is not None)
self.assertTrue(case1.linear1.weight._grad_ivar() is not None)
def test_auto_prune2(self):
def test_auto_prune(self):
with _test_eager_guard():
self.func_auto_prune()
self.func_auto_prune()
def func_auto_prune2(self):
with fluid.dygraph.guard():
case2 = AutoPruneLayer1(input_size=5)
value1 = np.arange(25).reshape(5, 5).astype("float32")
@@ -170,6 +176,11 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue(case2.linear2.weight._grad_ivar() is None)
self.assertTrue(case2.linear1.weight._grad_ivar() is not None)
def test_auto_prune2(self):
with _test_eager_guard():
self.func_auto_prune2()
self.func_auto_prune2()
def test_auto_prune3(self):
with fluid.dygraph.guard():
case3 = AutoPruneLayer3(input_size=784)
......
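The test refactor above follows the dual-run convention used during the eager-mode migration: the original body moves into a func_* method, and the public test_* method runs it once under _test_eager_guard() (the experimental final-state dygraph) and once more in legacy dygraph. A minimal sketch of the pattern, using only public Paddle APIs; the class and method names here are illustrative, not part of this commit:

import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard

class ExampleDualModeTest(unittest.TestCase):
    def func_add(self):
        with fluid.dygraph.guard():
            x = paddle.to_tensor(np.ones([2, 2]).astype('float32'))
            y = x + x
            self.assertTrue(
                np.array_equal(y.numpy(), 2 * np.ones([2, 2], dtype='float32')))

    def test_add(self):
        # Run once under eager mode, then once more in legacy dygraph,
        # mirroring the pattern used in TestImperativeAutoPrune above.
        with _test_eager_guard():
            self.func_add()
        self.func_add()

if __name__ == '__main__':
    unittest.main()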