Unverified commit f81569e3, authored by: Jiabin Yang, committed by: GitHub

Support auto prune logic in eager mode (#38960)

* support test_auto_prune_partial

* support rest of autoprune strategy in eager mode
Parent 3115d005
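For context, the auto-prune rule being ported to eager mode is: once a tensor in the forward graph is marked stop_gradient=True, backward() does not propagate through that branch, so the branch's parameters end up with no gradient. Below is a minimal sketch in the style of the func_auto_prune* tests further down in this diff; the Linear sizes and the eager-guard wrapper are illustrative assumptions rather than lines from the commit, and the FLAGS_sort_sum_gradient flag used by the original test is omitted since, per the TODO added in this diff, it is not needed in eager mode.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():                 # exercise the new eager-mode code path
    with fluid.dygraph.guard():
        value0 = np.arange(26).reshape(2, 13).astype("float32")
        value1 = np.arange(6).reshape(2, 3).astype("float32")
        value2 = np.arange(10).reshape(2, 5).astype("float32")
        linear = fluid.dygraph.Linear(13, 5, dtype="float32")    # assumed layer sizes
        linear2 = fluid.dygraph.Linear(3, 3, dtype="float32")    # assumed layer sizes
        a = fluid.dygraph.to_variable(value0)
        b = fluid.dygraph.to_variable(value1)
        c = fluid.dygraph.to_variable(value2)
        out1 = linear(a)
        out2 = linear2(b)
        out1.stop_gradient = True                  # prune the out1 branch
        out = fluid.layers.concat(input=[out1, out2, c], axis=1)
        out.backward()
        assert linear.weight.gradient() is None    # pruned: no grad reached linear
        assert out1.gradient() is None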
......@@ -1852,7 +1852,7 @@ static std::string GenerateGradNodeCCContents(
" %s\n"
" return outputs;\n";
generated_grad_function_body = paddle::string::Sprintf(
-        BWD_RETURN_TEMPLATE, outs_size, generated_grad_function_body);
BWD_RETURN_TEMPLATE, in_vars.size(), generated_grad_function_body);
// [Generation] Get Full Grad Function
const char* GRAD_FUNCTION_TEMPLATE =
......
......@@ -103,7 +103,17 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first
<< ", rank: " << input_info.second;
// Get target GradNodeBase from target tensors
-  GradNodeBase* grad_node = auto_grad_meta->GetMutableGradNode().get();
auto shared_grad_node = auto_grad_meta->GetMutableGradNode();
if (shared_grad_node == nullptr || shared_grad_node.get() == nullptr ||
auto_grad_meta->StopGradient()) {
VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
"stop_gradient=True: "
<< tensor.name();
continue;
}
GradNodeBase* grad_node = shared_grad_node.get();
// Prepare GradTensorHolder
if (!node_input_buffers_dict.count(grad_node)) {
......@@ -192,19 +202,38 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
// Since we make edge has as same rank as bwd outputs, we indexing them
// with
// the same rank(i, j)
VLOG(6) << "Get Edge with slot: " << i << ", rank: " << j;
egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j];
if (!grad_output_tensor.defined() ||
!grad_output_tensor.initialized()) {
VLOG(6) << "We get grad_output_tensor with slot: " << i
<< ", rank: " << j << " as uninitialized or undefined tensor";
}
GradNodeBase* next_node = edge.GetMutableGradNode().get();
auto next_node_shared = edge.GetMutableGradNode();
// Next node could be nullptr if it is leaf tensor with no
// AccumulationNode attached
// Or it could also originated from dispensable inputs
-      if (!next_node) continue;
if (!next_node_shared || !next_node_shared.get() ||
grad_output_tensors[i].empty()) {
continue;
}
PADDLE_ENFORCE_LT(
j, grad_output_tensors[i].size(),
paddle::platform::errors::Fatal(
"Rank of grad_output_tensors should be less than "
"grad_output_tensors[i].size(), which is: %d. This error may "
"indicate autoprune or autograd api error. ",
grad_output_tensors.size()));
egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j];
if ((!grad_output_tensor.defined() ||
!grad_output_tensor.initialized())) {
if (!grad_output_tensor.Var().IsInitialized()) {
VLOG(6)
<< "We get grad_output_tensor with slot: " << i
<< ", rank: " << j
<< " as uninitialized or undefined in both tensor and variable";
}
}
VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i
<< ", rank: " << j
<< " 's name is: " << grad_output_tensor.name();
auto* next_node = next_node_shared.get();
if (!node_input_buffers_dict.count(next_node)) {
node_input_buffers_dict[next_node] =
......
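The null-edge check added above is what lets backward() walk past branches that never got a grad node, for example outputs of ops with no grad op maker. This is a condensed version of func_case4_with_no_grad_op_maker from the test changes further down, run under the eager guard just as the new test wrapper does:

import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    with fluid.dygraph.guard():
        out = fluid.layers.gaussian_random(shape=[20, 30])   # gaussian_random has no grad op maker
        loss = fluid.layers.mean(out)
        loss.backward()
        # mean_grad's edge toward `out` carries no grad node, so the edge is
        # skipped instead of dereferencing a null pointer, and `out` keeps no grad.
        assert out._grad_ivar() is None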
......@@ -164,6 +164,14 @@ class EagerTensor final {
*/
void reset() { tensor_->reset(); }
/**
* @brief Determine whether tensor is DenseTensor
*
* @return true
* @return false
*/
bool is_dense_tensor() const { return tensor_->is_dense_tensor(); }
/**
* @brief Transfer the current Tensor to the specified device and return.
*
......
......@@ -56,6 +56,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
auto_grad_meta->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
// Connect Tensor and AccumulationNode via AutoGradMeta
auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
......@@ -119,7 +120,7 @@ TEST(Backward, SingleNodeCustomGrad) {
auto_grad_meta->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
// Connect Tensor and AccumulationNode via AutoGradMeta
auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
......@@ -189,7 +190,7 @@ TEST(Backward, LinearNodes) {
auto_grad_meta->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
// Connect Node0 -> Node1 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
......@@ -281,13 +282,14 @@ TEST(Backward, WithAccumulation) {
auto_grad_meta0->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
auto_grad_meta0->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta0->SetStopGradient(false);
// Connect Inp1 and Node1 via AutoGradMeta
AutogradMeta* auto_grad_meta1 =
EagerUtils::autograd_meta(&(target_tensors[1]));
auto_grad_meta1->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(node1_ptr));
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false);
// Connect Node0 -> Node2 via Edge
auto meta0 = egr::AutogradMeta();
......
......@@ -58,6 +58,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
auto_grad_meta->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
egr_utils_api::RetainGradForTensor(target_tensor); // result: 1.0
auto meta = AutogradMeta();
......
......@@ -93,6 +93,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
auto_grad_meta->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
target_tensor.set_autograd_meta(
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
......@@ -171,6 +172,7 @@ TEST(RetainGrad, HookAfterRetainGrad) {
auto_grad_meta->SetGradNode(
std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
target_tensor.set_autograd_meta(
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
......
......@@ -99,7 +99,12 @@ std::pair<size_t, size_t> EagerUtils::OutRankInfo(
std::shared_ptr<GradNodeBase> EagerUtils::grad_node(
const egr::EagerTensor& target) {
-  return unsafe_autograd_meta(target)->GetMutableGradNode();
auto* meta = nullable_autograd_meta(target);
if (meta) {
return meta->GetMutableGradNode();
} else {
return nullptr;
}
}
void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
......
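With grad_node() now tolerating tensors that never acquired autograd meta, asking a freshly constructed eager tensor about its gradient degrades to None instead of tripping the old unsafe_autograd_meta assert. A small sketch of that behavior, assuming the EagerTensor(value=...) constructor and the gradient()/_grad_ivar() accessors exercised by the tests in this diff:

import numpy as np
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    t = core.eager.EagerTensor(value=np.ones([4, 16]).astype("float32"))
    # No backward pass has run, so no grad node or grad value exists yet;
    # both accessors now report None rather than asserting.
    assert t.gradient() is None
    assert t._grad_ivar() is None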
......@@ -298,6 +298,21 @@ static PyObject* eager_tensor_method_detach(EagerTensorObject* self,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* eager_tensor_method_get_underline_tensor(
EagerTensorObject* self, PyObject* args, PyObject* kwargs) {
EAGER_SYNC_TRY
if (self->eager_tensor.is_dense_tensor()) {
auto* tensor = static_cast<paddle::framework::LoDTensor*>(
self->eager_tensor.impl().get());
VLOG(6) << "tensor: " << tensor->IsInitialized();
return ToPyObject(tensor);
} else {
Py_IncRef(Py_None);
return Py_None;
}
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyMethodDef variable_methods[] = {
{"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy,
METH_VARARGS | METH_KEYWORDS, NULL},
......@@ -315,14 +330,17 @@ PyMethodDef variable_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_is_shared_buffer_to",
{"_share_buffer_to",
(PyCFunction)(void (*)(void))eager_tensor__share_buffer_to,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_share_buffer_with",
{"_is_shared_buffer_with",
(PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with,
METH_VARARGS | METH_KEYWORDS, NULL},
{"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach,
METH_VARARGS | METH_KEYWORDS, NULL},
{"get_tensor",
(PyCFunction)(void (*)(void))eager_tensor_method_get_underline_tensor,
METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}};
} // namespace pybind
......
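The new get_tensor binding exposes the underlying LoDTensor of a dense EagerTensor (returned as a copy via pybind11), which is what test_value further down relies on. A minimal sketch, assuming the _is_initialized/_dtype/_place helpers that the test also calls:

import numpy as np
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    arr = np.random.rand(4, 16, 16, 32).astype("float64")
    t = core.eager.EagerTensor(value=arr)
    dense = t.get_tensor()            # underlying framework::LoDTensor (a copy)
    assert dense._is_initialized()
    # _dtype() / _place() report the metadata of the copied LoDTensor,
    # mirroring the assertions in test_value.
    print(dense._dtype(), dense._place())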
......@@ -42,6 +42,18 @@ PyObject* eager_tensor_properties_get_name(EagerTensorObject* self,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* eager_tensor_properties_get_type(EagerTensorObject* self,
void* closure) {
EAGER_SYNC_TRY
if (self->eager_tensor.is_dense_tensor()) {
return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR);
} else {
Py_INCREF(Py_None);
return Py_None;
}
EAGER_CATCH_AND_THROW_RETURN_NULL
}
int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value,
void* closure) {
EAGER_SYNC_TRY
......@@ -74,8 +86,13 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
return ToPyObject(*accumulation_grad_node->Grad());
} else {
VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name();
-    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
-    return ToPyObject(meta->Grad());
auto meta = egr::EagerUtils::nullable_autograd_meta(self->eager_tensor);
if (meta) {
return ToPyObject(meta->Grad());
} else {
Py_INCREF(Py_None);
return Py_None;
}
}
EAGER_CATCH_AND_THROW_RETURN_NULL
}
......@@ -185,6 +202,8 @@ struct PyGetSetDef variable_properties[] = {
nullptr, nullptr},
{"dtype", (getter)eager_tensor_properties_get_dtype, nullptr, nullptr,
nullptr},
{"type", (getter)eager_tensor_properties_get_type, nullptr, nullptr,
nullptr},
{nullptr, nullptr, nullptr, nullptr, nullptr}};
} // namespace pybind
......
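On the Python side, the new getter makes eager tensors report a type like the old VarBase did: LOD_TENSOR for dense impls, falling through to None otherwise. A short sketch using the same APIs the updated tests call:

import numpy as np
import paddle
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    t = paddle.to_tensor(np.ones([2, 2]).astype("float32"))
    # Dense eager tensors report LOD_TENSOR, matching the old VarBase.type.
    assert t.type == core.VarDesc.VarType.LOD_TENSOR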
......@@ -450,6 +450,18 @@ PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype) {
return obj.ptr();
}
PyObject* ToPyObject(const paddle::framework::proto::VarType& type) {
auto obj = ::pybind11::cast(type);
obj.inc_ref();
return obj.ptr();
}
PyObject* ToPyObject(const paddle::framework::LoDTensor* value) {
auto obj = ::pybind11::cast(value, py::return_value_policy::copy);
obj.inc_ref();
return obj.ptr();
}
PyObject* ToPyObject(const void* value) {
if (value == nullptr) {
Py_INCREF(Py_None);
......
......@@ -11,6 +11,7 @@ limitations under the License. */
#pragma once
#include <Python.h>
#include "paddle/pten/core/dense_tensor.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
......@@ -54,7 +55,9 @@ PyObject* ToPyObject(const std::vector<float>& value);
PyObject* ToPyObject(const std::vector<double>& value);
PyObject* ToPyObject(const std::vector<egr::EagerTensor>& value);
PyObject* ToPyObject(const platform::Place& value);
PyObject* ToPyObject(const framework::LoDTensor* value);
PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype);
PyObject* ToPyObject(const paddle::framework::proto::VarType& type);
PyObject* ToPyObject(const void* value);
PyObject* ToPyObject(
const std::unordered_map<std::string, std::vector<std::string>>& value);
......
......@@ -285,6 +285,9 @@ const paddle::platform::Place& DenseTensor::place() const {
storage_,
paddle::platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::place() is called."));
if (storage_->data_shared()) {
return storage_->data_shared()->place();
}
return storage_->place();
}
......
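DenseTensor::place() now prefers the place recorded on the shared allocation, which matters once two eager tensors alias one buffer via the renamed _share_buffer_to. A sketch of the user-visible effect, in the style of test_share_buffer_to further down in this diff; the place assertion illustrates the intent of this change and is not a line taken from the tests:

import numpy as np
import paddle
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    arr = np.ones([4, 16]).astype("float32")
    src = paddle.to_tensor(arr, core.VarDesc.VarType.FP32, core.CPUPlace())
    dst = paddle.to_tensor(np.zeros([4, 16]).astype("float32"),
                           core.VarDesc.VarType.FP32, core.CPUPlace())
    src._share_buffer_to(dst)                 # dst now aliases src's allocation
    assert np.array_equal(dst.numpy(), arr)
    assert dst._is_shared_buffer_with(src)
    # place() resolves through the shared allocation, so dst reports the
    # place of the buffer it shares with src.
    assert dst.place.is_cpu_place()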
......@@ -758,10 +758,10 @@ def monkey_patch_varbase():
@framework.dygraph_only
def _grad_ivar(self):
-        if self.grad._is_initialized():
-            return self.grad
-        else:
-            return None
if self.grad is not None:
if self.grad._is_initialized():
return self.grad
return None
@framework.dygraph_only
def _set_grad_ivar(self, value):
......@@ -782,6 +782,10 @@ def monkey_patch_varbase():
def clone(self):
return _C_ops_.assign(self)
@framework.dygraph_only
def value(self):
return self
if core._in_eager_mode() and not hasattr(core, "eager"):
return
......@@ -805,6 +809,7 @@ def monkey_patch_varbase():
setattr(core.eager.EagerTensor, "_set_grad_ivar", _set_grad_ivar)
setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient)
setattr(core.eager.EagerTensor, "clone", clone)
setattr(core.eager.EagerTensor, "value", value)
else:
setattr(core.VarBase, "__name__", "Tensor")
setattr(core.VarBase, "grad", grad)
......
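The value() patch is a thin compatibility shim for eager mode: legacy code written against VarBase.value().get_tensor() keeps working because value() simply returns the eager tensor itself, and the get_tensor binding added earlier in this diff then hands back the underlying LoDTensor. A minimal sketch; the identity check is an assumption that follows directly from `return self`:

import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    t = paddle.to_tensor(np.ones([2, 2]).astype("float32"))
    assert t.value() is t                            # value() just returns the tensor
    assert t.value().get_tensor()._is_initialized()  # legacy idiom still works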
......@@ -109,7 +109,7 @@ class EagerDtypeTestCase(unittest.TestCase):
core.VarDesc.VarType.COMPLEX128)
-class EagerTensorPropertiesTestCase(unittest.TestCase):
class EagerTensorPropertiesAndMethodsTestCase(unittest.TestCase):
def constructor(self, place):
egr_tensor = core.eager.EagerTensor()
self.assertEqual(egr_tensor.persistable, False)
......@@ -645,7 +645,8 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
self.assertTrue(tensor3.stop_gradient, True)
self.assertTrue(tensor3.place.is_cpu_place())
-    def test_share_buffer_to():
def test_share_buffer_to(self):
with _test_eager_guard():
arr = np.ones([4, 16, 16, 32]).astype('float32')
arr1 = np.zeros([4, 16]).astype('float32')
arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones(
......@@ -661,7 +662,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
else:
tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
core.CPUPlace())
-            self.assertTrue(np.array_equal(tensor.numpy(), arr1))
self.assertTrue(np.array_equal(tensor.numpy(), arr))
self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
tensor2._share_buffer_to(tensor)
self.assertTrue(np.array_equal(tensor.numpy(), arr2))
......@@ -694,6 +695,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
self.assertEqual(tensor.stop_gradient, False)
tensor.stop_gradient = True
self.assertEqual(tensor.stop_gradient, True)
self.assertEqual(tensor.type, core.VarDesc.VarType.LOD_TENSOR)
def test_global_properties(self):
print("Test_global_properties")
......@@ -714,6 +716,25 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
self.assertTrue(core.eager._get_expected_place().is_cpu_place())
core._disable_eager_mode()
def test_value(self):
with _test_eager_guard():
arr = np.random.rand(4, 16, 16, 32).astype('float64')
egr_tensor0 = core.eager.EagerTensor(value=arr)
self.assertEqual(egr_tensor0.persistable, False)
self.assertTrue("generated" in egr_tensor0.name)
self.assertEqual(egr_tensor0.shape, [4, 16, 16, 32])
self.assertTrue(
egr_tensor0.place._equals(
paddle.fluid.framework._current_expected_place()))
self.assertEqual(egr_tensor0.dtype, core.VarDesc.VarType.FP64)
self.assertEqual(egr_tensor0.stop_gradient, True)
self.assertTrue(egr_tensor0.value().get_tensor()._dtype(),
core.VarDesc.VarType.FP64)
self.assertTrue(egr_tensor0.value().get_tensor()._place(),
paddle.fluid.framework._current_expected_place())
self.assertTrue(egr_tensor0.value().get_tensor()._is_initialized())
class EagerParamBaseUsageTestCase(unittest.TestCase):
def test_print(self):
......@@ -803,6 +824,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
egr_tensor12.stop_gradient = False
egr_tensor12.backward()
self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
......
......@@ -181,6 +181,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.func_auto_prune2()
self.func_auto_prune2()
# TODO(jiabin): Support this when we support better split tensor
def test_auto_prune3(self):
with fluid.dygraph.guard():
case3 = AutoPruneLayer3(input_size=784)
......@@ -217,7 +218,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue(case4.linear.weight._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 0).all())
-    def test_auto_prune6(self):
def func_auto_prune6(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
......@@ -235,7 +236,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue(linear.weight.gradient() is None)
self.assertTrue(out1.gradient() is None)
-    def test_auto_prune7(self):
def test_auto_prune6(self):
with _test_eager_guard():
self.func_auto_prune6()
self.func_auto_prune6()
def func_auto_prune7(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
......@@ -253,7 +259,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue(linear.weight.gradient() is None)
self.assertTrue(out1.gradient() is None)
-    def test_auto_prune8(self):
def test_auto_prune7(self):
with _test_eager_guard():
self.func_auto_prune7()
self.func_auto_prune7()
def func_auto_prune8(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
......@@ -278,7 +289,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertFalse(
np.array_equal(linear_origin, linear.weight.numpy()))
-    def test_auto_prune9(self):
def test_auto_prune8(self):
with _test_eager_guard():
self.func_auto_prune8()
self.func_auto_prune8()
def func_auto_prune9(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
......@@ -307,7 +323,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
except ValueError as e:
assert type(e) == ValueError
-    def test_auto_prune10(self):
def test_auto_prune9(self):
with _test_eager_guard():
self.func_auto_prune9()
self.func_auto_prune9()
def func_auto_prune10(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
......@@ -321,12 +342,18 @@ class TestImperativeAutoPrune(unittest.TestCase):
out2 = linear2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
#TODO(jiabin): In Eager Mode we don't actually need sort_sum_gradient, this test should be removed when we don't support fluid anymore.
fluid.set_flags({'FLAGS_sort_sum_gradient': True})
out.backward()
self.assertTrue(linear.weight.gradient() is None)
self.assertTrue(out1.gradient() is None)
-    def test_auto_prune_with_optimizer(self):
def test_auto_prune10(self):
with _test_eager_guard():
self.func_auto_prune10()
self.func_auto_prune10()
def func_auto_prune_with_optimizer(self):
vocab_size = 100
size = 20
batch_size = 16
......@@ -341,7 +368,6 @@ class TestImperativeAutoPrune(unittest.TestCase):
grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
optimizer = fluid.optimizer.AdamOptimizer(
0.001, parameter_list=model.parameters(), grad_clip=grad_clip)
-            indices = fluid.dygraph.to_variable(indices)
embed = fluid.dygraph.to_variable(embed)
dummy_loss = model(embed)
......@@ -374,7 +400,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
assert model.embed1.weight._grad_ivar() is None
assert model.linear_1.weight._grad_ivar() is None
-    def test_case2_prune_no_grad_branch(self):
def test_auto_prune_with_optimizer(self):
with _test_eager_guard():
self.func_auto_prune_with_optimizer()
self.func_auto_prune_with_optimizer()
def func_case2_prune_no_grad_branch(self):
with fluid.dygraph.guard():
value1 = np.arange(784).reshape(1, 784)
value2 = np.arange(1).reshape(1, 1)
......@@ -386,7 +417,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
self.assertTrue(case3.linear2.weight._grad_ivar() is None)
self.assertTrue(case3.linear.weight._grad_ivar() is not None)
-    def test_case3_prune_no_grad_branch2(self):
def test_case2_prune_no_grad_branch(self):
with _test_eager_guard():
self.func_case2_prune_no_grad_branch()
self.func_case2_prune_no_grad_branch()
def func_case3_prune_no_grad_branch2(self):
with fluid.dygraph.guard():
value1 = np.arange(1).reshape(1, 1)
linear = fluid.dygraph.Linear(1, 1, act=None)
......@@ -399,13 +435,23 @@ class TestImperativeAutoPrune(unittest.TestCase):
loss.backward()
self.assertTrue(linear.weight._grad_ivar() is None)
-    def test_case4_with_no_grad_op_maker(self):
def test_case3_prune_no_grad_branch2(self):
with _test_eager_guard():
self.func_case3_prune_no_grad_branch2()
self.func_case3_prune_no_grad_branch2()
def func_case4_with_no_grad_op_maker(self):
with fluid.dygraph.guard():
out = fluid.layers.gaussian_random(shape=[20, 30])
loss = fluid.layers.mean(out)
loss.backward()
self.assertTrue(out._grad_ivar() is None)
def test_case4_with_no_grad_op_maker(self):
with _test_eager_guard():
self.func_case4_with_no_grad_op_maker()
self.func_case4_with_no_grad_op_maker()
if __name__ == '__main__':
unittest.main()