Unverified · commit c06350c9 · authored by Zhou Wei · committed by GitHub

[cherry pick 2.0]support deepcopy for Layer/Tensor/Paramerbase (#29387) (#29873)

* support deepcopy for Layer/Tensor/Paramerbase

* fix some code
Parent 878b6972
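What this commit adds, in user-facing terms: paddle.nn.Layer, the dygraph Tensor (core.VarBase), and ParamBase can now be passed to Python's copy.deepcopy. A minimal usage sketch under that assumption (the variable names here are illustrative, not part of the commit):

    import copy
    import paddle

    # deep copy a leaf Tensor: the copy owns its own storage
    x = paddle.to_tensor([2.])
    x_copy = copy.deepcopy(x)
    x_copy[:] = 5.
    print(x.numpy())        # still [2.]

    # deep copy a whole Layer: every parameter is copied as well
    linear = paddle.nn.Linear(4, 4)
    linear_copy = copy.deepcopy(linear)

The diff below wires this up in three places: a C++ VarBase::CopyFrom plus its copy_ binding, a __deepcopy__ patched onto Tensor and ParamBase on the Python side, and dict-based attribute hooks on Layer so the object can be rebuilt without calling __init__.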
@@ -283,6 +283,36 @@ std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
  }
}

void VarBase::CopyFrom(const VarBase& src, const bool blocking) {
  if (SharedVar()->IsEmpty()) {
    VLOG(3) << "deep copy Variable from " << src.Name() << " to " << Name();
    SetPersistable(src.Persistable());
    SetDataType(src.DataType());
    SetType(src.Type());
    SetOverridedStopGradient(src.OverridedStopGradient());
    if (!src.SharedVar()->IsEmpty()) {
      const platform::Place& place = src.Place();
      if (src.Var().IsType<framework::LoDTensor>()) {
        auto& src_tensor = src.Var().Get<framework::LoDTensor>();
        auto* dst_tensor = MutableVar()->GetMutable<framework::LoDTensor>();
        dst_tensor->set_lod(src_tensor.lod());
        framework::TensorCopy(src_tensor, place, dst_tensor);
      } else if (src.Var().IsType<framework::SelectedRows>()) {
        auto& src_selected_rows = src.Var().Get<framework::SelectedRows>();
        auto* dst_selected_rows =
            MutableVar()->GetMutable<framework::SelectedRows>();
        dst_selected_rows->set_height(src_selected_rows.height());
        dst_selected_rows->set_rows(src_selected_rows.rows());
        framework::TensorCopy(src_selected_rows.value(), place,
                              dst_selected_rows->mutable_value());
      }
      if (blocking) {
        platform::DeviceContextPool::Instance().Get(place)->Wait();
      }
    }
  }
}
void VarBase::BumpInplaceVersion() {
  PADDLE_ENFORCE_EQ(
      Var().IsInitialized(), true,
......
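VarBase::CopyFrom above only fills an empty destination: it copies the metadata (persistable, dtype, type, stop_gradient) and then the payload, handling both LoDTensor and SelectedRows, and waits on the device context when blocking is true; if the destination already holds data the call is a no-op in this version. Through the copy_ binding added further down, the Python-level effect is roughly as follows (a sketch, assuming dygraph mode):

    import paddle
    import paddle.fluid.core as core
    from paddle.fluid.dygraph import guard

    with guard():
        src = paddle.to_tensor([1., 2., 3.])
        dst = core.VarBase()      # empty VarBase from the new no-argument __init__
        dst.copy_(src, True)      # blocking: waits until the device-side copy finishes
        print(dst.numpy())        # [1. 2. 3.]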
@@ -216,6 +216,8 @@ class VarBase {
  std::shared_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
                                      const bool blocking) const;

  void CopyFrom(const imperative::VarBase& src, bool blocking);

  void BumpInplaceVersion();

 private:
......
@@ -526,6 +526,13 @@ void BindImperative(py::module *m_ptr) {
  py::class_<imperative::VarBase, std::shared_ptr<imperative::VarBase>>(
      m, "VarBase", R"DOC()DOC")
      .def_static("_alive_vars", &imperative::VarBase::AliveVarNames)
      .def("__init__",
           [](imperative::VarBase &self) {
             std::string name =
                 imperative::GetCurrentTracer()->GenerateUniqueName(
                     "generated_tensor");
             new (&self) imperative::VarBase(name);
           })
      .def("__init__",
           [](imperative::VarBase &self, framework::proto::VarType::Type dtype,
              const std::vector<int> &dims, const py::handle &name,
@@ -1023,6 +1030,7 @@ void BindImperative(py::module *m_ptr) {
                 y = x.cuda(1)
                 print(y.place)      # CUDAPlace(1)
      )DOC")
      .def("copy_", &imperative::VarBase::CopyFrom)
      .def("_copy_to",
           [](const imperative::VarBase &self, const platform::CPUPlace &place,
              bool blocking) { return self.NewVarBase(place, blocking); },
......
@@ -21,6 +21,7 @@ import re
import copy
import weakref
import warnings
from copy import deepcopy
from . import parallel_helper
from .. import unique_name
@@ -1016,15 +1017,26 @@ class Layer(core.Layer):
        self._parameters[name] = parameter
        return parameter
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __getattr__(self, name):
-       if name in self._parameters:
-           return self._parameters[name]
-       elif name in self._sub_layers:
-           return self._sub_layers[name]
-       elif name in self._buffers:
-           return self._buffers[name]
-       else:
-           return object.__getattribute__(self, name)
+       if '_parameters' in self.__dict__:
+           _parameters = self.__dict__['_parameters']
+           if name in self._parameters:
+               return self._parameters[name]
+       if '_sub_layers' in self.__dict__:
+           _sub_layers = self.__dict__['_sub_layers']
+           if name in self._sub_layers:
+               return self._sub_layers[name]
+       if '_buffers' in self.__dict__:
+           _buffers = self.__dict__['_buffers']
+           if name in _buffers:
+               return _buffers[name]
+       return object.__getattribute__(self, name)

    def __setattr__(self, name, value):
        def _remove_if_exist(*dicts):
......
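The __getstate__/__setstate__ pair and the rewritten __getattr__ above exist for the same reason: copy.deepcopy (and pickle) rebuild a Layer without running __init__, so attribute lookups can arrive before _parameters, _sub_layers, or _buffers have been created, and __getattr__ must check self.__dict__ directly instead of recursing into itself. A small sketch of the behaviour this enables (illustrative only):

    import copy
    import paddle

    layer = paddle.nn.Linear(2, 2)
    layer_copy = copy.deepcopy(layer)

    # the copy owns its own parameters, with identical values
    assert layer_copy.weight is not layer.weight
    assert (layer_copy.weight.numpy() == layer.weight.numpy()).all()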
@@ -18,6 +18,7 @@ import numpy as np
import paddle
from .. import framework
from .. import core
from .. import unique_name
from ..framework import Variable, Parameter, ParamBase
from .base import switch_to_static_graph
from .math_op_patch import monkey_patch_math_varbase
@@ -263,6 +264,37 @@ def monkey_patch_varbase():
        from paddle.tensor.to_string import to_string
        return to_string(self)
    def __deepcopy__(self, memo):
        """
        Deep copy Tensor; it always performs a Tensor copy.

        Examples:
            .. code-block:: python

                import paddle
                import copy
                x = paddle.to_tensor(2.)
                y = copy.deepcopy(x)

                print(x)
                # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True,
                #        [2.])

                print(y)
                # Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=True,
                #        [2.])

        """
        if not self.is_leaf:
            raise RuntimeError(
                "Only Leaf Tensors support deepcopy at the moment; non-Leaf Tensors contain graph information that doesn't support deepcopy"
            )
        new_varbase = core.VarBase()
        new_varbase.name = self.name + unique_name.generate("_deepcopy")
        memo[id(self)] = new_varbase
        new_varbase.copy_(self, True)
        return new_varbase
    @property
    def block(self):
        return framework.default_main_program().global_block()

@@ -283,7 +315,8 @@ def monkey_patch_varbase():
        ("block", block), ("backward", backward), ("clear_grad", clear_grad),
        ("inplace_version", inplace_version), ("grad", grad),
        ("gradient", gradient), ("__str__", __str__), ("__repr__", __str__),
-       ("__module__", "paddle"), ("__name__", "Tensor")):
+       ("__deepcopy__", __deepcopy__), ("__module__", "paddle"),
+       ("__name__", "Tensor")):
        setattr(core.VarBase, method_name, method)

    # patch math methods for varbase
......
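Two details of the Tensor __deepcopy__ above carry most of the semantics: only leaf Tensors (those not produced by a traced operation) may be copied, and the memo dict is honoured, so copying the same Tensor twice within one pass returns the same new object. A sketch mirroring the tests added later in this commit:

    import copy
    import paddle

    x = paddle.to_tensor([2.], stop_gradient=False)
    y = paddle.to_tensor([3.], stop_gradient=False)
    z = x * y                       # non-leaf: carries autograd graph information

    memo = {}
    x_copy = copy.deepcopy(x, memo)
    x_copy2 = copy.deepcopy(x, memo)
    assert x_copy is x_copy2        # memo short-circuits the second copy

    try:
        copy.deepcopy(z)            # non-leaf Tensors raise RuntimeError
    except RuntimeError:
        pass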
@@ -23,6 +23,7 @@ import os
import re
import traceback
import six
import copy
import numpy as np
import subprocess
@@ -5322,6 +5323,36 @@ class ParamBase(core.VarBase):
        return "Parameter containing:\n{tensor}".format(
            tensor=super(ParamBase, self).__str__())
    def __deepcopy__(self, memo):
        """
        Deep copy parameter; it always performs a Tensor copy.

        Examples:
            .. code-block:: python

                import paddle
                import copy
                linear = paddle.nn.Linear(1, 3)
                linear_copy = copy.deepcopy(linear)

                print(linear.weight)
                # Parameter containing:
                # Tensor(shape=[1, 3], dtype=float32, place=CPUPlace, stop_gradient=False,
                #        [[-0.30929261, -0.90929240, -1.07851017]])

                print(linear_copy.weight)
                # Parameter containing:
                # Tensor(shape=[1, 3], dtype=float32, place=CPUPlace, stop_gradient=False,
                #        [[-0.30929261, -0.90929240, -1.07851017]])

        """
        state = copy.deepcopy(self.__dict__, memo)
        state["name"] = self.name + unique_name.generate("_deepcopy")
        new_param = ParamBase(self.shape, self.dtype, **state)
        memo[id(self)] = new_param
        new_param.copy_(self, True)
        return new_param

    __repr__ = __str__
......
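ParamBase.__deepcopy__ first deep-copies the Python-side attribute dict (trainable flag, optimize_attr, regularizer, clipping and distribution flags), renames the copy with a fresh unique suffix, rebuilds a ParamBase of the same shape and dtype, and only then copies the tensor payload via copy_. In practice a deep-copied layer keeps its parameter attributes but owns independent storage; roughly (a sketch, names illustrative):

    import copy
    import numpy as np
    import paddle

    linear = paddle.nn.Linear(3, 3)
    linear_copy = copy.deepcopy(linear)

    # parameter attributes survive the copy ...
    assert linear_copy.weight.optimize_attr == linear.weight.optimize_attr

    # ... but the storage is independent
    linear.weight.set_value(np.zeros((3, 3), dtype='float32'))
    assert not np.array_equal(linear.weight.numpy(), linear_copy.weight.numpy())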
@@ -287,7 +287,6 @@ class TestImperative(unittest.TestCase):
            with paddle.no_grad():
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
-               print(tmp)
                self.assertTrue(tmp.stop_gradient)
            x = fluid.dygraph.to_variable(data)
            y = l0(x) + tmp
@@ -485,15 +484,15 @@ class TestImperative(unittest.TestCase):
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
-               print(x.grad)
                self.assertEqual(x.grad, (i + 1) * 500)
            x.clear_gradient()
            self.assertEqual(x.grad, 0.)
-           for i in range(5):
+           for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
-               print(x.grad)
                self.assertEqual(x.grad, (i + 1) * 500)
+           x.clear_grad()
+           self.assertEqual(x.grad, 0.)

        def test_simple_net(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
@@ -504,9 +503,18 @@ class TestImperative(unittest.TestCase):
            def fun(x, y, z):
                loss1 = x * x * y
                loss2 = x * z
+               loss1.backward(retain_graph=True)
+               loss2.backward(retain_graph=True)
+               self.assertTrue(np.array_equal(x.grad, [23.]))
+               self.assertTrue(np.array_equal(y.grad, [25.]))
+               self.assertTrue(np.array_equal(z.grad, [5.]))
+               x.clear_grad()
+               y.clear_grad()
+               z.clear_grad()
                dx = paddle.grad([loss1], x, create_graph=True)[0]
-               # loss = x*x*y + x*z + 2*x*y
                loss = loss1 + loss2 + dx
+               # loss = x*x*y + x*z + 2*x*y
                return loss

            loss = fun(x, y, z)
@@ -539,12 +547,12 @@ class TestImperative(unittest.TestCase):
            # generate the gradient of each step
            mlp2 = MLP(input_size=input_size)
-           expected_weight1_grad = np.zeros(mlp2._linear1.weight.shape)
-           expected_bias1_grad = np.zeros(mlp2._linear1.bias.shape)
-           expected_weight2_grad = np.zeros(mlp2._linear2.weight.shape)
-           expected_bias2_grad = np.zeros(mlp2._linear2.bias.shape)
+           expected_weight1_grad = 0.
+           expected_bias1_grad = 0.
+           expected_weight2_grad = 0.
+           expected_bias2_grad = 0.

-           for batch_id in range(24):
+           for batch_id in range(100):
                x = paddle.uniform([10, input_size])
                detach_x = x.detach()
                clear_loss = mlp2(detach_x)
@@ -571,12 +579,12 @@ class TestImperative(unittest.TestCase):
                mlp2.clear_gradients()
                self.assertTrue(np.array_equal(clear_loss.grad, [1]))
-               if ((batch_id + 1) % 8) == 0:
+               if ((batch_id + 1) % 10) == 0:
                    mlp1.clear_gradients()
-                   expected_weight1_grad = np.zeros(mlp2._linear1.weight.shape)
-                   expected_bias1_grad = np.zeros(mlp2._linear1.bias.shape)
-                   expected_weight2_grad = np.zeros(mlp2._linear2.weight.shape)
-                   expected_bias2_grad = np.zeros(mlp2._linear2.bias.shape)
+                   expected_weight1_grad = 0.
+                   expected_bias1_grad = 0.
+                   expected_weight2_grad = 0.
+                   expected_bias2_grad = 0.

        with fluid.dygraph.guard():
            test_single_api(False)
......
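The updated tests above lean on two dygraph behaviours that are easy to miss: backward(retain_graph=True) keeps the graph alive so backward can be called again, and gradients accumulate across those calls until clear_grad()/clear_gradients() resets them to zero. A rough sketch of that contract (values are illustrative):

    import paddle

    x = paddle.to_tensor([2.], stop_gradient=False)
    y = x * x
    y.backward(retain_graph=True)   # keep the graph for a second pass
    y.backward()
    print(x.grad)                   # gradients accumulate: 4 + 4 = 8
    x.clear_grad()
    print(x.grad)                   # reset to 0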
@@ -15,6 +15,9 @@
from __future__ import print_function

import unittest
import copy
import paddle
from paddle.fluid.dygraph import guard
from paddle.fluid.framework import default_main_program
import paddle.fluid.core as core
from paddle.fluid.executor import Executor
@@ -26,7 +29,7 @@ main_program = default_main_program()

class ParameterChecks(unittest.TestCase):
-   def check_param(self):
+   def check_parameter(self):
        shape = [784, 100]
        val = 1.0625
        b = main_program.global_block()
@@ -46,6 +49,28 @@ class ParameterChecks(unittest.TestCase):
        p = io.get_parameter_value_by_name('fc.w', exe, main_program)
        self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val))
    def check_parambase(self):
        with guard():
            linear = paddle.nn.Linear(10, 10)
            param = linear.weight

            memo = {}
            param_copy = copy.deepcopy(param, memo)
            self.assertEqual(param_copy.shape, param.shape)
            self.assertEqual(param_copy.type, param.type)
            self.assertEqual(param_copy.dtype, param.dtype)
            self.assertEqual(str(param_copy.place), str(param.place))
            self.assertTrue(np.array_equal(param_copy.numpy(), param.numpy()))
            self.assertEqual(param_copy.optimize_attr, param.optimize_attr)
            self.assertEqual(param_copy.regularizer, param.regularizer)
            self.assertEqual(param_copy.do_model_average,
                             param.do_model_average)
            self.assertEqual(param_copy.need_clip, param.need_clip)
            self.assertEqual(param_copy.is_distributed, param.is_distributed)

            param_copy2 = copy.deepcopy(param, memo)
            self.assertEqual(id(param_copy), id(param_copy2))
    def check_exceptions(self):
        b = main_program.global_block()
        with self.assertRaises(ValueError):
@@ -63,8 +88,11 @@
class TestParameter(ParameterChecks):
-   def test_param(self):
-       self.check_param()
+   def _test_parameter(self):
+       self.check_parameter()

+   def test_parambase(self):
+       self.check_parambase()

    def test_exceptions(self):
        self.check_exceptions()
......
@@ -17,6 +17,7 @@ from __future__ import print_function
import unittest
import numpy as np
import six
import copy
import paddle
import paddle.fluid as fluid
@@ -264,6 +265,68 @@ class TestVarBase(unittest.TestCase):
        var.stop_gradient = False
        self.assertEqual(var.stop_gradient, False)
    def test_deep_copy(self):
        with fluid.dygraph.guard():
            empty_var = core.VarBase()
            empty_var_copy = copy.deepcopy(empty_var)
            self.assertEqual(empty_var.stop_gradient,
                             empty_var_copy.stop_gradient)
            self.assertEqual(empty_var.persistable, empty_var_copy.persistable)
            self.assertEqual(empty_var.type, empty_var_copy.type)
            self.assertEqual(empty_var.dtype, empty_var_copy.dtype)

            x = paddle.to_tensor([2.], stop_gradient=False)
            y = paddle.to_tensor([3.], stop_gradient=False)
            z = x * y
            memo = {}
            x_copy = copy.deepcopy(x, memo)
            y_copy = copy.deepcopy(y, memo)

            self.assertEqual(x_copy.stop_gradient, y_copy.stop_gradient)
            self.assertEqual(x_copy.persistable, y_copy.persistable)
            self.assertEqual(x_copy.type, y_copy.type)
            self.assertEqual(x_copy.dtype, y_copy.dtype)
            self.assertTrue(np.array_equal(x.numpy(), x_copy.numpy()))
            self.assertTrue(np.array_equal(y.numpy(), y_copy.numpy()))
            self.assertNotEqual(id(x), id(x_copy))
            x_copy[:] = 5.
            self.assertTrue(np.array_equal(x_copy.numpy(), [5.]))
            self.assertTrue(np.array_equal(x.numpy(), [2.]))

            with self.assertRaises(RuntimeError):
                copy.deepcopy(z)

            x_copy2 = copy.deepcopy(x, memo)
            y_copy2 = copy.deepcopy(y, memo)
            self.assertEqual(id(x_copy), id(x_copy2))
            self.assertEqual(id(y_copy), id(y_copy2))

            # test copy selected rows
            x = core.VarBase(core.VarDesc.VarType.FP32, [3, 100],
                             "selected_rows",
                             core.VarDesc.VarType.SELECTED_ROWS, True)
            selected_rows = x.value().get_selected_rows()
            selected_rows.get_tensor().set(
                np.random.rand(3, 100), core.CPUPlace())
            selected_rows.set_height(10)
            selected_rows.set_rows([3, 5, 7])

            x_copy = copy.deepcopy(x)
            self.assertEqual(x_copy.stop_gradient, x.stop_gradient)
            self.assertEqual(x_copy.persistable, x.persistable)
            self.assertEqual(x_copy.type, x.type)
            self.assertEqual(x_copy.dtype, x.dtype)

            copy_selected_rows = x_copy.value().get_selected_rows()
            self.assertEqual(copy_selected_rows.height(),
                             selected_rows.height())
            self.assertEqual(copy_selected_rows.rows(), selected_rows.rows())
            self.assertTrue(
                np.array_equal(
                    np.array(copy_selected_rows.get_tensor()),
                    np.array(selected_rows.get_tensor())))
    # test some patched methods
    def test_set_value(self):
        with fluid.dygraph.guard():
......