From 067f558c59b34dd6d8626aad73e9943cf7f5960f Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Wed, 12 May 2021 10:43:02 +0800
Subject: [PATCH] add varbasecopy func to fix the ParamBase type bug in layers.to API (#32789)

* add varbasecopy func to fix the ParamBase type bug in layers.to API
* overload _copy_to func for ParamBase
* add XPUPlace support
* add waiting for varbasecopy completion when not blocking
* fix dst_device bug
* modify varbase to shared_ptr
---
 paddle/fluid/pybind/imperative.cc            | 61 +++++++++++++++++++
 python/paddle/fluid/dygraph/layers.py        | 17 +++++-
 python/paddle/fluid/framework.py             |  7 +++
 .../fluid/tests/unittests/test_base_layer.py | 14 +++--
 4 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index 450c992d41..4bdf1e21ba 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -469,6 +469,62 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index,
   if (!PyTuple_Check(_index)) Py_DecRef(index);
 }
 
+template <typename P>
+static void VarBaseCopy(std::shared_ptr<imperative::VarBase> &src,
+                        imperative::VarBase &dst, const P &dst_device,
+                        const bool blocking) {
+  if (dst.SharedVar()->IsEmpty()) {
+    VLOG(3) << "deep copy Variable from " << src->Name() << " to "
+            << dst.Name();
+    dst.SetPersistable(src->Persistable());
+    dst.SetDataType(src->DataType());
+    dst.SetType(src->Type());
+    dst.SetOverridedStopGradient(src->OverridedStopGradient());
+    if (!src->SharedVar()->IsEmpty()) {
+      if (src->Var().IsType<framework::LoDTensor>()) {
+        auto &src_tensor = src->Var().Get<framework::LoDTensor>();
+        auto *dst_tensor = dst.MutableVar()->GetMutable<framework::LoDTensor>();
+        dst_tensor->set_lod(src_tensor.lod());
+        framework::TensorCopy(src_tensor, dst_device, dst_tensor);
+        if (blocking) {
+          platform::DeviceContextPool::Instance().Get(dst_device)->Wait();
+          auto src_device = src_tensor.place();
+          if (!(src_device == dst_device)) {
+            platform::DeviceContextPool::Instance().Get(src_device)->Wait();
+          }
+        }
+      } else if (src->Var().IsType<framework::SelectedRows>()) {
+        auto &src_selected_rows = src->Var().Get<framework::SelectedRows>();
+        auto *dst_selected_rows =
+            dst.MutableVar()->GetMutable<framework::SelectedRows>();
+        dst_selected_rows->set_height(src_selected_rows.height());
+        dst_selected_rows->set_rows(src_selected_rows.rows());
+        framework::TensorCopy(src_selected_rows.value(), dst_device,
+                              dst_selected_rows->mutable_value());
+        if (blocking) {
+          platform::DeviceContextPool::Instance().Get(dst_device)->Wait();
+          auto src_device = src_selected_rows.value().place();
+          if (!(src_device == dst_device)) {
+            platform::DeviceContextPool::Instance().Get(src_device)->Wait();
+          }
+        }
+      }
+
+      if (!blocking) {
+        IncreaseVarbaseReferenceCountUntilCopyComplete(src, dst_device);
+      }
+
+    } else {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "The source Tensor(%s) can not copy when it is empty.", src->Name()));
+    }
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "The destination Tensor(%s) can not copy when it is not empty.",
+        dst.Name()));
+  }
+}
+
 // Bind Methods
 void BindImperative(py::module *m_ptr) {
   auto &m = *m_ptr;
@@ -1639,6 +1695,11 @@ void BindImperative(py::module *m_ptr) {
              self.nrings_ = nrings;
            });
 
+  m.def("varbase_copy", &VarBaseCopy<platform::Place>);
+  m.def("varbase_copy", &VarBaseCopy<platform::CPUPlace>);
+  m.def("varbase_copy", &VarBaseCopy<platform::CUDAPlace>);
+  m.def("varbase_copy", &VarBaseCopy<platform::XPUPlace>);
+
   m.def(
       "dygraph_partial_grad",
       [](const std::vector<std::shared_ptr<imperative::VarBase>> &input_targets,
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index 18dfff434a..1bde7ef8ab 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -34,7 +34,7 @@ from .base import program_desc_tracing_guard, param_guard
 from paddle.fluid import framework
 from ..param_attr import ParamAttr
 from paddle.fluid.executor import Executor, global_scope
-from paddle.fluid.framework import in_dygraph_mode
+from paddle.fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_
 from paddle.fluid.framework import _current_expected_place as _get_device
 from paddle.fluid.dygraph import no_grad
 import paddle.utils.deprecated as deprecated
@@ -1427,8 +1427,19 @@ class Layer(core.Layer):
             dtype = t.dtype
 
             new_t = t._copy_to(device, blocking)
-            if dtype is not None and dtype != t.dtype:
-                new_t = new_t.cast(dtype=dtype)
+            if isinstance(t, framework.ParamBase):
+                if dtype is not None and dtype != t.dtype:
+                    framework._dygraph_tracer().trace_op(
+                        type='cast',
+                        inputs={'X': new_t},
+                        outputs={'Out': new_t},
+                        attrs={
+                            'in_dtype': t.dtype,
+                            'out_dtype': convert_np_dtype_to_dtype_(dtype)
+                        })
+            else:
+                if dtype is not None and dtype != t.dtype:
+                    new_t = new_t.cast(dtype=dtype)
 
             return new_t
 
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 2eac5adcf2..f4cad7894a 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -5855,6 +5855,13 @@ class ParamBase(core.VarBase):
             new_param.copy_(self, True)
             return new_param
 
+    def _copy_to(self, device, blocking):
+        print("in ParamBase copy_to func")
+        state = copy.deepcopy(self.__dict__)
+        new_param = ParamBase(self.shape, self.dtype, **state)
+        core.varbase_copy(self, new_param, device, blocking)
+        return new_param
+
     __repr__ = __str__
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py
index 27c8869b21..fb5b8bde10 100644
--- a/python/paddle/fluid/tests/unittests/test_base_layer.py
+++ b/python/paddle/fluid/tests/unittests/test_base_layer.py
@@ -341,7 +341,7 @@ class TestLayerTo(unittest.TestCase):
         self.linear.register_buffer("buf_name", buffer, persistable=True)
 
         sublayer = paddle.nn.Conv1D(3, 2, 3)
-        self.linear.add_sublayer(1, sublayer)
+        self.linear.add_sublayer("1", sublayer)
 
     def test_to_api(self):
         self.linear.to(dtype='double')
@@ -351,8 +351,8 @@ class TestLayerTo(unittest.TestCase):
                          paddle.fluid.core.VarDesc.VarType.FP64)
         self.assertTrue(
             np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
-        self.assertTrue(self.linear.weight._grad_ivar().dtype,
-                        paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertEqual(self.linear.weight._grad_ivar().dtype,
+                         paddle.fluid.core.VarDesc.VarType.FP64)
 
         self.linear.to()
         self.assertEqual(self.linear.weight.dtype,
@@ -361,8 +361,10 @@ class TestLayerTo(unittest.TestCase):
                          paddle.fluid.core.VarDesc.VarType.FP64)
         self.assertTrue(
             np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
-        self.assertTrue(self.linear.weight._grad_ivar().dtype,
-                        paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertEqual(self.linear.weight._grad_ivar().dtype,
+                         paddle.fluid.core.VarDesc.VarType.FP64)
+        for p in self.linear.parameters():
+            self.assertTrue(isinstance(p, paddle.fluid.framework.ParamBase))
 
         if paddle.fluid.is_compiled_with_cuda():
             self.linear.to(device=paddle.CUDAPlace(0))
@@ -384,6 +386,8 @@ class TestLayerTo(unittest.TestCase):
                 ))
             self.assertEqual(
                 self.linear.weight._grad_ivar().place.gpu_device_id(), 0)
+            for p in self.linear.parameters():
+                self.assertTrue(isinstance(p, paddle.fluid.framework.ParamBase))
 
             self.linear.to(device=paddle.CPUPlace())
             self.assertTrue(self.linear.weight.place.is_cpu_place())

--
GitLab
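
For context, a minimal usage sketch of the behavior this patch guards, mirroring the updated unit test (the Linear layer, dtype, and device below are illustrative): after Layer.to() casts or moves the weights, each parameter should still be a ParamBase rather than a plain VarBase, so it keeps behaving as a trainable parameter.

    import paddle

    linear = paddle.nn.Linear(10, 10)

    # Cast the parameters in place; with this patch they keep their ParamBase type.
    linear.to(dtype='double')

    # Optionally move them to another device as well.
    if paddle.fluid.is_compiled_with_cuda():
        linear.to(device=paddle.CUDAPlace(0))

    for p in linear.parameters():
        assert isinstance(p, paddle.fluid.framework.ParamBase)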