diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index faecba6295d35fd9b6d841181a450a5a540a7074..0a4885ea325414419d006c3f4de3c12f185ffeee 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -43,6 +43,49 @@ namespace framework {
 
 class LoDTensor;
 
+/*
+ NOTE(liym27): [ What is TensorInplaceVersion used for? ]
+
+ TensorInplaceVersion is a version counter and every Tensor has one. It's
+ used to check whether an inplace operation will result in an incorrect
+ gradient calculation. The version is incremented whenever the data of the
+ Variable is modified in place.
+
+ - Question: In what scenarios will version counters be shared?
+ - Answer: When two Variables/VarBases share the same C++ Tensor (its
+   Allocation may change), both of them share the same version counter. For
+   example:
+    1. `z = paddle.assign(input=x, output=y)`: `z` shares the same version
+       counter as `y` because z and y are the same VarBase;
+    2. `y = x.detach()`: `y` shares the same version counter as `x`.
+
+ - Question: In what scenarios will version counters NOT be shared?
+ - Answer: When a `Variable`'s data is replaced by calling
+   `Tensor::ShareDataWith(...)` or `Tensor::ShareBufferWith(...)`, because the
+   two Variables then share the same Allocation but not the same
+   framework::Tensor.
+
+ - Question: Why put inplace_version_counter_ in framework::Tensor instead of
+   Allocation or Variable?
+ - Answer:
+    1. Tensor can call ResetHolder() to reset the corresponding Allocation, so
+       a counter stored in Allocation would change after ResetHolder(), which
+       would give confusing information about the inplace version.
+    2. If inplace_version_counter_ were in Variable, different VariableWrappers
+       would have to share the same Variable. However, a VariableWrapper holds
+       a Variable object rather than a pointer.
+*/
+
+class TensorInplaceVersion {
+ public:
+  explicit TensorInplaceVersion(uint32_t inplace_version = 0)
+      : inplace_version_(inplace_version) {}
+  bool IsUnique() const { return inplace_version_ == 0; }
+  void Bump() { ++inplace_version_; }
+  uint32_t CurrentVersion() const { return inplace_version_; }
+
+ private:
+  uint32_t inplace_version_;
+};
+
 class Tensor {
 #ifdef PADDLE_WITH_MKLDNN
 
@@ -189,6 +232,9 @@ class Tensor {
   void ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
                            const proto::VarType::Type type);
 
+  TensorInplaceVersion& InplaceVersionCounter() {
+    return inplace_version_counter_;
+  }
 
  private:
   /*! holds the memory block if allocated. */
@@ -225,6 +271,7 @@ class Tensor {
    * PlaceHolder::ptr_ and where the tensor data really begins.
    */
   size_t offset_;
+  TensorInplaceVersion inplace_version_counter_;
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/variable.h b/paddle/fluid/framework/variable.h
index cf788ab013199e1567472864df7e28ae65d03c5a..792a2accd41d67e76d56dfdc058e4128018614e7 100644
--- a/paddle/fluid/framework/variable.h
+++ b/paddle/fluid/framework/variable.h
@@ -18,8 +18,8 @@
 #include <string>
 #include <typeindex>
 
+#include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/var_type_traits.h"
-
 namespace paddle {
 namespace framework {
 
@@ -69,6 +69,15 @@ class Variable {
     return holder_->Type();
   }
 
+ private:
+  // This method hides type T, so it doesn't appear as a template parameter of
+  // Variable.
+  framework::TensorInplaceVersion* InplaceVersionCounter();
+
+ public:
+  uint32_t CurrentInplaceVersion();
+  void BumpInplaceVersion();
+
  private:
   struct Placeholder {
     virtual ~Placeholder() PADDLE_MAY_THROW {}
 
@@ -101,8 +110,48 @@ class Variable {
   };
 
   // pointers to a PlaceholderImpl object indeed.
-  std::unique_ptr<Placeholder> holder_;
+  std::shared_ptr<Placeholder> holder_;
 };
 
+inline framework::TensorInplaceVersion* Variable::InplaceVersionCounter() {
+  framework::TensorInplaceVersion* version_counter_ptr(nullptr);
+  if (IsType<framework::Tensor>()) {
+    version_counter_ptr =
+        &GetMutable<framework::Tensor>()->InplaceVersionCounter();
+  } else if (IsType<framework::LoDTensor>()) {
+    version_counter_ptr =
+        &GetMutable<framework::LoDTensor>()->InplaceVersionCounter();
+
+  } else if (IsType<framework::SelectedRows>()) {
+    version_counter_ptr = &GetMutable<framework::SelectedRows>()
+                               ->mutable_value()
+                               ->InplaceVersionCounter();
+  } else {
+    VLOG(4) << "Only supports Tensor, LoDTensor, SelectedRows to have "
+               "TensorInplaceVersion, but received type "
+            << platform::demangle(framework::ToTypeName(Type()));
+  }
+  return version_counter_ptr;
+}
+
+inline uint32_t Variable::CurrentInplaceVersion() {
+  auto version_counter_ptr = InplaceVersionCounter();
+  if (version_counter_ptr) {
+    return version_counter_ptr->CurrentVersion();
+  } else {
+    return 0;
+  }
+}
+
+inline void Variable::BumpInplaceVersion() {
+  auto version_counter_ptr = InplaceVersionCounter();
+  if (version_counter_ptr) {
+    return version_counter_ptr->Bump();
+  } else {
+    VLOG(4) << "Only supports Tensor, LoDTensor, SelectedRows to have "
+               "TensorInplaceVersion, but received type "
+            << platform::demangle(framework::ToTypeName(Type()));
+  }
+}
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index e9214a8fea817435ea314fd9227299140f15df16..b37d8619e7e680f368ee87d3f386e6b332a3a50b 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -225,6 +225,31 @@ void BasicEngine::Execute() {
       }
     }
 
+    VLOG(4) << "Check whether there is any inplace operation affecting "
+               "gradient calculation.";
+    for (auto& pair : bwd_ins) {
+      for (auto& var_wrapper : pair.second) {
+        auto wrapper_version_snapshot = var_wrapper->InplaceVersionSnapshot();
+        auto tensor_version =
+            var_wrapper->MutableVar()->CurrentInplaceVersion();
+        PADDLE_ENFORCE_EQ(
+            tensor_version, wrapper_version_snapshot,
+            platform::errors::PermissionDenied(
+                "Tensor '%s' used in gradient computation in grad op '%s' "
+                "has been modified by an inplace operation. "
+                "Its version is %s but the expected version is %s. "
+                "Please fix your code to avoid calling an inplace operator "
+                "after using the Tensor that will be used in gradient "
+                "computation.",
+                var_wrapper->Name(), cur_op.Type(), tensor_version,
+                wrapper_version_snapshot));
+
+        VLOG(6) << " The version of Tensor '" << var_wrapper->Name()
+                << "' is [ " << wrapper_version_snapshot << " ]";
+      }
+    }
+
     {
       VLOG(3) << "Start to execute grad op " << cur_op.Type();
       OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(),
diff --git a/paddle/fluid/imperative/dygraph_grad_maker.h b/paddle/fluid/imperative/dygraph_grad_maker.h
index f21781fbbecfb4b168c6a0ec0276707fbe7ddec1..0d81221c43306ce35f8dc038456af0d04830e365 100644
--- a/paddle/fluid/imperative/dygraph_grad_maker.h
+++ b/paddle/fluid/imperative/dygraph_grad_maker.h
@@ -147,7 +147,6 @@ class GradOpBaseMakerBase {
                                                bool is_input) const {
     const auto& data_map = is_input ? var_base_map_in_ : var_base_map_out_;
     auto iterator = data_map.find(name);
-
     TracedVarList<VarBase, kRole> vec_temp;
     if (iterator != data_map.end()) {
       vec_temp.reserve(iterator->second.size());
@@ -226,6 +225,7 @@ class TracedGradOp {
     }
 
     auto var_wrappers = ToVarWrapperList<kRole>(vars);
+
     if (!var_wrappers.empty()) {
       op_->SetInput(name, std::move(var_wrappers),
                     kRole == TracedVarRole::kBackward);
@@ -293,7 +293,8 @@ class TracedGradOp {
                          var->OverridedStopGradient()))) {
         result.emplace_back();
       } else {
-        result.emplace_back(var->SharedVar());
+        auto var_wrapper = SnapshotVarWrapper(var->SharedVar());
+        result.emplace_back(var_wrapper);
         has_valid = true;
       }
     }
@@ -304,6 +305,26 @@ class TracedGradOp {
     return result;
   }
 
+  // Get a snapshot of VariableWrapper at a certain inplace version.
+  // The inplace version number of VariableWrapper is used for inplace
+  // detection in gradient computation.
+  static const std::shared_ptr<VariableWrapper> SnapshotVarWrapper(
+      const std::shared_ptr<VariableWrapper>& var_wrapper) {
+    // NOTE(liym27):
+    // Use the original var_wrapper if its inplace_version has not changed.
+    // Otherwise, it will affect the accuracy of the model results and affect
+    // double grad.
+    if (!var_wrapper->MutableVar()->IsInitialized() ||
+        var_wrapper->InplaceVersionSnapshot() ==
+            var_wrapper->MutableVar()->CurrentInplaceVersion()) {
+      return var_wrapper;
+    } else {
+      VariableWrapper new_var_wrapper = *var_wrapper.get();
+      new_var_wrapper.ResetInplaceVersion();
+      return std::make_shared<VariableWrapper>(new_var_wrapper);
+    }
+  }
+
  private:
   const std::shared_ptr<GradOpNode>& node_;
   OpBase* op_;
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index ec76f58d77ed5dece46c53795b3cccfe8bfbd902..eaf9986b200af8d6b1bd7a2da2c957415838abe0 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -278,6 +278,15 @@ std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
   }
 }
 
+void VarBase::BumpInplaceVersion() {
+  PADDLE_ENFORCE_EQ(
+      Var().IsInitialized(), true,
+      platform::errors::InvalidArgument(
+          "Tensor %s has not been initialized, please check if it has no "
+          "data.",
+          Name()));
+  MutableVar()->BumpInplaceVersion();
+}
+
 void OpBase::SetType(const std::string& type) {
   op_ = framework::OpRegistry::CreateOp(type, {}, {}, {}, false);
 }
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index d4df052a40d307c31978c33d9e14aef9ed4b3b82..9a587fd6d6c43bc9ae1ad4c3005c00b0d7f3eee8 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -202,6 +202,8 @@ class VarBase {
   std::shared_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
                                       const bool blocking) const;
 
+  void BumpInplaceVersion();
+
  private:
   /**
    * NOTE(zengjinle): never remove the const qualifier of `var_` if you are
diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h
index e9b1ccc860df0fee32b203746dbf01f618de5f99..df972035ae377af3dd64f10d6181ebba749df710 100644
--- a/paddle/fluid/imperative/variable_wrapper.h
+++ b/paddle/fluid/imperative/variable_wrapper.h
@@ -174,6 +174,17 @@ class VariableWrapper {
 
   std::shared_ptr<LeafVarHookPipeline>& GetLeafHooks() { return leaf_hooks_; }
 
+  uint32_t InplaceVersionSnapshot() const { return inplace_version_snapshot_; }
+
+  void ResetInplaceVersion() {
+    auto new_version = var_.CurrentInplaceVersion();
+
+    VLOG(6) << "The wrapper version of VariableWrapper '" << name_
+            << "' will be updated from " << inplace_version_snapshot_ << " to "
+            << new_version;
+    inplace_version_snapshot_ = new_version;
+  }
+
  private:
   void SetGradVar(const std::shared_ptr<VariableWrapper>& var) {
     auto shared_var = grad_var_.lock();
@@ -244,6 +255,10 @@ class VariableWrapper {
   int overrided_stop_gradient_{-1};
   bool persistable_{false};
 
+  // Used for checking whether there is any inplace operation affecting
+  // gradient calculation.
+  uint32_t inplace_version_snapshot_{0};
+
   framework::proto::VarType::Type type_{framework::proto::VarType::LOD_TENSOR};
   framework::proto::VarType::Type data_type_{framework::proto::VarType::FP32};
diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index 303dcc0e0abcdfe92dd6cb7a3b4dda78b7786577..d675782a483d1465881c8579e647a80586322fcc 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -593,6 +593,10 @@ void BindImperative(py::module *m_ptr) {
              SetTensorFromPyArray(self_tensor, self_numpy,
                                   self_tensor->place(), true);
            }
+           // NOTE(liym27):
+           // Increase the version of VarBase self because __setitem__ is an
+           // inplace operator for the VarBase self.
+           self->BumpInplaceVersion();
          })
       .def("__getitem__",
           [](std::shared_ptr<imperative::VarBase> &self, py::handle _index) {
@@ -632,6 +636,28 @@ void BindImperative(py::module *m_ptr) {
              return out;
            }
          })
+      .def("_inplace_version",
+           [](imperative::VarBase &self) -> uint32_t {
+             const auto &var = self.MutableVar();
+             PADDLE_ENFORCE_EQ(
+                 var->IsInitialized(), true,
+                 platform::errors::InvalidArgument(
+                     "Tensor of %s is Empty, please check if it has no data.",
+                     self.Name()));
+             return var->CurrentInplaceVersion();
+           })
+      .def("_bump_inplace_version",
+           [](std::shared_ptr<imperative::VarBase> &self) {
+             // NOTE(liym27): _bump_inplace_version is only used for inplace
+             // operations.
+             self->BumpInplaceVersion();
+           },
+           R"DOC(
+        **Notes**:
+            **This API is ONLY available in Dygraph mode.**
+            **This is a very low level API. Users should not use it directly.**
+        Bump the version whenever the Tensor is modified through an inplace operation.
+      )DOC")
       .def("numpy",
            [](imperative::VarBase &self) -> py::array {
              const auto &tensor =
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index 0f65bdd6f210090fdc883bed31c62fc52c741343..ab5135645a01b70ff509e8175f95ba42f59a0745 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -226,6 +226,27 @@ def monkey_patch_varbase():
 
         return self.gradient()
 
+    @property
+    def inplace_version(self):
+        """
+        The inplace version of the current Tensor.
+        The version number is incremented whenever the current Tensor is modified through an inplace operation.
+
+        **Notes: This is a read-only property**
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+                var = paddle.ones(shape=[4, 2, 3], dtype="float32")
+                print(var.inplace_version)  # 0
+
+                var[1] = 2.2
+                print(var.inplace_version)  # 1
+
+        """
+        return self._inplace_version()
+
     def __str__(self):
         """
        Convert a VarBase object to a readable string.
@@ -264,8 +285,9 @@ def monkey_patch_varbase():
         ("__bool__", __bool__), ("__nonzero__", __nonzero__),
         ("_to_static_var", _to_static_var), ("set_value", set_value),
         ("block", block), ("backward", backward), ("grad", grad),
-        ("gradient", gradient), ("__str__", __str__), ("__repr__", __str__),
-        ("__module__", "paddle"), ("__name__", "Tensor")):
+        ("inplace_version", inplace_version), ("gradient", gradient),
+        ("__str__", __str__), ("__repr__", __str__), ("__module__", "paddle"),
+        ("__name__", "Tensor")):
         setattr(core.VarBase, method_name, method)
 
     # patch math methods for varbase
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index bab0a949bcabffc584bb58616a900eea401175b3..262a750d5b428a651138fa5d4c1b076289897f59 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -13,8 +13,12 @@
 # limitations under the License.
 
 from __future__ import print_function
+
+import numpy
 import six
+import warnings
 from six.moves import reduce
+
 from ..layer_helper import LayerHelper
 from ..param_attr import ParamAttr
 from ..initializer import Initializer
@@ -27,8 +31,7 @@ from .layer_function_generator import templatedoc
 from . import utils
 from ..data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype
 from paddle.utils import deprecated
-import numpy
-import warnings
+
 from .utils import check_shape
 
 __all__ = [
@@ -556,6 +559,8 @@ def assign(input, output=None):
     """
     helper = LayerHelper('assign', **locals())
     check_type(input, 'input', (Variable, numpy.ndarray), 'assign')
+    is_inplace = True if output is not None else False
+
     if isinstance(input, Variable):
         check_dtype(
             input.dtype, 'input',
@@ -600,6 +605,9 @@ def assign(input, output=None):
                 value_name: values
             })
 
+    if is_inplace and in_dygraph_mode():
+        output._bump_inplace_version()
+
     return output
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_inplace.py b/python/paddle/fluid/tests/unittests/test_inplace.py
new file mode 100644
index 0000000000000000000000000000000000000000..45c208293e1b84dcc5263c42532869a3e6f9f806
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_inplace.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+
+import paddle
+import paddle.fluid.core as core
+
+
+class TestInplace(unittest.TestCase):
+    def test_forward_version(self):
+        with paddle.fluid.dygraph.guard():
+            var = paddle.to_tensor(np.ones((4, 2, 3)).astype(np.float32))
+            self.assertEqual(var.inplace_version, 0)
+
+            var[0] = 1.1
+            self.assertEqual(var.inplace_version, 1)
+
+            paddle.nn.functional.assign(paddle.ones(shape=[3]), var)
+
+            # NOTE(liym27): assign(input, output) is an inplace operation for output.
+            # Because assign has inplace-related processing, var.inplace_version should be 2, not 1.
+            self.assertEqual(var.inplace_version, 2)
+
+            var[2] = 3
+            self.assertEqual(var.inplace_version, 3)
+
+    def test_backward_error(self):
+        # It raises an error because the inplace operator will result
+        # in incorrect gradient computation.
+        with paddle.fluid.dygraph.guard():
+            var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
+            var_a.stop_gradient = False
+
+            var_b = var_a**2
+
+            # Here, the gradient computation will use the value of var_b
+            var_c = var_b**2
+            var_b[1:2] = 3.3  # var_b is modified inplace after using it
+
+            var_d = var_b**2
+
+            loss = paddle.nn.functional.relu(var_c + var_d)
+            with self.assertRaisesRegexp(
+                    RuntimeError,
+                    "received tensor_version:{} != wrapper_version_snapshot:{}".
+                    format(1, 0)):
+                loss.backward()
+
+    def test_backward_success_1(self):
+        # var_b is modified inplace before using it, so the inplace operator
+        # doesn't result in incorrect gradient computation.
+        with paddle.fluid.dygraph.guard():
+            var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
+            var_a.stop_gradient = False
+
+            var_b = var_a**2
+            var_b[1:2] = 3  # var_b is modified inplace before using it
+
+            # Here, the gradient computation will use the value of var_b
+            var_c = var_b**2
+            loss = var_c.sum()
+            loss.backward()
+
+    def test_backward_success_2(self):
+        # Although var_b is modified inplace after using it, it is not used in
+        # gradient computation, so the inplace operator doesn't result in
+        # incorrect gradient computation.
+        with paddle.fluid.dygraph.guard():
+            var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
+            var_a.stop_gradient = False
+
+            var_b = var_a**2
+
+            var_b[1:2] = 3  # var_b is modified inplace before using it
+
+            var_c = var_b + var_b  # Here, the grad op of sum doesn't use the value of var_b
+            loss = var_c.sum()
+
+            var_b[1:2] = 3  # var_b is modified inplace after using it
+
+            loss.backward()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
index 476372b6b6795c8f88badca60a8e54c174d0175a..1f101a17da986f8082bedd751b7cdb1f23685368 100644
--- a/python/paddle/fluid/tests/unittests/test_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -21,8 +21,6 @@ import six
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-import paddle.fluid.layers as layers
-from paddle.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_, in_dygraph_mode
 
 
 class TestVarBase(unittest.TestCase):
@@ -515,9 +513,11 @@ class TestVarBaseSetitem(unittest.TestCase):
     def _test(self, value):
         paddle.disable_static()
-        id_origin = id(self.tensor_x)
+        self.assertEqual(self.tensor_x.inplace_version, 0)
 
+        id_origin = id(self.tensor_x)
         self.tensor_x[0] = value
+        self.assertEqual(self.tensor_x.inplace_version, 1)
 
         if isinstance(value, (six.integer_types, float)):
             result = np.zeros((2, 3)).astype(np.float32) + value
@@ -529,10 +529,12 @@ class TestVarBaseSetitem(unittest.TestCase):
         self.assertEqual(id_origin, id(self.tensor_x))
 
         self.tensor_x[1:2] = value
+        self.assertEqual(self.tensor_x.inplace_version, 2)
         self.assertTrue(np.array_equal(self.tensor_x[1].numpy(), result))
         self.assertEqual(id_origin, id(self.tensor_x))
 
         self.tensor_x[...] = value
+        self.assertEqual(self.tensor_x.inplace_version, 3)
         self.assertTrue(np.array_equal(self.tensor_x[3].numpy(), result))
         self.assertEqual(id_origin, id(self.tensor_x))
 
@@ -553,5 +555,30 @@ class TestVarBaseSetitem(unittest.TestCase):
         self._test(3.3)
 
 
+class TestVarBaseInplaceVersion(unittest.TestCase):
+    def test_setitem(self):
+        paddle.disable_static()
+
+        var = paddle.ones(shape=[4, 2, 3], dtype="float32")
+        self.assertEqual(var.inplace_version, 0)
+
+        var[1] = 1
+        self.assertEqual(var.inplace_version, 1)
+
+        var[1:2] = 1
+        self.assertEqual(var.inplace_version, 2)
+
+    def test_bump_inplace_version(self):
+        paddle.disable_static()
+        var = paddle.ones(shape=[4, 2, 3], dtype="float32")
+        self.assertEqual(var.inplace_version, 0)
+
+        var._bump_inplace_version()
+        self.assertEqual(var.inplace_version, 1)
+
+        var._bump_inplace_version()
+        self.assertEqual(var.inplace_version, 2)
+
+
 if __name__ == '__main__':
     unittest.main()
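
Illustrative usage (not part of the patch): a minimal dygraph sketch of how the version counter added above is expected to behave from Python. It assumes the Paddle 2.0 dygraph API (`paddle.ones`, `VarBase.detach`) together with the `inplace_version` property and `_bump_inplace_version` method introduced in this change; the shared-counter behavior after `detach()` follows from the NOTE in tensor.h (a detached VarBase shares the same framework::Tensor, hence the same counter) rather than from a test in this patch.

    import paddle

    x = paddle.ones(shape=[2, 3], dtype="float32")
    y = x.detach()                 # y shares the underlying framework::Tensor with x
    print(x.inplace_version)       # 0
    print(y.inplace_version)       # 0

    x[0] = 2.0                     # __setitem__ bumps the shared version counter
    print(x.inplace_version)       # 1
    print(y.inplace_version)       # expected 1, since the counter is shared with x

    x._bump_inplace_version()      # low-level helper used by inplace ops such as assign(input, output)
    print(x.inplace_version)       # 2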