Unverified commit 865a4598 authored by liym27, committed by GitHub

Check whether there is any inplace operation affecting gradient calculation. (#27901)

* Add a class TensorInplaceVersion to count the inplace version and put it in framework::Tensor instead of Allocation or Variable.

* Add a new attribute `_inplace_version` for VarBase.

* Raise exception if an inplace operation can result in incorrect gradient computation.

* Add a new interface _bump_inplace_version() for VarBase to bump the version whenever the Tensor is modified through an inplace operation.

* For api assign, call _bump_inplace_version() when it's an inplace operation in dynamic mode.

* Use original var_wrapper if the inplace_version is not changed.

* Replace SnapshotVarWrapperList with SnapshotVarWrapper to optimize performance.
Parent c21a9797
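In dygraph mode, the check surfaces as a runtime error during backward(). A minimal sketch of the intended behavior, mirroring the unit tests added in this commit (the exact error text may differ):

import paddle
import numpy as np

with paddle.fluid.dygraph.guard():
    var_a = paddle.to_tensor(np.ones((4, 2, 3)).astype(np.float32))
    var_a.stop_gradient = False
    var_b = var_a**2
    var_c = var_b**2    # the grad op of var_c needs the forward value of var_b
    var_b[1:2] = 3.3    # inplace write after var_b was used bumps its inplace version
    loss = var_c.sum()
    loss.backward()     # expected to raise: tensor_version != wrapper_version_snapshot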
......@@ -43,6 +43,49 @@ namespace framework {
class LoDTensor;
/*
NOTE(liym27): [ What is TensorInplaceVersion used for? ]
TensorInplaceVersion is a version counter and every Tensor has a version
counter. It's used to check whether an inplace operation will result in an
incorrect gradient calculation. Version is incremented when the data of the
Variable is modified in place.
- Question: In what scenarios will version counters be shared?
- Answer: When two Variables/VarBases share the same C++ Tensor(its Allocation
may change), both of them share the same version counter. For examples:
1. `z = paddle.assign(input=x, output=y)`: `z` shares the same version counter
as `y` because z and y are the same VarBase;
2. `y = x.detach()`: `y` shares the same version counter as `x`.
- Question: In what scenarios will version counters NOT be shared?
- Answer: When a `Variable`'s data is replaced by calling `Tensor::ShareDataWith(...)`
or `Tensor::ShareBufferWith(...)`, because the two Tensors then share the same
Allocation but not the same framework::Tensor.
- Question: Why put the inplace_version_counter_ in framework::Tensor instead
of Allocation or Variable?
- Answer:
1. A Tensor can call ResetHolder() to replace its Allocation, so if the
inplace_version_counter_ lived in the Allocation it would change on reset,
which would give confusing inplace version information.
2. If the inplace_version_counter_ lived in the Variable, different
VariableWrappers would need to share the same Variable. However, a
VariableWrapper holds a Variable object, not a pointer.
*/
class TensorInplaceVersion {
public:
explicit TensorInplaceVersion(uint32_t inplace_version = 0)
: inplace_version_(inplace_version) {}
bool IsUnique() const { return inplace_version_ == 0; }
void Bump() { ++inplace_version_; }
uint32_t CurrentVersion() const { return inplace_version_; }
private:
uint32_t inplace_version_;
};
class Tensor {
#ifdef PADDLE_WITH_MKLDNN
......@@ -189,6 +232,9 @@ class Tensor {
void ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
const proto::VarType::Type type);
TensorInplaceVersion& InplaceVersionCounter() {
return inplace_version_counter_;
}
private:
/*! holds the memory block if allocated. */
......@@ -225,6 +271,7 @@ class Tensor {
* PlaceHolder::ptr_ and where the tensor data really begins.
*/
size_t offset_;
TensorInplaceVersion inplace_version_counter_;
};
} // namespace framework
......
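As a sketch of the sharing rule described in the NOTE above (assuming the Python-side inplace_version property added later in this commit), a detached VarBase shares the same C++ Tensor and therefore the same version counter:

import paddle

paddle.disable_static()
x = paddle.ones(shape=[2, 3], dtype="float32")
y = x.detach()              # y shares the same Tensor and version counter as x
x[0] = 2.0                  # an inplace write on x bumps the shared counter
print(x.inplace_version)    # 1
print(y.inplace_version)    # 1, shared with x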
......@@ -18,8 +18,8 @@
#include <typeindex>
#include <typeinfo>
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
namespace paddle {
namespace framework {
......@@ -69,6 +69,15 @@ class Variable {
return holder_->Type();
}
private:
// This method hides type T, so it doesn't appear as a template parameter of
// Variable.
framework::TensorInplaceVersion* InplaceVersionCounter();
public:
uint32_t CurrentInplaceVersion();
void BumpInplaceVersion();
private:
struct Placeholder {
virtual ~Placeholder() PADDLE_MAY_THROW {}
......@@ -101,8 +110,48 @@ class Variable {
};
// pointers to a PlaceholderImpl object indeed.
std::unique_ptr<Placeholder> holder_;
std::shared_ptr<Placeholder> holder_;
};
inline framework::TensorInplaceVersion* Variable::InplaceVersionCounter() {
framework::TensorInplaceVersion* version_counter_ptr(nullptr);
if (IsType<framework::LoDTensor>()) {
version_counter_ptr =
&GetMutable<framework::LoDTensor>()->InplaceVersionCounter();
} else if (IsType<framework::Tensor>()) {
version_counter_ptr =
&GetMutable<framework::Tensor>()->InplaceVersionCounter();
} else if (IsType<framework::SelectedRows>()) {
version_counter_ptr = &GetMutable<framework::SelectedRows>()
->mutable_value()
->InplaceVersionCounter();
} else {
VLOG(4) << "Only supports Tensor, LoDTensor, SelectedRows to have "
"TensorInplaceVersion, but received type "
<< platform::demangle(framework::ToTypeName(Type()));
}
return version_counter_ptr;
}
inline uint32_t Variable::CurrentInplaceVersion() {
auto version_counter_ptr = InplaceVersionCounter();
if (version_counter_ptr) {
return version_counter_ptr->CurrentVersion();
} else {
return 0;
}
}
inline void Variable::BumpInplaceVersion() {
auto version_counter_ptr = InplaceVersionCounter();
if (version_counter_ptr) {
return version_counter_ptr->Bump();
} else {
VLOG(4) << "Only supports Tensor, LoDTensor, SelectedRows to have "
"TensorInplaceVersion, but received type "
<< platform::demangle(framework::ToTypeName(Type()));
}
}
} // namespace framework
} // namespace paddle
......@@ -225,6 +225,31 @@ void BasicEngine::Execute() {
}
}
VLOG(4) << "Check whether there is any inplace operation affecting "
"gradient calculation.";
for (auto& pair : bwd_ins) {
for (auto& var_wrapper : pair.second) {
auto wrapper_version_snapshot = var_wrapper->InplaceVersionSnapshot();
auto tensor_version =
var_wrapper->MutableVar()->CurrentInplaceVersion();
PADDLE_ENFORCE_EQ(
tensor_version, wrapper_version_snapshot,
platform::errors::PermissionDenied(
"Tensor '%s' used in gradient computation in grad op '%s' "
"has been "
"modified by an inplace operation. "
"Its version is %s but the expected version is %s. "
"Please fix your code to void calling an inplace operator "
"after using the Tensor which will used in gradient "
"computation.",
var_wrapper->Name(), cur_op.Type(), tensor_version,
wrapper_version_snapshot));
VLOG(6) << " The version of Tensor '" << var_wrapper->Name()
<< "' is [ " << wrapper_version_snapshot << " ]";
}
}
{
VLOG(3) << "Start to execute grad op " << cur_op.Type();
OpBase::Run(cur_op.InnerOp(), bwd_ins, tmp_outs, cur_op.Attrs(),
......
......@@ -147,7 +147,6 @@ class GradOpBaseMakerBase {
bool is_input) const {
const auto& data_map = is_input ? var_base_map_in_ : var_base_map_out_;
auto iterator = data_map.find(name);
TracedVarList<VarBase, kRole> vec_temp;
if (iterator != data_map.end()) {
vec_temp.reserve(iterator->second.size());
......@@ -226,6 +225,7 @@ class TracedGradOp {
}
auto var_wrappers = ToVarWrapperList<kRole>(vars);
if (!var_wrappers.empty()) {
op_->SetInput(name, std::move(var_wrappers),
kRole == TracedVarRole::kBackward);
......@@ -293,7 +293,8 @@ class TracedGradOp {
var->OverridedStopGradient()))) {
result.emplace_back();
} else {
result.emplace_back(var->SharedVar());
auto var_wrapper = SnapshotVarWrapper(var->SharedVar());
result.emplace_back(var_wrapper);
has_valid = true;
}
}
......@@ -304,6 +305,26 @@ class TracedGradOp {
return result;
}
// Get a snapshot of VariableWrapper at a certain inplace version.
// The inplace version number of VariableWrapper is used for inplace
// detection in gradient computation.
static const std::shared_ptr<VariableWrapper> SnapshotVarWrapper(
const std::shared_ptr<VariableWrapper>& var_wrapper) {
// NOTE(liym27):
// Use the original var_wrapper if its inplace_version has not
// changed. Otherwise, always making a copy would affect the accuracy
// of the model results and break double grad.
if (!var_wrapper->MutableVar()->IsInitialized() ||
var_wrapper->InplaceVersionSnapshot() ==
var_wrapper->MutableVar()->CurrentInplaceVersion()) {
return var_wrapper;
} else {
VariableWrapper new_var_wrapper = *var_wrapper.get();
new_var_wrapper.ResetInplaceVersion();
return std::make_shared<VariableWrapper>(new_var_wrapper);
}
}
private:
const std::shared_ptr<GradOpNode>& node_;
OpBase* op_;
......
......@@ -278,6 +278,15 @@ std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
}
}
void VarBase::BumpInplaceVersion() {
PADDLE_ENFORCE_EQ(
Var().IsInitialized(), true,
platform::errors::InvalidArgument(
"Tensor %s has not been initialized, please check if it has no data.",
Name()));
MutableVar()->BumpInplaceVersion();
}
void OpBase::SetType(const std::string& type) {
op_ = framework::OpRegistry::CreateOp(type, {}, {}, {}, false);
}
......
......@@ -202,6 +202,8 @@ class VarBase {
std::shared_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
const bool blocking) const;
void BumpInplaceVersion();
private:
/**
* NOTE(zengjinle): never remove the const qualifier of `var_` if you are
......
......@@ -174,6 +174,17 @@ class VariableWrapper {
std::shared_ptr<LeafVarHookPipeline>& GetLeafHooks() { return leaf_hooks_; }
uint32_t InplaceVersionSnapshot() const { return inplace_version_snapshot_; }
void ResetInplaceVersion() {
auto new_version = var_.CurrentInplaceVersion();
VLOG(6) << "The wrapper version of VariableWrapper '" << name_
<< "' will be updated from " << inplace_version_snapshot_ << "to "
<< new_version;
inplace_version_snapshot_ = new_version;
}
private:
void SetGradVar(const std::shared_ptr<VariableWrapper>& var) {
auto shared_var = grad_var_.lock();
......@@ -244,6 +255,10 @@ class VariableWrapper {
int overrided_stop_gradient_{-1};
bool persistable_{false};
// Used for checking whether there is any inplace operation affecting gradient
// calculation.
uint32_t inplace_version_snapshot_{0};
framework::proto::VarType::Type type_{framework::proto::VarType::LOD_TENSOR};
framework::proto::VarType::Type data_type_{framework::proto::VarType::FP32};
......
......@@ -593,6 +593,10 @@ void BindImperative(py::module *m_ptr) {
SetTensorFromPyArray(self_tensor, self_numpy,
self_tensor->place(), true);
}
// NOTE(liym27):
// Increase the version of VarBase self because __setitem__ is an
// inplace operator for the VarBase self.
self->BumpInplaceVersion();
})
.def("__getitem__",
[](std::shared_ptr<imperative::VarBase> &self, py::handle _index) {
......@@ -632,6 +636,28 @@ void BindImperative(py::module *m_ptr) {
return out;
}
})
.def("_inplace_version",
[](imperative::VarBase &self) -> uint32_t {
const auto &var = self.MutableVar();
PADDLE_ENFORCE_EQ(
var->IsInitialized(), true,
platform::errors::InvalidArgument(
"Tensor of %s is Empty, please check if it has no data.",
self.Name()));
return var->CurrentInplaceVersion();
})
.def("_bump_inplace_version",
[](std::shared_ptr<imperative::VarBase> &self) {
// NOTE(liym27): _bump_inplace_version is only used for inplace
// operations
self->BumpInplaceVersion();
},
R"DOC(
**Notes**:
**This API is ONLY available in Dygraph mode.**
**This is a very low level API. Users should not use it directly.**
Bump the version whenever the Tensor is modified through an inplace operation.
)DOC")
.def("numpy",
[](imperative::VarBase &self) -> py::array {
const auto &tensor =
......
......@@ -226,6 +226,27 @@ def monkey_patch_varbase():
return self.gradient()
@property
def inplace_version(self):
"""
The inplace version of the current Tensor.
The version number is incremented whenever the current Tensor is modified through an inplace operation.
**Notes: This is a read-only property**
Examples:
.. code-block:: python
import paddle
var = paddle.ones(shape=[4, 2, 3], dtype="float32")
print(var.inplace_version) # 0
var[1] = 2.2
print(var.inplace_version) # 1
"""
return self._inplace_version()
def __str__(self):
"""
Convert a VarBase object to a readable string.
......@@ -264,8 +285,9 @@ def monkey_patch_varbase():
("__bool__", __bool__), ("__nonzero__", __nonzero__),
("_to_static_var", _to_static_var), ("set_value", set_value),
("block", block), ("backward", backward), ("grad", grad),
("gradient", gradient), ("__str__", __str__), ("__repr__", __str__),
("__module__", "paddle"), ("__name__", "Tensor")):
("inplace_version", inplace_version), ("gradient", gradient),
("__str__", __str__), ("__repr__", __str__), ("__module__", "paddle"),
("__name__", "Tensor")):
setattr(core.VarBase, method_name, method)
# patch math methods for varbase
......
......@@ -13,8 +13,12 @@
# limitations under the License.
from __future__ import print_function
import numpy
import six
import warnings
from six.moves import reduce
from ..layer_helper import LayerHelper
from ..param_attr import ParamAttr
from ..initializer import Initializer
......@@ -27,8 +31,7 @@ from .layer_function_generator import templatedoc
from . import utils
from ..data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype
from paddle.utils import deprecated
import numpy
import warnings
from .utils import check_shape
__all__ = [
......@@ -556,6 +559,8 @@ def assign(input, output=None):
"""
helper = LayerHelper('assign', **locals())
check_type(input, 'input', (Variable, numpy.ndarray), 'assign')
is_inplace = True if output is not None else False
if isinstance(input, Variable):
check_dtype(
input.dtype, 'input',
......@@ -600,6 +605,9 @@ def assign(input, output=None):
value_name: values
})
if is_inplace and in_dygraph_mode():
output._bump_inplace_version()
return output
......
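For example, a sketch of the dygraph behavior above: calling assign with an explicit output is treated as an inplace write into that output, so its version is bumped (this mirrors the assign case in the new unit test):

import paddle

paddle.disable_static()
y = paddle.ones(shape=[3], dtype="float32")
print(y.inplace_version)    # 0
paddle.nn.functional.assign(paddle.zeros(shape=[3]), y)    # inplace write into y
print(y.inplace_version)    # 1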
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
class TestInplace(unittest.TestCase):
def test_forward_version(self):
with paddle.fluid.dygraph.guard():
var = paddle.to_tensor(np.ones((4, 2, 3)).astype(np.float32))
self.assertEqual(var.inplace_version, 0)
var[0] = 1.1
self.assertEqual(var.inplace_version, 1)
paddle.nn.functional.assign(paddle.ones(shape=[3]), var)
# NOTE(liym27): assign(input, output) is an inplace operation for output.
# Because api assign has inplace-related processing, var.inplace_version should be 2, not 1.
self.assertEqual(var.inplace_version, 2)
var[2] = 3
self.assertEqual(var.inplace_version, 3)
def test_backward_error(self):
# It raises an error because the inplace operator will result
# in incorrect gradient computation.
with paddle.fluid.dygraph.guard():
var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
var_a.stop_gradient = False
var_b = var_a**2
# Here, the gradient computation will use the value of var_b
var_c = var_b**2
var_b[1:2] = 3.3 # var_b is modified inplace after using it
var_d = var_b**2
loss = paddle.nn.functional.relu(var_c + var_d)
with self.assertRaisesRegexp(
RuntimeError,
"received tensor_version:{} != wrapper_version_snapshot:{}".
format(1, 0)):
loss.backward()
def test_backward_success_1(self):
# var_b is modified inplace before using it, the inplace operator doesn't result
# in incorrect gradient computation.
with paddle.fluid.dygraph.guard():
var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
var_a.stop_gradient = False
var_b = var_a**2
var_b[1:2] = 3 # var_b is modified inplace before using it
# Here, the gradient computation will use the value of var_b
var_c = var_b**2
loss = var_c.sum()
loss.backward()
def test_backward_success_2(self):
# Although var_b is modified inplace after being used, it is not used in gradient computation.
# The inplace operator doesn't result in incorrect gradient computation.
with paddle.fluid.dygraph.guard():
var_a = paddle.ones(shape=[4, 2, 3], dtype="float32")
var_a.stop_gradient = False
var_b = var_a**2
var_b[1:2] = 3 # var_b is modified inplace before using it
var_c = var_b + var_b # Here, the grad op of add doesn't use the value of var_b
loss = var_c.sum()
var_b[1:2] = 3 # var_b is modified inplace after using it
loss.backward()
if __name__ == '__main__':
unittest.main()
......@@ -21,8 +21,6 @@ import six
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_, in_dygraph_mode
class TestVarBase(unittest.TestCase):
......@@ -515,9 +513,11 @@ class TestVarBaseSetitem(unittest.TestCase):
def _test(self, value):
paddle.disable_static()
id_origin = id(self.tensor_x)
self.assertEqual(self.tensor_x.inplace_version, 0)
id_origin = id(self.tensor_x)
self.tensor_x[0] = value
self.assertEqual(self.tensor_x.inplace_version, 1)
if isinstance(value, (six.integer_types, float)):
result = np.zeros((2, 3)).astype(np.float32) + value
......@@ -529,10 +529,12 @@ class TestVarBaseSetitem(unittest.TestCase):
self.assertEqual(id_origin, id(self.tensor_x))
self.tensor_x[1:2] = value
self.assertEqual(self.tensor_x.inplace_version, 2)
self.assertTrue(np.array_equal(self.tensor_x[1].numpy(), result))
self.assertEqual(id_origin, id(self.tensor_x))
self.tensor_x[...] = value
self.assertEqual(self.tensor_x.inplace_version, 3)
self.assertTrue(np.array_equal(self.tensor_x[3].numpy(), result))
self.assertEqual(id_origin, id(self.tensor_x))
......@@ -553,5 +555,30 @@ class TestVarBaseSetitem(unittest.TestCase):
self._test(3.3)
class TestVarBaseInplaceVersion(unittest.TestCase):
def test_setitem(self):
paddle.disable_static()
var = paddle.ones(shape=[4, 2, 3], dtype="float32")
self.assertEqual(var.inplace_version, 0)
var[1] = 1
self.assertEqual(var.inplace_version, 1)
var[1:2] = 1
self.assertEqual(var.inplace_version, 2)
def test_bump_inplace_version(self):
paddle.disable_static()
var = paddle.ones(shape=[4, 2, 3], dtype="float32")
self.assertEqual(var.inplace_version, 0)
var._bump_inplace_version()
self.assertEqual(var.inplace_version, 1)
var._bump_inplace_version()
self.assertEqual(var.inplace_version, 2)
if __name__ == '__main__':
unittest.main()