Commit 29697c2e authored by minqiyang

Add stop_gradient to VarBase to support loss function

test=develop
Parent fba3712a
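This commit adds a `stop_gradient` flag to the imperative `VarBase` (and exposes it on the Python `Variable`), so that variables such as the labels fed to a loss function are skipped when gradients are applied. A minimal usage sketch, condensed from the MNIST test updated at the bottom of this diff (it assumes the fluid imperative API of this branch and the `MNIST` layer defined in that test):

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    mnist = MNIST()  # imperative PyLayer defined in the updated test below

    img = to_variable(np.random.rand(128, 1, 28, 28).astype('float32'))
    label = to_variable(np.random.rand(128, 1).astype('int64'))
    label._stop_gradient = True  # labels carry no gradient

    predict = mnist(img)
    loss = fluid.layers.cross_entropy(predict, label)
    loss._backward()  # backward skips `label` but reaches the conv/FC parameters
```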
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
/* option optimize_for = LITE_RUNTIME; */
package paddle.framework.proto;
// Any incompatible changes to ProgramDesc and its dependencies should
......
......@@ -115,6 +115,7 @@ framework::Variable* CreateVariable(const std::string& name,
varname = string::Sprintf("%s@%d", varname, id);
}
LOG(ERROR) << "creating var " << varname;
VLOG(3) << "creating var " << varname;
framework::Variable* var = scope->Var(varname);
framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
......@@ -130,13 +131,22 @@ framework::LoDTensor& VarBase::Grad() {
}
void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
PADDLE_ENFORCE(grad->IsInitialized(), "grad %s must be initialized",
var_desc_->Name());
PADDLE_ENFORCE(grad->Get<framework::LoDTensor>().IsInitialized(),
"variable %s has NO gradient, please set stop_gradient to it",
var_desc_->Name());
VLOG(3) << "apply var grad " << var_desc_->Name() << " "
<< grad->Get<framework::LoDTensor>().data<float>()[0];
if (!grads_) {
grads_ =
CreateVariable(string::Sprintf("%s@IGrad", var_desc_->Name()),
var_->Get<framework::LoDTensor>().dims(), 0.0, scope);
}
AddTo(grad, grads_);
VLOG(3) << "grad_ after apply var grad " << var_desc_->Name() << " "
<< grads_->Get<framework::LoDTensor>().data<float>()[0];
......@@ -153,8 +163,9 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
// grad op inputs can be forward inputs, so not in grad_to_var.
continue;
}
VLOG(3) << "op grad in var " << grad_invar;
block_->FindRecursiveOrCreateVar(grad_invar);
VLOG(3) << "op grad input var " << grad_invar;
framework::VarDesc& grad_invar_desc =
block_->FindRecursiveOrCreateVar(grad_invar);
framework::Variable* var = scope->Var(grad_invar);
const std::string& invar = grad_to_var_->at(grad_invar);
for (VarBase* varbase : *output_vars_) {
......@@ -165,21 +176,33 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
break;
}
}
grad_invar_desc.SetShape(
framework::vectorize(var->Get<framework::LoDTensor>().dims()));
VLOG(3)
<< "set op grad var desc's shape size "
<< framework::vectorize(var->Get<framework::LoDTensor>().dims()).size();
}
LOG(ERROR) << "grad_op_desc_" << grad_op_desc_->Proto()->DebugString();
for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
VLOG(3) << "grad outvar " << outvar;
VLOG(3) << "op grad output var " << outvar;
block_->FindRecursiveOrCreateVar(outvar);
framework::Variable* var = scope->Var(outvar);
if (!var->IsInitialized()) {
VLOG(3) << "init op grad output var " << outvar;
framework::VarDesc* var_desc = block_->FindVar(outvar);
if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
var->GetMutable<framework::LoDTensor>();
// framework::Tensor* tensor = var->GetMutable<framework::LoDTensor>();
// tensor->mutable_data(platform::CPUPlace());
} else {
LOG(ERROR) << "tracer doesn't support yet";
}
}
VLOG(3) << "op grad output var " << outvar << " is inited";
}
grad_op_desc_->InferShape(*block_);
grad_op_desc_->InferVarType(block_);
std::unique_ptr<framework::OperatorBase> opbase =
......@@ -194,11 +217,15 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
VarBase* origin_var = (*input_vars_)[i];
for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
Variable* var = scope->FindVar(outvar);
std::string orig_var = grad_to_var_->at(outvar);
if (origin_var->var_desc_->Name() != orig_var) {
if (var->IsInitialized()) {
VLOG(3) << "get grad op output var " << outvar;
}
std::string orig_var_name = grad_to_var_->at(outvar);
if (origin_var->var_desc_->Name() != orig_var_name ||
origin_var->stop_gradient_) {
continue;
}
VLOG(3) << "apply grad " << outvar << " with origin " << orig_var;
VLOG(3) << "apply grad " << outvar << " with origin " << orig_var_name;
origin_var->ApplyGrad(scope, var);
found = true;
ret.push_back(var);
......
......@@ -29,12 +29,13 @@ class OpBase;
class VarBase {
public:
VarBase()
explicit VarBase(bool stop_gradient = false)
: pre_op_(nullptr),
pre_op_out_idx_(-1),
var_desc_(nullptr),
var_(nullptr),
grads_(nullptr) {}
grads_(nullptr),
stop_gradient_(stop_gradient) {}
virtual ~VarBase() {}
......@@ -50,6 +51,8 @@ class VarBase {
framework::VarDesc* var_desc_;
framework::Variable* var_;
framework::Variable* grads_;
bool stop_gradient_;
};
class OpBase {
......
......@@ -110,6 +110,8 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto* label = ctx.Input<Tensor>("Label");
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
LOG(ERROR) << "CROSS ENTROPY GRAD DX: "
<< ctx.op().Output(framework::GradVarName("X"));
T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
// Following computation only depends on the last dimension size. So it's
......
......@@ -111,7 +111,8 @@ PYBIND11_MODULE(core, m) {
BindException(&m);
py::class_<imperative::VarBase, PyVarBase>(m, "VarBase", R"DOC()DOC")
.def(py::init<>())
// .def(py::init<>())
.def(py::init<bool>(), py::arg("stop_gradient") = false)
.def("_run_backward",
[](imperative::VarBase &self, framework::Scope *scope) {
self.RunBackward(scope);
......@@ -129,7 +130,13 @@ PYBIND11_MODULE(core, m) {
[](imperative::VarBase &self, framework::VarDesc *var_desc) {
self.var_desc_ = var_desc;
},
py::return_value_policy::reference);
py::return_value_policy::reference)
.def_property(
"stop_gradient",
[](const imperative::VarBase &self) { return self.stop_gradient_; },
[](imperative::VarBase &self, bool stop_gradient) {
self.stop_gradient_ = stop_gradient;
});
py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
.def(py::init<>())
......
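With the binding above, `core.VarBase` accepts the flag at construction time and exposes it as a read/write property. A small sketch of the Python-visible surface, assuming a `core` module built from this branch:

```python
from paddle.fluid import core

var = core.VarBase(stop_gradient=True)  # new py::init<bool> overload
assert var.stop_gradient is True        # getter from the def_property binding
var.stop_gradient = False               # setter defined in the same binding
```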
......@@ -354,11 +354,11 @@ class Variable(object):
self.block.vars[name] = self
self.op = None
self.stop_gradient = stop_gradient
self.is_data = is_data
if _in_imperative_mode():
self._ivar = core.VarBase()
self._ivar.desc = self.desc
self._ivar.stop_gradient = stop_gradient
def _numpy(self):
scope = _imperative_tracer().get_scope()
......@@ -366,7 +366,7 @@ class Variable(object):
return np.array(tensor)
def _backward(self):
scope = _imperative_tracer().get_scope(self.block.desc)
scope = _imperative_tracer().get_scope()
self._ivar._run_backward(scope)
def _gradient(self):
......@@ -415,6 +415,14 @@ class Variable(object):
"""
self.desc = input
@property
def _stop_gradient(self):
return self._ivar.stop_gradient
@_stop_gradient.setter
def _stop_gradient(self, s):
self._ivar.stop_gradient = s
@property
def persistable(self):
return self.desc.persistable()
......
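The `_stop_gradient` property added above simply forwards to the underlying `_ivar`, so toggling it on a `Variable` controls what the C++ `ApplyGrad` path will skip. A hedged sketch of that delegation (the tensor values are placeholders):

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    label = to_variable(np.zeros((4, 1), dtype='int64'))
    label._stop_gradient = True           # delegates to label._ivar.stop_gradient
    assert label._stop_gradient is True
```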
......@@ -25,12 +25,22 @@ __all__ = ['PyLayer']
class PyLayer(core.Layer):
def __init__(self, *args, **kwargs):
self._once_built = True
from ..layer_helper import LayerHelper
self._helper = LayerHelper(type(self).__name__, **kwargs)
self._dtype = kwargs.get("dtype", core.VarDesc.VarType.FP32)
def _build_once(self, inputs):
pass
def __call__(self, *inputs):
if self._once_built:
self._build_once(*inputs)
self._once_built = False
outputs = self.forward(*inputs)
return outputs
def forward(self, *inputs):
......
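`PyLayer.__call__` now runs `_build_once` exactly once before the first `forward`, which is how the test's `SimpleImgConvPool` and `MNIST` layers set up their sublayers. A minimal subclass sketch (`TinyNet` is hypothetical; the `FC(-1, 10)` call mirrors the constructor used in the MNIST test below):

```python
import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC

class TinyNet(fluid.imperative.PyLayer):
    def _build_once(self, inputs):
        # invoked once, on the first __call__, thanks to the _once_built flag
        self._fc = FC(-1, 10)

    def forward(self, inputs):
        return self._fc(inputs)
```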
......@@ -18,14 +18,15 @@ import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.imperative.nn import Conv2D, Pool2D
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
from paddle.fluid.imperative.base import to_variable
class SimpleImgConvPool(fluid.imperative.PyLayer):
def __init__(self,
num_channels,
num_filters,
filter_size,
num_filters,
pool_size,
pool_stride,
pool_padding=0,
......@@ -81,24 +82,24 @@ class MNIST(fluid.imperative.PyLayer):
super(MNIST, self).__init__(param_attr=param_attr, bias_attr=bias_attr)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
num_channels=3,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
1, 5, 20, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
num_channels=3,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
20, 5, 50, 2, 2, act="relu")
pool_2_shape = 50 * 8 * 8
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = FC(-1,
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)))
def forward(self, inputs):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = self._fc(x)
return x
......@@ -107,8 +108,20 @@ class TestImperativeMnist(unittest.TestCase):
with fluid.imperative.guard():
mnist = MNIST()
data = np.random.rand(2, 3, 5, 5).astype('float32')
mnist(data)
x_data = np.random.rand(128, 1, 28, 28).astype('float32')
img = to_variable(x_data)
y_data = np.random.rand(128, 1).astype('int64')
label = to_variable(y_data)
label._stop_gradient = True
predict = mnist(img)
print(predict.shape, predict.dtype, label.shape, label.dtype)
out = fluid.layers.cross_entropy(predict, label)
print(out.shape, out.dtype)
out._backward()
filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
)
print(filter_grad)
# np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
# with fluid.imperative.guard():
# mlp = MLP()
......