diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto
index efdabffb9b33ddf007c13008d0f3afb7a3961eda..665adfd8cb4d9a4c286d293dcfd297d8f52d0756 100644
--- a/paddle/fluid/framework/framework.proto
+++ b/paddle/fluid/framework/framework.proto
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
+/* option optimize_for = LITE_RUNTIME; */
 package paddle.framework.proto;
 
 // Any incompatible changes to ProgramDesc and its dependencies should
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 35640ca6dc208dbabaf7528b22d020b1ff349b75..395fbd10007e274eb5ef648a6e85cbbc0631b190 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -115,6 +115,7 @@ framework::Variable* CreateVariable(const std::string& name,
     varname = string::Sprintf("%s@%d", varname, id);
   }
 
+  LOG(ERROR) << "creating var " << varname;
   VLOG(3) << "creating var " << varname;
   framework::Variable* var = scope->Var(varname);
   framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
@@ -130,13 +131,22 @@ framework::LoDTensor& VarBase::Grad() {
 }
 
 void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
+  PADDLE_ENFORCE(grad->IsInitialized(), "grad %s must be initialized",
+                 var_desc_->Name());
+
+  PADDLE_ENFORCE(grad->Get<framework::LoDTensor>().IsInitialized(),
+                 "variable %s has NO gradient, please set stop_gradient to it",
+                 var_desc_->Name());
+
   VLOG(3) << "apply var grad " << var_desc_->Name() << " "
           << grad->Get<framework::LoDTensor>().data<float>()[0];
+
   if (!grads_) {
     grads_ =
         CreateVariable(string::Sprintf("%s@IGrad", var_desc_->Name()),
                        var_->Get<framework::LoDTensor>().dims(), 0.0, scope);
   }
+
   AddTo(grad, grads_);
   VLOG(3) << "grad_ after apply var grad " << var_desc_->Name() << " "
           << grads_->Get<framework::LoDTensor>().data<float>()[0];
@@ -153,8 +163,9 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
       // grad op inputs can be forward inputs, so not in grad_to_var.
       continue;
     }
-    VLOG(3) << "op grad in var " << grad_invar;
-    block_->FindRecursiveOrCreateVar(grad_invar);
+    VLOG(3) << "op grad input var " << grad_invar;
+    framework::VarDesc& grad_invar_desc =
+        block_->FindRecursiveOrCreateVar(grad_invar);
     framework::Variable* var = scope->Var(grad_invar);
     const std::string& invar = grad_to_var_->at(grad_invar);
     for (VarBase* varbase : *output_vars_) {
@@ -165,21 +176,33 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
         break;
       }
     }
+    grad_invar_desc.SetShape(
+        framework::vectorize(var->Get<framework::LoDTensor>().dims()));
+    VLOG(3)
+        << "set op grad var desc's shape size "
+        << framework::vectorize(var->Get<framework::LoDTensor>().dims()).size();
   }
 
+  LOG(ERROR) << "grad_op_desc_" << grad_op_desc_->Proto()->DebugString();
+
   for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
-    VLOG(3) << "grad outvar " << outvar;
+    VLOG(3) << "op grad output var " << outvar;
     block_->FindRecursiveOrCreateVar(outvar);
     framework::Variable* var = scope->Var(outvar);
     if (!var->IsInitialized()) {
+      VLOG(3) << "init op grad output var " << outvar;
      framework::VarDesc* var_desc = block_->FindVar(outvar);
      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
        var->GetMutable<framework::LoDTensor>();
+        // framework::Tensor* tensor = var->GetMutable<framework::Tensor>();
+        // tensor->mutable_data<float>(platform::CPUPlace());
      } else {
        LOG(ERROR) << "tracer doesn't support yet";
      }
    }
+    VLOG(3) << "op grad output var " << outvar << " is inited";
  }
+
  grad_op_desc_->InferShape(*block_);
  grad_op_desc_->InferVarType(block_);
  std::unique_ptr<framework::OperatorBase> opbase =
@@ -194,11 +217,15 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
     VarBase* origin_var = (*input_vars_)[i];
     for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
       Variable* var = scope->FindVar(outvar);
-      std::string orig_var = grad_to_var_->at(outvar);
-      if (origin_var->var_desc_->Name() != orig_var) {
+      if (var->IsInitialized()) {
+        VLOG(3) << "get grad op output var " << outvar;
+      }
+      std::string orig_var_name = grad_to_var_->at(outvar);
+      if (origin_var->var_desc_->Name() != orig_var_name ||
+          origin_var->stop_gradient_) {
         continue;
       }
-      VLOG(3) << "apply grad " << outvar << " with origin " << orig_var;
+      VLOG(3) << "apply grad " << outvar << " with origin " << orig_var_name;
       origin_var->ApplyGrad(scope, var);
       found = true;
       ret.push_back(var);
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index faa64ff9eaae632de8878392c99f5ac9792c0f28..90cc3ae1a93a5ae0c84e40a8b5f97931e8471f43 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -29,12 +29,13 @@ class OpBase;
 
 class VarBase {
  public:
-  VarBase()
+  explicit VarBase(bool stop_gradient = false)
       : pre_op_(nullptr),
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(nullptr),
-        grads_(nullptr) {}
+        grads_(nullptr),
+        stop_gradient_(stop_gradient) {}
 
   virtual ~VarBase() {}
 
@@ -50,6 +51,8 @@ class VarBase {
   framework::VarDesc* var_desc_;
   framework::Variable* var_;
   framework::Variable* grads_;
+
+  bool stop_gradient_;
 };
 
 class OpBase {
diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h
index f123e11542d85c904a81fe2a87f59ab52511cc15..2500c0443f5f276a85ee3e93a0ec08b4eea7a728 100644
--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -110,6 +110,8 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
     auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
     auto* label = ctx.Input<Tensor>("Label");
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    LOG(ERROR) << "CROSS ENTROPY GRAD DX: "
"CROSS ENTROPY GRAD DX: " + << ctx.op().Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(ctx.GetPlace()); // Following computation only depends on the last dimension size. So it's diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index db6c88e01cf99290312e285250e63a18e5898c1a..e0d45050282ebc7108c2ff32ca65cf02be05f3d5 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -111,7 +111,8 @@ PYBIND11_MODULE(core, m) { BindException(&m); py::class_(m, "VarBase", R"DOC()DOC") - .def(py::init<>()) + // .def(py::init<>()) + .def(py::init(), py::arg("stop_gradient") = false) .def("_run_backward", [](imperative::VarBase &self, framework::Scope *scope) { self.RunBackward(scope); @@ -129,7 +130,13 @@ PYBIND11_MODULE(core, m) { [](imperative::VarBase &self, framework::VarDesc *var_desc) { self.var_desc_ = var_desc; }, - py::return_value_policy::reference); + py::return_value_policy::reference) + .def_property( + "stop_gradient", + [](const imperative::VarBase &self) { return self.stop_gradient_; }, + [](imperative::VarBase &self, bool stop_gradient) { + self.stop_gradient_ = stop_gradient; + }); py::class_(m, "OpBase", R"DOC()DOC") .def(py::init<>()) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index bcf5bc34988a1d75d9d2556cf4f2b2b29c07a0a7..dbe8fa429e4dc9115a5367097094f9b73c36d9d0 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -354,11 +354,11 @@ class Variable(object): self.block.vars[name] = self self.op = None - self.stop_gradient = stop_gradient self.is_data = is_data if _in_imperative_mode(): self._ivar = core.VarBase() self._ivar.desc = self.desc + self._ivar.stop_gradient = stop_gradient def _numpy(self): scope = _imperative_tracer().get_scope() @@ -366,7 +366,7 @@ class Variable(object): return np.array(tensor) def _backward(self): - scope = _imperative_tracer().get_scope(self.block.desc) + scope = _imperative_tracer().get_scope() self._ivar._run_backward(scope) def _gradient(self): @@ -415,6 +415,14 @@ class Variable(object): """ self.desc = input + @property + def _stop_gradient(self): + return self._ivar.stop_gradient + + @_stop_gradient.setter + def _stop_gradient(self, s): + self._ivar.stop_gradient = s + @property def persistable(self): return self.desc.persistable() diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py index 5ebc0430ccc390b302487727dba1f4dac5abbd99..80645acc8a5d103ff040c41912f75e6626dc8111 100644 --- a/python/paddle/fluid/imperative/layers.py +++ b/python/paddle/fluid/imperative/layers.py @@ -25,12 +25,22 @@ __all__ = ['PyLayer'] class PyLayer(core.Layer): def __init__(self, *args, **kwargs): + self._once_built = True + from ..layer_helper import LayerHelper self._helper = LayerHelper(type(self).__name__, **kwargs) self._dtype = kwargs.get("dtype", core.VarDesc.VarType.FP32) + def _build_once(self, inputs): + pass + def __call__(self, *inputs): + if self._once_built: + self._build_once(*inputs) + self._once_built = False + outputs = self.forward(*inputs) + return outputs def forward(self, *inputs): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index 981e9eb2d621049b67aed242037a82810a683c78..85b613bddca3e8b047b1acfdb4eb24f8658368f1 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -18,14 +18,15 @@ import 
 import paddle.fluid as fluid
 from paddle.fluid import core
-from paddle.fluid.imperative.nn import Conv2D, Pool2D
+from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
+from paddle.fluid.imperative.base import to_variable
 
 
 class SimpleImgConvPool(fluid.imperative.PyLayer):
     def __init__(self,
                  num_channels,
-                 num_filters,
                  filter_size,
+                 num_filters,
                  pool_size,
                  pool_stride,
                  pool_padding=0,
@@ -81,24 +82,24 @@ class MNIST(fluid.imperative.PyLayer):
         super(MNIST, self).__init__(param_attr=param_attr, bias_attr=bias_attr)
 
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
-            num_channels=3,
-            filter_size=5,
-            num_filters=20,
-            pool_size=2,
-            pool_stride=2,
-            act="relu")
+            1, 5, 20, 2, 2, act="relu")
 
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            num_channels=3,
-            filter_size=5,
-            num_filters=50,
-            pool_size=2,
-            pool_stride=2,
-            act="relu")
+            20, 5, 50, 2, 2, act="relu")
+
+        pool_2_shape = 50 * 8 * 8
+        SIZE = 10
+        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
+        self._fc = FC(-1,
+                      10,
+                      param_attr=fluid.param_attr.ParamAttr(
+                          initializer=fluid.initializer.NormalInitializer(
+                              loc=0.0, scale=scale)))
 
     def forward(self, inputs):
         x = self._simple_img_conv_pool_1(inputs)
         x = self._simple_img_conv_pool_2(x)
+        x = self._fc(x)
         return x
 
 
@@ -107,8 +108,20 @@ class TestImperativeMnist(unittest.TestCase):
         with fluid.imperative.guard():
             mnist = MNIST()
-            data = np.random.rand(2, 3, 5, 5).astype('float32')
-            mnist(data)
+            x_data = np.random.rand(128, 1, 28, 28).astype('float32')
+            img = to_variable(x_data)
+            y_data = np.random.rand(128, 1).astype('int64')
+            label = to_variable(y_data)
+            label._stop_gradient = True
+
+            predict = mnist(img)
+            print(predict.shape, predict.dtype, label.shape, label.dtype)
+            out = fluid.layers.cross_entropy(predict, label)
+            print(out.shape, out.dtype)
+            out._backward()
+            filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
+            )
+            print(filter_grad)
 
             # np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
             # with fluid.imperative.guard():
             #     mlp = MLP()
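
Usage sketch (reviewer note, not part of the patch): the test above exercises the two Python-facing pieces this change wires together, the PyLayer._build_once hook that __call__ now runs exactly once before the first forward(), and the stop_gradient flag that framework.Variable forwards to the C++ VarBase so OpBase::ApplyGrad skips that variable during backward. A minimal imperative-mode layer written against that API might look like the snippet below; the class name MyFC, the FC sizes, and the input shape are made up for illustration and are not taken from the diff.

import numpy as np

import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable


class MyFC(fluid.imperative.PyLayer):
    """Hypothetical layer using only the hooks shown in the diff."""

    def __init__(self):
        super(MyFC, self).__init__()
        self._fc = FC(-1, 10)

    def _build_once(self, inputs):
        # Invoked by PyLayer.__call__ once, before the first forward().
        pass

    def forward(self, inputs):
        return self._fc(inputs)


with fluid.imperative.guard():
    layer = MyFC()
    x = to_variable(np.random.rand(4, 8).astype('float32'))
    # Forwarded to core.VarBase.stop_gradient; OpBase::ApplyGrad will not
    # push a gradient back into x.
    x._stop_gradient = True
    out = layer(x)    # _build_once runs here, then forward()
    out._backward()   # backward pass from `out`, mirroring the test above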