diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index e2bedc60d273b3d2269a266df728ebdc63962988..87bb28c0c55efcf709e31ed3d43144bb528ce0f8 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -69,6 +69,15 @@ inline std::string GradVarName(const std::string& var_name) {
   return result;
 }
 
+inline std::string OriginVarName(const std::string& grad_var_name) {
+  std::size_t pos = grad_var_name.rfind(kGradVarSuffix);
+  if (pos == std::string::npos) {
+    return grad_var_name;
+  } else {
+    return grad_var_name.substr(0, pos);
+  }
+}
+
 proto::VarType::Type GetDataTypeOfVar(const Variable* var);
 const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var);
 Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);
diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc
index ab14732e4d6eab9dd15364da02b436c10ed68a19..1623dfca6f2e3575afc70f4feabb63d514f9c518 100644
--- a/paddle/fluid/framework/operator_test.cc
+++ b/paddle/fluid/framework/operator_test.cc
@@ -288,3 +288,12 @@ TEST(OpKernel, multi_inputs) {
   auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
   op->Run(scope, cpu_place);
 }
+
+TEST(Functions, all) {
+  std::string var_name("X");
+  std::string grad_var_name = paddle::framework::GradVarName(var_name);
+  ASSERT_EQ(grad_var_name, "X@GRAD");
+  std::string original_var_name =
+      paddle::framework::OriginVarName(grad_var_name);
+  ASSERT_EQ(original_var_name, "X");
+}
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 0c07f77583c472e512c9d894f4008a5193ec6825..28ad829aa96de647d041e51b74602d13b6f81717 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -21,6 +21,7 @@
 
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/string/printf.h"
 
 namespace paddle {
@@ -31,8 +32,9 @@ using framework::Variable;
 void AddTo(Variable* src, Variable* dst) {
   framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>();
-  PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(), "%lld vs %lld",
-                 dst_tensor->numel(), src_tensor->numel());
+  PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(),
+                 "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
+                 src_tensor->numel());
   float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
   const float* src_data = src_tensor->data<float>();
   for (size_t i = 0; i < src_tensor->numel(); ++i) {
@@ -114,7 +116,7 @@ framework::LoDTensor& VarBase::Grad() {
 
 std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   if (!grad_op_desc_) {
-    VLOG(3) << "op with no grad: " << op_desc_->Type();
+    LOG(WARNING) << "op with no grad: " << op_desc_->Type();
     return {};
   }
   VLOG(3) << "op grad " << grad_op_desc_->Type();
@@ -124,20 +126,18 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   for (auto it : grad_output_vars_) {
     auto& outputs = grad_outputs[it.first];
     for (size_t i = 0; i < it.second.size(); ++i) {
-      tmp_vars.emplace_back(new framework::Variable());
-      outputs.push_back(tmp_vars.back().get());
-      outputs.back()->GetMutable<framework::LoDTensor>();
+      // Allocate a new variable
+      Variable* tmp_var = new framework::Variable();
+      tmp_var->GetMutable<framework::LoDTensor>();
+
+      tmp_vars.emplace_back(tmp_var);
+      outputs.push_back(tmp_var);
     }
-    grad_invar_desc.SetShape(
-        framework::vectorize(var->Get<framework::LoDTensor>().dims()));
-    VLOG(3)
-        << "set op grad var desc's shape size "
-        << framework::vectorize(var->Get<framework::LoDTensor>().dims()).size();
   }
 
   framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
 
-  // No need to do static infer shape here.
+  // No need to do compile time infer shape here.
   // grad_op_desc_->InferShape(*block_);
   grad_op_desc_->InferVarType(block_);
 
@@ -156,9 +156,14 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   for (auto it : grad_output_vars_) {
     auto& outputs = grad_outputs[it.first];
     auto& origin_outputs = it.second;
+
+    auto& forward_inputs = input_vars_[framework::OriginVarName(it.first)];
+
     for (size_t i = 0; i < outputs.size(); ++i) {
-      framework::Variable* orig_grad = origin_outputs[i];
-      AddTo(outputs[i], orig_grad);
+      if (!forward_inputs[i]->stop_gradient_) {
+        framework::Variable* orig_grad = origin_outputs[i];
+        AddTo(outputs[i], orig_grad);
+      }
     }
   }
   return input_vars_;
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index 4d4ea22ed217a003b3c3c90157b1884a745fc686..420ca646e6b162a93247524861342c8bcc7cbd80 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -57,7 +57,7 @@ class Tracer {
   void Trace(OpBase* op,
              const std::map<std::string, std::vector<VarBase*>>& inputs,
              const std::map<std::string, std::vector<VarBase*>>& outputs,
-             framework::BlockDesc* block) {
+             framework::BlockDesc* block, const bool stop_gradient) {
     std::map<std::string, VarBase*> vars;
 
     framework::OpDesc* op_desc = op->op_desc_;
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 21208e82091528b5392b35ec297c1db34f812cbd..5e9d196531ea6b628588a4f892cd1180e2f8860f 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -152,9 +152,9 @@ PYBIND11_MODULE(core, m) {
           [](const imperative::VarBase &self) { return self.stop_gradient_; },
           [](imperative::VarBase &self, bool stop_gradient) {
             self.stop_gradient_ = stop_gradient;
-          })
+          });
 
-  py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
+  py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
      .def(py::init<>())
      .def_property(
          "desc", [](const imperative::OpBase &self) { return self.op_desc_; },
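
For reference, a minimal standalone sketch (not part of the patch) of the name round trip that OriginVarName is meant to provide, assuming kGradVarSuffix is "@GRAD" as defined in operator.h. It uses std::string::rfind so the whole trailing suffix is located as a substring; a character-class search such as find_last_of would stop at the last matching character and strip too little (e.g. "X@GRAD" -> "X@GRA").

// Standalone sketch, not Paddle code; mirrors GradVarName/OriginVarName
// under the assumption that kGradVarSuffix == "@GRAD".
#include <cassert>
#include <iostream>
#include <string>

const char kGradVarSuffix[] = "@GRAD";

std::string GradVarName(const std::string& var_name) {
  return var_name + kGradVarSuffix;  // "X" -> "X@GRAD"
}

std::string OriginVarName(const std::string& grad_var_name) {
  // rfind locates the suffix as a whole; npos means the name carries no
  // gradient suffix and is returned unchanged.
  std::size_t pos = grad_var_name.rfind(kGradVarSuffix);
  return pos == std::string::npos ? grad_var_name
                                  : grad_var_name.substr(0, pos);
}

int main() {
  assert(OriginVarName(GradVarName("X")) == "X");  // round trip
  assert(OriginVarName("X") == "X");               // no suffix: unchanged
  std::cout << GradVarName("fc_0.w_0") << std::endl;  // prints fc_0.w_0@GRAD
  return 0;
}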