diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt
index fb57eca658aebbf34dde6eb2f93f45b10aee91c7..373d292b443b7651b785a52a6986b0a0be58ad61 100644
--- a/paddle/fluid/imperative/CMakeLists.txt
+++ b/paddle/fluid/imperative/CMakeLists.txt
@@ -1,3 +1,3 @@
-cc_library(layer SRCS layer.cc DEPS proto_desc)
+cc_library(layer SRCS layer.cc DEPS proto_desc operator)
 cc_library(tracer SRCS tracer.cc DEPS proto_desc)
 cc_library(engine SRCS engine.cc)
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index e0b6ad91e7b98be68b77fb57cb28125b5e06cb23..08379a7ed5473a12265e276c8c5962fdabd93607 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -13,7 +13,257 @@
 // limitations under the License.
 
 #include "paddle/fluid/imperative/layer.h"
+#include <deque>
+#include <limits>
+#include <map>
+#include <random>
+#include <utility>
+
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/string/printf.h"
 
 namespace paddle {
-namespace imperative {}  // namespace imperative
+namespace imperative {
+
+using framework::Variable;
+
+void AddTo(Variable* src, Variable* dst) {
+  framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
+  framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>();
+  PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(), "%lld vs %lld",
+                 dst_tensor->numel(), src_tensor->numel());
+  float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
+  const float* src_data = src_tensor->data<float>();
+  for (size_t i = 0; i < src_tensor->numel(); ++i) {
+    dst_data[i] += src_data[i];
+  }
+}
+
+class Autograd {
+ public:
+  explicit Autograd(framework::Scope* scope) : scope_(scope) {}
+
+  void RunBackward(VarBase* var, framework::Variable* grad) {
+    if (!var->pre_op_) {
+      var->ApplyGrad(scope_, grad);
+      return;
+    }
+    PADDLE_ENFORCE(var->pre_op_->op_desc_);
+    // TODO(panyx0718): Only create vars that "require_grad"
+    std::vector<Variable*> op_grads =
+        CreateOpGrads(var->pre_op_->output_vars_->size());
+    op_grads[var->pre_op_out_idx_] = grad;
+
+    std::deque<std::pair<OpBase*, std::vector<Variable*>>> ready;
+    ready.push_back(std::make_pair(var->pre_op_, op_grads));
+
+    std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->pre_op_);
+    std::map<OpBase*, std::vector<Variable*>> visited;
+
+    while (!ready.empty()) {
+      OpBase* ready_op = ready.front().first;
+      std::vector<Variable*> ready_op_grads = ready.front().second;
+      ready.pop_front();
+      std::vector<Variable*> input_grads = ready_op->ApplyGrad(scope_);
+
+      for (size_t i = 0; i < input_grads.size(); ++i) {
+        if (!input_grads[i]) continue;
+        OpBase* pre_op = ready_op->pre_ops_->at(i);
+        if (!pre_op) continue;
+        int pre_op_out_idx = ready_op->pre_ops_out_idx_->at(i);
+
+        dep_counts[pre_op] -= 1;
+        PADDLE_ENFORCE(dep_counts[pre_op] >= 0);
+        bool pre_op_ready = dep_counts[pre_op] == 0;
+
+        if (pre_op_ready) {
+          if (visited.find(pre_op) == visited.end()) {
+            PADDLE_ENFORCE(pre_op->output_vars_->size() == 1);
+            visited[pre_op] = {input_grads[i]};
+          } else {
+            std::vector<Variable*>& pre_op_grads = visited[pre_op];
+            AccumGrads(pre_op_out_idx, input_grads[i], &pre_op_grads);
+          }
+          ready.push_back(std::make_pair(pre_op, visited[pre_op]));
+        } else {
+          if (visited.find(pre_op) == visited.end()) {
+            // TODO(panyx0718): Only create vars that "require_grad"
+            visited[pre_op] = CreateOpGrads(var->pre_op_->output_vars_->size());
+          } else {
+          }
+          std::vector<Variable*>& pre_op_grads = visited[pre_op];
+          AccumGrads(pre_op_out_idx, input_grads[i], &pre_op_grads);
+        }
+      }
+    }
+  }
+
+ private:
+  void AccumGrads(int grad_idx, Variable* grad,
+                  std::vector<Variable*>* op_grads) {
+    if (!(*op_grads)[grad_idx]) {
+      // FIXME(panyx0718): This should be a deep copy.
+      (*op_grads)[grad_idx] = grad;
+      return;
+    }
+    AddTo(grad, (*op_grads)[grad_idx]);
+  }
+
+  std::map<OpBase*, int> ComputeDepCounts(OpBase* op) {
+    std::map<OpBase*, int> ret;
+
+    std::deque<OpBase*> queue;
+    queue.push_back(op);
+    std::unordered_set<OpBase*> visited;
+    visited.insert(op);
+    while (!queue.empty()) {
+      OpBase* candidate = queue.front();
+      queue.pop_front();
+      for (OpBase* pre_op : *(candidate->pre_ops_)) {
+        if (!pre_op) continue;
+        if (visited.find(pre_op) == visited.end()) {
+          visited.insert(pre_op);
+          queue.push_back(pre_op);
+        }
+        ret[pre_op] += 1;
+      }
+    }
+
+    return ret;
+  }
+
+  std::vector<Variable*> CreateOpGrads(size_t count) {
+    std::vector<Variable*> op_grads;
+    for (size_t i = 0; i < count; ++i) {
+      op_grads.push_back(nullptr);
+    }
+    return op_grads;
+  }
+
+  framework::Scope* scope_;
+};
+
+framework::Variable* CreateVariable(const std::string& name,
+                                    const framework::DDim& dim, float val,
+                                    framework::Scope* scope,
+                                    bool random_name = true) {
+  std::string varname = name;
+  if (random_name) {
+    std::mt19937 rng;
+    rng.seed(std::random_device()());
+    std::uniform_int_distribution<std::mt19937::result_type> dist6(
+        1, std::numeric_limits<int>::max());
+    int id = dist6(rng);
+    varname = string::Sprintf("%s@%d", varname, id);
+  }
+
+  LOG(ERROR) << "creating var " << varname;
+  framework::Variable* var = scope->Var(varname);
+  framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
+
+  float* data = tensor->mutable_data<float>(dim, platform::CPUPlace());
+  std::fill(data, data + tensor->numel(), val);
+  return var;
+}
+
+framework::LoDTensor& VarBase::Grad() {
+  VLOG(3) << "get var grad " << var_desc_->Name();
+  return *grads_->GetMutable<framework::LoDTensor>();
+}
+
+void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
+  VLOG(3) << "apply var grad " << var_desc_->Name() << " "
+          << grad->Get<framework::LoDTensor>().data<float>()[0];
+  if (!grads_) {
+    grads_ =
+        CreateVariable(string::Sprintf("%s@IGrad", var_desc_->Name()),
+                       var_->Get<framework::LoDTensor>().dims(), 0.0, scope);
+  }
+  AddTo(grad, grads_);
+  VLOG(3) << "grad_ after apply var grad " << var_desc_->Name() << " "
+          << grads_->Get<framework::LoDTensor>().data<float>()[0];
+}
+
+std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
+  VLOG(3) << "op grad " << grad_op_desc_->Type();
+
+  for (const std::string& invar : grad_op_desc_->InputArgumentNames()) {
+    block_->FindRecursiveOrCreateVar(invar);
+    framework::Variable* var = scope->Var(invar);
+    LOG(ERROR) << "op grad in var " << invar;
+    if (!var->IsInitialized()) {
+      framework::VarDesc* var_desc = block_->FindVar(invar);
+      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
+        LOG(ERROR) << "grad op invar init " << invar;
+        var->GetMutable<framework::LoDTensor>();
+      } else {
+        LOG(ERROR) << "tracer doesn't support yet";
+      }
+    } else {
+      var->GetMutable<framework::LoDTensor>()->type();
+    }
+  }
+
+  std::vector<Variable*> ret;
+  for (size_t i = 0; i < input_vars_->size(); ++i) {
+    ret.push_back(nullptr);
+  }
+  for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
+    LOG(ERROR) << "grad outvar " << outvar;
+    block_->FindRecursiveOrCreateVar(outvar);
+    framework::Variable* var = scope->Var(outvar);
+    if (!var->IsInitialized()) {
+      framework::VarDesc* var_desc = block_->FindVar(outvar);
+      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
+        var->GetMutable<framework::LoDTensor>();
+      } else {
+        LOG(ERROR) << "tracer doesn't support yet";
+      }
+    }
+  }
+  grad_op_desc_->InferShape(*block_);
+  grad_op_desc_->InferVarType(block_);
+  std::unique_ptr<framework::OperatorBase> opbase =
+      framework::OpRegistry::CreateOp(*grad_op_desc_);
+
+  opbase->Run(*scope, platform::CPUPlace());
+
+  for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
+    if (grad_to_var_->find(outvar) != grad_to_var_->end()) {
+      std::string origin_var = (*grad_to_var_)[outvar];
+      for (size_t i = 0; i < input_vars_->size(); ++i) {
+        VarBase* origin_in_var = (*input_vars_)[i];
+        if (origin_in_var->var_desc_->Name() == origin_var) {
+          framework::Variable* var = scope->FindVar(outvar);
+          LOG(ERROR) << "apply grad " << outvar << " with origin "
+                     << origin_var;
+          // TODO(panyx0718): Accumulate.
+          // origin_in_var->grads_ = var;
+          origin_in_var->ApplyGrad(scope, var);
+          ret[i] = var;
+          // TODO(panyx0718): There might be 2 vars with the same name. We
+          // currently assume they are the same Variable*, so it doesn't matter
+          // which one is used.
+          break;
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+void VarBase::RunBackward(framework::Scope* scope) {
+  // TODO(panyx0718): Might not be 0th, need to detect.
+  grads_ = CreateVariable(pre_op_->grad_op_desc_->InputArgumentNames()[0],
+                          var_->Get<framework::LoDTensor>().dims(), 1.0, scope,
+                          false);
+  framework::Variable* grad =
+      CreateVariable("init@imperative_grad",
+                     var_->Get<framework::LoDTensor>().dims(), 1.0, scope);
+
+  Autograd(scope).RunBackward(this, grad);
+}
+
+}  // namespace imperative
 }  // namespace paddle
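The Autograd class is the core of the patch: ComputeDepCounts walks the pre_ops_ links breadth-first and counts how many consumers each op feeds, and RunBackward then pops ops off a ready deque, calls OpBase::ApplyGrad, and only enqueues a producer once every one of its consumers has handed over its gradient, accumulating partial gradients along the way. The following is a minimal standalone sketch of that scheduling in plain Python; Var, Op, compute_dep_counts and run_backward are invented stand-ins for VarBase/OpBase (scalar gradients, single-output ops, no pre_op_out_idx_ bookkeeping), not Paddle's API:

    from collections import deque

    class Var:
        def __init__(self, value, pre_op=None):
            self.value = value
            self.grad = 0.0
            self.pre_op = pre_op        # op that produced this var, if any

    class Op:
        # One traced op: its input vars plus a function mapping the output
        # gradient to the per-input gradients.
        def __init__(self, inputs, grad_fn):
            self.inputs = inputs
            self.pre_ops = [v.pre_op for v in inputs]
            self.grad_fn = grad_fn

    def compute_dep_counts(op):
        # BFS over producer links: how many consumer edges feed into each op?
        counts, visited, queue = {}, {op}, deque([op])
        while queue:
            cur = queue.popleft()
            for pre in cur.pre_ops:
                if pre is None:
                    continue
                counts[pre] = counts.get(pre, 0) + 1
                if pre not in visited:
                    visited.add(pre)
                    queue.append(pre)
        return counts

    def run_backward(var, out_grad=1.0):
        if var.pre_op is None:
            var.grad += out_grad
            return
        dep_counts = compute_dep_counts(var.pre_op)
        pending = {}                    # op -> gradient accumulated so far
        ready = deque([(var.pre_op, out_grad)])
        while ready:
            op, grad = ready.popleft()
            for inp, g in zip(op.inputs, op.grad_fn(grad)):
                inp.grad += g
                pre = inp.pre_op
                if pre is None:
                    continue
                pending[pre] = pending.get(pre, 0.0) + g
                dep_counts[pre] -= 1
                if dep_counts[pre] == 0:   # every consumer has reported in
                    ready.append((pre, pending[pre]))

    # y = x * x, z = y + y  =>  dz/dx = 4 * x
    x = Var(3.0)
    mul = Op([x, x], lambda g: [g * x.value, g * x.value])
    y = Var(x.value * x.value, pre_op=mul)
    add = Op([y, y], lambda g: [g, g])
    z = Var(y.value + y.value, pre_op=add)
    run_backward(z)
    print(x.grad)  # 12.0

The point of the dependency counts shows up in the example: the add op consumes y twice, so the mul op is only scheduled after both contributions have been summed, which is exactly what the ready/dep_counts pair does in Autograd::RunBackward.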
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index c400b19700094e40a3d234c2c063e27a0596e1bd..0cf0c27a6a9b9f9fc78dfc331977f9ac20e87cb9 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -14,8 +14,10 @@
 
 #pragma once
 
+#include <string>
 #include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -27,26 +29,64 @@ class OpBase;
 
 class VarBase {
  public:
-  VarBase() {}
-  virtual ~VarBase() {}
+  VarBase()
+      : pre_op_(nullptr),
+        pre_op_out_idx_(-1),
+        var_desc_(nullptr),
+        var_(nullptr),
+        grads_(nullptr) {}
+
+  virtual ~VarBase() {
+    LOG(ERROR) << "deleting var";
+    LOG(ERROR) << "done deleting var";
+  }
+
+  void ApplyGrad(framework::Scope* scope, framework::Variable* grad);
+
+  void RunBackward(framework::Scope* scope);
+
+  framework::LoDTensor& Grad();
 
   OpBase* pre_op_;
+  int pre_op_out_idx_;
+
   framework::VarDesc* var_desc_;
+  framework::Variable* var_;
+  framework::Variable* grads_;
 };
 
 class OpBase {
  public:
   OpBase()
       : input_vars_(new std::vector<VarBase*>()),
-        output_vars_(new std::vector<VarBase*>()) {}
+        output_vars_(new std::vector<VarBase*>()),
+        pre_ops_(new std::vector<OpBase*>()),
+        pre_ops_out_idx_(new std::vector<int>()),
+        op_desc_(nullptr),
+        grad_op_desc_(nullptr) {}
+
   virtual ~OpBase() {
     delete input_vars_;
     delete output_vars_;
+
+    delete pre_ops_;
+    delete pre_ops_out_idx_;
+
+    if (grad_op_desc_) delete grad_op_desc_;
+    if (grad_to_var_) delete grad_to_var_;
   }
 
+  std::vector<framework::Variable*> ApplyGrad(framework::Scope* scope);
+
   std::vector<VarBase*>* input_vars_;
   std::vector<VarBase*>* output_vars_;
+  std::vector<OpBase*>* pre_ops_;
+  std::vector<int>* pre_ops_out_idx_;
   framework::OpDesc* op_desc_;
+
+  framework::OpDesc* grad_op_desc_;
+  std::unordered_map<std::string, std::string>* grad_to_var_;
+  framework::BlockDesc* block_;
 };
 
 class Layer {
@@ -58,7 +98,7 @@ class Layer {
     return vars;
   }
 
-  virtual void Backward() { LOG(ERROR) << "backward at cpp."; }
+  virtual void Backward() { LOG(ERROR) << "To support customize"; }
 };
 
 }  // namespace imperative
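The grad_to_var_ map declared above is what lets OpBase::ApplyGrad (in layer.cc) route results back to the forward graph: every output of the generated grad op (e.g. X@GRAD) maps to the forward variable it differentiates (X), and the gradient is delivered to whichever input slot holds that variable. A simplified sketch of that routing in plain Python, with a dict standing in for the scope and plain strings for the descs; route_grads and the op layout here are illustrative, not Paddle's real data structures:

    def route_grads(op, scope):
        # op["inputs"]: forward input names, in positional order
        # op["grad_outputs"]: names the grad op wrote into `scope`
        # op["grad_to_var"]: grad name -> forward var name (from CreateGradOp)
        ret = [None] * len(op["inputs"])
        for grad_name in op["grad_outputs"]:
            origin_var = op["grad_to_var"].get(grad_name)
            if origin_var is None:
                continue
            for i, in_name in enumerate(op["inputs"]):
                if in_name == origin_var:
                    ret[i] = scope[grad_name]   # gradient for input slot i
                    break
        return ret

    mul_op = {
        "inputs": ["X", "Y"],
        "grad_outputs": ["X@GRAD", "Y@GRAD"],
        "grad_to_var": {"X@GRAD": "X", "Y@GRAD": "Y"},
    }
    scope = {"X@GRAD": [2.0, 2.0], "Y@GRAD": [3.0, 3.0]}  # written by the grad op
    print(route_grads(mul_op, scope))  # [[2.0, 2.0], [3.0, 3.0]]

The break mirrors the TODO in layer.cc: if the same variable appears in two input slots, only the first slot receives the gradient pointer.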
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index 9d7bdda8ccc83aedb25b4aba5868a57d49506370..c3cc0cb33e57012dab904c2176b7647853607d6a 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -27,6 +27,20 @@
 namespace paddle {
 namespace imperative {
 
+void CreateGradOp(const framework::OpDesc& op_desc,
+                  const std::unordered_set<std::string>& no_grad_set,
+                  const std::vector<framework::BlockDesc*>& grad_sub_block,
+                  framework::OpDesc** grad_op_desc,
+                  std::unordered_map<std::string, std::string>* grad_to_var) {
+  std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs =
+      framework::OpInfoMap::Instance()
+          .Get(op_desc.Type())
+          .GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
+  PADDLE_ENFORCE(grad_op_descs.size() == 1, "Only support 1 grad op now.");
+  // TODO(panyx0718): Leak?
+  *grad_op_desc = grad_op_descs[0].release();
+}
+
 class Tracer {
  public:
   Tracer() {}
@@ -44,6 +58,7 @@ class Tracer {
     for (VarBase* input : inputs) {
       const std::string vname = input->var_desc_->Name();
       framework::Variable* var = scope_->Var(vname);
+      input->var_ = var;
       if (!var->IsInitialized()) {
         framework::VarDesc* var_desc = block_->FindVar(vname);
         if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
@@ -52,11 +67,17 @@ class Tracer {
           LOG(ERROR) << "tracer doesn't support yet";
         }
       }
+      if (input->pre_op_) {
+        op->pre_ops_->push_back(input->pre_op_);
+        op->pre_ops_out_idx_->push_back(input->pre_op_out_idx_);
+      } else {
+        op->pre_ops_->push_back(nullptr);
+      }
     }
 
     *op->output_vars_ = outputs;
-    for (auto output : outputs) {
-      const std::string vname = output->var_desc_->Name();
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      const std::string vname = outputs[i]->var_desc_->Name();
       framework::Variable* var = scope_->Var(vname);
       if (!var->IsInitialized()) {
         framework::VarDesc* var_desc = block_->FindVar(vname);
@@ -66,9 +87,18 @@ class Tracer {
           LOG(ERROR) << "tracer doesn't support yet";
         }
       }
-      output->pre_op_ = op;
+      outputs[i]->var_ = var;
+      outputs[i]->pre_op_ = op;
+      outputs[i]->pre_op_out_idx_ = i;
     }
     op_base->Run(*scope_, platform::CPUPlace());
+
+    framework::OpDesc* grad_op_desc;
+    auto grad_to_var = new std::unordered_map<std::string, std::string>();
+    CreateGradOp(*op_desc, {}, {block_}, &grad_op_desc, grad_to_var);
+    op->grad_op_desc_ = grad_op_desc;
+    op->grad_to_var_ = grad_to_var;
+    op->block_ = block_;
   }
 
   void SetScope(framework::Scope* scope) { scope_ = scope; }
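The Trace changes do three pieces of bookkeeping per forward op: record which op produced each input (pre_ops_ / pre_ops_out_idx_), stamp each output with its producing op and output position (pre_op_ / pre_op_out_idx_), and attach the matching grad op right away via CreateGradOp. A condensed Python sketch of that wiring; TracedVar, TracedOp, trace and the GRAD_OP_OF table are invented stand-ins (the table replaces the OpInfoMap/GradOpMaker lookup), not the real tracer:

    class TracedVar:
        def __init__(self, name):
            self.name = name
            self.pre_op = None          # op that produced this var
            self.pre_op_out_idx = -1

    class TracedOp:
        def __init__(self, op_type):
            self.op_type = op_type
            self.inputs, self.outputs = [], []
            self.pre_ops, self.pre_ops_out_idx = [], []
            self.grad_op_type = None    # stands in for grad_op_desc_

    GRAD_OP_OF = {"relu": "relu_grad", "elementwise_mul": "elementwise_mul_grad"}

    def trace(op, inputs, outputs):
        op.inputs = list(inputs)
        for inp in inputs:                      # who produced each input?
            op.pre_ops.append(inp.pre_op)
            op.pre_ops_out_idx.append(inp.pre_op_out_idx)
        op.outputs = list(outputs)
        for i, out in enumerate(outputs):       # stamp outputs with their producer
            out.pre_op = op
            out.pre_op_out_idx = i
        op.grad_op_type = GRAD_OP_OF[op.op_type]  # pair forward op with grad op
        return op

    x = TracedVar("x")
    relu = trace(TracedOp("relu"), [x], [TracedVar("y")])
    mul = trace(TracedOp("elementwise_mul"), relu.outputs * 2, [TracedVar("z")])
    print(mul.pre_ops[0].op_type, mul.grad_op_type)  # relu elementwise_mul_grad

Everything the backward pass in layer.cc needs, the producer links and the paired grad op, is therefore already in place by the time the forward op has finished running.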
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 7079f9fe04089f1d60c90b2d1758212e1adefd34..b8954cb12628d1f4f333956e0213ddf9c01e592c 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -1,5 +1,5 @@
-set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler)
+set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer)
 set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc)
 
 if(WITH_PYTHON)
diff --git a/paddle/fluid/pybind/imperative.h b/paddle/fluid/pybind/imperative.h
index 5834b83df904bda8389782c5dd65e6f4a23ee4dd..7a9d3a01ea81f11ac85000c3d0153f20e108789a 100644
--- a/paddle/fluid/pybind/imperative.h
+++ b/paddle/fluid/pybind/imperative.h
@@ -42,6 +42,11 @@ class PyOpBase : public imperative::OpBase {
   using imperative::OpBase::OpBase;  // Inherit constructors
 };
 
+class PyVarBase : public imperative::VarBase {
+ public:
+  using imperative::VarBase::VarBase;  // Inherit constructors
+};
+
 void BindTracer(pybind11::module* m);
 
 }  // namespace pybind
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 656d28eb2a1b30eff3793a9fc42ebe06fff8d419..ea07372a289956a31dcf40c4d66e182c93adbf58 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -34,6 +34,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/version.h"
+#include "paddle/fluid/imperative/layer.h"
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
@@ -101,8 +102,13 @@ PYBIND11_MODULE(core, m) {
 
   BindException(&m);
 
-  py::class_<imperative::VarBase>(m, "VarBase",
-                                  R"DOC()DOC")
+  py::class_<imperative::VarBase, PyVarBase>(m, "VarBase", R"DOC()DOC")
+      .def(py::init<>())
+      .def("_run_backward",
+           [](imperative::VarBase &self, framework::Scope *scope) {
+             self.RunBackward(scope);
+           })
+      .def("_grad", &imperative::VarBase::Grad)
       .def_property(
           "desc",
           [](const imperative::VarBase &self) { return self.var_desc_; },
@@ -111,13 +117,14 @@ PYBIND11_MODULE(core, m) {
           },
           py::return_value_policy::reference);
 
-  py::class_<imperative::OpBase, PyOpBase>(m, "OpBase",
-                                           R"DOC()DOC")
+  py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
       .def(py::init<>())
       .def_property(
           "desc", [](const imperative::OpBase &self) { return self.op_desc_; },
           [](imperative::OpBase &self, framework::OpDesc *op_desc) {
-            self.op_desc_ = op_desc;
+            if (op_desc) {
+              self.op_desc_ = op_desc;
+            }
           },
           py::return_value_policy::reference);
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index d4ca6901d5a51b5947fc7a4fcfaaddfaa94c4a40..ba3ffafc85f0492df2d81f545cf25496d3c0793c 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -276,6 +276,7 @@ class Variable(core.VarBase):
                  stop_gradient=False,
                  is_data=False,
                  **kwargs):
+        core.VarBase.__init__(self)
         self.block = block
         self.error_clip = error_clip
 
@@ -361,6 +362,12 @@ class Variable(core.VarBase):
         tensor = core.get_variable_tensor(scope, self.desc.name())
         return np.array(tensor)
 
+    def backward(self, scope):
+        self._run_backward(scope)
+
+    def grad(self):
+        return np.array(self._grad())
+
     def __str__(self):
         return self.to_string(True)
 
@@ -983,6 +990,7 @@ class Block(object):
         self.desc = program.desc.block(idx)
         self.vars = collections.OrderedDict()  # var_name --> var
         self.ops = list()  # operator list
+        self._op_descs = list()
         self.program = program
         self.removed_vars = collections.OrderedDict()
 
@@ -1238,13 +1246,12 @@ class Block(object):
         if _in_imperative_mode():
             op_desc = core.OpDesc()
             op = Operator(block=self, desc=op_desc, *args, **kwargs)
-            sys.stderr.write('%s %s!!!\n' % (type(op.inputs), type(op.outputs)))
             _imperative_tracer().trace(op, op.inputs, op.outputs)
-            return
-
-        op_desc = self.desc.append_op()
-        op = Operator(block=self, desc=op_desc, *args, **kwargs)
+        else:
+            op_desc = self.desc.append_op()
+            op = Operator(block=self, desc=op_desc, *args, **kwargs)
         self.ops.append(op)
+        self._op_descs.append(op_desc)
         return op
 
     def _insert_op(self, index, *args, **kwargs):
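On the Python side, Block.append_op now forks on the mode: in imperative mode the op is built around a fresh core.OpDesc and handed to the tracer, which runs it immediately, while the static-graph path still appends the desc to the block's program; in both branches the op is recorded on the block. A runnable toy version of that fork follows; OpDesc, ProgramDesc, Tracer and Block here are throwaway stand-ins, not fluid's classes:

    class OpDesc:
        def __init__(self, owner=None):
            self.owner = owner              # None => standalone (imperative mode)

    class ProgramDesc:
        def __init__(self):
            self.op_descs = []

        def append_op(self):
            desc = OpDesc(owner=self)
            self.op_descs.append(desc)
            return desc

    class Tracer:
        def __init__(self):
            self.traced = []

        def trace(self, op_desc, inputs, outputs):
            self.traced.append((op_desc, inputs, outputs))  # would execute here

    class Block:
        def __init__(self, program_desc, tracer=None):
            self.desc = program_desc
            self.ops, self._op_descs = [], []
            self.tracer = tracer            # set only in imperative mode

        def append_op(self, inputs, outputs):
            if self.tracer is not None:
                op_desc = OpDesc()                  # fresh, not in the program
                self.tracer.trace(op_desc, inputs, outputs)
            else:
                op_desc = self.desc.append_op()     # grows the static program
            self.ops.append(op_desc)                # recorded in both modes
            self._op_descs.append(op_desc)
            return op_desc

    prog = ProgramDesc()
    eager = Block(prog, tracer=Tracer())
    eager.append_op(["x"], ["y"])
    print(len(prog.op_descs), len(eager._op_descs))  # 0 1

The print shows the key property of the imperative branch: the traced op never lands in the program desc; it only exists in the block-side bookkeeping and in whatever the tracer executed.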
diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py
index af6a2167ce72142e3fb2740e7f2301fbc27f7ee9..18fe8f7c09ae34c5c7e28f07c3fd99539500cbf5 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative.py
@@ -26,6 +26,7 @@ class MyLayer(fluid.imperative.PyLayer):
 
     def forward(self, inputs):
         x = fluid.layers.relu(inputs[0])
+        self._x_for_debug = x
         return [fluid.layers.elementwise_mul(x, x)]
 
 
@@ -43,6 +44,8 @@ class TestImperative(unittest.TestCase):
             x = l(np.array([1.0, 2.0, -1.0], dtype=np.float32))[0]
             self.assertIsNotNone(x)
             sys.stderr.write("%s output: %s\n" % (x, x.numpy(scope=l._scope)))
+            x.backward(l._scope)
+            sys.stderr.write("grad %s\n" % l._x_for_debug.grad())
 
 
 if __name__ == '__main__':
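The test only prints the gradient to stderr. For this forward pass, y = relu(inp) followed by elementwise_mul(y, y) with the output gradient seeded with ones by RunBackward, the analytic value that l._x_for_debug.grad() can be compared against is 2 * relu(inp), which a quick NumPy check gives as:

    import numpy as np

    inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
    y = np.maximum(inp, 0.0)      # relu, as in MyLayer.forward
    # out = y * y and the output gradient is seeded with ones,
    # so the gradient arriving at _x_for_debug (== y) is d(y*y)/dy = 2 * y.
    print(2.0 * y)                # [2. 4. 0.]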