diff --git a/doc/survey/dynamic_graph.md b/doc/survey/dynamic_graph.md index 553a9dbe15fcdc67fc10ca479ce080c384f012e8..6b80b014b1b1dc50f425e1296f70984c9e9b1cbd 100644 --- a/doc/survey/dynamic_graph.md +++ b/doc/survey/dynamic_graph.md @@ -171,7 +171,7 @@ Pytorch chooses immediate evaluation. It avoids ever materializing a "forward gr ## What can fluid learn from them? -TBD +Please refer to `paddle/contrib/dynamic/`. # Appendix diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt index 4b19256ef4533a09162edf907f6cd51146517e46..75d01705679c669123882b1a5fdbbdb55a70a4ff 100644 --- a/paddle/contrib/CMakeLists.txt +++ b/paddle/contrib/CMakeLists.txt @@ -14,3 +14,4 @@ # add_subdirectory(inference) +add_subdirectory(dynamic) diff --git a/paddle/contrib/dynamic/CMakeLists.txt b/paddle/contrib/dynamic/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0acef17d6a2cd69d334ce57dc388a5a8d67e1936 --- /dev/null +++ b/paddle/contrib/dynamic/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +if(APPLE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") +endif(APPLE) + +cc_library(tape_variable SRCS variable.cc DEPS ${FLUID_CORE_MODULES}) +cc_library(tape SRCS tape.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} tape_variable) + +cc_test(test_tape + SRCS test_tape.cc + DEPS tape tape_variable) diff --git a/paddle/contrib/dynamic/README.md b/paddle/contrib/dynamic/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a056877093ab18500d42225b500bbb890c68dfdb --- /dev/null +++ b/paddle/contrib/dynamic/README.md @@ -0,0 +1,246 @@ +# Dynamic Graph on Fluid + +PaddlePaddle Fluid is targeting autodiff without a tape, which, however, is very challenging, and we are still a long way from there. DyNet and PyTorch provide a good design idea, the *tape*, that significantly eases the challenge. Also, DyNet provides a C++ API that is as convenient as Python but with higher efficiency and could conveniently integrate with industrial/production systems. This package, `tape`, combines the best of + +1. tape from PyTorch and DyNet +2. C++ API and core from DyNet +3. rich set of operators from PaddlePaddle + +## Overview + +We can implement a DyNet-like tape (see the survey in `doc/survey/dynamic_graph.md`) by wrapping Paddle Fluid's `Operator` +and `Variable`. + +The user API is straightforward since + +1. it is imperative and uses the host language's control flow logic. +1. it avoids extra concepts such as `Scope` and `Executor`. + +All of these benefits come at the cost of just adding one line, `reset_global_tape`, +at every iteration. + +## Code Structure + +In short, the `Tape` contains a vector of `OpHandle`s, and an `OpHandle` contains its +`type`, the pointers to the `Variable`s, and necessary attributes. 
+ +```c++ +class Variable { +public: + VariableHandle Grad(); // returns its gradient variable +private: + framework::VarDesc desc_; // compile time infershape, necessary for lazy execution + framework::Variable var_; // run time variable, holds data memory +}; + +using VariableHandle = shared_ptr<Variable>; + +struct OpHandle { + string type_; + map<string, vector<VariableHandle>> inputs_; + map<string, vector<VariableHandle>> outputs_; + AttributeMap attrs_; +}; + +class Tape { +public: + void AddOp(OpHandle); // add op + void Forward(); // execute the tape_ + void Backward(); // execute the backward of the tape_ +private: + vector<OpHandle> tape_; +}; +``` + +We use `Function` to represent layers. It takes care of parameter +initialization and `AddOp` to the Tape when it is called. + +```c++ +class Linear { + public: + Linear(int in_dim, int out_dim, const std::string &act) + : w_(new Variable("LinearWeight")), + b_(new Variable("LinearBias")), + act_(act) { + Tape init_tape; + + std::string initializer = "fill_constant"; + framework::AttributeMap attrs; + attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector<int>{in_dim, out_dim}; + attrs["value"] = 1.0f; + init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs); + + attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector<int>{out_dim}; + attrs["value"] = 1.0f; + init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs); + + init_tape.Forward(); + } + + VariableHandle operator()(VariableHandle input) { + VariableHandle pre_bias(new Variable("linear")); + get_global_tape().AddOp("mul", + {{"X", {input}}, {"Y", {w_}}}, + {{"Out", {pre_bias}}}, + {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}}); + VariableHandle pre_act(new Variable("linear")); + get_global_tape().AddOp("elementwise_add", + {{"X", {pre_bias}}, {"Y", {b_}}}, + {{"Out", {pre_act}}}, + {{"axis", 1}}); + VariableHandle post_act(new Variable("linear")); + get_global_tape().AddOp(act_, + {{"X", {pre_act}}}, + {{"Out", {post_act}}}, + {}); + 
+ return post_act; + } + + std::vector<VariableHandle> Params() { return {w_, b_}; } + + private: + VariableHandle w_; + VariableHandle b_; + std::string act_; +}; +``` + +## User API + +```c++ +// Model function +paddle::dynamic::Linear linear1(3, 3, "relu"); // init weight and bias +paddle::dynamic::Linear linear2(3, 3, "relu"); // init weight and bias +paddle::dynamic::Mean mean; + +// Optimizer +paddle::dynamic::SGD sgd(0.001); + +// Data Feeder +paddle::dynamic::Fill data_feeder(...); +VariableHandle input(new paddle::dynamic::Variable("input")); + +for (int i = 0; i < 2; ++i) { + reset_global_tape(); + + data_feeder(input); + + auto loss = mean(linear2(linear1(input))); // compile time InferShape & InferVarType + LOG(INFO) << loss.value(); // Run forward up to loss + + // Run backward, store gradient of w at w->Grad() + get_global_tape().Backward(loss); + + // Update w + sgd(linear1.Params()); + sgd(linear2.Params()); +} +``` + +
+ +digraph G { + + subgraph cluster_0 { + node [shape=record,style=filled]; + style=filled; + color=lightgrey; + linear1 [label="{type: mul | {input | {X: before_mul1 | Y: weight1}} | {output | Out: before_bias1}}"]; + elementwise_add1 [label="{type: elementwise_add | {input | {X: before_bias1 | Y: bias1}} | {output | Out: before_act1}}"]; + relu1 [label="{type: relu | {input | {X: before_act1 }} | {output | Out: after_act1}}"]; + + linear1 -> elementwise_add1->relu1; + label = "forward tape"; + } + + linear1:before_mul1->before_mul1 + linear1:weight1->weight1 + linear1:before_bias1->before_bias1 + + elementwise_add1:bias1->bias1 + elementwise_add1:before_bias1->before_bias1 + elementwise_add1:before_act1->before_act1 + + relu1:before_act1->before_act1 + relu1:after_act1->after_act1 + + subgraph cluster_1 { + node [shape=record,style=filled]; + style=filled; + color=lightgrey; + linear1_grad [label="{type: mul_grad | {input | {X: before_mul1 | Y: weight1| Out_grad: before_bias1_grad}} | {output |{X_grad: before_mul1_grad | Y_grad: weight1_grad}}}"]; + + elementwise_add1_grad [label="{type: elementwise_add_grad | {input | Out_grad: before_act1_grad} | {output |{X_grad: before_bias1_grad | Y_grad: bias1_grad}}}"]; + + relu1_grad [label="{type: relu_grad | {input | Out_grad: after_act1_grad} | {ouput | {X_grad: before_act1_grad }}}"]; + + linear1_grad -> elementwise_add1_grad ->relu1_grad [dir=back]; + label = "backward tape"; + } + + relu1_grad:after_act1_grad->after_act1_grad + relu1_grad:before_act1_grad->before_act1_grad + + elementwise_add1_grad:before_act1_grad->before_act1_grad + elementwise_add1_grad:before_bias1_grad->before_bias1_grad + elementwise_add1_grad:bias1_grad->bias1_grad + + linear1_grad:before_mul1->before_mul1 + linear1_grad:weight1->weight1 + linear1_grad:before_bias1_grad->before_bias1_grad + linear1_grad:before_mul1_grad->before_mul1_grad + linear1_grad:weight1_grad->weight1_grad + + + subgraph cluster_2 { + node [shape=record]; + label = 
"Linear1"; + weight1 + bias1 + } + + weight1 -> weight1_grad [ label="Grad()", style="dashed" ]; + bias1 -> bias1_grad [ label="Grad()", style="dashed"]; + + + +} +
+ +![Image](https://github.com/tonyyang-svail/Paddle/blob/cpp_tap/paddle/contrib/dynamic/computation_graph.png) + +## Code Reuse + +We want to stay as close to Paddle Fluid as possible. + +### Reuse All Operators + +As all Ops are registered at `OpInfoMap`, the effort of adding a new `Function` +is about 10 lines of code, similar to exposing an operator to Python. + +### Reuse Compile Time InferShape and InferVarType + +Because all the symbolic information is stored in `dynamic::Variable::desc_` instead +of `ProgramDesc.block.vars`, we create a temporary `BlockDesc` to do `InferShape` and +`InferVarType` every time we `AddOp` to the tape. + +### Reuse Operator::Run + +We use smart pointers, instead of `Scope`, to manage memory, so we create a temporary +`Scope` for every `Operator::Run()`. + +## Possible Features + +### Release Memory on Backward + +We can release memory aggressively. During backward, we can delete an `OpHandle` once +we have finished its backward. Since all variables are managed by smart pointers, a variable's +memory is automatically released when its `ref_count` goes to 0. + +### Kernel Fusion + +As a symbolic representation of the Tape is constructed before the actual +execution, it would be possible to perform graph optimization. One use case is kernel +fusion. diff --git a/paddle/contrib/dynamic/computation_graph.png b/paddle/contrib/dynamic/computation_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..6cf5ead735d5d18b204b079771e53d44483cf016 Binary files /dev/null and b/paddle/contrib/dynamic/computation_graph.png differ diff --git a/paddle/contrib/dynamic/function.h b/paddle/contrib/dynamic/function.h new file mode 100644 index 0000000000000000000000000000000000000000..6434beebf4a46db11893ef6df051c01c73af9fc0 --- /dev/null +++ b/paddle/contrib/dynamic/function.h @@ -0,0 +1,130 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "paddle/contrib/dynamic/tape.h" +#include "paddle/contrib/dynamic/variable.h" +#include "paddle/fluid/framework/type_defs.h" + +namespace paddle { +namespace dynamic { + +class Function {}; + +class Fill { + public: + Fill(const std::string &initializer, const framework::AttributeMap &attrs) + : initializer_(initializer), attrs_(attrs) {} + + void operator()(VariableHandle var) { + get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_); + } + + private: + const std::string initializer_; + const framework::AttributeMap attrs_; +}; + +class Mean { + public: + VariableHandle operator()(VariableHandle var) { + VariableHandle out(new Variable("mean")); + get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {}); + return out; + } +}; + +class Linear { + public: + Linear(int in_dim, int out_dim, const std::string &act) + : w_(new Variable("LinearWeight")), + b_(new Variable("LinearBias")), + act_(act) { + Tape init_tape; + + std::string initializer = "fill_constant"; + framework::AttributeMap attrs; + attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector{in_dim, out_dim}; + attrs["value"] = 1.0f; + init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs); + + attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector{out_dim}; + attrs["value"] = 1.0f; 
+ init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs); + + init_tape.Forward(); + } + + VariableHandle operator()(VariableHandle input) { + VariableHandle pre_bias(new Variable("linear")); + get_global_tape().AddOp("mul", + {{"X", {input}}, {"Y", {w_}}}, + {{"Out", {pre_bias}}}, + {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}}); + VariableHandle pre_act(new Variable("linear")); + get_global_tape().AddOp("elementwise_add", + {{"X", {pre_bias}}, {"Y", {b_}}}, + {{"Out", {pre_act}}}, + {{"axis", 1}}); + VariableHandle post_act(new Variable("linear")); + get_global_tape().AddOp( + act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {}); + return post_act; + } + + std::vector Params() { return {w_, b_}; } + + private: + VariableHandle w_; + VariableHandle b_; + std::string act_; +}; + +class SGD { + public: + SGD(float learning_rate) : learning_rate_(new Variable("sgd")) { + Tape init_tape; + + std::string initializer = "fill_constant"; + framework::AttributeMap attrs; + attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector{1}; + attrs["value"] = learning_rate; + init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs); + + init_tape.Forward(); + } + + void operator()(VariableHandle input) { + Tape temp_tape; + temp_tape.AddOp("sgd", + {{"Param", {input}}, + {"LearningRate", {learning_rate_}}, + {"Grad", {input->Grad()}}}, + {{"ParamOut", {input}}}, + {}); + temp_tape.Forward(); + input->ResetGrad(); + } + + private: + VariableHandle learning_rate_; +}; +} +} diff --git a/paddle/contrib/dynamic/tape.cc b/paddle/contrib/dynamic/tape.cc new file mode 100644 index 0000000000000000000000000000000000000000..fd24eabe9d00f92752cfc17492bd778f80bceb60 --- /dev/null +++ b/paddle/contrib/dynamic/tape.cc @@ -0,0 +1,265 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/contrib/dynamic/tape.h" + +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/dim.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/pybind/pybind.h" + +namespace paddle { +namespace dynamic { + +// borrowed from +// https://stackoverflow.com/questions/874134/find-if-string-ends-with-another-string-in-c +inline bool ends_with(std::string const &value, std::string const &ending) { + if (ending.size() > value.size()) return false; + return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); +} + +std::ostream &operator<<(std::ostream &os, const framework::VarDesc &var_desc) { + os << var_desc.Name(); + os << "[" << var_desc.GetType() << "]"; + os << "[" << var_desc.GetDataType() << "]"; + os << "{"; + for (auto &i : var_desc.GetShape()) { + os << i << ","; + } + os << "}"; + return os; +} + +std::string to_string(const std::string &type, + const VariableHandleMap &in_vars, + const VariableHandleMap &out_vars, + const framework::AttributeMap &attrs) { + std::stringstream ss; + ss << type << " "; + for (auto ¶m_name : in_vars) { + for (auto &var : param_name.second) { + ss << param_name.first << ":(" << var->Desc() << ") "; + } + } + for (auto ¶m_name : 
out_vars) { + for (auto &var : param_name.second) { + ss << param_name.first << ":(" << var->Desc() << ") "; + } + } + return ss.str(); +} + +framework::OpDesc CreateOpDesc(const std::string &type, + const VariableHandleMap &in_vars, + const VariableHandleMap &out_vars, + const framework::AttributeMap &attrs) { + framework::VariableNameMap inputs; + for (auto ¶m_name : in_vars) { + for (auto &var : param_name.second) { + inputs[param_name.first].emplace_back(var->Name()); + } + } + framework::VariableNameMap outputs; + for (auto ¶m_name : out_vars) { + for (auto &var : param_name.second) { + outputs[param_name.first].emplace_back(var->Name()); + } + } + return framework::OpDesc(type, inputs, outputs, attrs); +} + +void InferShapeAndVarType(const std::string &type, + const VariableHandleMap &in_vars, + VariableHandleMap *out_vars, + const framework::AttributeMap &attrs) { + framework::OpDesc op_desc = CreateOpDesc(type, in_vars, *out_vars, attrs); + + // Create a temporary block for compile-time + framework::ProgramDesc program_desc; + framework::BlockDesc *block_desc = program_desc.MutableBlock(0); + PADDLE_ENFORCE(block_desc); + + for (auto ¶m_name : in_vars) { + for (auto &var : param_name.second) { + *block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto(); + } + } + for (auto ¶m_name : *out_vars) { + for (auto &var : param_name.second) { + *block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto(); + } + } + + LOG(INFO) << "- " << to_string(type, in_vars, *out_vars, attrs); + op_desc.InferShape(*block_desc); + op_desc.InferVarType(block_desc); + for (auto ¶m_name : *out_vars) { + for (auto &var : param_name.second) { + *var->MutableDesc()->Proto() = *block_desc->Var(var->Name())->Proto(); + } + } + LOG(INFO) << "+ " << to_string(type, in_vars, *out_vars, attrs); +} + +void Tape::AddOp(const std::string &type, + const VariableHandleMap &in_vars, + VariableHandleMap out_vars, + const framework::AttributeMap &attrs) { + 
InferShapeAndVarType(type, in_vars, &out_vars, attrs); + tape_.emplace_back(type, in_vars, out_vars, attrs); +} + +// Temporary Scope for Operator::Run() +class ScopeWrapper : public framework::Scope { + public: + ScopeWrapper(const VariableHandleMap &in_vars, + const VariableHandleMap &out_vars) { + for (auto &v : in_vars) { + for (auto &vv : v.second) { + if (!vars_.count(vv->Name())) { + vars_[vv->Name()].reset(vv->Var()); + } + } + } + for (auto &v : out_vars) { + for (auto &vv : v.second) { + if (!vars_.count(vv->Name())) { + vars_[vv->Name()].reset(vv->Var()); + } + } + } + } + + ~ScopeWrapper() { + for (auto &pair : vars_) { + pair.second.release(); + } + } +}; + +void Tape::Forward() { + LOG(INFO) << "Starting forward -------------------------"; + PADDLE_ENFORCE(!has_been_backwarded_); + while (current_position_ < tape_.size()) { + OpHandle &op = tape_[current_position_]; + + // Create Output Tensor, this is only necessary for OpWithKernel + for (auto ¶m2var : op.outputs_) { + for (auto &var : param2var.second) { + var->InitializeVariable(); + } + } + + framework::OpDesc op_desc = + CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_); + ScopeWrapper scope(op.inputs_, op.outputs_); + framework::OpRegistry::CreateOp(op_desc)->Run(scope, platform::CPUPlace()); + current_position_++; + } + + LOG(INFO) << "Finishing forward -------------------------"; +} + +void Tape::Backward(VariableHandle target) { + PADDLE_ENFORCE(!has_been_backwarded_); + + Forward(); + + // TODO(tonyyang-svail): check output of last op is target + backward_tape_.reset(new Tape()); + + framework::AttributeMap attrs; + + // FIXME(tonyyang-svail): Need to infer_data_type + attrs["dtype"] = framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector{1}; + attrs["value"] = 1.0f; + backward_tape_->AddOp( + "fill_constant", {}, {{"Out", {target->Grad()}}}, attrs); + + for (auto it = tape_.rbegin(); it != tape_.rend(); ++it) { + framework::OpDesc op_desc = + 
CreateOpDesc(it->type_, it->inputs_, it->outputs_, it->attrs_); + std::unordered_map grad_to_var; + std::vector> grad_op_descs = + framework::OpInfoMap::Instance() + .Get(op_desc.Type()) + .GradOpMaker()(op_desc, {}, &grad_to_var, {}); + + for (auto &op_desc : grad_op_descs) { + std::unordered_map name2var; + for (auto ¶m2vars : it->inputs_) { + for (auto &a : param2vars.second) { + name2var[a->Name()] = a; + } + } + for (auto ¶m2vars : it->outputs_) { + for (auto &a : param2vars.second) { + name2var[a->Name()] = a; + } + } + + VariableHandleMap in_vars; + VariableHandleMap out_vars; + std::map + loop_over{{&op_desc->Inputs(), &in_vars}, + {&op_desc->Outputs(), &out_vars}}; + for (auto &each : loop_over) { + auto &vmp = *each.first; + auto &vhm = *each.second; + for (auto &p2a : vmp) { + for (auto &argu : p2a.second) { + if (name2var.count(argu)) { + vhm[p2a.first].push_back(name2var[argu]); + } else { + PADDLE_ENFORCE(ends_with(argu, framework::kGradVarSuffix), + argu.c_str()); + std::string name = argu.substr( + 0, argu.size() - std::strlen(framework::kGradVarSuffix)); + PADDLE_ENFORCE(name2var.count(name), name.c_str()); + vhm[p2a.first].push_back(name2var[name]->Grad()); + } + } + } + } + + backward_tape_->AddOp( + op_desc->Type(), in_vars, out_vars, op_desc->GetAttrMap()); + } + + // TODO(tonyyang-svail): how to fill empty grad? + // TODO(tonyyang-svail): Sum var grad is necessary + } + + backward_tape_->Forward(); + has_been_backwarded_ = true; +} + +Tape &get_global_tape() { + static Tape T; + return T; +} + +void reset_global_tape() { get_global_tape() = Tape(); } +} +} diff --git a/paddle/contrib/dynamic/tape.h b/paddle/contrib/dynamic/tape.h new file mode 100644 index 0000000000000000000000000000000000000000..9467e9d5434216fd5a5f6e7b3a959e09b11d774f --- /dev/null +++ b/paddle/contrib/dynamic/tape.h @@ -0,0 +1,62 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "paddle/contrib/dynamic/variable.h" + +namespace paddle { +namespace dynamic { + +using VariableHandleMap = std::map>; + +struct OpHandle { + OpHandle(const std::string &type, + const VariableHandleMap &in_vars, + const VariableHandleMap &out_vars, + const framework::AttributeMap &attrs) + : type_(type), inputs_(in_vars), outputs_(out_vars), attrs_(attrs) {} + + std::string type_; + VariableHandleMap inputs_; + VariableHandleMap outputs_; + framework::AttributeMap attrs_; +}; + +class Tape { + public: + void AddOp(const std::string &type, + const VariableHandleMap &in_vars, + VariableHandleMap out_vars, + const framework::AttributeMap &attrs); + void Forward(); + void Backward(VariableHandle target); + + private: + bool has_been_backwarded_ = false; + size_t current_position_ = 0; + + std::vector tape_; + std::shared_ptr backward_tape_; +}; + +Tape &get_global_tape(); + +void reset_global_tape(); +} +} diff --git a/paddle/contrib/dynamic/test_tape.cc b/paddle/contrib/dynamic/test_tape.cc new file mode 100644 index 0000000000000000000000000000000000000000..ad8ee8c756133d05f0b4a0d158f3139c48c7b82c --- /dev/null +++ b/paddle/contrib/dynamic/test_tape.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "paddle/contrib/dynamic/function.h" + +using namespace paddle::dynamic; + +TEST(Tape, TestMLP) { + LOG(INFO) << "TestMLP"; + Linear linear1(3, 3, "relu"); + Linear linear2(3, 3, "relu"); + Mean mean; + + SGD sgd(0.001); + + std::string initializer = "fill_constant"; + paddle::framework::AttributeMap attrs; + attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; + attrs["shape"] = std::vector{3, 3}; + attrs["value"] = 1.0f; + Fill filler(initializer, attrs); + + for (int i = 0; i < 2; ++i) { + reset_global_tape(); + + VariableHandle input(new Variable("input")); + filler(input); + + auto loss = mean(linear2(linear1(input))); + + get_global_tape().Backward(loss); + + for (auto w : linear1.Params()) { + sgd(w); + } + for (auto w : linear2.Params()) { + sgd(w); + } + } +} + +int main(int argc, char** argv) { + std::vector places; + places.emplace_back(paddle::platform::CPUPlace()); + paddle::platform::DeviceContextPool::Init(places); + + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/contrib/dynamic/variable.cc b/paddle/contrib/dynamic/variable.cc new file mode 100644 index 0000000000000000000000000000000000000000..8eede414f52e5b7ae4555e0b7978168700adf5c8 --- /dev/null +++ b/paddle/contrib/dynamic/variable.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/contrib/dynamic/variable.h" + +namespace paddle { +namespace dynamic { + +void Variable::InitializeVariable() { + LOG(INFO) << "Initialzing " << desc_.Name() << " as " << desc_.GetType(); + framework::proto::VarType::Type var_type = desc_.GetType(); + if (var_type == framework::proto::VarType::LOD_TENSOR) { + var_.GetMutable(); + } else if (var_type == framework::proto::VarType::SELECTED_ROWS) { + var_.GetMutable(); + } else { + PADDLE_THROW("Variable type %d is not in [LOD_TENSOR, SELECTED_ROWS]", + var_type); + } +} +} +} diff --git a/paddle/contrib/dynamic/variable.h b/paddle/contrib/dynamic/variable.h new file mode 100644 index 0000000000000000000000000000000000000000..55b5f1fda9a358cdde25f9beb99abe8006da36a5 --- /dev/null +++ b/paddle/contrib/dynamic/variable.h @@ -0,0 +1,85 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + +#include "paddle/fluid/framework/operator.h" // framework::kGradVarSuffix +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/variable.h" + +namespace paddle { +namespace dynamic { + +class Variable; +using VariableHandle = std::shared_ptr; + +/* + * Combination of + * framework::VarDesc desc_; + * framework::Variable var_; + */ +class Variable { + public: + Variable(const std::string pre_fix) + : desc_(pre_fix + std::to_string(count())) {} + + Variable(const std::string pre_fix, bool is_grad) + : desc_(pre_fix + (is_grad ? framework::kGradVarSuffix + : std::to_string(count()))) {} + + ~Variable() { LOG(INFO) << "Deleting " << Name(); } + + // Instantiate LoDTensor/SelectedRow + void InitializeVariable(); + + VariableHandle Grad() { + if (grad_ == nullptr) { + grad_.reset(new Variable(desc_.Name(), true)); + } + + return grad_; + } + + void ResetGrad() { grad_ = nullptr; } + + // Stochastic Gradient Descent with Momentum + // VariableHandle Momentum (); + + // void init(const std::string& initializer, + // const framework::AttributeMap& attrs); + + // void value() {}; + + const framework::VarDesc& Desc() const { return desc_; } + framework::VarDesc* MutableDesc() { return &desc_; } + + // TODO(tonyyang-svail): No need to expose name + std::string Name() const { return desc_.Name(); } + + framework::Variable* Var() { return &var_; } + + private: + int count() { + static int counter = 0; + return counter++; + } + + framework::VarDesc desc_; + framework::Variable var_; + + VariableHandle grad_; +}; +} +} diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index d22ac66c5c5f5c395849286f3f42e74ebd8f1f13..122ee1dab35b8c7d42392a983b5b15b7c1be7869 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -98,6 +98,7 @@ static LoD GetLoD(const 
Scope& scope, const std::string& name) { } void OperatorBase::Run(const Scope& scope, const platform::Place& place) { + VLOG(10) << "- " << DebugStringEx(&scope); if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA PADDLE_THROW("Cannot run operator on place %s", place); @@ -107,6 +108,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { #endif } RunImpl(scope, place); + VLOG(10) << "+ " << DebugStringEx(&scope); } bool OperatorBase::HasInputs(const std::string& name) const { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index 98d103d867987fc02dc66df5ac855a14b66b8f03..95b4f7c5f66a4161058955c7666be34414f5074c 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -81,6 +81,9 @@ class Scope { // Rename variable to a new name and return the new name std::string Rename(const std::string& origin_name) const; + protected: + mutable std::unordered_map> vars_; + private: // Call Scope::NewScope for a sub-scope. explicit Scope(Scope const* parent) : parent_(parent) {} @@ -93,8 +96,6 @@ class Scope { // Caller doesn't own the returned Variable. Variable* FindVarLocally(const std::string& name) const; - mutable std::unordered_map> vars_; - // Scope in `kids_` are owned by this class. mutable std::list kids_; Scope const* parent_{nullptr};