Unverified commit c9b4adf0, authored by Xin Pan, committed by GitHub

Merge pull request #15220 from velconia/imperative_shared_ptr

Refine imperative VarBase
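For orientation before reading the hunks below: this PR makes a `VarBase`'s gradient itself a `VarBase`, renames the gradient accessors, turns the `value`/`grad_value` pybind properties into plain methods, and moves the tracer implementation out of the header. The sketch below is a minimal usage summary pieced together from the bindings and tests in this diff; `var` and `ivar` are placeholder names, not identifiers from the patch.

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    # `var` is a regular fluid.Variable created in imperative mode;
    # `ivar` is the underlying core.VarBase it wraps.
    var = to_variable(np.ones([2, 2], dtype='float32'))
    ivar = var._ivar

    # `value` is now a method (it used to be a read/write property).
    tensor = ivar.value().get_tensor()
    print(np.array(tensor))

    # Gradient access after this PR:
    #   ivar._grad_value()  -> gradient LoDTensor      (was ivar._grad())
    #   ivar._grad_ivar()   -> gradient core.VarBase   (replaces the settable
    #                          `grad_value` property)
    #   var._gradient()     -> numpy array built on _grad_value()
```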
@@ -44,7 +44,7 @@ void AddTo(Variable* src, Variable* dst) {
                     src_tensor->numel());
   float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
   const float* src_data = src_tensor->data<float>();
-  for (size_t i = 0; i < src_tensor->numel(); ++i) {
+  for (int64_t i = 0; i < src_tensor->numel(); ++i) {
     dst_data[i] += src_data[i];
   }
 }
@@ -117,9 +117,9 @@ class Autograd {
   }
 };

-framework::LoDTensor& VarBase::Grad() {
+framework::LoDTensor& VarBase::GradValue() {
   VLOG(3) << "get var grad " << var_desc_->Name();
-  return *grads_->GetMutable<framework::LoDTensor>();
+  return *(grads_->var_->GetMutable<framework::LoDTensor>());
 }

 std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
@@ -183,7 +183,7 @@ void VarBase::RunBackward() {
   if (!pre_op_) return;

   VLOG(3) << "start backward";
-  auto grads_t = grads_->GetMutable<framework::LoDTensor>();
+  auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
   float* data = grads_t->mutable_data<float>(platform::CPUPlace());
   std::fill(data, data + grads_t->numel(), 1.0);
@@ -209,7 +209,7 @@ std::vector<VarBase*> PyLayer::Apply(int func_id,
   std::vector<Variable*> outvars = CallPythonFunc(py_funcs_[func_id], invars);

   std::vector<VarBase*> ret;
   for (Variable* v : outvars) {
-    ret.push_back(new VarBase(v, new Variable()));
+    ret.push_back(new VarBase(v, new VarBase(true)));
   }
   return ret;
 }
......
@@ -17,13 +17,14 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "pybind11/pybind11.h"
+#include "Python.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "pybind11/pybind11.h"
+#include "paddle/fluid/imperative/type_defs.h"

 namespace paddle {
 namespace imperative {
@@ -85,13 +86,19 @@ class PreparedOp {
 class OpBase;

+/* The wrapper for Variable which holds a Variable and a VarBase of its
+ * gradient. This object should be managed totally by the Python interpreter.
+ *
+ * Nearly all interfaces should be implemented in C++.
+ */
 class VarBase {
  public:
-  VarBase() : VarBase(new framework::Variable(), new framework::Variable()) {}
+  VarBase() : VarBase(new framework::Variable(), new VarBase(true)) {}

   // Owns `var` and `grad`
-  VarBase(framework::Variable* var, framework::Variable* grad)
+  VarBase(framework::Variable* var, VarBase* grad)
       : pre_op_(nullptr),
+        pre_op_out_name_(),
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(var),
@@ -100,17 +107,26 @@ class VarBase {
   explicit VarBase(bool stop_gradient)
       : pre_op_(nullptr),
+        pre_op_out_name_(),
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
         var_(new framework::Variable()),
-        grads_(new framework::Variable()),
+        grads_(stop_gradient ? nullptr : new VarBase(true)),
         stop_gradient_(stop_gradient) {}

-  virtual ~VarBase() {}
+  virtual ~VarBase() {
+    if (var_) {
+      delete var_;
+    }
+    if (grads_) {
+      delete grads_;
+    }
+  }

   void RunBackward();

-  framework::LoDTensor& Grad();
+  framework::LoDTensor& GradValue();

   inline std::string GradName() const {
     PADDLE_ENFORCE(
@@ -124,12 +140,16 @@ class VarBase {
   int pre_op_out_idx_;

   framework::VarDesc* var_desc_;

   framework::Variable* var_;
-  framework::Variable* grads_;
+  VarBase* grads_;

   bool stop_gradient_;
 };

+/* The wrapper for OpDesc which holds an OpDesc and an OpDesc of its
+ * gradient. This object should be managed totally by the Python interpreter.
+ */
 class OpBase {
  public:
   OpBase()
@@ -153,13 +173,13 @@ class OpBase {
   framework::OpDesc* grad_op_desc_;
   int backward_id_;

-  std::map<std::string, std::vector<VarBase*>> input_vars_;
-  std::map<std::string, std::vector<VarBase*>> output_vars_;
-  std::map<std::string, std::vector<OpBase*>> pre_ops_;
+  VarBasePtrMap input_vars_;
+  VarBasePtrMap output_vars_;
+  OpBasePtrMap pre_ops_;
   std::map<std::string, std::vector<int>> pre_ops_out_idx_;

-  std::map<std::string, std::vector<framework::Variable*>> grad_input_vars_;
-  std::map<std::string, std::vector<framework::Variable*>> grad_output_vars_;
+  framework::VariableValueMap grad_input_vars_;
+  framework::VariableValueMap grad_output_vars_;

   framework::BlockDesc* block_;
 };
......
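The new class comment above states that a `VarBase` owns both its value `Variable` and a `VarBase` for its gradient. A minimal sketch of how that structure surfaces through the Python bindings added later in this diff; the behaviour noted in the comments is inferred from the constructor and the `_grad_ivar` binding, not from any documentation.

```python
import numpy as np
from paddle.fluid import core

# Default construction: both the value Variable and a gradient VarBase
# (grads_) are allocated by the C++ constructor.
v = core.VarBase(False)
g = v._grad_ivar()        # the gradient wrapper is itself a core.VarBase
print(type(g))            # expected: paddle.fluid.core.VarBase

# With stop_gradient=True the constructor leaves grads_ null, so no backward
# state is kept for this variable; _grad_ivar() is then expected to be None.
leaf = core.VarBase(True)
print(leaf._grad_ivar())  # expected: None
```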
@@ -15,5 +15,199 @@
 #include "paddle/fluid/imperative/tracer.h"

 namespace paddle {
-namespace imperative {}  // namespace imperative
+namespace imperative {
+
+void CreateGradOp(const framework::OpDesc& op_desc,
+                  const std::unordered_set<std::string>& no_grad_set,
+                  const std::vector<framework::BlockDesc*>& grad_sub_block,
+                  framework::OpDesc** grad_op_desc,
+                  std::unordered_map<std::string, std::string>* grad_to_var) {
+  std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs =
+      framework::OpInfoMap::Instance()
+          .Get(op_desc.Type())
+          .GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
+  PADDLE_ENFORCE(grad_op_descs.size() == 1, "Only support 1 grad op now.");
+  // TODO(panyx0718): Leak?
+  *grad_op_desc = grad_op_descs[0].release();
+}
+
+void InitVar(framework::Variable* var, framework::Variable* grad_var) {
+  auto& var_t = var->Get<framework::LoDTensor>();
+  float* data =
+      grad_var->GetMutable<framework::LoDTensor>()->mutable_data<float>(
+          var_t.dims(), platform::CPUPlace());
+  std::fill(data, data + var_t.numel(), 0.0);
+}
+
+void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
+                   const VarBasePtrMap& outputs, framework::BlockDesc* block,
+                   const bool stop_gradient) {
+  std::map<std::string, VarBase*> vars;
+
+  framework::OpDesc* op_desc = op->op_desc_;
+  VLOG(3) << "tracer tracing " << op_desc->Type();
+  op_desc->InferShape(*block);
+  op_desc->InferVarType(block);
+  std::unique_ptr<framework::OperatorBase> op_base =
+      framework::OpRegistry::CreateOp(*op_desc);
+
+  framework::VariableValueMap invars_map;
+  framework::VariableValueMap outvars_map;
+
+  op->input_vars_ = inputs;
+  for (auto it : op->input_vars_) {
+    auto& invars = invars_map[it.first];
+    for (VarBase* inp : it.second) {
+      PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr",
+                              op->op_desc_->Type(), inp->var_desc_->Name());
+
+      invars.push_back(inp->var_);
+      vars[inp->var_desc_->Name()] = inp;
+      if (inp->pre_op_) {
+        op->pre_ops_[it.first].push_back(inp->pre_op_);
+        op->pre_ops_out_idx_[it.first].push_back(inp->pre_op_out_idx_);
+      } else {
+        op->pre_ops_[it.first].push_back(nullptr);
+      }
+      VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
+              << inp->var_->IsInitialized();
+    }
+  }
+
+  op->output_vars_ = outputs;
+  for (auto it : op->output_vars_) {
+    auto& outvars = outvars_map[it.first];
+    const std::vector<VarBase*>& outputs = it.second;
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      VarBase* out = outputs[i];
+      outvars.push_back(out->var_);
+      vars[out->var_desc_->Name()] = out;
+
+      framework::VarDesc* var_desc = block->FindVar(out->var_desc_->Name());
+      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
+        out->var_->GetMutable<framework::LoDTensor>();
+      } else {
+        LOG(ERROR) << "tracer doesn't support yet";
+      }
+      out->stop_gradient_ = stop_gradient;
+      out->pre_op_ = op;
+      out->pre_op_out_name_ = it.first;
+      out->pre_op_out_idx_ = i;
+
+      VLOG(3) << "output vname " << out->var_desc_->Name() << " "
+              << out->var_->IsInitialized();
+    }
+  }
+
+  VLOG(3) << "tracer running " << op_desc->Type();
+  framework::RuntimeContext ctx(invars_map, outvars_map);
+
+  // TODO(panyx0718): Cache p.
+  framework::OperatorWithKernel* op_kernel =
+      dynamic_cast<framework::OperatorWithKernel*>(op_base.get());
+  PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
+
+  framework::Scope scope;
+  platform::CPUPlace place;
+  PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
+  p.op.RuntimeInferShape(scope, place, ctx);
+  p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+
+  if (!stop_gradient) {
+    framework::OpDesc* grad_op_desc;
+    // TODO(panyx): Is this leaked?
+    std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var(
+        new std::unordered_map<std::string, std::string>());
+    CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var.get());
+    op->grad_op_desc_ = grad_op_desc;
+
+    for (auto it : grad_op_desc->Inputs()) {
+      auto& grad_in_vars = op->grad_input_vars_[it.first];
+      for (const std::string& grad_invar : it.second) {
+        block->FindRecursiveOrCreateVar(grad_invar);
+        auto var_it = grad_to_var->find(grad_invar);
+        if (var_it == grad_to_var->end()) {
+          auto fwd_var_it = vars.find(grad_invar);
+          PADDLE_ENFORCE(fwd_var_it != vars.end());
+          // Forward inputs or outputs.
+          grad_in_vars.push_back(fwd_var_it->second->var_);
+        } else {
+          VarBase* var = vars[var_it->second];
+          if (!var->grads_->var_->IsInitialized()) {
+            InitVar(var->var_, var->grads_->var_);
+          }
+          // Douts.
+          grad_in_vars.push_back(var->grads_->var_);
+        }
+      }
+    }
+
+    for (auto it : grad_op_desc->Outputs()) {
+      auto& grad_out_vars = op->grad_output_vars_[it.first];
+      for (const std::string& grad_outvar : it.second) {
+        block->FindRecursiveOrCreateVar(grad_outvar);
+        auto var_it = grad_to_var->find(grad_outvar);
+        PADDLE_ENFORCE(var_it != grad_to_var->end());
+        VarBase* var = vars[var_it->second];
+        if (!var->grads_->var_->IsInitialized()) {
+          InitVar(var->var_, var->grads_->var_);
+        }
+        grad_out_vars.push_back(var->grads_->var_);
+      }
+    }
+  }
+
+  op->block_ = block;
+}
+
+std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
+                                      const std::vector<VarBase*>& inputs,
+                                      bool stop_gradient) {
+  VLOG(3) << "py_trace";
+  op->input_vars_["X"] = inputs;
+  op->output_vars_["Out"] = PyLayer::Apply(op->forward_id_, inputs);
+  for (VarBase* inp : inputs) {
+    if (inp->pre_op_) {
+      op->pre_ops_["X"].push_back(inp->pre_op_);
+      op->pre_ops_out_idx_["X"].push_back(inp->pre_op_out_idx_);
+    } else {
+      op->pre_ops_["X"].push_back(nullptr);
+    }
+  }
+
+  auto& outputs = op->output_vars_["Out"];
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    VarBase* out = outputs[i];
+    out->stop_gradient_ = stop_gradient;
+    out->pre_op_ = op;
+    out->pre_op_out_name_ = "Out";
+    out->pre_op_out_idx_ = i;
+  }
+
+  if (!stop_gradient) {
+    auto& grad_input_vars = op->grad_input_vars_["X@GRAD"];
+    auto& grad_output_vars = op->grad_output_vars_["Out@GRAD"];
+
+    for (const VarBase* inp : inputs) {
+      grad_input_vars.push_back(inp->var_);
+    }
+    for (VarBase* out : outputs) {
+      grad_input_vars.push_back(out->var_);
+    }
+    for (VarBase* out : outputs) {
+      grad_input_vars.push_back(out->grads_->var_);
+      if (!grad_input_vars.back()->IsInitialized()) {
+        InitVar(out->var_, grad_input_vars.back());
+      }
+    }
+    for (const VarBase* inp : inputs) {
+      grad_output_vars.push_back(inp->grads_->var_);
+      if (!grad_output_vars.back()->IsInitialized()) {
+        InitVar(inp->var_, grad_output_vars.back());
+      }
+    }
+  }
+  return outputs;
+}
+
+}  // namespace imperative
 }  // namespace paddle
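In the relocated implementation above, `InitVar` zero-fills a gradient buffer the first time it is needed and `VarBase::RunBackward` (see layer.cc) seeds a variable's own gradient with ones before walking the recorded `pre_op_` chain. From the Python side this is exercised essentially as in the updated MNIST test further down; the sketch below assumes the `MNIST` layer class and the `x_data`/`y_data` minibatch arrays that the test sets up, which are not part of this hunk.

```python
import paddle.fluid as fluid
from paddle.fluid.imperative.base import to_variable
from paddle.fluid.optimizer import SGDOptimizer

with fluid.imperative.guard():
    mnist = MNIST()                      # layer class defined in the test file
    sgd = SGDOptimizer(learning_rate=1e-3)

    img = to_variable(x_data)            # one minibatch of images (assumed)
    label = to_variable(y_data)          # matching labels (assumed)
    label._stop_gradient = True          # exclude labels from backward

    cost = mnist(img)
    loss = fluid.layers.cross_entropy(cost, label)
    avg_loss = fluid.layers.mean(loss)

    avg_loss._backward()                 # seeds avg_loss's grad with ones, then
                                         # replays the grad ops recorded by Trace
    sgd.minimize(avg_loss)               # reads gradients via _grad_ivar()
```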
@@ -30,23 +30,9 @@ void CreateGradOp(const framework::OpDesc& op_desc,
                   const std::unordered_set<std::string>& no_grad_set,
                   const std::vector<framework::BlockDesc*>& grad_sub_block,
                   framework::OpDesc** grad_op_desc,
-                  std::unordered_map<std::string, std::string>* grad_to_var) {
-  std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs =
-      framework::OpInfoMap::Instance()
-          .Get(op_desc.Type())
-          .GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
-  PADDLE_ENFORCE(grad_op_descs.size() == 1, "Only support 1 grad op now.");
-  // TODO(panyx0718): Leak?
-  *grad_op_desc = grad_op_descs[0].release();
-}
+                  std::unordered_map<std::string, std::string>* grad_to_var);

-void InitVar(framework::Variable* var, framework::Variable* grad_var) {
-  auto& var_t = var->Get<framework::LoDTensor>();
-  float* data =
-      grad_var->GetMutable<framework::LoDTensor>()->mutable_data<float>(
-          var_t.dims(), platform::CPUPlace());
-  std::fill(data, data + var_t.numel(), 0.0);
-}
+void InitVar(framework::Variable* var, framework::Variable* grad_var);

 class Tracer {
  public:
@@ -57,172 +43,10 @@ class Tracer {
   void Trace(OpBase* op,
              const std::map<std::string, std::vector<VarBase*>>& inputs,
              const std::map<std::string, std::vector<VarBase*>>& outputs,
-             framework::BlockDesc* block, const bool stop_gradient = false) {
-    std::map<std::string, VarBase*> vars;
-
-    framework::OpDesc* op_desc = op->op_desc_;
-    VLOG(3) << "tracer tracing " << op_desc->Type();
-    op_desc->InferShape(*block);
-    op_desc->InferVarType(block);
-    std::unique_ptr<framework::OperatorBase> op_base =
-        framework::OpRegistry::CreateOp(*op_desc);
-
-    framework::VariableValueMap invars_map;
-    framework::VariableValueMap outvars_map;
-
-    op->input_vars_ = inputs;
-    for (auto it : op->input_vars_) {
-      auto& invars = invars_map[it.first];
-      for (VarBase* inp : it.second) {
-        PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr",
-                                op->op_desc_->Type(), inp->var_desc_->Name());
-
-        invars.push_back(inp->var_);
-        vars[inp->var_desc_->Name()] = inp;
-        if (inp->pre_op_) {
-          op->pre_ops_[it.first].push_back(inp->pre_op_);
-          op->pre_ops_out_idx_[it.first].push_back(inp->pre_op_out_idx_);
-        } else {
-          op->pre_ops_[it.first].push_back(nullptr);
-        }
-        VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
-                << inp->var_->IsInitialized();
-      }
-    }
-
-    op->output_vars_ = outputs;
-    for (auto it : op->output_vars_) {
-      auto& outvars = outvars_map[it.first];
-      const std::vector<VarBase*>& outputs = it.second;
-      for (size_t i = 0; i < outputs.size(); ++i) {
-        VarBase* out = outputs[i];
-        outvars.push_back(out->var_);
-        vars[out->var_desc_->Name()] = out;
-
-        framework::VarDesc* var_desc = block->FindVar(out->var_desc_->Name());
-        if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
-          out->var_->GetMutable<framework::LoDTensor>();
-        } else {
-          LOG(ERROR) << "tracer doesn't support yet";
-        }
-        out->stop_gradient_ = stop_gradient;
-        out->pre_op_ = op;
-        out->pre_op_out_name_ = it.first;
-        out->pre_op_out_idx_ = i;
-
-        VLOG(3) << "output vname " << out->var_desc_->Name() << " "
-                << out->var_->IsInitialized();
-      }
-    }
-
-    VLOG(3) << "tracer running " << op_desc->Type();
-    framework::RuntimeContext ctx(invars_map, outvars_map);
-
-    // TODO(panyx0718): Cache p.
-    framework::OperatorWithKernel* op_kernel =
-        dynamic_cast<framework::OperatorWithKernel*>(op_base.get());
-    PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
-
-    framework::Scope scope;
-    platform::CPUPlace place;
-    PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
-    p.op.RuntimeInferShape(scope, place, ctx);
-    p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
-
-    if (!stop_gradient) {
-      framework::OpDesc* grad_op_desc;
-      // TODO(panyx): Is this leaked?
-      std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var(
-          new std::unordered_map<std::string, std::string>());
-      CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var.get());
-      op->grad_op_desc_ = grad_op_desc;
-
-      for (auto it : grad_op_desc->Inputs()) {
-        auto& grad_in_vars = op->grad_input_vars_[it.first];
-        for (const std::string& grad_invar : it.second) {
-          block->FindRecursiveOrCreateVar(grad_invar);
-          auto var_it = grad_to_var->find(grad_invar);
-          if (var_it == grad_to_var->end()) {
-            auto fwd_var_it = vars.find(grad_invar);
-            PADDLE_ENFORCE(fwd_var_it != vars.end());
-            // Forward inputs or outputs.
-            grad_in_vars.push_back(fwd_var_it->second->var_);
-          } else {
-            VarBase* var = vars[var_it->second];
-            if (!var->grads_->IsInitialized()) {
-              InitVar(var->var_, var->grads_);
-            }
-            // Douts.
-            grad_in_vars.push_back(var->grads_);
-          }
-        }
-      }
-
-      for (auto it : grad_op_desc->Outputs()) {
-        auto& grad_out_vars = op->grad_output_vars_[it.first];
-        for (const std::string& grad_outvar : it.second) {
-          block->FindRecursiveOrCreateVar(grad_outvar);
-          auto var_it = grad_to_var->find(grad_outvar);
-          PADDLE_ENFORCE(var_it != grad_to_var->end());
-          VarBase* var = vars[var_it->second];
-          if (!var->grads_->IsInitialized()) {
-            InitVar(var->var_, var->grads_);
-          }
-          grad_out_vars.push_back(var->grads_);
-        }
-      }
-    }
-
-    op->block_ = block;
-  }
+             framework::BlockDesc* block, const bool stop_gradient = false);

-  std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
-                                bool stop_gradient = false) {
-    VLOG(3) << "py_trace";
-    op->input_vars_["X"] = inputs;
-    op->output_vars_["Out"] = PyLayer::Apply(op->forward_id_, inputs);
-    for (VarBase* inp : inputs) {
-      if (inp->pre_op_) {
-        op->pre_ops_["X"].push_back(inp->pre_op_);
-        op->pre_ops_out_idx_["X"].push_back(inp->pre_op_out_idx_);
-      } else {
-        op->pre_ops_["X"].push_back(nullptr);
-      }
-    }
-
-    auto& outputs = op->output_vars_["Out"];
-    for (size_t i = 0; i < outputs.size(); ++i) {
-      VarBase* out = outputs[i];
-      out->stop_gradient_ = stop_gradient;
-      out->pre_op_ = op;
-      out->pre_op_out_name_ = "Out";
-      out->pre_op_out_idx_ = i;
-    }
-
-    if (!stop_gradient) {
-      auto& grad_input_vars = op->grad_input_vars_["X@GRAD"];
-      auto& grad_output_vars = op->grad_output_vars_["Out@GRAD"];
-
-      for (const VarBase* inp : inputs) {
-        grad_input_vars.push_back(inp->var_);
-      }
-      for (VarBase* out : outputs) {
-        grad_input_vars.push_back(out->var_);
-      }
-      for (VarBase* out : outputs) {
-        grad_input_vars.push_back(out->grads_);
-        if (!grad_input_vars.back()->IsInitialized()) {
-          InitVar(out->var_, grad_input_vars.back());
-        }
-      }
-      for (const VarBase* inp : inputs) {
-        grad_output_vars.push_back(inp->grads_);
-        if (!grad_output_vars.back()->IsInitialized()) {
-          InitVar(inp->var_, grad_output_vars.back());
-        }
-      }
-    }
-    return outputs;
-  }
+  std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
+                                bool stop_gradient = false);

  private:
   framework::BlockDesc* root_block_;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <string>
#include <vector>
namespace paddle {
namespace imperative {
class VarBase;
class OpBase;
typedef std::map<std::string, std::vector<VarBase*>> VarBasePtrMap;
typedef std::map<std::string, std::vector<OpBase*>> OpBasePtrMap;
} // namespace imperative
} // namespace paddle
 set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune
-    feed_fetch_method pass_builder parallel_executor profiler layer scope_pool)
+    feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
+    tracer)

 if(WITH_PYTHON)
   list(APPEND PYBIND_DEPS py_func_op)
 endif()
......
@@ -126,25 +126,17 @@ PYBIND11_MODULE(core, m) {
   m.add_object("_cleanup",
                py::capsule([]() { ScopePool::Instance().Clear(); }));

-  py::class_<imperative::VarBase, std::shared_ptr<imperative::VarBase>>(
-      m, "VarBase", R"DOC()DOC")
+  py::class_<imperative::VarBase>(m, "VarBase", R"DOC()DOC")
       // .def(py::init<>())
       .def(py::init<bool>(), py::arg("stop_gradient") = false)
       .def("_run_backward",
            [](imperative::VarBase &self) { self.RunBackward(); })
       .def("_grad_name", &imperative::VarBase::GradName)
-      .def("_grad", &imperative::VarBase::Grad)
-      .def_property("grad_value",
-                    [](const imperative::VarBase &self) { return self.grads_; },
-                    [](imperative::VarBase &self, framework::Variable *grad) {
-                      self.grads_ = grad;
-                    },
-                    py::return_value_policy::reference)
-      .def_property("value",
-                    [](const imperative::VarBase &self) { return self.var_; },
-                    [](imperative::VarBase &self, framework::Variable *var) {
-                      self.var_ = var;
-                    },
-                    py::return_value_policy::reference)
+      .def("_grad_value", &imperative::VarBase::GradValue)
+      .def("_grad_ivar",
+           [](const imperative::VarBase &self) { return self.grads_; },
+           py::return_value_policy::reference)
+      .def("value", [](const imperative::VarBase &self) { return self.var_; },
+           py::return_value_policy::reference)
       .def_property(
           "desc",
......
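Because the writable `value` and `grad_value` properties are gone, callers can no longer swap storage by assignment; they reuse the existing gradient `VarBase` instead. A small helper sketching the pattern that the `Optimizer.minimize` hunk later in this diff adopts (`param`, `loss` and the helper name are illustrative, not part of the patch):

```python
from paddle.fluid.framework import Variable


def _grad_variable_for(param, loss):
    """Wrap an imperative parameter's gradient as a fluid.Variable.

    Mirrors the updated optimizer code: instead of assigning to the removed
    `grad_value` property, the existing gradient VarBase is handed to the
    Variable constructor through the `ivar` keyword argument.
    """
    return Variable(
        block=loss.block,
        name=param._ivar._grad_name(),
        stop_gradient=True,
        ivar=param._ivar._grad_ivar())
```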
@@ -372,30 +372,21 @@ class Variable(object):
         self.stop_gradient = stop_gradient
         self.is_data = is_data
         if _in_imperative_mode():
-            if 'ivar' in kwargs:
-                self._ivar = kwargs['ivar']
-            else:
+            self._ivar = kwargs.get("ivar", None)
+            if not self._ivar:
                 self._ivar = core.VarBase()
             self._ivar.desc = self.desc
             self._ivar.stop_gradient = stop_gradient

     def _numpy(self):
-        tensor = self._ivar.value.get_tensor()
+        tensor = self._ivar.value().get_tensor()
         return np.array(tensor)

     def _backward(self):
         self._ivar._run_backward()

     def _gradient(self):
-        return np.array(self._ivar._grad())
-
-    @property
-    def _value(self):
-        return self._ivar.value
-
-    @_value.setter
-    def _value(self, v):
-        self._ivar.value = v
+        return np.array(self._ivar._grad_value())

     def __str__(self):
         return self.to_string(True)
......
@@ -45,7 +45,7 @@ def to_variable(value, block=None):
             name=None,
             shape=value.shape,
             dtype=value.dtype)
-        var = py_var._ivar.value
+        var = py_var._ivar.value()
         tensor = var.get_tensor()
        tensor.set(value, core.CPUPlace())
         return py_var
......
@@ -55,18 +55,18 @@ class PyLayer(core.PyLayer):
         super(PyLayer, self).__init__()

     @staticmethod
-    def forward(inputs):
+    def forward(*inputs):
         raise NotImplementedError

     @staticmethod
-    def backward(douts):
+    def backward(*douts):
         raise NotImplementedError

     @classmethod
-    def __call__(cls, inputs):
+    def __call__(cls, *inputs):
         tracer = framework._imperative_tracer()
         block = framework.default_main_program().current_block()
-        inputs = [x._ivar for x in inputs]
+        ivar_inputs = [x._ivar for x in inputs]

         if not hasattr(cls, 'forward_id'):
             cls.forward_id = core.PyLayer.num_funcs() + 1
@@ -78,11 +78,11 @@ class PyLayer(core.PyLayer):
         iop.forward_id = cls.forward_id
         iop.backward_id = cls.backward_id
         block.ops.append(iop)
-        ivars = tracer.py_trace(iop, inputs, False)
+        ivars = tracer.py_trace(iop, ivar_inputs, False)
         # ivars = core.PyLayer.apply(cls.forward, inputs)
         ret = []
         for ivar in ivars:
-            tensor = ivar.value.get_tensor()
+            tensor = ivar.value().get_tensor()
             py_var = framework.Variable(
                 block,
                 type=core.VarDesc.VarType.LOD_TENSOR,
......
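With `forward`, `backward`, and `__call__` now taking positional varargs, a PyLayer is invoked as `layer(x)` rather than `layer([x])`. A minimal sketch in the style of the updated unit test (the `Identity` class name is illustrative; the call pattern and methods come from this diff):

```python
import numpy as np
import paddle.fluid as fluid


class Identity(fluid.imperative.PyLayer):
    """Pass-through PyLayer, mirroring PyLayer1/PyLayer2 in the updated test."""

    def __init__(self):
        super(Identity, self).__init__()

    @staticmethod
    def forward(input):
        return input

    @staticmethod
    def backward(input):
        return input


with fluid.imperative.guard():
    x = fluid.imperative.base.to_variable(np.ones([2, 2]))
    outs = Identity()(x)       # positional call; previously Identity()([x])
    print(outs[0]._numpy())    # expected: a 2x2 array of ones
```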
@@ -390,8 +390,8 @@ class Optimizer(object):
                 grad_var = Variable(
                     block=loss.block,
                     name=param._ivar._grad_name(),
-                    stop_gradient=True)
-                grad_var._value = param._ivar.grad_value
+                    stop_gradient=True,
+                    ivar=param._ivar._grad_ivar())
                 params_grads.append((param, grad_var))
             with program_guard(program, startup_program):
                 optimize_ops = self._create_optimization_pass(params_grads)
......
@@ -97,35 +97,35 @@ class TestImperative(unittest.TestCase):
                     super(PyLayer1, self).__init__()

                 @staticmethod
-                def forward(inputs):
-                    return inputs
+                def forward(input):
+                    return input

                 @staticmethod
-                def backward(inputs):
-                    return inputs
+                def backward(input):
+                    return input

             class PyLayer2(fluid.imperative.PyLayer):
                 def __init__(self):
                     super(PyLayer2, self).__init__()

                 @staticmethod
-                def forward(inputs):
-                    return inputs
+                def forward(input):
+                    return input

                 @staticmethod
-                def backward(inputs):
-                    return inputs
+                def backward(input):
+                    return input

             py_layer_1 = PyLayer1()
             py_layer_2 = PyLayer2()
-            py_layer_1([fluid.imperative.base.to_variable(np.ones([2, 2]))])
-            py_layer_2([fluid.imperative.base.to_variable(np.ones([2, 2]))])
+            py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2])))
+            py_layer_2(fluid.imperative.base.to_variable(np.ones([2, 2])))
             id = py_layer_1.forward_id
             self.assertGreater(id, 0)
             self.assertEqual(py_layer_1.backward_id, id + 1)
             self.assertEqual(py_layer_2.forward_id, id + 2)
             self.assertEqual(py_layer_2.backward_id, id + 3)
-            py_layer_1([fluid.imperative.base.to_variable(np.ones([2, 2]))])
+            py_layer_1(fluid.imperative.base.to_variable(np.ones([2, 2])))
             self.assertEqual(py_layer_1.forward_id, id)

     def test_pylayer(self):
@@ -133,7 +133,7 @@ class TestImperative(unittest.TestCase):
         with fluid.imperative.guard():
             my_py_layer = MyPyLayer()
             var_inp = fluid.imperative.base.to_variable(np_inp)
-            outs = my_py_layer([var_inp])
+            outs = my_py_layer(var_inp)
             dy_out = np.sum(outs[0]._numpy())
             outs[0]._backward()
             dy_grad = var_inp._gradient()
......
@@ -105,7 +105,6 @@ class TestImperativeMnist(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            # mnist = Conv2D(1, 20, 5)
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
@@ -126,16 +125,17 @@ class TestImperativeMnist(unittest.TestCase):
                 label._stop_gradient = True

                 cost = mnist(img)
-                loss = fluid.layers.reduce_mean(cost)
-                dy_out = loss._numpy()
+                loss = fluid.layers.cross_entropy(cost, label)
+                avg_loss = fluid.layers.mean(loss)
+                dy_out = avg_loss._numpy()

                 if batch_id == 0:
                     for param in fluid.default_main_program().global_block(
                     ).all_parameters():
                         dy_param_init_value[param.name] = param._numpy()

-                loss._backward()
-                sgd.minimize(loss)
+                avg_loss._backward()
+                sgd.minimize(avg_loss)
                 dy_param_value = {}
                 for param in fluid.default_main_program().global_block(
                 ).all_parameters():
@@ -147,7 +147,6 @@ class TestImperativeMnist(unittest.TestCase):

             exe = fluid.Executor(fluid.CPUPlace())

-            # mnist = Conv2D(1, 20, 5)
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
@@ -157,8 +156,9 @@ class TestImperativeMnist(unittest.TestCase):
                 name='pixel', shape=[1, 28, 28], dtype='float32')
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             cost = mnist(img)
-            loss = fluid.layers.reduce_mean(cost)
-            sgd.minimize(loss)
+            loss = fluid.layers.cross_entropy(cost, label)
+            avg_loss = fluid.layers.mean(loss)
+            sgd.minimize(avg_loss)

             # initialize params and fetch them
             static_param_init_value = {}
@@ -182,7 +182,7 @@ class TestImperativeMnist(unittest.TestCase):
                 y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                     [128, 1])

-                fetch_list = [loss.name]
+                fetch_list = [avg_loss.name]
                 fetch_list.extend(static_param_name_list)
                 out = exe.run(fluid.default_main_program(),
                               feed={"pixel": x_data,
......