Unverified commit d1220f23 authored by Xin Pan, committed by GitHub

Merge pull request #15229 from panyx0718/imperative

Support Python code in the imperative mode
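For context, a minimal sketch of the user-facing API this PR adds, modeled on the MyPyLayer test case further down in the diff (module paths and names follow that test; treat the snippet as illustrative, not the committed example): a PyLayer subclass provides static forward/backward functions that take and return LoDTensors, and calling an instance inside an imperative guard traces it through Tracer::PyTrace so that a later _backward() invokes the registered Python backward.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core


class TanhPyLayer(fluid.imperative.PyLayer):
    # Hypothetical example layer; the committed test uses MyPyLayer.
    @staticmethod
    def forward(inputs):
        # `inputs` is a list of LoDTensors; return a tuple of LoDTensors.
        ret = np.tanh(np.array(inputs[0]))
        t = core.LoDTensor()
        t.set(ret, core.CPUPlace())
        return tuple([t])

    @staticmethod
    def backward(inputs):
        # Packed by the tracer as (forward inputs, forward outputs, output grads).
        inp, out, dout = inputs
        ret = np.array(dout) * (1 - np.square(np.array(out)))
        t = core.LoDTensor()
        t.set(ret, core.CPUPlace())
        return tuple([t])


with fluid.imperative.guard():
    x = fluid.imperative.base.to_variable(np.ones([2, 2], np.float32))
    outs = TanhPyLayer()([x])  # traced via Tracer::PyTrace
    outs[0]._backward()        # runs the Python backward through PyLayer::ApplyGrad
    print(x._gradient())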
@@ -27,6 +27,8 @@
 namespace paddle {
 namespace imperative {
 
+std::map<int, py::object> py_funcs_;
+
 using framework::Variable;
 
 void AddTo(Variable* src, Variable* dst) {
@@ -55,6 +57,7 @@ class Autograd {
     if (var->stop_gradient_) {
       return;
     }
+    VLOG(3) << "start autograd";
     std::deque<OpBase*> ready;
     ready.push_back(var->pre_op_);
@@ -120,51 +123,57 @@ framework::LoDTensor& VarBase::Grad() {
 }
 
 std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
-  if (!grad_op_desc_) {
+  if (!grad_op_desc_ && backward_id_ <= 0) {
     LOG(WARNING) << "op with no grad: " << op_desc_->Type();
     return {};
   }
-  VLOG(3) << "op grad " << grad_op_desc_->Type();
 
-  std::vector<std::unique_ptr<framework::Variable>> tmp_vars;
   std::map<std::string, std::vector<framework::Variable*>> grad_outputs;
-  for (auto it : grad_output_vars_) {
-    auto& outputs = grad_outputs[it.first];
-    for (size_t i = 0; i < it.second.size(); ++i) {
-      // Allocate a new variable
-      Variable* tmp_var = new framework::Variable();
-      tmp_var->GetMutable<framework::LoDTensor>();
-
-      tmp_vars.emplace_back(tmp_var);
-      outputs.push_back(tmp_var);
+  if (backward_id_ > 0) {
+    VLOG(3) << "py_layer_grad";
+    grad_outputs["Out@GRAD"] =
+        PyLayer::ApplyGrad(backward_id_, grad_input_vars_["X@GRAD"]);
+  } else {
+    VLOG(3) << "op grad " << grad_op_desc_->Type();
+    for (auto it : grad_output_vars_) {
+      auto& outputs = grad_outputs[it.first];
+      for (size_t i = 0; i < it.second.size(); ++i) {
+        // Allocate a new variable
+        Variable* tmp_var = new framework::Variable();
+        tmp_var->GetMutable<framework::LoDTensor>();
+        outputs.push_back(tmp_var);
+      }
     }
-  }
 
-  framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
+    framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
 
-  // No need to do compile time infer shape here.
-  // grad_op_desc_->InferShape(*block_);
-  grad_op_desc_->InferVarType(block_);
+    // No need to do compile time infer shape here.
+    // grad_op_desc_->InferShape(*block_);
+    grad_op_desc_->InferVarType(block_);
 
-  std::unique_ptr<framework::OperatorBase> opbase =
-      framework::OpRegistry::CreateOp(*grad_op_desc_);
-  framework::OperatorWithKernel* op_kernel =
-      dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
-  PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
+    std::unique_ptr<framework::OperatorBase> opbase =
+        framework::OpRegistry::CreateOp(*grad_op_desc_);
+    framework::OperatorWithKernel* op_kernel =
+        dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
+    PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
 
-  framework::Scope scope;
-  platform::CPUPlace place;
-  PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
-  p.op.RuntimeInferShape(scope, place, ctx);
-  p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+    framework::Scope scope;
+    platform::CPUPlace place;
+    PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
+    p.op.RuntimeInferShape(scope, place, ctx);
+    p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+  }
 
   for (auto it : grad_output_vars_) {
     auto& outputs = grad_outputs[it.first];
     auto& origin_outputs = it.second;
+    PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size());
 
     for (size_t i = 0; i < outputs.size(); ++i) {
+      framework::Variable* grad = outputs[i];
       framework::Variable* orig_grad = origin_outputs[i];
-      AddTo(outputs[i], orig_grad);
+      AddTo(grad, orig_grad);
+      delete grad;
     }
   }
   return input_vars_;
@@ -173,6 +182,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
 void VarBase::RunBackward() {
   if (!pre_op_) return;
+  VLOG(3) << "start backward";
 
   auto grads_t = grads_->GetMutable<framework::LoDTensor>();
   float* data = grads_t->mutable_data<float>(platform::CPUPlace());
   std::fill(data, data + grads_t->numel(), 1.0);
@@ -183,5 +193,65 @@ void VarBase::RunBackward() {
   Autograd().RunBackward(this);
 }
+
+void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {
+  py_funcs_[func_id] = py_func;
+}
+
+int PyLayer::NumFuncs() { return py_funcs_.size(); }
+
+std::vector<VarBase*> PyLayer::Apply(int func_id,
+                                     const std::vector<VarBase*>& inputs) {
+  std::vector<framework::Variable*> invars;
+  for (const VarBase* in : inputs) {
+    invars.push_back(in->var_);
+  }
+  PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
+  std::vector<Variable*> outvars = CallPythonFunc(py_funcs_[func_id], invars);
+  std::vector<VarBase*> ret;
+  for (Variable* v : outvars) {
+    ret.push_back(new VarBase(v, new Variable()));
+  }
+  return ret;
+}
+
+std::vector<Variable*> PyLayer::ApplyGrad(
+    int func_id, const std::vector<framework::Variable*>& inputs) {
+  PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
+  return CallPythonFunc(py_funcs_[func_id], inputs);
+}
+
+std::vector<framework::Variable*> PyLayer::CallPythonFunc(
+    const py::object& callable, const std::vector<framework::Variable*>& ins) {
+  py::gil_scoped_acquire guard;
+  py::tuple in_args(ins.size());
+  for (size_t i = 0; i < ins.size(); ++i) {
+    const framework::LoDTensor& t = ins[i]->Get<framework::LoDTensor>();
+    in_args[i] = t.IsInitialized() ? py::cast(t) : py::cast(nullptr);
+  }
+  VLOG(3) << "pyfunc in " << py::len(in_args);
+
+  // TODO(panyx0718): Who owns the returned LoDTensor.
+  auto ret = callable(in_args);
+  auto ret_tuple = py::cast<py::tuple>(ret);
+  size_t ret_num = py::len(ret_tuple);
+  std::vector<framework::Variable*> outs;
+  VLOG(3) << "pyfunc out " << ret_num;
+  for (size_t i = 0; i < ret_num; ++i) {
+    try {
+      auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
+      PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
+                              "Output tensor %d should not be nullptr", i);
+      auto* var = new framework::Variable();
+      auto* tensor = var->GetMutable<framework::LoDTensor>();
+      tensor->ShareDataWith(*py_out_tensor);
+      tensor->set_lod(py_out_tensor->lod());
+      outs.push_back(var);
+    } catch (py::cast_error&) {
+      PADDLE_THROW("The %d-th output must be LoDTensor", i);
+    }
+  }
+  return outs;
+}
+
 }  // namespace imperative
 }  // namespace paddle
@@ -17,6 +17,9 @@
 #include <map>
 #include <string>
 #include <vector>
+
+#include "pybind11/pybind11.h"
+#include "Python.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/var_desc.h"
@@ -25,6 +28,8 @@
 namespace paddle {
 namespace imperative {
 
+namespace py = ::pybind11;
+
 class PreparedOp {
  public:
  PreparedOp(const framework::OperatorBase& op,
@@ -82,12 +87,15 @@ class OpBase;
 class VarBase {
  public:
-  VarBase()
+  VarBase() : VarBase(new framework::Variable(), new framework::Variable()) {}
+
+  // Owns `var` and `grad`
+  VarBase(framework::Variable* var, framework::Variable* grad)
       : pre_op_(nullptr),
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
-        var_(new framework::Variable()),
-        grads_(new framework::Variable()),
+        var_(var),
+        grads_(grad),
         stop_gradient_(false) {}
 
   explicit VarBase(bool stop_gradient)
@@ -124,7 +132,11 @@ class VarBase {
 class OpBase {
  public:
-  OpBase() : op_desc_(nullptr), grad_op_desc_(nullptr) {}
+  OpBase()
+      : op_desc_(nullptr),
+        forward_id_(-1),
+        grad_op_desc_(nullptr),
+        backward_id_(-1) {}
 
   virtual ~OpBase() {
     if (grad_op_desc_) delete grad_op_desc_;
@@ -132,8 +144,14 @@ class OpBase {
   std::map<std::string, std::vector<VarBase*>> ApplyGrad();
 
+  // Exactly one of `op_desc_` or `forward_id_` is set, never both.
+  // A pure Python PyLayer uses `forward_id_`; otherwise `op_desc_` is used.
   framework::OpDesc* op_desc_;
+  int forward_id_;
+  // When a backward exists, exactly one of `grad_op_desc_` or `backward_id_`
+  // is set, never both.
   framework::OpDesc* grad_op_desc_;
+  int backward_id_;
 
   std::map<std::string, std::vector<VarBase*>> input_vars_;
   std::map<std::string, std::vector<VarBase*>> output_vars_;
@@ -153,8 +171,25 @@ class Layer {
     std::vector<VarBase> vars;
     return vars;
   }
-
-  virtual void Backward() { LOG(ERROR) << "To support customize"; }
 };
 
+class PyLayer {
+ public:
+  virtual ~PyLayer() {}
+
+  static void RegisterFunc(int func_id, const py::object& py_func);
+
+  static int NumFuncs();
+
+  static std::vector<VarBase*> Apply(int func_id,
+                                     const std::vector<VarBase*>& inputs);
+
+  static std::vector<framework::Variable*> ApplyGrad(
+      int func_id, const std::vector<framework::Variable*>& inputs);
+
+ private:
+  static std::vector<framework::Variable*> CallPythonFunc(
+      const py::object& callable, const std::vector<framework::Variable*>& ins);
+};
 
 }  // namespace imperative
......
@@ -131,8 +131,10 @@ class Tracer {
     if (!stop_gradient) {
       framework::OpDesc* grad_op_desc;
-      auto grad_to_var = new std::unordered_map<std::string, std::string>();
-      CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
+      // TODO(panyx): Is this leaked?
+      std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var(
+          new std::unordered_map<std::string, std::string>());
+      CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var.get());
       op->grad_op_desc_ = grad_op_desc;
 
       for (auto it : grad_op_desc->Inputs()) {
@@ -143,12 +145,14 @@ class Tracer {
           if (var_it == grad_to_var->end()) {
             auto fwd_var_it = vars.find(grad_invar);
             PADDLE_ENFORCE(fwd_var_it != vars.end());
+            // Forward inputs or outputs.
            grad_in_vars.push_back(fwd_var_it->second->var_);
           } else {
             VarBase* var = vars[var_it->second];
             if (!var->grads_->IsInitialized()) {
               InitVar(var->var_, var->grads_);
             }
+            // Douts.
             grad_in_vars.push_back(var->grads_);
           }
         }
@@ -172,6 +176,54 @@ class Tracer {
     op->block_ = block;
   }
+  std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
+                                bool stop_gradient = false) {
+    VLOG(3) << "py_trace";
+    op->input_vars_["X"] = inputs;
+    op->output_vars_["Out"] = PyLayer::Apply(op->forward_id_, inputs);
+    for (VarBase* inp : inputs) {
+      if (inp->pre_op_) {
+        op->pre_ops_["X"].push_back(inp->pre_op_);
+        op->pre_ops_out_idx_["X"].push_back(inp->pre_op_out_idx_);
+      } else {
+        op->pre_ops_["X"].push_back(nullptr);
+      }
+    }
+
+    auto& outputs = op->output_vars_["Out"];
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      VarBase* out = outputs[i];
+      out->stop_gradient_ = stop_gradient;
+      out->pre_op_ = op;
+      out->pre_op_out_name_ = "Out";
+      out->pre_op_out_idx_ = i;
+    }
+
+    if (!stop_gradient) {
+      auto& grad_input_vars = op->grad_input_vars_["X@GRAD"];
+      auto& grad_output_vars = op->grad_output_vars_["Out@GRAD"];
+
+      for (const VarBase* inp : inputs) {
+        grad_input_vars.push_back(inp->var_);
+      }
+      for (VarBase* out : outputs) {
+        grad_input_vars.push_back(out->var_);
+      }
+      for (VarBase* out : outputs) {
+        grad_input_vars.push_back(out->grads_);
+        if (!grad_input_vars.back()->IsInitialized()) {
+          InitVar(out->var_, grad_input_vars.back());
+        }
+      }
+      for (const VarBase* inp : inputs) {
+        grad_output_vars.push_back(inp->grads_);
+        if (!grad_output_vars.back()->IsInitialized()) {
+          InitVar(inp->var_, grad_output_vars.back());
+        }
+      }
+    }
+    return outputs;
+  }
+
  private:
   framework::BlockDesc* root_block_;
 };
......
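A brief note on the convention PyTrace establishes above: grad_input_vars_["X@GRAD"] is packed as the forward inputs, then the forward outputs, then the gradients of the outputs, while grad_output_vars_["Out@GRAD"] receives one gradient per forward input; RunBackward seeds the initial output gradient with ones. The NumPy-only sketch below is purely illustrative (not Paddle API) and shows what a Python backward sees under that packing for a single-input tanh layer.

import numpy as np

def tanh_backward(args):
    # args follows the tracer's packing: inputs, then outputs, then output grads.
    inp, out, dout = args
    return [dout * (1.0 - np.square(out))]  # one gradient per forward input

x = np.ones([2, 2], np.float32)
y = np.tanh(x)
dy = np.ones_like(y)  # RunBackward fills the initial output grad with 1.0
dx, = tanh_backward([x, y, dy])
print(dx)  # d(tanh)/dx = 1 - tanh(x)^2, scaled by dy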
@@ -26,7 +26,9 @@ void BindTracer(pybind11::module *m) {
       [](imperative::Tracer &self, framework::BlockDesc *root_block) {
         new (&self) imperative::Tracer(root_block);
       })
-      .def("trace", &imperative::Tracer::Trace);
+      .def("trace", &imperative::Tracer::Trace)
+      .def("py_trace", &imperative::Tracer::PyTrace,
+           pybind11::return_value_policy::take_ownership);
 }
 
 }  // namespace pybind
......
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 namespace pybind {
 
-class PyLayer : public imperative::Layer {
+class Layer : public imperative::Layer {
  public:
   using imperative::Layer::Layer;  // Inherit constructors
@@ -31,10 +31,6 @@ class PyLayer : public imperative::Layer {
     PYBIND11_OVERLOAD(std::vector<imperative::VarBase>, Layer, Forward,
                       inputs);  // NOLINT
   }
-
-  void Backward() override {
-    PYBIND11_OVERLOAD(void, Layer, Backward, );  // NOLINT
-  }
 };
 
 class PyOpBase : public imperative::OpBase {
......
@@ -169,16 +169,44 @@ PYBIND11_MODULE(core, m) {
             self.op_desc_ = op_desc;
           }
         },
+          py::return_value_policy::reference)
+      .def_property(
+          "forward_id",
+          [](const imperative::OpBase &self) { return self.forward_id_; },
+          [](imperative::OpBase &self, int forward_id) {
+            self.forward_id_ = forward_id;
+          },
+          py::return_value_policy::reference)
+      .def_property(
+          "backward_id",
+          [](const imperative::OpBase &self) { return self.backward_id_; },
+          [](imperative::OpBase &self, int backward_id) {
+            self.backward_id_ = backward_id;
+          },
          py::return_value_policy::reference);
 
-  py::class_<imperative::Layer, PyLayer /* <--- trampoline*/> layer(m, "Layer");
+  py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
   layer.def(py::init<>())
-      .def("forward",
-           [](imperative::Layer &self,
-              const std::vector<imperative::VarBase> &inputs) {
-             return self.Forward(inputs);
-           })
-      .def("backward", &imperative::Layer::Backward);
+      .def("forward", [](imperative::Layer &self,
+                         const std::vector<imperative::VarBase> &inputs) {
+        return self.Forward(inputs);
+      });
+
+  py::class_<imperative::PyLayer>(m, "PyLayer")
+      .def(py::init<>())
+      .def_static(
+          "apply",
+          [](int func_id, const std::vector<imperative::VarBase *> &inputs)
+              -> std::vector<imperative::VarBase *> {
+            return imperative::PyLayer::Apply(func_id, inputs);
+          },
+          py::return_value_policy::take_ownership)
+      .def_static("register_func",
+                  [](int func_id, const py::object &callable) {
+                    imperative::PyLayer::RegisterFunc(func_id, callable);
+                  })
+      .def_static("num_funcs", &imperative::PyLayer::NumFuncs);
 
   BindTracer(&m);
 
   py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
......
@@ -372,7 +372,10 @@ class Variable(object):
         self.stop_gradient = stop_gradient
         self.is_data = is_data
         if _in_imperative_mode():
-            self._ivar = core.VarBase()
+            if 'ivar' in kwargs:
+                self._ivar = kwargs['ivar']
+            else:
+                self._ivar = core.VarBase()
             self._ivar.desc = self.desc
             self._ivar.stop_gradient = stop_gradient
......
@@ -20,10 +20,12 @@ from paddle.fluid import core
 from paddle.fluid import framework
 from paddle.fluid.imperative import base
 
-__all__ = ['PyLayer']
+__all__ = ['Layer', 'PyLayer']
 
 
-class PyLayer(core.Layer):
+class Layer(core.Layer):
+    """Layers composed of operators."""
+
     def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
         self._once_built = False
         self._dtype = dtype
@@ -37,8 +39,56 @@ class PyLayer(core.Layer):
             self._once_built = True
 
         outputs = self.forward(*inputs)
         return outputs
 
     def forward(self, *inputs):
         raise NotImplementedError
+
+    def backward(self, *inputs):
+        raise ValueError("Layer shouldn't implement backward")
+
+
+class PyLayer(core.PyLayer):
+    """Layers composed of user-defined Python code."""
+
+    def __init__(self):
+        super(PyLayer, self).__init__()
+
+    @staticmethod
+    def forward(inputs):
+        raise NotImplementedError
+
+    @staticmethod
+    def backward(douts):
+        raise NotImplementedError
+
+    @classmethod
+    def __call__(cls, inputs):
+        tracer = framework._imperative_tracer()
+        block = framework.default_main_program().current_block()
+        inputs = [x._ivar for x in inputs]
+
+        if not hasattr(cls, 'forward_id'):
+            cls.forward_id = core.PyLayer.num_funcs() + 1
+            PyLayer.register_func(cls.forward_id, cls.forward)
+            cls.backward_id = core.PyLayer.num_funcs() + 1
+            PyLayer.register_func(cls.backward_id, cls.backward)
+
+        iop = core.OpBase()
+        iop.forward_id = cls.forward_id
+        iop.backward_id = cls.backward_id
+        block.ops.append(iop)
+        ivars = tracer.py_trace(iop, inputs, False)
+        # ivars = core.PyLayer.apply(cls.forward, inputs)
+        ret = []
+        for ivar in ivars:
+            tensor = ivar.value.get_tensor()
+            py_var = framework.Variable(
+                block,
+                type=core.VarDesc.VarType.LOD_TENSOR,
+                name=None,
+                shape=tensor.shape(),
+                dtype=tensor._dtype(),
+                ivar=ivar)
+            ret.append(py_var)
+        return ret
@@ -30,7 +30,7 @@ __all__ = [
 ]
 
 
-class Conv2D(layers.PyLayer):
+class Conv2D(layers.Layer):
     def __init__(self,
                  num_channels,
                  num_filters,
@@ -143,7 +143,7 @@ class Conv2D(layers.PyLayer):
         return self._helper.append_activation(pre_act)
 
 
-class Pool2D(layers.PyLayer):
+class Pool2D(layers.Layer):
     def __init__(self,
                  pool_size=-1,
                  pool_type="max",
@@ -205,7 +205,7 @@ class Pool2D(layers.PyLayer):
         return pool_out
 
 
-class FC(layers.PyLayer):
+class FC(layers.Layer):
     def __init__(self,
                  size,
                  param_attr=None,
......
@@ -15,6 +15,7 @@
 import contextlib
 import unittest
 import numpy as np
+import sys
 
 import paddle.fluid as fluid
 from paddle.fluid import core
@@ -22,7 +23,7 @@ from paddle.fluid.imperative.nn import FC
 from test_imperative_base import new_program_scope
 
 
-class MyLayer(fluid.imperative.PyLayer):
+class MyLayer(fluid.imperative.Layer):
     def __init__(self):
         super(MyLayer, self).__init__()
 
@@ -34,7 +35,35 @@ class MyLayer(fluid.imperative.PyLayer):
         return [x]
-class MLP(fluid.imperative.PyLayer):
+class MyPyLayer(fluid.imperative.PyLayer):
+    def __init__(self):
+        super(MyPyLayer, self).__init__()
+
+    @staticmethod
+    def forward(inputs):
+        sys.stderr.write('before forward\n')
+        ret = np.tanh(inputs[0])
+        sys.stderr.write('after forward: %s\n' % ret)
+        tensor = core.LoDTensor()
+        tensor.set(ret, core.CPUPlace())
+        return tuple([tensor])
+
+    @staticmethod
+    def backward(inputs):
+        sys.stderr.write('calling into backward: %s\n' % str(inputs))
+        inp, out, dout = inputs
+        inp = np.array(inp)
+        out = np.array(out)
+        dout = np.array(dout)
+        sys.stderr.write('calling into backward: %s, %s, %s\n' %
+                         (inp, out, dout))
+        ret = np.array(dout) * (1 - np.square(np.array(out)))
+        tensor = core.LoDTensor()
+        tensor.set(ret, core.CPUPlace())
+        return tuple([tensor])
+
+
+class MLP(fluid.imperative.Layer):
     def __init__(self):
         super(MLP, self).__init__()
         self._fc1 = FC(3,
@@ -56,9 +85,77 @@ class TestImperative(unittest.TestCase):
         with fluid.imperative.guard():
             cl = core.Layer()
             cl.forward([])
-            l = fluid.imperative.PyLayer()
+            l = fluid.imperative.Layer()
             self.assertRaises(NotImplementedError, l.forward, [])
+    def test_pylayer_func_id(self):
+
+        with fluid.imperative.guard():
+
+            class PyLayer1(fluid.imperative.PyLayer):
+                def __init__(self):
+                    super(PyLayer1, self).__init__()
+
+                @staticmethod
+                def forward(inputs):
+                    return inputs
+
+                @staticmethod
+                def backward(inputs):
+                    return inputs
+
+            class PyLayer2(fluid.imperative.PyLayer):
+                def __init__(self):
+                    super(PyLayer2, self).__init__()
+
+                @staticmethod
+                def forward(inputs):
+                    return inputs
+
+                @staticmethod
+                def backward(inputs):
+                    return inputs
+
+            py_layer_1 = PyLayer1()
+            py_layer_2 = PyLayer2()
+            py_layer_1([fluid.imperative.base.to_variable(np.ones([2, 2]))])
+            py_layer_2([fluid.imperative.base.to_variable(np.ones([2, 2]))])
+            id = py_layer_1.forward_id
+            self.assertGreater(id, 0)
+            self.assertEqual(py_layer_1.backward_id, id + 1)
+            self.assertEqual(py_layer_2.forward_id, id + 2)
+            self.assertEqual(py_layer_2.backward_id, id + 3)
+            py_layer_1([fluid.imperative.base.to_variable(np.ones([2, 2]))])
+            self.assertEqual(py_layer_1.forward_id, id)
+
+    def test_pylayer(self):
+        np_inp = np.ones([2, 2], np.float32)
+        with fluid.imperative.guard():
+            my_py_layer = MyPyLayer()
+            var_inp = fluid.imperative.base.to_variable(np_inp)
+            outs = my_py_layer([var_inp])
+            dy_out = np.sum(outs[0]._numpy())
+            outs[0]._backward()
+            dy_grad = var_inp._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[2, 2], append_batch_size=False)
+            # TODO(panyx0718): Paddle doesn't diff against data `inp`.
+            x1 = inp * 1
+            # TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
+            x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[x1.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
     def test_layer_in_out(self):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.imperative.guard():
......
@@ -26,7 +26,7 @@ from paddle.fluid.imperative.base import to_variable
 from test_imperative_base import new_program_scope
 
 
-class SimpleImgConvPool(fluid.imperative.PyLayer):
+class SimpleImgConvPool(fluid.imperative.Layer):
     def __init__(self,
                  num_channels,
                  num_filters,
@@ -72,7 +72,7 @@ class SimpleImgConvPool(fluid.imperative.PyLayer):
         return x
 
 
-class MNIST(fluid.imperative.PyLayer):
+class MNIST(fluid.imperative.Layer):
     def __init__(self, param_attr=None, bias_attr=None):
         super(MNIST, self).__init__()
......