Unverified · Commit d1220f23 · Author: Xin Pan · Committer: GitHub

Merge pull request #15229 from panyx0718/imperative

support python codes in the imperative model
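
This PR adds an imperative-mode PyLayer whose forward and backward passes are plain Python functions operating on LoDTensors: forward calls are routed through the new `Tracer::PyTrace`, and `OpBase::ApplyGrad` later dispatches back into Python via `backward_id_`. For orientation before the diff, a user-defined layer follows the pattern of `MyPyLayer` in the new test; the tanh-based class below is only a sketch of that pattern, not part of the change itself.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core

class MyTanh(fluid.imperative.PyLayer):
    """Sketch of a user-defined PyLayer (mirrors MyPyLayer in the new test)."""

    @staticmethod
    def forward(inputs):
        # `inputs` is a tuple of LoDTensors; a tuple of LoDTensors must be returned.
        ret = np.tanh(inputs[0])
        t = core.LoDTensor()
        t.set(ret, core.CPUPlace())
        return tuple([t])

    @staticmethod
    def backward(inputs):
        # Receives forward inputs, forward outputs and output gradients, in that order.
        inp, out, dout = inputs
        ret = np.array(dout) * (1 - np.square(np.array(out)))
        t = core.LoDTensor()
        t.set(ret, core.CPUPlace())
        return tuple([t])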
......@@ -27,6 +27,8 @@
namespace paddle {
namespace imperative {
std::map<int, py::object> py_funcs_;
using framework::Variable;
void AddTo(Variable* src, Variable* dst) {
......@@ -55,6 +57,7 @@ class Autograd {
if (var->stop_gradient_) {
return;
}
VLOG(3) << "start autograd";
std::deque<OpBase*> ready;
ready.push_back(var->pre_op_);
......@@ -120,51 +123,57 @@ framework::LoDTensor& VarBase::Grad() {
}
std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
if (!grad_op_desc_) {
if (!grad_op_desc_ && backward_id_ <= 0) {
LOG(WARNING) << "op with no grad: " << op_desc_->Type();
return {};
}
VLOG(3) << "op grad " << grad_op_desc_->Type();
std::vector<std::unique_ptr<framework::Variable>> tmp_vars;
std::map<std::string, std::vector<framework::Variable*>> grad_outputs;
for (auto it : grad_output_vars_) {
auto& outputs = grad_outputs[it.first];
for (size_t i = 0; i < it.second.size(); ++i) {
// Allocate a new variable
Variable* tmp_var = new framework::Variable();
tmp_var->GetMutable<framework::LoDTensor>();
tmp_vars.emplace_back(tmp_var);
outputs.push_back(tmp_var);
if (backward_id_ > 0) {
VLOG(3) << "py_layer_grad";
grad_outputs["Out@GRAD"] =
PyLayer::ApplyGrad(backward_id_, grad_input_vars_["X@GRAD"]);
} else {
VLOG(3) << "op grad " << grad_op_desc_->Type();
for (auto it : grad_output_vars_) {
auto& outputs = grad_outputs[it.first];
for (size_t i = 0; i < it.second.size(); ++i) {
// Allocate a new variable
Variable* tmp_var = new framework::Variable();
tmp_var->GetMutable<framework::LoDTensor>();
outputs.push_back(tmp_var);
}
}
}
framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
// No need to do compile time infer shape here.
// grad_op_desc_->InferShape(*block_);
grad_op_desc_->InferVarType(block_);
// No need to do compile time infer shape here.
// grad_op_desc_->InferShape(*block_);
grad_op_desc_->InferVarType(block_);
std::unique_ptr<framework::OperatorBase> opbase =
framework::OpRegistry::CreateOp(*grad_op_desc_);
framework::OperatorWithKernel* op_kernel =
dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
std::unique_ptr<framework::OperatorBase> opbase =
framework::OpRegistry::CreateOp(*grad_op_desc_);
framework::OperatorWithKernel* op_kernel =
dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
framework::Scope scope;
platform::CPUPlace place;
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
p.op.RuntimeInferShape(scope, place, ctx);
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
framework::Scope scope;
platform::CPUPlace place;
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
p.op.RuntimeInferShape(scope, place, ctx);
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
}
for (auto it : grad_output_vars_) {
auto& outputs = grad_outputs[it.first];
auto& origin_outputs = it.second;
PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
framework::Variable* grad = outputs[i];
framework::Variable* orig_grad = origin_outputs[i];
AddTo(outputs[i], orig_grad);
AddTo(grad, orig_grad);
delete grad;
}
}
return input_vars_;
......@@ -173,6 +182,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
void VarBase::RunBackward() {
if (!pre_op_) return;
VLOG(3) << "start backward";
auto grads_t = grads_->GetMutable<framework::LoDTensor>();
float* data = grads_t->mutable_data<float>(platform::CPUPlace());
std::fill(data, data + grads_t->numel(), 1.0);
......@@ -183,5 +193,65 @@ void VarBase::RunBackward() {
Autograd().RunBackward(this);
}
void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {
py_funcs_[func_id] = py_func;
}
int PyLayer::NumFuncs() { return py_funcs_.size(); }
std::vector<VarBase*> PyLayer::Apply(int func_id,
const std::vector<VarBase*>& inputs) {
std::vector<framework::Variable*> invars;
for (const VarBase* in : inputs) {
invars.push_back(in->var_);
}
PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
std::vector<Variable*> outvars = CallPythonFunc(py_funcs_[func_id], invars);
std::vector<VarBase*> ret;
for (Variable* v : outvars) {
ret.push_back(new VarBase(v, new Variable()));
}
return ret;
}
std::vector<Variable*> PyLayer::ApplyGrad(
int func_id, const std::vector<framework::Variable*>& inputs) {
PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
return CallPythonFunc(py_funcs_[func_id], inputs);
}
std::vector<framework::Variable*> PyLayer::CallPythonFunc(
const py::object& callable, const std::vector<framework::Variable*>& ins) {
py::gil_scoped_acquire guard;
py::tuple in_args(ins.size());
for (size_t i = 0; i < ins.size(); ++i) {
const framework::LoDTensor& t = ins[i]->Get<framework::LoDTensor>();
in_args[i] = t.IsInitialized() ? py::cast(t) : py::cast(nullptr);
}
VLOG(3) << "pyfunc in " << py::len(in_args);
// TODO(panyx0718): Who owns the returned LoDTensor.
auto ret = callable(in_args);
auto ret_tuple = py::cast<py::tuple>(ret);
size_t ret_num = py::len(ret_tuple);
std::vector<framework::Variable*> outs;
VLOG(3) << "pyfunc out " << ret_num;
for (size_t i = 0; i < ret_num; ++i) {
try {
auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
"Output tensor %d should not be nullptr", i);
auto* var = new framework::Variable();
auto* tensor = var->GetMutable<framework::LoDTensor>();
tensor->ShareDataWith(*py_out_tensor);
tensor->set_lod(py_out_tensor->lod());
outs.push_back(var);
} catch (py::cast_error&) {
PADDLE_THROW("The %d-th output must be LoDTensor", i);
}
}
return outs;
}
} // namespace imperative
} // namespace paddle
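
The contract enforced by `PyLayer::CallPythonFunc` above: the registered callable is invoked with a single tuple of LoDTensors (an uninitialized input is passed as `None`) and must return a tuple of LoDTensors, each of which is wrapped into a fresh `Variable` through `ShareDataWith` plus `set_lod`. A minimal sketch of a conforming callable; the scale-by-two operation and its name are illustrative only.

import numpy as np
from paddle.fluid import core

def scale_by_two(inputs):
    # Called with one tuple argument whose entries are LoDTensors (or None).
    arr = np.array(inputs[0]) * 2.0
    out = core.LoDTensor()
    out.set(arr, core.CPUPlace())
    return (out,)  # must be a tuple of LoDTensors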
......@@ -17,6 +17,9 @@
#include <map>
#include <string>
#include <vector>
#include "pybind11/pybind11.h"
#include "Python.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_desc.h"
......@@ -25,6 +28,8 @@
namespace paddle {
namespace imperative {
namespace py = ::pybind11;
class PreparedOp {
public:
PreparedOp(const framework::OperatorBase& op,
......@@ -82,12 +87,15 @@ class OpBase;
class VarBase {
public:
VarBase()
VarBase() : VarBase(new framework::Variable(), new framework::Variable()) {}
// Owns `var` and `grad`
VarBase(framework::Variable* var, framework::Variable* grad)
: pre_op_(nullptr),
pre_op_out_idx_(-1),
var_desc_(nullptr),
var_(new framework::Variable()),
grads_(new framework::Variable()),
var_(var),
grads_(grad),
stop_gradient_(false) {}
explicit VarBase(bool stop_gradient)
......@@ -124,7 +132,11 @@ class VarBase {
class OpBase {
public:
OpBase() : op_desc_(nullptr), grad_op_desc_(nullptr) {}
OpBase()
: op_desc_(nullptr),
forward_id_(-1),
grad_op_desc_(nullptr),
backward_id_(-1) {}
virtual ~OpBase() {
if (grad_op_desc_) delete grad_op_desc_;
......@@ -132,8 +144,14 @@ class OpBase {
std::map<std::string, std::vector<VarBase*>> ApplyGrad();
// One of `op_desc_` or `forward_id_` is set, not both.
// For pure python PyLayer, use `forward_id_`, otherwise, use op_desc_.
framework::OpDesc* op_desc_;
int forward_id_;
// When has backward, one of `grad_op_desc_` or `backward_id_` is set,
// not both.
framework::OpDesc* grad_op_desc_;
int backward_id_;
std::map<std::string, std::vector<VarBase*>> input_vars_;
std::map<std::string, std::vector<VarBase*>> output_vars_;
......@@ -153,8 +171,25 @@ class Layer {
std::vector<VarBase> vars;
return vars;
}
};
class PyLayer {
public:
virtual ~PyLayer() {}
static void RegisterFunc(int func_id, const py::object& py_func);
static int NumFuncs();
static std::vector<VarBase*> Apply(int func_id,
const std::vector<VarBase*>& inputs);
static std::vector<framework::Variable*> ApplyGrad(
int func_id, const std::vector<framework::Variable*>& inputs);
virtual void Backward() { LOG(ERROR) << "To support customize"; }
private:
static std::vector<framework::Variable*> CallPythonFunc(
const py::object& callable, const std::vector<framework::Variable*>& ins);
};
} // namespace imperative
......
......@@ -131,8 +131,10 @@ class Tracer {
if (!stop_gradient) {
framework::OpDesc* grad_op_desc;
auto grad_to_var = new std::unordered_map<std::string, std::string>();
CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
// TODO(panyx): Is this leaked?
std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var(
new std::unordered_map<std::string, std::string>());
CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var.get());
op->grad_op_desc_ = grad_op_desc;
for (auto it : grad_op_desc->Inputs()) {
......@@ -143,12 +145,14 @@ class Tracer {
if (var_it == grad_to_var->end()) {
auto fwd_var_it = vars.find(grad_invar);
PADDLE_ENFORCE(fwd_var_it != vars.end());
// Forward inputs or outputs.
grad_in_vars.push_back(fwd_var_it->second->var_);
} else {
VarBase* var = vars[var_it->second];
if (!var->grads_->IsInitialized()) {
InitVar(var->var_, var->grads_);
}
// Douts.
grad_in_vars.push_back(var->grads_);
}
}
......@@ -172,6 +176,54 @@ class Tracer {
op->block_ = block;
}
std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
bool stop_gradient = false) {
VLOG(3) << "py_trace";
op->input_vars_["X"] = inputs;
op->output_vars_["Out"] = PyLayer::Apply(op->forward_id_, inputs);
for (VarBase* inp : inputs) {
if (inp->pre_op_) {
op->pre_ops_["X"].push_back(inp->pre_op_);
op->pre_ops_out_idx_["X"].push_back(inp->pre_op_out_idx_);
} else {
op->pre_ops_["X"].push_back(nullptr);
}
}
auto& outputs = op->output_vars_["Out"];
for (size_t i = 0; i < outputs.size(); ++i) {
VarBase* out = outputs[i];
out->stop_gradient_ = stop_gradient;
out->pre_op_ = op;
out->pre_op_out_name_ = "Out";
out->pre_op_out_idx_ = i;
}
if (!stop_gradient) {
auto& grad_input_vars = op->grad_input_vars_["X@GRAD"];
auto& grad_output_vars = op->grad_output_vars_["Out@GRAD"];
for (const VarBase* inp : inputs) {
grad_input_vars.push_back(inp->var_);
}
for (VarBase* out : outputs) {
grad_input_vars.push_back(out->var_);
}
for (VarBase* out : outputs) {
grad_input_vars.push_back(out->grads_);
if (!grad_input_vars.back()->IsInitialized()) {
InitVar(out->var_, grad_input_vars.back());
}
}
for (const VarBase* inp : inputs) {
grad_output_vars.push_back(inp->grads_);
if (!grad_output_vars.back()->IsInitialized()) {
InitVar(inp->var_, grad_output_vars.back());
}
}
}
return outputs;
}
private:
framework::BlockDesc* root_block_;
};
......
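
Note the ordering `PyTrace` establishes for the backward call: `grad_input_vars_["X@GRAD"]` is filled with the forward inputs, then the forward outputs, then the output gradients, while `grad_output_vars_["Out@GRAD"]` collects the input gradients that `ApplyGrad` accumulates via `AddTo`. A backward callable therefore sees `(inputs..., outputs..., douts...)` and should return one gradient tensor per forward input. A sketch under that reading, for a hypothetical two-input, one-output elementwise-add layer:

import numpy as np
from paddle.fluid import core

def backward_two_inputs(args):
    # Assumed layout: N forward inputs, then M forward outputs, then M douts.
    n, m = 2, 1  # illustrative: two inputs, one output
    inps, outs, douts = args[:n], args[n:n + m], args[n + m:]
    grads = []
    for _ in inps:  # one gradient LoDTensor per forward input
        g = core.LoDTensor()
        g.set(np.array(douts[0]), core.CPUPlace())  # d(a+b)/da == d(a+b)/db == dout
        grads.append(g)
    return tuple(grads)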
......@@ -26,7 +26,9 @@ void BindTracer(pybind11::module *m) {
[](imperative::Tracer &self, framework::BlockDesc *root_block) {
new (&self) imperative::Tracer(root_block);
})
.def("trace", &imperative::Tracer::Trace);
.def("trace", &imperative::Tracer::Trace)
.def("py_trace", &imperative::Tracer::PyTrace,
pybind11::return_value_policy::take_ownership);
}
} // namespace pybind
......
......@@ -22,7 +22,7 @@ limitations under the License. */
namespace paddle {
namespace pybind {
class PyLayer : public imperative::Layer {
class Layer : public imperative::Layer {
public:
using imperative::Layer::Layer; // Inherit constructors
......@@ -31,10 +31,6 @@ class PyLayer : public imperative::Layer {
PYBIND11_OVERLOAD(std::vector<imperative::VarBase>, Layer, Forward,
inputs); // NOLINT
}
void Backward() override {
PYBIND11_OVERLOAD(void, Layer, Backward, ); // NOLINT
}
};
class PyOpBase : public imperative::OpBase {
......
......@@ -169,16 +169,44 @@ PYBIND11_MODULE(core, m) {
self.op_desc_ = op_desc;
}
},
py::return_value_policy::reference)
.def_property(
"forward_id",
[](const imperative::OpBase &self) { return self.forward_id_; },
[](imperative::OpBase &self, int forward_id) {
self.forward_id_ = forward_id;
},
py::return_value_policy::reference)
.def_property(
"backward_id",
[](const imperative::OpBase &self) { return self.backward_id_; },
[](imperative::OpBase &self, int backward_id) {
self.backward_id_ = backward_id;
},
py::return_value_policy::reference);
py::class_<imperative::Layer, PyLayer /* <--- trampoline*/> layer(m, "Layer");
py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
layer.def(py::init<>())
.def("forward",
[](imperative::Layer &self,
const std::vector<imperative::VarBase> &inputs) {
return self.Forward(inputs);
})
.def("backward", &imperative::Layer::Backward);
.def("forward", [](imperative::Layer &self,
const std::vector<imperative::VarBase> &inputs) {
return self.Forward(inputs);
});
py::class_<imperative::PyLayer>(m, "PyLayer")
.def(py::init<>())
.def_static(
"apply",
[](int func_id, const std::vector<imperative::VarBase *> &inputs)
-> std::vector<imperative::VarBase *> {
return imperative::PyLayer::Apply(func_id, inputs);
},
py::return_value_policy::take_ownership)
.def_static("register_func",
[](int func_id, const py::object &callable) {
imperative::PyLayer::RegisterFunc(func_id, callable);
})
.def_static("num_funcs", &imperative::PyLayer::NumFuncs);
BindTracer(&m);
py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
......
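
The raw bindings above can also be exercised directly, which is what `PyLayer.__call__` in `layers.py` does under the hood: `register_func` stores a callable under an integer id, `num_funcs` reports how many callables are registered (the Python side derives fresh ids from it), and `apply` runs a registered forward over a list of `VarBase`s. A hedged sketch; the identity callable and its id handling are illustrative.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core

def identity(inputs):
    t = core.LoDTensor()
    t.set(np.array(inputs[0]), core.CPUPlace())
    return (t,)

with fluid.imperative.guard():
    func_id = core.PyLayer.num_funcs() + 1  # same id scheme as layers.py
    core.PyLayer.register_func(func_id, identity)
    x = fluid.imperative.base.to_variable(np.ones([2, 2], np.float32))
    outs = core.PyLayer.apply(func_id, [x._ivar])  # returns newly owned VarBases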
......@@ -372,7 +372,10 @@ class Variable(object):
self.stop_gradient = stop_gradient
self.is_data = is_data
if _in_imperative_mode():
self._ivar = core.VarBase()
if 'ivar' in kwargs:
self._ivar = kwargs['ivar']
else:
self._ivar = core.VarBase()
self._ivar.desc = self.desc
self._ivar.stop_gradient = stop_gradient
......
......@@ -20,10 +20,12 @@ from paddle.fluid import core
from paddle.fluid import framework
from paddle.fluid.imperative import base
__all__ = ['PyLayer']
__all__ = ['Layer', 'PyLayer']
class PyLayer(core.Layer):
class Layer(core.Layer):
"""Layers composed of operators."""
def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
self._once_built = False
self._dtype = dtype
......@@ -37,8 +39,56 @@ class PyLayer(core.Layer):
self._once_built = True
outputs = self.forward(*inputs)
return outputs
def forward(self, *inputs):
raise NotImplementedError
def backward(self, *inputs):
raise ValueError("Layer shouldn't implement backward")
class PyLayer(core.PyLayer):
"""Layers composed of user-defined python codes."""
def __init__(self):
super(PyLayer, self).__init__()
@staticmethod
def forward(inputs):
raise NotImplementedError
@staticmethod
def backward(douts):
raise NotImplementedError
@classmethod
def __call__(cls, inputs):
tracer = framework._imperative_tracer()
block = framework.default_main_program().current_block()
inputs = [x._ivar for x in inputs]
if not hasattr(cls, 'forward_id'):
cls.forward_id = core.PyLayer.num_funcs() + 1
PyLayer.register_func(cls.forward_id, cls.forward)
cls.backward_id = core.PyLayer.num_funcs() + 1
PyLayer.register_func(cls.backward_id, cls.backward)
iop = core.OpBase()
iop.forward_id = cls.forward_id
iop.backward_id = cls.backward_id
block.ops.append(iop)
ivars = tracer.py_trace(iop, inputs, False)
# ivars = core.PyLayer.apply(cls.forward, inputs)
ret = []
for ivar in ivars:
tensor = ivar.value.get_tensor()
py_var = framework.Variable(
block,
type=core.VarDesc.VarType.LOD_TENSOR,
name=None,
shape=tensor.shape(),
dtype=tensor._dtype(),
ivar=ivar)
ret.append(py_var)
return ret
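
With this dispatcher in place, using a custom PyLayer from user code reduces to calling the subclass on a list of imperative Variables inside the guard; `_backward()` then routes through `OpBase::ApplyGrad` into the registered backward function. A usage sketch mirroring `test_pylayer` below, with `MyPyLayer` as defined in that test:

import numpy as np
import paddle.fluid as fluid

with fluid.imperative.guard():
    x = fluid.imperative.base.to_variable(np.ones([2, 2], np.float32))
    outs = MyPyLayer()([x])      # forward runs the registered Python function
    dy_out = np.sum(outs[0]._numpy())
    outs[0]._backward()          # gradient dispatch via backward_id_
    dy_grad = x._gradient()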
......@@ -30,7 +30,7 @@ __all__ = [
]
class Conv2D(layers.PyLayer):
class Conv2D(layers.Layer):
def __init__(self,
num_channels,
num_filters,
......@@ -143,7 +143,7 @@ class Conv2D(layers.PyLayer):
return self._helper.append_activation(pre_act)
class Pool2D(layers.PyLayer):
class Pool2D(layers.Layer):
def __init__(self,
pool_size=-1,
pool_type="max",
......@@ -205,7 +205,7 @@ class Pool2D(layers.PyLayer):
return pool_out
class FC(layers.PyLayer):
class FC(layers.Layer):
def __init__(self,
size,
param_attr=None,
......
......@@ -15,6 +15,7 @@
import contextlib
import unittest
import numpy as np
import sys
import paddle.fluid as fluid
from paddle.fluid import core
......@@ -22,7 +23,7 @@ from paddle.fluid.imperative.nn import FC
from test_imperative_base import new_program_scope
class MyLayer(fluid.imperative.PyLayer):
class MyLayer(fluid.imperative.Layer):
def __init__(self):
super(MyLayer, self).__init__()
......@@ -34,7 +35,35 @@ class MyLayer(fluid.imperative.PyLayer):
return [x]
class MLP(fluid.imperative.PyLayer):
class MyPyLayer(fluid.imperative.PyLayer):
def __init__(self):
super(MyPyLayer, self).__init__()
@staticmethod
def forward(inputs):
sys.stderr.write('before forward\n')
ret = np.tanh(inputs[0])
sys.stderr.write('after forward: %s\n' % ret)
tensor = core.LoDTensor()
tensor.set(ret, core.CPUPlace())
return tuple([tensor])
@staticmethod
def backward(inputs):
sys.stderr.write('calling into backward: %s\n' % str(inputs))
inp, out, dout = inputs
inp = np.array(inp)
out = np.array(out)
dout = np.array(dout)
sys.stderr.write('calling into backward: %s, %s, %s\n' %
(inp, out, dout))
ret = np.array(dout) * (1 - np.square(np.array(out)))
tensor = core.LoDTensor()
tensor.set(ret, core.CPUPlace())
return tuple([tensor])
class MLP(fluid.imperative.Layer):
def __init__(self):
super(MLP, self).__init__()
self._fc1 = FC(3,
......@@ -56,9 +85,77 @@ class TestImperative(unittest.TestCase):
with fluid.imperative.guard():
cl = core.Layer()
cl.forward([])
l = fluid.imperative.PyLayer()
l = fluid.imperative.Layer()
self.assertRaises(NotImplementedError, l.forward, [])
def test_pylayer_func_id(self):
with fluid.imperative.guard():
class PyLayer1(fluid.imperative.PyLayer):
def __init__(self):
super(PyLayer1, self).__init__()
@staticmethod
def forward(inputs):
return inputs
@staticmethod
def backward(inputs):
return inputs
class PyLayer2(fluid.imperative.PyLayer):
def __init__(self):
super(PyLayer2, self).__init__()
@staticmethod
def forward(inputs):
return inputs
@staticmethod
def backward(inputs):
return inputs
py_layer_1 = PyLayer1()
py_layer_2 = PyLayer2()
py_layer_1([fluid.imperative.base.to_variable(np.ones([2, 2]))])
py_layer_2([fluid.imperative.base.to_variable(np.ones([2, 2]))])
id = py_layer_1.forward_id
self.assertGreater(id, 0)
self.assertEqual(py_layer_1.backward_id, id + 1)
self.assertEqual(py_layer_2.forward_id, id + 2)
self.assertEqual(py_layer_2.backward_id, id + 3)
py_layer_1([fluid.imperative.base.to_variable(np.ones([2, 2]))])
self.assertEqual(py_layer_1.forward_id, id)
def test_pylayer(self):
np_inp = np.ones([2, 2], np.float32)
with fluid.imperative.guard():
my_py_layer = MyPyLayer()
var_inp = fluid.imperative.base.to_variable(np_inp)
outs = my_py_layer([var_inp])
dy_out = np.sum(outs[0]._numpy())
outs[0]._backward()
dy_grad = var_inp._gradient()
with new_program_scope():
inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
# TODO(panyx0718): Paddle doesn't diff against data `inp`.
x1 = inp * 1
# TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
param_grads = fluid.backward.append_backward(
x, parameter_list=[x1.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
fetch_list=[x.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad))
def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard():
......
......@@ -26,7 +26,7 @@ from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope
class SimpleImgConvPool(fluid.imperative.PyLayer):
class SimpleImgConvPool(fluid.imperative.Layer):
def __init__(self,
num_channels,
num_filters,
......@@ -72,7 +72,7 @@ class SimpleImgConvPool(fluid.imperative.PyLayer):
return x
class MNIST(fluid.imperative.PyLayer):
class MNIST(fluid.imperative.Layer):
def __init__(self, param_attr=None, bias_attr=None):
super(MNIST, self).__init__()
......