Unverified · Commit cf3a07e8 authored by Xin Pan, committed by GitHub

Merge pull request #14878 from panyx0718/imperative

MLP forward backward
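This change runs an MLP forward and backward through the new imperative (eager) mode: the Tracer now also knows about the startup block, PyLayer gains a one-time _build_once hook, a parameterized FC layer is added to layers/nn.py, and an MLP forward/backward test compares against the static graph. A minimal sketch of the flow this enables, adapted from the test_mlp test added below (MLP is the test-only layer defined in that test; the input values are illustrative):

import numpy as np
import paddle.fluid as fluid

np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard():
    mlp = MLP()                        # test-only PyLayer built from two FC layers
    out = mlp(np_inp)                  # forward: ops run eagerly through the Tracer
    dy_out = out._numpy()              # fetch the forward result
    out._backward()                    # backward: the recorded grad ops are applied
    dy_grad = mlp._fc1._w._gradient()  # gradient of the first FC's weight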
@@ -188,11 +188,13 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
   std::vector<Variable*> ret;
   for (size_t i = 0; i < input_vars_->size(); ++i) {
     bool found = false;
+    VarBase* origin_var = (*input_vars_)[i];
     for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
       Variable* var = scope->FindVar(outvar);
-      VarBase* origin_var = (*input_vars_)[i];
       std::string orig_var = grad_to_var_->at(outvar);
-      PADDLE_ENFORCE(origin_var->var_desc_->Name() == orig_var);
+      if (origin_var->var_desc_->Name() != orig_var) {
+        continue;
+      }
       VLOG(3) << "apply grad " << outvar << " with origin " << orig_var;
       origin_var->ApplyGrad(scope, var);
       found = true;
......
@@ -43,9 +43,12 @@ void CreateGradOp(const framework::OpDesc& op_desc,
 class Tracer {
  public:
-  explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
+  explicit Tracer(framework::BlockDesc* root_block,
+                  framework::BlockDesc* startup_block)
+      : root_block_(root_block), startup_block_(startup_block) {
     root_scope_ = new framework::Scope();
     scopes_[root_block_] = root_scope_;
+    scopes_[startup_block_] = root_scope_;
   }
 
   virtual ~Tracer() { delete root_scope_; }
@@ -80,6 +83,8 @@ class Tracer {
       } else {
         op->pre_ops_->push_back(nullptr);
       }
+      VLOG(3) << "input vname " << vname << " "
+              << var->Get<framework::LoDTensor>().dims().size();
     }
 
     *op->output_vars_ = outputs;
@@ -98,12 +103,19 @@ class Tracer {
       outputs[i]->pre_op_ = op;
       outputs[i]->pre_op_out_idx_ = i;
     }
+
+    VLOG(3) << "tracer running " << op_desc->Type();
     op_base->Run(*scope, platform::CPUPlace());
-    framework::OpDesc* grad_op_desc;
-    auto grad_to_var = new std::unordered_map<std::string, std::string>();
-    CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
-    op->grad_op_desc_ = grad_op_desc;
-    op->grad_to_var_ = grad_to_var;
+    if (block == startup_block_) {
+      op->grad_op_desc_ = nullptr;
+      op->grad_to_var_ = nullptr;
+    } else {
+      framework::OpDesc* grad_op_desc;
+      auto grad_to_var = new std::unordered_map<std::string, std::string>();
+      CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
+      op->grad_op_desc_ = grad_op_desc;
+      op->grad_to_var_ = grad_to_var;
+    }
     op->block_ = block;
   }
@@ -121,6 +133,7 @@ class Tracer {
  private:
   std::map<framework::BlockDesc*, framework::Scope*> scopes_;
   framework::BlockDesc* root_block_;
+  framework::BlockDesc* startup_block_;
   framework::Scope* root_scope_;
 };
......
@@ -49,7 +49,8 @@ class MulOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_GT(
         y_dims.size(), y_num_col_dims,
         "The input tensor Y's rank of MulOp should be larger than "
-        "y_num_col_dims.");
+        "y_num_col_dims: %ld vs %ld",
+        y_dims.size(), y_num_col_dims);
 
     auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
     auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);
......
@@ -24,8 +24,9 @@ namespace pybind {
 void BindTracer(pybind11::module *m) {
   pybind11::class_<imperative::Tracer>(*m, "Tracer", "")
       .def("__init__",
-           [](imperative::Tracer &self, framework::BlockDesc *root_block) {
-             new (&self) imperative::Tracer(root_block);
+           [](imperative::Tracer &self, framework::BlockDesc *root_block,
+              framework::BlockDesc *startup_block) {
+             new (&self) imperative::Tracer(root_block, startup_block);
            })
       .def("trace", &imperative::Tracer::Trace)
       .def("get_scope", &imperative::Tracer::GetScope,
......
@@ -489,8 +489,11 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
     grad_to_var = dict()
     op_desc = _create_op_desc_(
-        "fill_constant", {}, {"Out": [_append_grad_suffix_(loss.name)]}, {
-            "shape": [1],
+        "fill_constant",
+        {},
+        {"Out": [_append_grad_suffix_(loss.name)]},
+        {
+            "shape": [1],  # TODO(panyx0718): This can be loss.shape.
             "value": 1.0,
             "dtype": loss.dtype,
             "force_cpu": False,
......
@@ -1324,6 +1324,9 @@ class Block(object):
     def _prepend_op(self, *args, **kwargs):
         op_desc = self.desc._prepend_op()
         op = Operator(self, op_desc, *args, **kwargs)
+        if _in_imperative_mode():
+            _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
+                                       [v._ivar for v in op.outputs], self.desc)
         self.ops.insert(0, op)
         return op
......
@@ -28,7 +28,8 @@ def enabled():
 def guard():
     train = framework.Program()
     startup = framework.Program()
-    tracer = core.Tracer(train.current_block().desc)
+    tracer = core.Tracer(train.current_block().desc,
+                         startup.current_block().desc)
     with framework.program_guard(train, startup):
         with framework.unique_name.guard():
             with framework._imperative_guard(tracer):
......
@@ -25,11 +25,9 @@ __all__ = ['PyLayer']
 
 class PyLayer(core.Layer):
     def __init__(self):
-        pass
+        self._built = False
 
     def __call__(self, inputs):
-        # TODO(panyx0718): Support declarative mode as well.
-        assert base.enabled()
         if not isinstance(inputs, list) and not isinstance(inputs, tuple):
             inputs = [inputs]
 
@@ -37,8 +35,15 @@ class PyLayer(core.Layer):
         for x in inputs:
             py_var = base.to_variable(x)
             var_inputs.append(py_var)
+        if not self._built:
+            self._build_once(inputs)
+            self._built = True
+
         outputs = self.forward(var_inputs)
         return outputs
 
+    def _build_once(self, inputs):
+        pass
+
     def forward(self, inputs):
         return []
@@ -29,6 +29,7 @@ from . import utils
 from .. import unique_name
 from functools import reduce
 from .. import core
+from ..imperative import layers
 
 __all__ = [
     'fc',
@@ -9426,3 +9427,47 @@ def huber_loss(input, label, delta):
                  'Residual': residual},
         attrs={'delta': delta})
     return out
+
+
+class FC(layers.PyLayer):
+    def __init__(self,
+                 size,
+                 param_attr=None,
+                 num_flatten_dims=1,
+                 dtype=core.VarDesc.VarType.FP32):
+        super(FC, self).__init__()
+        self._size = size
+        self._num_flatten_dims = num_flatten_dims
+        self._dtype = dtype
+        self._helper = LayerHelper('FC', param_attr=param_attr)
+
+    def _build_once(self, inputs):
+        input_shape = inputs[0].shape
+        param_shape = [
+            reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
+        ] + [self._size]
+        self._w = self._helper.create_parameter(
+            attr=self._helper.param_attr,
+            shape=param_shape,
+            dtype=self._dtype,
+            is_bias=False)
+
+    def forward(self, inputs):
+        tmp = self._helper.create_variable_for_type_inference(self._dtype)
+        self._helper.append_op(
+            type="mul",
+            inputs={"X": inputs[0],
+                    "Y": self._w},
+            outputs={"Out": tmp},
+            attrs={
+                "x_num_col_dims": self._num_flatten_dims,
+                "y_num_col_dims": 1
+            })
+        out = self._helper.create_variable_for_type_inference(self._dtype)
+        self._helper.append_op(
+            type="sum",
+            inputs={"X": [tmp]},
+            outputs={"Out": out},
+            attrs={"use_mkldnn": False})
+        return out
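For reference, a hedged sketch of how this new FC layer is meant to be used under imperative mode, mirroring the MLP test added further down in this PR (the input array and output size are illustrative; the parameter is created lazily on the first call via _build_once):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.layers.nn import FC

with fluid.imperative.guard():
    fc = FC(3, fluid.ParamAttr(
        initializer=fluid.initializer.Constant(value=0.1)))
    # First call builds the (2, 3) weight from the input shape, then runs mul/sum eagerly.
    out = fc(np.array([[1.0, 2.0]], dtype=np.float32))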
@@ -12,12 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import contextlib
 import unittest
-import sys
 import numpy as np
 
 import paddle.fluid as fluid
 from paddle.fluid import core
+from paddle.fluid.layers.nn import FC
+
+
+@contextlib.contextmanager
+def new_program_scope():
+    prog = fluid.Program()
+    startup_prog = fluid.Program()
+    scope = fluid.core.Scope()
+    with fluid.scope_guard(scope):
+        with fluid.program_guard(prog, startup_prog):
+            yield
 
 
 class MyLayer(fluid.imperative.PyLayer):
@@ -30,6 +41,23 @@ class MyLayer(fluid.imperative.PyLayer):
         return [fluid.layers.elementwise_mul(x, x)]
 
 
+class MLP(fluid.imperative.PyLayer):
+    def __init__(self):
+        super(MLP, self).__init__()
+        self._fc1 = FC(3,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc2 = FC(4,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+
+    def forward(self, inputs):
+        x = self._fc1(inputs[0])
+        x = self._fc2(x)
+        x = fluid.layers.reduce_sum(x)
+        return x
+
+
 class TestImperative(unittest.TestCase):
     def test_layer(self):
         with fluid.imperative.guard():
@@ -39,13 +67,56 @@ class TestImperative(unittest.TestCase):
             l.forward([])
 
     def test_layer_in_out(self):
+        np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.imperative.guard():
             l = MyLayer()
-            x = l(np.array([1.0, 2.0, -1.0], dtype=np.float32))[0]
+            x = l(np_inp)[0]
             self.assertIsNotNone(x)
-            sys.stderr.write("%s output: %s\n" % (x, x._numpy()))
+            dy_out = x._numpy()
             x._backward()
-            sys.stderr.write("grad %s\n" % l._x_for_debug._gradient())
+            dy_grad = l._x_for_debug._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[3], append_batch_size=False)
+            l = MyLayer()
+            x = l(inp)[0]
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[l._x_for_debug.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
+    def test_mlp(self):
+        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+        with fluid.imperative.guard():
+            mlp = MLP()
+            out = mlp(np_inp)
+            dy_out = out._numpy()
+            out._backward()
+            dy_grad = mlp._fc1._w._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[2, 2], append_batch_size=False)
+            mlp = MLP()
+            out = mlp(inp)
+            param_grads = fluid.backward.append_backward(
+                out, parameter_list=[mlp._fc1._w.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace())
+            exe.run(fluid.default_startup_program())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[out.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
 
 if __name__ == '__main__':
......