提交 63240326 编写于 作者: X Xin Pan

MLP forward backward

test=develop
上级 c89a1fb2
......@@ -188,11 +188,13 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
std::vector<Variable*> ret;
for (size_t i = 0; i < input_vars_->size(); ++i) {
bool found = false;
VarBase* origin_var = (*input_vars_)[i];
for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
Variable* var = scope->FindVar(outvar);
VarBase* origin_var = (*input_vars_)[i];
std::string orig_var = grad_to_var_->at(outvar);
PADDLE_ENFORCE(origin_var->var_desc_->Name() == orig_var);
if (origin_var->var_desc_->Name() != orig_var) {
continue;
}
VLOG(3) << "apply grad " << outvar << " with origin " << orig_var;
origin_var->ApplyGrad(scope, var);
found = true;
......
......@@ -43,9 +43,12 @@ void CreateGradOp(const framework::OpDesc& op_desc,
class Tracer {
public:
explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
explicit Tracer(framework::BlockDesc* root_block,
framework::BlockDesc* startup_block)
: root_block_(root_block), startup_block_(startup_block) {
root_scope_ = new framework::Scope();
scopes_[root_block_] = root_scope_;
scopes_[startup_block_] = root_scope_;
}
virtual ~Tracer() { delete root_scope_; }
......@@ -80,6 +83,8 @@ class Tracer {
} else {
op->pre_ops_->push_back(nullptr);
}
VLOG(3) << "input vname " << vname << " "
<< var->Get<framework::LoDTensor>().dims().size();
}
*op->output_vars_ = outputs;
......@@ -98,12 +103,19 @@ class Tracer {
outputs[i]->pre_op_ = op;
outputs[i]->pre_op_out_idx_ = i;
}
VLOG(3) << "tracer running " << op_desc->Type();
op_base->Run(*scope, platform::CPUPlace());
if (block == startup_block_) {
op->grad_op_desc_ = nullptr;
op->grad_to_var_ = nullptr;
} else {
framework::OpDesc* grad_op_desc;
auto grad_to_var = new std::unordered_map<std::string, std::string>();
CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
op->grad_op_desc_ = grad_op_desc;
op->grad_to_var_ = grad_to_var;
}
op->block_ = block;
}
......@@ -121,6 +133,7 @@ class Tracer {
private:
std::map<framework::BlockDesc*, framework::Scope*> scopes_;
framework::BlockDesc* root_block_;
framework::BlockDesc* startup_block_;
framework::Scope* root_scope_;
};
......
......@@ -49,7 +49,8 @@ class MulOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(
y_dims.size(), y_num_col_dims,
"The input tensor Y's rank of MulOp should be larger than "
"y_num_col_dims.");
"y_num_col_dims: %ld vs %ld",
y_dims.size(), y_num_col_dims);
auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);
......
......@@ -24,8 +24,9 @@ namespace pybind {
void BindTracer(pybind11::module *m) {
pybind11::class_<imperative::Tracer>(*m, "Tracer", "")
.def("__init__",
[](imperative::Tracer &self, framework::BlockDesc *root_block) {
new (&self) imperative::Tracer(root_block);
[](imperative::Tracer &self, framework::BlockDesc *root_block,
framework::BlockDesc *startup_block) {
new (&self) imperative::Tracer(root_block, startup_block);
})
.def("trace", &imperative::Tracer::Trace)
.def("get_scope", &imperative::Tracer::GetScope,
......
......@@ -489,8 +489,11 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
grad_to_var = dict()
op_desc = _create_op_desc_(
"fill_constant", {}, {"Out": [_append_grad_suffix_(loss.name)]}, {
"shape": [1],
"fill_constant",
{},
{"Out": [_append_grad_suffix_(loss.name)]},
{
"shape": [1], # TODO(panyx0718): This can be loss.shape.
"value": 1.0,
"dtype": loss.dtype,
"force_cpu": False,
......
......@@ -1316,6 +1316,9 @@ class Block(object):
def _prepend_op(self, *args, **kwargs):
op_desc = self.desc._prepend_op()
op = Operator(self, op_desc, *args, **kwargs)
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc)
self.ops.insert(0, op)
return op
......
......@@ -28,7 +28,8 @@ def enabled():
def guard():
train = framework.Program()
startup = framework.Program()
tracer = core.Tracer(train.current_block().desc)
tracer = core.Tracer(train.current_block().desc,
startup.current_block().desc)
with framework.program_guard(train, startup):
with framework.unique_name.guard():
with framework._imperative_guard(tracer):
......
......@@ -25,11 +25,9 @@ __all__ = ['PyLayer']
class PyLayer(core.Layer):
def __init__(self):
pass
self._built = False
def __call__(self, inputs):
# TODO(panyx0718): Support declarative mode as well.
assert base.enabled()
if not isinstance(inputs, list) and not isinstance(inputs, tuple):
inputs = [inputs]
......@@ -37,8 +35,15 @@ class PyLayer(core.Layer):
for x in inputs:
py_var = base.to_variable(x)
var_inputs.append(py_var)
if not self._built:
self._build_once(inputs)
self._built = True
outputs = self.forward(var_inputs)
return outputs
def _build_once(self, inputs):
pass
def forward(self, inputs):
return []
......@@ -29,6 +29,7 @@ from . import utils
from .. import unique_name
from functools import reduce
from .. import core
from ..imperative import layers
__all__ = [
'fc',
......@@ -9426,3 +9427,48 @@ def huber_loss(input, label, delta):
'Residual': residual},
attrs={'delta': delta})
return out
class FC(layers.PyLayer):
def __init__(self,
size,
param_attr=None,
num_flatten_dims=1,
dtype=core.VarDesc.VarType.FP32):
super(FC, self).__init__()
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
self._helper = LayerHelper('FC', param_attr=param_attr)
def _build_once(self, inputs):
input_shape = inputs[0].shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
] + [self._size]
self._w = self._helper.create_parameter(
attr=self._helper.param_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=False)
def forward(self, inputs):
tmp = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": inputs[0],
"Y": self._w},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": self._num_flatten_dims,
"y_num_col_dims": 1
})
out = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="sum",
inputs={"X": [tmp]},
outputs={"Out": out},
attrs={"use_mkldnn": False})
return out
......@@ -12,12 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import sys
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layers.nn import FC
@contextlib.contextmanager
def new_program_scope():
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
class MyLayer(fluid.imperative.PyLayer):
......@@ -30,6 +41,23 @@ class MyLayer(fluid.imperative.PyLayer):
return [fluid.layers.elementwise_mul(x, x)]
class MLP(fluid.imperative.PyLayer):
def __init__(self):
super(MLP, self).__init__()
self._fc1 = FC(3,
fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = FC(4,
fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs):
x = self._fc1(inputs[0])
x = self._fc2(x)
x = fluid.layers.reduce_sum(x)
return x
class TestImperative(unittest.TestCase):
def test_layer(self):
with fluid.imperative.guard():
......@@ -39,13 +67,56 @@ class TestImperative(unittest.TestCase):
l.forward([])
def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard():
l = MyLayer()
x = l(np.array([1.0, 2.0, -1.0], dtype=np.float32))[0]
x = l(np_inp)[0]
self.assertIsNotNone(x)
sys.stderr.write("%s output: %s\n" % (x, x._numpy()))
dy_out = x._numpy()
x._backward()
sys.stderr.write("grad %s\n" % l._x_for_debug._gradient())
dy_grad = l._x_for_debug._gradient()
with new_program_scope():
inp = fluid.layers.data(
name="inp", shape=[3], append_batch_size=False)
l = MyLayer()
x = l(inp)[0]
param_grads = fluid.backward.append_backward(
x, parameter_list=[l._x_for_debug.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
fetch_list=[x.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad))
def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard():
mlp = MLP()
out = mlp(np_inp)
dy_out = out._numpy()
out._backward()
dy_grad = mlp._fc1._w._gradient()
with new_program_scope():
inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
mlp = MLP()
out = mlp(inp)
param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1._w.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
fetch_list=[out.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad))
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册