Commit 95fb3128 authored by tensor-tang

Merge remote-tracking branch 'ups/develop' into refine/jit

@@ -188,11 +188,13 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
   std::vector<Variable*> ret;
   for (size_t i = 0; i < input_vars_->size(); ++i) {
     bool found = false;
+    VarBase* origin_var = (*input_vars_)[i];
     for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
       Variable* var = scope->FindVar(outvar);
-      VarBase* origin_var = (*input_vars_)[i];
       std::string orig_var = grad_to_var_->at(outvar);
-      PADDLE_ENFORCE(origin_var->var_desc_->Name() == orig_var);
+      if (origin_var->var_desc_->Name() != orig_var) {
+        continue;
+      }
       VLOG(3) << "apply grad " << outvar << " with origin " << orig_var;
       origin_var->ApplyGrad(scope, var);
       found = true;
......
@@ -43,9 +43,12 @@ void CreateGradOp(const framework::OpDesc& op_desc,
 class Tracer {
  public:
-  explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
+  explicit Tracer(framework::BlockDesc* root_block,
+                  framework::BlockDesc* startup_block)
+      : root_block_(root_block), startup_block_(startup_block) {
     root_scope_ = new framework::Scope();
     scopes_[root_block_] = root_scope_;
+    scopes_[startup_block_] = root_scope_;
   }

   virtual ~Tracer() { delete root_scope_; }
@@ -80,6 +83,8 @@ class Tracer {
       } else {
         op->pre_ops_->push_back(nullptr);
       }
+      VLOG(3) << "input vname " << vname << " "
+              << var->Get<framework::LoDTensor>().dims().size();
     }

     *op->output_vars_ = outputs;
@@ -98,12 +103,19 @@ class Tracer {
       outputs[i]->pre_op_ = op;
       outputs[i]->pre_op_out_idx_ = i;
     }
+    VLOG(3) << "tracer running " << op_desc->Type();
     op_base->Run(*scope, platform::CPUPlace());
-    framework::OpDesc* grad_op_desc;
-    auto grad_to_var = new std::unordered_map<std::string, std::string>();
-    CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
-    op->grad_op_desc_ = grad_op_desc;
-    op->grad_to_var_ = grad_to_var;
+    if (block == startup_block_) {
+      op->grad_op_desc_ = nullptr;
+      op->grad_to_var_ = nullptr;
+    } else {
+      framework::OpDesc* grad_op_desc;
+      auto grad_to_var = new std::unordered_map<std::string, std::string>();
+      CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
+      op->grad_op_desc_ = grad_op_desc;
+      op->grad_to_var_ = grad_to_var;
+    }
     op->block_ = block;
   }
@@ -121,6 +133,7 @@ class Tracer {
  private:
   std::map<framework::BlockDesc*, framework::Scope*> scopes_;
   framework::BlockDesc* root_block_;
+  framework::BlockDesc* startup_block_;
   framework::Scope* root_scope_;
 };
......
@@ -49,7 +49,8 @@ class MulOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_GT(
         y_dims.size(), y_num_col_dims,
         "The input tensor Y's rank of MulOp should be larger than "
-        "y_num_col_dims.");
+        "y_num_col_dims: %ld vs %ld",
+        y_dims.size(), y_num_col_dims);

     auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
     auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);
......
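The richer message now reports both ranks when the check fails. For readers unfamiliar with the flattening performed right after the check, here is a rough numpy sketch of the semantics that x_num_col_dims / y_num_col_dims imply (assumed behavior of framework::flatten_to_2d, not Paddle's actual implementation):

```python
import numpy as np

# Assumed semantics: the first num_col_dims dimensions collapse into the
# row dimension, the remaining dimensions collapse into the column dimension.
def flatten_to_2d(a, num_col_dims):
    rows = int(np.prod(a.shape[:num_col_dims]))
    return a.reshape(rows, -1)

x = np.zeros((2, 3, 4), dtype=np.float32)
y = np.zeros((4, 5), dtype=np.float32)
x2d = flatten_to_2d(x, 2)   # shape (6, 4)
y2d = flatten_to_2d(y, 1)   # shape (4, 5)
out = x2d @ y2d             # mul output, shape (6, 5)
```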
@@ -24,8 +24,9 @@ namespace pybind {
 void BindTracer(pybind11::module *m) {
   pybind11::class_<imperative::Tracer>(*m, "Tracer", "")
       .def("__init__",
-           [](imperative::Tracer &self, framework::BlockDesc *root_block) {
-             new (&self) imperative::Tracer(root_block);
+           [](imperative::Tracer &self, framework::BlockDesc *root_block,
+              framework::BlockDesc *startup_block) {
+             new (&self) imperative::Tracer(root_block, startup_block);
            })
       .def("trace", &imperative::Tracer::Trace)
       .def("get_scope", &imperative::Tracer::GetScope,
......
@@ -509,11 +509,11 @@ function assert_api_spec_approvals() {
     if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
       # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
       APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-        python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
+        python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
       echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
       if [ "${APPROVALS}" == "FALSE" ]; then
-        echo "You must have at least 2 approvals for the api change! ${API_FILE}"
+        echo "You must have panyx0718 approval for the api change! ${API_FILE}"
         exit 1
       fi
     fi
   done
@@ -521,11 +521,11 @@ function assert_api_spec_approvals() {
   HAS_CONST_CAST=`git diff -U0 upstream/$BRANCH |grep -o -m 1 "const_cast" || true`
   if [ ${HAS_CONST_CAST} ] && [ "${GIT_PR_ID}" != "" ]; then
     APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-      python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
+      python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
     echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
     if [ "${APPROVALS}" == "FALSE" ]; then
-      echo "You must have at least 2 approvals for the const_cast"
+      echo "You must have panyx0718 approval for the const_cast"
       exit 1
     fi
   fi
......
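The script now requires one approval from user id 2887803 (panyx0718) instead of two approvals from a larger reviewer set. For context, a rough Python sketch of the kind of check a script like tools/check_pr_approval.py performs on the piped GitHub reviews JSON (assumed behavior; the real script may differ):

```python
import json
import sys

# Sketch only: count distinct APPROVED reviews from the given reviewer ids
# and compare against the required count passed as the first argument.
def enough_approvals(required_count, reviewer_ids, reviews_json):
    reviews = json.loads(reviews_json)
    approved = {str(r["user"]["id"])
                for r in reviews if r.get("state") == "APPROVED"}
    return len(approved & set(reviewer_ids)) >= required_count

if __name__ == "__main__":
    required = int(sys.argv[1])
    ids = sys.argv[2:]
    print("TRUE" if enough_approvals(required, ids, sys.stdin.read()) else "FALSE")
```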
@@ -489,8 +489,11 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
     grad_to_var = dict()
     op_desc = _create_op_desc_(
-        "fill_constant", {}, {"Out": [_append_grad_suffix_(loss.name)]}, {
-            "shape": [1],
+        "fill_constant",
+        {},
+        {"Out": [_append_grad_suffix_(loss.name)]},
+        {
+            "shape": [1],  # TODO(panyx0718): This can be loss.shape.
             "value": 1.0,
             "dtype": loss.dtype,
             "force_cpu": False,
......
@@ -1324,6 +1324,9 @@ class Block(object):
     def _prepend_op(self, *args, **kwargs):
         op_desc = self.desc._prepend_op()
         op = Operator(self, op_desc, *args, **kwargs)
+        if _in_imperative_mode():
+            _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
+                                       [v._ivar for v in op.outputs], self.desc)
         self.ops.insert(0, op)
         return op
......
@@ -28,7 +28,8 @@ def enabled():
 def guard():
     train = framework.Program()
     startup = framework.Program()
-    tracer = core.Tracer(train.current_block().desc)
+    tracer = core.Tracer(train.current_block().desc,
+                         startup.current_block().desc)

     with framework.program_guard(train, startup):
         with framework.unique_name.guard():
             with framework._imperative_guard(tracer):
......
@@ -25,11 +25,9 @@ __all__ = ['PyLayer']
 class PyLayer(core.Layer):
     def __init__(self):
-        pass
+        self._built = False

     def __call__(self, inputs):
-        # TODO(panyx0718): Support declarative mode as well.
-        assert base.enabled()
         if not isinstance(inputs, list) and not isinstance(inputs, tuple):
             inputs = [inputs]
@@ -37,8 +35,15 @@ class PyLayer(core.Layer):
         for x in inputs:
             py_var = base.to_variable(x)
             var_inputs.append(py_var)
+        if not self._built:
+            self._build_once(inputs)
+            self._built = True
+
         outputs = self.forward(var_inputs)
         return outputs

+    def _build_once(self, inputs):
+        pass
+
     def forward(self, inputs):
         return []
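PyLayer now tracks a _built flag and runs _build_once exactly once, on the first call, passing the raw inputs so subclasses can do one-time setup such as sizing parameters from the first input. A minimal hypothetical subclass illustrating the hook against this commit's API (Doubler is an example name, not part of this change; the FC layer below is the real user):

```python
import numpy as np
import paddle.fluid as fluid

class Doubler(fluid.imperative.PyLayer):
    def __init__(self):
        super(Doubler, self).__init__()
        self._in_shape = None

    def _build_once(self, inputs):
        # One-time setup on the first call; real layers create parameters
        # here (see FC below). inputs are the raw call arguments.
        self._in_shape = inputs[0].shape

    def forward(self, inputs):
        # Called on every invocation with the converted variables.
        return [fluid.layers.elementwise_add(inputs[0], inputs[0])]

with fluid.imperative.guard():
    layer = Doubler()
    out = layer(np.array([1.0, 2.0], dtype=np.float32))[0]
    print(out._numpy())  # expected [2., 4.]
```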
@@ -29,6 +29,7 @@ from . import utils
 from .. import unique_name
 from functools import reduce
 from .. import core
+from ..imperative import layers

 __all__ = [
     'fc',
@@ -9426,3 +9427,47 @@ def huber_loss(input, label, delta):
             'Residual': residual},
         attrs={'delta': delta})
     return out
+
+
+class FC(layers.PyLayer):
+    def __init__(self,
+                 size,
+                 param_attr=None,
+                 num_flatten_dims=1,
+                 dtype=core.VarDesc.VarType.FP32):
+        super(FC, self).__init__()
+        self._size = size
+        self._num_flatten_dims = num_flatten_dims
+        self._dtype = dtype
+        self._helper = LayerHelper('FC', param_attr=param_attr)
+
+    def _build_once(self, inputs):
+        input_shape = inputs[0].shape
+        param_shape = [
+            reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
+        ] + [self._size]
+        self._w = self._helper.create_parameter(
+            attr=self._helper.param_attr,
+            shape=param_shape,
+            dtype=self._dtype,
+            is_bias=False)
+
+    def forward(self, inputs):
+        tmp = self._helper.create_variable_for_type_inference(self._dtype)
+        self._helper.append_op(
+            type="mul",
+            inputs={"X": inputs[0],
+                    "Y": self._w},
+            outputs={"Out": tmp},
+            attrs={
+                "x_num_col_dims": self._num_flatten_dims,
+                "y_num_col_dims": 1
+            })
+        out = self._helper.create_variable_for_type_inference(self._dtype)
+        self._helper.append_op(
+            type="sum",
+            inputs={"X": [tmp]},
+            outputs={"Out": out},
+            attrs={"use_mkldnn": False})
+        return out
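FC creates its weight lazily in _build_once (sized from the first input) and then issues a mul followed by a single-input sum. As a sanity check, a rough numpy sketch of the computation, under the flatten_to_2d semantics assumed earlier (fc_reference is illustrative, not Paddle code):

```python
import numpy as np

def fc_reference(x, w, num_flatten_dims=1):
    # "mul": flatten x to 2-D, then matrix-multiply by the weight.
    rows = int(np.prod(x.shape[:num_flatten_dims]))
    tmp = x.reshape(rows, -1) @ w
    # "sum" over the single-element list [tmp] effectively copies it.
    return tmp

x = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
w = np.full((2, 3), 0.1, dtype=np.float32)  # e.g. FC(3, Constant(0.1)) weight
print(fc_reference(x, w))  # shape (2, 3)
```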
@@ -12,12 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import contextlib
 import unittest
-import sys
 import numpy as np

 import paddle.fluid as fluid
 from paddle.fluid import core
+from paddle.fluid.layers.nn import FC
+
+
+@contextlib.contextmanager
+def new_program_scope():
+    prog = fluid.Program()
+    startup_prog = fluid.Program()
+    scope = fluid.core.Scope()
+    with fluid.scope_guard(scope):
+        with fluid.program_guard(prog, startup_prog):
+            yield


 class MyLayer(fluid.imperative.PyLayer):
@@ -30,6 +41,23 @@ class MyLayer(fluid.imperative.PyLayer):
         return [fluid.layers.elementwise_mul(x, x)]


+class MLP(fluid.imperative.PyLayer):
+    def __init__(self):
+        super(MLP, self).__init__()
+        self._fc1 = FC(3,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc2 = FC(4,
+                       fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+
+    def forward(self, inputs):
+        x = self._fc1(inputs[0])
+        x = self._fc2(x)
+        x = fluid.layers.reduce_sum(x)
+        return x
+
+
 class TestImperative(unittest.TestCase):
     def test_layer(self):
         with fluid.imperative.guard():
@@ -39,13 +67,56 @@ class TestImperative(unittest.TestCase):
             l.forward([])

     def test_layer_in_out(self):
+        np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.imperative.guard():
             l = MyLayer()
-            x = l(np.array([1.0, 2.0, -1.0], dtype=np.float32))[0]
+            x = l(np_inp)[0]
             self.assertIsNotNone(x)
-            sys.stderr.write("%s output: %s\n" % (x, x._numpy()))
+            dy_out = x._numpy()
             x._backward()
-            sys.stderr.write("grad %s\n" % l._x_for_debug._gradient())
+            dy_grad = l._x_for_debug._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[3], append_batch_size=False)
+            l = MyLayer()
+            x = l(inp)[0]
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[l._x_for_debug.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
+    def test_mlp(self):
+        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+        with fluid.imperative.guard():
+            mlp = MLP()
+            out = mlp(np_inp)
+            dy_out = out._numpy()
+            out._backward()
+            dy_grad = mlp._fc1._w._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[2, 2], append_batch_size=False)
+            mlp = MLP()
+            out = mlp(inp)
+            param_grads = fluid.backward.append_backward(
+                out, parameter_list=[mlp._fc1._w.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace())
+            exe.run(fluid.default_startup_program())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[out.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))


 if __name__ == '__main__':
......