Commit 3cd10a7c authored by: M minqiyang

Add Conv2D forward

test=develop
Parent 8d88c5a8
......@@ -144,6 +144,9 @@ void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
VLOG(3) << "op grad " << grad_op_desc_->Type();
if (!grad_to_var_) {
return {};
}
for (const std::string& grad_invar : grad_op_desc_->InputArgumentNames()) {
if (grad_to_var_->find(grad_invar) == grad_to_var_->end()) {
......
......@@ -60,7 +60,8 @@ class OpBase {
pre_ops_(new std::vector<OpBase*>()),
pre_ops_out_idx_(new std::vector<int>()),
op_desc_(nullptr),
grad_op_desc_(nullptr) {}
grad_op_desc_(nullptr),
grad_to_var_(nullptr) {}
virtual ~OpBase() {
delete input_vars_;
......
......@@ -43,20 +43,14 @@ void CreateGradOp(const framework::OpDesc& op_desc,
class Tracer {
public:
explicit Tracer(framework::BlockDesc* root_block,
framework::BlockDesc* startup_block)
: root_block_(root_block), startup_block_(startup_block) {
root_scope_ = new framework::Scope();
scopes_[root_block_] = root_scope_;
scopes_[startup_block_] = root_scope_;
}
explicit Tracer(framework::BlockDesc* root_block)
: root_scope_(new framework::Scope()) {}
virtual ~Tracer() { delete root_scope_; }
virtual ~Tracer() {}
void Trace(OpBase* op, const std::vector<VarBase*>& inputs,
const std::vector<VarBase*>& outputs,
framework::BlockDesc* block) {
framework::Scope* scope = GetScope(block);
const std::vector<VarBase*>& outputs, framework::BlockDesc* block,
const bool stop_gradient) {
framework::OpDesc* op_desc = op->op_desc_;
VLOG(3) << "tracer tracing " << op_desc->Type();
op_desc->InferShape(*block);
......@@ -67,7 +61,7 @@ class Tracer {
*op->input_vars_ = inputs;
for (VarBase* input : inputs) {
const std::string vname = input->var_desc_->Name();
framework::Variable* var = scope->Var(vname);
framework::Variable* var = root_scope_->Var(vname);
input->var_ = var;
if (!var->IsInitialized()) {
framework::VarDesc* var_desc = block->FindVar(vname);
......@@ -90,7 +84,7 @@ class Tracer {
*op->output_vars_ = outputs;
for (size_t i = 0; i < outputs.size(); ++i) {
const std::string vname = outputs[i]->var_desc_->Name();
framework::Variable* var = scope->Var(vname);
framework::Variable* var = root_scope_->Var(vname);
if (!var->IsInitialized()) {
framework::VarDesc* var_desc = block->FindVar(vname);
if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
......@@ -105,11 +99,8 @@ class Tracer {
}
VLOG(3) << "tracer running " << op_desc->Type();
op_base->Run(*scope, platform::CPUPlace());
if (block == startup_block_) {
op->grad_op_desc_ = nullptr;
op->grad_to_var_ = nullptr;
} else {
op_base->Run(*root_scope_, platform::CPUPlace());
if (!stop_gradient) {
framework::OpDesc* grad_op_desc;
auto grad_to_var = new std::unordered_map<std::string, std::string>();
CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
......@@ -119,22 +110,10 @@ class Tracer {
op->block_ = block;
}
framework::Scope* GetScope(framework::BlockDesc* block) {
if (scopes_.find(block) != scopes_.end()) {
return scopes_.at(block);
}
framework::BlockDesc* parent_block = block->ParentBlock();
PADDLE_ENFORCE(scopes_.find(parent_block) != scopes_.end());
framework::Scope* scope = &scopes_[parent_block]->NewScope();
scopes_[block] = scope;
return scope;
}
framework::Scope* GetScope() { return root_scope_.get(); }
private:
std::map<framework::BlockDesc*, framework::Scope*> scopes_;
framework::BlockDesc* root_block_;
framework::BlockDesc* startup_block_;
framework::Scope* root_scope_;
std::unique_ptr<framework::Scope> root_scope_;
};
} // namespace imperative
......
......@@ -24,9 +24,8 @@ namespace pybind {
void BindTracer(pybind11::module *m) {
pybind11::class_<imperative::Tracer>(*m, "Tracer", "")
.def("__init__",
[](imperative::Tracer &self, framework::BlockDesc *root_block,
framework::BlockDesc *startup_block) {
new (&self) imperative::Tracer(root_block, startup_block);
[](imperative::Tracer &self, framework::BlockDesc *root_block) {
new (&self) imperative::Tracer(root_block);
})
.def("trace", &imperative::Tracer::Trace)
.def("get_scope", &imperative::Tracer::GetScope,
......
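The two hunks above retire the two-block Tracer: construction now takes only the root block, and GetScope() no longer resolves per-block scopes. A minimal sketch of driving the rebound interface from Python, assuming a fluid build that contains this commit (normally fluid.imperative.guard() does this wiring for you):

    import paddle.fluid as fluid
    from paddle.fluid import core

    train = fluid.Program()
    # The startup-block argument is gone; a single root scope backs all traced ops.
    tracer = core.Tracer(train.current_block().desc)
    scope = tracer.get_scope()  # no longer takes a block desc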
......@@ -117,6 +117,12 @@ PYBIND11_MODULE(core, m) {
self.RunBackward(scope);
})
.def("_grad", &imperative::VarBase::Grad)
.def_property("value",
[](const imperative::VarBase &self) { return self.var_; },
[](imperative::VarBase &self, framework::Variable *var) {
self.var_ = var;
},
py::return_value_policy::reference)
.def_property(
"desc",
[](const imperative::VarBase &self) { return self.var_desc_; },
......
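The new value property exposes (and allows replacing) the framework::Variable behind a traced VarBase, bound by reference. A hedged sketch of reading it once an op has been traced; whether the pointer is populated before the first traced op depends on code outside this diff:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.imperative.base import to_variable

    with fluid.imperative.guard():
        x = to_variable(np.ones([2, 2], dtype=np.float32))
        y = fluid.layers.relu(x)  # traced; the tracer binds y's Variable in the root scope
        raw_var = y._ivar.value   # underlying framework::Variable, by reference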
......@@ -361,7 +361,7 @@ class Variable(object):
self._ivar.desc = self.desc
def _numpy(self):
scope = _imperative_tracer().get_scope(self.block.desc)
scope = _imperative_tracer().get_scope()
tensor = core.get_variable_tensor(scope, self.desc.name())
return np.array(tensor)
......@@ -573,7 +573,8 @@ class Operator(object):
type=None,
inputs=None,
outputs=None,
attrs=None):
attrs=None,
stop_gradient=False):
self.block = block
self.desc = desc
# note: not add self.attrs here:
......@@ -1264,9 +1265,12 @@ class Block(object):
"""
op_desc = self.desc.append_op()
op = Operator(block=self, desc=op_desc, *args, **kwargs)
print("append_op", kwargs.get("type"), kwargs.get("stop_gradient",
False))
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc)
[v._ivar for v in op.outputs], self.desc,
kwargs.get("stop_gradient", False))
self.ops.append(op)
return op
......@@ -1316,9 +1320,12 @@ class Block(object):
def _prepend_op(self, *args, **kwargs):
op_desc = self.desc._prepend_op()
op = Operator(self, op_desc, *args, **kwargs)
print("prepend_op", kwargs.get("type"), kwargs.get("stop_gradient",
False))
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
[v._ivar for v in op.outputs], self.desc)
[v._ivar for v in op.outputs], self.desc,
kwargs.get("stop_gradient", False))
self.ops.insert(0, op)
return op
......
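Block.append_op and Block._prepend_op now forward a stop_gradient keyword into Tracer::Trace, replacing the old startup-block special case. An illustrative sketch under imperative guard (the fill_constant arguments here are placeholders, not taken from this commit):

    import paddle.fluid as fluid

    with fluid.imperative.guard():
        block = fluid.default_main_program().current_block()
        out = block.create_var(name='ones', shape=[2, 2], dtype='float32')
        block.append_op(
            type='fill_constant',
            inputs={},
            outputs={'Out': [out]},
            attrs={'shape': [2, 2], 'dtype': int(out.dtype), 'value': 1.0},
            stop_gradient=True)  # traced and executed, but no grad op is created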
......@@ -20,6 +20,10 @@ from .base import *
from . import layers
from .layers import *
from . import nn
from .nn import *
__all__ = []
__all__ += layers.__all__
__all__ += base.__all__
__all__ += nn.__all__
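With nn re-exported here, Conv2D can be imported straight from paddle.fluid.imperative.nn, which is what the new test at the end of this diff does. A hedged usage sketch mirroring the test's constructor arguments; Conv2D itself is defined in imperative/nn.py outside this diff, so calling it directly on a numpy array assumes it follows the PyLayer calling convention:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.imperative.nn import Conv2D

    with fluid.imperative.guard():
        conv = Conv2D(num_channels=3, num_filters=20, filter_size=3,
                      stride=[1, 1], padding=[0, 0], dilation=[1, 1],
                      groups=1, use_cudnn=False)
        out = conv(np.random.rand(2, 3, 5, 5).astype('float32'))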
......@@ -28,8 +28,7 @@ def enabled():
def guard():
train = framework.Program()
startup = framework.Program()
tracer = core.Tracer(train.current_block().desc,
startup.current_block().desc)
tracer = core.Tracer(train.current_block().desc)
with framework.program_guard(train, startup):
with framework.unique_name.guard():
with framework._imperative_guard(tracer):
......@@ -46,7 +45,7 @@ def to_variable(value, block=None):
name=None,
shape=value.shape,
dtype=value.dtype)
scope = framework._imperative_tracer().get_scope(block.desc)
scope = framework._imperative_tracer().get_scope()
var = scope.var(py_var.name)
tensor = var.get_tensor()
tensor.set(value, core.CPUPlace())
......
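guard() now builds the Tracer from the main program's block alone, and to_variable() writes into the single root scope that get_scope() returns. A small round-trip sketch:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.imperative.base import to_variable

    with fluid.imperative.guard():
        x = to_variable(np.ones([2, 2], dtype=np.float32))
        print(x._numpy())  # fetched back out of the tracer's root scope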
......@@ -24,8 +24,10 @@ __all__ = ['PyLayer']
class PyLayer(core.Layer):
def __init__(self):
self._built = False
def __init__(self, *args, **kwargs):
from ..layer_helper import LayerHelper
self._helper = LayerHelper(type(self).__name__, **kwargs)
self._dtype = kwargs.get("dtype", core.VarDesc.VarType.FP32)
def __call__(self, inputs):
if not isinstance(inputs, list) and not isinstance(inputs, tuple):
......@@ -35,15 +37,10 @@ class PyLayer(core.Layer):
for x in inputs:
py_var = base.to_variable(x)
var_inputs.append(py_var)
if not self._built:
self._build_once(inputs)
self._built = True
outputs = self.forward(var_inputs)
return outputs
def _build_once(self, inputs):
pass
return outputs
def forward(self, inputs):
return []
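PyLayer now accepts *args/**kwargs, builds its own LayerHelper, and calls forward() on every invocation; the _built/_build_once hooks are removed, so subclasses set up state in __init__ (or lazily on their own). A minimal subclass under the new contract, patterned on MyLayer in the existing test:

    import numpy as np
    import paddle.fluid as fluid

    class Square(fluid.imperative.PyLayer):
        def __init__(self):
            super(Square, self).__init__()

        def forward(self, inputs):
            x = inputs[0]
            return [fluid.layers.elementwise_mul(x, x)]

    with fluid.imperative.guard():
        y = Square()(np.array([1., 2., 3.], dtype=np.float32))[0]
        print(y._numpy())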
......@@ -161,7 +161,8 @@ class ConstantInitializer(Initializer):
"dtype": int(var.dtype),
"value": float(self._value),
'force_cpu': self._force_cpu or force_init_on_cpu()
})
},
stop_gradient=True)
var.op = op
return op
......@@ -216,7 +217,8 @@ class UniformInitializer(Initializer):
"min": self._low,
"max": self._high,
"seed": self._seed
})
},
stop_gradient=True)
var.op = op
return op
......@@ -271,7 +273,8 @@ class NormalInitializer(Initializer):
"std": self._std_dev,
"seed": self._seed,
"use_mkldnn": False
})
},
stop_gradient=True)
var.op = op
return op
......@@ -325,7 +328,8 @@ class TruncatedNormalInitializer(Initializer):
"mean": self._mean,
"std": self._std_dev,
"seed": self._seed
})
},
stop_gradient=True)
var.op = op
return op
......@@ -415,7 +419,8 @@ class XavierInitializer(Initializer):
"min": -limit,
"max": limit,
"seed": self._seed
})
},
stop_gradient=True)
else:
std = np.sqrt(2.0 / float(fan_in + fan_out))
......@@ -428,7 +433,8 @@ class XavierInitializer(Initializer):
"mean": 0.0,
"std": std,
"seed": self._seed
})
},
stop_gradient=True)
var.op = op
return op
......@@ -513,7 +519,8 @@ class MSRAInitializer(Initializer):
"min": -limit,
"max": limit,
"seed": self._seed
})
},
stop_gradient=True)
else:
std = np.sqrt(2.0 / float(fan_in))
......@@ -526,7 +533,8 @@ class MSRAInitializer(Initializer):
"mean": 0.0,
"std": std,
"seed": self._seed
})
},
stop_gradient=True)
var.op = op
return op
......
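All of the initializer hunks above append their op with stop_gradient=True, so under imperative tracing the parameter-filling ops still execute but never get grad ops (replacing the old startup-block exemption). A hedged sketch of the path that hits this, following the create_parameter pattern used by the existing FC/MLP test:

    import paddle.fluid as fluid
    from paddle.fluid.layer_helper import LayerHelper

    with fluid.imperative.guard():
        helper = LayerHelper(
            'example',
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))
        # The fill_constant initializer op below is traced with stop_gradient=True.
        w = helper.create_parameter(
            attr=helper.param_attr, shape=[2, 3], dtype='float32', is_bias=False)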
......@@ -22,8 +22,8 @@ import numpy as np
from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating
from . import unique_name
from paddle.fluid.imperative.base import to_variable
from paddle.fluid.initializer import Constant, Xavier
from paddle.fluid.imperative import base
from .param_attr import ParamAttr, WeightNormParamAttr
from . import core
from six.moves import zip
......
......@@ -29,7 +29,6 @@ from . import utils
from .. import unique_name
from functools import reduce
from .. import core
from ..imperative import layers
__all__ = [
'fc',
......@@ -2537,12 +2536,12 @@ def adaptive_pool2d(input,
Examples:
.. code-block:: python
# suppose input data in shape of [N, C, H, W], `pool_size` is [m, n],
# suppose input data in shape of [N, C, H, W], `pool_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimentions
# of input data into m * n grids averagely and performs poolings in each
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive average pool performs calculations as follow:
#
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
......@@ -2636,10 +2635,10 @@ def adaptive_pool3d(input,
# suppose input data in shape of [N, C, D, H, W], `pool_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimentions
# of input data into l * m * n grids averagely and performs poolings in each
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive average pool performs calculations as follow:
#
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
......@@ -2649,7 +2648,7 @@ def adaptive_pool3d(input,
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
#
data = fluid.layers.data(
......@@ -9427,47 +9426,3 @@ def huber_loss(input, label, delta):
'Residual': residual},
attrs={'delta': delta})
return out
class FC(layers.PyLayer):
def __init__(self,
size,
param_attr=None,
num_flatten_dims=1,
dtype=core.VarDesc.VarType.FP32):
super(FC, self).__init__()
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
self._helper = LayerHelper('FC', param_attr=param_attr)
def _build_once(self, inputs):
input_shape = inputs[0].shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
] + [self._size]
self._w = self._helper.create_parameter(
attr=self._helper.param_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=False)
def forward(self, inputs):
tmp = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": inputs[0],
"Y": self._w},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": self._num_flatten_dims,
"y_num_col_dims": 1
})
out = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="sum",
inputs={"X": [tmp]},
outputs={"Out": out},
attrs={"use_mkldnn": False})
return out
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layers.nn import FC
@contextlib.contextmanager
def new_program_scope():
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
class MyLayer(fluid.imperative.PyLayer):
def __init__(self):
super(MyLayer, self).__init__()
def forward(self, inputs):
x = fluid.layers.relu(inputs[0])
self._x_for_debug = x
return [fluid.layers.elementwise_mul(x, x)]
class MLP(fluid.imperative.PyLayer):
def __init__(self):
super(MLP, self).__init__()
self._fc1 = FC(3,
fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = FC(4,
fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs):
x = self._fc1(inputs[0])
x = self._fc2(x)
x = fluid.layers.reduce_sum(x)
return x
class TestImperative(unittest.TestCase):
def test_layer(self):
with fluid.imperative.guard():
cl = core.Layer()
cl.forward([])
l = fluid.imperative.PyLayer()
l.forward([])
def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard():
l = MyLayer()
x = l(np_inp)[0]
self.assertIsNotNone(x)
dy_out = x._numpy()
x._backward()
dy_grad = l._x_for_debug._gradient()
with new_program_scope():
inp = fluid.layers.data(
name="inp", shape=[3], append_batch_size=False)
l = MyLayer()
x = l(inp)[0]
param_grads = fluid.backward.append_backward(
x, parameter_list=[l._x_for_debug.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
fetch_list=[x.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad))
def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard():
mlp = MLP()
out = mlp(np_inp)
dy_out = out._numpy()
out._backward()
dy_grad = mlp._fc1._w._gradient()
with new_program_scope():
inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
mlp = MLP()
out = mlp(inp)
param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1._w.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
fetch_list=[out.name, param_grads[1].name])
self.assertTrue(np.allclose(dy_out, static_out))
self.assertTrue(np.allclose(dy_grad, static_grad))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.imperative.nn import Conv2D
@contextlib.contextmanager
def new_program_scope():
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
class MNIST(fluid.imperative.PyLayer):
def __init__(self):
super(MNIST, self).__init__()
groups = 1
dilation = [1, 1]
pad = [0, 0]
stride = [1, 1]
input_size = [2, 3, 5, 5] # NCHW
assert np.mod(input_size[1], groups) == 0
f_c = input_size[1] // groups
filter_size = [6, f_c, 3, 3]
self._conv2d = Conv2D(
num_channels=3,
num_filters=20,
filter_size=3,
stride=stride,
padding=pad,
dilation=dilation,
groups=groups,
use_cudnn=False)
def forward(self, inputs):
x = self._conv2d(inputs)
return x
class TestImperativeMnist(unittest.TestCase):
# def test_layer(self):
# with fluid.imperative.guard():
# cl = core.Layer()
# cl.forward([])
# l = fluid.imperative.PyLayer()
# l.forward([])
# def test_layer_in_out(self):
# np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
# with fluid.imperative.guard():
# l = MyLayer()
# x = l(np_inp)[0]
# self.assertIsNotNone(x)
# dy_out = x._numpy()
# x._backward()
# dy_grad = l._x_for_debug._gradient()
# with new_program_scope():
# inp = fluid.layers.data(
# name="inp", shape=[3], append_batch_size=False)
# l = MyLayer()
# x = l(inp)[0]
# param_grads = fluid.backward.append_backward(
# x, parameter_list=[l._x_for_debug.name])[0]
# exe = fluid.Executor(fluid.CPUPlace())
# static_out, static_grad = exe.run(
# feed={inp.name: np_inp},
# fetch_list=[x.name, param_grads[1].name])
# self.assertTrue(np.allclose(dy_out, static_out))
# self.assertTrue(np.allclose(dy_grad, static_grad))
def test_mnist_cpu_float32(self):
with fluid.imperative.guard():
mnist = MNIST()
data = np.random.rand(2, 3, 5, 5).astype('float32')
mnist(data)
# np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
# with fluid.imperative.guard():
# mlp = MLP()
# out = mlp(np_inp)
# dy_out = out._numpy()
# out._backward()
# dy_grad = mlp._fc1._w._gradient()
# with new_program_scope():
# inp = fluid.layers.data(
# name="inp", shape=[2, 2], append_batch_size=False)
# mlp = MLP()
# out = mlp(inp)
# param_grads = fluid.backward.append_backward(
# out, parameter_list=[mlp._fc1._w.name])[0]
# exe = fluid.Executor(fluid.CPUPlace())
# exe.run(fluid.default_startup_program())
# static_out, static_grad = exe.run(
# feed={inp.name: np_inp},
# fetch_list=[out.name, param_grads[1].name])
# self.assertTrue(np.allclose(dy_out, static_out))
# self.assertTrue(np.allclose(dy_grad, static_grad))
if __name__ == '__main__':
unittest.main()