From c8965dc1ab28767ebe85e969126ed2e4b4fb0f66 Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Wed, 23 Jan 2019 18:45:31 +0800
Subject: [PATCH] Polish code

test=develop
---
 paddle/fluid/imperative/layer.cc              |  11 +-
 paddle/fluid/imperative/layer.h               |   5 +-
 paddle/fluid/pybind/pybind.cc                 |  16 +-
 python/paddle/fluid/framework.py              |  17 +-
 python/paddle/fluid/imperative/nn.py          |   8 +-
 python/paddle/fluid/layer_helper.py           |   4 +-
 python/paddle/fluid/optimizer.py              |   1 -
 .../fluid/tests/unittests/test_imperative.py  |  19 +-
 .../tests/unittests/test_imperative_gan.py    |   8 +-
 .../unittests/test_imperative_optimizer.py    |   5 +-
 .../tests/unittests/test_imperative_resnet.py | 171 +-----------------
 11 files changed, 57 insertions(+), 208 deletions(-)

diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index d2c5ef01ff..8029129b9a 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -168,12 +168,12 @@ class Autograd {
   }
 };
 
-VarBase* VarBase::NewVarBase(const platform::Place& dst_place,
-                             const bool blocking) const {
+std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
+                                             const bool blocking) const {
   PADDLE_ENFORCE(var_->IsInitialized(),
                  "Variable must be initialized when getting numpy tensor");
 
-  VarBase* new_var = new VarBase();
+  std::unique_ptr<VarBase> new_var(new VarBase());
   framework::LoDTensor* tensor =
       new_var->var_->GetMutable<framework::LoDTensor>();
   tensor->Resize(var_->Get<framework::LoDTensor>().dims());
@@ -240,9 +240,8 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
       PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
 
       framework::Scope scope;
-      platform::Place place = place_;
-      PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
-      p.op.RuntimeInferShape(scope, place, ctx);
+      PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_);
+      p.op.RuntimeInferShape(scope, place_, ctx);
       p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
     }
 
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 0e8064227b..633924aa41 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -21,6 +21,7 @@
 #include <map>     // NOLINT
 #include <string>  // NOLINT
 #include <vector>  // NOLINT
+#include <memory>  // NOLINT
 
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/operator.h"
@@ -153,8 +154,8 @@ class VarBase {
 
   framework::LoDTensor& GradValue();
 
-  VarBase* NewVarBase(const platform::Place& dst_place,
-                      const bool blocking) const;
+  std::unique_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
+                                      const bool blocking) const;
 
   inline std::string GradName() const {
     PADDLE_ENFORCE(
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 4877bde083..25c4c44128 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -137,13 +137,21 @@ PYBIND11_MODULE(core, m) {
       .def("_grad_ivar",
           [](const imperative::VarBase &self) { return self.grads_; },
           py::return_value_policy::reference)
-      .def("_to",
+      .def("_copy_to",
          [](const imperative::VarBase &self, const platform::CPUPlace &place,
-              bool blocking) { return self.NewVarBase(place, blocking); },
+              bool blocking) {
+             std::unique_ptr<imperative::VarBase> new_var =
+                 self.NewVarBase(place, blocking);
+             return new_var.release();
+           },
          py::return_value_policy::take_ownership)
-      .def("_to",
+      .def("_copy_to",
          [](const imperative::VarBase &self, const platform::CUDAPlace &place,
-              bool blocking) { return self.NewVarBase(place, blocking); },
+              bool blocking) {
+             std::unique_ptr<imperative::VarBase> new_var =
+                 self.NewVarBase(place, blocking);
+             return new_var.release();
+           },
          py::return_value_policy::take_ownership)
      .def("value", [](const imperative::VarBase &self) { return self.var_; },
          py::return_value_policy::reference)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 773ba3087a..3ddd73080b 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -67,7 +67,7 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
 CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName()
 
 _imperative_tracer_ = None
-_current_expected_place_ = None
+_imperative_current_expected_place_ = None
 
 
 def _in_imperative_mode():
@@ -79,7 +79,7 @@ def _imperative_tracer():
 
 
 def _current_expected_place():
-    return _current_expected_place_
+    return _imperative_current_expected_place_
 
 
 class NameScope(object):
@@ -385,7 +385,7 @@ class Variable(object):
         self._ivar.stop_gradient = stop_gradient
 
     def _numpy(self):
-        new_ivar = self._ivar._to(core.CPUPlace(), True)
+        new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
         return np.array(new_ivar.value().get_tensor())
 
     def _backward(self):
@@ -1313,7 +1313,8 @@ class Block(object):
     def _trace_op(self, op, stop_gradient=False):
         if _in_imperative_mode():
             _imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc,
-                                       _current_expected_place_, stop_gradient)
+                                       _imperative_current_expected_place_,
+                                       stop_gradient)
 
     def _insert_op(self, index, *args, **kwargs):
         """
@@ -2338,10 +2339,10 @@ def _imperative_guard(tracer):
 
 @contextlib.contextmanager
 def _imperative_place_guard(place):
-    global _current_expected_place_
-    tmp_place = _current_expected_place_
-    _current_expected_place_ = place
+    global _imperative_current_expected_place_
+    tmp_place = _imperative_current_expected_place_
+    _imperative_current_expected_place_ = place
 
     yield
 
-    _current_expected_place_ = tmp_place
+    _imperative_current_expected_place_ = tmp_place
diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py
index 23ef35bad8..140c0ff037 100644
--- a/python/paddle/fluid/imperative/nn.py
+++ b/python/paddle/fluid/imperative/nn.py
@@ -144,7 +144,7 @@ class Conv2D(layers.Layer):
                 attrs={'axis': 1})
 
         # Currently, we don't support inplace in imperative mode
-        return self._helper.append_activation(pre_act, force_no_inplace=True)
+        return self._helper.append_activation(pre_act)
 
 
 class Pool2D(layers.Layer):
@@ -286,8 +286,7 @@ class FC(layers.Layer):
         else:
             pre_activation = pre_bias
         # Currently, we don't support inplace in imperative mode
-        return self._helper.append_activation(
-            pre_activation, force_no_inplace=True)
+        return self._helper.append_activation(pre_activation)
 
 
 class BatchNorm(layers.Layer):
@@ -419,5 +418,4 @@ class BatchNorm(layers.Layer):
         })
 
         # Currently, we don't support inplace in imperative mode
-        return self._helper.append_activation(
-            batch_norm_out, force_no_inplace=True)
+        return self._helper.append_activation(batch_norm_out)
diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py
index df5591fb2a..972c51938f 100644
--- a/python/paddle/fluid/layer_helper.py
+++ b/python/paddle/fluid/layer_helper.py
@@ -419,7 +419,7 @@ class LayerHelper(object):
                 attrs={'axis': dim_start})
         return tmp
 
-    def append_activation(self, input_var, force_no_inplace=False):
+    def append_activation(self, input_var):
         act = self.kwargs.get('act', None)
         if act is None:
             return input_var
@@ -436,7 +436,7 @@ class LayerHelper(object):
         tmp = input_var
         # NOTE(dzhwinter): some activation support inplace compution.
         # NOTE(minqiyang): currently, we don't support inplace in imperative mode
-        if not force_no_inplace and core.IsInplace(act_type):
+        if not imperative_base.enabled() and core.IsInplace(act_type):
             tmp = input_var
         else:
             tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index cd28ff218e..14f4276e2f 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -388,7 +388,6 @@ class Optimizer(object):
             params_grads = []
             for param in parameters:
                 if param.stop_gradient:
-                    print("parameter:", param.name, "stop gradient, skip it")
                     continue
                 # create gradient variable
                 grad_var = Variable(
diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py
index 29cfce5079..7533ab9fdb 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative.py
@@ -68,7 +68,7 @@ class MLP(fluid.imperative.Layer):
 
 class TestImperative(unittest.TestCase):
     def test_layer(self):
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard():
             cl = core.Layer()
             cl.forward([])
             l = fluid.imperative.Layer()
@@ -76,7 +76,7 @@ class TestImperative(unittest.TestCase):
 
     def test_pylayer_func_id(self):
 
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard():
 
             class PyLayer1(fluid.imperative.PyLayer):
                 def __init__(self):
@@ -116,7 +116,7 @@ class TestImperative(unittest.TestCase):
 
     def test_pylayer(self):
         np_inp = np.ones([2, 2], np.float32)
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard():
             my_py_layer = MyPyLayer()
             var_inp = fluid.imperative.base.to_variable(np_inp)
             outs = my_py_layer(var_inp)
@@ -133,7 +133,8 @@ class TestImperative(unittest.TestCase):
             x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
             param_grads = fluid.backward.append_backward(
                 x, parameter_list=[x1.name])[0]
-            exe = fluid.Executor(fluid.CPUPlace())
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
             static_out, static_grad = exe.run(
                 feed={inp.name: np_inp},
@@ -144,7 +145,7 @@ class TestImperative(unittest.TestCase):
 
     def test_layer_in_out(self):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard():
             var_inp = fluid.imperative.base.to_variable(np_inp)
             l = MyLayer()
             x = l(var_inp)[0]
@@ -160,7 +161,8 @@ class TestImperative(unittest.TestCase):
             x = l(inp)[0]
             param_grads = fluid.backward.append_backward(
                 x, parameter_list=[l._x_for_debug.name])[0]
-            exe = fluid.Executor(fluid.CPUPlace())
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
             static_out, static_grad = exe.run(
                 feed={inp.name: np_inp},
@@ -171,7 +173,7 @@ class TestImperative(unittest.TestCase):
 
     def test_mlp(self):
         np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard():
             var_inp = fluid.imperative.base.to_variable(np_inp)
             mlp = MLP()
             out = mlp(var_inp)
@@ -186,7 +188,8 @@ class TestImperative(unittest.TestCase):
             out = mlp(inp)
             param_grads = fluid.backward.append_backward(
                 out, parameter_list=[mlp._fc1._w.name])[0]
-            exe = fluid.Executor(fluid.CPUPlace())
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
             exe.run(fluid.default_startup_program())
 
             static_out, static_grad = exe.run(
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
index 776b35bbd1..681661bfc6 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
@@ -20,6 +20,7 @@ import sys
 
 import paddle
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid.optimizer import SGDOptimizer
 from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
 from test_imperative_base import new_program_scope
@@ -58,7 +59,7 @@ class Generator(fluid.imperative.Layer):
 
 
 class TestImperativeMnist(unittest.TestCase):
-    def test_mnist_cpu_float32(self):
+    def test_gan_float32(self):
         seed = 90
 
         startup = fluid.Program()
@@ -115,7 +116,8 @@ class TestImperativeMnist(unittest.TestCase):
             sgd = SGDOptimizer(learning_rate=1e-3)
             sgd.minimize(g_loss)
 
-        exe = fluid.Executor(fluid.CPUPlace())
+        exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
+        ) else fluid.CUDAPlace(0))
         static_params = dict()
         with fluid.scope_guard(scope):
             img = np.ones([2, 1], np.float32)
@@ -135,7 +137,7 @@ class TestImperativeMnist(unittest.TestCase):
                     scope.find_var(param.name).get_tensor())
 
         dy_params = dict()
-        with fluid.imperative.guard(place=fluid.CPUPlace()):
+        with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index 5816c178c3..d0a5a88317 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
     def test_mnist_cpu_float32(self):
         seed = 90
 
-        with fluid.imperative.guard(place=fuild.CPUPlace()):
+        with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
@@ -145,7 +145,8 @@ class TestImperativeMnist(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            exe = fluid.Executor(fluid.CPUPlace())
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
             mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
index 8915be8277..87a72dd04e 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -143,7 +143,7 @@ class BottleneckBlock(fluid.imperative.Layer):
         y = fluid.layers.elementwise_add(x=short, y=conv2)
 
         layer_helper = LayerHelper('elementwise_add_activation', act='relu')
-        return layer_helper.append_activation(y, force_no_inplace=True)
+        return layer_helper.append_activation(y)
 
 
 class ResNet(fluid.imperative.Layer):
@@ -204,12 +204,9 @@ class ResNet(fluid.imperative.Layer):
 
 
 class TestImperativeResnet(unittest.TestCase):
-    def test_resnet_gpu_float32(self):
+    def test_resnet_float32(self):
         seed = 90
 
-        if not core.is_compiled_with_cuda():
-            return
-
         batch_size = train_parameters["batch_size"]
         batch_num = 1
         with fluid.imperative.guard():
@@ -277,168 +274,8 @@ class TestImperativeResnet(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            exe = fluid.Executor(fluid.CUDAPlace(0))
-
-            resnet = ResNet()
-            optimizer = optimizer_setting(train_parameters)
-
-            np.random.seed(seed)
-            import random
-            random.seed = seed
-            train_reader = paddle.batch(
-                paddle.dataset.flowers.train(use_xmap=False),
-                batch_size=batch_size)
-
-            img = fluid.layers.data(
-                name='pixel', shape=[3, 224, 224], dtype='float32')
-            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-            out = resnet(img)
-            loss = fluid.layers.cross_entropy(input=out, label=label)
-            avg_loss = fluid.layers.mean(x=loss)
-            optimizer.minimize(avg_loss)
-
-            # initialize params and fetch them
-            static_param_init_value = {}
-            static_param_name_list = []
-            static_grad_name_list = []
-            for param in fluid.default_startup_program().global_block(
-            ).all_parameters():
-                static_param_name_list.append(param.name)
-            for param in fluid.default_main_program().global_block(
-            ).all_parameters():
-                if not param.stop_gradient:
-                    static_grad_name_list.append(param.name +
-                                                 core.grad_var_suffix())
-
-            out = exe.run(fluid.default_startup_program(),
-                          fetch_list=static_param_name_list)
-
-            for i in range(len(static_param_name_list)):
-                static_param_init_value[static_param_name_list[i]] = out[i]
-
-            for batch_id, data in enumerate(train_reader()):
-                if batch_id >= batch_num:
-                    break
-
-                static_x_data = np.array(
-                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    [batch_size, 1])
-
-                fetch_list = [avg_loss.name]
-                fetch_list.extend(static_param_name_list)
-                fetch_list.extend(static_grad_name_list)
-                out = exe.run(fluid.default_main_program(),
-                              feed={"pixel": static_x_data,
-                                    "label": y_data},
-                              fetch_list=fetch_list)
-
-                static_param_value = {}
-                static_grad_value = {}
-                static_out = out[0]
-                param_start_pos = 1
-                grad_start_pos = len(static_param_name_list) + param_start_pos
-                for i in range(param_start_pos,
-                               len(static_param_name_list) + param_start_pos):
-                    static_param_value[static_param_name_list[
-                        i - param_start_pos]] = out[i]
-                for i in range(grad_start_pos,
-                               len(static_grad_name_list) + grad_start_pos):
-                    static_grad_value[static_grad_name_list[
-                        i - grad_start_pos]] = out[i]
-
-        self.assertTrue(np.allclose(static_out, dy_out))
-
-        self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
-        for key, value in six.iteritems(static_param_init_value):
-            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
-            self.assertTrue(np.isfinite(value.all()))
-            self.assertFalse(np.isnan(value.any()))
-
-        self.assertEqual(len(dy_grad_value), len(static_grad_value))
-        for key, value in six.iteritems(static_grad_value):
-            # TODO(minqiyang): find a way to align the gradient
-            self.assertTrue(np.allclose(value, dy_grad_value[key]))
-            self.assertTrue(np.isfinite(value.all()))
-            self.assertFalse(np.isnan(value.any()))
-
-        self.assertEqual(len(dy_param_value), len(static_param_value))
-        for key, value in six.iteritems(static_param_value):
-            self.assertTrue(np.allclose(value, dy_param_value[key]))
-            self.assertTrue(np.isfinite(value.all()))
-            self.assertFalse(np.isnan(value.any()))
-
-    def test_resnet_cpu_float32(self):
-        seed = 90
-
-        batch_size = train_parameters["batch_size"]
-        batch_num = 1
-        with fluid.imperative.guard(place=fluid.CPUPlace()):
-            fluid.default_startup_program().random_seed = seed
-            fluid.default_main_program().random_seed = seed
-
-            resnet = ResNet()
-            optimizer = optimizer_setting(train_parameters)
-            np.random.seed(seed)
-            import random
-            random.seed = seed
-            train_reader = paddle.batch(
-                paddle.dataset.flowers.train(use_xmap=False),
-                batch_size=batch_size)
-
-            dy_param_init_value = {}
-            for param in fluid.default_main_program().global_block(
-            ).all_parameters():
-                dy_param_init_value[param.name] = param._numpy()
-
-            for batch_id, data in enumerate(train_reader()):
-                if batch_id >= batch_num:
-                    break
-
-                dy_x_data = np.array(
-                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    batch_size, 1)
-
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
-                label._stop_gradient = True
-
-                out = resnet(img)
-                loss = fluid.layers.cross_entropy(input=out, label=label)
-                avg_loss = fluid.layers.mean(x=loss)
-
-                dy_out = avg_loss._numpy()
-
-                if batch_id == 0:
-                    for param in fluid.default_main_program().global_block(
-                    ).all_parameters():
-                        if param.name not in dy_param_init_value:
-                            dy_param_init_value[param.name] = param._numpy()
-
-                avg_loss._backward()
-
-                dy_grad_value = {}
-                for param in fluid.default_main_program().global_block(
-                ).all_parameters():
-                    if not param.stop_gradient:
-                        np_array = np.array(param._ivar._grad_ivar().value()
-                                            .get_tensor())
-                        dy_grad_value[param.name + core.grad_var_suffix(
-                        )] = np_array
-
-                optimizer.minimize(avg_loss)
-
-                dy_param_value = {}
-                for param in fluid.default_main_program().global_block(
-                ).all_parameters():
-                    dy_param_value[param.name] = param._numpy()
-
-        with new_program_scope():
-            fluid.default_startup_program().random_seed = seed
-            fluid.default_main_program().random_seed = seed
-
-            exe = fluid.Executor(fluid.CPUPlace())
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
             resnet = ResNet()
             optimizer = optimizer_setting(train_parameters)
-- 
GitLab
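
Note on the _copy_to bindings in pybind.cc (a minimal illustrative sketch, not
part of the patch): the lambdas build the copied VarBase inside a
std::unique_ptr and then hand the raw pointer to Python with
py::return_value_policy::take_ownership, so the Python wrapper becomes
responsible for deleting it. The sketch below shows the same pattern in a
self-contained pybind11 module; the Widget class and the demo module name are
hypothetical stand-ins for VarBase and core, not PaddlePaddle APIs.

    // demo.cc -- illustrative only; build as a pybind11 extension module.
    #include <memory>
    #include <string>

    #include <pybind11/pybind11.h>

    namespace py = pybind11;

    struct Widget {
      explicit Widget(std::string name) : name_(std::move(name)) {}
      std::string name_;
    };

    PYBIND11_MODULE(demo, m) {
      py::class_<Widget>(m, "Widget")
          .def(py::init<std::string>())
          .def_readonly("name", &Widget::name_)
          .def("clone",
               [](const Widget &self) {
                 // Build the copy exception-safely in a unique_ptr, then
                 // release ownership to Python; take_ownership tells pybind11
                 // to delete the object when the Python wrapper is collected.
                 std::unique_ptr<Widget> copy(new Widget(self.name_));
                 return copy.release();
               },
               py::return_value_policy::take_ownership);
    }

Releasing from a unique_ptr, rather than returning a bare new expression,
keeps the allocation exception-safe up to the point where ownership is handed
to Python, which is the same reason NewVarBase now returns
std::unique_ptr<VarBase> instead of a raw VarBase*.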