Commit c8965dc1, authored by minqiyang

Polish code

test=develop
Parent 289aba75
@@ -168,12 +168,12 @@ class Autograd {
}
};
VarBase* VarBase::NewVarBase(const platform::Place& dst_place,
std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
const bool blocking) const {
PADDLE_ENFORCE(var_->IsInitialized(),
"Variable must be initialized when getting numpy tensor");
VarBase* new_var = new VarBase();
std::unique_ptr<VarBase> new_var(new VarBase());
framework::LoDTensor* tensor =
new_var->var_->GetMutable<framework::LoDTensor>();
tensor->Resize(var_->Get<framework::LoDTensor>().dims());
@@ -240,9 +240,8 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
framework::Scope scope;
platform::Place place = place_;
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
p.op.RuntimeInferShape(scope, place, ctx);
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_);
p.op.RuntimeInferShape(scope, place_, ctx);
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
}
......
@@ -21,6 +21,7 @@
#include <map> // NOLINT
#include <string> // NOLINT
#include <vector> // NOLINT
#include <memory> // NOLINT
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/operator.h"
@@ -153,7 +154,7 @@ class VarBase {
framework::LoDTensor& GradValue();
VarBase* NewVarBase(const platform::Place& dst_place,
std::unique_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
const bool blocking) const;
inline std::string GradName() const {
......
@@ -137,13 +137,21 @@ PYBIND11_MODULE(core, m) {
.def("_grad_ivar",
[](const imperative::VarBase &self) { return self.grads_; },
py::return_value_policy::reference)
.def("_to",
.def("_copy_to",
[](const imperative::VarBase &self, const platform::CPUPlace &place,
bool blocking) { return self.NewVarBase(place, blocking); },
bool blocking) {
std::unique_ptr<imperative::VarBase> new_var =
self.NewVarBase(place, blocking);
return new_var.release();
},
py::return_value_policy::take_ownership)
.def("_to",
.def("_copy_to",
[](const imperative::VarBase &self, const platform::CUDAPlace &place,
bool blocking) { return self.NewVarBase(place, blocking); },
bool blocking) {
std::unique_ptr<imperative::VarBase> new_var =
self.NewVarBase(place, blocking);
return new_var.release();
},
py::return_value_policy::take_ownership)
.def("value", [](const imperative::VarBase &self) { return self.var_; },
py::return_value_policy::reference)
......
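A note on the bindings above: NewVarBase now returns a std::unique_ptr, the lambda release()s it, and py::return_value_policy::take_ownership makes the Python object the sole owner of the freshly created copy. A minimal sketch of how the renamed _copy_to method is reached from Python (assuming an imperative variable created under a guard, as in the tests further below):

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.core as core

    with fluid.imperative.guard():
        var = fluid.imperative.base.to_variable(np.ones([2, 2], np.float32))
        # Blocking copy of the underlying LoDTensor to the host; the returned
        # VarBase is a new object owned by the Python side (take_ownership).
        cpu_var = var._ivar._copy_to(core.CPUPlace(), True)
        host = np.array(cpu_var.value().get_tensor())  # what Variable._numpy() does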
@@ -67,7 +67,7 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName()
_imperative_tracer_ = None
_current_expected_place_ = None
_imperative_current_expected_place_ = None
def _in_imperative_mode():
@@ -79,7 +79,7 @@ def _imperative_tracer():
def _current_expected_place():
return _current_expected_place_
return _imperative_current_expected_place_
class NameScope(object):
@@ -385,7 +385,7 @@ class Variable(object):
self._ivar.stop_gradient = stop_gradient
def _numpy(self):
new_ivar = self._ivar._to(core.CPUPlace(), True)
new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor())
def _backward(self):
@@ -1313,7 +1313,8 @@ class Block(object):
def _trace_op(self, op, stop_gradient=False):
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc,
_current_expected_place_, stop_gradient)
_imperative_current_expected_place_,
stop_gradient)
def _insert_op(self, index, *args, **kwargs):
"""
@@ -2338,10 +2339,10 @@ def _imperative_guard(tracer):
@contextlib.contextmanager
def _imperative_place_guard(place):
global _current_expected_place_
tmp_place = _current_expected_place_
_current_expected_place_ = place
global _imperative_current_expected_place_
tmp_place = _imperative_current_expected_place_
_imperative_current_expected_place_ = place
yield
_current_expected_place_ = tmp_place
_imperative_current_expected_place_ = tmp_place
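For reference, the renamed global at the end of this file is managed with the usual save/set/restore pattern for a module-level default. A standalone sketch of the same idea, using a hypothetical _default_place_ global rather than the framework.py name:

    import contextlib

    _default_place_ = None  # hypothetical stand-in for _imperative_current_expected_place_

    @contextlib.contextmanager
    def place_guard(place):
        # Remember the current default, install the new place for the body of
        # the `with` block, and put the previous value back afterwards.
        global _default_place_
        previous = _default_place_
        _default_place_ = place
        yield
        _default_place_ = previous

A try/finally around the yield would also restore the value when the body raises; the sketch mirrors the in-tree guard as shown in the hunk above.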
@@ -144,7 +144,7 @@ class Conv2D(layers.Layer):
attrs={'axis': 1})
# Currently, we don't support inplace in imperative mode
return self._helper.append_activation(pre_act, force_no_inplace=True)
return self._helper.append_activation(pre_act)
class Pool2D(layers.Layer):
@@ -286,8 +286,7 @@ class FC(layers.Layer):
else:
pre_activation = pre_bias
# Currently, we don't support inplace in imperative mode
return self._helper.append_activation(
pre_activation, force_no_inplace=True)
return self._helper.append_activation(pre_activation)
class BatchNorm(layers.Layer):
@@ -419,5 +418,4 @@ class BatchNorm(layers.Layer):
})
# Currently, we don't support inplace in imperative mode
return self._helper.append_activation(
batch_norm_out, force_no_inplace=True)
return self._helper.append_activation(batch_norm_out)
@@ -419,7 +419,7 @@ class LayerHelper(object):
attrs={'axis': dim_start})
return tmp
def append_activation(self, input_var, force_no_inplace=False):
def append_activation(self, input_var):
act = self.kwargs.get('act', None)
if act is None:
return input_var
@@ -436,7 +436,7 @@ class LayerHelper(object):
tmp = input_var
# NOTE(dzhwinter): some activation support inplace compution.
# NOTE(minqiyang): currently, we don't support inplace in imperative mode
if not force_no_inplace and core.IsInplace(act_type):
if not imperative_base.enabled() and core.IsInplace(act_type):
tmp = input_var
else:
tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
......
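Taken together with the nn.py changes above, callers no longer pass force_no_inplace; whether an activation may overwrite its input is now derived from global state. A condensed sketch of that branch, using the imperative_base.enabled() and core.IsInplace calls shown in the hunk (the wrapper function name is hypothetical):

    from paddle.fluid import core
    from paddle.fluid.imperative import base as imperative_base

    def activation_output_var(helper, input_var, act_type):
        # Reuse the input buffer only outside imperative mode and only when
        # the activation kernel supports in-place computation; otherwise
        # allocate a fresh output variable with the same dtype.
        if not imperative_base.enabled() and core.IsInplace(act_type):
            return input_var
        return helper.create_variable_for_type_inference(dtype=input_var.dtype)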
@@ -388,7 +388,6 @@ class Optimizer(object):
params_grads = []
for param in parameters:
if param.stop_gradient:
print("parameter:", param.name, "stop gradient, skip it")
continue
# create gradient variable
grad_var = Variable(
......
@@ -68,7 +68,7 @@ class MLP(fluid.imperative.Layer):
class TestImperative(unittest.TestCase):
def test_layer(self):
with fluid.imperative.guard(device=None):
with fluid.imperative.guard():
cl = core.Layer()
cl.forward([])
l = fluid.imperative.Layer()
@@ -76,7 +76,7 @@ class TestImperative(unittest.TestCase):
def test_pylayer_func_id(self):
with fluid.imperative.guard(device=None):
with fluid.imperative.guard():
class PyLayer1(fluid.imperative.PyLayer):
def __init__(self):
@@ -116,7 +116,7 @@ class TestImperative(unittest.TestCase):
def test_pylayer(self):
np_inp = np.ones([2, 2], np.float32)
with fluid.imperative.guard(device=None):
with fluid.imperative.guard():
my_py_layer = MyPyLayer()
var_inp = fluid.imperative.base.to_variable(np_inp)
outs = my_py_layer(var_inp)
@@ -133,7 +133,8 @@ class TestImperative(unittest.TestCase):
x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
param_grads = fluid.backward.append_backward(
x, parameter_list=[x1.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
@@ -144,7 +145,7 @@ class TestImperative(unittest.TestCase):
def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard(device=None):
with fluid.imperative.guard():
var_inp = fluid.imperative.base.to_variable(np_inp)
l = MyLayer()
x = l(var_inp)[0]
@@ -160,7 +161,8 @@ class TestImperative(unittest.TestCase):
x = l(inp)[0]
param_grads = fluid.backward.append_backward(
x, parameter_list=[l._x_for_debug.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
static_out, static_grad = exe.run(
feed={inp.name: np_inp},
@@ -171,7 +173,7 @@ class TestImperative(unittest.TestCase):
def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard(device=None):
with fluid.imperative.guard():
var_inp = fluid.imperative.base.to_variable(np_inp)
mlp = MLP()
out = mlp(var_inp)
@@ -186,7 +188,8 @@ class TestImperative(unittest.TestCase):
out = mlp(inp)
param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1._w.name])[0]
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
static_out, static_grad = exe.run(
......
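The tests now pick the executor place from the build configuration instead of hard-coding CPUPlace. Written out once, the recurring one-liner above amounts to:

    import paddle.fluid as fluid
    import paddle.fluid.core as core

    # Run the static-graph reference on GPU when the wheel was built with CUDA,
    # otherwise fall back to CPU; the imperative side uses the guard's default.
    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)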
@@ -20,6 +20,7 @@ import sys
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope
@@ -58,7 +59,7 @@ class Generator(fluid.imperative.Layer):
class TestImperativeMnist(unittest.TestCase):
def test_mnist_cpu_float32(self):
def test_gan_float32(self):
seed = 90
startup = fluid.Program()
@@ -115,7 +116,8 @@ class TestImperativeMnist(unittest.TestCase):
sgd = SGDOptimizer(learning_rate=1e-3)
sgd.minimize(g_loss)
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0))
static_params = dict()
with fluid.scope_guard(scope):
img = np.ones([2, 1], np.float32)
@@ -135,7 +137,7 @@ class TestImperativeMnist(unittest.TestCase):
scope.find_var(param.name).get_tensor())
dy_params = dict()
with fluid.imperative.guard(place=fluid.CPUPlace()):
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
......
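fluid.imperative.guard() is now entered without a device or place argument. What keeps the imperative run comparable to the static one is pinning both program-level seeds inside the guard, roughly:

    import numpy as np
    import paddle.fluid as fluid

    seed = 90
    with fluid.imperative.guard():
        # Same seeds as the static-graph run, so parameter initialization and
        # the resulting losses can be compared numerically.
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        img = fluid.imperative.base.to_variable(np.ones([2, 1], np.float32))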
@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
def test_mnist_cpu_float32(self):
seed = 90
with fluid.imperative.guard(place=fuild.CPUPlace()):
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
@@ -145,7 +145,8 @@ class TestImperativeMnist(unittest.TestCase):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
mnist = MNIST()
sgd = SGDOptimizer(learning_rate=1e-3)
......
@@ -143,7 +143,7 @@ class BottleneckBlock(fluid.imperative.Layer):
y = fluid.layers.elementwise_add(x=short, y=conv2)
layer_helper = LayerHelper('elementwise_add_activation', act='relu')
return layer_helper.append_activation(y, force_no_inplace=True)
return layer_helper.append_activation(y)
class ResNet(fluid.imperative.Layer):
@@ -204,12 +204,9 @@ class ResNet(fluid.imperative.Layer):
class TestImperativeResnet(unittest.TestCase):
def test_resnet_gpu_float32(self):
def test_resnet_float32(self):
seed = 90
if not core.is_compiled_with_cuda():
return
batch_size = train_parameters["batch_size"]
batch_num = 1
with fluid.imperative.guard():
@@ -277,168 +274,8 @@ class TestImperativeResnet(unittest.TestCase):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
exe = fluid.Executor(fluid.CUDAPlace(0))
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
np.random.seed(seed)
import random
random.seed = seed
train_reader = paddle.batch(
paddle.dataset.flowers.train(use_xmap=False),
batch_size=batch_size)
img = fluid.layers.data(
name='pixel', shape=[3, 224, 224], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
optimizer.minimize(avg_loss)
# initialize params and fetch them
static_param_init_value = {}
static_param_name_list = []
static_grad_name_list = []
for param in fluid.default_startup_program().global_block(
).all_parameters():
static_param_name_list.append(param.name)
for param in fluid.default_main_program().global_block(
).all_parameters():
if not param.stop_gradient:
static_grad_name_list.append(param.name +
core.grad_var_suffix())
out = exe.run(fluid.default_startup_program(),
fetch_list=static_param_name_list)
for i in range(len(static_param_name_list)):
static_param_init_value[static_param_name_list[i]] = out[i]
for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num:
break
static_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
[batch_size, 1])
fetch_list = [avg_loss.name]
fetch_list.extend(static_param_name_list)
fetch_list.extend(static_grad_name_list)
out = exe.run(fluid.default_main_program(),
feed={"pixel": static_x_data,
"label": y_data},
fetch_list=fetch_list)
static_param_value = {}
static_grad_value = {}
static_out = out[0]
param_start_pos = 1
grad_start_pos = len(static_param_name_list) + param_start_pos
for i in range(param_start_pos,
len(static_param_name_list) + param_start_pos):
static_param_value[static_param_name_list[
i - param_start_pos]] = out[i]
for i in range(grad_start_pos,
len(static_grad_name_list) + grad_start_pos):
static_grad_value[static_grad_name_list[
i - grad_start_pos]] = out[i]
self.assertTrue(np.allclose(static_out, dy_out))
self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
for key, value in six.iteritems(static_param_init_value):
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
self.assertEqual(len(dy_grad_value), len(static_grad_value))
for key, value in six.iteritems(static_grad_value):
# TODO(minqiyang): find a way to align the gradient
self.assertTrue(np.allclose(value, dy_grad_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
self.assertEqual(len(dy_param_value), len(static_param_value))
for key, value in six.iteritems(static_param_value):
self.assertTrue(np.allclose(value, dy_param_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
def test_resnet_cpu_float32(self):
seed = 90
batch_size = train_parameters["batch_size"]
batch_num = 1
with fluid.imperative.guard(place=fluid.CPUPlace()):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
np.random.seed(seed)
import random
random.seed = seed
train_reader = paddle.batch(
paddle.dataset.flowers.train(use_xmap=False),
batch_size=batch_size)
dy_param_init_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_init_value[param.name] = param._numpy()
for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num:
break
dy_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
batch_size, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy()
if batch_id == 0:
for param in fluid.default_main_program().global_block(
).all_parameters():
if param.name not in dy_param_init_value:
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
dy_grad_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
if not param.stop_gradient:
np_array = np.array(param._ivar._grad_ivar().value()
.get_tensor())
dy_grad_value[param.name + core.grad_var_suffix(
)] = np_array
optimizer.minimize(avg_loss)
dy_param_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_value[param.name] = param._numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
......
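The GPU-only body removed above and the old CPU-only test were folded into a single test_resnet_float32 that relies on the same is_compiled_with_cuda place selection as the other tests. The imperative training step it keeps follows the pattern visible in the removed lines, roughly (a sketch; resnet, optimizer, img and label are assumed to be set up inside fluid.imperative.guard() as shown):

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.core as core

    out = resnet(img)
    loss = fluid.layers.cross_entropy(input=out, label=label)
    avg_loss = fluid.layers.mean(x=loss)
    dy_out = avg_loss._numpy()      # loss value as a numpy array

    avg_loss._backward()            # imperative autograd pass
    dy_grad_value = {}
    for param in fluid.default_main_program().global_block().all_parameters():
        if not param.stop_gradient:
            grad_tensor = param._ivar._grad_ivar().value().get_tensor()
            dy_grad_value[param.name + core.grad_var_suffix()] = np.array(grad_tensor)

    optimizer.minimize(avg_loss)    # apply the parameter update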