Commit 45c9f2a6 authored by minqiyang

Fix bugs in piecewise decay

test=develop
Parent a424ab49
......@@ -26,8 +26,12 @@ from .nn import *
from . import tracer
from .tracer import *
from . import learning_rate_scheduler
from .learning_rate_scheduler import *
__all__ = []
__all__ += layers.__all__
__all__ += base.__all__
__all__ += nn.__all__
__all__ += tracer.__all__
__all__ += learning_rate_scheduler.__all__
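With this hunk the new scheduler module is imported and re-exported alongside layers, nn and tracer, so PiecewiseDecay should (presumably) also be reachable as fluid.imperative.PiecewiseDecay rather than only through the module's full path.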
......@@ -14,13 +14,9 @@
from __future__ import print_function
from .. import layers
from .. import unique_name
__all__ = [
'ExponentialDecay', 'NaturalExpDecay', 'InverseTimeDecay',
'PolynomialDecay', 'PiecewiseDecay', 'NoamDecay'
]
__all__ = ['PiecewiseDecay']
class LearningRateDecay(object):
......@@ -28,32 +24,35 @@ class LearningRateDecay(object):
Base class of learning rate decay
"""
def __init__(self, step, dtype='float32'):
self.step = step
def __init__(self, begin=0, step=1, dtype='float32'):
self.step_num = begin
self.step_size = step
self.dtype = dtype
def __call__(self):
lr = self.step()
if isinstance(lr, float):
lr = self._create_lr_var(lr)
self.step += 1
self.step_num += self.step_size
return lr
def create_lr_var(lr):
def create_lr_var(self, lr):
from .. import layers
lr = layers.create_global_var(
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(lr),
dtype=self.dtype,
persistable=True)
return lr
def step(self):
raise NotImplementedError()
class PiecewiseDecay(object):
def __init__(self, boundaries, values, step, dtype='float32'):
super(PiecewiseDecay, self).__init__(step, dtype)
class PiecewiseDecay(LearningRateDecay):
def __init__(self, boundaries, values, begin, step=1, dtype='float32'):
super(PiecewiseDecay, self).__init__(begin, step, dtype)
self.boundaries = boundaries
self.values = values
......@@ -62,7 +61,7 @@ class PiecewiseDecay(object):
self.vars.append(self.create_lr_var(value))
def step(self):
for i in range(len(boundaries)):
if self.step <= boundaries[i]:
for i in range(len(self.boundaries)):
if self.step_num < self.boundaries[i]:
return self.vars[i]
return self.vars[len(values) - 1]
return self.vars[len(self.values) - 1]
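For reference, the rewritten base class advances step_num by step_size on every __call__ and wraps a plain float return value in a learning-rate Variable. A minimal usage sketch of the fixed PiecewiseDecay, assuming this module's import path and the fluid.imperative.guard() context used in the tests below:

    import paddle.fluid as fluid
    from paddle.fluid.imperative.learning_rate_scheduler import PiecewiseDecay

    with fluid.imperative.guard():
        # boundaries are compared against step_num with `<`, so the returned
        # rate changes once step_num reaches each boundary value
        decay = PiecewiseDecay(boundaries=[3, 6, 9],
                               values=[0.1, 0.01, 0.001, 0.0001],
                               begin=0)
        for _ in range(12):
            lr_var = decay()  # steps 0-2 -> 0.1, 3-5 -> 0.01, 6-8 -> 0.001, 9+ -> 0.0001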
......@@ -31,6 +31,7 @@ from .layer_helper import LayerHelper
from .layers import ops
from .regularizer import append_regularization_ops
from .imperative import base as imperative_base
from .imperative.learning_rate_scheduler import LearningRateDecay
__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
......@@ -50,9 +51,19 @@ class Optimizer(object):
"""
def __init__(self, learning_rate, regularization=None, name=None):
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable):
raise TypeError("learning rate should be float or Variable")
if framework._in_imperative_mode():
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, LearningRateDecay):
raise TypeError(
"learning rate should be float or LearningRateDecay, got %s here"
% type(learning_rate))
else:
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable):
raise TypeError(
"learning rate should be float or Variable, got %s here" %
type(learning_rate))
self._name = name
self.regularization = regularization
self._learning_rate = learning_rate
......@@ -83,7 +94,7 @@ class Optimizer(object):
dtype='float32' if self._dtype is None else self._dtype,
persistable=True)
# get learning rate Variable from LearningRateDecay
elif isinstance(self._learning_rate, imperative.LearningRateDecay):
elif isinstance(self._learning_rate, LearningRateDecay):
self._learning_rate_map[framework.default_main_program(
)] = self._learning_rate()
else:
......
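With this change the constructor branches on framework._in_imperative_mode(): in imperative mode it accepts a float or a LearningRateDecay instance (the learning-rate Variable is later fetched by calling the decay object), while graph mode keeps the float-or-Variable check. A minimal sketch of the two accepted imperative forms, assuming the PiecewiseDecay import path from this commit:

    import paddle.fluid as fluid
    from paddle.fluid.optimizer import SGDOptimizer
    from paddle.fluid.imperative.learning_rate_scheduler import PiecewiseDecay

    with fluid.imperative.guard():
        sgd_const = SGDOptimizer(learning_rate=1e-3)  # plain float is still accepted
        sgd_decay = SGDOptimizer(
            learning_rate=PiecewiseDecay([3, 6, 9], [0.1, 0.01, 0.001, 0.0001], begin=0))
        # passing anything else (e.g. a framework.Variable) now raises the
        # TypeError shown above when constructed under the imperative guard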
......@@ -23,70 +23,130 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope
class MLP(fluid.imperative.Layer):
def __init__(self, param_attr=None, bias_attr=None):
self._fc1 = FC(10)
self._fc2 = FC(10)
class SimpleImgConvPool(fluid.imperative.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs):
y = self._fc1(inputs)
y = self._fc2(y)
return y
x = self._conv2d(inputs)
x = self._pool2d(x)
return x
class TestImperativeOptimizerBase(unittest.TestCase):
def setUp(self):
self.batch_num = 2
class MNIST(fluid.imperative.Layer):
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
def get_optimizer(self):
self.optimizer = SGDOptimizer(learning_rate=1e-3)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu")
def test_optimizer_float32(self):
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = FC(self.full_name(),
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = self._fc(x)
return x
class TestImperativeMnist(unittest.TestCase):
def test_mnist_float32(self):
seed = 90
epoch_num = 1
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mlp = MLP()
self.get_optimizer()
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
dy_param_init_value = {}
for batch_id, data in enumerate(train_reader()):
if batch_id >= self.batch_num:
break
dy_x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
128, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost)
dy_out = avg_loss._numpy()
if batch_id == 0:
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
self.optimizer.minimize(avg_loss)
mlp.clear_gradients()
dy_param_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_value[param.name] = param._numpy()
for epoch in range(epoch_num):
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(128, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss._numpy()
if epoch == 0 and batch_id == 0:
for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
sgd.minimize(avg_loss)
mnist.clear_gradients()
dy_param_value = {}
for param in mnist.parameters():
dy_param_value[param.name] = param._numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
......@@ -95,8 +155,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
mnist = MNIST()
self.get_optimizer()
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
......@@ -104,8 +164,9 @@ class TestImperativeOptimizerBase(unittest.TestCase):
name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
cost = mnist(img)
avg_loss = fluid.layers.reduce_mean(cost)
self.optimizer.minimize(avg_loss)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
sgd.minimize(avg_loss)
# initialize params and fetch them
static_param_init_value = {}
......@@ -119,26 +180,29 @@ class TestImperativeOptimizerBase(unittest.TestCase):
for i in range(len(static_param_name_list)):
static_param_init_value[static_param_name_list[i]] = out[i]
for batch_id, data in enumerate(train_reader()):
if batch_id >= self.batch_num:
break
static_x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
[128, 1])
fetch_list = [avg_loss.name]
fetch_list.extend(static_param_name_list)
out = exe.run(fluid.default_main_program(),
feed={"pixel": static_x_data,
"label": y_data},
fetch_list=fetch_list)
static_param_value = {}
static_out = out[0]
for i in range(1, len(out)):
static_param_value[static_param_name_list[i - 1]] = out[i]
for epoch in range(epoch_num):
for batch_id, data in enumerate(train_reader()):
static_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape([128, 1])
fetch_list = [avg_loss.name]
fetch_list.extend(static_param_name_list)
out = exe.run(
fluid.default_main_program(),
feed={"pixel": static_x_data,
"label": y_data},
fetch_list=fetch_list)
static_param_value = {}
static_out = out[0]
for i in range(1, len(out)):
static_param_value[static_param_name_list[i - 1]] = out[
i]
self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))
for key, value in six.iteritems(static_param_init_value):
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
......
......@@ -29,9 +29,11 @@ from test_imperative_base import new_program_scope
class MLP(fluid.imperative.Layer):
def __init__(self, param_attr=None, bias_attr=None):
self._fc1 = FC(10)
self._fc2 = FC(10)
def __init__(self, name_scope, param_attr=None, bias_attr=None):
super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), 10)
self._fc2 = FC(self.full_name(), 10)
def forward(self, inputs):
y = self._fc1(inputs)
......@@ -41,10 +43,15 @@ class MLP(fluid.imperative.Layer):
class TestImperativeOptimizerBase(unittest.TestCase):
def setUp(self):
self.batch_num = 2
self.batch_num = 10
def get_optimizer(self):
self.optimizer = SGDOptimizer(learning_rate=1e-3)
bd = [3, 6, 9]
self.optimizer = SGDOptimizer(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd,
values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
return self.optimizer
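(With bd = [3, 6, 9] the list comprehension expands to values = [0.1, 0.01, 0.001, 0.0001], i.e. the rate drops by a factor of ten after steps 3, 6 and 9; raising batch_num to 10 in setUp presumably lets the run cross every boundary.)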
def test_optimizer_float32(self):
seed = 90
......@@ -52,8 +59,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mlp = MLP()
self.get_optimizer()
mlp = MLP('mlp')
optimizer = self.get_optimizer()
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
......@@ -81,7 +88,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
self.optimizer.minimize(avg_loss)
optimizer.minimize(avg_loss)
mlp.clear_gradients()
dy_param_value = {}
for param in fluid.default_main_program().global_block(
......@@ -95,8 +102,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
mnist = MNIST()
self.get_optimizer()
mnist = MLP('mlp')
optimizer = self.get_optimizer()
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
......@@ -105,7 +112,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
cost = mnist(img)
avg_loss = fluid.layers.reduce_mean(cost)
self.optimizer.minimize(avg_loss)
optimizer.minimize(avg_loss)
# initialize params and fetch them
static_param_init_value = {}
......