Commit 4278be8c authored by minqiyang


Merge branch 'imperative_lr_scheduler' of https://github.com/velconia/Paddle into imperative_lr_scheduler

test=develop
@@ -32,6 +32,9 @@ from .profiler import *
 from . import checkpoint
 from .checkpoint import *
+from . import learning_rate_scheduler
+from .learning_rate_scheduler import *
 __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
@@ -39,3 +42,4 @@ __all__ += nn.__all__
 __all__ += tracer.__all__
 __all__ += profiler.__all__
 __all__ += checkpoint.__all__
+__all__ += learning_rate_scheduler.__all__
@@ -14,13 +14,9 @@
 from __future__ import print_function
-from .. import layers
 from .. import unique_name
-__all__ = [
-    'ExponentialDecay', 'NaturalExpDecay', 'InverseTimeDecay',
-    'PolynomialDecay', 'PiecewiseDecay', 'NoamDecay'
-]
+__all__ = ['PiecewiseDecay']
 class LearningRateDecay(object):
@@ -28,32 +24,35 @@ class LearningRateDecay(object):
     Base class of learning rate decay
     """
-    def __init__(self, step, dtype='float32'):
-        self.step = step
+    def __init__(self, begin=0, step=1, dtype='float32'):
+        self.step_num = begin
+        self.step_size = step
         self.dtype = dtype
     def __call__(self):
         lr = self.step()
         if isinstance(lr, float):
             lr = self._create_lr_var(lr)
-        self.step += 1
+        self.step_num += self.step_size
         return lr
-    def create_lr_var(lr):
+    def create_lr_var(self, lr):
+        from .. import layers
         lr = layers.create_global_var(
             name=unique_name.generate("learning_rate"),
             shape=[1],
             value=float(lr),
             dtype=self.dtype,
             persistable=True)
+        return lr
     def step(self):
         raise NotImplementedError()
-class PiecewiseDecay(object):
-    def __init__(self, boundaries, values, step, dtype='float32'):
-        super(PiecewiseDecay, self).__init__(step, dtype)
+class PiecewiseDecay(LearningRateDecay):
+    def __init__(self, boundaries, values, begin, step=1, dtype='float32'):
+        super(PiecewiseDecay, self).__init__(begin, step, dtype)
         self.boundaries = boundaries
         self.values = values
@@ -62,7 +61,7 @@ class PiecewiseDecay(object):
             self.vars.append(self.create_lr_var(value))
     def step(self):
-        for i in range(len(boundaries)):
-            if self.step <= boundaries[i]:
+        for i in range(len(self.boundaries)):
+            if self.step_num < self.boundaries[i]:
                 return self.vars[i]
-        return self.vars[len(values) - 1]
+        return self.vars[len(self.values) - 1]
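
For reference, the selection rule in the new PiecewiseDecay.step() can be illustrated without Paddle at all. A minimal pure-Python sketch, assuming the usual convention that values holds one entry more than boundaries (the same shape used by the test further down with bd = [3, 6, 9]):

# Pure-Python illustration of PiecewiseDecay.step() above: the first boundary
# that step_num has not yet reached selects the value; past the last boundary
# the final value is used.
boundaries = [3, 6, 9]
values = [0.1, 0.01, 0.001, 0.0001]  # len(values) == len(boundaries) + 1

def piecewise_value(step_num):
    for i in range(len(boundaries)):
        if step_num < boundaries[i]:
            return values[i]
    return values[len(values) - 1]

print([piecewise_value(s) for s in range(12)])
# [0.1, 0.1, 0.1, 0.01, 0.01, 0.01, 0.001, 0.001, 0.001, 0.0001, 0.0001, 0.0001]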
@@ -31,6 +31,7 @@ from .layer_helper import LayerHelper
 from .layers import ops
 from .regularizer import append_regularization_ops
 from .imperative import base as imperative_base
+from .imperative.learning_rate_scheduler import LearningRateDecay
 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
@@ -50,9 +51,19 @@ class Optimizer(object):
     """
     def __init__(self, learning_rate, regularization=None, name=None):
-        if not isinstance(learning_rate, float) and \
-                not isinstance(learning_rate, framework.Variable):
-            raise TypeError("learning rate should be float or Variable")
+        if framework._in_imperative_mode():
+            if not isinstance(learning_rate, float) and \
+                    not isinstance(learning_rate, LearningRateDecay):
+                raise TypeError(
+                    "learning rate should be float or LearningRateDecay, got %s here"
+                    % type(learning_rate))
+        else:
+            if not isinstance(learning_rate, float) and \
+                    not isinstance(learning_rate, framework.Variable):
+                raise TypeError(
+                    "learning rate should be float or Variable, got %s here" %
+                    type(learning_rate))
         self._name = name
         self.regularization = regularization
         self._learning_rate = learning_rate
@@ -87,7 +98,7 @@ class Optimizer(object):
                 dtype='float32' if self._dtype is None else self._dtype,
                 persistable=True)
         # get learning rate Variable from LearningRateDecay
-        elif isinstance(self._learning_rate, imperative.LearningRateDecay):
+        elif isinstance(self._learning_rate, LearningRateDecay):
             self._learning_rate_map[framework.default_main_program(
             )] = self._learning_rate()
         else:
...
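
Taken together with the scheduler change above, the optimizer can now be handed a LearningRateDecay object directly under imperative mode. A rough usage sketch against the 1.x-era API as it appears in this diff (not a snippet from the PR itself; the import path, the begin argument, and the boundaries/values follow the new signatures shown above):

import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.learning_rate_scheduler import PiecewiseDecay

with fluid.imperative.guard():
    # A LearningRateDecay subclass is now a legal learning_rate in imperative
    # mode; plain floats still work, anything else raises the new TypeError.
    scheduler = PiecewiseDecay(
        boundaries=[3, 6, 9],
        values=[0.1, 0.01, 0.001, 0.0001],
        begin=0)
    sgd = SGDOptimizer(learning_rate=scheduler)
    # Each call to scheduler() returns the lr Variable for the current interval
    # and advances step_num by step_size; the optimizer invokes it when filling
    # its learning-rate map (see the elif branch above).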
@@ -23,69 +23,129 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.imperative.nn import FC
+from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
 from paddle.fluid.imperative.base import to_variable
 from test_imperative_base import new_program_scope
-class MLP(fluid.imperative.Layer):
-    def __init__(self, param_attr=None, bias_attr=None):
-        self._fc1 = FC(10)
-        self._fc2 = FC(10)
+class SimpleImgConvPool(fluid.imperative.Layer):
+    def __init__(self,
+                 name_scope,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 pool_size,
+                 pool_stride,
+                 pool_padding=0,
+                 pool_type='max',
+                 global_pooling=False,
+                 conv_stride=1,
+                 conv_padding=0,
+                 conv_dilation=1,
+                 conv_groups=1,
+                 act=None,
+                 use_cudnn=False,
+                 param_attr=None,
+                 bias_attr=None):
+        super(SimpleImgConvPool, self).__init__(name_scope)
+        self._conv2d = Conv2D(
+            self.full_name(),
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=conv_stride,
+            padding=conv_padding,
+            dilation=conv_dilation,
+            groups=conv_groups,
+            param_attr=None,
+            bias_attr=None,
+            use_cudnn=use_cudnn)
+        self._pool2d = Pool2D(
+            self.full_name(),
+            pool_size=pool_size,
+            pool_type=pool_type,
+            pool_stride=pool_stride,
+            pool_padding=pool_padding,
+            global_pooling=global_pooling,
+            use_cudnn=use_cudnn)
     def forward(self, inputs):
-        y = self._fc1(inputs)
-        y = self._fc2(y)
-        return y
+        x = self._conv2d(inputs)
+        x = self._pool2d(x)
+        return x
-class TestImperativeOptimizerBase(unittest.TestCase):
-    def setUp(self):
-        self.batch_num = 2
-    def get_optimizer(self):
-        self.optimizer = SGDOptimizer(learning_rate=1e-3)
-    def test_optimizer_float32(self):
+class MNIST(fluid.imperative.Layer):
+    def __init__(self, name_scope):
+        super(MNIST, self).__init__(name_scope)
+        self._simple_img_conv_pool_1 = SimpleImgConvPool(
+            self.full_name(), 1, 20, 5, 2, 2, act="relu")
+        self._simple_img_conv_pool_2 = SimpleImgConvPool(
+            self.full_name(), 20, 50, 5, 2, 2, act="relu")
+        pool_2_shape = 50 * 4 * 4
+        SIZE = 10
+        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
+        self._fc = FC(self.full_name(),
+                      10,
+                      param_attr=fluid.param_attr.ParamAttr(
+                          initializer=fluid.initializer.NormalInitializer(
+                              loc=0.0, scale=scale)),
+                      act="softmax")
+    def forward(self, inputs):
+        x = self._simple_img_conv_pool_1(inputs)
+        x = self._simple_img_conv_pool_2(x)
+        x = self._fc(x)
+        return x
+class TestImperativeMnist(unittest.TestCase):
+    def test_mnist_float32(self):
         seed = 90
+        epoch_num = 1
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
-            mlp = MLP()
-            self.get_optimizer()
+            mnist = MNIST("mnist")
+            sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
             dy_param_init_value = {}
+            for epoch in range(epoch_num):
                 for batch_id, data in enumerate(train_reader()):
-                if batch_id >= self.batch_num:
-                    break
-                dy_x_data = np.array(
-                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    128, 1)
+                    dy_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape(128, 1)
                     img = to_variable(dy_x_data)
                     label = to_variable(y_data)
                     label._stop_gradient = True
-                cost = mlp(img)
-                avg_loss = fluid.layers.reduce_mean(cost)
+                    cost = mnist(img)
+                    loss = fluid.layers.cross_entropy(cost, label)
+                    avg_loss = fluid.layers.mean(loss)
                     dy_out = avg_loss._numpy()
-                if batch_id == 0:
-                    for param in fluid.default_main_program().global_block(
-                    ).all_parameters():
+                    if epoch == 0 and batch_id == 0:
+                        for param in mnist.parameters():
                             dy_param_init_value[param.name] = param._numpy()
                     avg_loss._backward()
-                self.optimizer.minimize(avg_loss)
-                mlp.clear_gradients()
+                    sgd.minimize(avg_loss)
+                    mnist.clear_gradients()
             dy_param_value = {}
-            for param in fluid.default_main_program().global_block(
-            ).all_parameters():
+            for param in mnist.parameters():
                 dy_param_value[param.name] = param._numpy()
         with new_program_scope():
@@ -95,8 +155,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-            mnist = MNIST()
-            self.get_optimizer()
+            mnist = MNIST("mnist")
+            sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
@@ -104,8 +164,9 @@ class TestImperativeOptimizerBase(unittest.TestCase):
                 name='pixel', shape=[1, 28, 28], dtype='float32')
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             cost = mnist(img)
-            avg_loss = fluid.layers.reduce_mean(cost)
-            self.optimizer.minimize(avg_loss)
+            loss = fluid.layers.cross_entropy(cost, label)
+            avg_loss = fluid.layers.mean(loss)
+            sgd.minimize(avg_loss)
             # initialize params and fetch them
             static_param_init_value = {}
@@ -119,18 +180,18 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             for i in range(len(static_param_name_list)):
                 static_param_init_value[static_param_name_list[i]] = out[i]
+            for epoch in range(epoch_num):
                 for batch_id, data in enumerate(train_reader()):
-                if batch_id >= self.batch_num:
-                    break
-                static_x_data = np.array(
-                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    [128, 1])
+                    static_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape([128, 1])
                     fetch_list = [avg_loss.name]
                     fetch_list.extend(static_param_name_list)
-                out = exe.run(fluid.default_main_program(),
-                              feed={"pixel": static_x_data,
-                                    "label": y_data},
-                              fetch_list=fetch_list)
+                    out = exe.run(
+                        fluid.default_main_program(),
+                        feed={"pixel": static_x_data,
+                              "label": y_data},
+                        fetch_list=fetch_list)
@@ -138,7 +199,10 @@ class TestImperativeOptimizerBase(unittest.TestCase):
                     static_param_value = {}
                     static_out = out[0]
                     for i in range(1, len(out)):
-                static_param_value[static_param_name_list[i - 1]] = out[i]
+                        static_param_value[static_param_name_list[i - 1]] = out[
+                            i]
+        self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))
         for key, value in six.iteritems(static_param_init_value):
             self.assertTrue(np.allclose(value, dy_param_init_value[key]))
...
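
A side note on the new MNIST network: the pool_2_shape = 50 * 4 * 4 constant follows from the two conv-pool blocks applied to a 28x28 input (5x5 convolutions without padding, 2x2 max-pools with stride 2). A quick arithmetic check:

# Shape walk-through for the MNIST model above (assuming 28x28 input,
# 5x5 convs with padding 0 and stride 1, 2x2 pools with stride 2).
h = 28
h = h - 5 + 1   # conv1 output: 24
h = h // 2      # pool1 output: 12
h = h - 5 + 1   # conv2 output: 8
h = h // 2      # pool2 output: 4
print(50 * h * h)  # 800 == 50 * 4 * 4, the flattened size fed to the FC layer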
@@ -29,9 +29,11 @@ from test_imperative_base import new_program_scope
 class MLP(fluid.imperative.Layer):
-    def __init__(self, param_attr=None, bias_attr=None):
-        self._fc1 = FC(10)
-        self._fc2 = FC(10)
+    def __init__(self, name_scope, param_attr=None, bias_attr=None):
+        super(MLP, self).__init__(name_scope)
+        self._fc1 = FC(self.full_name(), 10)
+        self._fc2 = FC(self.full_name(), 10)
     def forward(self, inputs):
         y = self._fc1(inputs)
@@ -41,10 +43,15 @@ class MLP(fluid.imperative.Layer):
 class TestImperativeOptimizerBase(unittest.TestCase):
     def setUp(self):
-        self.batch_num = 2
+        self.batch_num = 10
     def get_optimizer(self):
-        self.optimizer = SGDOptimizer(learning_rate=1e-3)
+        bd = [3, 6, 9]
+        self.optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.piecewise_decay(
+                boundaries=bd,
+                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
+        return self.optimizer
     def test_optimizer_float32(self):
         seed = 90
@@ -52,8 +59,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
-            mlp = MLP()
-            self.get_optimizer()
+            mlp = MLP('mlp')
+            optimizer = self.get_optimizer()
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
@@ -81,7 +88,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
                     dy_param_init_value[param.name] = param._numpy()
                 avg_loss._backward()
-                self.optimizer.minimize(avg_loss)
+                optimizer.minimize(avg_loss)
                 mlp.clear_gradients()
             dy_param_value = {}
             for param in fluid.default_main_program().global_block(
@@ -95,8 +102,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-            mnist = MNIST()
-            self.get_optimizer()
+            mnist = MLP('mlp')
+            optimizer = self.get_optimizer()
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
@@ -105,7 +112,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             cost = mnist(img)
             avg_loss = fluid.layers.reduce_mean(cost)
-            self.optimizer.minimize(avg_loss)
+            optimizer.minimize(avg_loss)
             # initialize params and fetch them
             static_param_init_value = {}
...
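
For clarity, the values expression in the new get_optimizer() expands to one learning rate per interval, matching the len(bd) + 1 entries that piecewise_decay expects and the interval rule illustrated earlier:

bd = [3, 6, 9]
values = [0.1 * (0.1**i) for i in range(len(bd) + 1)]
# approximately [0.1, 0.01, 0.001, 0.0001]: steps < 3 use 0.1, steps in [3, 6)
# use 0.01, steps in [6, 9) use 0.001, and steps >= 9 use 0.0001.
print(values)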