Commit 5514270d authored by S shippingwang

fix bug in classification

Parent a4f1cfea
@@ -37,7 +37,10 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     return decayed_lr


-def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
+def cosine_decay_with_warmup(learning_rate,
+                             step_each_epoch,
+                             warm_up_epoch=5.0,
+                             epochs=120):
     """Applies cosine decay to the learning rate.
     lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
     decrease lr for every mini-batch and start with warmup.
@@ -51,7 +54,7 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
         name="learning_rate")

     warmup_epoch = fluid.layers.fill_constant(
-        shape=[1], dtype='float32', value=float(5), force_cpu=True)
+        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)

     with init_on_cpu():
         epoch = ops.floor(global_step / step_each_epoch)
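For orientation, here is a minimal eager-mode sketch of the schedule this function assembles with fluid ops. The helper name, base_lr, and the defaults are illustrative, and the post-warmup branch follows the formula quoted in the docstring rather than the exact graph expression (which is collapsed in this diff):

import math

def cosine_warmup_lr(global_step, base_lr=0.1, step_each_epoch=100,
                     warm_up_epoch=5.0, epochs=120):
    # epoch index, mirroring ops.floor(global_step / step_each_epoch)
    epoch = math.floor(global_step / step_each_epoch)
    if epoch < warm_up_epoch:
        # linear warmup: ramp from 0 to base_lr over the first warm_up_epoch epochs
        return base_lr * global_step / (step_each_epoch * warm_up_epoch)
    # cosine decay afterwards, per the docstring:
    # lr = 0.5 * base_lr * (cos(epoch * pi / epochs) + 1)
    return 0.5 * base_lr * (math.cos(epoch * math.pi / epochs) + 1)

With warm_up_epoch now exposed as an argument (the point of this hunk), the warmup length can be tuned per model instead of being hard-coded to 5 epochs.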
@@ -66,16 +69,21 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
             fluid.layers.tensor.assign(input=decayed_lr, output=lr)
     return lr


-def exponential_decay_with_warmup(learning_rate, step_each_epoch, decay_epochs, decay_rate=0.97, warm_up_epoch=5.0):
+def exponential_decay_with_warmup(learning_rate,
+                                  step_each_epoch,
+                                  decay_epochs,
+                                  decay_rate=0.97,
+                                  warm_up_epoch=5.0):
     """Applies exponential decay to the learning rate.
     """
     global_step = _decay_step_counter()
     lr = fluid.layers.tensor.create_global_var(
-    shape=[1],
-    value=0.0,
-    dtype='float32',
-    persistable=True,
-    name="learning_rate")
+        shape=[1],
+        value=0.0,
+        dtype='float32',
+        persistable=True,
+        name="learning_rate")

     warmup_epoch = fluid.layers.fill_constant(
         shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
@@ -84,16 +92,19 @@ def exponential_decay_with_warmup(learning_rate, step_each_epoch, decay_epochs,
         epoch = ops.floor(global_step / step_each_epoch)
         with fluid.layers.control_flow.Switch() as switch:
             with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch))
+                decayed_lr = learning_rate * (global_step /
+                                              (step_each_epoch * warmup_epoch))
                 fluid.layers.assign(input=decayed_lr, output=lr)
             with switch.default():
-                div_res = (global_step - warmup_epoch * step_each_epoch) / decay_epochs
+                div_res = (
+                    global_step - warmup_epoch * step_each_epoch) / decay_epochs
                 div_res = ops.floor(div_res)
-                decayed_lr = learning_rate * (decay_rate ** div_res)
+                decayed_lr = learning_rate * (decay_rate**div_res)
                 fluid.layers.assign(input=decayed_lr, output=lr)
     return lr


 def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
     """ Applies linear learning rate warmup for distributed training
         Argument learning_rate can be float or a Variable
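Likewise, a minimal eager-mode sketch of exponential_decay_with_warmup as changed above. The helper name and default values are illustrative; note that decay_epochs divides a raw step counter directly, exactly as the Switch default branch does:

import math

def exp_warmup_lr(global_step, base_lr=0.1, step_each_epoch=100,
                  decay_epochs=240, decay_rate=0.97, warm_up_epoch=5.0):
    epoch = math.floor(global_step / step_each_epoch)
    if epoch < warm_up_epoch:
        # same linear warmup as the cosine variant
        return base_lr * global_step / (step_each_epoch * warm_up_epoch)
    # number of whole decay intervals completed since warmup ended
    div_res = math.floor(
        (global_step - warm_up_epoch * step_each_epoch) / decay_epochs)
    return base_lr * decay_rate ** div_res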
@@ -197,7 +208,8 @@ class Optimizer(object):
         learning_rate = cosine_decay_with_warmup(
             learning_rate=self.lr,
             step_each_epoch=self.step,
-            epochs=self.num_epochs)
+            epochs=self.num_epochs,
+            warm_up_epoch=self.warm_up_epochs)
         optimizer = fluid.optimizer.Momentum(
             learning_rate=learning_rate,
             momentum=self.momentum_rate,
@@ -222,8 +234,7 @@ class Optimizer(object):
             regularization=fluid.regularizer.L2Decay(self.l2_decay),
             momentum=self.momentum_rate,
             rho=0.9,
-            epsilon=0.001
-        )
+            epsilon=0.001)
         return optimizer

     def linear_decay(self):
@@ -131,7 +131,7 @@ def parse_args():
     add_arg('use_mixup', bool, False, "Whether to use mixup")
     add_arg('mixup_alpha', float, 0.2, "The value of mixup_alpha")
     add_arg('reader_thread', int, 8, "The number of multi thread reader")
-    add_arg('reader_buf_size', int, 2048, "The buf size of multi thread reader")
+    add_arg('reader_buf_size', int, 64, "The buf size of multi thread reader")
     add_arg('interpolation', int, None, "The interpolation mode")
     add_arg('use_aa', bool, False, "Whether to use auto augment")
     parser.add_argument('--image_mean', nargs='+', type=float, default=[0.485, 0.456, 0.406], help="The mean of input image data")
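For context, reader_thread and reader_buf_size are the kind of flags that get forwarded to a Paddle 1.x multi-thread reader decorator. A hedged sketch using paddle.reader.xmap_readers, where sample_reader and process_image are hypothetical placeholders rather than code from this repository:

import paddle

def sample_reader():
    # hypothetical sample generator: yields (image_path, label) pairs
    for i in range(1000):
        yield ("img_%d.jpg" % i, i % 10)

def process_image(sample):
    # hypothetical mapper: decode and augment the image here
    return sample

# mapper runs in reader_thread worker threads; reader_buf_size bounds the
# queue of processed samples waiting to be consumed by training
train_reader = paddle.reader.xmap_readers(
    process_image, sample_reader, 8, 64)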