Commit 5514270d, authored by S shippingwang

fix bug in classification

Parent a4f1cfea
@@ -37,7 +37,10 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     return decayed_lr
 
-def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
+def cosine_decay_with_warmup(learning_rate,
+                             step_each_epoch,
+                             warm_up_epoch=5.0,
+                             epochs=120):
     """Applies cosine decay to the learning rate.
     lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
     decrease lr for every mini-batch and start with warmup.
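For intuition, the schedule that this function assembles out of fluid ops can be sketched in plain Python: a linear warmup over the first warm_up_epoch epochs, then the cosine curve from the docstring above. This is only an illustrative sketch written against the lines shown in this diff; the helper name and the exact handling of the warmup offset in the cosine term are assumptions, not code from the commit.

import math

def cosine_warmup_lr_sketch(global_step, base_lr=0.05, step_each_epoch=100,
                            warm_up_epoch=5.0, epochs=120):
    # Illustrative only: scalar version of the warmup-then-cosine schedule.
    epoch = math.floor(global_step / step_each_epoch)
    if epoch < warm_up_epoch:
        # linear ramp from 0 to base_lr over the warmup steps
        return base_lr * global_step / (step_each_epoch * warm_up_epoch)
    # cosine decay per the docstring formula, evaluated once per epoch
    return base_lr * (math.cos(epoch * (math.pi / epochs)) + 1) / 2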
@@ -51,7 +54,7 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
         name="learning_rate")
     warmup_epoch = fluid.layers.fill_constant(
-        shape=[1], dtype='float32', value=float(5), force_cpu=True)
+        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
     with init_on_cpu():
         epoch = ops.floor(global_step / step_each_epoch)
@@ -66,7 +69,12 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
                 fluid.layers.tensor.assign(input=decayed_lr, output=lr)
     return lr
 
-def exponential_decay_with_warmup(learning_rate, step_each_epoch, decay_epochs, decay_rate=0.97, warm_up_epoch=5.0):
+def exponential_decay_with_warmup(learning_rate,
+                                  step_each_epoch,
+                                  decay_epochs,
+                                  decay_rate=0.97,
+                                  warm_up_epoch=5.0):
     """Applies exponential decay to the learning rate.
     """
     global_step = _decay_step_counter()
@@ -84,16 +92,19 @@ def exponential_decay_with_warmup(learning_rate, step_each_epoch, decay_epochs,
         epoch = ops.floor(global_step / step_each_epoch)
         with fluid.layers.control_flow.Switch() as switch:
             with switch.case(epoch < warmup_epoch):
-                decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch))
+                decayed_lr = learning_rate * (global_step /
+                                              (step_each_epoch * warmup_epoch))
                 fluid.layers.assign(input=decayed_lr, output=lr)
             with switch.default():
-                div_res = (global_step - warmup_epoch * step_each_epoch) / decay_epochs
+                div_res = (
+                    global_step - warmup_epoch * step_each_epoch) / decay_epochs
                 div_res = ops.floor(div_res)
-                decayed_lr = learning_rate * (decay_rate ** div_res)
+                decayed_lr = learning_rate * (decay_rate**div_res)
                 fluid.layers.assign(input=decayed_lr, output=lr)
     return lr
 
 def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
     """ Applies linear learning rate warmup for distributed training
     Argument learning_rate can be float or a Variable
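The hunk above mostly re-wraps long lines, but the schedule it implements may be easier to follow as scalar arithmetic: a linear warmup for warm_up_epoch epochs, then a staircase exponential decay in which div_res counts how many decay intervals have elapsed since warmup ended. Below is a minimal sketch mirroring the fluid ops above, assuming decay_epochs is expressed in steps (a step count is divided by it); it is not code from this commit.

import math

def exponential_warmup_lr_sketch(global_step, base_lr, step_each_epoch,
                                 decay_epochs, decay_rate=0.97,
                                 warm_up_epoch=5.0):
    # Illustrative only: scalar mirror of exponential_decay_with_warmup.
    epoch = math.floor(global_step / step_each_epoch)
    if epoch < warm_up_epoch:
        # linear warmup from 0 to base_lr
        return base_lr * global_step / (step_each_epoch * warm_up_epoch)
    # staircase decay: multiply by decay_rate once per decay_epochs steps
    div_res = math.floor(
        (global_step - warm_up_epoch * step_each_epoch) / decay_epochs)
    return base_lr * (decay_rate ** div_res)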
@@ -197,7 +208,8 @@ class Optimizer(object):
         learning_rate = cosine_decay_with_warmup(
             learning_rate=self.lr,
             step_each_epoch=self.step,
-            epochs=self.num_epochs)
+            epochs=self.num_epochs,
+            warm_up_epoch=self.warm_up_epochs)
         optimizer = fluid.optimizer.Momentum(
             learning_rate=learning_rate,
             momentum=self.momentum_rate,
@@ -222,8 +234,7 @@ class Optimizer(object):
             regularization=fluid.regularizer.L2Decay(self.l2_decay),
             momentum=self.momentum_rate,
             rho=0.9,
-            epsilon=0.001
-        )
+            epsilon=0.001)
         return optimizer
 
     def linear_decay(self):
......
@@ -131,7 +131,7 @@ def parse_args():
     add_arg('use_mixup', bool, False, "Whether to use mixup")
     add_arg('mixup_alpha', float, 0.2, "The value of mixup_alpha")
     add_arg('reader_thread', int, 8, "The number of multi thread reader")
-    add_arg('reader_buf_size', int, 2048, "The buf size of multi thread reader")
+    add_arg('reader_buf_size', int, 64, "The buf size of multi thread reader")
     add_arg('interpolation', int, None, "The interpolation mode")
     add_arg('use_aa', bool, False, "Whether to use auto augment")
     parser.add_argument('--image_mean', nargs='+', type=float, default=[0.485, 0.456, 0.406], help="The mean of input image data")
......
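In the second file, add_arg registers a command-line flag from positional arguments (name, type, default, help); the commit only lowers the reader_buf_size default from 2048 to 64. As a rough, assumed sketch of what such a wrapper around argparse typically looks like (written from the call sites above, not taken from the project's utility module):

import argparse
import distutils.util

parser = argparse.ArgumentParser(description="image classification training")

def add_arg(argname, dtype, default, help_text):
    # bools need strtobool so "--use_mixup False" is not parsed as truthy
    dtype = distutils.util.strtobool if dtype == bool else dtype
    parser.add_argument("--" + argname, type=dtype, default=default,
                        help=help_text)

add_arg('reader_buf_size', int, 64, "The buf size of multi thread reader")
args = parser.parse_args(['--reader_buf_size', '128'])
print(args.reader_buf_size)  # -> 128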