Commit a585092b authored by wuzewu

Update strategy

Parent 77443f64
```diff
...
@@ -63,7 +63,7 @@ def finetune(args):
         enable_memory_optim=False,
         checkpoint_dir=args.checkpoint_dir,
         strategy=hub.finetune.strategy.DefaultFinetuneStrategy(
-            learning_rate=0.00025, optimizer_name="adam"))
+            learning_rate=0.00025, optimizer_name="momentum", momentum=0.9))
     task = hub.FasterRCNNTask(
         data_reader=data_reader,
...
```
```diff
...
@@ -45,7 +45,7 @@ def finetune(args):
         enable_memory_optim=False,
         checkpoint_dir=args.checkpoint_dir,
         strategy=hub.finetune.strategy.DefaultFinetuneStrategy(
-            learning_rate=0.00025, optimizer_name="adam"))
+            learning_rate=0.00025, optimizer_name="momentum", momentum=0.9))
     task = hub.SSDTask(
         data_reader=data_reader,
...
```
```diff
...
@@ -45,7 +45,7 @@ def finetune(args):
         enable_memory_optim=False,
         checkpoint_dir=args.checkpoint_dir,
         strategy=hub.finetune.strategy.DefaultFinetuneStrategy(
-            learning_rate=0.00025, optimizer_name="adam"))
+            learning_rate=0.00025, optimizer_name="momentum", momentum=0.9))
     task = hub.YOLOTask(
         data_reader=data_reader,
...
```
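All three detection demos (Faster R-CNN, SSD, YOLO) switch from Adam to SGD with momentum. The extra `momentum=0.9` argument is not a strategy parameter; it is carried through to the underlying fluid optimizer by the `**kwargs` plumbing introduced in the strategy classes below. A minimal usage sketch, assuming a standard PaddleHub setup:

```python
import paddlehub as hub

# Any keyword argument the strategy does not consume itself (here: momentum)
# is forwarded verbatim to the fluid optimizer constructor.
strategy = hub.finetune.strategy.DefaultFinetuneStrategy(
    learning_rate=0.00025,
    optimizer_name="momentum",
    momentum=0.9)
```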
```diff
...
@@ -133,39 +133,39 @@ def set_gradual_unfreeze(depth_params_dict, unfreeze_depths):
 class DefaultStrategy(object):
-    def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
+    def __init__(self, learning_rate=1e-4, optimizer_name="adam", **kwargs):
         self.learning_rate = learning_rate
         self._optimizer_name = optimizer_name
         if self._optimizer_name.lower() == "sgd":
             self.optimizer = fluid.optimizer.SGD(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "adagrad":
             self.optimizer = fluid.optimizer.Adagrad(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "adamax":
             self.optimizer = fluid.optimizer.Adamax(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "decayedadagrad":
             self.optimizer = fluid.optimizer.DecayedAdagrad(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "ftrl":
             self.optimizer = fluid.optimizer.Ftrl(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "larsmomentum":
             self.optimizer = fluid.optimizer.LarsMomentum(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "momentum":
             self.optimizer = fluid.optimizer.Momentum(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "decayedadagrad":
             self.optimizer = fluid.optimizer.DecayedAdagrad(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         elif self._optimizer_name.lower() == "rmsprop":
             self.optimizer = fluid.optimizer.RMSPropOptimizer(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)
         else:
             self.optimizer = fluid.optimizer.Adam(
-                learning_rate=self.learning_rate)
+                learning_rate=self.learning_rate, **kwargs)

     def execute(self, loss, data_reader, config, dev_count):
         if self.optimizer is not None:
```
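Two details in this hunk are worth noting. First, the second `decayedadagrad` branch is dead code: the earlier branch already matches that name, so it can never be reached. Second, `fluid.optimizer.Momentum` takes a required `momentum` argument, so the demos' switch to `optimizer_name="momentum"` only works because of this new forwarding. The `if/elif` chain could also be written as a lookup table; a sketch of an equivalent dispatch, using only the optimizer classes that appear in the diff:

```python
import paddle.fluid as fluid

# Table-driven equivalent of DefaultStrategy's optimizer selection.
_OPTIMIZERS = {
    "sgd": fluid.optimizer.SGD,
    "adagrad": fluid.optimizer.Adagrad,
    "adamax": fluid.optimizer.Adamax,
    "decayedadagrad": fluid.optimizer.DecayedAdagrad,
    "ftrl": fluid.optimizer.Ftrl,
    "larsmomentum": fluid.optimizer.LarsMomentum,
    "momentum": fluid.optimizer.Momentum,
    "rmsprop": fluid.optimizer.RMSPropOptimizer,
}

def build_optimizer(optimizer_name, learning_rate, **kwargs):
    # Unknown names fall back to Adam, mirroring the else branch above;
    # extra kwargs (e.g. momentum=0.9) are forwarded unchanged.
    cls = _OPTIMIZERS.get(optimizer_name.lower(), fluid.optimizer.Adam)
    return cls(learning_rate=learning_rate, **kwargs)
```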
```diff
...
@@ -186,10 +186,13 @@ class CombinedStrategy(DefaultStrategy):
                  learning_rate=1e-4,
                  scheduler=None,
                  regularization=None,
-                 clip=None):
+                 clip=None,
+                 **kwargs):
         super(CombinedStrategy, self).__init__(
-            optimizer_name=optimizer_name, learning_rate=learning_rate)
+            optimizer_name=optimizer_name,
+            learning_rate=learning_rate,
+            **kwargs)
+        self.kwargs = kwargs
         # init set
         self.scheduler = {
             "warmup": 0.0,
...
@@ -379,7 +382,9 @@ class CombinedStrategy(DefaultStrategy):
         # set optimizer
         super(CombinedStrategy, self).__init__(
-            optimizer_name=self._optimizer_name, learning_rate=scheduled_lr)
+            optimizer_name=self._optimizer_name,
+            learning_rate=scheduled_lr,
+            **self.kwargs)

         # discriminative learning rate
         # based on layer
```
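`CombinedStrategy` constructs its optimizer twice: once in `__init__` and again later (the `@@ -379` hunk) once the scheduled learning rate has been computed. Storing `self.kwargs = kwargs` is what lets the second construction see the same extra arguments as the first. A stripped-down sketch of the store-and-replay pattern, reusing the hypothetical `build_optimizer` helper from the sketch above (the class and method names here are illustrative, not the actual PaddleHub internals):

```python
class RebuildableStrategy(object):
    def __init__(self, optimizer_name="adam", learning_rate=1e-4, **kwargs):
        self._optimizer_name = optimizer_name
        self.learning_rate = learning_rate
        self.kwargs = kwargs  # keep e.g. momentum=0.9 for the rebuild

    def rebuild(self, scheduled_lr):
        # Same optimizer name, same extra kwargs, new scheduled learning rate.
        return build_optimizer(self._optimizer_name, scheduled_lr, **self.kwargs)
```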
```diff
...
@@ -568,7 +573,8 @@ class AdamWeightDecayStrategy(CombinedStrategy):
                  lr_scheduler="linear_decay",
                  warmup_proportion=0.1,
                  weight_decay=0.01,
-                 optimizer_name="adam"):
+                 optimizer_name="adam",
+                 **kwargs):
         scheduler = {"warmup": warmup_proportion}
         if lr_scheduler == "noam_decay":
             scheduler["noam_decay"] = True
...
@@ -587,14 +593,16 @@ class AdamWeightDecayStrategy(CombinedStrategy):
             learning_rate=learning_rate,
             scheduler=scheduler,
             regularization=regularization,
-            clip=clip)
+            clip=clip,
+            **kwargs)


 class L2SPFinetuneStrategy(CombinedStrategy):
     def __init__(self,
                  learning_rate=1e-4,
                  optimizer_name="adam",
-                 regularization_coeff=1e-3):
+                 regularization_coeff=1e-3,
+                 **kwargs):
         scheduler = {}
         regularization = {"L2SP": regularization_coeff}
         clip = {}
...
@@ -603,14 +611,16 @@ class L2SPFinetuneStrategy(CombinedStrategy):
             learning_rate=learning_rate,
             scheduler=scheduler,
             regularization=regularization,
-            clip=clip)
+            clip=clip,
+            **kwargs)


 class DefaultFinetuneStrategy(CombinedStrategy):
     def __init__(self,
                  learning_rate=1e-4,
                  optimizer_name="adam",
-                 regularization_coeff=1e-3):
+                 regularization_coeff=1e-3,
+                 **kwargs):
         scheduler = {}
         regularization = {"L2": regularization_coeff}
         clip = {}
...
@@ -620,7 +630,8 @@ class DefaultFinetuneStrategy(CombinedStrategy):
             learning_rate=learning_rate,
             scheduler=scheduler,
             regularization=regularization,
-            clip=clip)
+            clip=clip,
+            **kwargs)


 class ULMFiTStrategy(CombinedStrategy):
...
@@ -632,7 +643,8 @@ class ULMFiTStrategy(CombinedStrategy):
                  dis_blocks=3,
                  factor=2.6,
                  frz_blocks=3,
-                 params_layer=None):
+                 params_layer=None,
+                 **kwargs):
         scheduler = {
             "slanted_triangle": {
...
@@ -656,4 +668,5 @@ class ULMFiTStrategy(CombinedStrategy):
             learning_rate=learning_rate,
             scheduler=scheduler,
             regularization=regularization,
-            clip=clip)
+            clip=clip,
+            **kwargs)
```
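With every subclass signature widened to accept `**kwargs`, optimizer-specific hyperparameters now flow through the whole chain: subclass → `CombinedStrategy` → `DefaultStrategy` → fluid optimizer. A hedged example of what this enables (`beta1` is an illustrative `fluid.optimizer.Adam` argument, not something the strategy itself inspects):

```python
import paddlehub as hub

strategy = hub.finetune.strategy.AdamWeightDecayStrategy(
    learning_rate=5e-5,
    weight_decay=0.01,
    optimizer_name="adam",
    beta1=0.9)  # passed through untouched to fluid.optimizer.Adam
```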