Commit 6f41ac4a authored by wuzewu

add finetune default strategy

Parent 2243468e
 import paddle_hub as hub
 import paddle
 import paddle.fluid as fluid
-from paddle_hub.dataset.flowers import FlowersDataset
-from paddle_hub.dataset.dogcat import DogCatDataset
-from paddle_hub.dataset.cv_reader import ImageClassificationReader
-from paddle_hub.finetune.task import Task
-from paddle_hub.finetune.network import append_mlp_classifier
-from paddle_hub.finetune.config import FinetuneConfig
-from paddle_hub.finetune.finetune import finetune_and_eval


 def train():
-    resnet_module = hub.Module(module_dir="./hub_module_ResNet50")
+    resnet_module = hub.Module(name="resnet50_imagenet")
     input_dict, output_dict, program = resnet_module.context(
-        sign_name="feature_map")
-    data_processor = ImageClassificationReader(
-        image_width=224,
-        image_height=224,
-        dataset=FlowersDataset(),
-        color_mode="RGB")
+        sign_name="feature_map", trainable=True)
+    dataset = hub.dataset.Flowers()
+    data_reader = hub.ImageClassificationReader(
+        image_width=224, image_height=224, dataset=dataset)
     with fluid.program_guard(program):
         label = fluid.layers.data(name="label", dtype="int64", shape=[1])
         img = input_dict[0]
         feature_map = output_dict[0]

-        config = FinetuneConfig(
-            log_interval=10,
-            eval_interval=100,
-            use_cuda=True,
-            learning_rate=1e-4,
-            weight_decay=None,
-            in_tokens=None,
-            num_epoch=10,
-            batch_size=32,
-            max_seq_len=None,
-            warmup_proportion=None,
-            save_ckpt_interval=200,
-            checkpoint_dir="./finetune_task",
-            strategy='BaseFinetune',
-            with_memory_optimization=True)
+        config = hub.FinetuneConfig(
+            use_cuda=True,
+            num_epoch=10,
+            batch_size=32,
+            strategy=hub.finetune.strategy.DefaultFinetuneStrategy())

         feed_list = [img.name, label.name]

-        task = append_mlp_classifier(
-            feature=feature_map, label=label, num_classes=5)
-        finetune_and_eval(
-            task,
-            feed_list=feed_list,
-            data_processor=data_processor,
-            config=config)
+        task = hub.append_mlp_classifier(
+            feature=feature_map, label=label, num_classes=dataset.num_labels)
+        hub.finetune_and_eval(
+            task, feed_list=feed_list, data_reader=data_reader, config=config)


 if __name__ == "__main__":
......
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import time

 from .strategy import DefaultStrategy
+from paddle_hub.common.utils import md5


 class FinetuneConfig(object):
@@ -33,17 +36,18 @@ class FinetuneConfig(object):
         self._eval_interval = eval_interval
         self._save_ckpt_interval = save_ckpt_interval
         self._use_cuda = use_cuda
-        self._learning_rate = learning_rate
         self._checkpoint_dir = checkpoint_dir
         self._num_epoch = num_epoch
         self._batch_size = batch_size
-        self._max_seq_len = max_seq_len
         if strategy is None:
             self._strategy = DefaultStrategy()
         else:
             self._strategy = strategy
         self._enable_memory_optim = enable_memory_optim
-        self._optimizer = optimizer
+        if checkpoint_dir is None:
+            self._checkpoint_dir = "hub_cpkt_" + md5(str(time.time()))[0:20]
+        else:
+            self._checkpoint_dir = checkpoint_dir

     @property
     def log_interval(self):
@@ -61,10 +65,6 @@ class FinetuneConfig(object):
     def use_cuda(self):
         return self._use_cuda

-    @property
-    def learning_rate(self):
-        return self._learning_rate
-
     @property
     def checkpoint_dir(self):
         return self._checkpoint_dir
......
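With this change FinetuneConfig drops the learning_rate/max_seq_len fields and, when no checkpoint_dir is passed, generates one from a timestamp hash. A minimal sketch of that naming scheme, assuming paddle_hub.common.utils.md5 returns a hex digest of its string argument (md5_hex below is a hypothetical stand-in, not the library helper):

import hashlib
import time


def md5_hex(text):
    # assumed equivalent of paddle_hub.common.utils.md5: hex digest of a string
    return hashlib.md5(text.encode("utf-8")).hexdigest()


# mirrors the new default: "hub_cpkt_" plus the first 20 hex characters
checkpoint_dir = "hub_cpkt_" + md5_hex(str(time.time()))[0:20]
print(checkpoint_dir)  # e.g. hub_cpkt_0f3a9c... (varies per run)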
@@ -69,7 +69,6 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
     num_epoch = config.num_epoch
     batch_size = config.batch_size
-    learning_rate = config.learning_rate

     log_writter = LogWriter(
         os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=10)
@@ -82,7 +81,7 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
         if isinstance(config.strategy, hub.BERTFinetuneStrategy):
             scheduled_lr = config.strategy.execute(loss, main_program,
                                                    data_reader, config)
-        elif isinstance(config.optimizer, hub.DefaultStrategy):
+        elif isinstance(config.strategy, hub.DefaultStrategy):
             config.strategy.execute(loss)
         #TODO: add more finetune strategy
@@ -135,7 +134,7 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
                     train_time_used = 0
                     num_trained_examples = acc_sum = loss_sum = 0

-                if global_step % config.save_ckpt_interval == 0:
+                if config.save_ckpt_interval and global_step % config.save_ckpt_interval == 0:
                     # NOTE: current saved checkpoint machanism is not completed,
                     # it can't restore dataset training status
                     save_checkpoint(
......
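Besides dispatching on config.strategy instead of config.optimizer, the last hunk guards the checkpoint condition so a missing save_ckpt_interval no longer breaks the modulo test. A small illustrative sketch (should_save_checkpoint is a hypothetical helper, not part of the module):

def should_save_checkpoint(global_step, save_ckpt_interval):
    # Without the truthiness check, `global_step % None` would raise TypeError.
    return bool(save_ckpt_interval) and global_step % save_ckpt_interval == 0


assert should_save_checkpoint(200, 200)
assert not should_save_checkpoint(200, None)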
@@ -19,6 +19,20 @@ import paddle.fluid as fluid

 from .optimization import adam_weight_decay_optimization


+def get_pretrained_parameter(main_program, start_program):
+    pretrained_parameters = []
+    global_block = main_program.global_block()
+    for op in global_block.ops[::-1]:
+        for input_arg in op.input_arg_names:
+            var = global_block.var(input_arg)
+            if isinstance(
+                    var, fluid.framework.Parameter
+            ) and input_arg not in start_program.global_block().vars:
+                pretrained_parameters.append(var)
+
+    return pretrained_parameters
+
+
 class DefaultStrategy(object):
     def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
         self.learning_rate = learning_rate
@@ -96,3 +110,39 @@ class BERTFinetuneStrategy(DefaultStrategy):
     # TODO complete __str__()
     def __str__(self):
         return "BERTFintuneStrategy"
+
+
+class DefaultFinetuneStrategy(DefaultStrategy):
+    def __init__(self,
+                 learning_rate=1e-4,
+                 optimizer_name="adam",
+                 regularization_coeff=1e-3):
+        super(DefaultFinetuneStrategy, self).__init__(
+            learning_rate=learning_rate, optimizer_name=optimizer_name)
+        self.learning_rate = learning_rate
+        self._optimizer_name = optimizer_name
+        self.regularization_coeff = regularization_coeff
+
+    def execute(self, loss):
+        if self._optimizer_name.lower() == "adam":
+            self.optimizer = fluid.optimizer.Adam(
+                learning_rate=self.learning_rate)
+        elif self._optimizer_name.lower() == "sgd":
+            self.optimizer = fluid.optimizer.SGD(
+                learning_rate=self.learning_rate)
+
+        # get pretrained parameters
+        program = loss.block.program
+        global_block = program.global_block()
+        pretrained_params = get_pretrained_parameter(
+            program, fluid.default_startup_program())
+
+        # set parameter attrs
+        for index, param in enumerate(pretrained_params):
+            param.regularizer = fluid.regularizer.L2Decay(
+                regularization_coeff=self.regularization_coeff)
+
+        if self.optimizer is not None:
+            self.optimizer.minimize(loss)
+        else:
+            raise ValueError("DefaultFinetuneStrategy's optimizer is None")
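Combined with the demo change at the top of this commit, the new DefaultFinetuneStrategy is selected through FinetuneConfig. A minimal usage sketch, using only names introduced in this commit (values are the defaults shown above; exact behavior depends on the installed paddle_hub version):

import paddle_hub as hub

# Adam (or SGD) with L2 decay applied only to the pretrained parameters,
# i.e. those present in the main program but not in the startup program.
strategy = hub.finetune.strategy.DefaultFinetuneStrategy(
    learning_rate=1e-4, optimizer_name="adam", regularization_coeff=1e-3)

config = hub.FinetuneConfig(
    use_cuda=True, num_epoch=10, batch_size=32, strategy=strategy)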