Commit 6c1b67bc authored by Zeyu Chen

add sequence label README.md

Parent 416c374c
export CUDA_VISIBLE_DEVICES=0

CKPT_DIR="./ckpt_sequence_label"

python -u sequence_label.py \
                   --batch_size 16 \
                   --weight_decay 0.01 \
                   --num_epoch 3 \
                   --checkpoint_dir $CKPT_DIR \
                   --max_seq_len 256 \
                   --learning_rate 5e-5
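For context, sequence_label.py parses these flags with argparse (imported in the diff below). A minimal sketch of a matching parser; the defaults and help strings here are illustrative, not copied from the actual script:

import argparse

parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epochs for finetuning.")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used by the finetune strategy.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay for AdamWeightDecayStrategy.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to save model checkpoints.")
parser.add_argument("--max_seq_len", type=int, default=256, help="Maximum sequence length after tokenization.")
parser.add_argument("--batch_size", type=int, default=16, help="Number of examples per training batch.")
args = parser.parse_args()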
@@ -14,6 +14,7 @@
"""Finetuning on sequence labeling task."""
import argparse
import ast
import paddle.fluid as fluid
import paddlehub as hub
......
@@ -133,7 +133,7 @@ hub.finetune_and_eval(task=cls_task, data_reader=reader, feed_list=feed_list, config=config)
* `eval_interval`: interval between model evaluations; by default the validation set is evaluated every 100 steps
* `save_ckpt_interval`: checkpoint saving interval; configure it according to the task size; by default only the best model on the validation set and the model at the end of training are saved
* `use_cuda`: whether to train on GPU; defaults to False
* `checkpoint_dir`: directory where model checkpoints are saved; if not specified, one is generated automatically
* `num_epoch`: number of finetuning epochs
* `batch_size`: training batch size; when using a GPU, adjust batch_size to the available memory
* `enable_memory_optim`: whether to enable memory optimization; defaults to True
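A minimal sketch of how these options might be combined in a RunConfig (the directory name and numeric values are illustrative, not defaults from the library):

import paddlehub as hub

strategy = hub.AdamWeightDecayStrategy(learning_rate=5e-5, weight_decay=0.01)
config = hub.RunConfig(
    eval_interval=100,                       # evaluate the validation set every 100 steps
    use_cuda=False,                          # set True to train on GPU
    checkpoint_dir="./ckpt_sequence_label",  # omit to have a directory auto-generated
    num_epoch=3,
    batch_size=16,
    enable_memory_optim=True,
    strategy=strategy)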
......
export CUDA_VISIBLE_DEVICES=5

CKPT_DIR="./ckpt_20190414203357/best_model"

python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128
import paddle.fluid as fluid
import paddlehub as hub

# Step1: Select pre-trained model
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

# Step2: Prepare Dataset and DataReader
dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader(
    dataset=dataset, vocab_path=module.get_vocab_path(), max_seq_len=128)

# Step3: Construct transfer learning task
with fluid.program_guard(program):
    label = fluid.layers.data(name="label", shape=[1], dtype='int64')

    pooled_output = outputs["pooled_output"]

    cls_task = hub.create_text_cls_task(
        feature=pooled_output, label=label, num_classes=dataset.num_labels)

# Step4: Setup config then start finetune
strategy = hub.AdamWeightDecayStrategy(learning_rate=5e-5, weight_decay=0.01)
config = hub.RunConfig(
    use_cuda=True,
    checkpoint_dir="./ckpt",
    num_epoch=3,
    batch_size=32,
    strategy=strategy)
feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
......
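The feed list is truncated in the diff above; for an ERNIE text classification task it would typically continue with the remaining input tensors and end with the finetune_and_eval call shown in the hunk header. A hedged reconstruction, where the two extra field names follow the usual ERNIE input convention and are an assumption here:

feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
    inputs["segment_ids"].name, inputs["input_mask"].name  # assumed remaining ERNIE inputs
]
hub.finetune_and_eval(
    task=cls_task, data_reader=reader, feed_list=feed_list, config=config)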
@@ -57,6 +57,7 @@ def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
    avg_loss = loss_sum / num_eval_examples
    avg_acc = acc_sum / num_eval_examples
    eval_speed = eval_step / eval_time_used

    logger.info(
        "[%s dataset evaluation result] loss=%.5f acc=%.5f [step/sec: %.2f]" %
        (phase, avg_loss, avg_acc, eval_speed))
@@ -64,11 +65,11 @@ def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
    return avg_loss, avg_acc, eval_speed
def evaluate_seq_label_task(task,
                            data_reader,
                            feed_list,
                            phase="test",
                            config=None):
    fetch_list = [
        task.variable("labels").name,
        task.variable("infers").name,
......
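The truncated body fetches the gold and predicted label tensors; a minimal sketch of the kind of metric such a loop can derive from them, assuming labels and infers arrive as flat integer id arrays (the actual implementation may compute chunk-level precision/recall/F1 instead):

import numpy as np

def token_accuracy(labels, infers):
    # Position-wise comparison of gold and predicted label ids.
    gold = np.asarray(labels).reshape(-1)
    pred = np.asarray(infers).reshape(-1)
    return float((gold == pred).sum()) / max(gold.size, 1)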
@@ -27,7 +27,7 @@ from visualdl import LogWriter
from paddlehub.common.logger import logger
from paddlehub.finetune.strategy import AdamWeightDecayStrategy, DefaultStrategy
from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
from paddlehub.finetune.evaluate import evaluate_cls_task, evaluate_seq_label_task
import paddlehub as hub
@@ -126,13 +126,13 @@ def _finetune_seq_label_task(task,
                exe=exe)

        if do_eval and global_step % config.eval_interval == 0:
            evaluate_seq_label_task(
                task,
                data_reader,
                feed_list,
                phase="test",
                config=config)
            evaluate_seq_label_task(
                task,
                data_reader,
                feed_list,
@@ -148,9 +148,9 @@ def _finetune_seq_label_task(task,
            exe=exe)

    if do_eval:
        evaluate_seq_label_task(
            task, data_reader, feed_list, phase="dev", config=config)
        evaluate_seq_label_task(
            task, data_reader, feed_list, phase="test", config=config)

    logger.info("PaddleHub finetune finished.")
@@ -164,8 +164,8 @@ def _finetune_cls_task(task, data_reader, feed_list, config=None,
    num_epoch = config.num_epoch
    batch_size = config.batch_size
    log_writer = LogWriter(
        os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=1)

    place, dev_count = hub.common.get_running_device_info(config)
    with fluid.program_guard(main_program, startup_program):
@@ -190,10 +190,10 @@ def _finetune_cls_task(task, data_reader, feed_list, config=None,
logger.info("PaddleHub finetune start")
# add visualdl scalar
with log_writter.mode("train") as logw:
with log_writer.mode("train") as logw:
train_loss_scalar = logw.scalar(tag="loss[train]")
train_acc_scalar = logw.scalar(tag="accuracy[train]")
with log_writter.mode("evaluate") as logw:
with log_writer.mode("evaluate") as logw:
eval_loss_scalar = logw.scalar(tag="loss[evaluate]")
eval_acc_scalar = logw.scalar(tag="accuracy[evaluate]")
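Elsewhere in the training loop these scalar writers are fed through VisualDL's add_record API; a hedged example of how a step might log to them (the step and metric variable names here are assumptions):

# Inside the training loop, after computing avg_loss and avg_acc:
train_loss_scalar.add_record(global_step, avg_loss)
train_acc_scalar.add_record(global_step, avg_acc)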
@@ -276,8 +276,7 @@ def finetune_and_eval(task, data_reader, feed_list, config=None):
    if task.task_type == "sequence_labeling":
        _finetune_seq_label_task(
            task, data_reader, feed_list, config, do_eval=True)
    elif task.task_type == "image_classification" or task.task_type == "text_classification":
        _finetune_cls_task(task, data_reader, feed_list, config, do_eval=True)
......