Commit 9477ba35 authored by LielinJiang

pull master

@@ -80,12 +80,19 @@ data/cityscapes/testA/412_A.jpg
 ### Training
-Train on a single GPU:
+Train with the static graph on a single GPU:
 ```
-env CUDA_VISIBLE_DEVICES=0 python train.py
+env CUDA_VISIBLE_DEVICES=0 python train.py --checkpoint_path=checkpoint_static
 ```
+Train with the dynamic graph on a single GPU:
+```
+env CUDA_VISIBLE_DEVICES=0 python train.py --dynamic --checkpoint_path=checkpoint_dynamic
+```
 Run `python train.py --help` to see more usage options and detailed parameter descriptions.
 Figure 1 shows the training losses over 152 epochs: the horizontal axis is the epoch number, the vertical axis is the loss on the training set, and 'g_loss', 'da_loss' and 'db_loss' are the training losses of the generator, discriminator A and discriminator B, respectively.
...
@@ -18,9 +18,10 @@ from __future__ import print_function
 import numpy as np
-from layers import ConvBN, DeConvBN
 import paddle.fluid as fluid
-from model import Model, Loss
+from hapi.model import Model, Loss
+from layers import ConvBN, DeConvBN

 class ResnetBlock(fluid.dygraph.Layer):
...
@@ -20,6 +20,8 @@ import random
 import numpy as np
 from PIL import Image, ImageOps
+import paddle

 DATASET = "cityscapes"
 A_LIST_FILE = "./data/" + DATASET + "/trainA.txt"
 B_LIST_FILE = "./data/" + DATASET + "/trainB.txt"
@@ -27,8 +29,6 @@ A_TEST_LIST_FILE = "./data/" + DATASET + "/testA.txt"
 B_TEST_LIST_FILE = "./data/" + DATASET + "/testB.txt"
 IMAGES_ROOT = "./data/" + DATASET + "/"
-import paddle.fluid as fluid

 class Cityscapes(paddle.io.Dataset):
     def __init__(self, root_path, file_path, mode='train', return_name=False):
...
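With this change `Cityscapes` derives from `paddle.io.Dataset` instead of touching `fluid` directly. A map-style dataset only has to implement `__getitem__` and `__len__`; the sketch below shows the bare pattern (the class name and fields are illustrative, not this repo's code):

```python
import paddle
from PIL import Image

class ImageListDataset(paddle.io.Dataset):
    """Minimal map-style dataset over a list of image paths (illustrative)."""

    def __init__(self, image_paths):
        super(ImageListDataset, self).__init__()
        self.image_paths = image_paths

    def __getitem__(self, idx):
        # load lazily so each worker only opens the files it is assigned
        return Image.open(self.image_paths[idx]).convert("RGB")

    def __len__(self):
        return len(self.image_paths)
```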
@@ -25,9 +25,9 @@ from PIL import Image
 from scipy.misc import imsave
 import paddle.fluid as fluid
-from check import check_gpu, check_version
-from model import Model, Input, set_device
+from hapi.model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, GeneratorCombine
@@ -43,7 +43,7 @@ def main():
     im_shape = [-1, 3, 256, 256]
     input_A = Input(im_shape, 'float32', 'input_A')
     input_B = Input(im_shape, 'float32', 'input_B')
-    g.prepare(inputs=[input_A, input_B])
+    g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
     g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)

     out_path = FLAGS.output + "/single"
@@ -59,10 +59,10 @@ def main():
         data = image.transpose([2, 0, 1])[np.newaxis, :]
         if FLAGS.input_style == "A":
-            _, fake, _, _ = g.test([data, data])
+            _, fake, _, _ = g.test_batch([data, data])
         if FLAGS.input_style == "B":
-            fake, _, _, _ = g.test([data, data])
+            fake, _, _, _ = g.test_batch([data, data])
         fake = np.squeeze(fake[0]).transpose([1, 2, 0])
@@ -74,7 +74,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN inference")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
...
@@ -22,9 +22,9 @@ import numpy as np
 from scipy.misc import imsave
 import paddle.fluid as fluid
-from check import check_gpu, check_version
-from model import Model, Input, set_device
+from hapi.model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, GeneratorCombine
 import data as data
@@ -41,7 +41,7 @@ def main():
     im_shape = [-1, 3, 256, 256]
     input_A = Input(im_shape, 'float32', 'input_A')
     input_B = Input(im_shape, 'float32', 'input_B')
-    g.prepare(inputs=[input_A, input_B])
+    g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
     g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)

     if not os.path.exists(FLAGS.output):
@@ -56,7 +56,7 @@ def main():
         data_A = np.array(data_A).astype("float32")
         data_B = np.array(data_B).astype("float32")
-        fake_A, fake_B, cyc_A, cyc_B = g.test([data_A, data_B])
+        fake_A, fake_B, cyc_A, cyc_B = g.test_batch([data_A, data_B])

         datas = [fake_A, fake_B, cyc_A, cyc_B, data_A, data_B]
         odatas = []
@@ -75,7 +75,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN test")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
...
@@ -24,12 +24,11 @@ import time
 import paddle
 import paddle.fluid as fluid
-from check import check_gpu, check_version
-from model import Model, Input, set_device
-import data as data
+from hapi.model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, Discriminator, GeneratorCombine, GLoss, DLoss
+import data as data

 step_per_epoch = 2974
@@ -76,12 +75,15 @@ def main():
     fake_A = Input(im_shape, 'float32', 'fake_A')
     fake_B = Input(im_shape, 'float32', 'fake_B')
-    g_AB.prepare(inputs=[input_A])
-    g_BA.prepare(inputs=[input_B])
-    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B])
-    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B])
-    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A])
+    g_AB.prepare(inputs=[input_A], device=FLAGS.device)
+    g_BA.prepare(inputs=[input_B], device=FLAGS.device)
+    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B],
+              device=FLAGS.device)
+    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B],
+                device=FLAGS.device)
+    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A],
+                device=FLAGS.device)

     if FLAGS.resume:
         g.load(FLAGS.resume)
@@ -108,14 +110,14 @@ def main():
         data_B = data_B[0][0] if not FLAGS.dynamic else data_B[0]
         start = time.time()
-        fake_B = g_AB.test(data_A)[0]
-        fake_A = g_BA.test(data_B)[0]
-        g_loss = g.train([data_A, data_B])[0]
+        fake_B = g_AB.test_batch(data_A)[0]
+        fake_A = g_BA.test_batch(data_B)[0]
+        g_loss = g.train_batch([data_A, data_B])[0]
         fake_pb = B_pool.get(fake_B)
-        da_loss = d_A.train([data_B, fake_pb])[0]
+        da_loss = d_A.train_batch([data_B, fake_pb])[0]
         fake_pa = A_pool.get(fake_A)
-        db_loss = d_B.train([data_A, fake_pa])[0]
+        db_loss = d_B.train_batch([data_A, fake_pa])[0]
         t = time.time() - start
         if i % 20 == 0:
@@ -128,7 +130,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN Training on Cityscapes")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
...
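The `A_pool.get` / `B_pool.get` calls above feed the discriminators from an image-history buffer, the trick CycleGAN uses to stabilize adversarial training by mixing freshly generated fakes with earlier ones. A minimal sketch of such a pool (the class name and the 0.5 swap probability are assumptions, not necessarily this repo's exact implementation):

```python
import random

class ImagePool(object):
    """History buffer of generated images (illustrative sketch)."""

    def __init__(self, pool_size=50):
        self.pool_size = pool_size
        self.images = []

    def get(self, image):
        # fill the buffer first; once full, with probability 0.5 return a
        # stored image and replace it with the incoming one
        if len(self.images) < self.pool_size:
            self.images.append(image)
            return image
        if random.random() > 0.5:
            idx = random.randint(0, self.pool_size - 1)
            stored, self.images[idx] = self.images[idx], image
            return stored
        return image
```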
@@ -54,7 +54,7 @@ python downloads.py dataset
 We have open-sourced a lexical analysis model trained on our in-house dataset. Users can apply it directly after downloading it with the command below:
 ```bash
 # download baseline model
-python downloads.py lac
+python downloads.py model
 ```
 ### Model Training
@@ -66,65 +66,36 @@ Single-card training on GPU
 export CUDA_VISIBLE_DEVICES=0
 python -u train.py \
-        --train_file ./data/train.tsv \
-        --test_file ./data/test.tsv \
-        --word_dict_path ./conf/word.dic \
-        --label_dict_path ./conf/tag.dic \
-        --word_rep_dict_path ./conf/q2b.dic \
         --device gpu \
-        --grnn_hidden_dim 128 \
-        --word_emb_dim 128 \
-        --bigru_num 2 \
-        --base_learning_rate 1e-3 \
-        --batch_size 300 \
-        --epoch 10 \
-        --save_dir ./model \
-        --num_devices 1 \
-        -d
-# -d: train in dynamic graph (dygraph) mode; to train with the static graph, remove the -d flag
+        --dynamic False
+
+# --device: run on the gpu or the cpu
+# --dynamic: whether to train in dynamic graph mode; set it to True for dynamic graph training and to False for static graph training
 ```
 Multi-card training on GPU
 ```
 # setting visible devices for training
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
-        --train_file ./data/train.tsv \
-        --test_file ./data/test.tsv \
-        --word_dict_path ./conf/word.dic \
-        --label_dict_path ./conf/tag.dic \
-        --word_rep_dict_path ./conf/q2b.dic \
         --device gpu \
-        --grnn_hidden_dim 128 \
-        --word_emb_dim 128 \
-        --bigru_num 2 \
-        --base_learning_rate 1e-3 \
-        --batch_size 300 \
-        --epoch 10 \
-        --save_dir ./model \
-        -d
-# -d: train in dynamic graph (dygraph) mode; to train with the static graph, remove the -d flag
+        --dynamic False
+
+# --device: run on the gpu or the cpu
+# --dynamic: whether to train in dynamic graph mode; set it to True for dynamic graph training and to False for static graph training
 ```
 Training on CPU
 ```
 python -u train.py \
-        --train_file ./data/train.tsv \
-        --test_file ./data/test.tsv \
-        --word_dict_path ./conf/word.dic \
-        --label_dict_path ./conf/tag.dic \
-        --word_rep_dict_path ./conf/q2b.dic \
         --device cpu \
-        --grnn_hidden_dim 128 \
-        --word_emb_dim 128 \
-        --bigru_num 2 \
-        --base_learning_rate 1e-3 \
-        --batch_size 300 \
-        --epoch 10 \
-        --save_dir ./model \
-        -d
+        --dynamic False
+
+# --device: run on the gpu or the cpu
+# --dynamic: whether to train in dynamic graph mode; set it to True for dynamic graph training and to False for static graph training
 ```
 ### Model Prediction
@@ -132,18 +103,17 @@ python -u train.py \
 Load a trained model and run prediction on unseen data
 ```bash
 python predict.py \
-        --predict_file ./data/infer.tsv \
-        --word_dict_path ./conf/word.dic \
-        --label_dict_path ./conf/tag.dic \
-        --word_rep_dict_path ./conf/q2b.dic \
         --init_from_checkpoint model_baseline/params \
         --output_file predict.result \
         --mode predict \
         --device cpu \
-        -d
-# -d: train in dynamic graph (dygraph) mode; to train with the static graph, remove the -d flag
+        --dynamic False
+
+# --init_from_checkpoint: the checkpoint to initialize the model from
+# --output_file: the file to write prediction results to
+# --device: run on the gpu or the cpu
+# --mode: run mode; set to train for training and to predict for prediction
+# --dynamic: whether to run in dynamic graph mode; set it to True for dynamic graph mode and to False for static graph mode
 ```
 ### Model Evaluation
@@ -152,15 +122,15 @@ python predict.py \
 ```bash
 # baseline model
 python eval.py \
-        --test_file ./data/test.tsv \
-        --word_dict_path ./conf/word.dic \
-        --label_dict_path ./conf/tag.dic \
-        --word_rep_dict_path ./conf/q2b.dic \
         --init_from_checkpoint ./model_baseline/params \
-        --mode predict \
         --device cpu \
-        -d
-# -d: train in dynamic graph (dygraph) mode; to train with the static graph, remove the -d flag
+        --dynamic False
+
+# --init_from_checkpoint: the checkpoint to initialize the model from
+# --device: run on the gpu or the cpu
+# --mode: run mode; set to train for training and to predict for prediction
+# --dynamic: whether to run in dynamic graph mode; set it to True for dynamic graph mode and to False for static graph mode
 ```
...
@@ -33,19 +33,19 @@ FILE_INFO = {
         'name': 'lexical_analysis-dataset-2.0.0.tar.gz',
         'md5': '71e4a9a36d0f0177929a1bccedca7dba'
     },
-    'LAC_MODEL': {
-        'name': 'lexical_analysis-2.0.0.tar.gz',
-        'md5': "fc1daef00de9564083c7dc7b600504ca"
+    'MODEL': {
+        'name': 'sequence_tagging_dy.tar.gz',
+        'md5': "1125d374c03c8218b6e47325dcf607e3"
     },
 }

 def usage():
-    desc = ("\nDownload datasets and pretrained models for LAC.\n"
+    desc = ("\nDownload datasets and pretrained models for sequence tagging.\n"
            "Usage:\n"
            "   1. python download.py all\n"
            "   2. python download.py dataset\n"
-           "   3. python download.py lac\n")
+           "   3. python download.py model\n")
    print(desc)
@@ -136,13 +136,13 @@ if __name__ == '__main__':
    if sys.argv[1] == 'all':
        download('DATA', pwd)
-       download('LAC_MODEL', pwd)
+       download('MODEL', pwd)

    if sys.argv[1] == "dataset":
        download('DATA', pwd)
-   elif sys.argv[1] == "lac":
-       download('LAC_MODEL', pwd)
+   elif sys.argv[1] == "model":
+       download('MODEL', pwd)
    else:
        usage()
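Each `FILE_INFO` entry pairs an archive name with its md5 checksum, so the downloader can verify a file before unpacking it. A sketch of that check (the repo's actual `download` helper may differ):

```python
import hashlib

def md5file(path, chunk_size=8192):
    """Compute the md5 digest of a file without reading it all into memory."""
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

# e.g. md5file("sequence_tagging_dy.tar.gz") == FILE_INFO['MODEL']['md5']
```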
...
@@ -5,9 +5,9 @@ if [ -d ./model_baseline/ ]
 then
     echo "./model_baseline/ directory already existed, ignore download"
 else
-    wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/lexical_analysis-2.0.0.tar.gz
-    tar xvf lexical_analysis-2.0.0.tar.gz
-    /bin/rm lexical_analysis-2.0.0.tar.gz
+    wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/sequence_tagging_dy.tar.gz
+    tar xvf sequence_tagging_dy.tar.gz
+    /bin/rm sequence_tagging_dy.tar.gz
 fi

 # download dataset file to ./data/
...
@@ -26,12 +26,13 @@ import argparse
 import numpy as np
 from train import SeqTagging
+from utils.configure import PDConfig
 from utils.check import check_gpu, check_version
 from utils.metrics import chunk_count
 from reader import LacDataset, create_lexnet_data_generator, create_dataloader

 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(work_dir)
+sys.path.append(os.path.join(work_dir, "../"))
 from hapi.model import set_device, Input
 import paddle.fluid as fluid
@@ -87,95 +88,12 @@ def main(args):
     print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))

 if __name__ == '__main__':
-    parser = argparse.ArgumentParser("sequence tagging training")
-    parser.add_argument(
-        "-wd",
-        "--word_dict_path",
-        default=None,
-        type=str,
-        help='word dict path')
-    parser.add_argument(
-        "-ld",
-        "--label_dict_path",
-        default=None,
-        type=str,
-        help='label dict path')
-    parser.add_argument(
-        "-wrd",
-        "--word_rep_dict_path",
-        default=None,
-        type=str,
-        help='The path of the word replacement Dictionary.')
-    parser.add_argument(
-        "-dev",
-        "--device",
-        type=str,
-        default='gpu',
-        help="device to use, gpu or cpu")
-    parser.add_argument(
-        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
-    parser.add_argument(
-        "-e", "--epoch", default=10, type=int, help="number of epoch")
-    parser.add_argument(
-        '-lr',
-        '--base_learning_rate',
-        default=1e-3,
-        type=float,
-        metavar='LR',
-        help='initial learning rate')
-    parser.add_argument(
-        "--word_emb_dim",
-        default=128,
-        type=int,
-        help='word embedding dimension')
-    parser.add_argument(
-        "--grnn_hidden_dim", default=128, type=int, help="hidden dimension")
-    parser.add_argument(
-        "--bigru_num", default=2, type=int, help='the number of bi-rnn')
-    parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float)
-    parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float)
-    parser.add_argument(
-        "-b", "--batch_size", default=300, type=int, help="batch size")
-    parser.add_argument(
-        "--max_seq_len", default=126, type=int, help="max sequence length")
-    parser.add_argument(
-        "-n", "--num_devices", default=1, type=int, help="number of devices")
-    parser.add_argument(
-        "-o",
-        "--save_dir",
-        default="./model",
-        type=str,
-        help="save model path")
-    parser.add_argument(
-        "--init_from_checkpoint",
-        default=None,
-        type=str,
-        help="load init model parameters")
-    parser.add_argument(
-        "--init_from_pretrain_model",
-        default=None,
-        type=str,
-        help="load pretrain model parameters")
-    parser.add_argument(
-        "-sf", "--save_freq", default=1, type=int, help="save frequency")
-    parser.add_argument(
-        "-ef", "--eval_freq", default=1, type=int, help="eval frequency")
-    parser.add_argument(
-        "--output_file", default="predict.result", type=str, help="predict output file")
-    parser.add_argument(
-        "--predict_file", default="./data/infer.tsv", type=str, help="predict output file")
-    parser.add_argument(
-        "--test_file", default="./data/test.tsv", type=str, help="predict and eval output file")
-    parser.add_argument(
-        "--train_file", default="./data/train.tsv", type=str, help="train file")
-    parser.add_argument(
-        "--mode", default="predict", type=str, help="train|test|predict")
-    args = parser.parse_args()
-    print(args)
-    use_gpu = True if args.device == "gpu" else False
+    args = PDConfig(yaml_file="sequence_tagging.yaml")
+    args.build()
+    args.Print()
+
+    use_gpu = True if args.device == "gpu" else False
     check_gpu(use_gpu)
     check_version()
     main(args)
...
@@ -27,10 +27,11 @@ import numpy as np
 from train import SeqTagging
 from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
 from reader import LacDataset, create_lexnet_data_generator, create_dataloader

 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(work_dir)
+sys.path.append(os.path.join(work_dir, "../"))
 from hapi.model import set_device, Input
 import paddle.fluid as fluid
@@ -66,7 +67,7 @@ def main(args):
     model.mode = "test"
     model.prepare(inputs=inputs)
-    model.load(args.init_from_checkpoint)
+    model.load(args.init_from_checkpoint, skip_mismatch=True)

     f = open(args.output_file, "wb")
     for data in predict_dataset():
@@ -82,91 +83,12 @@ def main(args):
         f.write("\002".join(tags) + "\n")

 if __name__ == '__main__':
-    parser = argparse.ArgumentParser("sequence tagging training")
-    parser.add_argument(
-        "-wd",
-        "--word_dict_path",
-        default=None,
-        type=str,
-        help='word dict path')
-    parser.add_argument(
-        "-ld",
-        "--label_dict_path",
-        default=None,
-        type=str,
-        help='label dict path')
-    parser.add_argument(
-        "-wrd",
-        "--word_rep_dict_path",
-        default=None,
-        type=str,
-        help='The path of the word replacement Dictionary.')
-    parser.add_argument(
-        "-dev",
-        "--device",
-        type=str,
-        default='gpu',
-        help="device to use, gpu or cpu")
-    parser.add_argument(
-        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
-    parser.add_argument(
-        "-e", "--epoch", default=10, type=int, help="number of epoch")
-    parser.add_argument(
-        '-lr',
-        '--base_learning_rate',
-        default=1e-3,
-        type=float,
-        metavar='LR',
-        help='initial learning rate')
-    parser.add_argument(
-        "--word_emb_dim",
-        default=128,
-        type=int,
-        help='word embedding dimension')
-    parser.add_argument(
-        "--grnn_hidden_dim", default=128, type=int, help="hidden dimension")
-    parser.add_argument(
-        "--bigru_num", default=2, type=int, help='the number of bi-rnn')
-    parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float)
-    parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float)
-    parser.add_argument(
-        "-b", "--batch_size", default=300, type=int, help="batch size")
-    parser.add_argument(
-        "--max_seq_len", default=126, type=int, help="max sequence length")
-    parser.add_argument(
-        "-n", "--num_devices", default=1, type=int, help="number of devices")
-    parser.add_argument(
-        "-o",
-        "--save_dir",
-        default="./model",
-        type=str,
-        help="save model path")
-    parser.add_argument(
-        "--init_from_checkpoint",
-        default=None,
-        type=str,
-        help="load init model parameters")
-    parser.add_argument(
-        "--init_from_pretrain_model",
-        default=None,
-        type=str,
-        help="load pretrain model parameters")
-    parser.add_argument(
-        "-sf", "--save_freq", default=1, type=int, help="save frequency")
-    parser.add_argument(
-        "-ef", "--eval_freq", default=1, type=int, help="eval frequency")
-    parser.add_argument(
-        "--output_file", default="predict.result", type=str, help="predict output file")
-    parser.add_argument(
-        "--predict_file", default="./data/infer.tsv", type=str, help="predict output file")
-    parser.add_argument(
-        "--mode", default="train", type=str, help="train|test|predict")
-    args = parser.parse_args()
-    print(args)
+    args = PDConfig(yaml_file="sequence_tagging.yaml")
+    args.build()
+    args.Print()

     use_gpu = True if args.device == "gpu" else False
     check_gpu(use_gpu)
     check_version()
     main(args)
word_dict_path: "./conf/word.dic"
label_dict_path: "./conf/tag.dic"
word_rep_dict_path: "./conf/q2b.dic"
device: "cpu"
dynamic: True
epoch: 10
base_learning_rate: 0.001
word_emb_dim: 128
grnn_hidden_dim: 128
bigru_num: 2
emb_learning_rate: 1.0
crf_learning_rate: 1.0
batch_size: 300
max_seq_len: 126
num_devices: 1
save_dir: "model"
init_from_checkpoint: "model_baseline/params"
init_from_pretrain_model: ""
save_freq: 1
eval_freq: 1
output_file: "predict.result"
test_file: "./data/test.tsv"
train_file: "./data/train.tsv"
predict_file: "./data/infer.tsv"
mode: "train"
...
@@ -26,13 +26,15 @@ import argparse
 import numpy as np

 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(os.path.join(work_dir))
+sys.path.append(os.path.join(work_dir, "../"))

 from hapi.metrics import Metric
 from hapi.model import Model, Input, Loss, set_device
 from hapi.text.text import SequenceTagging
 from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
 from reader import LacDataset, create_lexnet_data_generator, create_dataloader
 import paddle.fluid as fluid
@@ -258,92 +260,10 @@ def main(args):
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser("sequence tagging training")
-    parser.add_argument(
-        "-wd",
-        "--word_dict_path",
-        default=None,
-        type=str,
-        help='word dict path')
-    parser.add_argument(
-        "-ld",
-        "--label_dict_path",
-        default=None,
-        type=str,
-        help='label dict path')
-    parser.add_argument(
-        "-wrd",
-        "--word_rep_dict_path",
-        default=None,
-        type=str,
-        help='The path of the word replacement Dictionary.')
-    parser.add_argument(
-        "-dev",
-        "--device",
-        type=str,
-        default='gpu',
-        help="device to use, gpu or cpu")
-    parser.add_argument(
-        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
-    parser.add_argument(
-        "-e", "--epoch", default=10, type=int, help="number of epoch")
-    parser.add_argument(
-        '-lr',
-        '--base_learning_rate',
-        default=1e-3,
-        type=float,
-        metavar='LR',
-        help='initial learning rate')
-    parser.add_argument(
-        "--word_emb_dim",
-        default=128,
-        type=int,
-        help='word embedding dimension')
-    parser.add_argument(
-        "--grnn_hidden_dim", default=128, type=int, help="hidden dimension")
-    parser.add_argument(
-        "--bigru_num", default=2, type=int, help='the number of bi-rnn')
-    parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float)
-    parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float)
-    parser.add_argument(
-        "-b", "--batch_size", default=300, type=int, help="batch size")
-    parser.add_argument(
-        "--max_seq_len", default=126, type=int, help="max sequence length")
-    parser.add_argument(
-        "-n", "--num_devices", default=1, type=int, help="number of devices")
-    parser.add_argument(
-        "-o",
-        "--save_dir",
-        default="./model",
-        type=str,
-        help="save model path")
-    parser.add_argument(
-        "--init_from_checkpoint",
-        default=None,
-        type=str,
-        help="load init model parameters")
-    parser.add_argument(
-        "--init_from_pretrain_model",
-        default=None,
-        type=str,
-        help="load pretrain model parameters")
-    parser.add_argument(
-        "-sf", "--save_freq", default=1, type=int, help="save frequency")
-    parser.add_argument(
-        "-ef", "--eval_freq", default=1, type=int, help="eval frequency")
-    parser.add_argument(
-        "--output_file", default="predict.result", type=str, help="predict output file")
-    parser.add_argument(
-        "--predict_file", default="./data/infer.tsv", type=str, help="predict output file")
-    parser.add_argument(
-        "--test_file", default="./data/test.tsv", type=str, help="predict and eval output file")
-    parser.add_argument(
-        "--train_file", default="./data/train.tsv", type=str, help="train file")
-    parser.add_argument(
-        "--mode", default="train", type=str, help="train|test|predict")
-    args = parser.parse_args()
-    print(args)
+    args = PDConfig(yaml_file="sequence_tagging.yaml")
+    args.build()
+    args.Print()

     use_gpu = True if args.device == "gpu" else False
     check_gpu(use_gpu)
     check_version()
...
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
    """
    A high-level API for handling JSON config files.
    """

    def __init__(self, config_path):
        self._config_dict = self._parse(config_path)

    def _parse(self, config_path):
        try:
            with open(config_path) as json_file:
                config_dict = json.load(json_file)
        except Exception:
            raise IOError("Error in parsing json config file '%s'" %
                          config_path)
        else:
            return config_dict

    def __getitem__(self, key):
        return self._config_dict[key]

    def print_config(self):
        for arg, value in sorted(six.iteritems(self._config_dict)):
            print('%s: %s' % (arg, value))
        print('------------------------------------------------')
class ArgumentGroup(object):
    def __init__(self, parser, title, des):
        self._group = parser.add_argument_group(title=title, description=des)

    def add_arg(self, name, type, default, help, **kwargs):
        # route bool options through str2bool so "False" parses as False
        type = str2bool if type == bool else type
        self._group.add_argument(
            "--" + name,
            default=default,
            type=type,
            help=help + ' Default: %(default)s.',
            **kwargs)
class ArgConfig(object):
    """
    A high-level API for handling argument configs.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()

        train_g = ArgumentGroup(parser, "training", "training options.")
        train_g.add_arg("epoch", int, 3, "Number of epochs for fine-tuning.")
        train_g.add_arg("learning_rate", float, 5e-5,
                        "Learning rate used to train with warmup.")
        train_g.add_arg(
            "lr_scheduler",
            str,
            "linear_warmup_decay",
            "scheduler of learning rate.",
            choices=['linear_warmup_decay', 'noam_decay'])
        train_g.add_arg("weight_decay", float, 0.01,
                        "Weight decay rate for L2 regularizer.")
        train_g.add_arg(
            "warmup_proportion", float, 0.1,
            "Proportion of training steps to perform linear learning rate warmup for."
        )
        train_g.add_arg("save_steps", int, 1000,
                        "The steps interval to save checkpoints.")
        train_g.add_arg("use_fp16", bool, False,
                        "Whether to use fp16 mixed precision training.")
        train_g.add_arg(
            "loss_scaling", float, 1.0,
            "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
        )
        train_g.add_arg("pred_dir", str, None,
                        "Path to save the prediction results")

        log_g = ArgumentGroup(parser, "logging", "logging related.")
        log_g.add_arg("skip_steps", int, 10,
                      "The steps interval to print loss.")
        log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")

        run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
        run_type_g.add_arg("use_cuda", bool, True,
                           "If set, use GPU for training.")
        run_type_g.add_arg(
            "use_fast_executor", bool, False,
            "If set, use fast parallel executor (in experiment).")
        run_type_g.add_arg(
            "num_iteration_per_drop_scope", int, 1,
            "The iteration intervals to clean up temporary variables.")
        run_type_g.add_arg("do_train", bool, True,
                           "Whether to perform training.")
        run_type_g.add_arg("do_predict", bool, True,
                           "Whether to perform prediction.")

        custom_g = ArgumentGroup(parser, "customize", "customized options.")
        self.custom_g = custom_g

        self.parser = parser

    def add_arg(self, name, dtype, default, descrip):
        self.custom_g.add_arg(name, dtype, default, descrip)

    def build_conf(self):
        return self.parser.parse_args()
def str2bool(v):
    # argparse cannot parse strings such as "True"/"False" into Python
    # booleans directly, so bool options are routed through this helper
    return v.lower() in ("true", "t", "1")
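# Illustration of the pitfall this helper works around (not part of the
# original file): bool("False") is True because any non-empty string is
# truthy, so an option declared with type=bool would turn "--use_fp16 False"
# into True; declared with type=str2bool it parses to False as intended.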
def print_arguments(args, log=None):
    if not log:
        print('----------- Configuration Arguments -----------')
        for arg, value in sorted(six.iteritems(vars(args))):
            print('%s: %s' % (arg, value))
        print('------------------------------------------------')
    else:
        log.info('----------- Configuration Arguments -----------')
        for arg, value in sorted(six.iteritems(vars(args))):
            log.info('%s: %s' % (arg, value))
        log.info('------------------------------------------------')
class PDConfig(object):
    """
    A high-level API for managing configuration files in PaddlePaddle.
    Can jointly work with command-line arguments, json files and yaml files.
    """

    def __init__(self, json_file="", yaml_file="", fuse_args=True):
        """
        Init function for PDConfig.
        json_file: the path to the json configure file.
        yaml_file: the path to the yaml configure file.
        fuse_args: whether to fuse the json/yaml configs with argparse.
        """
        assert isinstance(json_file, str)
        assert isinstance(yaml_file, str)

        if json_file != "" and yaml_file != "":
            raise Warning(
                "json_file and yaml_file can not co-exist for now. please only use one configure file type."
            )

        self.args = None
        self.arg_config = {}
        self.json_config = {}
        self.yaml_config = {}

        parser = argparse.ArgumentParser()

        self.default_g = ArgumentGroup(parser, "default", "default options.")
        self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
        self.json_g = ArgumentGroup(parser, "json", "options from json.")
        self.com_g = ArgumentGroup(parser, "custom", "customized options.")

        self.default_g.add_arg("do_train", bool, False,
                               "Whether to perform training.")
        self.default_g.add_arg("do_predict", bool, False,
                               "Whether to perform predicting.")
        self.default_g.add_arg("do_eval", bool, False,
                               "Whether to perform evaluating.")
        self.default_g.add_arg("do_save_inference_model", bool, False,
                               "Whether to perform model saving for inference.")

        # NOTE: args for profiler
        self.default_g.add_arg("is_profiler", int, 0,
                               "the switch of profiler tools. (used for benchmark)")
        self.default_g.add_arg("profiler_path", str, './',
                               "the profiler output file path. (used for benchmark)")
        self.default_g.add_arg("max_iter", int, 0,
                               "the max train batch num. (used for benchmark)")

        self.parser = parser

        if json_file != "":
            self.load_json(json_file, fuse_args=fuse_args)

        if yaml_file:
            self.load_yaml(yaml_file, fuse_args=fuse_args)

    def load_json(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the json file %s does not exist." % file_path)

        with open(file_path, "r") as fin:
            self.json_config = json.loads(fin.read())

        if fuse_args:
            for name in self.json_config:
                if isinstance(self.json_config[name], list):
                    self.json_g.add_arg(
                        name,
                        type(self.json_config[name][0]),
                        self.json_config[name],
                        "This is from %s" % file_path,
                        nargs=len(self.json_config[name]))
                    continue
                if not isinstance(self.json_config[name], int) \
                        and not isinstance(self.json_config[name], float) \
                        and not isinstance(self.json_config[name], str) \
                        and not isinstance(self.json_config[name], bool):
                    continue
                self.json_g.add_arg(name,
                                    type(self.json_config[name]),
                                    self.json_config[name],
                                    "This is from %s" % file_path)

    def load_yaml(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the yaml file %s does not exist." % file_path)

        with open(file_path, "r") as fin:
            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)

        if fuse_args:
            for name in self.yaml_config:
                if isinstance(self.yaml_config[name], list):
                    self.yaml_g.add_arg(
                        name,
                        type(self.yaml_config[name][0]),
                        self.yaml_config[name],
                        "This is from %s" % file_path,
                        nargs=len(self.yaml_config[name]))
                    continue
                if not isinstance(self.yaml_config[name], int) \
                        and not isinstance(self.yaml_config[name], float) \
                        and not isinstance(self.yaml_config[name], str) \
                        and not isinstance(self.yaml_config[name], bool):
                    continue
                self.yaml_g.add_arg(name,
                                    type(self.yaml_config[name]),
                                    self.yaml_config[name],
                                    "This is from %s" % file_path)

    def build(self):
        self.args = self.parser.parse_args()
        self.arg_config = vars(self.args)

    def __add__(self, new_arg):
        assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
        assert len(new_arg) >= 3
        assert self.args is None

        name = new_arg[0]
        dtype = new_arg[1]
        dvalue = new_arg[2]
        desc = new_arg[3] if len(
            new_arg) == 4 else "Description is not provided."

        self.com_g.add_arg(name, dtype, dvalue, desc)
        return self

    def __getattr__(self, name):
        if name in self.arg_config:
            return self.arg_config[name]

        if name in self.json_config:
            return self.json_config[name]

        if name in self.yaml_config:
            return self.yaml_config[name]

        raise AttributeError("The argument %s is not defined." % name)

    def Print(self):
        print("-" * 70)
        for name in self.arg_config:
            print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))

        for name in self.json_config:
            if name not in self.arg_config:
                print("%s:\t\t\t\t%s" %
                      (str(name), str(self.json_config[name])))

        for name in self.yaml_config:
            if name not in self.arg_config:
                print("%s:\t\t\t\t%s" %
                      (str(name), str(self.yaml_config[name])))

        print("-" * 70)
if __name__ == "__main__":
    """
    pd_config = PDConfig(json_file="./test/bert_config.json")
    pd_config.build()
    print(pd_config.do_train)
    print(pd_config.hidden_size)

    pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
    pd_config.build()
    print(pd_config.do_train)
    print(pd_config.hidden_size)
    """
    pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
    pd_config += ("my_age", int, 18, "I am forever 18.")
    pd_config.build()

    print(pd_config.do_train)
    print(pd_config.hidden_size)
    print(pd_config.my_age)
...
@@ -193,17 +193,17 @@ class StaticGraphAdapter(object):
     def mode(self, value):
         self.model.mode = value

-    def train(self, inputs, labels=None):
+    def train_batch(self, inputs, labels=None):
         assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
         return self._run(inputs, labels)

-    def eval(self, inputs, labels=None):
+    def eval_batch(self, inputs, labels=None):
         self.mode = 'eval'
         return self._run(inputs, labels)

-    def test(self, inputs):
+    def test_batch(self, inputs):
         self.mode = 'test'
         return self._run(inputs, None)
@@ -567,7 +567,7 @@ class DynamicGraphAdapter(object):
         self.model.mode = value

     # TODO multi device in dygraph mode not implemented at present time
-    def train(self, inputs, labels=None):
+    def train_batch(self, inputs, labels=None):
         assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
@@ -600,7 +600,7 @@ class DynamicGraphAdapter(object):
         return ([to_numpy(l) for l in losses], metrics) \
             if len(metrics) > 0 else [to_numpy(l) for l in losses]

-    def eval(self, inputs, labels=None):
+    def eval_batch(self, inputs, labels=None):
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
@@ -642,7 +642,7 @@ class DynamicGraphAdapter(object):
         return ([to_numpy(l) for l in losses], metrics) \
             if len(metrics) > 0 else [to_numpy(l) for l in losses]

-    def test(self, inputs):
+    def test_batch(self, inputs):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
@@ -741,14 +741,14 @@ class Model(fluid.dygraph.Layer):
         else:
             self._adapter = StaticGraphAdapter(self)

-    def train(self, *args, **kwargs):
-        return self._adapter.train(*args, **kwargs)
+    def train_batch(self, *args, **kwargs):
+        return self._adapter.train_batch(*args, **kwargs)

-    def eval(self, *args, **kwargs):
-        return self._adapter.eval(*args, **kwargs)
+    def eval_batch(self, *args, **kwargs):
+        return self._adapter.eval_batch(*args, **kwargs)

-    def test(self, *args, **kwargs):
-        return self._adapter.test(*args, **kwargs)
+    def test_batch(self, *args, **kwargs):
+        return self._adapter.test_batch(*args, **kwargs)

     def save(self, *args, **kwargs):
         if ParallelEnv().local_rank == 0:
@@ -1213,18 +1213,6 @@ class Model(fluid.dygraph.Layer):
         return outputs

-    def set_eval_data(self, eval_data):
-        """
-        Args:
-            eval_data (Dataset|DataLoader|None): An iterable data loader is used for
-                eval. An instance of paddle.io.Dataset or
-                paddle.io.Dataloader is recomended.
-        """
-        assert isinstance(
-            eval_data,
-            DataLoader), "eval_data must be a instance of Dataloader!"
-        self._test_dataloader = eval_data

     def _run_one_epoch(self,
                        data_loader,
                        callbacks,
@@ -1261,11 +1249,11 @@ class Model(fluid.dygraph.Layer):
             callbacks.on_batch_begin(mode, step, logs)
             if mode == 'train':
-                outs = self.train(data[:len(self._inputs)],
-                                  data[len(self._inputs):])
+                outs = self.train_batch(data[:len(self._inputs)],
+                                        data[len(self._inputs):])
             else:
-                outs = self.eval(data[:len(self._inputs)],
-                                 data[len(self._inputs):])
+                outs = self.eval_batch(data[:len(self._inputs)],
+                                       data[len(self._inputs):])

             # losses
             loss = outs[0] if self._metrics else outs
...
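After this rename the single-batch entry points are `train_batch`, `eval_batch` and `test_batch`, which is exactly how the CycleGAN train.py above drives its generators and discriminators. A minimal sketch of the calling convention, assuming `net` is any hapi `Model` already `prepare()`d with an optimizer, loss and `Input` specs:

```python
def run_one_step(net, batch_x, batch_y):
    # forward + backward + optimizer step on one batch; returns the loss
    # (plus metrics when metrics were configured in prepare())
    loss = net.train_batch([batch_x], [batch_y])
    # forward only, computing loss/metrics against the labels
    eval_out = net.eval_batch([batch_x], [batch_y])
    # pure inference: forward pass with no labels
    preds = net.test_batch([batch_x])
    return loss, eval_out, preds
```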