diff --git a/cyclegan/README.md b/examples/cyclegan/README.md
similarity index 94%
rename from cyclegan/README.md
rename to examples/cyclegan/README.md
index ef35c3ab1b3ec53ca2f9b7d6ba28f210b6d36e91..8481fb66e7c46a987476931b0e2d4858bd91405a 100644
--- a/cyclegan/README.md
+++ b/examples/cyclegan/README.md
@@ -80,12 +80,19 @@ data/cityscapes/testA/412_A.jpg
 ### Training
 
-Train on a single GPU:
+Train on a single GPU in static graph mode:
 
 ```
-env CUDA_VISIBLE_DEVICES=0 python train.py
+env CUDA_VISIBLE_DEVICES=0 python train.py --checkpoint_path=checkpoint_static
 ```
 
+Train on a single GPU in dygraph (dynamic graph) mode:
+
+```
+env CUDA_VISIBLE_DEVICES=0 python train.py --dynamic --checkpoint_path=checkpoint_dynamic
+```
+
+
 Run `python train.py --help` for more usage options and detailed argument descriptions.
 
 Figure 1 plots the training losses over 152 epochs: the x-axis is the training epoch and the y-axis is the loss on the training set; 'g_loss', 'da_loss' and 'db_loss' are the training losses of the generator, discriminator A and discriminator B, respectively.
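Throughout this patch the `--dynamic` flags are also fixed from `action='store_false'` to `action='store_true'`, so passing `-d`/`--dynamic` now actually enables dygraph mode instead of disabling it. For readers new to the codebase, a minimal sketch of the mode toggle the example scripts in this patch share (the helper name `init_execution_mode` is ours, not part of the patch; `args` is the parsed argument namespace):

```python
import paddle.fluid as fluid
from hapi.model import set_device

def init_execution_mode(args):
    # args.device is 'cpu' or 'gpu'; args.dynamic comes from the -d/--dynamic flag.
    place = set_device(args.device)
    if args.dynamic:
        # Imperative dygraph execution; without it the model is built
        # and run as a static graph.
        fluid.enable_dygraph(place)
    return place
```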
diff --git a/cyclegan/__init__.py b/examples/cyclegan/__init__.py
similarity index 100%
rename from cyclegan/__init__.py
rename to examples/cyclegan/__init__.py
diff --git a/cyclegan/check.py b/examples/cyclegan/check.py
similarity index 100%
rename from cyclegan/check.py
rename to examples/cyclegan/check.py
diff --git a/cyclegan/cyclegan.py b/examples/cyclegan/cyclegan.py
similarity index 99%
rename from cyclegan/cyclegan.py
rename to examples/cyclegan/cyclegan.py
index 6fdd21c1bdf41a8ed3b6743297b99ef239bd5543..2c5cbd364c35c71dd5bd1d6831bb5b6d4ada07fb 100644
--- a/cyclegan/cyclegan.py
+++ b/examples/cyclegan/cyclegan.py
@@ -18,9 +18,10 @@ from __future__ import print_function
 
 import numpy as np
 
-from layers import ConvBN, DeConvBN
 import paddle.fluid as fluid
-from model import Model, Loss
+from hapi.model import Model, Loss
+
+from layers import ConvBN, DeConvBN
 
 
 class ResnetBlock(fluid.dygraph.Layer):
diff --git a/cyclegan/data.py b/examples/cyclegan/data.py
similarity index 99%
rename from cyclegan/data.py
rename to examples/cyclegan/data.py
index b4c0a1f011a38fba5e84b8bc669eee90f073bb0f..b78c4602abd3f2d3997726b78a34ec5bac73b5ab 100644
--- a/cyclegan/data.py
+++ b/examples/cyclegan/data.py
@@ -20,6 +20,8 @@ import random
 import numpy as np
 from PIL import Image, ImageOps
 
+import paddle
+
 DATASET = "cityscapes"
 A_LIST_FILE = "./data/" + DATASET + "/trainA.txt"
 B_LIST_FILE = "./data/" + DATASET + "/trainB.txt"
@@ -27,8 +29,6 @@ A_TEST_LIST_FILE = "./data/" + DATASET + "/testA.txt"
 B_TEST_LIST_FILE = "./data/" + DATASET + "/testB.txt"
 IMAGES_ROOT = "./data/" + DATASET + "/"
 
-import paddle.fluid as fluid
-
 
 class Cityscapes(paddle.io.Dataset):
     def __init__(self, root_path, file_path, mode='train', return_name=False):
diff --git a/cyclegan/image/A2B.png b/examples/cyclegan/image/A2B.png
similarity index 100%
rename from cyclegan/image/A2B.png
rename to examples/cyclegan/image/A2B.png
diff --git a/cyclegan/image/B2A.png b/examples/cyclegan/image/B2A.png
similarity index 100%
rename from cyclegan/image/B2A.png
rename to examples/cyclegan/image/B2A.png
diff --git a/cyclegan/image/net.png b/examples/cyclegan/image/net.png
similarity index 100%
rename from cyclegan/image/net.png
rename to examples/cyclegan/image/net.png
diff --git a/cyclegan/image/testA/123_A.jpg b/examples/cyclegan/image/testA/123_A.jpg
similarity index 100%
rename from cyclegan/image/testA/123_A.jpg
rename to examples/cyclegan/image/testA/123_A.jpg
diff --git a/cyclegan/image/testB/78_B.jpg b/examples/cyclegan/image/testB/78_B.jpg
similarity index 100%
rename from cyclegan/image/testB/78_B.jpg
rename to examples/cyclegan/image/testB/78_B.jpg
diff --git a/cyclegan/infer.py b/examples/cyclegan/infer.py
similarity index 91%
rename from cyclegan/infer.py
rename to examples/cyclegan/infer.py
index 0b61a958d59e19b73fa01d3c484e1e3231fae71b..2fb2b35eefc8ffb1b4fc10f58ce519e998415055 100644
--- a/cyclegan/infer.py
+++ b/examples/cyclegan/infer.py
@@ -25,9 +25,9 @@ from PIL import Image
 from scipy.misc import imsave
 
 import paddle.fluid as fluid
-from check import check_gpu, check_version
+from hapi.model import Model, Input, set_device
 
-from model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, GeneratorCombine
 
 
@@ -43,7 +43,7 @@ def main():
     im_shape = [-1, 3, 256, 256]
     input_A = Input(im_shape, 'float32', 'input_A')
     input_B = Input(im_shape, 'float32', 'input_B')
-    g.prepare(inputs=[input_A, input_B])
+    g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
     g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)
 
     out_path = FLAGS.output + "/single"
@@ -59,10 +59,10 @@ def main():
         data = image.transpose([2, 0, 1])[np.newaxis, :]
 
         if FLAGS.input_style == "A":
-            _, fake, _, _ = g.test([data, data])
+            _, fake, _, _ = g.test_batch([data, data])
 
         if FLAGS.input_style == "B":
-            fake, _, _, _ = g.test([data, data])
+            fake, _, _, _ = g.test_batch([data, data])
 
         fake = np.squeeze(fake[0]).transpose([1, 2, 0])
 
@@ -74,7 +74,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN inference")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
diff --git a/cyclegan/layers.py b/examples/cyclegan/layers.py
similarity index 100%
rename from cyclegan/layers.py
rename to examples/cyclegan/layers.py
diff --git a/cyclegan/test.py b/examples/cyclegan/test.py
similarity index 92%
rename from cyclegan/test.py
rename to examples/cyclegan/test.py
index 995663090f07e345e54be47da26a8c0e7fd32a4a..67f7183e2229ec9509e23e9ac81dc54122290056 100644
--- a/cyclegan/test.py
+++ b/examples/cyclegan/test.py
@@ -22,9 +22,9 @@ import numpy as np
 from scipy.misc import imsave
 
 import paddle.fluid as fluid
-from check import check_gpu, check_version
+from hapi.model import Model, Input, set_device
 
-from model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, GeneratorCombine
 import data as data
 
@@ -41,7 +41,7 @@ def main():
     im_shape = [-1, 3, 256, 256]
     input_A = Input(im_shape, 'float32', 'input_A')
     input_B = Input(im_shape, 'float32', 'input_B')
-    g.prepare(inputs=[input_A, input_B])
+    g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
     g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)
 
     if not os.path.exists(FLAGS.output):
@@ -56,7 +56,7 @@ def main():
         data_A = np.array(data_A).astype("float32")
         data_B = np.array(data_B).astype("float32")
 
-        fake_A, fake_B, cyc_A, cyc_B = g.test([data_A, data_B])
+        fake_A, fake_B, cyc_A, cyc_B = g.test_batch([data_A, data_B])
 
         datas = [fake_A, fake_B, cyc_A, cyc_B, data_A, data_B]
         odatas = []
@@ -75,7 +75,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN test")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
diff --git a/cyclegan/train.py b/examples/cyclegan/train.py
similarity index 84%
rename from cyclegan/train.py
rename to examples/cyclegan/train.py
index d4273a3304a2cd6673f89afdea01ee61e379a568..4ca77dd66996afcc00218eafaafefa25a2a7c771 100644
--- a/cyclegan/train.py
+++ b/examples/cyclegan/train.py
@@ -24,12 +24,11 @@ import time
 import paddle
 import paddle.fluid as fluid
 
-from check import check_gpu, check_version
-
-from model import Model, Input, set_device
+from hapi.model import Model, Input, set_device
 
-import data as data
+from check import check_gpu, check_version
 from cyclegan import Generator, Discriminator, GeneratorCombine, GLoss, DLoss
+import data as data
 
 step_per_epoch = 2974
 
@@ -76,12 +75,15 @@ def main():
     fake_A = Input(im_shape, 'float32', 'fake_A')
     fake_B = Input(im_shape, 'float32', 'fake_B')
 
-    g_AB.prepare(inputs=[input_A])
-    g_BA.prepare(inputs=[input_B])
+    g_AB.prepare(inputs=[input_A], device=FLAGS.device)
+    g_BA.prepare(inputs=[input_B], device=FLAGS.device)
 
-    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B])
-    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B])
-    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A])
+    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B],
+              device=FLAGS.device)
+    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B],
+                device=FLAGS.device)
+    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A],
+                device=FLAGS.device)
 
     if FLAGS.resume:
         g.load(FLAGS.resume)
@@ -108,14 +110,14 @@ def main():
             data_B = data_B[0][0] if not FLAGS.dynamic else data_B[0]
             start = time.time()
 
-            fake_B = g_AB.test(data_A)[0]
-            fake_A = g_BA.test(data_B)[0]
-            g_loss = g.train([data_A, data_B])[0]
+            fake_B = g_AB.test_batch(data_A)[0]
+            fake_A = g_BA.test_batch(data_B)[0]
+            g_loss = g.train_batch([data_A, data_B])[0]
             fake_pb = B_pool.get(fake_B)
-            da_loss = d_A.train([data_B, fake_pb])[0]
+            da_loss = d_A.train_batch([data_B, fake_pb])[0]
 
             fake_pa = A_pool.get(fake_A)
-            db_loss = d_B.train([data_A, fake_pa])[0]
+            db_loss = d_B.train_batch([data_A, fake_pa])[0]
 
             t = time.time() - start
             if i % 20 == 0:
@@ -128,7 +130,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN Training on Cityscapes")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
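The `train.py` hunks above carry the core of the hapi API migration for this example: every per-batch call moves to its `*_batch` form. Condensed, one CycleGAN step in the updated script performs the sequence below (a simplified excerpt of the hunk above, wrapped in a function whose name is ours; `g_AB`, `g_BA`, `g`, `d_A`, `d_B` and the image history pools are the objects prepared earlier in `train.py`):

```python
def train_one_batch(g_AB, g_BA, g, d_A, d_B, A_pool, B_pool, data_A, data_B):
    # Forward-only generator passes produce the fake images.
    fake_B = g_AB.test_batch(data_A)[0]
    fake_A = g_BA.test_batch(data_B)[0]
    # One optimizer step for the combined generator...
    g_loss = g.train_batch([data_A, data_B])[0]
    # ...then one step per discriminator, fed from the image history pools.
    fake_pb = B_pool.get(fake_B)
    da_loss = d_A.train_batch([data_B, fake_pb])[0]
    fake_pa = A_pool.get(fake_A)
    db_loss = d_B.train_batch([data_A, fake_pa])[0]
    return g_loss, da_loss, db_loss
```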
diff --git a/sequence_tagging/README.md b/examples/sequence_tagging/README.md
similarity index 80%
rename from sequence_tagging/README.md
rename to examples/sequence_tagging/README.md
index 550645761362d029663aac4b37f86a0403024176..0bcb9ff859a0ab593abcc6769ec671b15581f6c9 100644
--- a/sequence_tagging/README.md
+++ b/examples/sequence_tagging/README.md
@@ -54,7 +54,7 @@ python downloads.py dataset
 We release a lexical analysis model trained on our in-house dataset; it can be used directly and downloaded from the link below:
 ```bash
 # download baseline model
-python downloads.py lac
+python downloads.py model
 ```
 
 ### Model Training
@@ -66,65 +66,36 @@ Single-GPU training
 ```
 export CUDA_VISIBLE_DEVICES=0
 python -u train.py \
-    --train_file ./data/train.tsv \
-    --test_file ./data/test.tsv \
-    --word_dict_path ./conf/word.dic \
-    --label_dict_path ./conf/tag.dic \
-    --word_rep_dict_path ./conf/q2b.dic \
     --device gpu \
-    --grnn_hidden_dim 128 \
-    --word_emb_dim 128 \
-    --bigru_num 2 \
-    --base_learning_rate 1e-3 \
-    --batch_size 300 \
-    --epoch 10 \
-    --save_dir ./model \
-    --num_devices 1 \
-    -d
-
-# -d: train in dygraph mode; to train with the static graph, remove the -d flag from the command line
+    --dynamic False
+
+# --device: run on the gpu or the cpu
+# --dynamic: whether to train in dygraph mode; set to True for dygraph and False for static graph
 ```
+
 Multi-GPU training
+
 ```
 # setting visible devices for training
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
-    --train_file ./data/train.tsv \
-    --test_file ./data/test.tsv \
-    --word_dict_path ./conf/word.dic \
-    --label_dict_path ./conf/tag.dic \
-    --word_rep_dict_path ./conf/q2b.dic \
     --device gpu \
-    --grnn_hidden_dim 128 \
-    --word_emb_dim 128 \
-    --bigru_num 2 \
-    --base_learning_rate 1e-3 \
-    --batch_size 300 \
-    --epoch 10 \
-    --save_dir ./model \
-    -d
+    --dynamic False
 
-# -d: train in dygraph mode; to train with the static graph, remove the -d flag from the command line
+# --device: run on the gpu or the cpu
+# --dynamic: whether to train in dygraph mode; set to True for dygraph and False for static graph
 ```
+
 Training on CPU
+
 ```
 python -u train.py \
-    --train_file ./data/train.tsv \
-    --test_file ./data/test.tsv \
-    --word_dict_path ./conf/word.dic \
-    --label_dict_path ./conf/tag.dic \
-    --word_rep_dict_path ./conf/q2b.dic \
     --device cpu \
-    --grnn_hidden_dim 128 \
-    --word_emb_dim 128 \
-    --bigru_num 2 \
-    --base_learning_rate 1e-3 \
-    --batch_size 300 \
-    --epoch 10 \
-    --save_dir ./model \
-    -d
+    --dynamic False
+# --device: run on the gpu or the cpu
+# --dynamic: whether to train in dygraph mode; set to True for dygraph and False for static graph
 ```
 
 ### Model Prediction
@@ -132,18 +103,17 @@ python -u train.py \
 Load a saved model and run prediction on unseen data
 ```bash
 python predict.py \
-    --predict_file ./data/infer.tsv \
-    --word_dict_path ./conf/word.dic \
-    --label_dict_path ./conf/tag.dic \
-    --word_rep_dict_path ./conf/q2b.dic \
     --init_from_checkpoint model_baseline/params \
     --output_file predict.result \
     --mode predict \
     --device cpu \
-    -d
-
-# -d: run in dygraph mode; to run with the static graph, remove the -d flag from the command line
+    --dynamic False
 
+# --init_from_checkpoint: the checkpoint to initialize the model from
+# --output_file: the file to write prediction results to
+# --device: run on the gpu or the cpu
+# --mode: run mode; 'train' runs training and 'predict' runs prediction
+# --dynamic: whether to run in dygraph mode; set to True for dygraph and False for static graph
 ```
 
 ### Model Evaluation
@@ -152,15 +122,15 @@ python predict.py \
 ```bash
 # baseline model
 python eval.py \
-    --test_file ./data/test.tsv \
-    --word_dict_path ./conf/word.dic \
-    --label_dict_path ./conf/tag.dic \
-    --word_rep_dict_path ./conf/q2b.dic \
     --init_from_checkpoint ./model_baseline/params \
+    --mode predict \
     --device cpu \
-    -d
+    --dynamic False
 
-# -d: run in dygraph mode; to run with the static graph, remove the -d flag from the command line
+# --init_from_checkpoint: the checkpoint to initialize the model from
+# --device: run on the gpu or the cpu
+# --mode: run mode; 'train' runs training and 'predict' runs prediction
+# --dynamic: whether to run in dygraph mode; set to True for dygraph and False for static graph
 ```
 
diff --git a/sequence_tagging/conf/q2b.dic b/examples/sequence_tagging/conf/q2b.dic
similarity index 100%
rename from sequence_tagging/conf/q2b.dic
rename to examples/sequence_tagging/conf/q2b.dic
diff --git a/sequence_tagging/conf/tag.dic b/examples/sequence_tagging/conf/tag.dic
similarity index 100%
rename from sequence_tagging/conf/tag.dic
rename to examples/sequence_tagging/conf/tag.dic
diff --git a/sequence_tagging/conf/word.dic b/examples/sequence_tagging/conf/word.dic
similarity index 100%
rename from sequence_tagging/conf/word.dic
rename to examples/sequence_tagging/conf/word.dic
diff --git a/sequence_tagging/downloads.py b/examples/sequence_tagging/downloads.py
similarity index 92%
rename from sequence_tagging/downloads.py
rename to examples/sequence_tagging/downloads.py
index f4aeab39a46adb3cf37366ca1151b0b13f3ad4c0..b61c3e779cca3f3900d8e0bc0eb209f8fa2f9389 100644
--- a/sequence_tagging/downloads.py
+++ b/examples/sequence_tagging/downloads.py
@@ -33,19 +33,19 @@ FILE_INFO = {
         'name': 'lexical_analysis-dataset-2.0.0.tar.gz',
         'md5': '71e4a9a36d0f0177929a1bccedca7dba'
     },
-    'LAC_MODEL': {
-        'name': 'lexical_analysis-2.0.0.tar.gz',
-        'md5': "fc1daef00de9564083c7dc7b600504ca"
+    'MODEL': {
+        'name': 'sequence_tagging_dy.tar.gz',
+        'md5': "1125d374c03c8218b6e47325dcf607e3"
     },
 }
 
 
 def usage():
-    desc = ("\nDownload datasets and pretrained models for LAC.\n"
+    desc = ("\nDownload datasets and pretrained models for sequence tagging.\n"
            "Usage:\n"
            "   1. python download.py all\n"
            "   2. python download.py dataset\n"
-           "   3. python download.py lac\n")
+           "   3. python download.py model\n")
    print(desc)
 
 
@@ -136,13 +136,13 @@ if __name__ == '__main__':
 
     if sys.argv[1] == 'all':
         download('DATA', pwd)
-        download('LAC_MODEL', pwd)
+        download('MODEL', pwd)
 
     if sys.argv[1] == "dataset":
         download('DATA', pwd)
-    elif sys.argv[1] == "lac":
-        download('LAC_MODEL', pwd)
+    elif sys.argv[1] == "model":
+        download('MODEL', pwd)
     else:
         usage()
diff --git a/sequence_tagging/downloads.sh b/examples/sequence_tagging/downloads.sh
similarity index 83%
rename from sequence_tagging/downloads.sh
rename to examples/sequence_tagging/downloads.sh
index 93efbdc62324dc77eb1897a7be10ad6b79732781..a6c49878cf37fef01216fd450cf9de29958cb899 100644
--- a/sequence_tagging/downloads.sh
+++ b/examples/sequence_tagging/downloads.sh
@@ -5,9 +5,9 @@ if [ -d ./model_baseline/ ]
 then
     echo "./model_baseline/ directory already existed, ignore download"
 else
-    wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/lexical_analysis-2.0.0.tar.gz
-    tar xvf lexical_analysis-2.0.0.tar.gz
-    /bin/rm lexical_analysis-2.0.0.tar.gz
+    wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/sequence_tagging_dy.tar.gz
+    tar xvf sequence_tagging_dy.tar.gz
+    /bin/rm sequence_tagging_dy.tar.gz
 fi
 
 # download dataset file to ./data/
diff --git a/examples/sequence_tagging/eval.py b/examples/sequence_tagging/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff3e7b9865064289f73b19756d4c1b5a271e11d2
--- /dev/null
+++ b/examples/sequence_tagging/eval.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+SequenceTagging network structure
+"""
+
+from __future__ import division
+from __future__ import print_function
+
+import io
+import os
+import sys
+import math
+import argparse
+import numpy as np
+
+from train import SeqTagging
+from utils.configure import PDConfig
+from utils.check import check_gpu, check_version
+from utils.metrics import chunk_count
+from reader import LacDataset, create_lexnet_data_generator, create_dataloader
+
+work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.join(work_dir, "../"))
+from hapi.model import set_device, Input
+
+import paddle.fluid as fluid
+from paddle.fluid.optimizer import AdamOptimizer
+from paddle.fluid.layers.utils import flatten
+
+
+def main(args):
+    place = set_device(args.device)
+    fluid.enable_dygraph(place) if args.dynamic else None
+
+    inputs = [Input([None, None], 'int64', name='words'),
+              Input([None], 'int64', name='length')]
+
+    feed_list = None if args.dynamic else [x.forward() for x in inputs]
+    dataset = LacDataset(args)
+    eval_path = args.test_file
+
+    chunk_evaluator = fluid.metrics.ChunkEvaluator()
+    chunk_evaluator.reset()
+
+    eval_generator = create_lexnet_data_generator(
+        args, reader=dataset, file_name=eval_path, place=place, mode="test")
+
+    eval_dataset = create_dataloader(
+        eval_generator, place, feed_list=feed_list)
+
+    vocab_size = dataset.vocab_size
+    num_labels = dataset.num_labels
+    model = SeqTagging(args, vocab_size, num_labels)
+
+    optim = AdamOptimizer(
+        learning_rate=args.base_learning_rate,
+        parameter_list=model.parameters())
+
+    model.mode = "test"
+    model.prepare(inputs=inputs)
+    model.load(args.init_from_checkpoint, skip_mismatch=True)
+
+    for data in eval_dataset():
+        if len(data) == 1:
+            batch_data = data[0]
+            targets = np.array(batch_data[2])
+        else:
+            batch_data = data
+            targets = batch_data[2].numpy()
+        inputs_data = [batch_data[0], batch_data[1]]
+        crf_decode, length = model.test_batch(inputs=inputs_data)
+        num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(crf_decode, targets, length, dataset.id2label_dict)
+        chunk_evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
+
+    precision, recall, f1 = chunk_evaluator.eval()
+    print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))
+
+
+if __name__ == '__main__':
+    args = PDConfig(yaml_file="sequence_tagging.yaml")
+    args.build()
+    args.Print()
+
+    use_gpu = True if args.device == "gpu" else False
+    check_gpu(use_gpu)
+    check_version()
+    main(args)
diff --git a/sequence_tagging/images/gru-crf-model.png b/examples/sequence_tagging/images/gru-crf-model.png
similarity index 100%
rename from sequence_tagging/images/gru-crf-model.png
rename to examples/sequence_tagging/images/gru-crf-model.png
diff --git a/examples/sequence_tagging/predict.py b/examples/sequence_tagging/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac4a50ad30c494a4d433682d04fffa23cc4d1c03
--- /dev/null
+++ b/examples/sequence_tagging/predict.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+SequenceTagging network structure
+"""
+
+from __future__ import division
+from __future__ import print_function
+
+import io
+import os
+import sys
+import math
+import argparse
+import numpy as np
+
+from train import SeqTagging
+from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
+from reader import LacDataset, create_lexnet_data_generator, create_dataloader
+
+work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.join(work_dir, "../"))
+from hapi.model import set_device, Input
+
+import paddle.fluid as fluid
+from paddle.fluid.optimizer import AdamOptimizer
+from paddle.fluid.layers.utils import flatten
+
+
+def main(args):
+    place = set_device(args.device)
+    fluid.enable_dygraph(place) if args.dynamic else None
+
+    inputs = [Input([None, None], 'int64', name='words'),
+              Input([None], 'int64', name='length')]
+
+    feed_list = None if args.dynamic else [x.forward() for x in inputs]
+    dataset = LacDataset(args)
+    predict_path = args.predict_file
+
+    predict_generator = create_lexnet_data_generator(
+        args, reader=dataset, file_name=predict_path, place=place, mode="predict")
+
+    predict_dataset = create_dataloader(
+        predict_generator, place, feed_list=feed_list)
+
+    vocab_size = dataset.vocab_size
+    num_labels = dataset.num_labels
+    model = SeqTagging(args, vocab_size, num_labels)
+
+    optim = AdamOptimizer(
+        learning_rate=args.base_learning_rate,
+        parameter_list=model.parameters())
+
+    model.mode = "test"
+    model.prepare(inputs=inputs)
+
+    model.load(args.init_from_checkpoint, skip_mismatch=True)
+
+    f = open(args.output_file, "wb")
+    for data in predict_dataset():
+        if len(data) == 1:
+            input_data = data[0]
+        else:
+            input_data = data
+        results, length = model.test_batch(inputs=flatten(input_data))
+        for i in range(len(results)):
+            word_len = length[i]
+            word_ids = results[i][: word_len]
+            tags = [dataset.id2label_dict[str(id)] for id in word_ids]
+            f.write("\002".join(tags) + "\n")
+
+
+if __name__ == '__main__':
+    args = PDConfig(yaml_file="sequence_tagging.yaml")
+    args.build()
+    args.Print()
+
+    use_gpu = True if args.device == "gpu" else False
+    check_gpu(use_gpu)
+    check_version()
+    main(args)
diff --git a/sequence_tagging/reader.py b/examples/sequence_tagging/reader.py
similarity index 100%
rename from sequence_tagging/reader.py
rename to examples/sequence_tagging/reader.py
diff --git a/examples/sequence_tagging/sequence_tagging.yaml b/examples/sequence_tagging/sequence_tagging.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..feb0ce13c20aac64af5ddd85980de32c86b7a1d0
--- /dev/null
+++ b/examples/sequence_tagging/sequence_tagging.yaml
@@ -0,0 +1,25 @@
+word_dict_path: "./conf/word.dic"
+label_dict_path: "./conf/tag.dic"
+word_rep_dict_path: "./conf/q2b.dic"
+device: "cpu"
+dynamic: True
+epoch: 10
+base_learning_rate: 0.001
+word_emb_dim: 128
+grnn_hidden_dim: 128
+bigru_num: 2
+emb_learning_rate: 1.0
+crf_learning_rate: 1.0
+batch_size: 300
+max_seq_len: 126
+num_devices: 1
+save_dir: "model"
+init_from_checkpoint: "model_baseline/params"
+init_from_pretrain_model: ""
+save_freq: 1
+eval_freq: 1
+output_file: "predict.result"
+test_file: "./data/test.tsv"
+train_file: "./data/train.tsv"
+predict_file: "./data/infer.tsv"
+mode: "train"
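This YAML file replaces the long per-script argparse blocks deleted below: `PDConfig` (added in `utils/configure.py` later in this patch) loads it and fuses every key with the command line, so each key becomes an attribute and can still be overridden per run. A minimal sketch of how `train.py`, `eval.py` and `predict.py` in this patch consume it:

```python
from utils.configure import PDConfig

args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()   # parse the command line and fuse it with the YAML defaults
args.Print()   # print the effective configuration

# YAML keys are plain attributes; CLI flags such as
# `python train.py --device gpu --dynamic False` override them.
print(args.device, args.batch_size, args.dynamic)
```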
diff --git a/sequence_tagging/train.py b/examples/sequence_tagging/train.py
similarity index 74%
rename from sequence_tagging/train.py
rename to examples/sequence_tagging/train.py
index d02014a79656c333519cd71640d143a94bbcd96b..947bf370e9de22ddde4127c22431baf1a8b0248d 100644
--- a/sequence_tagging/train.py
+++ b/examples/sequence_tagging/train.py
@@ -26,13 +26,15 @@ import argparse
 import numpy as np
 
 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(os.path.join(work_dir))
+sys.path.append(os.path.join(work_dir, "../"))
+
 from hapi.metrics import Metric
 from hapi.model import Model, Input, Loss, set_device
 from hapi.text.text import SequenceTagging
 
 from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
 from reader import LacDataset, create_lexnet_data_generator, create_dataloader
 
 import paddle.fluid as fluid
@@ -258,92 +260,10 @@ def main(args):
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser("sequence tagging training")
-    parser.add_argument(
-        "-wd",
-        "--word_dict_path",
-        default=None,
-        type=str,
-        help='word dict path')
-    parser.add_argument(
-        "-ld",
-        "--label_dict_path",
-        default=None,
-        type=str,
-        help='label dict path')
-    parser.add_argument(
-        "-wrd",
-        "--word_rep_dict_path",
-        default=None,
-        type=str,
-        help='The path of the word replacement Dictionary.')
-    parser.add_argument(
-        "-dev",
-        "--device",
-        type=str,
-        default='gpu',
-        help="device to use, gpu or cpu")
-    parser.add_argument(
-        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
-    parser.add_argument(
-        "-e", "--epoch", default=10, type=int, help="number of epoch")
-    parser.add_argument(
-        '-lr',
-        '--base_learning_rate',
-        default=1e-3,
-        type=float,
-        metavar='LR',
-        help='initial learning rate')
-    parser.add_argument(
-        "--word_emb_dim",
-        default=128,
-        type=int,
-        help='word embedding dimension')
-    parser.add_argument(
-        "--grnn_hidden_dim", default=128, type=int, help="hidden dimension")
-    parser.add_argument(
-        "--bigru_num", default=2, type=int, help='the number of bi-rnn')
-    parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float)
-    parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float)
-    parser.add_argument(
-        "-b", "--batch_size", default=300, type=int, help="batch size")
-    parser.add_argument(
-        "--max_seq_len", default=126, type=int, help="max sequence length")
-    parser.add_argument(
-        "-n", "--num_devices", default=1, type=int, help="number of devices")
-    parser.add_argument(
-        "-o",
-        "--save_dir",
-        default="./model",
-        type=str,
-        help="save model path")
-    parser.add_argument(
-        "--init_from_checkpoint",
-        default=None,
-        type=str,
-        help="load init model parameters")
-    parser.add_argument(
-        "--init_from_pretrain_model",
-        default=None,
-        type=str,
-        help="load pretrain model parameters")
-    parser.add_argument(
-        "-sf", "--save_freq", default=1, type=int, help="save frequency")
-    parser.add_argument(
-        "-ef", "--eval_freq", default=1, type=int, help="eval frequency")
-    parser.add_argument(
-        "--output_file", default="predict.result", type=str, help="predict output file")
-    parser.add_argument(
-        "--predict_file", default="./data/infer.tsv", type=str, help="predict output file")
-    parser.add_argument(
-        "--test_file", default="./data/test.tsv", type=str, help="predict and eval output file")
-    parser.add_argument(
-        "--train_file", default="./data/train.tsv", type=str, help="train file")
-    parser.add_argument(
-        "--mode", default="train", type=str, help="train|test|predict")
-
-    args = parser.parse_args()
-    print(args)
+    args = PDConfig(yaml_file="sequence_tagging.yaml")
+    args.build()
+    args.Print()
+
     use_gpu = True if args.device == "gpu" else False
     check_gpu(use_gpu)
     check_version()
diff --git a/sequence_tagging/utils/__init__.py b/examples/sequence_tagging/utils/__init__.py
similarity index 100%
rename from sequence_tagging/utils/__init__.py
rename to examples/sequence_tagging/utils/__init__.py
diff --git a/sequence_tagging/utils/check.py b/examples/sequence_tagging/utils/check.py
similarity index 100%
rename from sequence_tagging/utils/check.py
rename to examples/sequence_tagging/utils/check.py
diff --git a/examples/sequence_tagging/utils/configure.py b/examples/sequence_tagging/utils/configure.py
new file mode 100644
index 0000000000000000000000000000000000000000..67e601282fee572518435eaed38a4ed8e26fc5f9
--- /dev/null
+++ b/examples/sequence_tagging/utils/configure.py
@@ -0,0 +1,350 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import argparse
+import json
+import yaml
+import six
+import logging
+
+logging_only_message = "%(message)s"
+logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
+
+
+class JsonConfig(object):
+    """
+    A high-level api for handling json configure file.
+    """
+
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except:
+            raise IOError("Error in parsing bert model config file '%s'" %
+                          config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class ArgumentGroup(object):
+    def __init__(self, parser, title, des):
+        self._group = parser.add_argument_group(title=title, description=des)
+
+    def add_arg(self, name, type, default, help, **kwargs):
+        type = str2bool if type == bool else type
+        self._group.add_argument(
+            "--" + name,
+            default=default,
+            type=type,
+            help=help + ' Default: %(default)s.',
+            **kwargs)
+
+
+class ArgConfig(object):
+    """
+    A high-level api for handling argument configs.
+    """
+
+    def __init__(self):
+        parser = argparse.ArgumentParser()
+
+        train_g = ArgumentGroup(parser, "training", "training options.")
+        train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
+        train_g.add_arg("learning_rate", float, 5e-5,
+                        "Learning rate used to train with warmup.")
+        train_g.add_arg(
+            "lr_scheduler",
+            str,
+            "linear_warmup_decay",
+            "scheduler of learning rate.",
+            choices=['linear_warmup_decay', 'noam_decay'])
+        train_g.add_arg("weight_decay", float, 0.01,
+                        "Weight decay rate for L2 regularizer.")
+        train_g.add_arg(
+            "warmup_proportion", float, 0.1,
+            "Proportion of training steps to perform linear learning rate warmup for."
+        )
+        train_g.add_arg("save_steps", int, 1000,
+                        "The steps interval to save checkpoints.")
+        train_g.add_arg("use_fp16", bool, False,
+                        "Whether to use fp16 mixed precision training.")
+        train_g.add_arg(
+            "loss_scaling", float, 1.0,
+            "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
+        )
+        train_g.add_arg("pred_dir", str, None,
+                        "Path to save the prediction results")
+
+        log_g = ArgumentGroup(parser, "logging", "logging related.")
+        log_g.add_arg("skip_steps", int, 10,
+                      "The steps interval to print loss.")
+        log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
+
+        run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
+        run_type_g.add_arg("use_cuda", bool, True,
+                           "If set, use GPU for training.")
+        run_type_g.add_arg(
+            "use_fast_executor", bool, False,
+            "If set, use fast parallel executor (in experiment).")
+        run_type_g.add_arg(
+            "num_iteration_per_drop_scope", int, 1,
+            "The iteration intervals to clean up temporary variables.")
+        run_type_g.add_arg("do_train", bool, True,
+                           "Whether to perform training.")
+        run_type_g.add_arg("do_predict", bool, True,
+                           "Whether to perform prediction.")
+
+        custom_g = ArgumentGroup(parser, "customize", "customized options.")
+
+        self.custom_g = custom_g
+
+        self.parser = parser
+
+    def add_arg(self, name, dtype, default, descrip):
+        self.custom_g.add_arg(name, dtype, default, descrip)
+
+    def build_conf(self):
+        return self.parser.parse_args()
+
+
+def str2bool(v):
+    # because argparse does not support to parse "true, False" as python
+    # boolean directly
+    return v.lower() in ("true", "t", "1")
+
+
+def print_arguments(args, log=None):
+    if not log:
+        print('----------- Configuration Arguments -----------')
+        for arg, value in sorted(six.iteritems(vars(args))):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+    else:
+        log.info('----------- Configuration Arguments -----------')
+        for arg, value in sorted(six.iteritems(vars(args))):
+            log.info('%s: %s' % (arg, value))
+        log.info('------------------------------------------------')
+
+
+class PDConfig(object):
+    """
+    A high-level API for managing configuration files in PaddlePaddle.
+    Can jointly work with command-line arguments, json files and yaml files.
+    """
+
+    def __init__(self, json_file="", yaml_file="", fuse_args=True):
+        """
+        Init function for PDConfig.
+        json_file: the path to the json configure file.
+        yaml_file: the path to the yaml configure file.
+        fuse_args: if fuse the json/yaml configs with argparse.
+        """
+        assert isinstance(json_file, str)
+        assert isinstance(yaml_file, str)
+
+        if json_file != "" and yaml_file != "":
+            raise Warning(
+                "json_file and yaml_file can not co-exist for now. please only use one configure file type."
+            )
+            return
+
+        self.args = None
+        self.arg_config = {}
+        self.json_config = {}
+        self.yaml_config = {}
+
+        parser = argparse.ArgumentParser()
+
+        self.default_g = ArgumentGroup(parser, "default", "default options.")
+        self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
+        self.json_g = ArgumentGroup(parser, "json", "options from json.")
+        self.com_g = ArgumentGroup(parser, "custom", "customized options.")
+
+        self.default_g.add_arg("do_train", bool, False,
+                               "Whether to perform training.")
+        self.default_g.add_arg("do_predict", bool, False,
+                               "Whether to perform predicting.")
+        self.default_g.add_arg("do_eval", bool, False,
+                               "Whether to perform evaluating.")
+        self.default_g.add_arg("do_save_inference_model", bool, False,
+                               "Whether to perform model saving for inference.")
+
+        # NOTE: args for profiler
+        self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
+        self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
+        self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
+
+        self.parser = parser
+
+        if json_file != "":
+            self.load_json(json_file, fuse_args=fuse_args)
+
+        if yaml_file:
+            self.load_yaml(yaml_file, fuse_args=fuse_args)
+
+    def load_json(self, file_path, fuse_args=True):
+
+        if not os.path.exists(file_path):
+            raise Warning("the json file %s does not exist." % file_path)
+            return
+
+        with open(file_path, "r") as fin:
+            self.json_config = json.loads(fin.read())
+            fin.close()
+
+        if fuse_args:
+            for name in self.json_config:
+                if isinstance(self.json_config[name], list):
+                    self.json_g.add_arg(
+                        name,
+                        type(self.json_config[name][0]),
+                        self.json_config[name],
+                        "This is from %s" % file_path,
+                        nargs=len(self.json_config[name]))
+                    continue
+                if not isinstance(self.json_config[name], int) \
+                    and not isinstance(self.json_config[name], float) \
+                    and not isinstance(self.json_config[name], str) \
+                    and not isinstance(self.json_config[name], bool):
+
+                    continue
+
+                self.json_g.add_arg(name,
+                                    type(self.json_config[name]),
+                                    self.json_config[name],
+                                    "This is from %s" % file_path)
+
+    def load_yaml(self, file_path, fuse_args=True):
+
+        if not os.path.exists(file_path):
+            raise Warning("the yaml file %s does not exist." % file_path)
+            return
+
+        with open(file_path, "r") as fin:
+            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
+            fin.close()
+
+        if fuse_args:
+            for name in self.yaml_config:
+                if isinstance(self.yaml_config[name], list):
+                    self.yaml_g.add_arg(
+                        name,
+                        type(self.yaml_config[name][0]),
+                        self.yaml_config[name],
+                        "This is from %s" % file_path,
+                        nargs=len(self.yaml_config[name]))
+                    continue
+
+                if not isinstance(self.yaml_config[name], int) \
+                    and not isinstance(self.yaml_config[name], float) \
+                    and not isinstance(self.yaml_config[name], str) \
+                    and not isinstance(self.yaml_config[name], bool):
+
+                    continue
+
+                self.yaml_g.add_arg(name,
+                                    type(self.yaml_config[name]),
+                                    self.yaml_config[name],
+                                    "This is from %s" % file_path)
+
+    def build(self):
+        self.args = self.parser.parse_args()
+        self.arg_config = vars(self.args)
+
+    def __add__(self, new_arg):
+        assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
+        assert len(new_arg) >= 3
+        assert self.args is None
+
+        name = new_arg[0]
+        dtype = new_arg[1]
+        dvalue = new_arg[2]
+        desc = new_arg[3] if len(
+            new_arg) == 4 else "Description is not provided."
+
+        self.com_g.add_arg(name, dtype, dvalue, desc)
+
+        return self
+
+    def __getattr__(self, name):
+        if name in self.arg_config:
+            return self.arg_config[name]
+
+        if name in self.json_config:
+            return self.json_config[name]
+
+        if name in self.yaml_config:
+            return self.yaml_config[name]
+
+        raise Warning("The argument %s is not defined." % name)
+
+    def Print(self):
+
+        print("-" * 70)
+        for name in self.arg_config:
+            print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
+
+        for name in self.json_config:
+            if name not in self.arg_config:
+                print("%s:\t\t\t\t%s" %
+                      (str(name), str(self.json_config[name])))
+
+        for name in self.yaml_config:
+            if name not in self.arg_config:
+                print("%s:\t\t\t\t%s" %
+                      (str(name), str(self.yaml_config[name])))
+
+        print("-" * 70)
+
+
+if __name__ == "__main__":
+    """
+    pd_config = PDConfig(json_file = "./test/bert_config.json")
+    pd_config.build()
+
+    print(pd_config.do_train)
+    print(pd_config.hidden_size)
+
+    pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
+    pd_config.build()
+
+    print(pd_config.do_train)
+    print(pd_config.hidden_size)
+    """
+
+    pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
+    pd_config += ("my_age", int, 18, "I am forever 18.")
+    pd_config.build()
+
+    print(pd_config.do_train)
+    print(pd_config.hidden_size)
+    print(pd_config.my_age)
diff --git a/sequence_tagging/utils/metrics.py b/examples/sequence_tagging/utils/metrics.py
similarity index 100%
rename from sequence_tagging/utils/metrics.py
rename to examples/sequence_tagging/utils/metrics.py
diff --git a/hapi/model.py b/hapi/model.py
index 50932fc46be1106d6a3fff267adae68e33ddc345..bd6cf0d07484633259ffeacc3ee4623102063d75 100644
--- a/hapi/model.py
+++ b/hapi/model.py
@@ -193,17 +193,17 @@ class StaticGraphAdapter(object):
     def mode(self, value):
         self.model.mode = value
 
-    def train(self, inputs, labels=None):
+    def train_batch(self, inputs, labels=None):
         assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
         return self._run(inputs, labels)
 
-    def eval(self, inputs, labels=None):
+    def eval_batch(self, inputs, labels=None):
         self.mode = 'eval'
         return self._run(inputs, labels)
 
-    def test(self, inputs):
+    def test_batch(self, inputs):
         self.mode = 'test'
         return self._run(inputs, None)
 
@@ -567,7 +567,7 @@ class DynamicGraphAdapter(object):
         self.model.mode = value
 
     # TODO multi device in dygraph mode not implemented at present time
-    def train(self, inputs, labels=None):
+    def train_batch(self, inputs, labels=None):
         assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
@@ -600,7 +600,7 @@ class DynamicGraphAdapter(object):
         return ([to_numpy(l) for l in losses], metrics) \
             if len(metrics) > 0 else [to_numpy(l) for l in losses]
 
-    def eval(self, inputs, labels=None):
+    def eval_batch(self, inputs, labels=None):
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
@@ -642,7 +642,7 @@ class DynamicGraphAdapter(object):
         return ([to_numpy(l) for l in losses], metrics) \
             if len(metrics) > 0 else [to_numpy(l) for l in losses]
 
-    def test(self, inputs):
+    def test_batch(self, inputs):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
@@ -741,14 +741,14 @@ class Model(fluid.dygraph.Layer):
         else:
             self._adapter = StaticGraphAdapter(self)
 
-    def train(self, *args, **kwargs):
-        return self._adapter.train(*args, **kwargs)
+    def train_batch(self, *args, **kwargs):
+        return self._adapter.train_batch(*args, **kwargs)
 
-    def eval(self, *args, **kwargs):
-        return self._adapter.eval(*args, **kwargs)
+    def eval_batch(self, *args, **kwargs):
+        return self._adapter.eval_batch(*args, **kwargs)
 
-    def test(self, *args, **kwargs):
-        return self._adapter.test(*args, **kwargs)
+    def test_batch(self, *args, **kwargs):
+        return self._adapter.test_batch(*args, **kwargs)
 
     def save(self, *args, **kwargs):
         if ParallelEnv().local_rank == 0:
@@ -1213,18 +1213,6 @@ class Model(fluid.dygraph.Layer):
 
         return outputs
 
-    def set_eval_data(self, eval_data):
-        """
-        Args:
-            eval_data (Dataset|DataLoader|None): An iterable data loader is used for
-                eval. An instance of paddle.io.Dataset or
-                paddle.io.Dataloader is recomended.
-        """
-        assert isinstance(
-            eval_data,
-            DataLoader), "eval_data must be a instance of Dataloader!"
-        self._test_dataloader = eval_data
-
     def _run_one_epoch(self,
                        data_loader,
                        callbacks,
@@ -1261,11 +1249,11 @@ class Model(fluid.dygraph.Layer):
             callbacks.on_batch_begin(mode, step, logs)
 
             if mode == 'train':
-                outs = self.train(data[:len(self._inputs)],
-                                  data[len(self._inputs):])
+                outs = self.train_batch(data[:len(self._inputs)],
+                                        data[len(self._inputs):])
             else:
-                outs = self.eval(data[:len(self._inputs)],
-                                 data[len(self._inputs):])
+                outs = self.eval_batch(data[:len(self._inputs)],
+                                       data[len(self._inputs):])
 
             # losses
             loss = outs[0] if self._metrics else outs
-""" -SequenceTagging network structure -""" - -from __future__ import division -from __future__ import print_function - -import io -import os -import sys -import math -import argparse -import numpy as np - -from train import SeqTagging -from utils.check import check_gpu, check_version -from utils.metrics import chunk_count -from reader import LacDataset, create_lexnet_data_generator, create_dataloader - -work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(work_dir) -from hapi.model import set_device, Input - -import paddle.fluid as fluid -from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.layers.utils import flatten - - -def main(args): - place = set_device(args.device) - fluid.enable_dygraph(place) if args.dynamic else None - - inputs = [Input([None, None], 'int64', name='words'), - Input([None], 'int64', name='length')] - - feed_list = None if args.dynamic else [x.forward() for x in inputs] - dataset = LacDataset(args) - eval_path = args.test_file - - chunk_evaluator = fluid.metrics.ChunkEvaluator() - chunk_evaluator.reset() - - eval_generator = create_lexnet_data_generator( - args, reader=dataset, file_name=eval_path, place=place, mode="test") - - eval_dataset = create_dataloader( - eval_generator, place, feed_list=feed_list) - - vocab_size = dataset.vocab_size - num_labels = dataset.num_labels - model = SeqTagging(args, vocab_size, num_labels) - - optim = AdamOptimizer( - learning_rate=args.base_learning_rate, - parameter_list=model.parameters()) - - model.mode = "test" - model.prepare(inputs=inputs) - model.load(args.init_from_checkpoint, skip_mismatch=True) - - for data in eval_dataset(): - if len(data) == 1: - batch_data = data[0] - targets = np.array(batch_data[2]) - else: - batch_data = data - targets = batch_data[2].numpy() - inputs_data = [batch_data[0], batch_data[1]] - crf_decode, length = model.test(inputs=inputs_data) - num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(crf_decode, targets, length, dataset.id2label_dict) - chunk_evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks) - - precision, recall, f1 = chunk_evaluator.eval() - print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser("sequence tagging training") - parser.add_argument( - "-wd", - "--word_dict_path", - default=None, - type=str, - help='word dict path') - parser.add_argument( - "-ld", - "--label_dict_path", - default=None, - type=str, - help='label dict path') - parser.add_argument( - "-wrd", - "--word_rep_dict_path", - default=None, - type=str, - help='The path of the word replacement Dictionary.') - parser.add_argument( - "-dev", - "--device", - type=str, - default='gpu', - help="device to use, gpu or cpu") - parser.add_argument( - "-d", "--dynamic", action='store_true', help="enable dygraph mode") - parser.add_argument( - "-e", "--epoch", default=10, type=int, help="number of epoch") - parser.add_argument( - '-lr', - '--base_learning_rate', - default=1e-3, - type=float, - metavar='LR', - help='initial learning rate') - parser.add_argument( - "--word_emb_dim", - default=128, - type=int, - help='word embedding dimension') - parser.add_argument( - "--grnn_hidden_dim", default=128, type=int, help="hidden dimension") - parser.add_argument( - "--bigru_num", default=2, type=int, help='the number of bi-rnn') - parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float) - parser.add_argument("-clr", "--crf_learning_rate", 
default=1.0, type=float) - parser.add_argument( - "-b", "--batch_size", default=300, type=int, help="batch size") - parser.add_argument( - "--max_seq_len", default=126, type=int, help="max sequence length") - parser.add_argument( - "-n", "--num_devices", default=1, type=int, help="number of devices") - parser.add_argument( - "-o", - "--save_dir", - default="./model", - type=str, - help="save model path") - parser.add_argument( - "--init_from_checkpoint", - default=None, - type=str, - help="load init model parameters") - parser.add_argument( - "--init_from_pretrain_model", - default=None, - type=str, - help="load pretrain model parameters") - parser.add_argument( - "-sf", "--save_freq", default=1, type=int, help="save frequency") - parser.add_argument( - "-ef", "--eval_freq", default=1, type=int, help="eval frequency") - parser.add_argument( - "--output_file", default="predict.result", type=str, help="predict output file") - parser.add_argument( - "--predict_file", default="./data/infer.tsv", type=str, help="predict output file") - parser.add_argument( - "--test_file", default="./data/test.tsv", type=str, help="predict and eval output file") - parser.add_argument( - "--train_file", default="./data/train.tsv", type=str, help="train file") - parser.add_argument( - "--mode", default="predict", type=str, help="train|test|predict") - - args = parser.parse_args() - print(args) - use_gpu = True if args.device == "gpu" else False - check_gpu(use_gpu) - check_version() - - main(args) diff --git a/sequence_tagging/predict.py b/sequence_tagging/predict.py deleted file mode 100644 index 6e719bb2699f82f9cdc1801fa33f6460cffe7c2c..0000000000000000000000000000000000000000 --- a/sequence_tagging/predict.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -SequenceTagging network structure -""" - -from __future__ import division -from __future__ import print_function - -import io -import os -import sys -import math -import argparse -import numpy as np - -from train import SeqTagging -from utils.check import check_gpu, check_version -from reader import LacDataset, create_lexnet_data_generator, create_dataloader - -work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(work_dir) -from hapi.model import set_device, Input - -import paddle.fluid as fluid -from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.layers.utils import flatten - - -def main(args): - place = set_device(args.device) - fluid.enable_dygraph(place) if args.dynamic else None - - inputs = [Input([None, None], 'int64', name='words'), - Input([None], 'int64', name='length')] - - feed_list = None if args.dynamic else [x.forward() for x in inputs] - dataset = LacDataset(args) - predict_path = args.predict_file - - predict_generator = create_lexnet_data_generator( - args, reader=dataset, file_name=predict_path, place=place, mode="predict") - - predict_dataset = create_dataloader( - predict_generator, place, feed_list=feed_list) - - vocab_size = dataset.vocab_size - num_labels = dataset.num_labels - model = SeqTagging(args, vocab_size, num_labels) - - optim = AdamOptimizer( - learning_rate=args.base_learning_rate, - parameter_list=model.parameters()) - - model.mode = "test" - model.prepare(inputs=inputs) - - model.load(args.init_from_checkpoint) - - f = open(args.output_file, "wb") - for data in predict_dataset(): - if len(data) == 1: - input_data = data[0] - else: - input_data = data - results, length = model.test(inputs=flatten(input_data)) - for i in range(len(results)): - word_len = length[i] - word_ids = results[i][: word_len] - tags = [dataset.id2label_dict[str(id)] for id in word_ids] - f.write("\002".join(tags) + "\n") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser("sequence tagging training") - parser.add_argument( - "-wd", - "--word_dict_path", - default=None, - type=str, - help='word dict path') - parser.add_argument( - "-ld", - "--label_dict_path", - default=None, - type=str, - help='label dict path') - parser.add_argument( - "-wrd", - "--word_rep_dict_path", - default=None, - type=str, - help='The path of the word replacement Dictionary.') - parser.add_argument( - "-dev", - "--device", - type=str, - default='gpu', - help="device to use, gpu or cpu") - parser.add_argument( - "-d", "--dynamic", action='store_true', help="enable dygraph mode") - parser.add_argument( - "-e", "--epoch", default=10, type=int, help="number of epoch") - parser.add_argument( - '-lr', - '--base_learning_rate', - default=1e-3, - type=float, - metavar='LR', - help='initial learning rate') - parser.add_argument( - "--word_emb_dim", - default=128, - type=int, - help='word embedding dimension') - parser.add_argument( - "--grnn_hidden_dim", default=128, type=int, help="hidden dimension") - parser.add_argument( - "--bigru_num", default=2, type=int, help='the number of bi-rnn') - parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float) - parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float) - parser.add_argument( - "-b", "--batch_size", default=300, type=int, help="batch size") - parser.add_argument( - "--max_seq_len", default=126, type=int, help="max sequence length") - parser.add_argument( - "-n", "--num_devices", default=1, type=int, help="number of devices") - parser.add_argument( - "-o", 
- "--save_dir", - default="./model", - type=str, - help="save model path") - parser.add_argument( - "--init_from_checkpoint", - default=None, - type=str, - help="load init model parameters") - parser.add_argument( - "--init_from_pretrain_model", - default=None, - type=str, - help="load pretrain model parameters") - parser.add_argument( - "-sf", "--save_freq", default=1, type=int, help="save frequency") - parser.add_argument( - "-ef", "--eval_freq", default=1, type=int, help="eval frequency") - parser.add_argument( - "--output_file", default="predict.result", type=str, help="predict output file") - parser.add_argument( - "--predict_file", default="./data/infer.tsv", type=str, help="predict output file") - parser.add_argument( - "--mode", default="train", type=str, help="train|test|predict") - - args = parser.parse_args() - print(args) - use_gpu = True if args.device == "gpu" else False - check_gpu(use_gpu) - check_version() - - main(args)