Unverified commit ecfc61a3, authored by zhengya01 and committed by GitHub

Merge pull request #19 from PaddlePaddle/develop

update
@@ -7,3 +7,6 @@
 [submodule "fluid/PaddleNLP/Senta"]
     path = fluid/PaddleNLP/Senta
     url = https://github.com/baidu/Senta.git
+[submodule "fluid/PaddleNLP/LARK"]
+    path = fluid/PaddleNLP/LARK
+    url = https://github.com/PaddlePaddle/LARK
@@ -43,6 +43,7 @@ PaddlePaddle provides a rich set of computational units, allowing users to adopt a modular ...
 Model|Description|Strengths|Reference
 --|:--:|:--:|:--:
 [Transformer](./fluid/PaddleNLP/neural_machine_translation/transformer/README_cn.md)|Machine translation model|Based on self-attention; low computational complexity, high parallelism, learns long-range dependencies easily, and delivers better translation quality|[Attention Is All You Need](https://arxiv.org/abs/1706.03762)
+[BERT](https://github.com/PaddlePaddle/LARK/tree/develop/BERT)|Semantic representation model|Achieves SOTA results on multiple NLP tasks; supports multi-GPU and multi-node training as well as mixed-precision training|[BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)
 [LAC](https://github.com/baidu/lac/blob/master/README.md)|Joint lexical analysis model|Performs Chinese word segmentation, part-of-speech tagging, and named-entity recognition as a single joint task|[Chinese Lexical Analysis with Deep Bi-GRU-CRF Network](https://arxiv.org/abs/1807.01882)
 [Senta](https://github.com/baidu/Senta/blob/master/README.md)|Sentiment analysis models|The sentiment analysis models behind the Baidu AI open platform|-
 [DAM](./fluid/PaddleNLP/deep_attention_matching_net)|Semantic matching model|Work by Baidu NLP published at ACL 2018; selects responses for multi-turn dialogues in retrieval-based chatbots|[Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network](http://aclweb.org/anthology/P18-1103)
...
@@ -44,11 +44,15 @@ def infer(args, config):
     shrink, max_shrink = get_shrink(image.size[1], image.size[0])

     det0 = detect_face(image, shrink)
-    det1 = flip_test(image, shrink)
-    [det2, det3] = multi_scale_test(image, max_shrink)
-    det4 = multi_scale_test_pyramid(image, max_shrink)
-    det = np.row_stack((det0, det1, det2, det3, det4))
-    dets = bbox_vote(det)
+    if args.use_gpu:
+        det1 = flip_test(image, shrink)
+        [det2, det3] = multi_scale_test(image, max_shrink)
+        det4 = multi_scale_test_pyramid(image, max_shrink)
+        det = np.row_stack((det0, det1, det2, det3, det4))
+        dets = bbox_vote(det)
+    else:
+        # when infer on cpu, use a simple case
+        dets = det0

     keep_index = np.where(dets[:, 4] >= args.confs_threshold)[0]
     dets = dets[keep_index, :]
...
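The confidence filtering below the new branch is unchanged. As a side note, here is a minimal standalone sketch of that thresholding step, assuming the usual [xmin, ymin, xmax, ymax, score] column layout for the detection array (the boxes and the threshold value are made up for illustration):

```python
import numpy as np

# Illustrative only: keep detections whose confidence (column 4) clears the threshold,
# mirroring the keep_index / dets lines in the hunk above.
dets = np.array([[10., 10., 50., 50., 0.90],
                 [20., 20., 60., 60., 0.30]])
confs_threshold = 0.50  # hypothetical value; the real one comes from args.confs_threshold
keep_index = np.where(dets[:, 4] >= confs_threshold)[0]
print(dets[keep_index, :])  # only the 0.90-score box survives
```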
@@ -74,8 +74,8 @@ env CUDA_VISIBLE_DEVICES=0 python train.py
 ```
 env CUDA_VISIBLE_DEVICE=0 python infer.py \
-    --init_model="models/1" --input="./data/inputA/*" \
-    --output="./output"
+    --init_model="checkpoints/1" --input="./data/inputA/*" \
+    --input_style A --output="./output"
 ```
 The predictions of a model trained for 150 epochs are shown in Figure 2 and Figure 3:
...
@@ -26,8 +26,10 @@ def infer(args):
     data_shape = [-1, 3, 256, 256]
     input = fluid.layers.data(name='input', shape=data_shape, dtype='float32')
     if args.input_style == "A":
+        model_name = 'g_a'
         fake = build_generator_resnet_9blocks(input, name="g_A")
     elif args.input_style == "B":
+        model_name = 'g_b'
         fake = build_generator_resnet_9blocks(input, name="g_B")
     else:
         raise "Input with style [%s] is not supported." % args.input_style
@@ -37,7 +39,7 @@ def infer(args):

     place = fluid.CUDAPlace(0)
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
-    fluid.io.load_persistables(exe, args.init_model)
+    fluid.io.load_persistables(exe, args.init_model + "/" + model_name)
     if not os.path.exists(args.output):
         os.makedirs(args.output)
...
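Together with the save_persistables change in train.py later in this commit, the new load path implies one parameter directory per sub-network. A small sketch of how infer.py composes that path, using the checkpoints/1 example from the README hunk above and --input_style A:

```python
# Illustrative only: generator A's parameters are expected under <init_model>/g_a,
# the directory name that train.py passes to fluid.io.save_persistables.
init_model = "checkpoints/1"   # example epoch directory from the README
model_name = "g_a"             # chosen because --input_style is "A"
load_dir = init_model + "/" + model_name
print(load_dir)                # checkpoints/1/g_a
```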
@@ -3,10 +3,12 @@ import paddle.fluid as fluid
 import numpy as np
 import os

-use_cudnn = True
+# cudnn is not better when batch size is 1.
+use_cudnn = False
 if 'ce_mode' in os.environ:
     use_cudnn = False

+
 def cal_padding(img_size, stride, filter_size, dilation=1):
     """Calculate padding size."""
     valid_filter_size = dilation * (filter_size - 1) + 1
@@ -18,6 +20,8 @@ def cal_padding(img_size, stride, filter_size, dilation=1):


 def instance_norm(input, name=None):
+    # TODO(lvmengsi@baidu.com): Check the accuracy when using fluid.layers.layer_norm.
+    # return fluid.layers.layer_norm(input, begin_norm_axis=2)
     helper = fluid.layer_helper.LayerHelper("instance_norm", **locals())
     dtype = helper.input_dtype()
     epsilon = 1e-5
...
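For context, instance normalization normalizes each sample and each channel over its own spatial dimensions. A minimal NumPy sketch of the computation the fluid helper above assembles from graph ops (same 1e-5 epsilon; the learned scale and bias are omitted here):

```python
import numpy as np

def instance_norm_np(x, epsilon=1e-5):
    """Reference instance norm for an NCHW array, without learned scale/bias."""
    mean = x.mean(axis=(2, 3), keepdims=True)  # per-sample, per-channel mean
    var = x.var(axis=(2, 3), keepdims=True)    # per-sample, per-channel variance
    return (x - mean) / np.sqrt(var + epsilon)

# Example: a batch of 2 images with 3 channels and 4x4 spatial size.
x = np.random.rand(2, 3, 4, 4).astype("float32")
y = instance_norm_np(x)
print(y.mean(axis=(2, 3)))  # close to zero for every (sample, channel) pair
```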
@@ -17,7 +17,6 @@ import data_reader
 from utility import add_arguments, print_arguments, ImagePool
 from trainer import *

-
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
@@ -36,7 +35,7 @@ add_arg('run_ce', bool, False, "Whether to run for model ce.")

 def train(args):
     max_images_num = data_reader.max_images_num()
-    shuffle=True
+    shuffle = True
     if args.run_ce:
         np.random.seed(10)
         fluid.default_startup_program().random_seed = 90
@@ -66,9 +65,11 @@ def train(args):
     exe.run(fluid.default_startup_program())
     A_pool = ImagePool()
     B_pool = ImagePool()
-    A_reader = paddle.batch(data_reader.a_reader(shuffle=shuffle), args.batch_size)()
-    B_reader = paddle.batch(data_reader.b_reader(shuffle=shuffle), args.batch_size)()
+    A_reader = paddle.batch(
+        data_reader.a_reader(shuffle=shuffle), args.batch_size)()
+    B_reader = paddle.batch(
+        data_reader.b_reader(shuffle=shuffle), args.batch_size)()

     if not args.run_ce:
         A_test_reader = data_reader.a_test_reader()
         B_test_reader = data_reader.b_test_reader()
@@ -119,13 +120,13 @@ def train(args):
         if not os.path.exists(out_path):
             os.makedirs(out_path)
         fluid.io.save_persistables(
-            exe, out_path + "/g_a", main_program=g_A_trainer.program, filename="params")
+            exe, out_path + "/g_a", main_program=g_A_trainer.program)
         fluid.io.save_persistables(
-            exe, out_path + "/g_b", main_program=g_B_trainer.program, filename="params")
+            exe, out_path + "/g_b", main_program=g_B_trainer.program)
         fluid.io.save_persistables(
-            exe, out_path + "/d_a", main_program=d_A_trainer.program, filename="params")
+            exe, out_path + "/d_a", main_program=d_A_trainer.program)
         fluid.io.save_persistables(
-            exe, out_path + "/d_b", main_program=d_B_trainer.program, filename="params")
+            exe, out_path + "/d_b", main_program=d_B_trainer.program)
         print("saved checkpoint to {}".format(out_path))
         sys.stdout.flush()
@@ -144,8 +145,21 @@ def train(args):
     if args.init_model:
         init_model()

-    losses=[[], []]
+    losses = [[], []]
     t_time = 0
+
+    g_A_trainer_program = fluid.CompiledProgram(
+        g_A_trainer.program).with_data_parallel(
+            loss_name=g_A_trainer.g_loss_A.name)
+    g_B_trainer_program = fluid.CompiledProgram(
+        g_B_trainer.program).with_data_parallel(
+            loss_name=g_B_trainer.g_loss_B.name)
+    d_B_trainer_program = fluid.CompiledProgram(
+        d_B_trainer.program).with_data_parallel(
+            loss_name=d_B_trainer.d_loss_B.name)
+    d_A_trainer_program = fluid.CompiledProgram(
+        d_A_trainer.program).with_data_parallel(
+            loss_name=d_A_trainer.d_loss_A.name)
     for epoch in range(args.epoch):
         batch_id = 0
         for i in range(max_images_num):
@@ -158,7 +172,7 @@ def train(args):
             s_time = time.time()
             # optimize the g_A network
             g_A_loss, fake_B_tmp = exe.run(
-                g_A_trainer.program,
+                g_A_trainer_program,
                 fetch_list=[g_A_trainer.g_loss_A, g_A_trainer.fake_B],
                 feed={"input_A": tensor_A,
                       "input_B": tensor_B})
@@ -167,14 +181,14 @@ def train(args):

             # optimize the d_B network
             d_B_loss = exe.run(
-                d_B_trainer.program,
+                d_B_trainer_program,
                 fetch_list=[d_B_trainer.d_loss_B],
                 feed={"input_B": tensor_B,
                       "fake_pool_B": fake_pool_B})[0]

             # optimize the g_B network
             g_B_loss, fake_A_tmp = exe.run(
-                g_B_trainer.program,
+                g_B_trainer_program,
                 fetch_list=[g_B_trainer.g_loss_B, g_B_trainer.fake_A],
                 feed={"input_A": tensor_A,
                       "input_B": tensor_B})
@@ -183,16 +197,16 @@ def train(args):

             # optimize the d_A network
             d_A_loss = exe.run(
-                d_A_trainer.program,
+                d_A_trainer_program,
                 fetch_list=[d_A_trainer.d_loss_A],
                 feed={"input_A": tensor_A,
                       "fake_pool_A": fake_pool_A})[0]
             batch_time = time.time() - s_time
             t_time += batch_time
-            print("epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {}; "
-                  "Batch_time_cost: {:.2f}".format(
-                      epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0],
-                      d_A_loss[0], batch_time))
+            print(
+                "epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {}; "
+                "Batch_time_cost: {:.2f}".format(epoch, batch_id, g_A_loss[
+                    0], d_B_loss[0], g_B_loss[0], d_A_loss[0], batch_time))
             losses[0].append(g_A_loss[0])
             losses[1].append(d_A_loss[0])
             sys.stdout.flush()
...
Subproject commit 8dbdf4892a9c22a39a20537fd8584b760f41d963
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
export CPU_NUM=1
export NUM_THREADS=1
FLAGS_benchmark=true python train.py --enable_ce --train_dir train_big_data/ --vocab_text_path big_vocab_text.txt --vocab_tag_path big_vocab_tag.txt --model_dir big_model --batch_size 500 | python _ce.py
cudaid=${tagspace:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce --use_cuda 1 --train_dir train_big_data/ --vocab_text_path big_vocab_text.txt --vocab_tag_path big_vocab_tag.txt --model_dir big_model --batch_size 500 --parallel 1 | python _ce.py
cudaid=${tagspace_4:=0,1,2,3} # use four cards (0-3) by default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce --use_cuda 1 --train_dir train_big_data/ --vocab_text_path big_vocab_text.txt --vocab_tag_path big_vocab_tag.txt --model_dir big_model --batch_size 500 --parallel 1 | python _ce.py
# this file is only used for continuous evaluation test!

import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi

each_pass_duration_cpu1_thread1_kpi = DurationKpi('each_pass_duration_cpu1_thread1', 0.08, 0, actived=True)
train_acc_cpu1_thread1_kpi = AccKpi('train_acc_cpu1_thread1', 0.08, 0)
each_pass_duration_gpu1_kpi = DurationKpi('each_pass_duration_gpu1', 0.08, 0, actived=True)
train_acc_gpu1_kpi = AccKpi('train_acc_gpu1', 0.08, 0)
each_pass_duration_gpu4_kpi = DurationKpi('each_pass_duration_gpu4', 0.08, 0, actived=True)
train_acc_gpu4_kpi = AccKpi('train_acc_gpu4', 0.08, 0)

tracking_kpis = [
    each_pass_duration_cpu1_thread1_kpi,
    train_acc_cpu1_thread1_kpi,
    each_pass_duration_gpu1_kpi,
    train_acc_gpu1_kpi,
    each_pass_duration_gpu4_kpi,
    train_acc_gpu4_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.

    Each KPI line in the log is expected to be tab-separated and to start
    with the literal prefix "kpis", for example:
    "
    kpis\ttrain_acc_gpu1\t0.92
    kpis\teach_pass_duration_gpu1\t13.5
    "
    All other lines are ignored.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
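train.py (next hunk) prints its CE metrics as tab-separated "kpis" lines on stdout, which is exactly what parse_log() above filters on. A standalone sketch of that parsing step, with made-up sample values:

```python
# Illustrative only: parse lines in the "kpis\t<name>\t<value>" format emitted by train.py.
sample_log = "kpis\ttrain_acc_gpu1\t0.92\nkpis\teach_pass_duration_gpu1\t13.5\nsome other log line"
for line in sample_log.split('\n'):
    fields = line.strip().split('\t')
    if len(fields) == 3 and fields[0] == 'kpis':
        print(fields[1], float(fields[2]))
# -> train_acc_gpu1 0.92
# -> each_pass_duration_gpu1 13.5
```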
@@ -40,6 +40,10 @@ def parse_args():
         '--base_lr', type=float, default=0.01, help='learning rate')
     parser.add_argument(
         '--num_devices', type=int, default=1, help='Number of GPU devices')
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help='If set, run the task with continuous evaluation logs.')
     args = parser.parse_args()
     return args

@@ -51,6 +55,9 @@ def get_cards(args):
 def train():
     """ do training """
     args = parse_args()
+    if args.enable_ce:
+        fluid.default_startup_program().random_seed = SEED
+        fluid.default_main_program().random_seed = SEED
     train_dir = args.train_dir
     vocab_text_path = args.vocab_text_path
     vocab_tag_path = args.vocab_tag_path
@@ -91,6 +98,7 @@ def train():
     model_dir = args.model_dir
     fetch_list = [avg_cost.name]
     total_time = 0.0
+    ce_info = []
     for pass_idx in range(pass_num):
         epoch_idx = pass_idx + 1
         print("epoch_%d start" % epoch_idx)
@@ -106,6 +114,7 @@ def train():
                     "neg_tag": lod_neg_tag
                 },
                 fetch_list=[avg_cost.name, correct.name])
+            ce_info.append(float(np.sum(correct_val)) / (args.num_devices * batch_size))
             if batch_id % args.print_batch == 0:
                 print("TRAIN --> pass: {} batch_num: {} avg_cost: {}, acc: {}"
                       .format(pass_idx, (batch_id + 10) * batch_size,
@@ -120,9 +129,43 @@ def train():
         feed_var_names = ["text", "pos_tag"]
         fetch_vars = [cos_pos]
         fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
-                                      train_exe)
+                                      exe)
+    # only for ce
+    if args.enable_ce:
+        ce_acc = 0
+        try:
+            ce_acc = ce_info[-2]
+        except:
+            logger.error("ce info error")
+        epoch_idx = args.pass_num
+        device = get_device(args)
+        if args.use_cuda:
+            gpu_num = device[1]
+            print("kpis\teach_pass_duration_gpu%s\t%s" %
+                  (gpu_num, total_time / epoch_idx))
+            print("kpis\ttrain_acc_gpu%s\t%s" %
+                  (gpu_num, ce_acc))
+        else:
+            cpu_num = device[1]
+            threads_num = device[2]
+            print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
+                  (cpu_num, threads_num, total_time / epoch_idx))
+            print("kpis\ttrain_acc_cpu%s_thread%s\t%s" %
+                  (cpu_num, threads_num, ce_acc))
+
     print("finish training")


+def get_device(args):
+    if args.use_cuda:
+        gpus = os.environ.get("CUDA_VISIBLE_DEVICES", 1)
+        gpu_num = len(gpus.split(','))
+        return "gpu", gpu_num
+    else:
+        threads_num = os.environ.get('NUM_THREADS', 1)
+        cpu_num = os.environ.get('CPU_NUM', 1)
+        return "cpu", int(cpu_num), int(threads_num)
+
+
 if __name__ == "__main__":
     train()