Unverified · Commit 302b32dd · authored by whs · committed by GitHub

Merge branch 'develop' into ce_ocr

#!/bin/bash
# This file is only used for continuous evaluation.
rm -rf *_factor.txt
python train.py --use_gpu=True 1> log
cat log | python _ce.py
# This file is only used for continuous evaluation tests!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
# NOTE: kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)

tracking_kpis = [
    train_cost_kpi,
    train_duration_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.
    Suggested format: each KPI line in the log is tab-separated,
    "kpis\t<name>\t<value>", for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_acc\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
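As a reference for model developers, the producer side of this contract is just a few prints. A minimal sketch (not part of this commit; the names must match a Kpi registered in tracking_kpis above, and the fields are tab-separated, as parse_log() requires):

# Minimal producer-side sketch: a training script emits
# "kpis<TAB><name><TAB><value>" lines, which parse_log() above picks up.
print("kpis\ttrain_cost\t%f" % 0.123)
print("kpis\ttrain_duration\t%f" % 42.0)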
@@ -20,12 +20,12 @@ add_arg('use_gpu', bool, True, "Whether use GPU to test.")
 def cal_mean_iou(wrong, correct):
-    sum = wrong + cerroct
+    sum = wrong + correct
     true_num = (sum != 0).sum()
-    for i in len(sum):
+    for i in range(len(sum)):
         if sum[i] == 0:
             sum[i] = 1
-    return (cerroct.astype("float64") / sum).sum() / true_num
+    return (correct.astype("float64") / sum).sum() / true_num
 def create_iou(predict, label, mask, num_classes, image_shape):
@@ -84,6 +84,7 @@ def eval(args):
         sys.stdout.flush()
     iou = cal_mean_iou(out_wrong, out_right)
     print "\nmean iou: %.3f" % iou
+    print "kpis\ttest_acc\t%f" % iou
 def main():
...
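To make the two fixes above concrete, here is a small standalone check of the corrected function with toy per-class counts (hypothetical values; numpy assumed, as in the eval script):

import numpy as np

def cal_mean_iou(wrong, correct):
    sum = wrong + correct                # per-class wrong + correct counts
    true_num = (sum != 0).sum()          # number of classes actually present
    for i in range(len(sum)):
        if sum[i] == 0:
            sum[i] = 1                   # avoid division by zero for absent classes
    return (correct.astype("float64") / sum).sum() / true_num

wrong = np.array([2, 0, 1])
correct = np.array([8, 0, 3])
print(cal_mean_iou(wrong, correct))      # (8/10 + 3/4) / 2 = 0.775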
@@ -184,7 +184,7 @@ def res_block(input, filter_num, padding=0, dilation=None, name=None):
     tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase")
     tmp = bn(tmp, relu=False)
     tmp = input + tmp
-    tmp = fluid.layers.relu(tmp, name=name + "_relu")
+    tmp = fluid.layers.relu(tmp)
     return tmp
@@ -227,7 +227,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
     tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase")
     tmp = bn(tmp, relu=False)
     tmp = proj_bn + tmp
-    tmp = fluid.layers.relu(tmp, name=name + "_relu")
+    tmp = fluid.layers.relu(tmp)
     return tmp
...
@@ -11,6 +11,10 @@ from utils import add_arguments, print_arguments, get_feeder_data
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
 from paddle.fluid.initializer import init_on_cpu
+SEED = 90
+# The random seed must be set before configuring the network.
+fluid.default_startup_program().random_seed = SEED
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
@@ -27,9 +31,9 @@ LAMBDA2 = 0.4
 LAMBDA3 = 1.0
 LEARNING_RATE = 0.003
 POWER = 0.9
-LOG_PERIOD = 1
-CHECKPOINT_PERIOD = 1000
-TOTAL_STEP = 60000
+LOG_PERIOD = 100
+CHECKPOINT_PERIOD = 100
+TOTAL_STEP = 100
 no_grad_set = []
@@ -97,10 +101,13 @@ def train(args):
     sub124_loss = 0.
     train_reader = cityscape.train(
        args.batch_size, flip=args.random_mirror, scaling=args.random_scaling)
+    start_time = time.time()
     while True:
         # train a pass
         for data in train_reader():
             if iter_id > TOTAL_STEP:
+                end_time = time.time()
+                print "kpis\ttrain_duration\t%f" % (end_time - start_time)
                 return
             iter_id += 1
             results = exe.run(
@@ -115,13 +122,15 @@ def train(args):
             print "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % (
                 iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD,
                 sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD)
+            print "kpis\ttrain_cost\t%f" % (t_loss / LOG_PERIOD)
             t_loss = 0.
             sub4_loss = 0.
             sub24_loss = 0.
             sub124_loss = 0.
             sys.stdout.flush()
-        if iter_id % CHECKPOINT_PERIOD == 0:
+        if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None:
             dir_name = args.checkpoint_path + "/" + str(iter_id)
             fluid.io.save_persistables(exe, dirname=dir_name)
             print "Saved checkpoint: %s" % (dir_name)
...
# saved model
output/
# ImageNet (ILSVRC2012) data
data/ILSVRC2012/ILSVRC2012_img_val.tar
data/ILSVRC2012/ILSVRC2012_img_train.tar
data/ILSVRC2012/ImageNet_label.tgz
data/ILSVRC2012/train_list.txt
data/ILSVRC2012/val_list.txt
#!/bin/bash
# This file is only used for continuous evaluation.
cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_passes=10 --total_images=6149 --enable_ce=True | python _ce.py
# multi-card run; use a separate variable so the := default above does not shadow it
cudaid=${object_detection_cudaid_m:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_passes=10 --total_images=6149 --enable_ce=True | python _ce.py
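For context, the image-classification train.py in this commit (below) derives the card count from this same variable; a minimal sketch of that logic, assuming the comma-separated format used here:

import os

# e.g. CUDA_VISIBLE_DEVICES="0,1,2,3" -> gpu_nums == 4; unset -> 1
gpu = os.getenv("CUDA_VISIBLE_DEVICES") or ""
gpu_nums = len(gpu.split(","))
print(gpu_nums)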
#### This file is only used for continuous evaluation tests!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!!!!
train_acc_top1_kpi = AccKpi('train_acc_top1', 0.05, 0, desc='TOP1 ACC')
train_acc_top5_kpi = AccKpi(
    'train_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC')
train_cost_kpi = CostKpi('train_cost', 0.3, 0, actived=True, desc='train cost')
test_acc_top1_kpi = AccKpi('test_acc_top1', 0.05, 0, desc='TOP1 ACC')
test_acc_top5_kpi = AccKpi(
    'test_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC')
test_cost_kpi = CostKpi('test_cost', 1.0, 0, actived=True, desc='test cost')
train_speed_kpi = AccKpi(
    'train_speed',
    0.05,
    0,
    actived=True,
    unit_repr='seconds/image',
    desc='train speed in one GPU card')
train_acc_top1_card4_kpi = AccKpi(
    'train_acc_top1_card4', 0.05, 0, desc='TOP1 ACC')
train_acc_top5_card4_kpi = AccKpi(
    'train_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC')
train_cost_card4_kpi = CostKpi(
    'train_cost_card4', 0.3, 0, actived=True, desc='train cost')
test_acc_top1_card4_kpi = AccKpi(
    'test_acc_top1_card4', 0.05, 0, desc='TOP1 ACC')
test_acc_top5_card4_kpi = AccKpi(
    'test_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC')
test_cost_card4_kpi = CostKpi(
    'test_cost_card4', 1.0, 0, actived=True, desc='test cost')
train_speed_card4_kpi = AccKpi(
    'train_speed_card4',
    0.05,
    0,
    actived=True,
    unit_repr='seconds/image',
    desc='train speed in four GPU cards')

tracking_kpis = [
    train_acc_top1_kpi, train_acc_top5_kpi, train_cost_kpi, test_acc_top1_kpi,
    test_acc_top5_kpi, test_cost_kpi, train_speed_kpi, train_acc_top1_card4_kpi,
    train_acc_top5_card4_kpi, train_cost_card4_kpi, test_acc_top1_card4_kpi,
    test_acc_top5_card4_kpi, test_cost_card4_kpi, train_speed_card4_kpi
]


def parse_log(log):
    '''
    This method should be implemented by model developers.
    Suggested format: each KPI line in the log is tab-separated,
    "kpis\t<name>\t<value>", for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_acc\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
@@ -11,6 +11,7 @@ train_parameters = {
     "input_size": [3, 224, 224],
     "input_mean": [0.485, 0.456, 0.406],
     "input_std": [0.229, 0.224, 0.225],
+    "dropout_seed": None,
     "learning_strategy": {
         "name": "piecewise_decay",
         "batch_size": 256,
@@ -101,7 +102,9 @@ class SE_ResNeXt():
         pool = fluid.layers.pool2d(
             input=conv, pool_size=7, pool_type='avg', global_pooling=True)
-        drop = fluid.layers.dropout(x=pool, dropout_prob=0.5)
+        # do not set the seed in normal training; it is only used for debugging/CE
+        drop = fluid.layers.dropout(
+            x=pool, dropout_prob=0.5, seed=self.params["dropout_seed"])
         stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
         out = fluid.layers.fc(input=drop,
                               size=class_dim,
...
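Fixing the dropout seed is what makes two CE runs directly comparable; a hypothetical numpy illustration of the idea (not the fluid implementation):

import numpy as np

# With the same seed, the sampled dropout mask is identical across runs;
# left unseeded, it differs, and so does every loss computed through it.
mask_a = np.random.RandomState(10).binomial(1, 0.5, size=8)
mask_b = np.random.RandomState(10).binomial(1, 0.5, size=8)
print((mask_a == mask_b).all())  # True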
@@ -4,6 +4,7 @@ import time
 import sys
 import paddle
 import paddle.fluid as fluid
+import paddle.dataset.flowers as flowers
 import models
 import reader
 import argparse
@@ -28,6 +29,7 @@ add_arg('checkpoint', str, None, "Whether to resume chec
 add_arg('lr', float, 0.1, "set learning rate.")
 add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
 add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
+add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
 # yapf: enable
 model_list = [m for m in dir(models) if "__" not in m]
@@ -100,6 +102,9 @@ def train(args):
     # model definition
     model = models.__dict__[model_name]()
+    if args.enable_ce:
+        assert model_name == "SE_ResNeXt50_32x4d"
     if model_name is "GoogleNet":
         out0, out1, out2 = model.net(input=image, class_dim=class_dim)
         cost0 = fluid.layers.cross_entropy(input=out0, label=label)
@@ -129,6 +134,8 @@ def train(args):
     params["num_epochs"] = args.num_epochs
     params["learning_strategy"]["batch_size"] = args.batch_size
     params["learning_strategy"]["name"] = args.lr_strategy
+    if args.enable_ce:
+        params["dropout_seed"] = 10
     # initialize optimizer
     optimizer = optimizer_setting(params)
@@ -137,6 +144,9 @@ def train(args):
     if with_memory_optimization:
         fluid.memory_optimize(fluid.default_main_program())
+    if args.enable_ce:
+        fluid.default_startup_program().random_seed = 1000
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
@@ -153,8 +163,20 @@ def train(args):
     train_batch_size = args.batch_size
     test_batch_size = 16
-    train_reader = paddle.batch(reader.train(), batch_size=train_batch_size)
-    test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
+    if not args.enable_ce:
+        train_reader = paddle.batch(reader.train(), batch_size=train_batch_size)
+        test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
+    else:
+        # Use the flowers dataset for CE, with use_xmap=False to keep the data
+        # order deterministic. This is time-consuming; a faster dataset is
+        # needed eventually.
+        import random
+        random.seed(0)
+        train_reader = paddle.batch(
+            flowers.train(use_xmap=False), batch_size=train_batch_size)
+        test_reader = paddle.batch(
+            flowers.test(use_xmap=False), batch_size=test_batch_size)
     feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
     train_exe = fluid.ParallelExecutor(
@@ -162,9 +184,12 @@ def train(args):
     fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
+    gpu = os.getenv("CUDA_VISIBLE_DEVICES") or ""
+    gpu_nums = len(gpu.split(","))
     for pass_id in range(params["num_epochs"]):
         train_info = [[], [], []]
         test_info = [[], [], []]
+        train_time = []
         for batch_id, data in enumerate(train_reader()):
             t1 = time.time()
             loss, acc1, acc5 = train_exe.run(fetch_list, feed=feeder.feed(data))
@@ -176,6 +201,7 @@ def train(args):
             train_info[0].append(loss)
             train_info[1].append(acc1)
             train_info[2].append(acc5)
+            train_time.append(period)
             if batch_id % 10 == 0:
                 print("Pass {0}, trainbatch {1}, loss {2}, \
                     acc1 {3}, acc5 {4} time {5}"
@@ -187,6 +213,7 @@ def train(args):
         train_loss = np.array(train_info[0]).mean()
         train_acc1 = np.array(train_info[1]).mean()
         train_acc5 = np.array(train_info[2]).mean()
+        train_speed = np.array(train_time).mean() / train_batch_size
         cnt = 0
         for test_batch_id, data in enumerate(test_reader()):
             t1 = time.time()
@@ -226,6 +253,36 @@ def train(args):
                 os.makedirs(model_path)
             fluid.io.save_persistables(exe, model_path)
+        # This is for continuous evaluation only
+        if args.enable_ce and pass_id == args.num_epochs - 1:
+            if gpu_nums == 1:
+                # Use the last cost/acc for training
+                print("kpis\ttrain_cost\t%s" % train_loss)
+                print("kpis\ttrain_acc_top1\t%s" % train_acc1)
+                print("kpis\ttrain_acc_top5\t%s" % train_acc5)
+                # Use the mean cost/acc for testing
+                print("kpis\ttest_cost\t%s" % test_loss)
+                print("kpis\ttest_acc_top1\t%s" % test_acc1)
+                print("kpis\ttest_acc_top5\t%s" % test_acc5)
+                print("kpis\ttrain_speed\t%s" % train_speed)
+            else:
+                # Use the last cost/acc for training
+                print("kpis\ttrain_cost_card%s\t%s" % (gpu_nums, train_loss))
+                print("kpis\ttrain_acc_top1_card%s\t%s" % (gpu_nums, train_acc1))
+                print("kpis\ttrain_acc_top5_card%s\t%s" % (gpu_nums, train_acc5))
+                # Use the mean cost/acc for testing
+                print("kpis\ttest_cost_card%s\t%s" % (gpu_nums, test_loss))
+                print("kpis\ttest_acc_top1_card%s\t%s" % (gpu_nums, test_acc1))
+                print("kpis\ttest_acc_top5_card%s\t%s" % (gpu_nums, test_acc5))
+                print("kpis\ttrain_speed_card%s\t%s" % (gpu_nums, train_speed))
 def main():
     args = parser.parse_args()
...
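The names printed above have to line up exactly with the Kpi objects registered in _ce.py; a hypothetical 4-card run would therefore emit lines such as:

# Hypothetical output of a 4-card CE pass; each line is exactly what
# _ce.py's parse_log() consumes: "kpis<TAB><name>_card4<TAB><value>".
gpu_nums, train_loss, test_acc1 = 4, 0.95, 0.71
print("kpis\ttrain_cost_card%s\t%s" % (gpu_nums, train_loss))
print("kpis\ttest_acc_top1_card%s\t%s" % (gpu_nums, test_acc1))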
@@ -145,7 +145,7 @@ def train(train_reader,
     if pass_idx == pass_num - 1 and args.enable_ce:
         # Note: The following logs are special for CE monitoring.
         # Other situations do not need to care about these logs.
-        gpu_num = get_cards()
+        gpu_num = get_cards(args.enable_ce)
         if gpu_num == 1:
             print("kpis\timikolov_20_pass_duration\t%s" %
                   (total_time / epoch_idx))
...
#!/bin/bash
DATA_PATH=$HOME/.cache/paddle/dataset/wmt16
if [ ! -d $DATA_PATH ] ; then
python -c 'import paddle;paddle.dataset.wmt16.train(10000, 10000, "en")'\
'().next()'
tar -zxf $DATA_PATH/wmt16.tar.gz -C $DATA_PATH
fi
train(){
python -u train.py \
--src_vocab_fpath $DATA_PATH/en_10000.dict \
--trg_vocab_fpath $DATA_PATH/de_10000.dict \
--special_token '<s>' '<e>' '<unk>' \
--train_file_pattern $DATA_PATH/wmt16/train \
--val_file_pattern $DATA_PATH/wmt16/val \
--use_token_batch True \
--batch_size 2048 \
--sort_type pool \
--pool_size 10000 \
--enable_ce True \
weight_sharing False \
pass_num 20 \
dropout_seed 10
}
train | python _ce.py
#### This file is only used for continuous evaluation tests!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)

tracking_kpis = [
    train_cost_kpi,
    test_cost_kpi,
    train_duration_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.
    Suggested format: each KPI line in the log is tab-separated,
    "kpis\t<name>\t<value>", for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_acc\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
\ No newline at end of file
@@ -81,6 +81,8 @@ class ModelHyperParams(object):
     n_layer = 6
     # dropout rate used by all dropout layers.
     dropout = 0.1
+    # random seed used in dropout for CE.
+    dropout_seed = None
     # the flag indicating whether to share embedding and softmax weights.
     # vocabularies in source and target should be same for weight sharing.
     weight_sharing = True
...
@@ -111,7 +111,10 @@ def multi_head_attention(queries,
         x=weights, shape=product.shape, actual_shape=post_softmax_shape)
     if dropout_rate:
         weights = layers.dropout(
-            weights, dropout_prob=dropout_rate, is_test=False)
+            weights,
+            dropout_prob=dropout_rate,
+            seed=ModelHyperParams.dropout_seed,
+            is_test=False)
     out = layers.matmul(weights, v)
     return out
@@ -171,7 +174,10 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
     elif cmd == "d":  # add dropout
         if dropout_rate:
             out = layers.dropout(
-                out, dropout_prob=dropout_rate, is_test=False)
+                out,
+                dropout_prob=dropout_rate,
+                seed=ModelHyperParams.dropout_seed,
+                is_test=False)
     return out
@@ -211,7 +217,9 @@ def prepare_encoder(src_word,
         shape=[batch_size, seq_len, src_emb_dim],
         actual_shape=src_data_shape)
     return layers.dropout(
-        enc_input, dropout_prob=dropout_rate,
+        enc_input,
+        dropout_prob=dropout_rate,
+        seed=ModelHyperParams.dropout_seed,
         is_test=False) if dropout_rate else enc_input
...
@@ -103,6 +103,12 @@ def parse_args():
         help="The device type.")
     parser.add_argument(
         '--sync', type=ast.literal_eval, default=True, help="sync mode.")
+    parser.add_argument(
+        "--enable_ce",
+        type=ast.literal_eval,
+        default=True,
+        help="The flag indicating whether to run the task "
+        "for continuous evaluation.")
     args = parser.parse_args()
     # Append args related to dict
@@ -382,6 +388,12 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
                 data_input_names, util_input_names, sum_cost,
                 token_num)
+    # the best cross-entropy value with label smoothing
+    loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log(
+        (1. - TrainTaskConfig.label_smooth_eps
+         )) + TrainTaskConfig.label_smooth_eps *
+                        np.log(TrainTaskConfig.label_smooth_eps / (
+                            ModelHyperParams.trg_vocab_size - 1) + 1e-20))
     init = False
     for pass_id in xrange(TrainTaskConfig.pass_num):
         pass_start_time = time.time()
@@ -421,19 +433,27 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
                 )  # sum the cost from multi-devices
                 total_token_num = token_num_val.sum()
                 total_avg_cost = total_sum_cost / total_token_num
-                print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
-                      (pass_id, batch_id, total_sum_cost, total_avg_cost,
-                       np.exp([min(total_avg_cost, 100)])))
+                print("epoch: %d, batch: %d, avg loss: %f, normalized loss: %f,"
+                      " ppl: %f" % (pass_id, batch_id, total_avg_cost,
+                                    total_avg_cost - loss_normalizer,
+                                    np.exp([min(total_avg_cost, 100)])))
                 if batch_id > 0 and batch_id % 1000 == 0:
                     fluid.io.save_persistables(
                         exe,
                         os.path.join(TrainTaskConfig.ckpt_dir, "latest.checkpoint"))
                     init = True
+        time_consumed = time.time() - pass_start_time
         # Validate and save the model for inference.
-        print("epoch: %d, " % pass_id +
-              ("val avg loss: %f, val ppl: %f, " % test()
-               if args.val_file_pattern is not None else "") + "consumed %fs" %
-              (time.time() - pass_start_time))
+        if args.val_file_pattern is not None:
+            val_avg_cost, val_ppl = test()
+            print(
+                "epoch: %d, val avg loss: %f, val normalized loss: %f, val ppl: %f,"
+                " consumed %fs" % (pass_id, val_avg_cost,
+                                   val_avg_cost - loss_normalizer, val_ppl,
+                                   time_consumed))
+        else:
+            print("epoch: %d, consumed %fs" % (pass_id, time_consumed))
         fluid.io.save_persistables(
             exe,
             os.path.join(TrainTaskConfig.ckpt_dir,
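The loss_normalizer introduced above is the entropy of the label-smoothed target distribution, i.e. the lowest cross-entropy the model could possibly reach, so subtracting it yields a "normalized" loss that approaches zero at a perfect fit. A standalone check with hypothetical values for label_smooth_eps and the target vocabulary size:

import numpy as np

eps, vocab = 0.1, 10000  # hypothetical label_smooth_eps / trg_vocab_size
# smoothed target: 1 - eps on the gold token, eps / (vocab - 1) on the rest
loss_normalizer = -((1. - eps) * np.log(1. - eps) +
                    eps * np.log(eps / (vocab - 1) + 1e-20))
print(loss_normalizer)  # ~1.246 nats for these values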
@@ -442,6 +462,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
             os.path.join(TrainTaskConfig.model_dir,
                          "pass_" + str(pass_id) + ".infer.model"),
             data_input_names[:-2] + util_input_names, [predict], exe)
+    if args.enable_ce:  # For CE
+        print("kpis\ttrain_cost\t%f" % total_avg_cost)
+        print("kpis\ttest_cost\t%f" % val_avg_cost)
+        print("kpis\ttrain_duration\t%f" % time_consumed)
 def train(args):
@@ -465,6 +489,9 @@ def train(args):
     exe = fluid.Executor(place)
+    if args.enable_ce:
+        fluid.default_startup_program().random_seed = 1000
     sum_cost, avg_cost, predict, token_num = transformer(
         ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
         ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
...
./neural_machine_translation/transformer
\ No newline at end of file
@@ -193,13 +193,16 @@ def train(args,
             total_time += end_time - start_time
         train_avg_loss = np.mean(every_pass_loss)
         if devices_num == 1:
             print ("kpis\ttrain_cost\t%s" % train_avg_loss)
             print ("kpis\ttest_acc\t%s" % mean_map)
             print ("kpis\ttrain_speed\t%s" % (total_time / epoch_idx))
         else:
-            print ("kpis\ttrain_cost_card%s\t%s" % (devices_num, train_avg_loss))
-            print ("kpis\ttest_acc_card%s\t%s" % (devices_num, mean_map))
-            print ("kpis\ttrain_speed_card%s\t%f" % (devices_num, total_time / epoch_idx))
+            print ("kpis\ttrain_cost_card%s\t%s" %
+                   (devices_num, train_avg_loss))
+            print ("kpis\ttest_acc_card%s\t%s" %
+                   (devices_num, mean_map))
+            print ("kpis\ttrain_speed_card%s\t%f" %
+                   (devices_num, total_time / epoch_idx))
     if pass_id % 10 == 0 or pass_id == num_passes - 1:
...
#!/bin/bash
# This file is only used for continuous evaluation.
export CE_MODE_X=1
sh data/download.sh
python train.py | python _ce.py
@@ -22,11 +22,7 @@
 ## Data Acquisition
-For the data, follow the acquisition steps in the PaddlePaddle v2 [named entity recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example: copy that example's data folder into this directory and run its download.sh script to fetch the training and test data.
-## Obtaining the shared scripts
-Copy the data-reading script [reader.py](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/reader.py) and the utility script [utils.py](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/utils.py) (dictionary loading and other shared helpers) from the PaddlePaddle v2 [named entity recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example into this directory; this example uses both scripts.
+For the complete dataset, follow the approach described in the PaddlePaddle v2 [named entity recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example. The sample data for this example can also be obtained by running data/download.sh.
 ## Training
...
#### This file is only used for continuous evaluation tests!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!!!!
train_acc_kpi = AccKpi('train_precision', 0.005, actived=True)
test_acc_kpi = CostKpi('test_precision', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)

tracking_kpis = [
    train_acc_kpi,
    test_acc_kpi,
    train_duration_kpi,
]


def parse_log(log):
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
if [ -f assignment2.zip ]; then
    echo "data exist"
    exit 0
else
    wget http://cs224d.stanford.edu/assignment2/assignment2.zip
fi

if [ $? -eq 0 ]; then
    unzip assignment2.zip
    cp assignment2_release/data/ner/wordVectors.txt ./data
    cp assignment2_release/data/ner/vocab.txt ./data
    rm -rf assignment2_release
else
    echo "download data error!" >> /dev/stderr
    exit 1
fi
B-LOC
I-LOC
B-MISC
I-MISC
B-ORG
I-ORG
B-PER
I-PER
O
CRICKET NNP I-NP O
- : O O
LEICESTERSHIRE NNP I-NP I-ORG
TAKE NNP I-NP O
OVER IN I-PP O
AT NNP I-NP O
TOP NNP I-NP O
AFTER NNP I-NP O
INNINGS NNP I-NP O
VICTORY NN I-NP O
. . O O
LONDON NNP I-NP I-LOC
1996-08-30 CD I-NP O
West NNP I-NP I-MISC
Indian NNP I-NP I-MISC
all-rounder NN I-NP O
Phil NNP I-NP I-PER
Simmons NNP I-NP I-PER
took VBD I-VP O
four CD I-NP O
for IN I-PP O
38 CD I-NP O
on IN I-PP O
Friday NNP I-NP O
as IN I-PP O
Leicestershire NNP I-NP I-ORG
beat VBD I-VP O
Somerset NNP I-NP I-ORG
by IN I-PP O
an DT I-NP O
innings NN I-NP O
and CC O O
39 CD I-NP O
runs NNS I-NP O
in IN I-PP O
two CD I-NP O
days NNS I-NP O
to TO I-VP O
take VB I-VP O
over IN I-PP O
at IN B-PP O
the DT I-NP O
head NN I-NP O
of IN I-PP O
the DT I-NP O
county NN I-NP O
championship NN I-NP O
. . O O
Their PRP$ I-NP O
stay NN I-NP O
on IN I-PP O
top NN I-NP O
, , O O
though RB I-ADVP O
, , O O
may MD I-VP O
be VB I-VP O
short-lived JJ I-ADJP O
as IN I-PP O
title NN I-NP O
rivals NNS I-NP O
Essex NNP I-NP I-ORG
, , O O
Derbyshire NNP I-NP I-ORG
and CC I-NP O
Surrey NNP I-NP I-ORG
all DT O O
closed VBD I-VP O
in RP I-PRT O
on IN I-PP O
victory NN I-NP O
while IN I-SBAR O
Kent NNP I-NP I-ORG
made VBD I-VP O
up RP I-PRT O
for IN I-PP O
lost VBN I-NP O
time NN I-NP O
in IN I-PP O
their PRP$ I-NP O
rain-affected JJ I-NP O
match NN I-NP O
against IN I-PP O
Nottinghamshire NNP I-NP I-ORG
. . O O
After IN I-PP O
bowling VBG I-NP O
Somerset NNP I-NP I-ORG
out RP I-PRT O
for IN I-PP O
83 CD I-NP O
on IN I-PP O
the DT I-NP O
opening NN I-NP O
morning NN I-NP O
at IN I-PP O
Grace NNP I-NP I-LOC
Road NNP I-NP I-LOC
, , O O
Leicestershire NNP I-NP I-ORG
extended VBD I-VP O
their PRP$ I-NP O
first JJ I-NP O
innings NN I-NP O
by IN I-PP O
94 CD I-NP O
runs VBZ I-VP O
before IN I-PP O
being VBG I-VP O
bowled VBD I-VP O
out RP I-PRT O
for IN I-PP O
296 CD I-NP O
with IN I-PP O
England NNP I-NP I-LOC
discard VBP I-VP O
Andy NNP I-NP I-PER
Caddick NNP I-NP I-PER
taking VBG I-VP O
three CD I-NP O
for IN I-PP O
83 CD I-NP O
. . O O
EU NNP I-NP I-ORG
rejects VBZ I-VP O
German JJ I-NP I-MISC
call NN I-NP O
to TO I-VP O
boycott VB I-VP O
British JJ I-NP I-MISC
lamb NN I-NP O
. . O O
Peter NNP I-NP I-PER
Blackburn NNP I-NP I-PER
BRUSSELS NNP I-NP I-LOC
1996-08-22 CD I-NP O
The DT I-NP O
European NNP I-NP I-ORG
Commission NNP I-NP I-ORG
said VBD I-VP O
on IN I-PP O
Thursday NNP I-NP O
it PRP B-NP O
disagreed VBD I-VP O
with IN I-PP O
German JJ I-NP I-MISC
advice NN I-NP O
to TO I-PP O
consumers NNS I-NP O
to TO I-VP O
shun VB I-VP O
British JJ I-NP I-MISC
lamb NN I-NP O
until IN I-SBAR O
scientists NNS I-NP O
determine VBP I-VP O
whether IN I-SBAR O
mad JJ I-NP O
cow NN I-NP O
disease NN I-NP O
can MD I-VP O
be VB I-VP O
transmitted VBN I-VP O
to TO I-PP O
sheep NN I-NP O
. . O O
Germany NNP I-NP I-LOC
's POS B-NP O
representative NN I-NP O
to TO I-PP O
the DT I-NP O
European NNP I-NP I-ORG
Union NNP I-NP I-ORG
's POS B-NP O
veterinary JJ I-NP O
committee NN I-NP O
Werner NNP I-NP I-PER
Zwingmann NNP I-NP I-PER
said VBD I-VP O
on IN I-PP O
Wednesday NNP I-NP O
consumers NNS I-NP O
should MD I-VP O
buy VB I-VP O
sheepmeat NN I-NP O
from IN I-PP O
countries NNS I-NP O
other JJ I-ADJP O
than IN I-PP O
Britain NNP I-NP I-LOC
until IN I-SBAR O
the DT I-NP O
scientific JJ I-NP O
advice NN I-NP O
was VBD I-VP O
clearer JJR I-ADJP O
. . O O
" " O O
We PRP I-NP O
do VBP I-VP O
n't RB I-VP O
support VB I-VP O
any DT I-NP O
such JJ I-NP O
recommendation NN I-NP O
because IN I-SBAR O
we PRP I-NP O
do VBP I-VP O
n't RB I-VP O
see VB I-VP O
any DT I-NP O
grounds NNS I-NP O
for IN I-PP O
it PRP I-NP O
, , O O
" " O O
the DT I-NP O
Commission NNP I-NP I-ORG
's POS B-NP O
chief JJ I-NP O
spokesman NN I-NP O
Nikolaus NNP I-NP I-PER
van NNP I-NP I-PER
der FW I-NP I-PER
Pas NNP I-NP I-PER
told VBD I-VP O
a DT I-NP O
news NN I-NP O
briefing NN I-NP O
. . O O
He PRP I-NP O
said VBD I-VP O
further JJ I-NP O
scientific JJ I-NP O
study NN I-NP O
was VBD I-VP O
required VBN I-VP O
and CC O O
if IN I-SBAR O
it PRP I-NP O
was VBD I-VP O
found VBN I-VP O
that IN I-SBAR O
action NN I-NP O
was VBD I-VP O
needed VBN I-VP O
it PRP I-NP O
should MD I-VP O
be VB I-VP O
taken VBN I-VP O
by IN I-PP O
the DT I-NP O
European NNP I-NP I-ORG
Union NNP I-NP I-ORG
. . O O
"""
Conll03 dataset.
"""
from utils import *
__all__ = ["data_reader"]
def canonicalize_digits(word):
if any([c.isalpha() for c in word]): return word
word = re.sub("\d", "DG", word)
if word.startswith("DG"):
word = word.replace(",", "") # remove thousands separator
return word
def canonicalize_word(word, wordset=None, digits=True):
word = word.lower()
if digits:
if (wordset != None) and (word in wordset): return word
word = canonicalize_digits(word) # try to canonicalize numbers
if (wordset == None) or (word in wordset): return word
else: return "UUUNKKK" # unknown token
def data_reader(data_file, word_dict, label_dict):
    """
    The dataset can be obtained from http://www.clips.uantwerpen.be/conll2003/ner/.
    This function returns a reader creator; each sample produced by the reader
    is a tuple of word id sequence, capitalization mark sequence and label id
    sequence.
    :return: reader creator
    :rtype: callable
    """

    def reader():
        UNK_IDX = word_dict["UUUNKKK"]

        sentence = []
        labels = []
        with open(data_file, "r") as f:
            for line in f:
                if len(line.strip()) == 0:
                    if len(sentence) > 0:
                        word_idx = [
                            word_dict.get(
                                canonicalize_word(w, word_dict), UNK_IDX)
                            for w in sentence
                        ]
                        mark = [1 if w[0].isupper() else 0 for w in sentence]
                        label_idx = [label_dict[l] for l in labels]
                        yield word_idx, mark, label_idx
                    sentence = []
                    labels = []
                else:
                    segs = line.strip().split()
                    sentence.append(segs[0])
                    # transform I-TYPE to BIO schema
                    if segs[-1] != "O" and (len(labels) == 0 or
                                            labels[-1][1:] != segs[-1][1:]):
                        labels.append("B" + segs[-1][1:])
                    else:
                        labels.append(segs[-1])

    return reader
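A minimal usage sketch of the reader (hypothetical file paths; load_dict comes from utils.py further down and is pulled in by the star import above):

# Hypothetical quick check; the vocab file name follows download.sh, while
# "data/target.txt" and "data/train" are placeholder paths for illustration.
word_dict = load_dict("data/vocab.txt")
label_dict = load_dict("data/target.txt")
for word_idx, mark, label_idx in data_reader("data/train", word_dict,
                                             label_dict)():
    print(len(word_idx), mark[:5], label_idx[:5])
    break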
 import os
 import math
+import time
 import numpy as np
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 import reader
@@ -24,12 +25,19 @@ def test(exe, chunk_evaluator, inference_program, test_data, place):
     return chunk_evaluator.eval(exe)
-def main(train_data_file, test_data_file, vocab_file, target_file, emb_file,
-         model_save_dir, num_passes, use_gpu, parallel):
+def main(train_data_file,
+         test_data_file,
+         vocab_file,
+         target_file,
+         emb_file,
+         model_save_dir,
+         num_passes,
+         use_gpu,
+         parallel,
+         batch_size=200):
     if not os.path.exists(model_save_dir):
         os.mkdir(model_save_dir)
-    BATCH_SIZE = 200
     word_dict = load_dict(vocab_file)
     label_dict = load_dict(target_file)
@@ -53,60 +61,76 @@ def main(train_data_file, test_data_file, vocab_file, target_file, emb_file,
         chunk_scheme="IOB",
         num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
-    inference_program = fluid.default_main_program().clone()
+    inference_program = fluid.default_main_program().clone(for_test=True)
     with fluid.program_guard(inference_program):
         test_target = chunk_evaluator.metrics + chunk_evaluator.states
         inference_program = fluid.io.get_inference_program(test_target)
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            reader.data_reader(train_data_file, word_dict, label_dict),
-            buf_size=20000),
-        batch_size=BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.reader.shuffle(
-            reader.data_reader(test_data_file, word_dict, label_dict),
-            buf_size=20000),
-        batch_size=BATCH_SIZE)
+    if "CE_MODE_X" not in os.environ:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                reader.data_reader(train_data_file, word_dict, label_dict),
+                buf_size=20000),
+            batch_size=batch_size)
+        test_reader = paddle.batch(
+            paddle.reader.shuffle(
+                reader.data_reader(test_data_file, word_dict, label_dict),
+                buf_size=20000),
+            batch_size=batch_size)
+    else:
+        train_reader = paddle.batch(
+            reader.data_reader(train_data_file, word_dict, label_dict),
+            batch_size=batch_size)
+        test_reader = paddle.batch(
+            reader.data_reader(test_data_file, word_dict, label_dict),
+            batch_size=batch_size)
     place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
     feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place)
     exe = fluid.Executor(place)
+    if "CE_MODE_X" in os.environ:
+        fluid.default_startup_program().random_seed = 110
     exe.run(fluid.default_startup_program())
     embedding_name = 'emb'
     embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
     embedding_param.set(word_vector_values, place)
-    batch_id = 0
     for pass_id in xrange(num_passes):
         chunk_evaluator.reset(exe)
-        for data in train_reader():
+        for batch_id, data in enumerate(train_reader()):
             cost, batch_precision, batch_recall, batch_f1_score = exe.run(
                 fluid.default_main_program(),
                 feed=feeder.feed(data),
                 fetch_list=[avg_cost] + chunk_evaluator.metrics)
             if batch_id % 5 == 0:
-                print(cost)
                 print("Pass " + str(pass_id) + ", Batch " + str(
                     batch_id) + ", Cost " + str(cost[0]) + ", Precision " + str(
                         batch_precision[0]) + ", Recall " + str(batch_recall[0])
                       + ", F1_score" + str(batch_f1_score[0]))
-            batch_id = batch_id + 1
         pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe)
         print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" + str(
             pass_precision) + " pass_recall:" + str(pass_recall) +
               " pass_f1_score:" + str(pass_f1_score))
-        pass_precision, pass_recall, pass_f1_score = test(
+        test_pass_precision, test_pass_recall, test_pass_f1_score = test(
             exe, chunk_evaluator, inference_program, test_reader, place)
-        print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" + str(
-            pass_precision) + " pass_recall:" + str(pass_recall) +
-              " pass_f1_score:" + str(pass_f1_score))
+        print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" + str(
            test_pass_precision) + " pass_recall:" + str(test_pass_recall) +
               " pass_f1_score:" + str(test_pass_f1_score))
         save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id)
-        fluid.io.save_inference_model(save_dirname, ['word', 'mark', 'target'],
-                                      [crf_decode], exe)
+        fluid.io.save_inference_model(save_dirname, ['word', 'mark', 'target'],
+                                      crf_decode, exe)
+        if ("CE_MODE_X" in os.environ) and (pass_id % 50 == 0):
+            if pass_id > 0:
+                print("kpis\ttrain_precision\t%f" % pass_precision)
+                print("kpis\ttest_precision\t%f" % test_pass_precision)
+                print("kpis\ttrain_duration\t%f" % (time.time() - time_begin))
+            time_begin = time.time()
 if __name__ == "__main__":
@@ -118,5 +142,6 @@ if __name__ == "__main__":
         emb_file="data/wordVectors.txt",
         model_save_dir="models",
         num_passes=1000,
+        batch_size=1,
         use_gpu=False,
         parallel=False)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import re
import argparse
import numpy as np
from collections import defaultdict
logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)
def get_embedding(emb_file='data/wordVectors.txt'):
    """
    Get the trained word vectors.
    """
    return np.loadtxt(emb_file, dtype=float)


def load_dict(dict_path):
    """
    Load the word dictionary from the given file.
    Each line of the given file is a word, which can include multiple columns
    separated by tabs.
    This function takes the first column (columns in a line are separated by
    tabs) as the key and the line number of the line (the index of the word
    in the dictionary) as the value.
    """
    return dict((line.strip().split("\t")[0], idx)
                for idx, line in enumerate(open(dict_path, "r").readlines()))


def load_reverse_dict(dict_path):
    """
    Load the word dictionary from the given file.
    Each line of the given file is a word, which can include multiple columns
    separated by tabs.
    This function takes the line number of a line (the index of the word in
    the dictionary) as the key and the first column (columns in a line are
    separated by tabs) as the value.
    """
    return dict((idx, line.strip().split("\t")[0])
                for idx, line in enumerate(open(dict_path, "r").readlines()))
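A small, hypothetical example of the two loaders (assumes a dictionary file whose first lines begin with "the", "of", "and"):

# load_dict maps word -> index; load_reverse_dict maps index -> word.
d = load_dict("data/vocab.txt")           # {"the": 0, "of": 1, "and": 2, ...}
rd = load_reverse_dict("data/vocab.txt")  # {0: "the", 1: "of", 2: "and", ...}
assert rd[d["the"]] == "the"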
#!/bin/bash
# This file is only used for continuous evaluation.
export CE_MODE_X=1
python train.py cnn | python _ce.py
#### This file is only used for continuous evaluation tests!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!!!!
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
train_cost_kpi = CostKpi('train_cost', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)

tracking_kpis = [
    train_acc_kpi,
    train_cost_kpi,
    train_duration_kpi,
]


def parse_log(log):
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
 import unittest
 import contextlib
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 import numpy as np
 import sys
 import time
...
 import unittest
 import contextlib
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 import numpy as np
 import sys
 import time
...
@@ -4,8 +4,8 @@ import unittest
 import contextlib
 import numpy as np
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 import utils
...
@@ -2,8 +2,8 @@ import sys
 import time
 import numpy as np
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 def bow_net(data,
...
+import os
 import sys
 import time
 import unittest
 import contextlib
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 import utils
 from nets import bow_net
@@ -53,8 +54,12 @@ def train(train_reader,
     exe = fluid.Executor(place)
     feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
+    # For internal continuous evaluation
+    if "CE_MODE_X" in os.environ:
+        fluid.default_startup_program().random_seed = 110
     exe.run(fluid.default_startup_program())
     for pass_id in xrange(pass_num):
+        pass_start = time.time()
         data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
         for data in train_reader():
             avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
@@ -73,6 +78,13 @@ def train(train_reader,
         epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
         fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, exe)
+        pass_end = time.time()
+        # For internal continuous evaluation
+        if "CE_MODE_X" in os.environ:
+            print("kpis\ttrain_acc\t%f" % avg_acc)
+            print("kpis\ttrain_cost\t%f" % avg_cost)
+            print("kpis\ttrain_duration\t%f" % (pass_end - pass_start))
 def train_net():
     word_dict, train_reader, test_reader = utils.prepare_data(
...
+import os
 import sys
 import time
 import numpy as np
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 def to_lodtensor(data, place):
@@ -64,15 +65,22 @@ def prepare_data(data_type="imdb",
         raise RuntimeError("No such dataset")
     if data_type == "imdb":
-        train_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.imdb.train(word_dict), buf_size=buf_size),
-            batch_size=batch_size)
-        test_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.imdb.test(word_dict), buf_size=buf_size),
-            batch_size=batch_size)
+        if "CE_MODE_X" in os.environ:
+            train_reader = paddle.batch(
+                paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
+            test_reader = paddle.batch(
+                paddle.dataset.imdb.test(word_dict), batch_size=batch_size)
+        else:
+            train_reader = paddle.batch(
+                paddle.reader.shuffle(
+                    paddle.dataset.imdb.train(word_dict), buf_size=buf_size),
+                batch_size=batch_size)
+            test_reader = paddle.batch(
+                paddle.reader.shuffle(
+                    paddle.dataset.imdb.test(word_dict), buf_size=buf_size),
+                batch_size=batch_size)
     else:
         raise RuntimeError("no such dataset")
...