Commit b199f192 authored by Yibing Liu

Merge branch 'develop' of upstream into text_cls_ce

#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${language_model:=0} # use card 0 by default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py | python _ce.py
cudaid=${language_model_m:=0,1,2,3} # use cards 0,1,2,3 by default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py | python _ce.py
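Both invocations pipe train.py's stdout into _ce.py, so training script and CE parser communicate through a line protocol on stdout. A minimal sketch of that protocol (KPI names taken from the definitions registered below; the numeric values are hypothetical, not from an actual run):

# Each CE record is one line of three tab-separated fields: 'kpis', name, value.
# _ce.py ignores every other line, so ordinary training logs pass through harmlessly.
print("kpis\timikolov_20_avg_ppl\t%.3f" % 43.210)        # hypothetical value
print("kpis\timikolov_20_pass_duration\t%.2f" % 95.37)   # hypothetical value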
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0)
imikolov_20_pass_duration_kpi = DurationKpi(
    'imikolov_20_pass_duration', 0.02, 0, actived=True)
imikolov_20_avg_ppl_kpi_card4 = CostKpi('imikolov_20_avg_ppl_card4', 0.2, 0)
imikolov_20_pass_duration_kpi_card4 = DurationKpi(
    'imikolov_20_pass_duration_card4', 0.03, 0, actived=True)

tracking_kpis = [
    imikolov_20_avg_ppl_kpi,
    imikolov_20_pass_duration_kpi,
    imikolov_20_avg_ppl_kpi_card4,
    imikolov_20_pass_duration_kpi_card4,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.
    Each KPI record in the log must be one line of three tab-separated
    fields, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_acc\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
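As a quick sanity check, parse_log can be exercised directly on a synthetic log; this snippet is illustrative only and not part of the commit:

# Feed a fabricated log through parse_log: non-KPI lines are skipped,
# well-formed KPI lines are yielded as (name, value) pairs.
sample_log = "epoch_1 start\nkpis\timikolov_20_avg_ppl\t43.2\nkpis\timikolov_20_pass_duration\t120.5"
for name, value in parse_log(sample_log):
    print(name, value)  # ('imikolov_20_avg_ppl', 43.2), then the duration KPI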
+import os
 import sys
 import time
 import numpy as np
 import math
+import argparse
 import paddle.fluid as fluid
-import paddle.v2 as paddle
+import paddle
 import utils

+SEED = 102
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("language_model benchmark.")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help='If set, run the task with continuous evaluation logs.')
+    args = parser.parse_args()
+    return args
+
+
 def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
     """ network definition """
@@ -63,31 +77,26 @@ def train(train_reader,
           init_low_bound=-0.04,
           init_high_bound=0.04):
     """ train network """
+    args = parse_args()
+    if args.enable_ce:
+        # The random seed must be set before configuring the network.
+        fluid.default_startup_program().random_seed = SEED
+
     vocab_size = len(vocab)

+    # Input data
     src_wordseq = fluid.layers.data(
         name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
     dst_wordseq = fluid.layers.data(
         name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)

+    # Train program
     avg_cost = None
-    if not parallel:
-        cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
-                       init_low_bound, init_high_bound)
-        avg_cost = fluid.layers.mean(x=cost)
-    else:
-        places = fluid.layers.get_places()
-        pd = fluid.layers.ParallelDo(places)
-        with pd.do():
-            cost = network(
-                pd.read_input(src_wordseq),
-                pd.read_input(dst_wordseq), vocab_size, hid_size,
-                init_low_bound, init_high_bound)
-            pd.write_output(cost)
-        cost = pd()
-        avg_cost = fluid.layers.mean(x=cost)
+    cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
+                   init_low_bound, init_high_bound)
+    avg_cost = fluid.layers.mean(x=cost)

+    # Optimization to minimize loss
     sgd_optimizer = fluid.optimizer.SGD(
         learning_rate=fluid.layers.exponential_decay(
             learning_rate=base_lr,
@@ -96,39 +105,56 @@ def train(train_reader,
             staircase=True))
     sgd_optimizer.minimize(avg_cost)

+    # Initialize executor
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
+    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)

     total_time = 0.0
+    fetch_list = [avg_cost.name]
     for pass_idx in xrange(pass_num):
         epoch_idx = pass_idx + 1
         print "epoch_%d start" % epoch_idx

         t0 = time.time()
         i = 0
+        newest_ppl = 0
         for data in train_reader():
             i += 1
             lod_src_wordseq = utils.to_lodtensor(
                 map(lambda x: x[0], data), place)
             lod_dst_wordseq = utils.to_lodtensor(
                 map(lambda x: x[1], data), place)
-            ret_avg_cost = exe.run(fluid.default_main_program(),
-                                   feed={
-                                       "src_wordseq": lod_src_wordseq,
-                                       "dst_wordseq": lod_dst_wordseq
-                                   },
-                                   fetch_list=[avg_cost],
-                                   use_program_cache=True)
-            avg_ppl = math.exp(ret_avg_cost[0])
+            ret_avg_cost = train_exe.run(
+                feed={
+                    "src_wordseq": lod_src_wordseq,
+                    "dst_wordseq": lod_dst_wordseq
+                },
+                fetch_list=fetch_list)
+            avg_ppl = np.exp(ret_avg_cost[0])
+            newest_ppl = np.mean(avg_ppl)
             if i % 100 == 0:
-                print "step:%d ppl:%.3f" % (i, avg_ppl)
+                print "step:%d ppl:%.3f" % (i, newest_ppl)

         t1 = time.time()
         total_time += t1 - t0
         print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
                                                          total_time / epoch_idx)

+        if pass_idx == pass_num - 1 and args.enable_ce:
+            # Note: the following logs are used only by CE monitoring;
+            # other situations do not need to care about them. They are
+            # tab-separated so that parse_log in _ce.py can pick them up.
+            gpu_num = get_cards(args.enable_ce)
+            if gpu_num == 1:
+                print("kpis\timikolov_20_pass_duration\t%s" %
+                      (total_time / epoch_idx))
+                print("kpis\timikolov_20_avg_ppl\t%s" % newest_ppl)
+            else:
+                print("kpis\timikolov_20_pass_duration_card%s\t%s" %
+                      (gpu_num, total_time / epoch_idx))
+                print("kpis\timikolov_20_avg_ppl_card%s\t%s" %
+                      (gpu_num, newest_ppl))
+
         save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
         feed_var_names = ["src_wordseq", "dst_wordseq"]
         fetch_vars = [avg_cost]
@@ -138,11 +164,22 @@ def train(train_reader,
     print("finish training")


+def get_cards(enable_ce):
+    if enable_ce:
+        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
+        num = len(cards.split(","))
+        return num
+    else:
+        return fluid.core.get_cuda_device_count()
+
+
 def train_net():
     """ do training """
     batch_size = 20
+    args = parse_args()
     vocab, train_reader, test_reader = utils.prepare_data(
-        batch_size=batch_size, buffer_size=1000, word_freq_threshold=0)
+        batch_size=batch_size * get_cards(args.enable_ce), buffer_size=1000,
+        word_freq_threshold=0, enable_ce=args.enable_ce)
     train(
         train_reader=train_reader,
         vocab=vocab,
@@ -152,7 +189,7 @@ def train_net():
         batch_size=batch_size,
         pass_num=12,
         use_cuda=True,
-        parallel=False,
+        parallel=True,
         model_dir="model",
         init_low_bound=-0.1,
         init_high_bound=0.1)
...
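Under CE, get_cards(True) derives the card count purely from CUDA_VISIBLE_DEVICES, which is exactly what .run_ce.sh exports before launching train.py. A small illustration of that resolution (the env value mirrors the 4-card run configured above; not part of the commit):

import os

# .run_ce.sh exports the visible devices before launching train.py.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
cards = os.environ.get('CUDA_VISIBLE_DEVICES')
print(len(cards.split(',')))  # -> 4, so the KPI names get a _card4 suffix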
@@ -3,7 +3,7 @@ import time
 import numpy as np
 import paddle.fluid as fluid
-import paddle.v2 as paddle
+import paddle


 def to_lodtensor(data, place):
@@ -22,17 +22,28 @@ def to_lodtensor(data, place):
     return res


-def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0):
+def prepare_data(batch_size,
+                 buffer_size=1000,
+                 word_freq_threshold=0,
+                 enable_ce=False):
     """ prepare the English Penn Treebank (PTB) data """
     vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold)
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imikolov.train(
-                vocab,
-                buffer_size,
-                data_type=paddle.dataset.imikolov.DataType.SEQ),
-            buf_size=buffer_size),
-        batch_size)
+    if enable_ce:
+        train_reader = paddle.batch(
+            paddle.dataset.imikolov.train(
+                vocab,
+                buffer_size,
+                data_type=paddle.dataset.imikolov.DataType.SEQ),
+            batch_size)
+    else:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imikolov.train(
+                    vocab,
+                    buffer_size,
+                    data_type=paddle.dataset.imikolov.DataType.SEQ),
+                buf_size=buffer_size),
+            batch_size)
     test_reader = paddle.batch(
         paddle.dataset.imikolov.test(
             vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ),
...
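The enable_ce branch drops paddle.reader.shuffle so that, together with the pinned random_seed in train.py, every CE run consumes batches in the same order; KPI differences then reflect code changes rather than data-order noise. A trivial illustration of the property being relied on (hypothetical reader, not part of the commit):

# Without shuffling, two passes over the same reader yield identical order,
# which is the precondition for comparing KPIs across CE runs.
def tiny_reader():
    for i in range(5):
        yield [i]

assert [b for b in tiny_reader()] == [b for b in tiny_reader()]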
@@ -7,9 +7,9 @@ from kpi import CostKpi, DurationKpi, AccKpi

 #### NOTE kpi.py should be shared across models in some way!!!!

-train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
-test_cost_kpi = CostKpi('test_cost', 0.005, actived=True)
-train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
+test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=True)
+train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
 tracking_kpis = [
     train_cost_kpi,
...
@@ -151,9 +151,9 @@ def train():

         # This log is for continuous evaluation only
         if args.enable_ce:
-            print("kpis train_cost %f" % avg_cost_train)
-            print("kpis test_cost %f" % test_loss)
-            print("kpis train_duration %f" % time_consumed)
+            print("kpis\ttrain_cost\t%f" % avg_cost_train)
+            print("kpis\ttest_cost\t%f" % test_loss)
+            print("kpis\ttrain_duration\t%f" % time_consumed)

         if pass_id % args.save_interval == 0:
             model_path = os.path.join(args.save_dir, str(pass_id))
...
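The switch from spaces to tabs above is load-bearing: parse_log in _ce.py splits each line on '\t' and accepts only records with exactly three fields beginning with 'kpis', so space-separated lines are silently dropped. A short illustration (values hypothetical, not from a run):

line_spaces = "kpis train_cost 0.25"
line_tabs = "kpis\ttrain_cost\t0.25"
print(len(line_spaces.split('\t')))  # -> 1 field: ignored by parse_log
print(len(line_tabs.split('\t')))    # -> 3 fields: recorded as a KPI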
-cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc
-export MKL_NUM_THREADS=1
-export OMP_NUM_THREADS=1
-cudaid=${object_detection_cudaid:=0} # use card 0 by default
-export CUDA_VISIBLE_DEVICES=$cudaid
-if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ]; then
-    mkdir -p /root/.cache/paddle/dataset/pascalvoc
-    ./data/pascalvoc/download.sh
-    bash ./.move.sh
-fi
-FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=2 --for_model_ce=True --data_dir=/root/.cache/paddle/dataset/pascalvoc/
+#!/bin/bash
+# This file is only used for continuous evaluation.
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ]; then
+    mkdir -p /root/.cache/paddle/dataset/pascalvoc
+    ./data/pascalvoc/download.sh
+    cp -r ./data/pascalvoc/. /root/.cache/paddle/dataset/pascalvoc
+fi
+cudaid=${object_detection_cudaid:=0}
+export CUDA_VISIBLE_DEVICES=$cudaid
+FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
+cudaid=${object_detection_cudaid:=0,1,2,3}
+export CUDA_VISIBLE_DEVICES=$cudaid
+FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
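The two invocations differ only in CUDA_VISIBLE_DEVICES, and the multi-card run reports its KPIs under _card<N>-suffixed names so that both baselines are tracked separately (see the KPI definitions in _ce.py below). A sketch of the naming rule the scripts and KPI definitions jointly assume; kpi_name here is a hypothetical helper, not part of the commit:

# Single-card runs use the bare KPI name; multi-card runs append _card<N>.
def kpi_name(base, devices_num):
    return base if devices_num == 1 else "%s_card%d" % (base, devices_num)

print(kpi_name("train_cost", 1))  # -> train_cost
print(kpi_name("train_cost", 4))  # -> train_cost_card4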
#### This file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!

train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=True)
train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=True)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True)

tracking_kpis = [
    train_cost_kpi,
    test_acc_kpi,
    train_speed_kpi,
    train_cost_card4_kpi,
    test_acc_card4_kpi,
    train_speed_card4_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.
    Each KPI record in the log must be one line of three tab-separated
    fields, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_acc\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.initializer import MSRA
 from paddle.fluid.param_attr import ParamAttr
...
@@ -23,7 +23,7 @@ add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalv
 add_arg('model_save_dir', str, 'model', "The path to save model.")
 add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
 add_arg('apply_distort', bool, True, "Whether apply distort.")
-add_arg('apply_expand', bool, True, "Whether appley expand.")
+add_arg('apply_expand', bool, True, "Whether apply expand.")
 add_arg('nms_threshold', float, 0.45, "NMS threshold.")
 add_arg('ap_version', str, '11point', "integral, 11point.")
 add_arg('resize_h', int, 300, "The resized image height.")
@@ -32,10 +32,8 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will
 add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78
 add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
 add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
-add_arg('for_model_ce', bool, False, "Use CE to evaluate the model")
 add_arg('data_dir', str, 'data/pascalvoc', "data directory")
-add_arg('skip_batch_num', int, 5, "the num of minibatch to skip.")
-add_arg('iterations', int, 120, "mini batchs.")
+add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model")
 #yapf: enable
@@ -48,6 +46,9 @@ def train(args,
           num_passes,
           model_save_dir,
           pretrained_model=None):
+    if args.enable_ce:
+        fluid.framework.default_startup_program().random_seed = 111
+
     image_shape = [3, data_args.resize_h, data_args.resize_w]
     if 'coco' in data_args.dataset:
         num_classes = 91
@@ -121,8 +122,12 @@ def train(args,
     train_exe = fluid.ParallelExecutor(
         use_cuda=args.use_gpu, loss_name=loss.name)

-    train_reader = paddle.batch(
-        reader.train(data_args, train_file_list), batch_size=batch_size)
+    if not args.enable_ce:
+        train_reader = paddle.batch(
+            reader.train(data_args, train_file_list), batch_size=batch_size)
+    else:
+        train_reader = paddle.batch(
+            reader.train(data_args, train_file_list, False), batch_size=batch_size)
     test_reader = paddle.batch(
         reader.test(data_args, val_file_list), batch_size=batch_size)
     feeder = fluid.DataFeeder(
@@ -140,32 +145,32 @@ def train(args,
     def test(pass_id, best_map):
         _, accum_map = map_eval.get_map_var()
         map_eval.reset(exe)
+        every_pass_map = []
         for batch_id, data in enumerate(test_reader()):
             test_map, = exe.run(test_program,
                                 feed=feeder.feed(data),
                                 fetch_list=[accum_map])
             if batch_id % 20 == 0:
+                every_pass_map.append(test_map)
                 print("Batch {0}, map {1}".format(batch_id, test_map))
+        mean_map = np.mean(every_pass_map)
         if test_map[0] > best_map:
             best_map = test_map[0]
             save_model('best_model')
         print("Pass {0}, test map {1}".format(pass_id, test_map))
-        return best_map
+        return best_map, mean_map

-    train_num = 0
-    total_train_time = 0.0
+    total_time = 0.0
     for pass_id in range(num_passes):
+        epoch_idx = pass_id + 1
         start_time = time.time()
         prev_start_time = start_time
-        # end_time = 0
         every_pass_loss = []
         iter = 0
         pass_duration = 0.0
         for batch_id, data in enumerate(train_reader()):
             prev_start_time = start_time
             start_time = time.time()
-            if args.for_model_ce and iter == args.iterations:
-                break
             if len(data) < (devices_num * 2):
                 print("There are too few data to train on all devices.")
                 continue
@@ -176,34 +181,31 @@ def train(args,
             loss_v, = exe.run(fluid.default_main_program(),
                               feed=feeder.feed(data),
                               fetch_list=[loss])
-            # end_time = time.time()
             loss_v = np.mean(np.array(loss_v))
+            every_pass_loss.append(loss_v)
             if batch_id % 20 == 0:
                 print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                     pass_id, batch_id, loss_v, start_time - prev_start_time))
-            if args.for_model_ce and iter >= args.skip_batch_num or pass_id != 0:
-                batch_duration = time.time() - start_time
-                pass_duration += batch_duration
-                train_num += len(data)
-                every_pass_loss.append(loss_v)
-            iter += 1
-        total_train_time += pass_duration

-        if args.for_model_ce and pass_id == num_passes - 1:
-            examples_per_sec = train_num / total_train_time
-            cost = np.mean(every_pass_loss)
-            with open("train_speed_factor.txt", 'w') as f:
-                f.write('{:f}\n'.format(examples_per_sec))
-            with open("train_cost_factor.txt", 'a+') as f:
-                f.write('{:f}\n'.format(cost))
+        end_time = time.time()
+        best_map, mean_map = test(pass_id, best_map)
+        if args.enable_ce and pass_id == 1:
+            total_time += end_time - start_time
+            train_avg_loss = np.mean(every_pass_loss)
+            # KPI logs are tab-separated so parse_log in _ce.py accepts them.
+            if devices_num == 1:
+                print("kpis\ttrain_cost\t%s" % train_avg_loss)
+                print("kpis\ttest_acc\t%s" % mean_map)
+                print("kpis\ttrain_speed\t%s" % (total_time / epoch_idx))
+            else:
+                print("kpis\ttrain_cost_card%s\t%s" % (devices_num, train_avg_loss))
+                print("kpis\ttest_acc_card%s\t%s" % (devices_num, mean_map))
+                print("kpis\ttrain_speed_card%s\t%f" %
+                      (devices_num, total_time / epoch_idx))

-        best_map = test(pass_id, best_map)
         if pass_id % 10 == 0 or pass_id == num_passes - 1:
             save_model(str(pass_id))
     print("Best test map {0}".format(best_map))


 if __name__ == '__main__':
     args = parser.parse_args()
     print_arguments(args)
...