Unverified commit 75594b98, authored by Yibing Liu, committed by GitHub

Enable model ce for dam (#1447)

* Enable model ce for dam

* Uncomment mem opt
Parent 55737d59
#!/bin/bash
# This file is only used for continuous evaluation.
export CE_MODE_X=1
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
if [ ! -e data_small.pkl ]; then
    wget -c http://dam-data.bj.bcebos.com/data_small.pkl
fi
python train_and_evaluate.py --data_path data_small.pkl \
    --use_cuda \
    --use_pyreader \
    --num_scan_data 1 \
    --batch_size 100 | python _ce.py
# This file is only used for continuous evaluation testing.
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
# NOTE: kpi.py should be shared among models in some way!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [
    train_cost_kpi,
    train_duration_kpi,
]
def parse_log(log):
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
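
For orientation (not part of the commit): _ce.py reads the training log from stdin, and parse_log only picks up tab-separated lines of the form kpis&lt;TAB&gt;name&lt;TAB&gt;value. A minimal, hypothetical usage sketch with made-up numbers:

# Hypothetical sample of what train_and_evaluate.py is expected to emit on
# stdout when CE_MODE_X is set (the numbers below are made up).
sample_log = "step 100: cost 0.52\n" \
             "kpis\ttrain_cost\t0.523100\n" \
             "kpis\ttrain_duration\t182.400000\n"

# parse_log() prints each split line and yields only the two KPI records,
# which log_to_ce() would then add to the matching Kpi objects and persist.
for name, value in parse_log(sample_log):
    print(name, value)  # train_cost 0.5231, then train_duration 182.4
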
@@ -192,6 +192,9 @@ def train(args):
     train_program = fluid.Program()
     train_startup = fluid.Program()
+    if "CE_MODE_X" in os.environ:
+        train_program.random_seed = 110
+        train_startup.random_seed = 110
     with fluid.program_guard(train_program, train_startup):
         with fluid.unique_name.guard():
             if args.use_pyreader:
@@ -217,6 +220,9 @@ def train(args):
     test_program = fluid.Program()
     test_startup = fluid.Program()
+    if "CE_MODE_X" in os.environ:
+        test_program.random_seed = 110
+        test_startup.random_seed = 110
     with fluid.program_guard(test_program, test_startup):
         with fluid.unique_name.guard():
             if args.use_pyreader:
@@ -322,7 +328,7 @@ def train(args):
                 result_file_path = os.path.join(args.save_path,
                                                 'result.' + str(step))
                 evaluate(score_path, result_file_path)
-        return step
+        return step, np.array(cost[0]).mean()

     # train on one epoch with pyreader
     def train_with_pyreader(step):
@@ -367,18 +373,25 @@ def train(args):
             except fluid.core.EOFException:
                 train_pyreader.reset()
                 break
-        return step
+        return step, np.array(cost[0]).mean()

     # train over different epoches
-    global_step = 0
+    global_step, train_time = 0, 0.0
     for epoch in six.moves.xrange(args.num_scan_data):
-        shuffle_train = reader.unison_shuffle(train_data)
+        shuffle_train = reader.unison_shuffle(
+            train_data, seed=110 if ("CE_MODE_X" in os.environ) else None)
         train_batches = reader.build_batches(shuffle_train, data_conf)

+        begin_time = time.time()
         if args.use_pyreader:
-            global_step = train_with_pyreader(global_step)
+            global_step, last_cost = train_with_pyreader(global_step)
         else:
-            global_step = train_with_feed(global_step)
+            global_step, last_cost = train_with_feed(global_step)
+        train_time += time.time() - begin_time
+
+        # For internal continuous evaluation
+        if "CE_MODE_X" in os.environ:
+            print("kpis\ttrain_cost\t%f" % last_cost)
+            print("kpis\ttrain_duration\t%f" % train_time)

 if __name__ == '__main__':
......
@@ -17,6 +17,7 @@ def unison_shuffle(data, seed=None):
     assert len(y) == len(c) == len(r)

     p = np.random.permutation(len(y))
+    print(p)
     shuffle_data = {six.b('y'): y[p], six.b('c'): c[p], six.b('r'): r[p]}
     return shuffle_data
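
As a side note (an assumption, since the seeding code sits outside the lines shown in this hunk): passing seed=110 presumably makes unison_shuffle seed NumPy's RNG before drawing the permutation, so CE runs shuffle the data identically across runs. A minimal sketch of that behavior:

import numpy as np

# Hypothetical illustration only; the actual seeding in unison_shuffle is not
# shown in the hunk above.
def seeded_permutation(n, seed=None):
    if seed is not None:
        np.random.seed(seed)            # fix the RNG state for reproducibility
    return np.random.permutation(n)     # same seed -> same permutation

# With the seed value used under CE_MODE_X, two calls produce the same order:
assert (seeded_permutation(10, seed=110) == seeded_permutation(10, seed=110)).all()
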
......