提交 85197084 编写于 作者: Z zhengya01

add ce

上级 e6ed31ef
#!/bin/bash
# Continuous-evaluation (CE) driver for the HiNAS model.
#
# Runs the same one-epoch training three times, on 1, 4 and 8 GPUs, and pipes
# each training log into _ce.py.
#
# Optional environment overrides (comma-separated GPU ids):
#   HiNAS_models    cards for the first run   (default: 0)
#   HiNAS_models_4  cards for the second run  (default: 0,1,2,3)
#   HiNAS_models_8  cards for the third run   (default: 0,1,2,3,4,5,6,7)

export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

# Train for one epoch on the GPUs listed in $1 and hand the log to _ce.py.
# cudaid is deliberately left global, as in the unrolled form of this script.
run_ce_pass() {
  cudaid=$1
  export CUDA_VISIBLE_DEVICES=$cudaid
  FLAGS_benchmark=true python train_hinas.py \
    --model=0 \
    --random_flip_left_right=False \
    --random_flip_up_down=False \
    --pad_and_cut_image=False \
    --shuffle_image=False \
    --batch_size=128 \
    --num_epochs=1 \
    --cutout=False \
    --dropout_rate=0.5 \
    --enable_ce=True | python _ce.py
}

# ':=' also stores the default into the variable when it is unset or empty.
run_ce_pass "${HiNAS_models:=0}"                  # use 0-th card as default
run_ce_pass "${HiNAS_models_4:=0,1,2,3}"          # use 0,1,2,3 card as default
run_ce_pass "${HiNAS_models_8:=0,1,2,3,4,5,6,7}"  # use 0,1,2,3,4,5,6,7 card as default
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi
# One duration / loss / accuracy KPI per GPU configuration (1, 4 and 8 cards).
# The names must match the "kpis\t<name>\t<value>" records printed by the
# training script.
# NOTE(review): the 0.08 and 0 positional arguments are passed straight to the
# kpi classes; their meaning is defined in the kpi module - confirm there.

# 1 card
each_pass_duration_card1_kpi = DurationKpi(
    'each_pass_duration_card1', 0.08, 0, actived=True)
train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0)
train_acc_card1_kpi = AccKpi('train_acc_card1', 0.08, 0)

# 4 cards
each_pass_duration_card4_kpi = DurationKpi(
    'each_pass_duration_card4', 0.08, 0, actived=True)
train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0)
train_acc_card4_kpi = AccKpi('train_acc_card4', 0.08, 0)

# 8 cards
each_pass_duration_card8_kpi = DurationKpi(
    'each_pass_duration_card8', 0.08, 0, actived=True)
train_loss_card8_kpi = CostKpi('train_loss_card8', 0.08, 0)
train_acc_card8_kpi = AccKpi('train_acc_card8', 0.08, 0)

# Every KPI that log_to_ce() is allowed to record, in reporting order.
tracking_kpis = [
    each_pass_duration_card1_kpi,
    train_loss_card1_kpi,
    train_acc_card1_kpi,
    each_pass_duration_card4_kpi,
    train_loss_card4_kpi,
    train_acc_card4_kpi,
    each_pass_duration_card8_kpi,
    train_loss_card8_kpi,
    train_acc_card8_kpi,
]
def parse_log(log):
    '''
    Yield the KPI records contained in a training log.

    A record is a line that, once surrounding whitespace is stripped, has
    exactly three tab-separated fields and whose first field is the literal
    "kpis", for example:

        kpis<TAB>train_loss_card1<TAB>1.0
        kpis<TAB>train_acc_card1<TAB>0.9

    Every other line is ignored.

    Args:
        log: the complete log text as a single string.

    Yields:
        (kpi_name, kpi_value) tuples, with kpi_value converted to float.

    Raises:
        ValueError: if the third field of a record is not a valid float.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Echo every split line so the raw log stays visible in the CE output.
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    """Record every KPI found in ``log`` on its tracked KPI object.

    Each record produced by parse_log() is echoed to stdout, added to the
    KPI registered under the same name in ``tracking_kpis`` and persisted
    right away. A record whose name is not tracked raises KeyError.
    """
    kpis_by_name = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracked = kpis_by_name[name]
        tracked.add_record(value)
        tracked.persist()
if __name__ == '__main__':
    # The whole training log is read from stdin before any KPI is recorded.
    log_to_ce(sys.stdin.read())
......@@ -17,6 +17,8 @@ from __future__ import division
from __future__ import print_function
import math
import time
import os
import numpy as np
import paddle
......@@ -42,6 +44,10 @@ flags.DEFINE_float("momentum", 0.9, "momentum")
flags.DEFINE_boolean("shuffle_image", True, "shuffle input images on training")
flags.DEFINE_integer("num_devices", 1, "total devices for ce")
flags.DEFINE_boolean("enable_ce", False, "enable ce")
dataset_train_size = 50000
......@@ -104,10 +110,15 @@ class Model(object):
costs = []
accs = []
ce_costs = []
ce_accs = []
def event_handler(event):
if isinstance(event, EndStepEvent):
costs.append(event.metrics[0])
accs.append(event.metrics[1])
ce_costs.append(event.metrics[0])
ce_accs.append(event.metrics[1])
if event.step % 20 == 0:
print("Epoch %d, Step %d, Loss %f, Acc %f" % (
event.epoch, event.step, np.mean(costs), np.mean(accs)))
......@@ -125,6 +136,7 @@ class Model(object):
(event.epoch, avg_cost, accuracy))
print("Best acc %f" % event_handler.best_acc)
event_handler.best_acc = 0.0
place = fluid.CUDAPlace(0)
trainer = Trainer(
......@@ -132,8 +144,33 @@ class Model(object):
optimizer_func=self.optimizer_program,
place=place)
total_time = 0
start_time = time.time()
trainer.train(
reader=train_reader,
num_epochs=FLAGS.num_epochs,
event_handler=event_handler,
feed_order=['pixel', 'label'])
end_time = time.time()
total_time = end_time - start_time
# only for ce
if FLAGS.enable_ce:
epoch_idx = FLAGS.num_epochs
gpu_num = get_cards()
print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_loss_card%s\t%s" %
(gpu_num, np.mean(ce_costs)))
print("kpis\ttrain_acc_card%s\t%s" %
(gpu_num, np.mean(ce_accs)))
def get_cards():
    """Return the number of GPU cards used to label the CE KPI names.

    The count is the number of non-empty, comma-separated entries in the
    CUDA_VISIBLE_DEVICES environment variable. FLAGS.num_devices is returned
    instead when FLAGS.num_epochs is falsy, or when CUDA_VISIBLE_DEVICES is
    unset or lists no device at all.
    """
    # NOTE(review): gating on FLAGS.num_epochs looks like a typo for
    # FLAGS.enable_ce; left unchanged because not every caller is visible
    # here - confirm.
    if not FLAGS.num_epochs:
        return FLAGS.num_devices

    # os.environ.get() returns None when the variable is unset; treat that
    # like an empty list instead of failing on None.split().
    visible = os.environ.get('CUDA_VISIBLE_DEVICES') or ''
    # Skip blank entries so "" or a trailing comma is not counted as a card.
    card_ids = [card for card in visible.split(",") if card.strip()]
    if not card_ids:
        return FLAGS.num_devices
    return len(card_ids)
......@@ -30,6 +30,7 @@ import paddle.dataset.common
import tarfile
from absl import flags
FLAGS = flags.FLAGS
flags.DEFINE_boolean("random_flip_left_right", True,
......@@ -81,6 +82,8 @@ def preprocess(sample, is_training):
img = (img_float - mean) / std
if is_training and FLAGS.cutout:
if FLAGS.enable_ce:
np.random.seed(0)
center = np.random.randint(image_size, size=2)
offset_width = max(0, center[0] - half_length)
offset_height = max(0, center[1] - half_length)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册