提交 0f1132e7，作者：zhengya01

add ctr ce

上级 a9b13609
...@@ -6,11 +6,17 @@ export OMP_NUM_THREADS=1 ...@@ -6,11 +6,17 @@ export OMP_NUM_THREADS=1
#cudaid=${face_detection:=0} # use 0-th card as default #cudaid=${face_detection:=0} # use 0-th card as default
#export CUDA_VISIBLE_DEVICES=$cudaid #export CUDA_VISIBLE_DEVICES=$cudaid
export CPU_NUM=1
export NUM_THREADS=1 export NUM_THREADS=1
FLAGS_benchmark=true python train.py --is_local 1 --cloud_train 0 --train_data_path data/raw/train.txt --enable_ce | python _ce.py FLAGS_benchmark=true python train.py --is_local 1 --cloud_train 0 --train_data_path data/raw/train.txt --enable_ce | python _ce.py
export NUM_THREADS=4 export CPU_NUM=1
export NUM_THREADS=8
FLAGS_benchmark=true python train.py --is_local 1 --cloud_train 0 --train_data_path data/raw/train.txt --enable_ce | python _ce.py FLAGS_benchmark=true python train.py --is_local 1 --cloud_train 0 --train_data_path data/raw/train.txt --enable_ce | python _ce.py
export CPU_NUM=8
export NUM_THREADS=8
FLAGS_benchmark=true python train.py --is_local 1 --cloud_train 0 --train_data_path data/raw/train.txt --enable_ce | python _ce.py
...@@ -8,24 +8,32 @@ from kpi import DurationKpi ...@@ -8,24 +8,32 @@ from kpi import DurationKpi
from kpi import AccKpi from kpi import AccKpi
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True) each_pass_duration_cpu1_thread1_kpi = DurationKpi('each_pass_duration_cpu1_thread1', 0.08, 0, actived=True)
train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0) train_loss_cpu1_thread1_kpi = CostKpi('train_loss_cpu1_thread1', 0.08, 0)
train_auc_val_card1_kpi = AccKpi('train_auc_val_card1', 0.08, 0) train_auc_val_cpu1_thread1_kpi = AccKpi('train_auc_val_cpu1_thread1', 0.08, 0)
train_batch_auc_val_card1_kpi = AccKpi('train_batch_auc_val_card1', 0.08, 0) train_batch_auc_val_cpu1_thread1_kpi = AccKpi('train_batch_auc_val_cpu1_thread1', 0.08, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True) each_pass_duration_cpu1_thread8_kpi = DurationKpi('each_pass_duration_cpu1_thread8', 0.08, 0, actived=True)
train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0) train_loss_cpu1_thread8_kpi = CostKpi('train_loss_cpu1_thread8', 0.08, 0)
train_auc_val_card4_kpi = AccKpi('train_auc_val_card4', 0.08, 0) train_auc_val_cpu1_thread8_kpi = AccKpi('train_auc_val_cpu1_thread8', 0.08, 0)
train_batch_auc_val_card4_kpi = AccKpi('train_batch_auc_val_card4', 0.08, 0) train_batch_auc_val_cpu1_thread8_kpi = AccKpi('train_batch_auc_val_cpu1_thread8', 0.08, 0)
each_pass_duration_cpu8_thread8_kpi = DurationKpi('each_pass_duration_cpu8_thread8', 0.08, 0, actived=True)
train_loss_cpu8_thread8_kpi = CostKpi('train_loss_cpu8_thread8', 0.08, 0)
train_auc_val_cpu8_thread8_kpi = AccKpi('train_auc_val_cpu8_thread8', 0.08, 0)
train_batch_auc_val_cpu8_thread8_kpi = AccKpi('train_batch_auc_val_cpu8_thread8', 0.08, 0)
tracking_kpis = [ tracking_kpis = [
each_pass_duration_card1_kpi, each_pass_duration_cpu1_thread1_kpi,
train_loss_card1_kpi, train_loss_cpu1_thread1_kpi,
train_auc_val_card1_kpi, train_auc_val_cpu1_thread1_kpi,
train_batch_auc_val_card1_kpi, train_batch_auc_val_cpu1_thread1_kpi,
each_pass_duration_card4_kpi, each_pass_duration_cpu1_thread8_kpi,
train_loss_card4_kpi, train_loss_cpu1_thread8_kpi,
train_auc_val_card4_kpi, train_auc_val_cpu1_thread8_kpi,
train_batch_auc_val_card4_kpi train_batch_auc_val_cpu1_thread8_kpi,
each_pass_duration_cpu8_thread8_kpi,
train_loss_cpu8_thread8_kpi,
train_auc_val_cpu8_thread8_kpi,
train_batch_auc_val_cpu8_thread8_kpi,
] ]
......
...@@ -116,6 +116,11 @@ def parse_args(): ...@@ -116,6 +116,11 @@ def parse_args():
type=int, type=int,
default=1, default=1,
help='The num of threads, (default: 1)') help='The num of threads, (default: 1)')
parser.add_argument(
'--num_cpu',
type=int,
default=1,
help='The num of cpu, (default: 1)')
return parser.parse_args() return parser.parse_args()
...@@ -193,16 +198,16 @@ def train_loop(args, train_program, py_reader, loss, auc_var, batch_auc_var, ...@@ -193,16 +198,16 @@ def train_loop(args, train_program, py_reader, loss, auc_var, batch_auc_var,
# only for ce # only for ce
if args.enable_ce: if args.enable_ce:
cpu_num = get_cards(args) threads_num, cpu_num = get_cards(args)
epoch_idx = args.num_passes epoch_idx = args.num_passes
print("kpis\teach_pass_duration_card%s\t%s" % print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
(cpu_num, total_time / epoch_idx)) (cpu_num, threads_num, total_time / epoch_idx))
print("kpis\ttrain_loss_card%s\t%s" % print("kpis\ttrain_loss_cpu%s_thread%s\t%s" %
(cpu_num, loss_val/args.batch_size)) (cpu_num, threads_num, loss_val/args.batch_size))
print("kpis\ttrain_auc_val_card%s\t%s" % print("kpis\ttrain_auc_val_cpu%s_thread%s\t%s" %
(cpu_num, auc_val)) (cpu_num, threads_num, auc_val))
print("kpis\ttrain_batch_auc_val_card%s\t%s" % print("kpis\ttrain_batch_auc_val_cpu%s_thread%s\t%s" %
(cpu_num, batch_auc_val)) (cpu_num, threads_num, batch_auc_val))
def train(): def train():
...@@ -257,10 +262,11 @@ def train(): ...@@ -257,10 +262,11 @@ def train():
def get_cards(args): def get_cards(args):
if args.enable_ce: if args.enable_ce:
cards = os.environ.get('NUM_THREADS', 1) threads_num = os.environ.get('NUM_THREADS', 1)
return int(cards) cpu_num = os.environ.get('CPU_NUM', 1)
return int(threads_num), int(cpu_num)
else: else:
return args.num_threads return args.num_threads, args.num_cpu
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册