提交 623698ef 编写于 作者: Z zhengya01 提交者: kolinwei

add ce for emotion_detection (#2364)

上级 e0e198ed
#!/bin/bash
# Settings for the continuous-evaluation (CE) run of the emotion_detection task.

# FLAGS_* values are exported so that child processes inherit them.
export FLAGS_enable_parallel_graph='1'
export FLAGS_sync_nccl_allreduce='1'
export FLAGS_fraction_of_gpu_memory_to_use='0.95'

# Task name and the data / vocabulary / checkpoint locations handed to train().
TASK_NAME="emotion_detection"
DATA_PATH='./data/'
VOCAB_PATH='./data/vocab.txt'
CKPT_PATH='./save_models/textcnn'
# NOTE(review): MODEL_PATH is not referenced by the rest of this script.
MODEL_PATH='./models/textcnn'
#######################################
# Run training on train.tsv with validation on dev.tsv, in CE mode.
# Globals:   TASK_NAME, DATA_PATH, VOCAB_PATH, CKPT_PATH (read)
# Arguments: none
# Outputs:   whatever run_classifier.py writes to stdout/stderr
# Returns:   exit status of run_classifier.py
#######################################
train() {
  # Every expansion is quoted so a path containing whitespace or glob
  # characters reaches run_classifier.py as a single argument.
  python run_classifier.py \
    --task_name "${TASK_NAME}" \
    --use_cuda true \
    --do_train true \
    --do_val true \
    --batch_size 64 \
    --data_dir "${DATA_PATH}" \
    --vocab_path "${VOCAB_PATH}" \
    --output_dir "${CKPT_PATH}" \
    --save_steps 200 \
    --validation_steps 200 \
    --epoch 10 \
    --lr 0.002 \
    --config_path ./config.json \
    --skip_steps 100 \
    --enable_ce
}
# Run one CE pass: expose the given device list, train, and pipe the training
# log into _ce.py.
# Arguments: $1 - value for CUDA_VISIBLE_DEVICES
run_ce() {
  export CUDA_VISIBLE_DEVICES="$1"
  train | python _ce.py
}

# Single-device pass first, then a four-device pass after a 20-second pause.
run_ce 0
sleep 20
run_ce 0,1,2,3
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi
# KPIs recorded for the 1-card and 4-card CE runs. The positional arguments
# after the name are passed through unchanged from the original definitions.
each_step_duration_emotion_detection_card1 = DurationKpi(
    'each_step_duration_emotion_detection_card1', 0.08, 0, actived=True)
train_loss_emotion_detection_card1 = CostKpi(
    'train_loss_emotion_detection_card1', 0.05, 0, actived=False)
# Accuracy is tracked with AccKpi rather than CostKpi.
# NOTE(review): assumes AccKpi is the higher-is-better counterpart of CostKpi
# -- confirm against the kpi module.
train_acc_emotion_detection_card1 = AccKpi(
    'train_acc_emotion_detection_card1', 0.05, 0, actived=True)

each_step_duration_emotion_detection_card4 = DurationKpi(
    'each_step_duration_emotion_detection_card4', 0.08, 0, actived=True)
train_loss_emotion_detection_card4 = CostKpi(
    'train_loss_emotion_detection_card4', 0.05, 0, actived=False)
train_acc_emotion_detection_card4 = AccKpi(
    'train_acc_emotion_detection_card4', 0.05, 0, actived=True)

# Every KPI that log_to_ce() may receive a record for.
tracking_kpis = [
    each_step_duration_emotion_detection_card1,
    train_loss_emotion_detection_card1,
    train_acc_emotion_detection_card1,
    each_step_duration_emotion_detection_card4,
    train_loss_emotion_detection_card4,
    train_acc_emotion_detection_card4,
]
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs found in a training log.

    The log is split on newlines. A line is treated as a KPI record when,
    after stripping surrounding whitespace, it consists of exactly three
    tab-separated fields and the first field is the literal 'kpis':

        kpis<TAB>train_loss_emotion_detection_card1<TAB>0.25

    The second field is the KPI name and the third is converted with
    float(). The split fields of every line, matching or not, are printed.
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        # Anything that is not a three-field 'kpis' record is skipped.
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, name, value = fields
        yield name, float(value)
def log_to_ce(log):
    '''
    Feed every KPI record parsed from `log` into its tracker.

    Trackers are looked up by name among `tracking_kpis`; a parsed name that
    is not registered there raises KeyError. Each record is printed, added
    to its tracker, and the tracker is persisted straight away.
    '''
    trackers = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The whole training log is read from standard input in one go.
    log_to_ce(sys.stdin.read())
......@@ -51,6 +51,8 @@ run_type_g.add_arg("do_train", bool, False, "Whether to perform training.")
run_type_g.add_arg("do_val", bool, False, "Whether to perform evaluation.")
run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
args = parser.parse_args()
def create_model(args,
......@@ -188,6 +190,8 @@ def main(args):
print("Max train steps: %d" % max_train_steps)
train_program = fluid.Program()
if args.random_seed is not None:
train_program.random_seed = args.random_seed
with fluid.program_guard(train_program, startup_prog):
with fluid.unique_name.guard():
......@@ -261,6 +265,7 @@ def main(args):
steps = 0
total_cost, total_acc, total_num_seqs = [], [], []
time_begin = time.time()
ce_info = []
while True:
try:
steps += 1
......@@ -292,6 +297,7 @@ def main(args):
(steps, np.sum(total_cost) / np.sum(total_num_seqs),
np.sum(total_acc) / np.sum(total_num_seqs),
args.skip_steps / used_time))
ce_info.append([np.sum(total_cost) / np.sum(total_num_seqs), np.sum(total_acc) / np.sum(total_num_seqs), used_time])
total_cost, total_acc, total_num_seqs = [], [], []
time_begin = time.time()
......@@ -317,6 +323,24 @@ def main(args):
train_pyreader.reset()
break
if args.do_train and args.enable_ce:
card_num = get_cards()
ce_loss = 0
ce_acc = 0
ce_time = 0
try:
ce_loss = ce_info[-2][0]
ce_acc = ce_info[-2][1]
ce_time = ce_info[-2][2]
except:
print("ce info error")
print("kpis\teach_step_duration_%s_card%s\t%s" %
(task_name, card_num, ce_time))
print("kpis\ttrain_loss_%s_card%s\t%f" %
(task_name, card_num, ce_loss))
print("kpis\ttrain_acc_%s_card%s\t%f" %
(task_name, card_num, ce_acc))
# evaluate on test set
if not args.do_train and args.do_val:
test_pyreader.decorate_paddle_reader(
......@@ -339,6 +363,15 @@ def main(args):
infer(test_exe, test_prog, infer_pyreader,
[probs.name], "infer")
def get_cards():
    """Return the number of comma-separated entries in CUDA_VISIBLE_DEVICES.

    Returns 0 when the variable is unset or empty.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    return len(visible.split(",")) if visible != '' else 0
# Script entry point: print the parsed arguments, then run main() with them.
if __name__ == "__main__":
    utils.print_arguments(args)
    main(args)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册