提交 27d556df 编写于 作者: Z zhengya01 提交者: kolinwei

add ce for xlnet (#3900)

* update .run_ce.sh

* chmod +x .run_ce.sh

* add ce

* add ce

* add ce

* add ce

* add ce for xlnet

* add ce for xlnet

* add xlnet ce
上级 f087880c
# Launch run_classifier.py on the STS-B regression task with a fixed random
# seed and --enable_ce set. Callers pipe this function's output into _ce.py.
train() {
    python run_classifier.py \
        --data_dir data/STS-B \
        --verbose True \
        --shuffle false \
        --init_checkpoint xlnet_cased_L-12_H-768_A-12/params \
        --predict_dir exp/sts-b \
        --model_config_path xlnet_cased_L-12_H-768_A-12/xlnet_config.json \
        --uncased False \
        --save_steps 50 \
        --train_steps 50 \
        --epoch 1 \
        --skip_steps 10 \
        --validation_steps 30 \
        --task_name sts-b \
        --warmup_steps 5 \
        --random_seed 100 \
        --spiece_model_file xlnet_cased_L-12_H-768_A-12/spiece.model \
        --checkpoints checkpoints_sts-b \
        --is_regression True \
        --use_cuda True \
        --eval_batch_size 4 \
        --enable_ce
}
# Run the job twice -- first with one visible GPU, then with four -- and pipe
# each run's output into _ce.py.
for visible_gpus in 0 0,1,2,3; do
    export CUDA_VISIBLE_DEVICES=$visible_gpus
    train | python _ce.py
done
#### This file is only used for continuous evaluation tests!
import os
import sys
sys.path.insert(0, os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!
# KPI objects for the STS-B job, one duration and one cost KPI per GPU
# configuration ("card1" / "card4"). Each KPI's name must match, character for
# character, the name printed on the corresponding "kpis" log line, because
# log_to_ce() looks KPIs up by name.
#
# NOTE(review): the two positional numbers after the name are passed straight
# to the Kpi constructor; presumably a tolerance threshold and a count of
# leading records to skip -- confirm against kpi.py.
train_duration_sts_b_card1 = DurationKpi(
    'train_duration_sts_b_card1',
    0.01,
    0,
    actived=True)
train_cost_sts_b_card1 = CostKpi(
    'train_cost_sts_b_card1',
    0.02,
    0,
    actived=True)
train_duration_sts_b_card4 = DurationKpi(
    'train_duration_sts_b_card4',
    0.04,
    0,
    actived=True)
# NOTE(review): this is the only KPI created with actived=False -- confirm
# that this is intentional.
train_cost_sts_b_card4 = CostKpi(
    'train_cost_sts_b_card4',
    0.08,
    0,
    actived=False)

# Every KPI that log_to_ce() is able to record.
tracking_kpis = [
    train_duration_sts_b_card1,
    train_cost_sts_b_card1,
    train_duration_sts_b_card4,
    train_cost_sts_b_card4,
]
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs extracted from a captured log.

    The log is split on newlines. Each line is stripped of surrounding
    whitespace and split on TAB characters. A line counts as a KPI record
    only when that produces exactly three fields and the first one is the
    literal "kpis":

        kpis<TAB>train_cost_sts_b_card1<TAB>0.25

    The second field is yielded as the KPI name and the third is converted
    with float(); a non-numeric third field raises ValueError. All other
    lines are ignored.

    The split fields of every line are printed, and matching records are
    printed a second time with a "-----" prefix.
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        # Guard clause: anything that is not "kpis", <name>, <value> is skipped.
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        print("-----%s" % fields)
        yield fields[1], float(fields[2])
def log_to_ce(log):
    '''
    Feed every KPI record found in ``log`` to the matching tracked KPI.

    Each (name, value) pair produced by parse_log() is printed, added to the
    KPI of the same name from ``tracking_kpis`` via add_record(), and that
    KPI is then persisted via persist() -- once per record.

    Raises KeyError when the log carries a KPI name that is not present in
    ``tracking_kpis``.
    '''
    kpi_by_name = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracked = kpi_by_name[name]
        tracked.add_record(value)
        tracked.persist()
if __name__ == '__main__':
    # Read the whole log from stdin, echo it between two marker lines, then
    # record the KPIs it contains.
    captured = sys.stdin.read()
    for chunk in ("*****", captured, "****"):
        print(chunk)
    log_to_ce(captured)
......@@ -41,6 +41,7 @@ from model.classifier import create_model
from optimization import optimization
from utils.args import ArgumentGroup, print_arguments, check_cuda
from utils.init import init_pretraining_params, init_checkpoint
from utils.cards import get_cards
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
......@@ -432,20 +433,16 @@ def main(args):
if args.enable_ce:
card_num = get_cards()
ce_cost = 0
ce_acc = 0
ce_time = 0
try:
ce_cost = ce_info[-2][0]
ce_acc = ce_info[-2][1]
ce_time = ce_info[-2][2]
ce_time = ce_info[-2][1]
except:
print("ce info error")
print("kpis\ttrain_duration_%s_card%s\t%s" %
(args.task_name, card_num, ce_time))
(args.task_name.replace("-", "_"), card_num, ce_time))
print("kpis\ttrain_cost_%s_card%s\t%f" %
(args.task_name, card_num, ce_cost))
print("kpis\ttrain_acc_%s_card%s\t%f" %
(args.task_name, card_num, ce_acc))
(args.task_name.replace("-", "_"), card_num, ce_cost))
# final eval on dev set
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
def get_cards():
    """
    Return the number of GPU cards listed in CUDA_VISIBLE_DEVICES.

    The variable is read as a comma-separated list. Blank entries are not
    counted, so a trailing comma ("0,1,") or a whitespace-only value does
    not inflate the result.

    Returns:
        int: number of non-blank entries; 0 when the variable is unset,
        empty, or contains only blanks.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # ''.split(',') yields [''], so filtering blanks also covers the
    # unset/empty case without a separate branch.
    return len([card for card in visible.split(',') if card.strip()])
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册