提交 cc785f8d 编写于 作者: C chenxuyi

ernie reorganize

上级 507e0916
*.pyc
*.un~
*.swp
*.egg-info/
......@@ -50,11 +50,11 @@ sh ./distill/script/distill_chnsenticorp.sh
该脚本会进行前述的三步:1. 在任务数据上Fine-tune。 2. 加载Fine-tune好的模型对增强数据进行打分。 3. 使用Student模型进行训练。脚本采用hard-label蒸馏,在第二步中将会直接预测出ERNIE标注的label。
该脚本涉及两个python文件:`./distill/finetune_chnsenticorp.py` 负责finetune以及预测teacher模型, `distill/distill_chnsentocorp.py` 负责student模型的训练。事先构造好的增强数据放在`${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug`
该脚本涉及两个python文件:`./example/finetune_classifier.py` 负责finetune以及预测teacher模型, `distill/distill_chnsentocorp.py` 负责student模型的训练。事先构造好的增强数据放在`${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug`
在脚本的第二步中,使用 `--do_predict` 参数进入预测模式:
```script
cat ${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug/part.0 |python3 -u ./distill/finetune_chnsenticorp.py \
cat ${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug/part.0 |python3 -u ./example/finetune_classifier.py \
--do_predict \
--data_dir ${TASK_DATA_PATH}/distill/chnsenticorp/teacher \
--warm_start_from ${MODEL_PATH}/params \
......@@ -86,7 +86,7 @@ sh ./distill/script/distill_chnsenticorp_with_propeller_server.sh
流程包含3步:1. finetune ERNIE模型。2. 取指标最好的ERNIE模型启动`propeller`服务。 3.在student模型的训练过程中访问服务获取teacher模型的标注。
此流程涉及两个python文件: `distill/finetune_chnsenticorp.py``distill/distill_chnsentocorp_with_propeller_server.py` 。其中第一步与离线蒸馏中的用法完全一样。
此流程涉及两个python文件: `example/finetune_classifier.py` 与 `distill/distill_chnsentocorp_with_propeller_server.py` 。其中第一步与离线蒸馏中的用法完全一样。
第二步中使用
```script
python3 -m propeller.tools.start_server -p 8113 -m ${teacher_dir}/best/inference/ &
......
......@@ -117,7 +117,7 @@ class ClassificationBowModel(propeller.train.Model):
return {'acc': acc}
if __name__ == '__main__':
parser = propeller.ArgumentParser('DAN model with Paddle')
parser = propeller.ArgumentParser('Distill model with Paddle')
parser.add_argument('--max_seqlen', type=int, default=128)
parser.add_argument('--vocab_file', type=str, required=True)
parser.add_argument('--unsupervise_data_dir', type=str, required=True)
......
......@@ -118,7 +118,7 @@ class ClassificationBowModel(propeller.train.Model):
return {'acc': acc}
if __name__ == '__main__':
parser = propeller.ArgumentParser('DAN model with Paddle')
parser = propeller.ArgumentParser('distill model with ERNIE')
parser.add_argument('--max_seqlen', type=int, default=128)
parser.add_argument('--vocab_file', type=str, required=True)
parser.add_argument('--teacher_vocab_file', type=str, required=True)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import time
import logging
from random import random
from functools import reduce, partial
import numpy as np
import multiprocessing
import paddle
import paddle.fluid as F
import paddle.fluid.layers as L
from model.ernie import ErnieModel
from optimization import optimization
import utils.data
from propeller import log
import propeller.paddle as propeller
# Emit DEBUG-level messages from the propeller logger for this script.
log.setLevel(logging.DEBUG)
class ClassificationErnieModel(propeller.train.Model):
    """propeller Model wrapper around paddle-ERNIE with a classification head."""

    def __init__(self, hparam, mode, run_config):
        """Keep the hyper-parameters, run mode and run configuration."""
        self.hparam = hparam
        self.mode = mode
        self.run_config = run_config

    def forward(self, features):
        """Build the ERNIE encoder plus a fully-connected classifier.

        Args:
            features: a pair ``(src_ids, sent_ids)``.

        Returns:
            Softmax probabilities when the mode is ``RunMode.PREDICT``,
            otherwise the raw logits.
        """
        src_ids, sent_ids = features
        use_fp16 = self.hparam['fp16']
        float_dtype = 'float16' if use_fp16 else 'float32'

        # Mask is 1 wherever the token id differs from 0 (assume pad id == 0).
        zero = L.fill_constant([1], dtype='int64', value=0)
        is_pad = L.equal(src_ids, zero)
        input_mask = L.cast(L.logical_not(is_pad), float_dtype)

        ids_shape = L.shape(src_ids)
        seq_len = ids_shape[1]
        n_batch = ids_shape[0]

        # Position ids: 0..seq_len-1, repeated for every row of the batch,
        # with a trailing unit axis and int64 dtype.
        pos_ids = L.range(0, seq_len, 1, dtype='int32')
        pos_ids = L.unsqueeze(pos_ids, axes=[0])
        pos_ids = L.expand(pos_ids, [n_batch, 1])
        pos_ids = L.unsqueeze(pos_ids, axes=[2])
        pos_ids = L.cast(pos_ids, 'int64')
        pos_ids.stop_gradient = True
        input_mask.stop_gradient = True

        # Per the original author's note, task ids are not used at the moment.
        task_ids = L.zeros_like(src_ids) + self.hparam.task_id
        task_ids.stop_gradient = True

        encoder = ErnieModel(
            src_ids=src_ids,
            position_ids=pos_ids,
            sentence_ids=sent_ids,
            task_ids=task_ids,
            input_mask=input_mask,
            config=self.hparam,
            use_fp16=use_fp16,
        )

        pooled = encoder.get_pooled_output()
        pooled = L.dropout(
            x=pooled,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train",
        )

        weight_attr = F.ParamAttr(
            name="cls_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02))
        bias_attr = F.ParamAttr(
            name="cls_out_b",
            initializer=F.initializer.Constant(0.))
        logits = L.fc(
            input=pooled,
            size=self.hparam['num_label'],
            param_attr=weight_attr,
            bias_attr=bias_attr,
        )

        propeller.summary.histogram('pred', logits)

        if self.mode is not propeller.RunMode.PREDICT:
            return logits
        return L.softmax(logits)

    def loss(self, predictions, labels):
        """Return the mean softmax cross-entropy of ``predictions`` vs ``labels``."""
        per_example_loss, _ = L.softmax_with_cross_entropy(
            logits=predictions, label=labels, return_softmax=True)
        return L.mean(x=per_example_loss)

    def backward(self, loss):
        """Attach the optimizer to ``loss`` and record the scheduled learning rate."""
        total_steps = self.run_config.max_steps
        scheduled_lr, _ = optimization(
            loss=loss,
            warmup_steps=int(total_steps * self.hparam['warmup_proportion']),
            num_train_steps=total_steps,
            learning_rate=self.hparam['learning_rate'],
            train_program=F.default_main_program(),
            startup_prog=F.default_startup_program(),
            weight_decay=self.hparam['weight_decay'],
            scheduler="linear_warmup_decay",
        )
        propeller.summary.scalar('lr', scheduled_lr)

    def metrics(self, predictions, label):
        """Return ``{'acc': ...}`` computed from the argmax of ``predictions``."""
        predicted_label = L.unsqueeze(L.argmax(predictions, axis=1), axes=[1])
        return {'acc': propeller.metrics.Acc(label, predicted_label)}
if __name__ == '__main__':
    # Command-line entry point. Without --do_predict the ERNIE classifier is
    # fine-tuned on <data_dir>/train and evaluated on <data_dir>/dev; with
    # --do_predict examples are read from stdin and the argmax label of each
    # prediction is printed.
    parser = propeller.ArgumentParser('DAN model with Paddle')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--warm_start_from', type=str)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)

    # Vocabulary maps the first tab-separated field of every line to its
    # zero-based line index. The file is read inside a `with` block so the
    # handle is closed as soon as the dict is built.
    with open(args.vocab_file, 'rb') as vocab_f:
        vocab = {j.strip().split(b'\t')[0].decode('utf8'): i for i, j in enumerate(vocab_f)}
    sep_id = vocab['[SEP]']
    cls_id = vocab['[CLS]']
    unk_id = vocab['[UNK]']

    tokenizer = utils.data.CharTokenizer(vocab.keys())

    def tokenizer_func(inputs):
        '''avoid pickle error'''
        ret = tokenizer(inputs)
        return ret

    if not args.do_predict:
        feature_column = propeller.data.FeatureColumns([
            propeller.data.TextColumn('title', unk_id=unk_id, vocab_dict=vocab, tokenizer=tokenizer_func),
            propeller.data.LabelColumn('label'),
        ])

        def before(seg_a, label):
            # Per-example step applied before batching.
            sentence, segments = utils.data.build_1_pair(seg_a, max_seqlen=args.max_seqlen, cls_id=cls_id, sep_id=sep_id)
            return sentence, segments, label

        def after(sentence, segments, label):
            # Per-batch step applied after padding.
            sentence, segments, label = utils.data.expand_dims(sentence, segments, label)
            return sentence, segments, label

        log.debug(os.path.join(args.data_dir, 'train'))
        train_ds = feature_column.build_dataset('train', data_dir=os.path.join(args.data_dir, 'train'), shuffle=True, repeat=True, use_gz=False) \
                                 .map(before) \
                                 .padded_batch(hparams.batch_size, (0, 0, 0)) \
                                 .map(after)

        dev_ds = feature_column.build_dataset('dev', data_dir=os.path.join(args.data_dir, 'dev'), shuffle=False, repeat=False, use_gz=False) \
                               .map(before) \
                               .padded_batch(hparams.batch_size, (0, 0, 0)) \
                               .map(after)

        shapes = ([-1, args.max_seqlen, 1], [-1, args.max_seqlen, 1], [-1, 1])
        types = ('int64', 'int64', 'int64')

        train_ds.data_shapes = shapes
        train_ds.data_types = types
        dev_ds.data_shapes = shapes
        dev_ds.data_types = types

        # Warm-start only variables whose name matches the pattern and for
        # which a file of the same name exists in the warm-start directory.
        varname_to_warmstart = re.compile('encoder.*|pooled.*|.*embedding|pre_encoder_.*')
        warm_start_dir = args.warm_start_from
        ws = propeller.WarmStartSetting(
            predicate_fn=lambda v: varname_to_warmstart.match(v.name) and os.path.exists(os.path.join(warm_start_dir, v.name)),
            from_dir=warm_start_dir
        )

        # Export a model whenever the eval accuracy beats the previous best.
        best_exporter = propeller.train.exporter.BestInferenceModelExporter(os.path.join(run_config.model_dir, 'best'), cmp_fn=lambda old, new: new['eval']['acc'] > old['eval']['acc'])
        propeller.train.train_and_eval(
            model_class_or_model_fn=ClassificationErnieModel,
            params=hparams,
            run_config=run_config,
            train_dataset=train_ds,
            eval_dataset=dev_ds,
            warm_start_setting=ws,
            exporters=[best_exporter])
        print('dev_acc\t%.5f' % (best_exporter._best['eval']['acc']))
    else:
        feature_column = propeller.data.FeatureColumns([
            propeller.data.TextColumn('title', unk_id=unk_id, vocab_dict=vocab, tokenizer=tokenizer_func),
            propeller.data.LabelColumn('label'),
        ])

        def before(seg_a):
            # Per-example step applied before batching.
            sentence, segments = utils.data.build_1_pair(seg_a, max_seqlen=args.max_seqlen, cls_id=cls_id, sep_id=sep_id)
            return sentence, segments

        def after(sentence, segments):
            # Per-batch step applied after padding.
            sentence, segments = utils.data.expand_dims(sentence, segments)
            return sentence, segments

        predict_ds = feature_column.build_dataset_from_stdin('predict') \
                                   .map(before) \
                                   .padded_batch(hparams.batch_size, (0, 0)) \
                                   .map(after)
        shapes = ([-1, args.max_seqlen, 1], [-1, args.max_seqlen, 1])
        types = ('int64', 'int64')

        predict_ds.data_shapes = shapes
        predict_ds.data_types = types
        finetuned_model = propeller.Learner(ClassificationErnieModel, run_config, hparams)
        for logits, in finetuned_model.predict(predict_ds, ckpt=-1):  # ckpt=-1 means last step
            print(np.argmax(logits))
set -x
export PYTHONPATH=.:$PYTHONPATH
export PYTHONPATH=.:./ernie/:${PYTHONPATH:-}
output_dir=./output/distill
teacher_dir=${output_dir}/teacher
student_dir=${output_dir}/student
# 1. finetune teacher
CUDA_VISIBLE_DEVICES=0 \
python3 -u ./distill/finetune_chnsenticorp.py \
python3 -u ./example/finetune_classifier.py \
--data_dir ${TASK_DATA_PATH}/distill/chnsenticorp/teacher \
--warm_start_from ${MODEL_PATH}/params \
--vocab_file ${MODEL_PATH}/vocab.txt \
......@@ -29,7 +29,7 @@ python3 -u ./distill/finetune_chnsenticorp.py \
--hparam '{ # learn
"warmup_proportion": 0.1,
"weight_decay": 0.01,
"fp16": 0,
"use_fp16": 0,
"learning_rate": 0.00005,
"num_label": 2,
"batch_size": 32
......@@ -39,7 +39,7 @@ python3 -u ./distill/finetune_chnsenticorp.py \
# 2. start a prediction server
export CUDA_VISIBLE_DEVICES=0
cat ${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug/part.0 |awk -F"\t" '{print $2}' |python3 -u ./distill/finetune_chnsenticorp.py \
cat ${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug/part.0 |awk -F"\t" '{print $2}' |python3 -u ./example/finetune_classifier.py \
--do_predict \
--data_dir ${TASK_DATA_PATH}/distill/chnsenticorp/teacher \
--warm_start_from ${MODEL_PATH}/params \
......@@ -58,7 +58,7 @@ cat ${TASK_DATA_PATH}/distill/chnsenticorp/student/unsup_train_aug/part.0 |awk -
--hparam '{ # learn
"warmup_proportion": 0.1,
"weight_decay": 0.01,
"fp16": 0,
"use_fp16": 0,
"learning_rate": 0.00005,
"num_label": 2,
"batch_size": 100
......@@ -94,7 +94,6 @@ python3 ./distill/distill_chnsentocorp.py \
--hparam '{ # lr shit
"warmup_proportion": 0.1,
"weight_decay": 0.00,
"fp16": 0,
"learning_rate": 1e-4,
"batch_size": 100
}'
......
set -x
export PYTHONPATH=.:$PYTHONPATH
export PYTHONPATH=.:./ernie/:${PYTHONPATH:-}
output_dir=./output/distill
teacher_dir=${output_dir}/teacher
student_dir=${output_dir}/student
# 1. finetune teacher
CUDA_VISIBLE_DEVICES=0 \
python3 -u ./distill/finetune_chnsenticorp.py \
python3 -u ./example/finetune_classifier.py \
--data_dir ${TASK_DATA_PATH}/distill/chnsenticorp/teacher \
--warm_start_from ${MODEL_PATH}/params \
--vocab_file ${MODEL_PATH}/vocab.txt \
......@@ -29,7 +29,7 @@ python3 -u ./distill/finetune_chnsenticorp.py \
--hparam '{ # learn
"warmup_proportion": 0.1,
"weight_decay": 0.01,
"fp16": 0,
"use_fp16": 0,
"learning_rate": 0.00005,
"num_label": 2,
"batch_size": 32
......@@ -74,7 +74,6 @@ python3 ./distill/distill_chnsentocorp_with_propeller_server.py \
--hparam '{ # learn
"warmup_proportion": 0.1,
"weight_decay": 0.00,
"fp16": 0,
"learning_rate": 1e-4,
"batch_size": 100
}'
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import sys
import os
import argparse
from propeller.service.client import InferenceClient
from propeller import log
import six
import utils.data
from time import time
import numpy as np
class ErnieClient(InferenceClient):
    """Inference client that turns raw text into padded id batches.

    Text is tokenized with a ``CharTokenizer`` built from the vocabulary,
    converted to ids, wrapped by ``utils.data.build_1_pair`` /
    ``build_2_pair``, padded to a common length and sent through the parent
    ``InferenceClient``.
    """

    def __init__(self,
                 vocab_file,
                 host='localhost',
                 port=8888,
                 batch_size=32,
                 num_coroutine=1,
                 timeout=10.,
                 max_seqlen=128):
        """Connect to ``tcp://host:port`` and load the vocabulary.

        Args:
            vocab_file: path of the vocabulary file; the first tab-separated
                field of each line is the token, the line index is its id.
            host: server host name.
            port: server port.
            batch_size: forwarded to ``InferenceClient``.
            num_coroutine: forwarded to ``InferenceClient``.
            timeout: forwarded to ``InferenceClient``.
            max_seqlen: maximum sequence length passed to the pair builders.

        Raises:
            KeyError: if ``[CLS]`` or ``[SEP]`` is missing from the vocabulary.
        """
        host_port = 'tcp://%s:%d' % (host, port)
        super(ErnieClient, self).__init__(host_port, batch_size=batch_size, num_coroutine=num_coroutine, timeout=timeout)
        # Read inside `with` so the file handle is closed once the dict is built.
        with open(vocab_file, 'rb') as vocab_f:
            self.vocab = {j.strip().split(b'\t')[0].decode('utf8'): i for i, j in enumerate(vocab_f)}
        self.tokenizer = utils.data.CharTokenizer(self.vocab.keys())
        self.max_seqlen = max_seqlen
        self.cls_id = self.vocab['[CLS]']
        self.sep_id = self.vocab['[SEP]']

    def txt_2_id(self, text):
        """Tokenize ``text`` and return its vocabulary ids as a numpy array.

        A token that is not a key of the vocabulary raises ``KeyError``.
        """
        ids = np.array([self.vocab[i] for i in self.tokenizer(text)])
        return ids

    def pad_and_batch(self, ids):
        """Zero-pad 1-D id arrays to the longest one and stack them.

        Returns an array with a trailing unit axis appended.
        """
        max_len = max(map(len, ids))
        padded = np.stack([np.pad(i, [[0, max_len - len(i)]], mode='constant') for i in ids])
        padded = np.expand_dims(padded, axis=-1)
        return padded

    def __call__(self, text_a, text_b=None):
        """Encode a batch of texts (optionally text pairs) and query the server.

        Args:
            text_a: sequence of texts.
            text_b: optional sequence of second texts, same length as ``text_a``.

        Returns:
            The single value returned by the parent client's ``__call__``.

        Raises:
            ValueError: if ``text_b`` is given with a different length than
                ``text_a``.
        """
        if text_b is not None and len(text_a) != len(text_b):
            # Report the sizes; formatting the sequences themselves with %d
            # would raise TypeError instead of the intended ValueError.
            raise ValueError('text_b %d has different size than text_a %d' % (len(text_b), len(text_a)))
        text_a = [i.encode('utf8') if isinstance(i, six.string_types) else i for i in text_a]
        if text_b is not None:
            text_b = [i.encode('utf8') if isinstance(i, six.string_types) else i for i in text_b]

        ids_a = map(self.txt_2_id, text_a)
        if text_b is not None:
            ids_b = map(self.txt_2_id, text_b)
            ret = [utils.data.build_2_pair(a, b, self.max_seqlen, self.cls_id, self.sep_id) for a, b in zip(ids_a, ids_b)]
        else:
            ret = [utils.data.build_1_pair(a, self.max_seqlen, self.cls_id, self.sep_id) for a in ids_a]
        sen_ids, token_type_ids = zip(*ret)
        sen_ids = self.pad_and_batch(sen_ids)
        token_type_ids = self.pad_and_batch(token_type_ids)
        # The parent call is expected to yield exactly one output.
        ret, = super(ErnieClient, self).__call__(sen_ids, token_type_ids)
        return ret
if __name__ == '__main__':
    # Command-line entry point: read a tab-separated input file (one or two
    # text columns), query the server batch by batch and save the
    # concatenated results to the output file with np.save.
    parser = argparse.ArgumentParser(description='ernie_encoder_client')
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('-i', '--input', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-p', '--port', type=int, default=8888)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_coroutine', type=int, default=1)
    parser.add_argument('--vocab', type=str, required=True)
    args = parser.parse_args()

    client = ErnieClient(args.vocab, args.host, args.port, batch_size=args.batch_size, num_coroutine=args.num_coroutine)
    # Read inside `with` so the input file handle is closed after loading.
    with open(args.input, 'rb') as inf:
        inputs = [i.strip().split(b'\t') for i in inf.readlines()]
    if len(inputs) == 0:
        raise ValueError('empty input')
    send_batch = args.num_coroutine * args.batch_size
    # One extra round covers a trailing partial batch; an empty one is skipped.
    send_num = len(inputs) // send_batch + 1
    rets = []
    start = time()
    for i in range(send_num):
        chunk = inputs[i * send_batch: (i + 1) * send_batch]
        if len(chunk) == 0:
            continue
        columns = list(zip(*chunk))
        if len(columns) > 2:
            raise ValueError('inputs file has more than 2 columns')
        ret = client(*columns)
        if len(ret.shape) == 3:
            ret = ret[:, 0, :]  # take cls
        rets.append(ret)
    end = time()
    with open(args.output, 'wb') as outf:
        arr = np.concatenate(rets, 0)
        np.save(outf, arr)
    log.info('query num: %d average latency %.5f' % (len(inputs), (end - start)/len(inputs)))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import sys
import os
import argparse
import logging
import logging.handlers
import re
from propeller.service.server import InferenceServer
from propeller import log
if __name__ == "__main__":
    # Command-line entry point: pick the exported sub-model that matches
    # --encode_layer and serve it on the requested port.
    layer_choices = ['pooler'] + ['layer%d' % n for n in range(12, 0, -1)]

    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--model_dir', type=str, required=True)
    parser.add_argument('-p', '--port', type=int, default=8888)
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--encode_layer', type=str, choices=layer_choices, default='pooler')
    args = parser.parse_args()

    if args.verbose:
        log.setLevel(logging.DEBUG)

    # One device per comma-separated entry of CUDA_VISIBLE_DEVICES.
    visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
    if visible_devices is None:
        raise RuntimeError('CUDA_VISIBLE_DEVICES not set')
    n_devices = len(visible_devices.split(","))

    # 'pooler' maps to <model_dir>/pooler, 'layerN' maps to <model_dir>/encN.
    requested = args.encode_layer.lower()
    if requested == 'pooler':
        sub_dir = 'pooler'
    else:
        layer_no = int(re.compile(r'layer(\d+)').match(requested).group(1))
        sub_dir = 'enc%d' % layer_no
    model_dir = os.path.join(args.model_dir, sub_dir)

    server = InferenceServer(model_dir, n_devices)
    log.info('propeller server listent on port %d' % args.port)
    server.listen(args.port)
......@@ -108,7 +108,7 @@ class CharTokenizer(object):
"""
self.vocab = set(vocab)
#self.pat = re.compile(r'([,.!?\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]|[\u4e00-\u9fa5]|[a-zA-Z0-9]+)')
self.pat = re.compile(r'\S')
self.pat = re.compile(r'([a-zA-Z0-9]+|\S)')
self.lower = lower
def __call__(self, sen):
......@@ -132,7 +132,7 @@ def build_2_pair(seg_a, seg_b, max_seqlen, cls_id, sep_id):
seqlen = sen_emb.shape[0]
#random truncate
random_begin = 0#np.random.randint(0, np.maximum(0, seqlen - max_seqlen) + 1,)
random_begin = 0 #np.random.randint(0, np.maximum(0, seqlen - max_seqlen) + 1,)
sen_emb = sen_emb[random_begin: random_begin + max_seqlen]
token_type_emb = token_type_emb[random_begin: random_begin + max_seqlen]
......@@ -147,7 +147,7 @@ def build_1_pair(seg_a, max_seqlen, cls_id, sep_id):
seqlen = sen_emb.shape[0]
#random truncate
random_begin = 0#np.random.randint(0, np.maximum(0, seqlen - max_seqlen) + 1,)
random_begin = 0 #np.random.randint(0, np.maximum(0, seqlen - max_seqlen) + 1,)
sen_emb = sen_emb[random_begin: random_begin + max_seqlen]
token_type_emb = token_type_emb[random_begin: random_begin + max_seqlen]
......
此差异已折叠。
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_sync_nccl_allreduce=1
export FLAGS_eager_delete_tensor_gb=0.0
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -20,7 +21,7 @@ batch_size=64
epoch=3
for i in {1..5};do
python -u run_classifier.py \
python -u ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -20,7 +21,7 @@ for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--use_fast_executor ${e_executor:-"true"} \
--tokenizer ${TOKENIZER:-"FullTokenizer"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -19,7 +20,7 @@ epoch=4
for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -4,6 +4,7 @@ R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -22,7 +23,7 @@ for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -21,7 +22,7 @@ for i in {1..1};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--for_cn False \
--ernie_config_path script/en_glue/ernie_base/ernie_config.json \
--validation_steps 1000000000000 \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -16,7 +17,7 @@ for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -18,7 +19,7 @@ epoch=4
for i in {1..5};do
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--for_cn False \
--use_cuda true \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -18,7 +19,7 @@ epoch=3
for i in {1..5};do
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -20,7 +21,7 @@ epoch=4
for i in {1..5};do
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--for_cn False \
--use_cuda true \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_sync_nccl_allreduce=1
export FLAGS_eager_delete_tensor_gb=0.0
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -19,7 +20,7 @@ epoch=5
for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -4,6 +4,7 @@ R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -17,7 +18,7 @@ for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--use_fast_executor ${e_executor:-"true"} \
--tokenizer ${TOKENIZER:-"FullTokenizer"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -20,7 +21,7 @@ epoch=4
for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -4,6 +4,7 @@ R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -21,7 +22,7 @@ for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -16,7 +17,7 @@ for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--for_cn False \
--ernie_config_path script/en_glue/ernie_large/ernie_config.json \
--validation_steps 1000000000000 \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -16,7 +17,7 @@ mkdir -p log/
for i in {1..5};do
timestamp=`date "+%Y-%m-%d-%H-%M-%S"`
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
mkdir -p log/
......@@ -19,7 +20,7 @@ epoch=4
for i in {1..5};do
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--for_cn False \
--use_cuda true \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -15,7 +16,7 @@ mkdir -p log/
for i in {1..5};do
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--use_cuda true \
--for_cn False \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -3,6 +3,7 @@
R_DIR=`dirname $0`; MYDIR=`cd $R_DIR;pwd`
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export PYTHONPATH=./ernie:${PYTHONPATH:-}
if [[ -f ./model_conf ]];then
source ./model_conf
......@@ -19,7 +20,7 @@ epoch=4
for i in {1..5};do
python -u run_classifier.py \
python -u ./ernie/run_classifier.py \
--for_cn False \
--use_cuda true \
--use_fast_executor ${e_executor:-"true"} \
......
......@@ -4,7 +4,8 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -4,7 +4,8 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u ./run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -4,12 +4,13 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_mrc.py --use_cuda true\
./ernie/run_mrc.py --use_cuda true\
--batch_size 16 \
--in_tokens false\
--use_fast_executor true \
......
......@@ -4,13 +4,13 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_classifier.py \
./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -4,12 +4,13 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_mrc.py --use_cuda true\
./ernie/run_mrc.py --use_cuda true\
--batch_size 16 \
--in_tokens false\
--use_fast_executor true \
......
......@@ -4,7 +4,8 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -4,7 +4,8 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u run_sequence_labeling.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_sequence_labeling.py \
--use_cuda true \
--do_train true \
--do_val true \
......
......@@ -4,7 +4,8 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -u run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--do_train true \
--do_val true \
......
......@@ -4,12 +4,13 @@ export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_classifier.py \
./ernie/run_classifier.py \
--use_cuda true \
--do_train true \
--do_val true \
......
......@@ -3,7 +3,9 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -3,7 +3,8 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u ./run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -4,12 +4,13 @@ export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_mrc.py --use_cuda true\
./ernie/run_mrc.py --use_cuda true\
--batch_size 8 \
--in_tokens false\
--use_fast_executor true \
......
......@@ -3,12 +3,13 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_classifier.py \
./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -4,12 +4,13 @@ export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_mrc.py --use_cuda true\
./ernie/run_mrc.py --use_cuda true\
--batch_size 8 \
--in_tokens false\
--use_fast_executor true \
......
......@@ -3,7 +3,8 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--verbose true \
--do_train true \
......
......@@ -3,7 +3,8 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0
python -u run_sequence_labeling.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_sequence_labeling.py \
--use_cuda true \
--do_train true \
--do_val true \
......
......@@ -3,7 +3,8 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -u run_classifier.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python -u ./ernie/run_classifier.py \
--use_cuda true \
--do_train true \
--do_val true \
......
......@@ -3,13 +3,13 @@ set -eux
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./finetune_launch.py \
export PYTHONPATH=./ernie:${PYTHONPATH:-}
python ./ernie/finetune_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
run_classifier.py \
./ernie/run_classifier.py \
--use_cuda true \
--do_train true \
--do_val true \
......
......@@ -3,12 +3,12 @@ set -eux
export FLAGS_eager_delete_tensor_gb=0
export FLAGS_sync_nccl_allreduce=1
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python ./pretrain_launch.py \
python ./ernie/pretrain_launch.py \
--nproc_per_node 8 \
--selected_gpus 0,1,2,3,4,5,6,7 \
--node_ips $(hostname -i) \
--node_id 0 \
./train.py --use_cuda True \
./ernie/train.py --use_cuda True \
--is_distributed False\
--use_fast_executor True \
--weight_sharing True \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册