提交 8abe35ee 作者: Yibing Liu 提交者: pkpk

Upgrade bert to 1.6 api (#3599)

* Upgrade bert to 1.6 api

* Use inplace for reshape
上级 80e6c71b
......@@ -155,7 +155,7 @@ def pad_batch_data(insts,
inst_data = np.array([
list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
])
return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
return_list += [inst_data.astype("int64").reshape([-1, max_len])]
# position data
if return_pos:
......@@ -164,7 +164,7 @@ def pad_batch_data(insts,
for inst in insts
])
return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
if return_input_mask:
# This is used to avoid attention on paddings.
......
......@@ -82,21 +82,21 @@ class BertModel(object):
def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
# padding id in vocabulary must be set to 0
emb_out = fluid.layers.embedding(
emb_out = fluid.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype=self._dtype,
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
position_emb_out = fluid.layers.embedding(
position_emb_out = fluid.embedding(
input=position_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._dtype,
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.layers.embedding(
sent_emb_out = fluid.embedding(
sentence_ids,
size=[self._sent_types, self._emb_size],
dtype=self._dtype,
......@@ -148,6 +148,7 @@ class BertModel(object):
input=self._enc_out, axes=[1], starts=[0], ends=[1])
next_sent_feat = fluid.layers.fc(
input=next_sent_feat,
num_flatten_dims=2,
size=self._emb_size,
act="tanh",
param_attr=fluid.ParamAttr(
......@@ -209,11 +210,14 @@ class BertModel(object):
next_sent_fc_out = fluid.layers.fc(
input=next_sent_feat,
num_flatten_dims=2,
size=2,
param_attr=fluid.ParamAttr(
name="next_sent_fc.w_0", initializer=self._param_initializer),
bias_attr="next_sent_fc.b_0")
next_sent_fc_out = fluid.layers.reshape(
next_sent_fc_out, [-1, 2], inplace=True)
next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
logits=next_sent_fc_out, label=labels, return_softmax=True)
......
......@@ -25,9 +25,8 @@ from model.bert import BertModel
def create_model(args, bert_config, num_labels, is_prediction=False):
input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
'shapes':
[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1]],
'shapes': [[None, None], [None, None], [None, None],
[-1, args.max_seq_len, 1], [-1, 1]],
'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
'lod_levels': [0, 0, 0, 0, 0],
}
......@@ -59,6 +58,7 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
dropout_implementation="upscale_in_train")
logits = fluid.layers.fc(
input=cls_feats,
num_flatten_dims=2,
size=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
......@@ -73,6 +73,7 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
]
return pyreader, probs, feed_targets_name
logits = fluid.layers.reshape(logits, [-1, num_labels], inplace=True)
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=labels, return_softmax=True)
loss = fluid.layers.mean(x=ce_loss)
......
......@@ -224,17 +224,6 @@ def main(args):
incr_ratio=args.incr_ratio,
decr_ratio=args.decr_ratio)
if args.verbose:
if args.in_tokens:
lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
program=train_program,
batch_size=args.batch_size // args.max_seq_len)
else:
lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size)
print("Theoretical memory usage in training: %.3f - %.3f %s" %
(lower_mem, upper_mem, unit))
if args.do_val:
dev_prog = fluid.Program()
with fluid.program_guard(dev_prog, startup_prog):
......
......@@ -108,8 +108,7 @@ def create_model(bert_config, is_training=False):
if is_training:
input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'],
'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
[-1, args.max_seq_len, 1],
'shapes': [[None, None], [None, None], [None, None],
[-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
'dtypes': [
'int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
......@@ -118,8 +117,7 @@ def create_model(bert_config, is_training=False):
else:
input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
[-1, args.max_seq_len, 1],
'shapes': [[None, None], [None, None], [None, None],
[-1, args.max_seq_len, 1], [-1, 1]],
'dtypes': [
'int64', 'int64', 'int64', 'float32', 'int64'],
......@@ -300,17 +298,6 @@ def train(args):
incr_ratio=args.incr_ratio,
decr_ratio=args.decr_ratio)
if args.verbose:
if args.in_tokens:
lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
program=train_program,
batch_size=args.batch_size // args.max_seq_len)
else:
lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size)
print("Theoretical memory usage in training: %.3f - %.3f %s" %
(lower_mem, upper_mem, unit))
if args.do_predict:
test_prog = fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
......
......@@ -98,9 +98,8 @@ args = parser.parse_args()
def create_model(bert_config):
input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'mask_label', 'mask_pos', 'labels'],
'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
[-1, args.max_seq_len, 1],
[-1, args.max_seq_len, 1], [-1, 1], [-1, 1], [-1, 1]],
'shapes': [[None, None], [None, None], [None, None],
[None, None, 1], [None, 1], [None, 1], [None, 1]],
'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64', 'int64', 'int64'],
'lod_levels': [0, 0, 0, 0, 0, 0, 0],
}
......@@ -263,16 +262,6 @@ def train(args):
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("Device count %d" % dev_count)
if args.verbose:
if args.in_tokens:
lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
program=train_program,
batch_size=args.batch_size // args.max_seq_len)
else:
lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size)
print("Theoretical memory usage in training: %.3f - %.3f %s" %
(lower_mem, upper_mem, unit))
nccl2_num_trainers = 1
nccl2_trainer_id = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册