Commit 8abe35ee authored by Yibing Liu, committed by pkpk

Upgrade bert to 1.6 api (#3599)

* Upgrade bert to 1.6 api

* Use inplace for reshape
Parent 80e6c71b
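The common thread of the hunks below: under the 1.6 API the id inputs (src_ids, pos_ids, sent_ids) are fed as plain 2-D int64 tensors with both dimensions dynamic, so feed shapes move from [-1, max_seq_len, 1] to [None, None], fluid.layers.embedding is replaced by fluid.embedding, and the downstream fc/reshape calls are adjusted to the new ranks. A minimal standalone sketch of the 1.6-style placeholder declaration (not taken from the repo; names reused for illustration only):

import paddle.fluid as fluid

# Both batch size and sequence length stay dynamic in the 1.6-style declaration.
src_ids = fluid.data(name="src_ids", shape=[None, None], dtype="int64")
labels = fluid.data(name="labels", shape=[None, 1], dtype="int64")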
@@ -155,7 +155,7 @@ def pad_batch_data(insts,
     inst_data = np.array([
         list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
     ])
-    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+    return_list += [inst_data.astype("int64").reshape([-1, max_len])]
     # position data
     if return_pos:
@@ -164,7 +164,7 @@ def pad_batch_data(insts,
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
     if return_input_mask:
         # This is used to avoid attention on paddings.
...
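Net effect of the two reshape changes above: pad_batch_data now returns the padded id batches as 2-D [batch, max_len] arrays instead of 3-D [batch, max_len, 1], matching the new [None, None] feed shapes. A tiny standalone numpy illustration (inputs made up):

import numpy as np

insts = [[101, 2054, 102], [101, 102]]   # uneven token id lists
pad_idx = 0
max_len = max(len(inst) for inst in insts)
inst_data = np.array(
    [list(inst) + [pad_idx] * (max_len - len(inst)) for inst in insts])

inst_data.astype("int64").reshape([-1, max_len, 1]).shape   # old layout: (2, 3, 1)
inst_data.astype("int64").reshape([-1, max_len]).shape      # new layout: (2, 3)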
@@ -82,21 +82,21 @@ class BertModel(object):
     def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._dtype,
             param_attr=fluid.ParamAttr(
                 name=self._word_emb_name, initializer=self._param_initializer),
             is_sparse=False)
-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=position_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._dtype,
             param_attr=fluid.ParamAttr(
                 name=self._pos_emb_name, initializer=self._param_initializer))
-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sentence_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._dtype,
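fluid.embedding (1.6) appends the embedding size to the input shape, so the 2-D [batch, seq_len] id tensors produce a 3-D [batch, seq_len, emb_size] output directly, without the trailing-1 dimension the old fluid.layers.embedding inputs carried. A hedged standalone sketch of one such call (sizes and the parameter name are illustrative assumptions):

import paddle.fluid as fluid

src_ids = fluid.data(name="src_ids", shape=[None, None], dtype="int64")
emb_out = fluid.embedding(
    input=src_ids,                      # [batch, seq_len] int64 ids
    size=[30522, 768],                  # [voc_size, emb_size], made-up values
    dtype="float32",
    param_attr=fluid.ParamAttr(name="word_embedding"),
    is_sparse=False)                    # -> [batch, seq_len, 768]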
@@ -148,6 +148,7 @@ class BertModel(object):
             input=self._enc_out, axes=[1], starts=[0], ends=[1])
         next_sent_feat = fluid.layers.fc(
             input=next_sent_feat,
+            num_flatten_dims=2,
             size=self._emb_size,
             act="tanh",
             param_attr=fluid.ParamAttr(
@@ -209,11 +210,14 @@ class BertModel(object):
         next_sent_fc_out = fluid.layers.fc(
             input=next_sent_feat,
+            num_flatten_dims=2,
             size=2,
             param_attr=fluid.ParamAttr(
                 name="next_sent_fc.w_0", initializer=self._param_initializer),
             bias_attr="next_sent_fc.b_0")
+        next_sent_fc_out = fluid.layers.reshape(
+            next_sent_fc_out, [-1, 2], inplace=True)
         next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
             logits=next_sent_fc_out, label=labels, return_softmax=True)
...
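With the 2-D id feeds, the [CLS] slice used for the next-sentence head stays 3-D ([batch, 1, emb_size]); that is why the fc calls above gain num_flatten_dims=2 and the logits are flattened back to [batch, 2] with an in-place reshape before softmax_with_cross_entropy. A standalone sketch of that shape flow, with assumed names and sizes:

import paddle.fluid as fluid

emb_size = 768  # illustrative
enc_out = fluid.data(name="enc_out", shape=[None, None, emb_size], dtype="float32")
labels = fluid.data(name="labels", shape=[None, 1], dtype="int64")

# [CLS] position -> [batch, 1, emb_size]
next_sent_feat = fluid.layers.slice(
    input=enc_out, axes=[1], starts=[0], ends=[1])
# num_flatten_dims=2 keeps the first two dims, so the fc output is [batch, 1, size]
next_sent_feat = fluid.layers.fc(
    input=next_sent_feat, num_flatten_dims=2, size=emb_size, act="tanh")
next_sent_fc_out = fluid.layers.fc(
    input=next_sent_feat, num_flatten_dims=2, size=2)            # [batch, 1, 2]
next_sent_fc_out = fluid.layers.reshape(
    next_sent_fc_out, [-1, 2], inplace=True)                     # [batch, 2]
loss, softmax = fluid.layers.softmax_with_cross_entropy(
    logits=next_sent_fc_out, label=labels, return_softmax=True)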
@@ -25,9 +25,8 @@ from model.bert import BertModel
 def create_model(args, bert_config, num_labels, is_prediction=False):
     input_fields = {
         'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
-        'shapes':
-        [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-         [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1]],
+        'shapes': [[None, None], [None, None], [None, None],
+                   [-1, args.max_seq_len, 1], [-1, 1]],
         'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
         'lod_levels': [0, 0, 0, 0, 0],
     }
@@ -59,6 +58,7 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
         dropout_implementation="upscale_in_train")
     logits = fluid.layers.fc(
         input=cls_feats,
+        num_flatten_dims=2,
         size=num_labels,
         param_attr=fluid.ParamAttr(
             name="cls_out_w",
@@ -73,6 +73,7 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
         ]
         return pyreader, probs, feed_targets_name
+    logits = fluid.layers.reshape(logits, [-1, num_labels], inplace=True)
     ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
         logits=logits, label=labels, return_softmax=True)
     loss = fluid.layers.mean(x=ce_loss)
...
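The reworked 'shapes' entry above is the classifier-side counterpart: the three id feeds become [None, None], while input_mask and labels keep their old layout. This excerpt does not show how the feeds are wired to a reader; the sketch below simply declares them with fluid.data and wraps them in a DataLoader, which is one 1.6-style way to consume them (an assumption, not taken from this diff):

import paddle.fluid as fluid

max_seq_len = 128  # stands in for args.max_seq_len

names = ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels']
shapes = [[None, None], [None, None], [None, None],
          [-1, max_seq_len, 1], [-1, 1]]
dtypes = ['int64', 'int64', 'int64', 'float32', 'int64']

inputs = [fluid.data(name=n, shape=s, dtype=d)
          for n, s, d in zip(names, shapes, dtypes)]

# Feed the declared variables through a DataLoader instead of a raw PyReader.
loader = fluid.io.DataLoader.from_generator(
    feed_list=inputs, capacity=50, iterable=False)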
@@ -224,17 +224,6 @@ def main(args):
                     incr_ratio=args.incr_ratio,
                     decr_ratio=args.decr_ratio)
-        if args.verbose:
-            if args.in_tokens:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program,
-                    batch_size=args.batch_size // args.max_seq_len)
-            else:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program, batch_size=args.batch_size)
-            print("Theoretical memory usage in training: %.3f - %.3f %s" %
-                  (lower_mem, upper_mem, unit))
     if args.do_val:
         dev_prog = fluid.Program()
         with fluid.program_guard(dev_prog, startup_prog):
...
@@ -108,8 +108,7 @@ def create_model(bert_config, is_training=False):
     if is_training:
         input_fields = {
             'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'],
-            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                       [-1, args.max_seq_len, 1],
+            'shapes': [[None, None], [None, None], [None, None],
                        [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
             'dtypes': [
                 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
@@ -118,8 +117,7 @@ def create_model(bert_config, is_training=False):
     else:
         input_fields = {
             'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
-            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                       [-1, args.max_seq_len, 1],
+            'shapes': [[None, None], [None, None], [None, None],
                        [-1, args.max_seq_len, 1], [-1, 1]],
             'dtypes': [
                 'int64', 'int64', 'int64', 'float32', 'int64'],
@@ -300,17 +298,6 @@ def train(args):
                     incr_ratio=args.incr_ratio,
                     decr_ratio=args.decr_ratio)
-        if args.verbose:
-            if args.in_tokens:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program,
-                    batch_size=args.batch_size // args.max_seq_len)
-            else:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program, batch_size=args.batch_size)
-            print("Theoretical memory usage in training: %.3f - %.3f %s" %
-                  (lower_mem, upper_mem, unit))
     if args.do_predict:
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
...
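Since the reading-comprehension feeds now declare src_ids/pos_ids/sent_ids as [None, None] int64 tensors, the arrays fed at run time are plain 2-D [batch, seq_len] matrices, while input_mask stays 3-D. A standalone illustration of a feed dict consistent with the prediction-branch shapes above; the names match the diff, the values are made up:

import numpy as np

batch, seq_len = 2, 6
feed = {
    "src_ids": np.zeros([batch, seq_len], dtype="int64"),
    "pos_ids": np.tile(np.arange(seq_len, dtype="int64"), (batch, 1)),
    "sent_ids": np.zeros([batch, seq_len], dtype="int64"),
    "input_mask": np.ones([batch, seq_len, 1], dtype="float32"),
    "unique_id": np.arange(batch, dtype="int64").reshape([batch, 1]),
}
# exe.run(infer_program, feed=feed, fetch_list=[...])  # with a fluid.Executor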
@@ -98,9 +98,8 @@ args = parser.parse_args()
 def create_model(bert_config):
     input_fields = {
         'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'mask_label', 'mask_pos', 'labels'],
-        'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                   [-1, args.max_seq_len, 1],
-                   [-1, args.max_seq_len, 1], [-1, 1], [-1, 1], [-1, 1]],
+        'shapes': [[None, None], [None, None], [None, None],
+                   [None, None, 1], [None, 1], [None, 1], [None, 1]],
         'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64', 'int64', 'int64'],
         'lod_levels': [0, 0, 0, 0, 0, 0, 0],
     }
@@ -263,16 +262,6 @@ def train(args):
     dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
     print("Device count %d" % dev_count)
-    if args.verbose:
-        if args.in_tokens:
-            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                program=train_program,
-                batch_size=args.batch_size // args.max_seq_len)
-        else:
-            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                program=train_program, batch_size=args.batch_size)
-        print("Theoretical memory usage in training: %.3f - %.3f %s" %
-              (lower_mem, upper_mem, unit))
     nccl2_num_trainers = 1
     nccl2_trainer_id = 0
...