提交 d7541747 编写于 作者: P pkpk 提交者: Yibing Liu

remove reduce_sum/mean to support fp16 (#2492)

上级 1c13f704
...@@ -20,17 +20,18 @@ import paddle ...@@ -20,17 +20,18 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
class Paradigm(object): class Paradigm(object):
""" """
define network paradigm define network paradigm
""" """
def __init__(self, task_name):
def __init__(self, task_name):
""" """
init init
""" """
self.task_name = task_name self.task_name = task_name
def create_cls(self, transformer_inst, params): def create_cls(self, transformer_inst, params):
""" """
create classify paradigm network create classify paradigm network
""" """
...@@ -48,42 +49,46 @@ class Paradigm(object): ...@@ -48,42 +49,46 @@ class Paradigm(object):
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.))) name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
if params['is_prediction']: if params['is_prediction']:
probs = fluid.layers.softmax(logits) probs = fluid.layers.softmax(logits)
feed_targets_name = [ feed_targets_name = [
params['src_ids'].name, params['pos_ids'].name, params['src_ids'].name,
params['sent_ids'].name, params['input_mask'].name, params['pos_ids'].name,
] params['sent_ids'].name,
results = {"probs": probs, params['input_mask'].name,
"feed_targets_name": feed_targets_name} ]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
return results return results
ce_loss, probs = fluid.layers.softmax_with_cross_entropy( ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=params['labels'], return_softmax=True) logits=logits, label=params['labels'], return_softmax=True)
loss = fluid.layers.reduce_mean(input=ce_loss) loss = fluid.layers.mean(input=ce_loss)
num_seqs = fluid.layers.create_tensor(dtype='int64') num_seqs = fluid.layers.create_tensor(dtype='int64')
accuracy = fluid.layers.accuracy(input=probs, label=params['labels'], total=num_seqs) accuracy = fluid.layers.accuracy(
input=probs, label=params['labels'], total=num_seqs)
loss.persistable = True loss.persistable = True
probs.persistable = True probs.persistable = True
accuracy.persistable = True accuracy.persistable = True
num_seqs.persistable = True num_seqs.persistable = True
results = {"loss": loss, results = {
"probs": probs, "loss": loss,
"accuracy": accuracy, "probs": probs,
"num_seqs": num_seqs} "accuracy": accuracy,
"num_seqs": num_seqs
}
return results return results
def create_multi_cls(self, transformer_inst, params): def create_multi_cls(self, transformer_inst, params):
""" """
create multi classify paradigm network create multi classify paradigm network
""" """
cls_feats = transformer_inst.get_pooled_output() cls_feats = transformer_inst.get_pooled_output()
cls_feats = fluid.layers.dropout( cls_feats = fluid.layers.dropout(
x=cls_feats, x=cls_feats,
dropout_prob=0.1, dropout_prob=0.1,
dropout_implementation="upscale_in_train") dropout_implementation="upscale_in_train")
logits = fluid.layers.fc( logits = fluid.layers.fc(
input=cls_feats, input=cls_feats,
size=params['num_labels'], size=params['num_labels'],
...@@ -94,30 +99,32 @@ class Paradigm(object): ...@@ -94,30 +99,32 @@ class Paradigm(object):
name="cls_out_b", initializer=fluid.initializer.Constant(0.))) name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
labels_onehot = fluid.layers.cast(params["labels"], dtype='float32') labels_onehot = fluid.layers.cast(params["labels"], dtype='float32')
ce_loss = fluid.layers.reduce_sum(fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, label=labels_onehot)) ce_loss = fluid.layers.reduce_sum(
loss = fluid.layers.reduce_mean(input=ce_loss) fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=labels_onehot))
loss = fluid.layers.mean(input=ce_loss)
probs = fluid.layers.sigmoid(logits) probs = fluid.layers.sigmoid(logits)
if params['is_prediction']: if params['is_prediction']:
feed_targets_name = [ feed_targets_name = [
params['src_ids'].name, params['pos_ids'].name, params['src_ids'].name,
params['sent_ids'].name, params['input_mask'].name, params['pos_ids'].name,
] params['sent_ids'].name,
results = {"probs": probs, params['input_mask'].name,
"feed_targets_name": feed_targets_name} ]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
return results return results
num_seqs = fluid.layers.tensor.fill_constant(shape=[1], dtype='int64', value=1) num_seqs = fluid.layers.tensor.fill_constant(
shape=[1], dtype='int64', value=1)
loss.persistable = True loss.persistable = True
probs.persistable = True probs.persistable = True
num_seqs.persistable = True num_seqs.persistable = True
results = {"loss": loss, results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
"probs": probs,
"num_seqs": num_seqs}
return results return results
def create_sequence_tagging(self, transformer_inst, params): def create_sequence_tagging(self, transformer_inst, params):
""" """
create sequence tagging paradigm create sequence tagging paradigm
""" """
...@@ -127,52 +134,59 @@ class Paradigm(object): ...@@ -127,52 +134,59 @@ class Paradigm(object):
output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size]) output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size])
logits = fluid.layers.fc(input=output_layer, size=params['num_labels']) logits = fluid.layers.fc(input=output_layer, size=params['num_labels'])
probs = fluid.layers.cast(fluid.layers.argmax(logits, axis=1), dtype='int32') probs = fluid.layers.cast(
fluid.layers.argmax(
logits, axis=1), dtype='int32')
if params['is_prediction']: if params['is_prediction']:
feed_targets_name = [ feed_targets_name = [
params['src_ids'].name, params['pos_ids'].name, params['src_ids'].name,
params['sent_ids'].name, params['input_mask'].name, params['pos_ids'].name,
] params['sent_ids'].name,
results = {"probs": probs, params['input_mask'].name,
"feed_targets_name": feed_targets_name} ]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
return results return results
num_seqs = fluid.layers.tensor.fill_constant(shape=[1], dtype='int64', value=1) num_seqs = fluid.layers.tensor.fill_constant(
y_label_reshape = fluid.layers.cast(fluid.layers.reshape(params['labels'], [-1]), dtype='int32') shape=[1], dtype='int64', value=1)
y_label_reshape = fluid.layers.cast(
fluid.layers.reshape(params['labels'], [-1]), dtype='int32')
correct_prediction = fluid.layers.equal(probs, y_label_reshape) correct_prediction = fluid.layers.equal(probs, y_label_reshape)
accuracy = fluid.layers.reduce_mean(fluid.layers.cast(correct_prediction, dtype='float32')) accuracy = fluid.layers.mean(
fluid.layers.cast(
correct_prediction, dtype='float32'))
ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \ ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \
label=fluid.layers.reshape(params['labels'], [-1, 1])) label=fluid.layers.reshape(params['labels'], [-1, 1]))
loss = fluid.layers.reduce_mean(input=ce_loss) loss = fluid.layers.mean(input=ce_loss)
loss.persistable = True loss.persistable = True
probs.persistable = True probs.persistable = True
accuracy.persistable = True accuracy.persistable = True
num_seqs.persistable = True num_seqs.persistable = True
results = {"loss": loss, results = {
"probs": probs, "loss": loss,
"accuracy": accuracy, "probs": probs,
"num_seqs": num_seqs} "accuracy": accuracy,
"num_seqs": num_seqs
}
return results return results
def paradigm(self, transformer_inst, params):
    """
    Build the network that corresponds to `self.task_name`.

    Dispatch rules:
        'udc', 'swda', 'mrda', 'atis_intent' -> self.create_cls
        'atis_slot'                          -> self.create_sequence_tagging
        'dstc2'                              -> self.create_multi_cls

    Args:
        transformer_inst: forwarded unchanged to the selected builder.
        params: forwarded unchanged to the selected builder.

    Returns:
        Whatever the selected builder returns, or None when the task
        name matches none of the names listed above.
    """
    task = self.task_name

    # All single-label classification tasks share one builder.
    if task in ('udc', 'swda', 'mrda', 'atis_intent'):
        return self.create_cls(transformer_inst, params)

    if task == 'atis_slot':
        return self.create_sequence_tagging(transformer_inst, params)

    if task == 'dstc2':
        return self.create_multi_cls(transformer_inst, params)

    # Unknown task: keep the historical behaviour of returning None.
    return None
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册