提交 d7541747 作者: pkpk 提交者: Yibing Liu

remove reduce_sum/mean to support fp16 (#2492)

上级 1c13f704
...@@ -24,6 +24,7 @@ class Paradigm(object): ...@@ -24,6 +24,7 @@ class Paradigm(object):
""" """
define network paradigm define network paradigm
""" """
def __init__(self, task_name): def __init__(self, task_name):
""" """
init init
...@@ -51,28 +52,32 @@ class Paradigm(object): ...@@ -51,28 +52,32 @@ class Paradigm(object):
if params['is_prediction']: if params['is_prediction']:
probs = fluid.layers.softmax(logits) probs = fluid.layers.softmax(logits)
feed_targets_name = [ feed_targets_name = [
params['src_ids'].name, params['pos_ids'].name, params['src_ids'].name,
params['sent_ids'].name, params['input_mask'].name, params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
] ]
results = {"probs": probs, results = {"probs": probs, "feed_targets_name": feed_targets_name}
"feed_targets_name": feed_targets_name}
return results return results
ce_loss, probs = fluid.layers.softmax_with_cross_entropy( ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=params['labels'], return_softmax=True) logits=logits, label=params['labels'], return_softmax=True)
loss = fluid.layers.reduce_mean(input=ce_loss) loss = fluid.layers.mean(input=ce_loss)
num_seqs = fluid.layers.create_tensor(dtype='int64') num_seqs = fluid.layers.create_tensor(dtype='int64')
accuracy = fluid.layers.accuracy(input=probs, label=params['labels'], total=num_seqs) accuracy = fluid.layers.accuracy(
input=probs, label=params['labels'], total=num_seqs)
loss.persistable = True loss.persistable = True
probs.persistable = True probs.persistable = True
accuracy.persistable = True accuracy.persistable = True
num_seqs.persistable = True num_seqs.persistable = True
results = {"loss": loss, results = {
"loss": loss,
"probs": probs, "probs": probs,
"accuracy": accuracy, "accuracy": accuracy,
"num_seqs": num_seqs} "num_seqs": num_seqs
}
return results return results
def create_multi_cls(self, transformer_inst, params): def create_multi_cls(self, transformer_inst, params):
...@@ -94,27 +99,29 @@ class Paradigm(object): ...@@ -94,27 +99,29 @@ class Paradigm(object):
name="cls_out_b", initializer=fluid.initializer.Constant(0.))) name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
labels_onehot = fluid.layers.cast(params["labels"], dtype='float32') labels_onehot = fluid.layers.cast(params["labels"], dtype='float32')
ce_loss = fluid.layers.reduce_sum(fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, label=labels_onehot)) ce_loss = fluid.layers.reduce_sum(
loss = fluid.layers.reduce_mean(input=ce_loss) fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=labels_onehot))
loss = fluid.layers.mean(input=ce_loss)
probs = fluid.layers.sigmoid(logits) probs = fluid.layers.sigmoid(logits)
if params['is_prediction']: if params['is_prediction']:
feed_targets_name = [ feed_targets_name = [
params['src_ids'].name, params['pos_ids'].name, params['src_ids'].name,
params['sent_ids'].name, params['input_mask'].name, params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
] ]
results = {"probs": probs, results = {"probs": probs, "feed_targets_name": feed_targets_name}
"feed_targets_name": feed_targets_name}
return results return results
num_seqs = fluid.layers.tensor.fill_constant(shape=[1], dtype='int64', value=1) num_seqs = fluid.layers.tensor.fill_constant(
shape=[1], dtype='int64', value=1)
loss.persistable = True loss.persistable = True
probs.persistable = True probs.persistable = True
num_seqs.persistable = True num_seqs.persistable = True
results = {"loss": loss, results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
"probs": probs,
"num_seqs": num_seqs}
return results return results
def create_sequence_tagging(self, transformer_inst, params): def create_sequence_tagging(self, transformer_inst, params):
...@@ -127,33 +134,42 @@ class Paradigm(object): ...@@ -127,33 +134,42 @@ class Paradigm(object):
output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size]) output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size])
logits = fluid.layers.fc(input=output_layer, size=params['num_labels']) logits = fluid.layers.fc(input=output_layer, size=params['num_labels'])
probs = fluid.layers.cast(fluid.layers.argmax(logits, axis=1), dtype='int32') probs = fluid.layers.cast(
fluid.layers.argmax(
logits, axis=1), dtype='int32')
if params['is_prediction']: if params['is_prediction']:
feed_targets_name = [ feed_targets_name = [
params['src_ids'].name, params['pos_ids'].name, params['src_ids'].name,
params['sent_ids'].name, params['input_mask'].name, params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
] ]
results = {"probs": probs, results = {"probs": probs, "feed_targets_name": feed_targets_name}
"feed_targets_name": feed_targets_name}
return results return results
num_seqs = fluid.layers.tensor.fill_constant(shape=[1], dtype='int64', value=1) num_seqs = fluid.layers.tensor.fill_constant(
y_label_reshape = fluid.layers.cast(fluid.layers.reshape(params['labels'], [-1]), dtype='int32') shape=[1], dtype='int64', value=1)
y_label_reshape = fluid.layers.cast(
fluid.layers.reshape(params['labels'], [-1]), dtype='int32')
correct_prediction = fluid.layers.equal(probs, y_label_reshape) correct_prediction = fluid.layers.equal(probs, y_label_reshape)
accuracy = fluid.layers.reduce_mean(fluid.layers.cast(correct_prediction, dtype='float32')) accuracy = fluid.layers.mean(
fluid.layers.cast(
correct_prediction, dtype='float32'))
ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \ ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \
label=fluid.layers.reshape(params['labels'], [-1, 1])) label=fluid.layers.reshape(params['labels'], [-1, 1]))
loss = fluid.layers.reduce_mean(input=ce_loss) loss = fluid.layers.mean(input=ce_loss)
loss.persistable = True loss.persistable = True
probs.persistable = True probs.persistable = True
accuracy.persistable = True accuracy.persistable = True
num_seqs.persistable = True num_seqs.persistable = True
results = {"loss": loss, results = {
"loss": loss,
"probs": probs, "probs": probs,
"accuracy": accuracy, "accuracy": accuracy,
"num_seqs": num_seqs} "num_seqs": num_seqs
}
return results return results
def paradigm(self, transformer_inst, params): def paradigm(self, transformer_inst, params):
...@@ -174,5 +190,3 @@ class Paradigm(object): ...@@ -174,5 +190,3 @@ class Paradigm(object):
elif self.task_name == 'dstc2': elif self.task_name == 'dstc2':
results = self.create_multi_cls(transformer_inst, params) results = self.create_multi_cls(transformer_inst, params)
return results return results
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册