From d7541747f41b54da6871945e96d4d3a1540846e8 Mon Sep 17 00:00:00 2001
From: pkpk
Date: Sat, 22 Jun 2019 13:54:47 +0800
Subject: [PATCH] remove reduce_sum/mean to support fp16 (#2492)

---
 .../define_paradigm.py | 126 ++++++++++--------
 1 file changed, 70 insertions(+), 56 deletions(-)

diff --git a/PaddleNLP/models/dialogue_model_toolkit/dialogue_general_understanding/define_paradigm.py b/PaddleNLP/models/dialogue_model_toolkit/dialogue_general_understanding/define_paradigm.py
index 9b873205..07581b59 100644
--- a/PaddleNLP/models/dialogue_model_toolkit/dialogue_general_understanding/define_paradigm.py
+++ b/PaddleNLP/models/dialogue_model_toolkit/dialogue_general_understanding/define_paradigm.py
@@ -20,17 +20,18 @@ import paddle
 import paddle.fluid as fluid
 
 
-class Paradigm(object): 
+class Paradigm(object):
     """
     define network paradigm
     """
-    def __init__(self, task_name): 
+
+    def __init__(self, task_name):
         """
         init
         """
         self.task_name = task_name
 
-    def create_cls(self, transformer_inst, params): 
+    def create_cls(self, transformer_inst, params):
         """
         create classify paradigm network
         """
@@ -48,42 +49,46 @@ class Paradigm(object):
             bias_attr=fluid.ParamAttr(
                 name="cls_out_b",
                 initializer=fluid.initializer.Constant(0.)))
-        if params['is_prediction']: 
+        if params['is_prediction']:
             probs = fluid.layers.softmax(logits)
             feed_targets_name = [
-                params['src_ids'].name, params['pos_ids'].name,
-                params['sent_ids'].name, params['input_mask'].name,
-            ]
-            results = {"probs": probs,
-                       "feed_targets_name": feed_targets_name}
+                params['src_ids'].name,
+                params['pos_ids'].name,
+                params['sent_ids'].name,
+                params['input_mask'].name,
+            ]
+            results = {"probs": probs, "feed_targets_name": feed_targets_name}
             return results
 
         ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
             logits=logits, label=params['labels'], return_softmax=True)
-        loss = fluid.layers.reduce_mean(input=ce_loss)
+        loss = fluid.layers.mean(input=ce_loss)
         num_seqs = fluid.layers.create_tensor(dtype='int64')
-        accuracy = fluid.layers.accuracy(input=probs, label=params['labels'], total=num_seqs)
+        accuracy = fluid.layers.accuracy(
+            input=probs, label=params['labels'], total=num_seqs)
 
         loss.persistable = True
         probs.persistable = True
         accuracy.persistable = True
         num_seqs.persistable = True
 
-        results = {"loss": loss,
-                   "probs": probs,
-                   "accuracy": accuracy,
-                   "num_seqs": num_seqs}
+        results = {
+            "loss": loss,
+            "probs": probs,
+            "accuracy": accuracy,
+            "num_seqs": num_seqs
+        }
         return results
 
-    def create_multi_cls(self, transformer_inst, params): 
+    def create_multi_cls(self, transformer_inst, params):
         """
         create multi classify paradigm network
         """
         cls_feats = transformer_inst.get_pooled_output()
         cls_feats = fluid.layers.dropout(
-                x=cls_feats,
-                dropout_prob=0.1,
-                dropout_implementation="upscale_in_train")
+            x=cls_feats,
+            dropout_prob=0.1,
+            dropout_implementation="upscale_in_train")
         logits = fluid.layers.fc(
             input=cls_feats,
             size=params['num_labels'],
@@ -94,30 +99,32 @@ class Paradigm(object):
                 name="cls_out_b",
                 initializer=fluid.initializer.Constant(0.)))
         labels_onehot = fluid.layers.cast(params["labels"], dtype='float32')
-        ce_loss = fluid.layers.reduce_sum(fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, label=labels_onehot))
-        loss = fluid.layers.reduce_mean(input=ce_loss)
+        ce_loss = fluid.layers.reduce_sum(
+            fluid.layers.sigmoid_cross_entropy_with_logits(
+                x=logits, label=labels_onehot))
+        loss = fluid.layers.mean(input=ce_loss)
         probs = fluid.layers.sigmoid(logits)
-        if params['is_prediction']: 
+        if params['is_prediction']:
             feed_targets_name = [
-                params['src_ids'].name, params['pos_ids'].name,
-                params['sent_ids'].name, params['input_mask'].name,
-            ]
-            results = {"probs": probs,
-                       "feed_targets_name": feed_targets_name}
+                params['src_ids'].name,
+                params['pos_ids'].name,
+                params['sent_ids'].name,
+                params['input_mask'].name,
+            ]
+            results = {"probs": probs, "feed_targets_name": feed_targets_name}
             return results
 
-        num_seqs = fluid.layers.tensor.fill_constant(shape=[1], dtype='int64', value=1)
+        num_seqs = fluid.layers.tensor.fill_constant(
+            shape=[1], dtype='int64', value=1)
 
         loss.persistable = True
         probs.persistable = True
         num_seqs.persistable = True
 
-        results = {"loss": loss,
-                   "probs": probs,
-                   "num_seqs": num_seqs}
+        results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
         return results
 
-    def create_sequence_tagging(self, transformer_inst, params): 
+    def create_sequence_tagging(self, transformer_inst, params):
         """
         create sequence tagging paradigm
         """
@@ -127,52 +134,59 @@ class Paradigm(object):
         output_layer = fluid.layers.reshape(output_layer, [-1, hidden_size])
         logits = fluid.layers.fc(input=output_layer, size=params['num_labels'])
-        probs = fluid.layers.cast(fluid.layers.argmax(logits, axis=1), dtype='int32')
+        probs = fluid.layers.cast(
+            fluid.layers.argmax(
+                logits, axis=1), dtype='int32')
 
-        if params['is_prediction']: 
+        if params['is_prediction']:
             feed_targets_name = [
-                params['src_ids'].name, params['pos_ids'].name,
-                params['sent_ids'].name, params['input_mask'].name,
-            ]
-            results = {"probs": probs,
-                       "feed_targets_name": feed_targets_name}
+                params['src_ids'].name,
+                params['pos_ids'].name,
+                params['sent_ids'].name,
+                params['input_mask'].name,
+            ]
+            results = {"probs": probs, "feed_targets_name": feed_targets_name}
             return results
-        
-        num_seqs = fluid.layers.tensor.fill_constant(shape=[1], dtype='int64', value=1)
-        y_label_reshape = fluid.layers.cast(fluid.layers.reshape(params['labels'], [-1]), dtype='int32')
+
+        num_seqs = fluid.layers.tensor.fill_constant(
+            shape=[1], dtype='int64', value=1)
+        y_label_reshape = fluid.layers.cast(
+            fluid.layers.reshape(params['labels'], [-1]), dtype='int32')
         correct_prediction = fluid.layers.equal(probs, y_label_reshape)
-        accuracy = fluid.layers.reduce_mean(fluid.layers.cast(correct_prediction, dtype='float32'))
+        accuracy = fluid.layers.mean(
+            fluid.layers.cast(
+                correct_prediction, dtype='float32'))
         ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits, \
                 label=fluid.layers.reshape(params['labels'], [-1, 1]))
-        loss = fluid.layers.reduce_mean(input=ce_loss)
-        
+        loss = fluid.layers.mean(input=ce_loss)
+
         loss.persistable = True
         probs.persistable = True
         accuracy.persistable = True
         num_seqs.persistable = True
 
-        results = {"loss": loss,
-                   "probs": probs,
-                   "accuracy": accuracy,
-                   "num_seqs": num_seqs}
+        results = {
+            "loss": loss,
+            "probs": probs,
+            "accuracy": accuracy,
+            "num_seqs": num_seqs
+        }
         return results
 
-    def paradigm(self, transformer_inst, params): 
+    def paradigm(self, transformer_inst, params):
         """
         run paradigm
         """
        results = None
-        if self.task_name == 'udc': 
+        if self.task_name == 'udc':
             results = self.create_cls(transformer_inst, params)
         elif self.task_name == 'swda':
             results = self.create_cls(transformer_inst, params)
-        elif self.task_name == 'mrda': 
+        elif self.task_name == 'mrda':
             results = self.create_cls(transformer_inst, params)
-        elif self.task_name == 'atis_intent': 
+        elif self.task_name == 'atis_intent':
             results = self.create_cls(transformer_inst, params)
-        elif self.task_name == 'atis_slot': 
+        elif self.task_name == 'atis_slot':
             results = self.create_sequence_tagging(transformer_inst, params)
         elif self.task_name == 'dstc2':
             results = self.create_multi_cls(transformer_inst, params)
         return results
-
-
-- 
GitLab
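
For context, the subject line suggests the reduce_mean calls were swapped for fluid.layers.mean because the plain mean op is handled better under fp16/mixed-precision training; that rationale is inferred from the subject, not stated in the diff itself. The sketch below is not part of the patch: it only illustrates the loss-reduction pattern the change moves to, assuming a 2019-era Paddle Fluid 1.x API. The toy program, feed data, and variable names are illustrative, not taken from the repository.

# Minimal sketch (not from the patch): average a softmax cross-entropy loss
# with fluid.layers.mean, the pattern this commit switches to, instead of
# fluid.layers.reduce_mean. Assumes a Paddle Fluid 1.x install.
import numpy as np
import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # Toy inputs: 3-class logits and integer labels (batch dim is implicit).
    logits = fluid.layers.data(name="logits", shape=[3], dtype="float32")
    labels = fluid.layers.data(name="labels", shape=[1], dtype="int64")
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    # Before this patch: loss = fluid.layers.reduce_mean(ce_loss)
    loss = fluid.layers.mean(ce_loss)  # reduction via the plain mean op

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
loss_val, = exe.run(
    main_prog,
    feed={
        "logits": np.random.rand(4, 3).astype("float32"),
        "labels": np.random.randint(0, 3, size=(4, 1)).astype("int64"),
    },
    fetch_list=[loss])
print(loss_val)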