1. Update nlp_reader, remove redundant returns

2. Fix pre-set net wording (pre-set net -> pre-defined net). 3. Remove version_compare usage.

1. Update nlp_reader, remove redundant returns
2. Fix pre-set net wording (pre-set net -> pre-defined net). 3. Remove version_compare usage.
545d975d · Steffy-zxf · 99cda5db · 545d975d · 545d975d · 545d975d
3 changed files
--- a/paddlehub/finetune/task/classifier_task.py
+++ b/paddlehub/finetune/task/classifier_task.py
@@ -170,8 +170,8 @@ class TextClassifierTask(ClassifierTask):
                 num_classes,
                 feed_list,
                 data_reader,
-                 token_feature=None,
                 feature=None,
+                 token_feature=None,
                 network=None,
                 startup_program=None,
                 config=None,
@@ -182,13 +182,13 @@ class TextClassifierTask(ClassifierTask):
            num_classes: total labels of the text classification task.
            feed_list(list): the variable name that will be feeded to the main program
            data_reader(object): data reader for the task. It must be one of ClassifyReader and LACClassifyReader.
-            token_feature(Variable): the feature will be used to connect the preset net. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None.
-            feature(Variable): the feature will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. Token_feature and feature couldn't be setted as the same time. One of them must be setted as not None. Default None.
-            network(str): the preset network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is setted, then token_feature must be seted and feature must be None.
-            main_program (object): the customized main_program, default None.
-            startup_program (object): the customized startup_program, default None.
+            feature(Variable): the `feature` will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. `token_feature` and `feature` cannot both be set at the same time, but one of them must be set (not None). Default None.
+            token_feature(Variable): the `token_feature` will be used to connect the pre-defined network. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None.
+            network(str): the pre-defined network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is set, then `token_feature` must be set and `feature` must be None.
+            main_program (object): the customized main program, default None.
+            startup_program (object): the customized startup program, default None.
            config (RunConfig): run config for the task, such as batch_size, epoch, learning_rate setting and so on. Default None.
-            hidden_units(list): the element of hidden_units list is the full-connect layer size. It will add the full-connect layers to the program. Default None.
+            hidden_units(list): the element of `hidden_units` list is the full-connect layer size. It will add the full-connect layers to the program. Default None.
            metrics_choices(list): metrics used to the task, default ["acc"].
        """
        if (not feature) and (not token_feature):
@@ -247,7 +247,7 @@ class TextClassifierTask(ClassifierTask):
                self.feature, length=self.seq_len_used)

        if self.network:
-            # add preset net
+            # add pre-defined net
            net_func = getattr(net.classification, self.network)
            if self.network == 'dpcnn':
                # deepcnn network is no need to unpad
@@ -258,7 +258,7 @@ class TextClassifierTask(ClassifierTask):
            logger.info(
                "%s has been added in the TextClassifierTask!" % self.network)
        else:
-            # not use preset net but to use fc net
+            # not use pre-defined net but to use fc net
            cls_feats = fluid.layers.dropout(
                x=self.feature,
                dropout_prob=0.1,

--- a/paddlehub/network/classification.py
+++ b/paddlehub/network/classification.py
@@ -16,7 +16,6 @@
 This module provide nets for text classification
 """

-from paddlehub.common.utils import version_compare
 import paddle
 import paddle.fluid as fluid


--- a/paddlehub/reader/nlp_reader.py
+++ b/paddlehub/reader/nlp_reader.py
@@ -65,7 +65,6 @@ class BaseNLPReader(BaseReader):
            logger.warning(
                "use_task_id has been de discarded since PaddleHub v1.4.0, it's no necessary to feed task_ids now."
            )
-            self.task_id = 0

        self.Record_With_Label_Id = namedtuple(
            'Record',
@@ -287,36 +286,16 @@ class ClassifyReader(BaseNLPReader):
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id)

+        return_list = [
+            padded_token_ids, padded_position_ids, padded_text_type_ids,
+            input_mask, batch_seq_lens
+        ]
        if phase != "predict":
            batch_labels = [record.label_id for record in batch_records]
            batch_labels = np.array(batch_labels).astype("int64").reshape(
                [-1, 1])
+            return_list += [batch_labels]

-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_seq_lens, batch_labels
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_seq_lens, batch_labels
-                ]
-        else:
-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_seq_lens
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_seq_lens
-                ]
        return return_list


@@ -370,40 +349,20 @@ class SequenceLabelReader(BaseNLPReader):
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id)

+        return_list = [
+            padded_token_ids, padded_position_ids, padded_text_type_ids,
+            input_mask
+        ]
        if phase != "predict":
            batch_label_ids = [record.label_id for record in batch_records]
            padded_label_ids = pad_batch_data(
                batch_label_ids,
                max_seq_len=self.max_seq_len,
                pad_idx=len(self.label_map) - 1)
-
-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, padded_label_ids, batch_seq_lens
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, padded_label_ids,
-                    batch_seq_lens
-                ]
+            return_list += [padded_label_ids, batch_seq_lens]

        else:
-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_seq_lens
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_seq_lens
-                ]
+            return_list += [batch_seq_lens]

        return return_list

@@ -515,37 +474,18 @@ class MultiLabelClassifyReader(BaseNLPReader):
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id)

+        return_list = [
+            padded_token_ids, padded_position_ids, padded_text_type_ids,
+            input_mask
+        ]
        if phase != "predict":
            batch_labels_ids = [record.label_id for record in batch_records]
            num_label = len(self.dataset.get_labels())
            batch_labels = np.array(batch_labels_ids).astype("int64").reshape(
                [-1, num_label])

-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_labels
-            ]
+            return_list += [batch_labels]

-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_labels
-                ]
-        else:
-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids
-                ]
        return return_list

    def _convert_example_to_record(self,
@@ -635,37 +575,17 @@ class RegressionReader(BaseNLPReader):
            max_seq_len=self.max_seq_len,
            pad_idx=self.pad_id)

+        return_list = [
+            padded_token_ids, padded_position_ids, padded_text_type_ids,
+            input_mask
+        ]
        if phase != "predict":
            batch_labels = [record.label_id for record in batch_records]
            # the only diff with ClassifyReader: astype("float32")
            batch_labels = np.array(batch_labels).astype("float32").reshape(
                [-1, 1])

-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_labels
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_labels
-                ]
-        else:
-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids
-                ]
+            return_list += [batch_labels]

        return return_list

@@ -832,6 +752,10 @@ class ReadingComprehensionReader(BaseNLPReader):
            pad_idx=self.pad_id,
            max_seq_len=self.max_seq_len)

+        return_list = [
+            padded_token_ids, padded_position_ids, padded_text_type_ids,
+            input_mask, batch_unique_ids
+        ]
        if phase != "predict":
            batch_start_position = [
                record.start_position for record in batch_records
@@ -844,33 +768,8 @@ class ReadingComprehensionReader(BaseNLPReader):
            batch_end_position = np.array(batch_end_position).astype(
                "int64").reshape([-1, 1])

-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_unique_ids, batch_start_position,
-                batch_end_position
-            ]
-
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_unique_ids,
-                    batch_start_position, batch_end_position
-                ]
+            return_list += [batch_start_position, batch_end_position]

-        else:
-            return_list = [
-                padded_token_ids, padded_position_ids, padded_text_type_ids,
-                input_mask, batch_unique_ids
-            ]
-            if self.use_task_id:
-                padded_task_ids = np.ones_like(
-                    padded_token_ids, dtype="int64") * self.task_id
-                return_list = [
-                    padded_token_ids, padded_position_ids, padded_text_type_ids,
-                    input_mask, padded_task_ids, batch_unique_ids
-                ]
        return return_list

    def _prepare_batch_data(self, records, batch_size, phase=None):