Commit 24788265 authored by W wuzewu

Fix module compatibility issues

Parent c4f19c8e
......@@ -48,7 +48,11 @@ sys.modules['paddlehub.common.logger'] = log
sys.modules['paddlehub.common.paddle_helper'] = paddle_utils
sys.modules['paddlehub.common.utils'] = utils
sys.modules['paddlehub.reader'] = task
sys.modules['paddlehub.reader.batching'] = task.batch
AdamWeightDecayStrategy = lambda: 0
ULMFiTStrategy = lambda params_layer=0: 0
common = EasyDict(paddle_helper=paddle_utils)
dataset = EasyDict(Couplet=couplet.Couplet)
AdamWeightDecayStrategy = lambda: 0
finetune = EasyDict(strategy=EasyDict(ULMFiTStrategy=ULMFiTStrategy))
logger = EasyDict(logger=log.logger)
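The hunk above registers legacy dotted paths in `sys.modules` and stubs out old strategy classes (`AdamWeightDecayStrategy`, `ULMFiTStrategy`) so that code written against the PaddleHub 1.x layout keeps importing. The snippet below is a minimal, self-contained sketch of that `sys.modules` aliasing pattern, not the project's actual code; the package names `oldpkg` and `newpkg` are illustrative.

```python
import sys
import types

# Stand-in for the relocated implementation (plays the role of `log` above).
new_log = types.ModuleType('newpkg.utils.log')
new_log.logger = lambda msg: print('[logger]', msg)  # attribute legacy callers expect

# Alias the legacy dotted path to the new module object, mirroring
# `sys.modules['paddlehub.common.logger'] = log` in the hunk above.
sys.modules['oldpkg.common.logger'] = new_log

# A legacy-style import now resolves to the aliased module, even though no
# `oldpkg` package exists on disk.
from oldpkg.common.logger import logger
logger('compatibility alias works')
```

The lambda stubs serve the same purpose: old code can still reference `AdamWeightDecayStrategy` or `ULMFiTStrategy`, but the returned objects are inert placeholders.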
......@@ -118,8 +118,8 @@ class ModuleV1(object):
                op._set_attr('op_callstack', [''])

    @paddle_utils.run_in_static_mode
    def context(self, signature: str = None, for_test: bool = False,
                trainable: bool = True) -> Tuple[dict, dict, paddle.static.Program]:
    def context(self, signature: str = None, for_test: bool = False, trainable: bool = True,
                max_seq_len: int = 128) -> Tuple[dict, dict, paddle.static.Program]:
        '''Get module context information, including graph structure and graph input and output variables.'''
        program = self.program.clone(for_test=for_test)
        paddle_utils.remove_feed_fetch_op(program)
......@@ -141,8 +141,27 @@ class ModuleV1(object):
        for param in program.all_parameters():
            param.trainable = trainable

        # The bert series model saved by ModuleV1 sets max_seq_len to 512 by default. We need to adjust max_seq_len
        # according to the parameters in actual use.
        if 'bert' in self.name or self.name.startswith('ernie'):
            self._update_bert_max_seq_len(program, feed_dict, max_seq_len)

        return feed_dict, fetch_dict, program

    def _update_bert_max_seq_len(self, program: paddle.static.Program, feed_dict: dict, max_seq_len: int = 128):
        MAX_SEQ_LENGTH = 512
        if max_seq_len > MAX_SEQ_LENGTH or max_seq_len <= 0:
            raise ValueError("max_seq_len({}) should be in the range of [1, {}]".format(max_seq_len, MAX_SEQ_LENGTH))
        log.logger.info("Set maximum sequence length of input tensor to {}".format(max_seq_len))
        if self.name.startswith("ernie_v2"):
            feed_list = ["input_ids", "position_ids", "segment_ids", "input_mask", "task_ids"]
        else:
            feed_list = ["input_ids", "position_ids", "segment_ids", "input_mask"]
        for tensor_name in feed_list:
            seq_tensor_shape = [-1, max_seq_len, 1]
            log.logger.info("The shape of input tensor[{}] set to {}".format(tensor_name, seq_tensor_shape))
            program.global_block().var(feed_dict[tensor_name].name).desc.set_shape(seq_tensor_shape)

    @paddle_utils.run_in_static_mode
    def __call__(self, sign_name: str, data: dict, use_gpu: bool = False, batch_size: int = 1, **kwargs):
        '''Call the specified signature function for prediction.'''
......
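For reference, a hedged usage sketch of the new `max_seq_len` argument that this commit adds to `ModuleV1.context`. The module name `'ernie'` and the chosen length are illustrative assumptions; per the diff, `max_seq_len` must fall in [1, 512] and each BERT-style feed tensor is reshaped to `[-1, max_seq_len, 1]`.

```python
import paddlehub as hub

# Load an ernie/bert-series module saved in the ModuleV1 format
# (module name is an example; the module must be installed locally).
module = hub.Module(name='ernie')

# Request the graph with a 64-token cap instead of the 512 the module was
# saved with; values outside [1, 512] raise ValueError per the diff above.
inputs, outputs, program = module.context(trainable=True, max_seq_len=64)

# Read the adjusted static shape back the same way the diff sets it.
input_ids = program.global_block().var(inputs['input_ids'].name)
print(input_ids.shape)  # expected to reflect [-1, 64, 1]
```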