From fe10b94f386eeb87a650bdca7e3ad214f36da56d Mon Sep 17 00:00:00 2001
From: wuzewu
Date: Wed, 7 Apr 2021 16:36:03 +0800
Subject: [PATCH] Improve the model export interface.

---
Reviewer notes (not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Hunk line counts match the headers, but the exact
  leading whitespace of context lines is an assumption; verify it against the
  target files before running `git apply`.
- RunModule.sub_modules: the nested loop now iterates
  `item.sub_modules(recursive).items()`. Iterating the returned dict directly
  yields only keys, so the `_k, _v` unpacking failed for nested sub modules.
- ModuleV1.sub_modules: returns `{}` instead of `[]` so both implementations
  return a mapping and the `.items()` call above works for ModuleV1 children.
- RunModule.export_onnx_model: the bare `except:` around sub-module export is
  narrowed to `except Exception:` so KeyboardInterrupt/SystemExit propagate.
- NOTE(review): `export_sub_modules` is also forwarded to
  ModuleV1.export_onnx_model, which passes `**kwargs` through to its exporter
  call; confirm that call tolerates the extra keyword.
- The blob ids on the `index` lines still refer to the originally committed
  content.

 paddlehub/compat/module/module_v1.py | 14 +++++-
 paddlehub/module/module.py           | 75 ++++++++++++++++++++++------
 paddlehub/module/nlp_module.py       | 16 ++++--
 requirements.txt                     |  2 +-
 4 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/paddlehub/compat/module/module_v1.py b/paddlehub/compat/module/module_v1.py
index 1ca899a1..92faaef5 100644
--- a/paddlehub/compat/module/module_v1.py
+++ b/paddlehub/compat/module/module_v1.py
@@ -282,6 +282,7 @@ class ModuleV1(object):
             model_filename=model_filename,
             params_filename=params_filename)
 
+    @paddle_utils.run_in_static_mode
     def export_onnx_model(self, dirname: str, **kwargs):
         '''
         Export the model to ONNX format.
@@ -289,8 +290,8 @@ class ModuleV1(object):
         Args:
             dirname(str): The directory to save the onnx model.
             **kwargs(dict|optional): Other export configuration options for compatibility, some may be removed
-            in the future. Don't use them If not necessary. Refer to https://github.com/PaddlePaddle/paddle2onnx
-            for more information.
+                in the future. Don't use them If not necessary. Refer to https://github.com/PaddlePaddle/paddle2onnx
+                for more information.
         '''
         feed_dict, fetch_dict, program = self.context(for_test=True, trainable=False)
         inputs = set([var.name for var in feed_dict.values()])
@@ -308,3 +309,12 @@ class ModuleV1(object):
             target_vars=outputs,
             save_file=save_file,
             **kwargs)
+
+    def sub_modules(self, recursive: bool = True):
+        '''
+        Get all sub modules.
+
+        Args:
+            recursive(bool): Whether to get sub modules recursively. Default to True.
+        '''
+        return {}
diff --git a/paddlehub/module/module.py b/paddlehub/module/module.py
index 38af3073..2e298abd 100644
--- a/paddlehub/module/module.py
+++ b/paddlehub/module/module.py
@@ -135,7 +135,7 @@ class RunModule(object):
     @property
     def _pretrained_model_path(self):
         _pretrained_model_attrs = [
-            'pretrained_model_path', 'rec_pretrained_model_path', 'default_pretrained_model_path'
+            'pretrained_model_path', 'rec_pretrained_model_path', 'default_pretrained_model_path', 'model_path'
         ]
 
         for _attr in _pretrained_model_attrs:
@@ -147,30 +147,77 @@ class RunModule(object):
 
         return None
 
-    def export_onnx_model(self, dirname: str, **kwargs):
+    def sub_modules(self, recursive: bool = True):
+        '''
+        Get all sub modules.
+
+        Args:
+            recursive(bool): Whether to get sub modules recursively. Default to True.
+        '''
+        _sub_modules = {}
+        for key, item in self.__dict__.items():
+            if id(item) == id(self):
+                continue
+
+            if isinstance(item, (RunModule, ModuleV1)):
+                _sub_modules[key] = item
+                if not recursive:
+                    continue
+
+                for _k, _v in item.sub_modules(recursive).items():
+                    _sub_modules['{}/{}'.format(key, _k)] = _v
+
+        return _sub_modules
+
+    def export_onnx_model(self,
+                          dirname: str,
+                          input_spec: List[paddle.static.InputSpec] = None,
+                          export_sub_modules: bool = True,
+                          **kwargs):
         '''
         Export the model to ONNX format.
 
         Args:
             dirname(str): The directory to save the onnx model.
-            **kwargs(dict|optional): Other export configuration options for compatibility, some may be removed
-            in the future. Don't use them If not necessary. Refer to https://github.com/PaddlePaddle/paddle2onnx
-            for more information.
+            input_spec(list): Describes the input of the saved model's forward method, which can be described by
+                InputSpec or example Tensor. If None, all input variables of the original Layer's forward method
+                would be the inputs of the saved model. Default None.
+            export_sub_modules(bool): Whether to export sub modules. Default to True.
+            **kwargs(dict|optional): Other export configuration options for compatibility, some may be removed in
+                the future. Don't use them If not necessary. Refer to https://github.com/PaddlePaddle/paddle2onnx
+                for more information.
         '''
+        if export_sub_modules:
+            for key, _sub_module in self.sub_modules().items():
+                try:
+                    sub_dirname = os.path.normpath(os.path.join(dirname, key))
+                    _sub_module.export_onnx_model(sub_dirname, export_sub_modules=export_sub_modules, **kwargs)
+                except Exception:
+                    utils.record_exception('Failed to export sub module {}'.format(_sub_module.name))
+
         if not self._pretrained_model_path:
             if isinstance(self, paddle.nn.Layer):
                 save_file = os.path.join(dirname, '{}'.format(self.name))
-                if hasattr(self, 'input_spec'):
-                    input_spec = self.input_sepc
-                else:
-                    _type = self.type.lower()
-                    if _type.startswith('cv/image'):
-                        input_spec = paddle.static.InputSpec(shape=[None, 3, None, None], dtype='float32')
+                if not input_spec:
+                    if hasattr(self, 'input_spec'):
+                        input_spec = self.input_spec
                     else:
-                        raise NotImplementedError
-                paddle.onnx.export(self, save_file, input_spec=[input_spec])
+                        _type = self.type.lower()
+                        if _type.startswith('cv/image'):
+                            input_spec = [paddle.static.InputSpec(shape=[None, 3, None, None], dtype='float32')]
+                        else:
+                            raise RuntimeError(
+                                'Module {} lacks `input_spec`, please specify it when calling `export_onnx_model`.'.
+                                format(self.name))
+
+                paddle.onnx.export(self, save_file, input_spec=input_spec, **kwargs)
                 return
-            raise NotImplementedError
+
+            raise RuntimeError('Module {} does not support exporting models in ONNX format.'.format(self.name))
+
+        if not os.path.exists(self._pretrained_model_path):
+            log.logger.warning('The model path of Module {} does not exist'.format(self.name))
+            return
 
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)
diff --git a/paddlehub/module/nlp_module.py b/paddlehub/module/nlp_module.py
index e30ecd01..d0b06231 100644
--- a/paddlehub/module/nlp_module.py
+++ b/paddlehub/module/nlp_module.py
@@ -407,6 +407,13 @@ class TransformerModule(RunModule, TextServing):
         'text-matching',
     ]
 
+    @property
+    def input_spec(self):
+        return [
+            paddle.static.InputSpec(shape=[None, None], dtype='int64'),
+            paddle.static.InputSpec(shape=[None, None], dtype='int64')
+        ]
+
     def _convert_text_to_input(self, tokenizer, texts: List[str], max_seq_len: int, split_char: str):
         pad_to_max_seq_len = False if self.task is None else True
         if self.task == 'token-cls':  # Extra processing of token-cls task
@@ -442,7 +449,7 @@ class TransformerModule(RunModule, TextServing):
                         pad_to_max_seq_len=True, is_split_into_words=is_split_into_words, return_length=True))
             else:
                 raise RuntimeError(
-                    'The input text must have one or two sequence, but got %d. Please check your inputs.' % len(text))
+                    'The input text must have one or two sequence, but got %d. Please check your inputs.' % len(texts))
         return encoded_inputs
 
     def _batchify(self, data: List[List[str]], max_seq_len: int, batch_size: int, split_char: str):
@@ -605,10 +612,9 @@ class TransformerModule(RunModule, TextServing):
                 results.extend(token_labels)
             elif self.task == None:
                 sequence_output, pooled_output = self(input_ids, segment_ids)
-                results.append([
-                    pooled_output.squeeze(0).numpy().tolist(),
-                    sequence_output.squeeze(0).numpy().tolist()
-                ])
+                results.append(
+                    [pooled_output.squeeze(0).numpy().tolist(),
+                     sequence_output.squeeze(0).numpy().tolist()])
 
         return results
 
diff --git a/requirements.txt b/requirements.txt
index ead4c10b..31f4aca0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,7 @@ numpy
 matplotlib
 opencv-python
 packaging
-paddle2onnx >= 0.5
+paddle2onnx >= 0.5.1
 paddlenlp >= 2.0.0rc5
 Pillow
 pyyaml
-- 
GitLab