未验证 提交 7240945e 作者: Zeyu Chen 提交者: GitHub

Fix bug of get_embedding interface

Fix bugs
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -114,7 +114,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -114,7 +114,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
...@@ -218,7 +218,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -218,7 +218,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -107,7 +107,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -107,7 +107,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.2/demo/text-classification)、[序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.2/demo/sequence-labeling) 利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.2/demo/text-classification)、[序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.2/demo/sequence-labeling)
......
...@@ -94,7 +94,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -94,7 +94,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.4.0/demo/text-classification) 利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.4.0/demo/text-classification)
......
...@@ -100,7 +100,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -100,7 +100,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.4.0/demo/text-classification) 利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.4.0/demo/text-classification)
......
...@@ -103,7 +103,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -103,7 +103,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.2/demo/text-classification) 利用该PaddleHub Module Fine-tune示例,可参考[文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v1.2/demo/text-classification)
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text ...@@ -96,7 +96,7 @@ embedding_result = module.get_embedding(texts=[["Sample1_text_a"],["Sample2_text
# Use "get_params_layer" to get params layer and used to ULMFiTStrategy. # Use "get_params_layer" to get params layer and used to ULMFiTStrategy.
params_layer = module.get_params_layer() params_layer = module.get_params_layer()
strategy = hub.finetune.strategy.ULMFiTStrategy(params_layer=params_layer) strategy = hub.finetune.strategy.ULMFiTStrategy(frz_params_layer=params_layer, dis_params_layer=params_layer)
``` ```
## 查看代码 ## 查看代码
......
...@@ -511,10 +511,6 @@ class CombinedStrategy(DefaultStrategy): ...@@ -511,10 +511,6 @@ class CombinedStrategy(DefaultStrategy):
unfreeze_depths=self. unfreeze_depths=self.
sorted_depth[:self.max_depth * self.epoch // sorted_depth[:self.max_depth * self.epoch //
self.scheduler["gradual_unfreeze"]["blocks"]]) self.scheduler["gradual_unfreeze"]["blocks"]])
else:
logger.warning(
"The max op-depth in the network is %s. That results in that can't use the gradual unfreeze finetune strategy."
% (self.max_depth))
elif self.scheduler["gradual_unfreeze"]["params_layer"]: elif self.scheduler["gradual_unfreeze"]["params_layer"]:
max_layer = max( max_layer = max(
self.scheduler["gradual_unfreeze"]["params_layer"].values()) self.scheduler["gradual_unfreeze"]["params_layer"].values())
...@@ -631,8 +627,9 @@ class ULMFiTStrategy(CombinedStrategy): ...@@ -631,8 +627,9 @@ class ULMFiTStrategy(CombinedStrategy):
ratio=32, ratio=32,
dis_blocks=3, dis_blocks=3,
factor=2.6, factor=2.6,
dis_params_layer=None,
frz_blocks=3, frz_blocks=3,
params_layer=None): frz_params_layer=None):
scheduler = { scheduler = {
"slanted_triangle": { "slanted_triangle": {
...@@ -641,12 +638,12 @@ class ULMFiTStrategy(CombinedStrategy): ...@@ -641,12 +638,12 @@ class ULMFiTStrategy(CombinedStrategy):
}, },
"gradual_unfreeze": { "gradual_unfreeze": {
"blocks": frz_blocks, "blocks": frz_blocks,
"params_layer": params_layer "params_layer": frz_params_layer
}, },
"discriminative": { "discriminative": {
"blocks": dis_blocks, "blocks": dis_blocks,
"factor": factor, "factor": factor,
"params_layer": params_layer "params_layer": dis_params_layer
} }
} }
regularization = {} regularization = {}
......
...@@ -36,7 +36,7 @@ from visualdl import LogWriter ...@@ -36,7 +36,7 @@ from visualdl import LogWriter
import paddlehub as hub import paddlehub as hub
from paddlehub.common.paddle_helper import dtype_map, clone_program from paddlehub.common.paddle_helper import dtype_map, clone_program
from paddlehub.common.utils import mkdir, version_compare from paddlehub.common.utils import mkdir
from paddlehub.common.dir import tmp_dir from paddlehub.common.dir import tmp_dir
from paddlehub.common.logger import logger from paddlehub.common.logger import logger
from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
...@@ -951,12 +951,6 @@ class BaseTask(object): ...@@ -951,12 +951,6 @@ class BaseTask(object):
Returns: Returns:
RunState: the running result of predict phase RunState: the running result of predict phase
""" """
if isinstance(self._base_data_reader, hub.reader.LACClassifyReader):
raise Exception(
"LACClassifyReader does not support predictor, please close accelerate_mode"
)
global_run_states = [] global_run_states = []
period_run_states = [] period_run_states = []
...@@ -998,6 +992,12 @@ class BaseTask(object): ...@@ -998,6 +992,12 @@ class BaseTask(object):
Returns: Returns:
RunState: the running result of predict phase RunState: the running result of predict phase
""" """
if accelerate_mode and isinstance(self._base_data_reader,
hub.reader.LACClassifyReader):
logger.warning(
"LACClassifyReader does not support predictor, the accelerate_mode is closed now."
)
accelerate_mode = False
self.accelerate_mode = accelerate_mode self.accelerate_mode = accelerate_mode
with self.phase_guard(phase="predict"): with self.phase_guard(phase="predict"):
......
...@@ -205,7 +205,7 @@ def get_predictions(all_examples, all_features, all_results, n_best_size, ...@@ -205,7 +205,7 @@ def get_predictions(all_examples, all_features, all_results, n_best_size,
for (feature_index, feature) in enumerate(features): for (feature_index, feature) in enumerate(features):
if feature.unique_id not in unique_id_to_result: if feature.unique_id not in unique_id_to_result:
logger.info( logger.info(
"As using pyreader, the last one batch is so small that the feature %s in the last batch is discarded " "As using multidevice, the last one batch is so small that the feature %s in the last batch is discarded "
% feature.unique_id) % feature.unique_id)
continue continue
result = unique_id_to_result[feature.unique_id] result = unique_id_to_result[feature.unique_id]
......
...@@ -397,7 +397,8 @@ class TransformerModule(NLPBaseModule): ...@@ -397,7 +397,8 @@ class TransformerModule(NLPBaseModule):
return inputs, outputs, module_program return inputs, outputs, module_program
def get_embedding(self, texts, use_gpu=False, batch_size=1): def get_embedding(self, texts, max_seq_len=512, use_gpu=False,
batch_size=1):
""" """
get pooled_output and sequence_output for input texts. get pooled_output and sequence_output for input texts.
Warnings: this method depends on Paddle Inference Library, it may not work properly in PaddlePaddle <= 1.6.2. Warnings: this method depends on Paddle Inference Library, it may not work properly in PaddlePaddle <= 1.6.2.
...@@ -405,6 +406,7 @@ class TransformerModule(NLPBaseModule): ...@@ -405,6 +406,7 @@ class TransformerModule(NLPBaseModule):
Args: Args:
texts (list): each element is a text sample, each sample include text_a and text_b where text_b can be omitted. texts (list): each element is a text sample, each sample include text_a and text_b where text_b can be omitted.
for example: [[sample0_text_a, sample0_text_b], [sample1_text_a, sample1_text_b], ...] for example: [[sample0_text_a, sample0_text_b], [sample1_text_a, sample1_text_b], ...]
max_seq_len (int): the max sequence length, default 512.
use_gpu (bool): use gpu or not, default False. use_gpu (bool): use gpu or not, default False.
batch_size (int): the data batch size, default 1. batch_size (int): the data batch size, default 1.
...@@ -417,12 +419,12 @@ class TransformerModule(NLPBaseModule): ...@@ -417,12 +419,12 @@ class TransformerModule(NLPBaseModule):
) or self.emb_job["batch_size"] != batch_size or self.emb_job[ ) or self.emb_job["batch_size"] != batch_size or self.emb_job[
"use_gpu"] != use_gpu: "use_gpu"] != use_gpu:
inputs, outputs, program = self.context( inputs, outputs, program = self.context(
trainable=True, max_seq_len=self.MAX_SEQ_LEN) trainable=True, max_seq_len=max_seq_len)
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
dataset=None, dataset=None,
vocab_path=self.get_vocab_path(), vocab_path=self.get_vocab_path(),
max_seq_len=self.MAX_SEQ_LEN, max_seq_len=max_seq_len,
sp_model_path=self.get_spm_path() if hasattr( sp_model_path=self.get_spm_path() if hasattr(
self, "get_spm_path") else None, self, "get_spm_path") else None,
word_dict_path=self.get_word_dict_path() if hasattr( word_dict_path=self.get_word_dict_path() if hasattr(
......
...@@ -1113,7 +1113,7 @@ class LACClassifyReader(BaseReader): ...@@ -1113,7 +1113,7 @@ class LACClassifyReader(BaseReader):
return processed return processed
if not self.has_processed[phase]: if not self.has_processed[phase] or phase == "predict":
logger.info( logger.info(
"processing %s data now... this may take a few minutes" % phase) "processing %s data now... this may take a few minutes" % phase)
for i in range(len(data)): for i in range(len(data)):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册