From f4560fa82b980e9a7dc02aaaf1b55760c3ad4190 Mon Sep 17 00:00:00 2001 From: Zeyu Chen Date: Mon, 22 Apr 2019 22:19:04 +0800 Subject: [PATCH] remove extract embedding reader --- demo/senta/run_finetune.sh | 2 +- paddlehub/reader/nlp_reader.py | 33 +-------------------------------- 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/demo/senta/run_finetune.sh b/demo/senta/run_finetune.sh index 9e91a975..ea4c290c 100644 --- a/demo/senta/run_finetune.sh +++ b/demo/senta/run_finetune.sh @@ -5,6 +5,6 @@ CKPT_DIR="./ckpt_${DATASET}" python -u senta_finetune.py \ --batch_size=24 \ - --use_gpu=True \ + --use_gpu=False \ --checkpoint_dir=${CKPT_DIR} \ --num_epoch=3 diff --git a/paddlehub/reader/nlp_reader.py b/paddlehub/reader/nlp_reader.py index 1751d432..67041406 100644 --- a/paddlehub/reader/nlp_reader.py +++ b/paddlehub/reader/nlp_reader.py @@ -36,7 +36,6 @@ class BaseReader(object): label_map_config=None, max_seq_len=512, do_lower_case=True, - in_tokens=False, random_seed=None): self.max_seq_len = max_seq_len self.tokenizer = tokenization.FullTokenizer( @@ -46,7 +45,7 @@ class BaseReader(object): self.pad_id = self.vocab["[PAD]"] self.cls_id = self.vocab["[CLS]"] self.sep_id = self.vocab["[SEP]"] - self.in_tokens = in_tokens + self.in_tokens = False np.random.seed(random_seed) @@ -352,36 +351,6 @@ class SequenceLabelReader(BaseReader): return record -class ExtractEmbeddingReader(BaseReader): - def _pad_batch_records(self, batch_records): - batch_token_ids = [record.token_ids for record in batch_records] - batch_text_type_ids = [record.text_type_ids for record in batch_records] - batch_position_ids = [record.position_ids for record in batch_records] - - # padding - padded_token_ids, input_mask, seq_lens = pad_batch_data( - batch_token_ids, - pad_idx=self.pad_id, - max_seq_len=self.max_seq_len, - return_input_mask=True, - return_seq_lens=True) - padded_text_type_ids = pad_batch_data( - batch_text_type_ids, - pad_idx=self.pad_id, - max_seq_len=self.max_seq_len) - padded_position_ids = pad_batch_data( - batch_position_ids, - pad_idx=self.pad_id, - max_seq_len=self.max_seq_len) - - return_list = [ - padded_token_ids, padded_text_type_ids, padded_position_ids, - input_mask, seq_lens - ] - - return return_list - - class LACClassifyReader(object): def __init__(self, dataset, vocab_path): self.dataset = dataset -- GitLab