From 07bf4e23cce2e45c7899f4db9bdb53a5cc1d3123 Mon Sep 17 00:00:00 2001 From: kinghuin Date: Wed, 20 Nov 2019 16:31:24 +0800 Subject: [PATCH] debug ernie tiny reader --- demo/text-classification/run_classifier.sh | 4 ++-- paddlehub/reader/tokenization.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/demo/text-classification/run_classifier.sh b/demo/text-classification/run_classifier.sh index d297cb74..fad3e471 100644 --- a/demo/text-classification/run_classifier.sh +++ b/demo/text-classification/run_classifier.sh @@ -13,8 +13,8 @@ python -u text_classifier.py \ --weight_decay=0.01 \ --max_seq_len=128 \ --num_epoch=3 \ - --use_pyreader=True \ - --use_data_parallel=True + --use_pyreader=False \ + --use_data_parallel=False # Recommending hyper parameters for difference task # for ChineseGLUE: diff --git a/paddlehub/reader/tokenization.py b/paddlehub/reader/tokenization.py index f572099d..e3a55451 100644 --- a/paddlehub/reader/tokenization.py +++ b/paddlehub/reader/tokenization.py @@ -201,8 +201,9 @@ class WSSPTokenizer(object): else: in_vocab.append(unk_token) text = ' '.join(in_vocab) - ret = self.sp_model.EncodeAsPieces(text) - return ret + tokens = self.sp_model.EncodeAsPieces(text) + print(tokens) + return tokens def convert_tokens_to_ids(self, tokens): return convert_by_vocab(self.vocab, tokens) -- GitLab