From 07bf4e23cce2e45c7899f4db9bdb53a5cc1d3123 Mon Sep 17 00:00:00 2001
From: kinghuin <kinghuin_chull@163.com>
Date: Wed, 20 Nov 2019 16:31:24 +0800
Subject: [PATCH] debug ernie tiny reader

---
 demo/text-classification/run_classifier.sh | 4 ++--
 paddlehub/reader/tokenization.py           | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/demo/text-classification/run_classifier.sh b/demo/text-classification/run_classifier.sh
index d297cb74..fad3e471 100644
--- a/demo/text-classification/run_classifier.sh
+++ b/demo/text-classification/run_classifier.sh
@@ -13,8 +13,8 @@ python -u text_classifier.py \
                    --weight_decay=0.01 \
                    --max_seq_len=128 \
                    --num_epoch=3 \
-                   --use_pyreader=True \
-                   --use_data_parallel=True
+                   --use_pyreader=False \
+                   --use_data_parallel=False
 
 # Recommending hyper parameters for difference task
 # for ChineseGLUE:
diff --git a/paddlehub/reader/tokenization.py b/paddlehub/reader/tokenization.py
index f572099d..e3a55451 100644
--- a/paddlehub/reader/tokenization.py
+++ b/paddlehub/reader/tokenization.py
@@ -201,8 +201,9 @@ class WSSPTokenizer(object):
             else:
                 in_vocab.append(unk_token)
         text = ' '.join(in_vocab)
-        ret = self.sp_model.EncodeAsPieces(text)
-        return ret
+        tokens = self.sp_model.EncodeAsPieces(text)
+        print(tokens)
+        return tokens
 
     def convert_tokens_to_ids(self, tokens):
         return convert_by_vocab(self.vocab, tokens)
-- 
GitLab