diff --git a/paddlehub/reader/tokenization.py b/paddlehub/reader/tokenization.py
index e3a55451a43aace5465b1f59249c1c5bfbb87c72..fab4121ff4a147dde007c4f19e468cf9f9917b0c 100644
--- a/paddlehub/reader/tokenization.py
+++ b/paddlehub/reader/tokenization.py
@@ -194,16 +194,15 @@ class WSSPTokenizer(object):
         text = text.split(' ')
         if self.lower:
             text = [s.lower() for s in text]
-        in_vocab = []
-        for word in text:
-            if word in self.vocab:
-                in_vocab.append(word)
-            else:
-                in_vocab.append(unk_token)
-        text = ' '.join(in_vocab)
+        text = ' '.join(text)
         tokens = self.sp_model.EncodeAsPieces(text)
-        print(tokens)
-        return tokens
+        in_vocab_tokens = []
+        for token in tokens:
+            if token in self.vocab:
+                in_vocab_tokens.append(token)
+            else:
+                in_vocab_tokens.append(unk_token)
+        return in_vocab_tokens
 
     def convert_tokens_to_ids(self, tokens):
         return convert_by_vocab(self.vocab, tokens)
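
This change moves the out-of-vocabulary check to after SentencePiece encoding and drops a stray debug `print`. Previously, whitespace-split words were matched against `self.vocab` before encoding; since a SentencePiece vocabulary holds subword pieces (with the `▁` word-boundary marker) rather than whole words, ordinary words could be replaced by `unk_token` before the model ever saw them. Below is a minimal before/after sketch; the `vocab` contents and the `encode_as_pieces` stub are hypothetical stand-ins for `self.vocab` and `self.sp_model.EncodeAsPieces`:

```python
# Minimal sketch of the behavior before and after this change.
# Assumption: `vocab` and `encode_as_pieces` are hypothetical stand-ins
# for self.vocab and self.sp_model.EncodeAsPieces; a real run would load
# a trained SentencePiece model instead.

UNK = '[UNK]'

# SentencePiece vocabularies store subword *pieces* (note the U+2581
# word-boundary marker), not whitespace-delimited words.
vocab = {'▁hello', '▁wor', 'ld'}

def encode_as_pieces(text):
    # Hypothetical stand-in for sp_model.EncodeAsPieces.
    table = {'hello': ['▁hello'], 'world': ['▁wor', 'ld']}
    pieces = []
    for word in text.split(' '):
        pieces.extend(table.get(word, ['▁', UNK]))
    return pieces

def tokenize_old(text):
    # Old order: words are checked against the piece vocab *before*
    # encoding, so in this sketch plain words never match and collapse
    # to UNK before SentencePiece runs.
    words = [w if w in vocab else UNK for w in text.split(' ')]
    return encode_as_pieces(' '.join(words))

def tokenize_new(text):
    # New order: encode first, then map out-of-vocab pieces to UNK.
    return [p if p in vocab else UNK for p in encode_as_pieces(text)]

print(tokenize_old('hello world'))  # ['▁', '[UNK]', '▁', '[UNK]']
print(tokenize_new('hello world'))  # ['▁hello', '▁wor', 'ld']
```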