提交 f52ad5e5 编写于 作者: R Rai220

Add detailed error messages for out-of-vocabulary symbols

上级 dad1c272
# -*- coding: utf-8 -*-
"""Contains the text featurizer class."""
from __future__ import absolute_import
from __future__ import division
......@@ -32,8 +33,12 @@ class TextFeaturizer(object):
:return: List of char-level token indices.
:rtype: list
"""
tokens = self._char_tokenize(text)
return [self._vocab_dict[token] for token in tokens]
result = []
try:
result = [self._vocab_dict[token] for token in text]
except KeyError, e:
print('Incorrect symbol "%s" found in string: ' % str(e).encode('utf-8'), text.encode('utf-8'))
return result
@property
def vocab_size(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册