提交 42afc3e5 编写于 作者: R Rohan Jain 提交者: TensorFlower Gardener

Adding unicode / PY3 support for feature column vocab files.

PiperOrigin-RevId: 285862836
Change-Id: I2eec29c2300dfbc99f29b30b56e3e7dfea6d047e
上级 3aa42f1c
......@@ -99,6 +99,7 @@ filegroup(
"testdata/embedding.ckpt.data-00000-of-00001",
"testdata/embedding.ckpt.index",
"testdata/embedding.ckpt.meta",
"testdata/unicode_vocabulary",
"testdata/warriors_vocabulary.txt",
"testdata/wire_vocabulary.txt",
],
......
......@@ -1665,7 +1665,7 @@ def categorical_column_with_vocabulary_file_v2(key,
if not gfile.Exists(vocabulary_file):
raise ValueError('vocabulary_file in {} does not exist.'.format(key))
-with gfile.GFile(vocabulary_file) as f:
+with gfile.GFile(vocabulary_file, mode='rb') as f:
vocabulary_size = sum(1 for _ in f)
logging.info(
'vocabulary_size = %d in %s is inferred from the number of elements '
......
......@@ -3886,6 +3886,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
'python/feature_column/testdata/wire_vocabulary.txt')
self._wire_vocabulary_size = 3
# Contains unicode characters.
self._unicode_vocabulary_file_name = test.test_src_dir_path(
'python/feature_column/testdata/unicode_vocabulary')
@test_util.run_deprecated_v1
def test_defaults(self):
column = fc.categorical_column_with_vocabulary_file(
......@@ -3898,6 +3902,17 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
}, column.parse_example_spec)
self.assertTrue(column._is_v2_column)
@test_util.run_deprecated_v1
def test_defaults_unicode(self):
# Builds a vocabulary-file categorical column from the unicode vocabulary
# fixture and checks the column's default properties.
column = fc.categorical_column_with_vocabulary_file(
key='aaa', vocabulary_file=self._unicode_vocabulary_file_name)
# With only `key` given, both the column name and key are expected to be 'aaa'.
self.assertEqual('aaa', column.name)
self.assertEqual('aaa', column.key)
# No vocabulary_size is passed here, so num_buckets is presumably inferred
# from the number of lines in the fixture file (165 expected) -- TODO confirm
# against the checked-in testdata/unicode_vocabulary file.
self.assertEqual(165, column.num_buckets)
self.assertEqual({'aaa': parsing_ops.VarLenFeature(dtypes.string)},
column.parse_example_spec)
self.assertTrue(column._is_v2_column)
def test_key_should_be_string(self):
with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
fc.categorical_column_with_vocabulary_file(
......
t
/
e
o
a
s
p
i
c
n
.
r
h
m
x
l
d
w
-
u
g
b
:
2
0
1
f
%
8
3
5
k
9
4
y
7
6
v
=
_
?
A
D
j
&
F
z
E
B
S
C
q
M
L
I
R
T
N
W
P
U
G
Z
O
V
Y
H
J
X
Q
K
+
#
,
;
~
)
@
!
|
'
(
$
*
]
[
{
}
\
^
`
"
>
<
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册