提交 e34d18a5 编写于 作者: A A. Unique TensorFlower 提交者: TensorFlower Gardener

Add support for FixedLenSequenceFeature for parse_example/parse_single_example.

With this you may parse a variable-length feature of an Example into a padded Tensor.
Change: 150009250
上级 b6c0b14c
......@@ -31,6 +31,7 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import parsing_ops
......@@ -735,17 +736,50 @@ class ParseExampleTest(test.TestCase):
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenFeature((None, 2, 1), dtype=dtypes.float32),
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True),
bname:
parsing_ops.FixedLenFeature(
(None, 1, 1, 1), dtype=dtypes.string),
parsing_ops.FixedLenSequenceFeature(
(1, 1, 1), dtype=dtypes.string, allow_missing=True),
cname:
parsing_ops.FixedLenFeature((None,), dtype=dtypes.int64),
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.int64, allow_missing=True),
dname:
parsing_ops.FixedLenFeature((None,), dtype=dtypes.string),
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.string, allow_missing=True),
}
}, expected_output)
# Test with padding values.
expected_output_custom_padding = dict(expected_output)
expected_output_custom_padding[aname] = np.array(
[
[-2, -2, -2, -2],
[1, 1, -2, -2],
[-1, -1, 2, 2],
[-2, -2, -2, -2],
],
dtype=np.float32).reshape(4, 2, 2, 1)
self._test({
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True,
default_value=-2.0),
bname:
parsing_ops.FixedLenSequenceFeature(
(1, 1, 1), dtype=dtypes.string, allow_missing=True),
cname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.int64, allow_missing=True),
dname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.string, allow_missing=True),
}
}, expected_output_custom_padding)
# Change number of required values so the inputs are not a
# multiple of this size.
self._test(
......@@ -754,17 +788,34 @@ class ParseExampleTest(test.TestCase):
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenFeature(
(None, 2, 1), dtype=dtypes.float32),
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True),
bname:
parsing_ops.FixedLenFeature(
(None, 2, 1, 1), dtype=dtypes.string),
parsing_ops.FixedLenSequenceFeature(
(2, 1, 1), dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(
errors_impl.OpError, "Name: in3, Key: b, Index: 2. "
"Number of bytes values is not a multiple of stride length."))
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True,
default_value=[]),
bname:
parsing_ops.FixedLenSequenceFeature(
(2, 1, 1), dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(ValueError,
"Cannot reshape a tensor with 0 elements to shape"))
self._test(
{
"example_names": example_names,
......@@ -772,14 +823,50 @@ class ParseExampleTest(test.TestCase):
"features": {
aname:
parsing_ops.FixedLenFeature(
(None, 2, 1), dtype=dtypes.float32, default_value=[]),
(None, 2, 1), dtype=dtypes.float32),
bname:
parsing_ops.FixedLenSequenceFeature(
(2, 1, 1), dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(ValueError,
"First dimension of shape for feature a unknown. "
"Consider using FixedLenSequenceFeature."))
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
cname:
parsing_ops.FixedLenFeature(
(None, 2, 1, 1), dtype=dtypes.string),
(1, None), dtype=dtypes.int64, default_value=[[1]]),
}
},
expected_err=(ValueError,
"Cannot reshape a tensor with 0 elements to shape"))
"All dimensions of shape for feature c need to be known "
r"but received \(1, None\)."))
self._test({
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True),
bname:
parsing_ops.FixedLenSequenceFeature(
(1, 1, 1), dtype=dtypes.string, allow_missing=True),
cname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.int64, allow_missing=False),
dname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.string, allow_missing=True),
}
}, expected_err=(ValueError,
"Unsupported: FixedLenSequenceFeature requires "
"allow_missing to be True."))
class ParseSingleExampleTest(test.TestCase):
......@@ -801,7 +888,8 @@ class ParseSingleExampleTest(test.TestCase):
# Check shapes.
for k, f in kwargs["features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
self.assertEqual(tuple(out[k].get_shape()), f.shape)
self.assertEqual(tuple(out[k].get_shape()),
tensor_shape.as_shape(f.shape))
elif isinstance(f, parsing_ops.VarLenFeature):
self.assertEqual(
tuple(out[k].indices.get_shape().as_list()), (None, 1))
......@@ -864,7 +952,7 @@ class ParseSingleExampleTest(test.TestCase):
(3, 3), dtypes.string, default_value=b_default),
# Feature "c" must be provided, since it has no default_value.
"c":
parsing_ops.FixedLenFeature((2,), dtypes.float32),
parsing_ops.FixedLenFeature(2, dtypes.float32),
}
},
expected_output)
......@@ -1040,7 +1128,7 @@ class ParseSequenceExampleTest(test.TestCase):
"b":
parsing_ops.FixedLenSequenceFeature((2, 2), dtypes.string),
"c":
parsing_ops.FixedLenSequenceFeature((2,), dtypes.float32),
parsing_ops.FixedLenSequenceFeature(2, dtypes.float32),
"d":
parsing_ops.FixedLenSequenceFeature(
(5,), dtypes.float32, allow_missing=True),
......
......@@ -135,29 +135,41 @@ class FixedLenFeature(collections.namedtuple(
shape: Shape of input data.
dtype: Data type of input.
default_value: Value to be used if an example is missing this feature. It
must be compatible with `dtype`.
must be compatible with `dtype` and of the specified `shape`.
"""
pass
FixedLenFeature.__new__.__defaults__ = (None,)
# NOTE: If we ever support a default_value for sequence dense features, we can
# remove this class and use FixedLenFeature in its place.
class FixedLenSequenceFeature(collections.namedtuple(
"FixedLenSequenceFeature", ["shape", "dtype", "allow_missing"])):
"""Configuration for a dense input feature in a sequence item.
"FixedLenSequenceFeature",
["shape", "dtype", "allow_missing", "default_value"])):
"""Configuration for parsing a variable-length input feature into a `Tensor`.
The resulting `Tensor` of parsing a single `SequenceExample` or `Example` has
a static `shape` of `[None] + shape` and the specified `dtype`.
The resulting `Tensor` of parsing `batch_size` many `Example`s has
a static `shape` of `[batch_size, None] + shape` and the specified `dtype`.
The entries in the `batch` from different `Example`s will be padded with
`default_value` to the maximum length present in the `batch`.
To treat a sparse input as dense, provide `allow_missing=True`; otherwise,
the parse functions will fail on any examples missing this feature.
Fields:
shape: Shape of input data.
shape: Shape of input data for dimension 2 and higher. First dimension is
of variable length `None`.
dtype: Data type of input.
allow_missing: Whether to allow this feature to be missing from a feature
list item.
list item. This is configurable only when parsing a `SequenceExample`;
when parsing `Example`s it must be set to `True`.
default_value: Scalar value to be used to pad multiple `Example`s to their
maximum length. Irrelevant for parsing a single `Example` or
`SequenceExample`. Defaults to "" for dtype string and 0 otherwise
(optional).
"""
pass
FixedLenSequenceFeature.__new__.__defaults__ = (False,)
FixedLenSequenceFeature.__new__.__defaults__ = (False, None)
def _features_to_raw_params(features, types):
......@@ -236,6 +248,15 @@ def _features_to_raw_params(features, types):
raise ValueError("Missing type for feature %s." % key)
if feature.shape is None:
raise ValueError("Missing shape for feature %s." % key)
feature_tensor_shape = tensor_shape.as_shape(feature.shape)
if (feature.shape and feature_tensor_shape.ndims and
feature_tensor_shape.dims[0].value is None):
raise ValueError("First dimension of shape for feature %s unknown. "
"Consider using FixedLenSequenceFeature." % key)
if (feature.shape is not None and
not feature_tensor_shape.is_fully_defined()):
raise ValueError("All dimensions of shape for feature %s need to be "
"known but received %s." % (key, str(feature.shape)))
dense_keys.append(key)
dense_shapes.append(feature.shape)
dense_types.append(feature.dtype)
......@@ -253,6 +274,8 @@ def _features_to_raw_params(features, types):
dense_types.append(feature.dtype)
if feature.allow_missing:
dense_defaults[key] = None
if feature.default_value is not None:
dense_defaults[key] = feature.default_value
else:
raise ValueError("Invalid feature %s:%s." % (key, feature))
return (
......@@ -346,9 +369,15 @@ def parse_example(serialized, features, name=None, example_names=None):
value, we will fail if that `Feature` is missing from any example in
`serialized`.
Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
(or `tf.float32` if not specified) and shape
`(serialized.size(), None) + df.shape`.
All examples in `serialized` will be padded with `default_value` along the
second dimension.
Examples:
For example, if one expects a `tf.float32` VarlenFeature `ft` and three
For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
serialized `Example`s are provided:
```
......@@ -370,6 +399,13 @@ def parse_example(serialized, features, name=None, example_names=None):
dense_shape=(3, 2)) }
```
If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
`shape=[]` is used then the output will look like:
```
{"ft": [[1.0, 2.0], [3.0, -1.0]]}
```
Given two `Example` input protos in `serialized`:
```
......@@ -505,9 +541,23 @@ def parse_example(serialized, features, name=None, example_names=None):
"""
if not features:
raise ValueError("Missing: features was %s." % features)
if features:
modified_features = dict(features) # Create a copy to modify
for key, feature in features.items():
if isinstance(feature, FixedLenSequenceFeature):
if not feature.allow_missing:
raise ValueError("Unsupported: FixedLenSequenceFeature requires "
"allow_missing to be True.")
modified_features[key] = FixedLenSequenceFeature(
[None] + list(feature.shape),
feature.dtype,
feature.allow_missing,
feature.default_value)
features = modified_features
(sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
dense_shapes) = _features_to_raw_params(
features, [VarLenFeature, SparseFeature, FixedLenFeature])
features,
[VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature])
outputs = _parse_example_raw(
serialized, example_names, sparse_keys, sparse_types, dense_keys,
dense_types, dense_defaults, dense_shapes, name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册