Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
qq_38905368
tensorflow
提交
fd9fa066
T
tensorflow
项目概览
qq_38905368
/
tensorflow
与 Fork 源项目一致
从无法访问的项目Fork
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
tensorflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
fd9fa066
编写于
11月 21, 2016
作者:
M
Mustafa Ispir
提交者:
TensorFlower Gardener
11月 21, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Renamed 'hashed_embedding_x' as 'scattered_embedding_x' to eliminate confusion.
Change: 139849495
上级
97e39be4
变更
6
显示空白变更内容
内联
并排
Showing
6 changed files
with
76 additions
and
59 deletions
+76
-59
tensorflow/contrib/layers/__init__.py
tensorflow/contrib/layers/__init__.py
+1
-1
tensorflow/contrib/layers/python/layers/embedding_ops.py
tensorflow/contrib/layers/python/layers/embedding_ops.py
+21
-16
tensorflow/contrib/layers/python/layers/embedding_ops_test.py
...orflow/contrib/layers/python/layers/embedding_ops_test.py
+11
-11
tensorflow/contrib/layers/python/layers/feature_column.py
tensorflow/contrib/layers/python/layers/feature_column.py
+30
-16
tensorflow/contrib/layers/python/layers/feature_column_ops.py
...orflow/contrib/layers/python/layers/feature_column_ops.py
+4
-4
tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
...w/contrib/layers/python/layers/feature_column_ops_test.py
+9
-11
未找到文件。
tensorflow/contrib/layers/__init__.py
浏览文件 @
fd9fa066
...
...
@@ -89,7 +89,7 @@ Feature columns provide a mechanism to map data to a model.
@@create_feature_spec_for_parsing
@@crossed_column
@@embedding_column
@@
hash
ed_embedding_column
@@
scatter
ed_embedding_column
@@input_from_feature_columns
@@joint_weighted_sum_from_feature_columns
@@make_place_holder_tensors_for_base_features
...
...
tensorflow/contrib/layers/python/layers/embedding_ops.py
浏览文件 @
fd9fa066
...
...
@@ -31,8 +31,10 @@ from tensorflow.python.ops import sparse_ops
from
tensorflow.python.ops
import
variables
from
tensorflow.python.platform
import
tf_logging
as
logging
__all__
=
[
"safe_embedding_lookup_sparse"
,
"hashed_embedding_lookup"
,
"hashed_embedding_lookup_sparse"
,
"embedding_lookup_unique"
]
__all__
=
[
"safe_embedding_lookup_sparse"
,
"scattered_embedding_lookup"
,
"scattered_embedding_lookup_sparse"
,
"embedding_lookup_unique"
]
def
safe_embedding_lookup_sparse
(
embedding_weights
,
...
...
@@ -176,7 +178,10 @@ def _prune_invalid_ids(sparse_ids, sparse_weights):
return
sparse_ids
,
sparse_weights
def
hashed_embedding_lookup
(
params
,
values
,
dimension
,
name
=
None
,
def
scattered_embedding_lookup
(
params
,
values
,
dimension
,
name
=
None
,
hash_key
=
None
):
"""Looks up embeddings using parameter hashing for each value in `values`.
...
...
@@ -223,7 +228,7 @@ def hashed_embedding_lookup(params, values, dimension, name=None,
if
not
isinstance
(
params
,
list
):
params
=
[
params
]
with
ops
.
name_scope
(
name
,
"
hash
ed_embedding_lookup"
,
with
ops
.
name_scope
(
name
,
"
scatter
ed_embedding_lookup"
,
params
+
[
dimension
,
values
]):
if
dimension
<=
0
:
raise
ValueError
(
"Dimension should be >0 not %d"
%
dimension
)
...
...
@@ -266,7 +271,7 @@ def hashed_embedding_lookup(params, values, dimension, name=None,
0
,
[
values_shape
,
[
dimension
]]))
def
hash
ed_embedding_lookup_sparse
(
params
,
def
scatter
ed_embedding_lookup_sparse
(
params
,
sparse_values
,
dimension
,
combiner
=
None
,
...
...
@@ -275,7 +280,7 @@ def hashed_embedding_lookup_sparse(params,
hash_key
=
None
):
"""Looks up embeddings of a sparse feature using parameter hashing.
See `tf.contrib.layers.
hash
ed_embedding_lookup` for embedding with hashing.
See `tf.contrib.layers.
scatter
ed_embedding_lookup` for embedding with hashing.
Args:
params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
...
...
@@ -311,7 +316,7 @@ def hashed_embedding_lookup_sparse(params,
if
not
isinstance
(
sparse_values
,
sparse_tensor
.
SparseTensor
):
raise
TypeError
(
"sparse_values must be SparseTensor"
)
with
ops
.
name_scope
(
name
,
"
hashed_sparse_embedding_lookup
"
,
with
ops
.
name_scope
(
name
,
"
scattered_embedding_lookup_sparse
"
,
params
+
[
sparse_values
])
as
scope
:
# Fill in the empty rows.
if
default_value
is
None
:
...
...
@@ -330,8 +335,8 @@ def hashed_embedding_lookup_sparse(params,
values
=
sparse_values
.
values
values
,
idx
=
array_ops
.
unique
(
values
)
embeddings
=
hashed_embedding_lookup
(
params
,
values
,
dimension
,
hash_key
=
hash_key
)
embeddings
=
scattered_embedding_lookup
(
params
,
values
,
dimension
,
hash_key
=
hash_key
)
if
combiner
==
"sum"
:
embeddings
=
math_ops
.
sparse_segment_sum
(
embeddings
,
idx
,
segment_ids
,
...
...
tensorflow/contrib/layers/python/layers/embedding_ops_test.py
浏览文件 @
fd9fa066
...
...
@@ -261,7 +261,7 @@ class SafeEmbeddingLookupSparseTest(tf.test.TestCase):
embedding_weights
,
sparse_ids
,
sparse_weights
)
class
Hash
edEmbeddingLookupTest
(
tf
.
test
.
TestCase
):
class
Scatter
edEmbeddingLookupTest
(
tf
.
test
.
TestCase
):
def
setUp
(
self
):
tf
.
set_random_seed
(
1
)
...
...
@@ -281,24 +281,24 @@ class HashedEmbeddingLookupTest(tf.test.TestCase):
w
.
initializer
.
run
()
return
embedding_weights
def
test_
hash
ed_embedding_consistency
(
self
):
def
test_
scatter
ed_embedding_consistency
(
self
):
with
self
.
test_session
():
embedding_weights
=
self
.
_random_weights
()
values
=
tf
.
constant
([
"foo"
,
"foo"
])
embedding_lookup_result
=
tf
.
contrib
.
layers
.
hash
ed_embedding_lookup
(
embedding_lookup_result
=
tf
.
contrib
.
layers
.
scatter
ed_embedding_lookup
(
embedding_weights
,
values
,
dimension
=
10
).
eval
()
self
.
assertAllEqual
(
embedding_lookup_result
.
shape
,
[
2
,
10
])
self
.
assertAllEqual
(
embedding_lookup_result
[
0
],
embedding_lookup_result
[
1
])
def
test_
hash
ed_embedding_multiple_partition
(
self
):
def
test_
scatter
ed_embedding_multiple_partition
(
self
):
with
self
.
test_session
():
embedding_weights
=
self
.
_random_weights
(
num_shards
=
7
)
values
=
tf
.
constant
([
4
,
4
,
5
])
embedding_lookup_result
=
tf
.
contrib
.
layers
.
hash
ed_embedding_lookup
(
embedding_lookup_result
=
tf
.
contrib
.
layers
.
scatter
ed_embedding_lookup
(
embedding_weights
,
values
,
dimension
=
5
).
eval
()
self
.
assertAllEqual
(
embedding_lookup_result
.
shape
,
[
3
,
5
])
...
...
@@ -309,31 +309,31 @@ class HashedEmbeddingLookupTest(tf.test.TestCase):
embedding_lookup_result
[
0
])
**
2
)
self
.
assertGreater
(
embedding_diff
,
0
)
def
test_
hash
ed_embedding_coverage
(
self
):
def
test_
scatter
ed_embedding_coverage
(
self
):
with
self
.
test_session
():
size
=
8
embedding_weights
=
self
.
_random_weights
(
size
=
size
,
num_shards
=
3
)
values
=
tf
.
constant
([
"foo"
])
# Large embedding dimension to cover the full range of weights.
embedding_lookup_result
=
tf
.
contrib
.
layers
.
hash
ed_embedding_lookup
(
embedding_lookup_result
=
tf
.
contrib
.
layers
.
scatter
ed_embedding_lookup
(
embedding_weights
,
values
,
dimension
=
100
).
eval
()
self
.
assertEqual
(
len
(
np
.
unique
(
embedding_lookup_result
[
0
])),
size
)
def
test_
hash
ed_embedding_multi_dimension
(
self
):
def
test_
scatter
ed_embedding_multi_dimension
(
self
):
with
self
.
test_session
():
embedding_weights
=
self
.
_random_weights
()
values
=
tf
.
constant
([[
"foo"
,
"bar"
,
"bar"
],
[
"bar"
,
"bar"
,
"foo"
]])
embedding_lookup_result
=
tf
.
contrib
.
layers
.
hash
ed_embedding_lookup
(
embedding_lookup_result
=
tf
.
contrib
.
layers
.
scatter
ed_embedding_lookup
(
embedding_weights
,
values
,
dimension
=
10
).
eval
()
self
.
assertAllEqual
(
embedding_lookup_result
.
shape
,
[
2
,
3
,
10
])
self
.
assertAllEqual
(
embedding_lookup_result
[
0
][
0
],
embedding_lookup_result
[
1
][
2
])
def
test_
hash
ed_embedding_lookup_sparse
(
self
):
def
test_
scatter
ed_embedding_lookup_sparse
(
self
):
with
self
.
test_session
():
embedding_weights
=
self
.
_random_weights
(
num_shards
=
3
)
sparse_tensor
=
tf
.
SparseTensor
(
values
=
[
"foo"
,
"bar"
,
"foo"
,
"bar"
],
...
...
@@ -341,7 +341,7 @@ class HashedEmbeddingLookupTest(tf.test.TestCase):
shape
=
[
5
,
2
])
embedding_lookup_result
=
(
tf
.
contrib
.
layers
.
hash
ed_embedding_lookup_sparse
(
tf
.
contrib
.
layers
.
scatter
ed_embedding_lookup_sparse
(
embedding_weights
,
sparse_tensor
,
dimension
=
5
,
combiner
=
"mean"
)
.
eval
())
...
...
tensorflow/contrib/layers/python/layers/feature_column.py
浏览文件 @
fd9fa066
...
...
@@ -1094,10 +1094,12 @@ def shared_embedding_columns(sparse_id_columns,
return
tuple
(
embedded_columns
)
class
_HashedEmbeddingColumn
(
collections
.
namedtuple
(
"_HashedEmbeddingColumn"
,
[
"column_name"
,
"size"
,
"dimension"
,
"combiner"
,
"initializer"
]),
_EmbeddingColumn
):
"""See `hashed_embedding_column`."""
class
_ScatteredEmbeddingColumn
(
collections
.
namedtuple
(
"_ScatteredEmbeddingColumn"
,
[
"column_name"
,
"size"
,
"dimension"
,
"combiner"
,
"initializer"
]),
_EmbeddingColumn
):
"""See `scattered_embedding_column`."""
def
__new__
(
cls
,
column_name
,
...
...
@@ -1113,13 +1115,13 @@ class _HashedEmbeddingColumn(collections.namedtuple(
# TODO(b/25671353): Better initial value?
initializer
=
init_ops
.
truncated_normal_initializer
(
mean
=
0.0
,
stddev
=
stddev
)
return
super
(
_
Hash
edEmbeddingColumn
,
cls
).
__new__
(
cls
,
column_name
,
size
,
return
super
(
_
Scatter
edEmbeddingColumn
,
cls
).
__new__
(
cls
,
column_name
,
size
,
dimension
,
combiner
,
initializer
)
@
property
def
name
(
self
):
return
"{}_
hash
ed_embedding"
.
format
(
self
.
column_name
)
return
"{}_
scatter
ed_embedding"
.
format
(
self
.
column_name
)
@
property
def
config
(
self
):
...
...
@@ -1141,7 +1143,7 @@ class _HashedEmbeddingColumn(collections.namedtuple(
max_norm
=
None
)
def
hash
ed_embedding_column
(
column_name
,
def
scatter
ed_embedding_column
(
column_name
,
size
,
dimension
,
combiner
=
None
,
...
...
@@ -1151,6 +1153,18 @@ def hashed_embedding_column(column_name,
The i-th embedding component of a value v is found by retrieving an
embedding weight whose index is a fingerprint of the pair (v,i).
An embedding column with sparse_column_with_hash_bucket such as
embedding_column(
sparse_column_with_hash_bucket(column_name, bucket_size),
dimension)
could be replaced by
scattered_embedding_column(
column_name, size=bucket_size * dimension, dimension=dimension)
for the same number of embedding parameters and hopefully reduced impact of
collisions with a cost of slowing down training.
Args:
column_name: A string defining sparse column name.
size: An integer specifying the number of parameters in the embedding layer.
...
...
@@ -1167,7 +1181,7 @@ def hashed_embedding_column(column_name,
`tf.truncated_normal_initializer` with mean 0 and standard deviation 0.1.
Returns:
A _
Hash
edEmbeddingColumn.
A _
Scatter
edEmbeddingColumn.
Raises:
ValueError: if dimension or size is not a positive integer; or if combiner
...
...
@@ -1188,7 +1202,7 @@ def hashed_embedding_column(column_name,
"combiner: {}, column_name: {}"
.
format
(
combiner
,
column_name
))
return
_
Hash
edEmbeddingColumn
(
column_name
,
size
,
dimension
,
combiner
,
return
_
Scatter
edEmbeddingColumn
(
column_name
,
size
,
dimension
,
combiner
,
initializer
)
...
...
tensorflow/contrib/layers/python/layers/feature_column_ops.py
浏览文件 @
fd9fa066
...
...
@@ -75,7 +75,7 @@ def _embeddings_from_arguments(column,
trainable
=
trainable
,
collections
=
weight_collections
)
return
embedding_ops
.
hash
ed_embedding_lookup_sparse
(
return
embedding_ops
.
scatter
ed_embedding_lookup_sparse
(
embeddings
,
input_tensor
,
args
.
dimension
,
combiner
=
args
.
combiner
,
name
=
'lookup'
)
...
...
@@ -256,9 +256,9 @@ def sequence_input_from_feature_columns(columns_to_tensors,
See documentation for `input_from_feature_columns`. The following types of
`FeatureColumn` are permitted in `feature_columns`: `_OneHotColumn`,
`_EmbeddingColumn`, `_
Hash
edEmbeddingColumn`, `_RealValuedColumn`,
`_EmbeddingColumn`, `_
Scatter
edEmbeddingColumn`, `_RealValuedColumn`,
`_DataFrameColumn`. In addition, columns in `feature_columns` may not be
constructed using any of the following: `
Hash
edEmbeddingColumn`,
constructed using any of the following: `
Scatter
edEmbeddingColumn`,
`BucketizedColumn`, `CrossedColumn`.
Args:
...
...
@@ -892,7 +892,7 @@ _SUPPORTED_SEQUENCE_COLUMNS = (fc._OneHotColumn,
fc
.
_EmbeddingColumn
,
fc
.
_RealValuedColumn
)
_FORBIDDEN_SEQUENCE_COLUMNS
=
(
fc
.
_
Hash
edEmbeddingColumn
,
_FORBIDDEN_SEQUENCE_COLUMNS
=
(
fc
.
_
Scatter
edEmbeddingColumn
,
fc
.
_BucketizedColumn
,
fc
.
_CrossedColumn
)
...
...
tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
浏览文件 @
fd9fa066
...
...
@@ -18,7 +18,6 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
collections
import
numpy
as
np
import
tensorflow
as
tf
...
...
@@ -50,7 +49,6 @@ class TransformerTest(tf.test.TestCase):
with
self
.
test_session
():
self
.
assertAllEqual
(
output
[
bucket
].
eval
(),
[[
2
],
[
3
],
[
0
]])
def
testBucketizedColumnWithMultiDimensions
(
self
):
bucket
=
tf
.
contrib
.
layers
.
bucketized_column
(
tf
.
contrib
.
layers
.
real_valued_column
(
"price"
,
2
),
...
...
@@ -585,14 +583,15 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase):
tf
.
global_variables_initializer
().
run
()
self
.
assertAllEqual
(
output
.
eval
().
shape
,
[
4
,
10
])
def
test
Hash
edEmbeddingColumnSucceedsForDNN
(
self
):
def
test
Scatter
edEmbeddingColumnSucceedsForDNN
(
self
):
wire_tensor
=
tf
.
SparseTensor
(
values
=
[
"omar"
,
"stringer"
,
"marlo"
,
"omar"
],
indices
=
[[
0
,
0
],
[
1
,
0
],
[
1
,
1
],
[
2
,
0
]],
shape
=
[
3
,
2
])
features
=
{
"wire"
:
wire_tensor
}
# Big enough hash space so that hopefully there is no collision
embedded_sparse
=
tf
.
contrib
.
layers
.
hashed_embedding_column
(
"wire"
,
1000
,
3
)
embedded_sparse
=
tf
.
contrib
.
layers
.
scattered_embedding_column
(
"wire"
,
1000
,
3
)
output
=
tf
.
contrib
.
layers
.
input_from_feature_columns
(
features
,
[
embedded_sparse
],
weight_collections
=
[
"my_collection"
])
weights
=
tf
.
get_collection
(
"my_collection"
)
...
...
@@ -2054,12 +2053,10 @@ class ParseExampleTest(tf.test.TestCase):
self
.
assertAllEqual
(
output
[
wire_cast
].
indices
.
eval
(),
[[
0
,
0
],
[
0
,
1
]])
self
.
assertAllEqual
(
output
[
wire_cast
].
values
.
eval
(),
[
2
,
0
])
def
testParseSequenceExample
(
self
):
location_keys
=
[
"east_side"
,
"west_side"
,
"nyc"
]
embedding_dimension
=
10
location
=
tf
.
contrib
.
layers
.
sparse_column_with_keys
(
"location"
,
keys
=
location_keys
)
location_onehot
=
tf
.
contrib
.
layers
.
one_hot_column
(
location
)
...
...
@@ -2067,7 +2064,8 @@ class ParseExampleTest(tf.test.TestCase):
"wire_cast"
,
[
"marlo"
,
"omar"
,
"stringer"
])
wire_cast_embedded
=
tf
.
contrib
.
layers
.
embedding_column
(
wire_cast
,
dimension
=
embedding_dimension
)
measurements
=
tf
.
contrib
.
layers
.
real_valued_column
(
"measurements"
,
dimension
=
2
)
measurements
=
tf
.
contrib
.
layers
.
real_valued_column
(
"measurements"
,
dimension
=
2
)
context_feature_columns
=
[
location_onehot
]
sequence_feature_columns
=
[
wire_cast_embedded
,
measurements
]
...
...
@@ -2098,7 +2096,6 @@ class ParseExampleTest(tf.test.TestCase):
])
}))
ctx
,
seq
=
tf
.
contrib
.
layers
.
parse_feature_columns_from_sequence_examples
(
serialized
=
sequence_example
.
SerializeToString
(),
context_feature_columns
=
context_feature_columns
,
...
...
@@ -2128,6 +2125,7 @@ class ParseExampleTest(tf.test.TestCase):
self
.
assertAllClose
(
measurement_val
,
np
.
array
([[
0.2
,
0.3
],
[
0.1
,
0.8
],
[
0.5
,
0.0
]]))
class
InferRealValuedColumnTest
(
tf
.
test
.
TestCase
):
def
testTensorInt32
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录