Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
book
提交
0784c22c
B
book
项目概览
PaddlePaddle
/
book
通知
16
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
40
列表
看板
标记
里程碑
合并请求
37
Wiki
5
Wiki
分析
仓库
DevOps
项目成员
Pages
B
book
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
40
Issue
40
列表
看板
标记
里程碑
合并请求
37
合并请求
37
Pages
分析
分析
仓库分析
DevOps
Wiki
5
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0784c22c
编写于
1月 09, 2017
作者:
L
livc
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/recommender_sys' into recommender_sys
上级
b258ddc5
8b0f1d3e
变更
6
展开全部
显示空白变更内容
内联
并排
Showing
6 changed file
with
274 addition
and
275 deletion
+274
-275
recommender_system/README.md
recommender_system/README.md
+214
-204
recommender_system/dataprovider.py
recommender_system/dataprovider.py
+5
-6
recommender_system/image/Attention_Based_BiRNN_with_GRU_cell.png
...nder_system/image/Attention_Based_BiRNN_with_GRU_cell.png
+0
-0
recommender_system/image/BiRNN_with_GRU_Cell.png
recommender_system/image/BiRNN_with_GRU_Cell.png
+0
-0
recommender_system/prediction.py
recommender_system/prediction.py
+1
-2
recommender_system/trainer_config.py
recommender_system/trainer_config.py
+54
-63
未找到文件。
recommender_system/README.md
浏览文件 @
0784c22c
此差异已折叠。
点击以展开。
recommender_system/dataprovider.py
浏览文件 @
0784c22c
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
from
paddle.trainer.PyDataProvider2
import
*
import
common_utils
# parse
from
common_utils
import
meta_to_header
def
__list_to_map__
(
lst
):
...
...
@@ -35,17 +35,16 @@ def hook(settings, meta, **kwargs):
file record movie/user features.
:param kwargs: unused other arguments.
"""
del
kwargs
# unused kwargs
# Header define slots that used for paddle.
# first part is movie features.
# second part is user features.
# final part is rating score.
# header is a list of [USE_SEQ_OR_NOT?, SlotType]
movie_headers
=
list
(
common_utils
.
meta_to_header
(
meta
,
'movie'
))
movie_headers
=
list
(
meta_to_header
(
meta
,
'movie'
))
settings
.
movie_names
=
[
h
[
0
]
for
h
in
movie_headers
]
headers
=
movie_headers
user_headers
=
list
(
common_utils
.
meta_to_header
(
meta
,
'user'
))
user_headers
=
list
(
meta_to_header
(
meta
,
'user'
))
settings
.
user_names
=
[
h
[
0
]
for
h
in
user_headers
]
headers
.
extend
(
user_headers
)
headers
.
append
((
"rating"
,
dense_vector
(
1
)))
# Score
...
...
@@ -62,8 +61,8 @@ def process(settings, filename):
# Get a rating from file.
user_id
,
movie_id
,
score
=
map
(
int
,
line
.
split
(
'::'
)[:
-
1
])
# Scale score to [-
5, +5
]
score
=
float
(
score
)
*
2
-
5.0
# Scale score to [-
2, +2
]
score
=
float
(
score
-
3
)
# Get movie/user features by movie_id, user_id
movie_meta
=
settings
.
meta
[
'movie'
][
movie_id
]
...
...
recommender_system/image/Attention_Based_BiRNN_with_GRU_cell.png
已删除
100644 → 0
浏览文件 @
b258ddc5
292.5 KB
recommender_system/image/BiRNN_with_GRU_Cell.png
已删除
100644 → 0
浏览文件 @
b258ddc5
323.9 KB
recommender_system/prediction.py
浏览文件 @
0784c22c
...
...
@@ -47,5 +47,4 @@ if __name__ == '__main__':
data
.
append
(
user_id
-
1
)
data
.
extend
(
user_meta
)
print
"Prediction Score is %.2f"
%
(
(
network
.
forwardTest
(
cvt
.
convert
([
data
]))[
0
][
'value'
][
0
][
0
]
+
5
)
/
2
)
network
.
forwardTest
(
cvt
.
convert
([
data
]))[
0
][
'value'
][
0
][
0
]
+
3
)
recommender_system/trainer_config.py
浏览文件 @
0784c22c
...
...
@@ -27,75 +27,66 @@ with open(META_FILE, 'rb') as f:
# load meta file
meta
=
pickle
.
load
(
f
)
settings
(
batch_size
=
1600
,
learning_rate
=
1e-3
,
learning_method
=
RMSPropOptimizer
())
def
construct_feature
(
name
):
"""
Construct movie/user features.
This method read from meta data. Then convert feature to neural network due
to feature type. The map relation as follow.
* id: embedding => fc
* embedding:
is_sequence: embedding => context_projection => fc => pool
not sequence: embedding => fc
* one_hot_dense: fc => fc
Then gather all features vector, and use a fc layer to combined them as
return.
:param name: 'movie' or 'user'
:type name: basestring
:return: combined feature output
:rtype: LayerOutput
"""
__meta__
=
meta
[
name
][
'__meta__'
][
'raw_meta'
]
fusion
=
[]
for
each_meta
in
__meta__
:
type_name
=
each_meta
[
'type'
]
slot_name
=
each_meta
.
get
(
'name'
,
'%s_id'
%
name
)
if
type_name
==
'id'
:
slot_dim
=
each_meta
[
'max'
]
embedding
=
embedding_layer
(
input
=
data_layer
(
slot_name
,
size
=
slot_dim
),
size
=
256
)
fusion
.
append
(
fc_layer
(
input
=
embedding
,
size
=
256
))
elif
type_name
==
'embedding'
:
is_seq
=
each_meta
[
'seq'
]
==
'sequence'
slot_dim
=
len
(
each_meta
[
'dict'
])
din
=
data_layer
(
slot_name
,
slot_dim
)
embedding
=
embedding_layer
(
input
=
din
,
size
=
256
)
if
is_seq
:
fusion
.
append
(
text_conv_pool
(
input
=
embedding
,
context_len
=
5
,
hidden_size
=
256
))
else
:
fusion
.
append
(
fc_layer
(
input
=
embedding
,
size
=
256
))
elif
type_name
==
'one_hot_dense'
:
slot_dim
=
len
(
each_meta
[
'dict'
])
hidden
=
fc_layer
(
input
=
data_layer
(
slot_name
,
slot_dim
),
size
=
256
)
fusion
.
append
(
fc_layer
(
input
=
hidden
,
size
=
256
))
return
fc_layer
(
name
=
"%s_fusion"
%
name
,
input
=
fusion
,
size
=
256
)
movie_feature
=
construct_feature
(
"movie"
)
user_feature
=
construct_feature
(
"user"
)
similarity
=
cos_sim
(
a
=
movie_feature
,
b
=
user_feature
)
if
not
is_predict
:
outputs
(
regression_cost
(
input
=
similarity
,
label
=
data_layer
(
'rating'
,
size
=
1
)))
define_py_data_sources2
(
'data/train.list'
,
'data/test.list'
,
module
=
'dataprovider'
,
obj
=
'process'
,
args
=
{
'meta'
:
meta
})
settings
(
batch_size
=
1600
,
learning_rate
=
1e-3
,
learning_method
=
RMSPropOptimizer
())
movie_meta
=
meta
[
'movie'
][
'__meta__'
][
'raw_meta'
]
user_meta
=
meta
[
'user'
][
'__meta__'
][
'raw_meta'
]
movie_id
=
data_layer
(
'movie_id'
,
size
=
movie_meta
[
0
][
'max'
])
title
=
data_layer
(
'title'
,
size
=
len
(
movie_meta
[
1
][
'dict'
]))
genres
=
data_layer
(
'genres'
,
size
=
len
(
movie_meta
[
2
][
'dict'
]))
user_id
=
data_layer
(
'user_id'
,
size
=
user_meta
[
0
][
'max'
])
gender
=
data_layer
(
'gender'
,
size
=
len
(
user_meta
[
1
][
'dict'
]))
age
=
data_layer
(
'age'
,
size
=
len
(
user_meta
[
2
][
'dict'
]))
occupation
=
data_layer
(
'occupation'
,
size
=
len
(
user_meta
[
3
][
'dict'
]))
embsize
=
256
# construct movie feature
movie_id_emb
=
embedding_layer
(
input
=
movie_id
,
size
=
embsize
)
movie_id_hidden
=
fc_layer
(
input
=
movie_id_emb
,
size
=
embsize
)
genres_emb
=
fc_layer
(
input
=
genres
,
size
=
embsize
)
title_emb
=
embedding_layer
(
input
=
title
,
size
=
embsize
)
title_hidden
=
text_conv_pool
(
input
=
title_emb
,
context_len
=
5
,
hidden_size
=
embsize
)
movie_feature
=
fc_layer
(
input
=
[
movie_id_hidden
,
title_hidden
,
genres_emb
],
size
=
embsize
)
# construct user feature
user_id_emb
=
embedding_layer
(
input
=
user_id
,
size
=
embsize
)
user_id_hidden
=
fc_layer
(
input
=
user_id_emb
,
size
=
embsize
)
gender_emb
=
embedding_layer
(
input
=
gender
,
size
=
embsize
)
gender_hidden
=
fc_layer
(
input
=
gender_emb
,
size
=
embsize
)
age_emb
=
embedding_layer
(
input
=
age
,
size
=
embsize
)
age_hidden
=
fc_layer
(
input
=
age_emb
,
size
=
embsize
)
occup_emb
=
embedding_layer
(
input
=
occupation
,
size
=
embsize
)
occup_hidden
=
fc_layer
(
input
=
occup_emb
,
size
=
embsize
)
user_feature
=
fc_layer
(
input
=
[
user_id_hidden
,
gender_hidden
,
age_hidden
,
occup_hidden
],
size
=
embsize
)
similarity
=
cos_sim
(
a
=
movie_feature
,
b
=
user_feature
,
scale
=
2
)
if
not
is_predict
:
lbl
=
data_layer
(
'rating'
,
size
=
1
)
cost
=
regression_cost
(
input
=
similarity
,
label
=
lbl
)
outputs
(
cost
)
else
:
outputs
(
similarity
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录