Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
797e89ec
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
797e89ec
编写于
3月 08, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:baidu/Paddle into feature/better_infer_interface
上级
71ab4df3
1fa73024
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
210 addition
and
18 deletion
+210
-18
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+1
-1
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+2
-6
demo/recommendation/api_train_v2.py
demo/recommendation/api_train_v2.py
+125
-0
python/paddle/v2/data_feeder.py
python/paddle/v2/data_feeder.py
+3
-0
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+77
-7
python/paddle/v2/inference.py
python/paddle/v2/inference.py
+2
-4
未找到文件。
cmake/external/protobuf.cmake
浏览文件 @
797e89ec
...
...
@@ -14,7 +14,7 @@
INCLUDE
(
ExternalProject
)
FIND_PACKAGE
(
Protobuf
)
FIND_PACKAGE
(
Protobuf
3.1
)
IF
(
NOT PROTOBUF_FOUND
)
SET
(
PROTOBUF_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/protobuf
)
...
...
demo/mnist/api_train_v2.py
浏览文件 @
797e89ec
...
...
@@ -92,12 +92,8 @@ def main():
def
event_handler
(
event
):
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
%
1000
==
0
:
result
=
trainer
.
test
(
reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
256
))
print
"Pass %d, Batch %d, Cost %f, %s, Testing metrics %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
,
result
.
metrics
)
print
"Pass %d, Batch %d, Cost %f, %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
)
with
gzip
.
open
(
'params.tar.gz'
,
'w'
)
as
f
:
parameters
.
to_tar
(
f
)
...
...
demo/recommendation/api_train_v2.py
0 → 100644
浏览文件 @
797e89ec
import
paddle.v2
as
paddle
import
cPickle
import
copy
def
main
():
paddle
.
init
(
use_gpu
=
False
)
movie_title_dict
=
paddle
.
dataset
.
movielens
.
get_movie_title_dict
()
uid
=
paddle
.
layer
.
data
(
name
=
'user_id'
,
type
=
paddle
.
data_type
.
integer_value
(
paddle
.
dataset
.
movielens
.
max_user_id
()
+
1
))
usr_emb
=
paddle
.
layer
.
embedding
(
input
=
uid
,
size
=
32
)
usr_gender_id
=
paddle
.
layer
.
data
(
name
=
'gender_id'
,
type
=
paddle
.
data_type
.
integer_value
(
2
))
usr_gender_emb
=
paddle
.
layer
.
embedding
(
input
=
usr_gender_id
,
size
=
16
)
usr_age_id
=
paddle
.
layer
.
data
(
name
=
'age_id'
,
type
=
paddle
.
data_type
.
integer_value
(
len
(
paddle
.
dataset
.
movielens
.
age_table
)))
usr_age_emb
=
paddle
.
layer
.
embedding
(
input
=
usr_age_id
,
size
=
16
)
usr_job_id
=
paddle
.
layer
.
data
(
name
=
'job_id'
,
type
=
paddle
.
data_type
.
integer_value
(
paddle
.
dataset
.
movielens
.
max_job_id
(
)
+
1
))
usr_job_emb
=
paddle
.
layer
.
embedding
(
input
=
usr_job_id
,
size
=
16
)
usr_combined_features
=
paddle
.
layer
.
fc
(
input
=
[
usr_emb
,
usr_gender_emb
,
usr_age_emb
,
usr_job_emb
],
size
=
200
,
act
=
paddle
.
activation
.
Tanh
())
mov_id
=
paddle
.
layer
.
data
(
name
=
'movie_id'
,
type
=
paddle
.
data_type
.
integer_value
(
paddle
.
dataset
.
movielens
.
max_movie_id
()
+
1
))
mov_emb
=
paddle
.
layer
.
embedding
(
input
=
mov_id
,
size
=
32
)
mov_categories
=
paddle
.
layer
.
data
(
name
=
'category_id'
,
type
=
paddle
.
data_type
.
sparse_binary_vector
(
len
(
paddle
.
dataset
.
movielens
.
movie_categories
())))
mov_categories_hidden
=
paddle
.
layer
.
fc
(
input
=
mov_categories
,
size
=
32
)
mov_title_id
=
paddle
.
layer
.
data
(
name
=
'movie_title'
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
len
(
movie_title_dict
)))
mov_title_emb
=
paddle
.
layer
.
embedding
(
input
=
mov_title_id
,
size
=
32
)
mov_title_conv
=
paddle
.
networks
.
sequence_conv_pool
(
input
=
mov_title_emb
,
hidden_size
=
32
,
context_len
=
3
)
mov_combined_features
=
paddle
.
layer
.
fc
(
input
=
[
mov_emb
,
mov_categories_hidden
,
mov_title_conv
],
size
=
200
,
act
=
paddle
.
activation
.
Tanh
())
inference
=
paddle
.
layer
.
cos_sim
(
a
=
usr_combined_features
,
b
=
mov_combined_features
,
size
=
1
,
scale
=
5
)
cost
=
paddle
.
layer
.
regression_cost
(
input
=
inference
,
label
=
paddle
.
layer
.
data
(
name
=
'score'
,
type
=
paddle
.
data_type
.
dense_vector
(
1
)))
parameters
=
paddle
.
parameters
.
create
(
cost
)
trainer
=
paddle
.
trainer
.
SGD
(
cost
=
cost
,
parameters
=
parameters
,
update_equation
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
1e-4
))
feeding
=
{
'user_id'
:
0
,
'gender_id'
:
1
,
'age_id'
:
2
,
'job_id'
:
3
,
'movie_id'
:
4
,
'category_id'
:
5
,
'movie_title'
:
6
,
'score'
:
7
}
def
event_handler
(
event
):
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
%
100
==
0
:
print
"Pass %d Batch %d Cost %.2f"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
)
trainer
.
train
(
reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
movielens
.
train
(),
buf_size
=
8192
),
batch_size
=
256
),
event_handler
=
event_handler
,
feeding
=
feeding
,
num_passes
=
1
)
user_id
=
234
movie_id
=
345
user
=
paddle
.
dataset
.
movielens
.
user_info
()[
user_id
]
movie
=
paddle
.
dataset
.
movielens
.
movie_info
()[
movie_id
]
feature
=
user
.
value
()
+
movie
.
value
()
def
reader
():
yield
feature
infer_dict
=
copy
.
copy
(
feeding
)
del
infer_dict
[
'score'
]
prediction
=
paddle
.
infer
(
output
=
inference
,
parameters
=
parameters
,
reader
=
paddle
.
batch
(
reader
,
batch_size
=
32
),
feeding
=
infer_dict
)
print
(
prediction
+
5
)
/
2
if
__name__
==
'__main__'
:
main
()
python/paddle/v2/data_feeder.py
浏览文件 @
797e89ec
...
...
@@ -85,6 +85,9 @@ class DataFeeder(DataProviderConverter):
input_types
.
append
(
each
[
1
])
DataProviderConverter
.
__init__
(
self
,
input_types
)
def
__len__
(
self
):
return
len
(
self
.
input_names
)
def
convert
(
self
,
dat
,
argument
=
None
):
"""
:param dat: A list of mini-batch data. Each sample is a list or tuple
...
...
python/paddle/v2/dataset/movielens.py
浏览文件 @
797e89ec
...
...
@@ -23,7 +23,12 @@ import re
import
random
import
functools
__all__
=
[
'train_creator'
,
'test_creator'
]
__all__
=
[
'train'
,
'test'
,
'get_movie_title_dict'
,
'max_movie_id'
,
'max_user_id'
,
'age_table'
,
'movie_categories'
,
'max_job_id'
,
'user_info'
,
'movie_info'
]
age_table
=
[
1
,
18
,
25
,
35
,
45
,
50
,
56
]
class
MovieInfo
(
object
):
...
...
@@ -38,17 +43,32 @@ class MovieInfo(object):
[
MOVIE_TITLE_DICT
[
w
.
lower
()]
for
w
in
self
.
title
.
split
()]
]
def
__str__
(
self
):
return
"<MovieInfo id(%d), title(%s), categories(%s)>"
%
(
self
.
index
,
self
.
title
,
self
.
categories
)
def
__repr__
(
self
):
return
self
.
__str__
()
class
UserInfo
(
object
):
def
__init__
(
self
,
index
,
gender
,
age
,
job_id
):
self
.
index
=
int
(
index
)
self
.
is_male
=
gender
==
'M'
self
.
age
=
[
1
,
18
,
25
,
35
,
45
,
50
,
56
]
.
index
(
int
(
age
))
self
.
age
=
age_table
.
index
(
int
(
age
))
self
.
job_id
=
int
(
job_id
)
def
value
(
self
):
return
[
self
.
index
,
0
if
self
.
is_male
else
1
,
self
.
age
,
self
.
job_id
]
def
__str__
(
self
):
return
"<UserInfo id(%d), gender(%s), age(%d), job(%d)>"
%
(
self
.
index
,
"M"
if
self
.
is_male
else
"F"
,
age_table
[
self
.
age
],
self
.
job_id
)
def
__repr__
(
self
):
return
str
(
self
)
MOVIE_INFO
=
None
MOVIE_TITLE_DICT
=
None
...
...
@@ -59,7 +79,8 @@ USER_INFO = None
def
__initialize_meta_info__
():
fn
=
download
(
url
=
'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
,
md5
=
'c4d9eecfca2ab87c1945afe126590906'
)
module_name
=
'movielens'
,
md5sum
=
'c4d9eecfca2ab87c1945afe126590906'
)
global
MOVIE_INFO
if
MOVIE_INFO
is
None
:
pattern
=
re
.
compile
(
r
'^(.*)\((\d+)\)$'
)
...
...
@@ -122,14 +143,63 @@ def __reader_creator__(**kwargs):
return
lambda
:
__reader__
(
**
kwargs
)
train_creator
=
functools
.
partial
(
__reader_creator__
,
is_test
=
False
)
test_creator
=
functools
.
partial
(
__reader_creator__
,
is_test
=
True
)
train
=
functools
.
partial
(
__reader_creator__
,
is_test
=
False
)
test
=
functools
.
partial
(
__reader_creator__
,
is_test
=
True
)
def
get_movie_title_dict
():
__initialize_meta_info__
()
return
MOVIE_TITLE_DICT
def
__max_index_info__
(
a
,
b
):
if
a
.
index
>
b
.
index
:
return
a
else
:
return
b
def
max_movie_id
():
__initialize_meta_info__
()
return
reduce
(
__max_index_info__
,
MOVIE_INFO
.
viewvalues
()).
index
def
max_user_id
():
__initialize_meta_info__
()
return
reduce
(
__max_index_info__
,
USER_INFO
.
viewvalues
()).
index
def
__max_job_id_impl__
(
a
,
b
):
if
a
.
job_id
>
b
.
job_id
:
return
a
else
:
return
b
def
max_job_id
():
__initialize_meta_info__
()
return
reduce
(
__max_job_id_impl__
,
USER_INFO
.
viewvalues
()).
job_id
def
movie_categories
():
__initialize_meta_info__
()
return
CATEGORIES_DICT
def
user_info
():
__initialize_meta_info__
()
return
USER_INFO
def
movie_info
():
__initialize_meta_info__
()
return
MOVIE_INFO
def
unittest
():
for
train_count
,
_
in
enumerate
(
train
_creator
()()):
for
train_count
,
_
in
enumerate
(
train
()()):
pass
for
test_count
,
_
in
enumerate
(
test
_creator
()()):
for
test_count
,
_
in
enumerate
(
test
()()):
pass
print
train_count
,
test_count
...
...
python/paddle/v2/inference.py
浏览文件 @
797e89ec
...
...
@@ -23,7 +23,7 @@ class Inference(object):
def
iter_infer
(
self
,
input
=
None
,
batch_size
=
None
,
reader
=
None
,
feeding
=
None
):
feeder
=
DataFeeder
(
self
.
__data_types__
,
feeding
)
if
reader
is
None
:
assert
input
is
not
None
and
isinstance
(
input
,
collections
.
Iterable
)
if
not
isinstance
(
input
,
collections
.
Iterable
):
...
...
@@ -45,8 +45,6 @@ class Inference(object):
if
input
is
not
None
:
raise
ValueError
(
"User should set either input or reader, "
"should not set them both."
)
feeder
=
DataFeeder
(
self
.
__data_types__
,
feeding
)
self
.
__gradient_machine__
.
start
()
for
data_batch
in
reader
():
yield
self
.
__gradient_machine__
.
forwardTest
(
feeder
(
data_batch
))
...
...
@@ -70,7 +68,7 @@ class Inference(object):
return
retv
def
infer
(
output_layer
,
parameters
,
input
=
None
,
feeding
=
None
,
field
=
'value'
):
def
infer
(
output_layer
,
parameters
,
input
,
feeding
=
None
,
field
=
'value'
):
"""
Infer a neural network by given neural network output and parameters. The
user should pass either a batch of input data or reader method.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录