Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
aaa2a1f8
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aaa2a1f8
编写于
3月 07, 2017
作者:
H
helinwang
提交者:
GitHub
3月 07, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1501 from reyoung/feature/recommendation_v2_api
Feature/recommendation v2 api
上级
79e95c1f
dda02fe1
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
300 addition
and
28 deletion
+300
-28
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+10
-15
demo/recommendation/api_train_v2.py
demo/recommendation/api_train_v2.py
+125
-0
doc/api/v2/run_logic.rst
doc/api/v2/run_logic.rst
+8
-0
python/paddle/v2/data_feeder.py
python/paddle/v2/data_feeder.py
+3
-0
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+77
-7
python/paddle/v2/inference.py
python/paddle/v2/inference.py
+77
-6
未找到文件。
demo/mnist/api_train_v2.py
浏览文件 @
aaa2a1f8
...
...
@@ -92,12 +92,8 @@ def main():
def
event_handler
(
event
):
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
%
1000
==
0
:
result
=
trainer
.
test
(
reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
256
))
print
"Pass %d, Batch %d, Cost %f, %s, Testing metrics %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
,
result
.
metrics
)
print
"Pass %d, Batch %d, Cost %f, %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
)
with
gzip
.
open
(
'params.tar.gz'
,
'w'
)
as
f
:
parameters
.
to_tar
(
f
)
...
...
@@ -123,17 +119,16 @@ def main():
print
'Best pass is %s, testing Avgcost is %s'
%
(
best
[
0
],
best
[
1
])
print
'The classification accuracy is %.2f%%'
%
(
100
-
float
(
best
[
2
])
*
100
)
test_creator
=
paddle
.
dataset
.
mnist
.
test
()
test_data
=
[]
for
item
in
test_creator
():
test_data
.
append
(
item
[
0
])
if
len
(
test_data
)
==
100
:
break
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs
=
paddle
.
infer
(
output
=
predict
,
parameters
=
parameters
,
reader
=
paddle
.
batch
(
paddle
.
reader
.
firstn
(
paddle
.
reader
.
map_readers
(
lambda
item
:
(
item
[
0
],
),
paddle
.
dataset
.
mnist
.
test
()),
n
=
100
),
batch_size
=
32
))
probs
=
paddle
.
infer
(
output
=
predict
,
parameters
=
parameters
,
input
=
test_data
)
print
probs
.
shape
...
...
demo/recommendation/api_train_v2.py
0 → 100644
浏览文件 @
aaa2a1f8
import
paddle.v2
as
paddle
import
cPickle
import
copy
def main():
    """Train a MovieLens rating model and print one sample prediction.

    The network embeds user-side features (id, gender, age, job) and
    movie-side features (id, categories, title), projects each side to a
    200-dimensional vector, and scores the pair with a scaled cosine
    similarity that is regressed against the rating.

    All ``print`` calls take one pre-formatted argument, so the output is the
    same on Python 2 while the function also parses on Python 3.
    """
    paddle.init(use_gpu=False)
    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()

    # ---- user-side features ------------------------------------------------
    # Ids index embedding tables, so each table needs ``max id + 1`` rows.
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)

    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)

    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(
            len(paddle.dataset.movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)

    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_job_id() + 1))
    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)

    usr_combined_features = paddle.layer.fc(
        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
        size=200,
        act=paddle.activation.Tanh())

    # ---- movie-side features -----------------------------------------------
    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)

    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(paddle.dataset.movielens.movie_categories())))
    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)

    mov_combined_features = paddle.layer.fc(
        input=[mov_emb, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())

    # ---- score, cost and trainer -------------------------------------------
    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.regression_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(
        cost=cost,
        parameters=parameters,
        update_equation=paddle.optimizer.Adam(learning_rate=1e-4))

    # Maps each data layer name to its position inside one reader sample.
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        """Log the training cost once every 100 batches."""
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost))

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    # ---- inference on a single (user, movie) pair --------------------------
    user_id = 234
    movie_id = 345

    user = paddle.dataset.movielens.user_info()[user_id]
    movie = paddle.dataset.movielens.movie_info()[movie_id]

    # User fields followed by movie fields, matching the first seven feeding
    # slots; there is no 'score' slot at inference time.
    feature = user.value() + movie.value()

    def reader():
        """Yield the single sample to be scored."""
        yield feature

    infer_dict = copy.copy(feeding)
    del infer_dict['score']

    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=paddle.batch(reader, batch_size=32),
        feeding=infer_dict)
    # NOTE(review): presumably cos_sim with scale=5 yields values in [-5, 5],
    # which this shifts into [0, 5] -- confirm against the cos_sim layer docs.
    print((prediction + 5) / 2)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
doc/api/v2/run_logic.rst
浏览文件 @
aaa2a1f8
...
...
@@ -2,6 +2,7 @@
Trainer API
###########
==========
Parameters
==========
...
...
@@ -24,3 +25,10 @@ Event
.. automodule:: paddle.v2.event
:members:
=========
Inference
=========
.. autofunction:: paddle.v2.infer
\ No newline at end of file
python/paddle/v2/data_feeder.py
浏览文件 @
aaa2a1f8
...
...
@@ -85,6 +85,9 @@ class DataFeeder(DataProviderConverter):
input_types
.
append
(
each
[
1
])
DataProviderConverter
.
__init__
(
self
,
input_types
)
def __len__(self):
    """Return the number of entries in ``self.input_names``."""
    names = self.input_names
    return len(names)
def
convert
(
self
,
dat
,
argument
=
None
):
"""
:param dat: A list of mini-batch data. Each sample is a list or tuple
...
...
python/paddle/v2/dataset/movielens.py
浏览文件 @
aaa2a1f8
...
...
@@ -23,7 +23,12 @@ import re
import
random
import
functools
# Public names of this module. The earlier list naming 'train_creator' and
# 'test_creator' was overwritten immediately, so only this list ever applied.
__all__ = [
    'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
    'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info'
]

# Age codes accepted from the raw user records. UserInfo stores the position
# of a user's age code in this list rather than the code itself.
age_table = [1, 18, 25, 35, 45, 50, 56]
class
MovieInfo
(
object
):
...
...
@@ -38,17 +43,32 @@ class MovieInfo(object):
[
MOVIE_TITLE_DICT
[
w
.
lower
()]
for
w
in
self
.
title
.
split
()]
]
def
__str__
(
self
):
return
"<MovieInfo id(%d), title(%s), categories(%s)>"
%
(
self
.
index
,
self
.
title
,
self
.
categories
)
def
__repr__
(
self
):
return
self
.
__str__
()
class UserInfo(object):
    """One user record from the MovieLens metadata.

    Attributes are normalised on construction: ``index`` and ``job_id`` are
    ints, ``is_male`` is a bool, and ``age`` is the position of the user's
    age code inside the module-level ``age_table``.
    """

    def __init__(self, index, gender, age, job_id):
        """Build a record from raw field values.

        :param index: user id, anything accepted by ``int()``.
        :param gender: ``'M'`` for male; any other value is treated as female.
        :param age: age code; must be present in ``age_table``.
        :param job_id: occupation id, anything accepted by ``int()``.
        :raises ValueError: if ``int(age)`` is not in ``age_table``.
        """
        self.index = int(index)
        self.is_male = gender == 'M'
        # age_table is the single source of truth; a second, hard-coded copy
        # of the list used to be consulted first and could drift out of sync.
        self.age = age_table.index(int(age))
        self.job_id = int(job_id)

    def value(self):
        """Return ``[index, gender, age, job_id]``, gender being 0 for male, 1 otherwise."""
        gender_code = 0 if self.is_male else 1
        return [self.index, gender_code, self.age, self.job_id]

    def __str__(self):
        # Translate the stored age index back into the original age code.
        return "<UserInfo id(%d), gender(%s), age(%d), job(%d)>" % (
            self.index, "M" if self.is_male else "F", age_table[self.age],
            self.job_id)

    def __repr__(self):
        return str(self)
MOVIE_INFO
=
None
MOVIE_TITLE_DICT
=
None
...
...
@@ -59,7 +79,8 @@ USER_INFO = None
def
__initialize_meta_info__
():
fn
=
download
(
url
=
'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
,
md5
=
'c4d9eecfca2ab87c1945afe126590906'
)
module_name
=
'movielens'
,
md5sum
=
'c4d9eecfca2ab87c1945afe126590906'
)
global
MOVIE_INFO
if
MOVIE_INFO
is
None
:
pattern
=
re
.
compile
(
r
'^(.*)\((\d+)\)$'
)
...
...
@@ -122,14 +143,63 @@ def __reader_creator__(**kwargs):
return
lambda
:
__reader__
(
**
kwargs
)
# Reader creators for the two dataset splits. Each is __reader_creator__ with
# ``is_test`` pre-bound; calling one returns a reader, and calling that reader
# iterates the samples.
train = functools.partial(__reader_creator__, is_test=False)
test = functools.partial(__reader_creator__, is_test=True)

# The same creators under their earlier ``*_creator`` names.
train_creator = functools.partial(__reader_creator__, is_test=False)
test_creator = functools.partial(__reader_creator__, is_test=True)
def get_movie_title_dict():
    """Return the module-level ``MOVIE_TITLE_DICT``.

    ``__initialize_meta_info__`` is called first; presumably it populates the
    dictionary on first use -- confirm against its definition.
    """
    __initialize_meta_info__()
    return MOVIE_TITLE_DICT
def __max_index_info__(a, b):
    """Return whichever of *a* and *b* has the larger ``index``.

    When both indices are equal, *b* is returned.
    """
    return a if a.index > b.index else b
def max_movie_id():
    """Return the largest ``index`` among the entries of ``MOVIE_INFO``.

    Uses ``max`` over ``dict.values()`` rather than the builtin ``reduce`` and
    ``dict.viewvalues()``, both of which exist only on Python 2.

    :raises ValueError: if ``MOVIE_INFO`` is empty (``reduce`` raised
        TypeError in that case).
    """
    __initialize_meta_info__()
    return max(info.index for info in MOVIE_INFO.values())
def max_user_id():
    """Return the largest ``index`` among the entries of ``USER_INFO``.

    Uses ``max`` over ``dict.values()`` rather than the builtin ``reduce`` and
    ``dict.viewvalues()``, both of which exist only on Python 2.

    :raises ValueError: if ``USER_INFO`` is empty (``reduce`` raised
        TypeError in that case).
    """
    __initialize_meta_info__()
    return max(info.index for info in USER_INFO.values())
def __max_job_id_impl__(a, b):
    """Return whichever of *a* and *b* has the larger ``job_id``.

    When both job ids are equal, *b* is returned.
    """
    return a if a.job_id > b.job_id else b
def max_job_id():
    """Return the largest ``job_id`` among the entries of ``USER_INFO``.

    Uses ``max`` over ``dict.values()`` rather than the builtin ``reduce`` and
    ``dict.viewvalues()``, both of which exist only on Python 2.

    :raises ValueError: if ``USER_INFO`` is empty (``reduce`` raised
        TypeError in that case).
    """
    __initialize_meta_info__()
    return max(info.job_id for info in USER_INFO.values())
def movie_categories():
    """Return the module-level ``CATEGORIES_DICT``.

    ``__initialize_meta_info__`` is called first; presumably it populates the
    dictionary on first use -- confirm against its definition.
    """
    __initialize_meta_info__()
    return CATEGORIES_DICT
def user_info():
    """Return the module-level ``USER_INFO`` mapping.

    ``__initialize_meta_info__`` is called first; presumably it populates the
    mapping on first use -- confirm against its definition.
    """
    __initialize_meta_info__()
    return USER_INFO
def movie_info():
    """Return the module-level ``MOVIE_INFO`` mapping.

    ``__initialize_meta_info__`` is called first, and it fills ``MOVIE_INFO``
    when that global is still ``None``.
    """
    __initialize_meta_info__()
    return MOVIE_INFO
def
unittest
():
for
train_count
,
_
in
enumerate
(
train
_creator
()()):
for
train_count
,
_
in
enumerate
(
train
()()):
pass
for
test_count
,
_
in
enumerate
(
test
_creator
()()):
for
test_count
,
_
in
enumerate
(
test
()()):
pass
print
train_count
,
test_count
...
...
python/paddle/v2/inference.py
浏览文件 @
aaa2a1f8
import
numpy
import
py_paddle.swig_paddle
as
api
import
collections
import
topology
import
minibatch
from
data_feeder
import
DataFeeder
import
itertools
import
numpy
__all__
=
[
'infer'
]
...
...
@@ -21,8 +21,33 @@ class Inference(object):
self
.
__gradient_machine__
=
gm
self
.
__data_types__
=
topo
.
data_type
()
def
iter_infer
(
self
,
reader
,
feeding
=
None
):
def
iter_infer
(
self
,
input
=
None
,
batch_size
=
None
,
reader
=
None
,
feeding
=
None
):
feeder
=
DataFeeder
(
self
.
__data_types__
,
feeding
)
if
reader
is
None
:
assert
input
is
not
None
and
isinstance
(
input
,
collections
.
Iterable
)
if
not
isinstance
(
input
,
collections
.
Iterable
):
raise
TypeError
(
"When reader is None, input should be whole "
"inference data and should be iterable"
)
if
batch_size
is
None
:
if
not
hasattr
(
input
,
'__len__'
):
raise
ValueError
(
"Should set batch size when input data "
"don't contain length."
)
batch_size
=
len
(
input
)
def
__reader_impl__
():
for
each_sample
in
input
:
if
len
(
feeder
)
==
1
:
yield
[
each_sample
]
else
:
yield
each_sample
reader
=
minibatch
.
batch
(
__reader_impl__
,
batch_size
=
batch_size
)
else
:
if
input
is
not
None
:
raise
ValueError
(
"User should set either input or reader, "
"should not set them both."
)
self
.
__gradient_machine__
.
start
()
for
data_batch
in
reader
():
yield
self
.
__gradient_machine__
.
forwardTest
(
feeder
(
data_batch
))
...
...
@@ -46,6 +71,52 @@ class Inference(object):
return
retv
def infer(output,
          parameters,
          input=None,
          batch_size=None,
          reader=None,
          feeding=None,
          field='value'):
    """
    Infer a neural network by given neural network output and parameters. The
    user should pass either the input data or a reader creator, not both.

    Example usages:

    ..  code-block:: python

        result = paddle.infer(prediction, parameters, input=SomeData,
                              batch_size=32)
        print result

    :param output: output of the neural network that would be inferred
    :type output: paddle.v2.config_base.Layer
    :param parameters: parameters of the neural network.
    :type parameters: paddle.v2.parameters.Parameters
    :param input: the whole inference data. Should be a python iterable
                  object, and each element is one sample.
    :type input: collections.Iterable
    :param batch_size: the batch size used when performing inference on
                       `input`. Default is the length of `input`, so it must
                       be given when `input` has no length.
    :type batch_size: int
    :param reader: input data reader creator in batch. Mutually exclusive with
                   `input`: ``Inference.iter_infer`` raises ValueError when
                   both are given.
    :type reader: callable
    :param feeding: Reader dictionary. Default could generate from input
                    value.
    :param field: The prediction field. It should be one of [`value`, `ids`].
                  `value` means return the prediction probabilities, `ids`
                  means return the prediction labels. Default is `value`
    :type field: str
    :return: a numpy array
    :rtype: numpy.ndarray
    """
    inferer = Inference(output=output, parameters=parameters)
    # Forward every data-source argument. Passing only ``reader`` would drop
    # ``input`` and ``batch_size`` and break the documented ``input=`` usage.
    return inferer.infer(
        field=field,
        input=input,
        batch_size=batch_size,
        reader=reader,
        feeding=feeding)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录