Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c79ec9f0
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c79ec9f0
编写于
5月 25, 2018
作者:
K
Kexin Zhao
提交者:
GitHub
5月 25, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add create LoDTensor from list option and simplify recommender book example (#10946)
* add create lodtensor from list * modify book example
上级
72149c16
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
69 addition
and
108 deletion
+69
-108
python/paddle/fluid/lod_tensor.py
python/paddle/fluid/lod_tensor.py
+17
-6
python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
...-api/recommender_system/test_recommender_system_newapi.py
+17
-30
python/paddle/fluid/tests/book/test_recommender_system.py
python/paddle/fluid/tests/book/test_recommender_system.py
+28
-68
python/paddle/fluid/tests/test_lod_tensor.py
python/paddle/fluid/tests/test_lod_tensor.py
+7
-4
未找到文件。
python/paddle/fluid/lod_tensor.py
浏览文件 @
c79ec9f0
...
...
@@ -93,12 +93,12 @@ def _convert_lod(lod):
def
create_lod_tensor
(
data
,
lod
,
place
):
"""Create a lod tensor from a numpy array or an existing lod tensor.
"""Create a lod tensor from a numpy array
, a list,
or an existing lod tensor.
Create a lod tensor by doing the following:
1. Check that the length-based input lod is valid.
2. Convert the length-based lod to an offset-based LoD.
3. Copy the data from a numpy array or a existing lod tensor to
3. Copy the data from a numpy array
, a list
or an existing lod tensor to
CPU or GPU device (based on input place).
4. Set the level of detail (LoD) using the offset-based LoD.
...
...
@@ -117,7 +117,7 @@ def create_lod_tensor(data, lod, place):
for more details regarding LoD.
Args:
data: a numpy array or a LoDTensor holding the data to be copied.
data: a numpy array or a LoDTensor
or a list
holding the data to be copied.
lod: a list of lists indicating the length-based LoD info specified by the user.
place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
...
...
@@ -126,6 +126,18 @@ def create_lod_tensor(data, lod, place):
"""
if
isinstance
(
data
,
core
.
LoDTensor
):
return
create_lod_tensor
(
np
.
array
(
data
),
lod
,
place
)
elif
isinstance
(
data
,
list
):
# When input data is a list, it only deals with the case where the base element
# is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated
# LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number
# of words or other indexes in the sequence.
new_lod
=
[]
for
seq
in
data
:
new_lod
.
append
(
len
(
seq
))
assert
[
new_lod
]
==
lod
,
"data and lod do not match"
flattened_data
=
np
.
concatenate
(
data
,
axis
=
0
).
astype
(
"int64"
)
flattened_data
=
flattened_data
.
reshape
([
len
(
flattened_data
),
1
])
return
create_lod_tensor
(
flattened_data
,
lod
,
place
)
elif
isinstance
(
data
,
np
.
ndarray
):
assert
_validate_lod
(
lod
,
data
.
shape
[
0
]),
"the provided lod info is invalid"
...
...
@@ -134,9 +146,8 @@ def create_lod_tensor(data, lod, place):
tensor
.
set_lod
(
_convert_lod
(
lod
))
return
tensor
else
:
raise
Exception
(
"data should be either a LoDTensor or a Numpy array, but you pass type %s instead"
%
(
type
(
data
)))
raise
TypeError
(
"data should be either a LoDTensor, a Numpy array or a list"
)
def
create_random_int_lodtensor
(
lod
,
base_shape
,
place
,
low
,
high
):
...
...
python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
浏览文件 @
c79ec9f0
...
...
@@ -197,10 +197,7 @@ def train(use_cuda, train_program, save_path):
num_epochs
=
1
,
event_handler
=
event_handler
,
reader
=
train_reader
,
feed_order
=
[
'user_id'
,
'gender_id'
,
'age_id'
,
'job_id'
,
'movie_id'
,
'category_id'
,
'movie_title'
,
'score'
])
feed_order
=
feed_order
)
def
infer
(
use_cuda
,
inference_program
,
save_path
):
...
...
@@ -208,32 +205,22 @@ def infer(use_cuda, inference_program, save_path):
inferencer
=
fluid
.
Inferencer
(
inference_program
,
param_path
=
save_path
,
place
=
place
)
def
create_lod_tensor
(
data
,
lod
=
None
):
tensor
=
fluid
.
LoDTensor
()
if
lod
is
None
:
# Tensor, the shape is [batch_size, 1]
index
=
0
lod_0
=
[
index
]
for
l
in
range
(
len
(
data
)):
index
+=
1
lod_0
.
append
(
index
)
lod
=
[
lod_0
]
tensor
.
set_lod
(
lod
)
flattened_data
=
np
.
concatenate
(
data
,
axis
=
0
).
astype
(
"int64"
)
flattened_data
=
flattened_data
.
reshape
([
len
(
flattened_data
),
1
])
tensor
.
set
(
flattened_data
,
place
)
return
tensor
# Generate a random input for inference
user_id
=
create_lod_tensor
([[
1
]])
gender_id
=
create_lod_tensor
([[
1
]])
age_id
=
create_lod_tensor
([[
0
]])
job_id
=
create_lod_tensor
([[
10
]])
movie_id
=
create_lod_tensor
([[
783
]])
category_id
=
create_lod_tensor
([[
10
],
[
8
],
[
9
]],
[[
0
,
3
]])
movie_title
=
create_lod_tensor
([[
1069
],
[
4140
],
[
2923
],
[
710
],
[
988
]],
[[
0
,
5
]])
# Use the first data from paddle.dataset.movielens.test() as input.
# Use create_lod_tensor(data, lod, place) API to generate LoD Tensor,
# where `data` is a list of sequences of index numbers, `lod` is
# the level of detail (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, lod = [[3, 2]] contains one level of detail info,
# indicating that `data` consists of two sequences of length 3 and 2.
user_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
gender_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
age_id
=
fluid
.
create_lod_tensor
([[
0
]],
[[
1
]],
place
)
job_id
=
fluid
.
create_lod_tensor
([[
10
]],
[[
1
]],
place
)
movie_id
=
fluid
.
create_lod_tensor
([[
783
]],
[[
1
]],
place
)
category_id
=
fluid
.
create_lod_tensor
([[
10
,
8
,
9
]],
[[
3
]],
place
)
movie_title
=
fluid
.
create_lod_tensor
([[
1069
,
4140
,
2923
,
710
,
988
]],
[[
5
]],
place
)
results
=
inferencer
.
infer
(
{
...
...
python/paddle/fluid/tests/book/test_recommender_system.py
浏览文件 @
c79ec9f0
...
...
@@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True):
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
movielens
.
test
(),
batch_size
=
BATCH_SIZE
)
feeding
=
{
'user_id'
:
0
,
'gender_id'
:
1
,
'age_id'
:
2
,
'job_id'
:
3
,
'movie_id'
:
4
,
'category_id'
:
5
,
'movie_title'
:
6
,
'score'
:
7
}
def
func_feed
(
feeding
,
data
):
feed_tensors
=
{}
for
(
key
,
idx
)
in
feeding
.
iteritems
():
tensor
=
fluid
.
LoDTensor
()
if
key
!=
"category_id"
and
key
!=
"movie_title"
:
if
key
==
"score"
:
numpy_data
=
np
.
array
(
map
(
lambda
x
:
x
[
idx
],
data
)).
astype
(
"float32"
)
else
:
numpy_data
=
np
.
array
(
map
(
lambda
x
:
x
[
idx
],
data
)).
astype
(
"int64"
)
else
:
numpy_data
=
map
(
lambda
x
:
np
.
array
(
x
[
idx
]).
astype
(
"int64"
),
data
)
lod_info
=
[
len
(
item
)
for
item
in
numpy_data
]
offset
=
0
lod
=
[
offset
]
for
item
in
lod_info
:
offset
+=
item
lod
.
append
(
offset
)
numpy_data
=
np
.
concatenate
(
numpy_data
,
axis
=
0
)
tensor
.
set_lod
([
lod
])
numpy_data
=
numpy_data
.
reshape
([
numpy_data
.
shape
[
0
],
1
])
tensor
.
set
(
numpy_data
,
place
)
feed_tensors
[
key
]
=
tensor
return
feed_tensors
feed_order
=
[
'user_id'
,
'gender_id'
,
'age_id'
,
'job_id'
,
'movie_id'
,
'category_id'
,
'movie_title'
,
'score'
]
def
train_loop
(
main_program
):
exe
.
run
(
framework
.
default_startup_program
())
feed_list
=
[
main_program
.
global_block
().
var
(
var_name
)
for
var_name
in
feed_order
]
feeder
=
fluid
.
DataFeeder
(
feed_list
,
place
)
PASS_NUM
=
100
for
pass_id
in
range
(
PASS_NUM
):
for
batch_id
,
data
in
enumerate
(
train_reader
()):
# train a mini-batch
outs
=
exe
.
run
(
program
=
main_program
,
feed
=
f
unc_feed
(
feeding
,
data
),
feed
=
f
eeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
])
out
=
np
.
array
(
outs
[
0
])
if
(
batch_id
+
1
)
%
10
==
0
:
avg_cost_set
=
[]
for
test_data
in
test_reader
():
avg_cost_np
=
exe
.
run
(
program
=
test_program
,
feed
=
func_feed
(
feeding
,
test_data
),
fetch_list
=
[
avg_cost
])
avg_cost_np
=
exe
.
run
(
program
=
test_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
avg_cost
])
avg_cost_set
.
append
(
avg_cost_np
[
0
])
break
# test only 1 segment for speeding up CI
...
...
@@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
def
create_lod_tensor
(
data
,
lod
=
None
):
tensor
=
fluid
.
LoDTensor
()
if
lod
is
None
:
# Tensor, the shape is [batch_size, 1]
index
=
0
lod_0
=
[
index
]
for
l
in
range
(
len
(
data
)):
index
+=
1
lod_0
.
append
(
index
)
lod
=
[
lod_0
]
tensor
.
set_lod
(
lod
)
flattened_data
=
np
.
concatenate
(
data
,
axis
=
0
).
astype
(
"int64"
)
flattened_data
=
flattened_data
.
reshape
([
len
(
flattened_data
),
1
])
tensor
.
set
(
flattened_data
,
place
)
return
tensor
inference_scope
=
fluid
.
core
.
Scope
()
with
fluid
.
scope_guard
(
inference_scope
):
# Use fluid.io.load_inference_model to obtain the inference program desc,
...
...
@@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None):
# Use the first data from paddle.dataset.movielens.test() as input
assert
feed_target_names
[
0
]
==
"user_id"
user_id
=
create_lod_tensor
([[
1
]])
# Use create_lod_tensor(data, lod, place) API to generate LoD Tensor,
# where `data` is a list of sequences of index numbers, `lod` is
# the level of detail (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, lod = [[3, 2]] contains one level of detail info,
# indicating that `data` consists of two sequences of length 3 and 2.
user_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
assert
feed_target_names
[
1
]
==
"gender_id"
gender_id
=
create_lod_tensor
([[
1
]]
)
gender_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
assert
feed_target_names
[
2
]
==
"age_id"
age_id
=
create_lod_tensor
([[
0
]]
)
age_id
=
fluid
.
create_lod_tensor
([[
0
]],
[[
1
]],
place
)
assert
feed_target_names
[
3
]
==
"job_id"
job_id
=
create_lod_tensor
([[
10
]]
)
job_id
=
fluid
.
create_lod_tensor
([[
10
]],
[[
1
]],
place
)
assert
feed_target_names
[
4
]
==
"movie_id"
movie_id
=
create_lod_tensor
([[
783
]]
)
movie_id
=
fluid
.
create_lod_tensor
([[
783
]],
[[
1
]],
place
)
assert
feed_target_names
[
5
]
==
"category_id"
category_id
=
create_lod_tensor
([[
10
],
[
8
],
[
9
]],
[[
0
,
3
]]
)
category_id
=
fluid
.
create_lod_tensor
([[
10
,
8
,
9
]],
[[
3
]],
place
)
assert
feed_target_names
[
6
]
==
"movie_title"
movie_title
=
create_lod_tensor
([[
1069
],
[
4140
],
[
2923
],
[
710
],
[
988
]],
[[
0
,
5
]]
)
movie_title
=
fluid
.
create_lod_tensor
([[
1069
,
4140
,
2923
,
710
,
988
]],
[[
5
]],
place
)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
...
...
python/paddle/fluid/tests/test_lod_tensor.py
浏览文件 @
c79ec9f0
...
...
@@ -53,11 +53,14 @@ class TestLoDTensor(unittest.TestCase):
self
.
assertEqual
(
_convert_lod
(
lod
),
converted_lod
)
def
test_create_lod_tensor
(
self
):
# Only numpy array or a fluid LoDTensor is valid input to
# create_lod_tensor function, currently a list of lists is not.
data
=
[[
1
,
2
],
[
3
,
4
]]
self
.
assertRaises
(
Exception
,
create_lod_tensor
,
data
,
[],
# Create LoDTensor from a list
data
=
[[
1
,
2
,
3
],
[
3
,
4
]]
wrong_lod
=
[[
2
,
2
]]
correct_lod
=
[[
3
,
2
]]
self
.
assertRaises
(
AssertionError
,
create_lod_tensor
,
data
,
wrong_lod
,
fluid
.
CPUPlace
())
tensor
=
create_lod_tensor
(
data
,
correct_lod
,
fluid
.
CPUPlace
())
self
.
assertEqual
(
tensor
.
lod
(),
[[
0
,
3
,
5
]])
# Create LoDTensor from numpy array
data
=
numpy
.
random
.
random
([
10
,
1
])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录