Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c79ec9f0
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c79ec9f0
编写于
5月 25, 2018
作者:
K
Kexin Zhao
提交者:
GitHub
5月 25, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add create LoDTensor from list option and simplify recommender book example (#10946)
* add create lodtensor from list * modify book example
上级
72149c16
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
69 additions
and
108 deletions
+69
-108
python/paddle/fluid/lod_tensor.py
python/paddle/fluid/lod_tensor.py
+17
-6
python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
...-api/recommender_system/test_recommender_system_newapi.py
+17
-30
python/paddle/fluid/tests/book/test_recommender_system.py
python/paddle/fluid/tests/book/test_recommender_system.py
+28
-68
python/paddle/fluid/tests/test_lod_tensor.py
python/paddle/fluid/tests/test_lod_tensor.py
+7
-4
未找到文件。
python/paddle/fluid/lod_tensor.py
浏览文件 @
c79ec9f0
...
...
@@ -93,12 +93,12 @@ def _convert_lod(lod):
def
create_lod_tensor
(
data
,
lod
,
place
):
"""Create a lod tensor from a numpy array or an existing lod tensor.
"""Create a lod tensor from a numpy array
, a list,
or an existing lod tensor.
Create a lod tensor by doing the following:
1. Check that the length-based input lod is valid.
2. Convert the length-based lod to an offset-based LoD.
3. Copy the data from a numpy array or an existing lod tensor to
3. Copy the data from a numpy array
, a list
or an existing lod tensor to
CPU or GPU device (based on input place).
4. Set the level of detail (LoD) using the offset-based LoD.
...
...
@@ -117,7 +117,7 @@ def create_lod_tensor(data, lod, place):
for more details regarding LoD.
Args:
data: a numpy array or a LoDTensor holding the data to be copied.
data: a numpy array or a LoDTensor
or a list
holding the data to be copied.
lod: a list of lists indicating the length-based LoD info specified by the user.
place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
...
...
@@ -126,6 +126,18 @@ def create_lod_tensor(data, lod, place):
"""
if
isinstance
(
data
,
core
.
LoDTensor
):
return
create_lod_tensor
(
np
.
array
(
data
),
lod
,
place
)
elif
isinstance
(
data
,
list
):
# When input data is a list, it only deals with the case where the base element
# is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated
# LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number
# of words or other indexes in the sequence.
new_lod
=
[]
for
seq
in
data
:
new_lod
.
append
(
len
(
seq
))
assert
[
new_lod
]
==
lod
,
"data and lod do not match"
flattened_data
=
np
.
concatenate
(
data
,
axis
=
0
).
astype
(
"int64"
)
flattened_data
=
flattened_data
.
reshape
([
len
(
flattened_data
),
1
])
return
create_lod_tensor
(
flattened_data
,
lod
,
place
)
elif
isinstance
(
data
,
np
.
ndarray
):
assert
_validate_lod
(
lod
,
data
.
shape
[
0
]),
"the provided lod info is invalid"
...
...
@@ -134,9 +146,8 @@ def create_lod_tensor(data, lod, place):
tensor
.
set_lod
(
_convert_lod
(
lod
))
return
tensor
else
:
raise
Exception
(
"data should be either a LoDTensor or a Numpy array, but you pass type %s instead"
%
(
type
(
data
)))
raise
TypeError
(
"data should be either a LoDTensor, a Numpy array or a list"
)
def
create_random_int_lodtensor
(
lod
,
base_shape
,
place
,
low
,
high
):
...
...
python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
浏览文件 @
c79ec9f0
...
...
@@ -197,10 +197,7 @@ def train(use_cuda, train_program, save_path):
num_epochs
=
1
,
event_handler
=
event_handler
,
reader
=
train_reader
,
feed_order
=
[
'user_id'
,
'gender_id'
,
'age_id'
,
'job_id'
,
'movie_id'
,
'category_id'
,
'movie_title'
,
'score'
])
feed_order
=
feed_order
)
def
infer
(
use_cuda
,
inference_program
,
save_path
):
...
...
@@ -208,32 +205,22 @@ def infer(use_cuda, inference_program, save_path):
inferencer
=
fluid
.
Inferencer
(
inference_program
,
param_path
=
save_path
,
place
=
place
)
def
create_lod_tensor
(
data
,
lod
=
None
):
tensor
=
fluid
.
LoDTensor
()
if
lod
is
None
:
# Tensor, the shape is [batch_size, 1]
index
=
0
lod_0
=
[
index
]
for
l
in
range
(
len
(
data
)):
index
+=
1
lod_0
.
append
(
index
)
lod
=
[
lod_0
]
tensor
.
set_lod
(
lod
)
flattened_data
=
np
.
concatenate
(
data
,
axis
=
0
).
astype
(
"int64"
)
flattened_data
=
flattened_data
.
reshape
([
len
(
flattened_data
),
1
])
tensor
.
set
(
flattened_data
,
place
)
return
tensor
# Generate a random input for inference
user_id
=
create_lod_tensor
([[
1
]])
gender_id
=
create_lod_tensor
([[
1
]])
age_id
=
create_lod_tensor
([[
0
]])
job_id
=
create_lod_tensor
([[
10
]])
movie_id
=
create_lod_tensor
([[
783
]])
category_id
=
create_lod_tensor
([[
10
],
[
8
],
[
9
]],
[[
0
,
3
]])
movie_title
=
create_lod_tensor
([[
1069
],
[
4140
],
[
2923
],
[
710
],
[
988
]],
[[
0
,
5
]])
# Use the first data from paddle.dataset.movielens.test() as input.
# Use create_lod_tensor(data, lod, place) API to generate LoD Tensor,
# where `data` is a list of sequences of index numbers, `lod` is
# the level of detail (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, lod = [[3, 2]] contains one level of detail info,
# indicating that `data` consists of two sequences of length 3 and 2.
user_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
gender_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
age_id
=
fluid
.
create_lod_tensor
([[
0
]],
[[
1
]],
place
)
job_id
=
fluid
.
create_lod_tensor
([[
10
]],
[[
1
]],
place
)
movie_id
=
fluid
.
create_lod_tensor
([[
783
]],
[[
1
]],
place
)
category_id
=
fluid
.
create_lod_tensor
([[
10
,
8
,
9
]],
[[
3
]],
place
)
movie_title
=
fluid
.
create_lod_tensor
([[
1069
,
4140
,
2923
,
710
,
988
]],
[[
5
]],
place
)
results
=
inferencer
.
infer
(
{
...
...
python/paddle/fluid/tests/book/test_recommender_system.py
浏览文件 @
c79ec9f0
...
...
@@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True):
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
movielens
.
test
(),
batch_size
=
BATCH_SIZE
)
feeding
=
{
'user_id'
:
0
,
'gender_id'
:
1
,
'age_id'
:
2
,
'job_id'
:
3
,
'movie_id'
:
4
,
'category_id'
:
5
,
'movie_title'
:
6
,
'score'
:
7
}
def
func_feed
(
feeding
,
data
):
feed_tensors
=
{}
for
(
key
,
idx
)
in
feeding
.
iteritems
():
tensor
=
fluid
.
LoDTensor
()
if
key
!=
"category_id"
and
key
!=
"movie_title"
:
if
key
==
"score"
:
numpy_data
=
np
.
array
(
map
(
lambda
x
:
x
[
idx
],
data
)).
astype
(
"float32"
)
else
:
numpy_data
=
np
.
array
(
map
(
lambda
x
:
x
[
idx
],
data
)).
astype
(
"int64"
)
else
:
numpy_data
=
map
(
lambda
x
:
np
.
array
(
x
[
idx
]).
astype
(
"int64"
),
data
)
lod_info
=
[
len
(
item
)
for
item
in
numpy_data
]
offset
=
0
lod
=
[
offset
]
for
item
in
lod_info
:
offset
+=
item
lod
.
append
(
offset
)
numpy_data
=
np
.
concatenate
(
numpy_data
,
axis
=
0
)
tensor
.
set_lod
([
lod
])
numpy_data
=
numpy_data
.
reshape
([
numpy_data
.
shape
[
0
],
1
])
tensor
.
set
(
numpy_data
,
place
)
feed_tensors
[
key
]
=
tensor
return
feed_tensors
feed_order
=
[
'user_id'
,
'gender_id'
,
'age_id'
,
'job_id'
,
'movie_id'
,
'category_id'
,
'movie_title'
,
'score'
]
def
train_loop
(
main_program
):
exe
.
run
(
framework
.
default_startup_program
())
feed_list
=
[
main_program
.
global_block
().
var
(
var_name
)
for
var_name
in
feed_order
]
feeder
=
fluid
.
DataFeeder
(
feed_list
,
place
)
PASS_NUM
=
100
for
pass_id
in
range
(
PASS_NUM
):
for
batch_id
,
data
in
enumerate
(
train_reader
()):
# train a mini-batch
outs
=
exe
.
run
(
program
=
main_program
,
feed
=
f
unc_feed
(
feeding
,
data
),
feed
=
f
eeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
])
out
=
np
.
array
(
outs
[
0
])
if
(
batch_id
+
1
)
%
10
==
0
:
avg_cost_set
=
[]
for
test_data
in
test_reader
():
avg_cost_np
=
exe
.
run
(
program
=
test_program
,
feed
=
func_feed
(
feeding
,
test_data
),
fetch_list
=
[
avg_cost
])
avg_cost_np
=
exe
.
run
(
program
=
test_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
avg_cost
])
avg_cost_set
.
append
(
avg_cost_np
[
0
])
break
# test only 1 segment for speeding up CI
...
...
@@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
def
create_lod_tensor
(
data
,
lod
=
None
):
tensor
=
fluid
.
LoDTensor
()
if
lod
is
None
:
# Tensor, the shape is [batch_size, 1]
index
=
0
lod_0
=
[
index
]
for
l
in
range
(
len
(
data
)):
index
+=
1
lod_0
.
append
(
index
)
lod
=
[
lod_0
]
tensor
.
set_lod
(
lod
)
flattened_data
=
np
.
concatenate
(
data
,
axis
=
0
).
astype
(
"int64"
)
flattened_data
=
flattened_data
.
reshape
([
len
(
flattened_data
),
1
])
tensor
.
set
(
flattened_data
,
place
)
return
tensor
inference_scope
=
fluid
.
core
.
Scope
()
with
fluid
.
scope_guard
(
inference_scope
):
# Use fluid.io.load_inference_model to obtain the inference program desc,
...
...
@@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None):
# Use the first data from paddle.dataset.movielens.test() as input
assert
feed_target_names
[
0
]
==
"user_id"
user_id
=
create_lod_tensor
([[
1
]])
# Use create_lod_tensor(data, lod, place) API to generate LoD Tensor
# where `data` is a list of sequences of index numbers, `lod` is
# the level of detail (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, lod = [[3, 2]] contains one level of detail info,
# indicating that `data` consists of two sequences of length 3 and 2.
user_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
assert
feed_target_names
[
1
]
==
"gender_id"
gender_id
=
create_lod_tensor
([[
1
]]
)
gender_id
=
fluid
.
create_lod_tensor
([[
1
]],
[[
1
]],
place
)
assert
feed_target_names
[
2
]
==
"age_id"
age_id
=
create_lod_tensor
([[
0
]]
)
age_id
=
fluid
.
create_lod_tensor
([[
0
]],
[[
1
]],
place
)
assert
feed_target_names
[
3
]
==
"job_id"
job_id
=
create_lod_tensor
([[
10
]]
)
job_id
=
fluid
.
create_lod_tensor
([[
10
]],
[[
1
]],
place
)
assert
feed_target_names
[
4
]
==
"movie_id"
movie_id
=
create_lod_tensor
([[
783
]]
)
movie_id
=
fluid
.
create_lod_tensor
([[
783
]],
[[
1
]],
place
)
assert
feed_target_names
[
5
]
==
"category_id"
category_id
=
create_lod_tensor
([[
10
],
[
8
],
[
9
]],
[[
0
,
3
]]
)
category_id
=
fluid
.
create_lod_tensor
([[
10
,
8
,
9
]],
[[
3
]],
place
)
assert
feed_target_names
[
6
]
==
"movie_title"
movie_title
=
create_lod_tensor
([[
1069
],
[
4140
],
[
2923
],
[
710
],
[
988
]],
[[
0
,
5
]]
)
movie_title
=
fluid
.
create_lod_tensor
([[
1069
,
4140
,
2923
,
710
,
988
]],
[[
5
]],
place
)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
...
...
python/paddle/fluid/tests/test_lod_tensor.py
浏览文件 @
c79ec9f0
...
...
@@ -53,11 +53,14 @@ class TestLoDTensor(unittest.TestCase):
self
.
assertEqual
(
_convert_lod
(
lod
),
converted_lod
)
def
test_create_lod_tensor
(
self
):
# Only numpy array or a fluid LoDTensor is valid input to
# create_lod_tensor function, currently a list of lists is not.
data
=
[[
1
,
2
],
[
3
,
4
]]
self
.
assertRaises
(
Exception
,
create_lod_tensor
,
data
,
[],
# Create LoDTensor from a list
data
=
[[
1
,
2
,
3
],
[
3
,
4
]]
wrong_lod
=
[[
2
,
2
]]
correct_lod
=
[[
3
,
2
]]
self
.
assertRaises
(
AssertionError
,
create_lod_tensor
,
data
,
wrong_lod
,
fluid
.
CPUPlace
())
tensor
=
create_lod_tensor
(
data
,
correct_lod
,
fluid
.
CPUPlace
())
self
.
assertEqual
(
tensor
.
lod
(),
[[
0
,
3
,
5
]])
# Create LoDTensor from numpy array
data
=
numpy
.
random
.
random
([
10
,
1
])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录