PaddlePaddle / Paddle, commit 70ff9038 (unverified)

Authored on Oct 11, 2019 by liuwei1031, committed via GitHub on Oct 11, 2019.
Parent commit: 057bce4d

improve the doc of data feeder related APIs (#20515)

* improve data feeder related API
Showing 2 changed files with 138 additions and 123 deletions (+138, -123):
paddle/fluid/API.spec (+4, -4)
python/paddle/fluid/data_feeder.py (+134, -119)
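For orientation, the commit only rewrites documentation for the DataFeeder APIs; the feeding workflow those docstrings describe looks roughly like the sketch below. This is an illustration in the spirit of the examples in the diff (assuming the 1.6-era fluid API), not part of the commit; the variable names and shapes are arbitrary.

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CPUPlace()
    # Two input variables; the leading -1 is the batch dimension.
    x = fluid.layers.data(name='x', shape=[-1, 4], dtype='float32')
    y = fluid.layers.data(name='y', shape=[-1, 1], dtype='int64')

    feeder = fluid.DataFeeder(feed_list=[x, y], place=place)

    def reader():
        # Each yielded sample is a tuple with one entry per variable in feed_list.
        for _ in range(3):
            yield np.random.random([4]).astype('float32'), np.random.randint(10, size=[1])

    # feed() converts the samples into a dict of variable name -> LoDTensor.
    feed_dict = feeder.feed(reader())
    print(list(feed_dict.keys()))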
paddle/fluid/API.spec @ 70ff9038

@@ -1100,11 +1100,11 @@ paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', '7b5bf
 paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', 'ea029ec9e0dea75f136211c433154f25'))
 paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.DataFeeder ('paddle.fluid.data_feeder.DataFeeder', ('document', 'd9e64be617bd5f49dbb08ac2bc8665e6'))
+paddle.fluid.DataFeeder ('paddle.fluid.data_feeder.DataFeeder', ('document', '9e83e9c52fe5b234df4e29d07f382995'))
 paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'a0ed5ce816b5d603cb595aacb922335a'))
+paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '982feeee2611898d312fdf12580409d7'))
-paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc'))
+paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '69ee4aeeb5cd8c8e5922560457d318ba'))
-paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f'))
+paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '19fe07f2e40f938003f66f39798ec7d6'))
 paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '7a0f76a77dd88a74f24485a103a22fc1'))
 paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', '629b07558971a8ab5e954d9a77457656'))
 paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
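The only changes in API.spec are the 'document' hashes of the four DataFeeder entries: the spec pins a hash of each public API's docstring so that documentation edits show up explicitly in review. As a rough illustration of that mechanism (this is an assumption for clarity, not the exact tool the Paddle repo uses to generate API.spec), such a hash can be derived from the docstring like this:

    import hashlib
    import inspect

    def doc_hash(api_obj):
        # Hash the (possibly empty) docstring; any docstring edit changes the hash.
        doc = inspect.getdoc(api_obj) or ''
        return hashlib.md5(doc.encode('utf-8')).hexdigest()

    # e.g. a hash computed this way for fluid.DataFeeder would move from the old
    # value to the new one once the class docstring below is rewritten.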
python/paddle/fluid/data_feeder.py @ 70ff9038
@@ -152,53 +152,25 @@ class BatchedTensorProvider(object):
 class DataFeeder(object):
     """
     DataFeeder converts the data that returned by a reader into a data
-    structure that can feed into Executor and ParallelExecutor. The reader
-    usually returns a list of mini-batch data entries. Each data entry in
-    the list is one sample. Each sample is a list or a tuple with one
-    feature or multiple features.
-
-    The simple usage shows below:
-
-    .. code-block:: python
-
-        import paddle.fluid as fluid
-        place = fluid.CPUPlace()
-        img = fluid.layers.data(name='image', shape=[1, 28, 28])
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
-        result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
-
-    If you want to feed data into GPU side separately in advance when you
-    use multi-GPU to train a model, you can use `decorate_reader` function.
-
-    .. code-block:: python
-
-        import paddle
-        import paddle.fluid as fluid
-
-        place=fluid.CUDAPlace(0)
-        data = fluid.layers.data(name='data', shape=[3, 224, 224], dtype='float32')
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-
-        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
-        reader = feeder.decorate_reader(
-                paddle.batch(paddle.dataset.flowers.train(), batch_size=16), multi_devices=True)
-
-    Args:
-        feed_list(list): The Variables or Variables'name that will
-            feed into model.
-        place(Place): place indicates feed data into CPU or GPU, if you want to
-            feed data into GPU, please using `fluid.CUDAPlace(i)` (`i` represents
-            the GPU id), or if you want to feed data into CPU, please using
-            `fluid.CPUPlace()`.
-        program(Program): The Program that will feed data into, if program
-            is None, it will use default_main_program(). Default None.
+    structure that can feed into Executor. The reader is usually a
+    python generator that returns a list of mini-batch data entries.
+
+    Parameters:
+        feed_list (list): Variables or names of Variables that need
+            to feed.
+        place (:ref:`api_fluid_CPUPlace` | :ref:`api_fluid_CUDAPlace` ):
+            place indicates the device (CPU | GPU) the data will be fed into, if
+            you want to feed data into GPU, please using :code:`fluid.CUDAPlace(i)`
+            (:code:`i` represents the GPU id), or if you want to feed data into CPU,
+            please using :code:`fluid.CPUPlace()`.
+        program (:ref:`api_fluid_Program` , optional): The Program that will
+            feed data into, if program is None, it will use default_main_program().
+            Default None.
 
     Raises:
-        ValueError: If some Variable is not in this Program.
+        :code:`ValueError` - If some Variables are not in this Program.
 
-    Examples:
+    Example:
         .. code-block:: python
@@ -207,27 +179,34 @@ class DataFeeder(object):
             import paddle.fluid as fluid

             place = fluid.CPUPlace()
             def reader():
-                yield [np.random.random([4]).astype('float32'), np.random.random([3]).astype('float32')],
+                for _ in range(4):
+                    yield np.random.random([4]).astype('float32'), np.random.random([3]).astype('float32'),

             main_program = fluid.Program()
             startup_program = fluid.Program()
             with fluid.program_guard(main_program, startup_program):
-                data_1 = fluid.layers.data(name='data_1', shape=[1, 2, 2])
-                data_2 = fluid.layers.data(name='data_2', shape=[1, 1, 3])
+                data_1 = fluid.layers.data(name='data_1', shape=[-1, 2, 2])
+                data_2 = fluid.layers.data(name='data_2', shape=[-1, 1, 3])
                 out = fluid.layers.fc(input=[data_1, data_2], size=2)
                 # ...

             feeder = fluid.DataFeeder([data_1, data_2], place)

             exe = fluid.Executor(place)
             exe.run(startup_program)
-            for data in reader():
-                outs = exe.run(program=main_program,
-                               feed=feeder.feed(data),
-                               fetch_list=[out])
+            feed_data = feeder.feed(reader())
+            # print feed_data to view feed results
+            # print(feed_data['data_1'])
+            # print(feed_data['data_2'])
+            outs = exe.run(program=main_program,
+                           feed=feed_data,
+                           fetch_list=[out])
+            print(outs)
     """
@@ -252,31 +231,42 @@ class DataFeeder(object):
     def feed(self, iterable):
         """
-        According to feed_list and iterable, converters the input into
-        a data structure that can feed into Executor and ParallelExecutor.
+        According to :code:`feed_list` of :code:`DataFeeder` and :code:`iterable` , converts
+        the input into a data structure that can feed into Executor.

-        Args:
-            iterable(list|tuple): the input data.
+        Parameters:
+            iterable (generator): user defined python generator to read the raw input data

         Returns:
-            dict: the result of conversion.
+            :code:`dict`: a :code:`dict` that contains (variable name - converted tensor) pairs

-        Examples:
+        Example:
             .. code-block:: python

-                import numpy.random as random
+                # In this example, reader - generator will return a list of ndarray of 3 elements
+                # feed API will convert each ndarray input into a tensor
+                # the return result is a dict with keys: data_1, data_2, data_3
+                # result['data_1'] a LoD-Tensor with shape of [5, 2, 1, 3]. 5 is batch size, and [2, 1, 3] is the real shape of data_1.
+                # result['data_2'], result['data_3'] are similar.
+                import numpy as np
                 import paddle.fluid as fluid

                 def reader(limit=5):
-                    for i in range(limit):
-                        yield random.random([784]).astype('float32'), random.random([1]).astype('int64'), random.random([256]).astype('float32')
+                    for i in range(1, limit + 1):
+                        yield np.ones([6]).astype('float32') * i , np.ones([1]).astype('int64') * i, np.random.random([9]).astype('float32')

-                data_1 = fluid.layers.data(name='data_1', shape=[1, 28, 28])
+                data_1 = fluid.layers.data(name='data_1', shape=[2, 1, 3])
                 data_2 = fluid.layers.data(name='data_2', shape=[1], dtype='int64')
-                data_3 = fluid.layers.data(name='data_3', shape=[16, 16], dtype='float32')
+                data_3 = fluid.layers.data(name='data_3', shape=[3, 3], dtype='float32')
                 feeder = fluid.DataFeeder(['data_1','data_2', 'data_3'], fluid.CPUPlace())
                 result = feeder.feed(reader())
+                print(result['data_1'])
+                print(result['data_2'])
+                print(result['data_3'])
         """
         converter = []
         for lod_level, shape, dtype in six.moves.zip(
@@ -303,33 +293,40 @@ class DataFeeder(object):
     def feed_parallel(self, iterable, num_places=None):
         """
-        Takes multiple mini-batches. Each mini-batch will be feed on each
-        device in advance.
+        Similar with feed function, feed_parallel is used with multiple devices (CPU|GPU).
+        Here :code:`iterable` is a list of python generators. The data return by each
+        generator in the list will be fed into a seperate device.

-        Args:
-            iterable(list|tuple): the input data.
-            num_places(int): the number of devices. Default None.
+        Parameters:
+            iterable (list|tuple): list of user-defined python geneators. The element
+                number should match the :code:`num_places`.
+            num_places (int, optional): the number of devices. If not provided (None),
+                all available devices on the machine will be used. Default None.

         Returns:
-            dict: the result of conversion.
+            :code:`generator`: a :code:`generator` that generate dict which contains (variable name - converted tensor) pairs,
+            the total number of dicts will be generated matches with the :code:`num_places`

-        Notes:
-            The number of devices and number of mini-batches must be same.
+        .. note::
+            The number of devices - :code:`num_places` should equal to the generator (element of :code:`iterable` ) number

-        Examples:
+        Example:
             .. code-block:: python

-                import numpy.random as random
+                import numpy as np
                 import paddle.fluid as fluid

-                def reader(limit=10):
-                    for i in range(limit):
-                        yield [random.random([784]).astype('float32'), random.random([1]).astype('float32')],
+                def generate_reader(batch_size, base=0, factor=1):
+                    def _reader():
+                        for i in range(batch_size):
+                            yield np.ones([4]) * factor + base, np.ones([4]) * factor + base + 5
+                    return _reader()

-                x = fluid.layers.data(name='x', shape=[1, 28, 28])
-                y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-                fluid.layers.elementwise_add(x, y)
+                x = fluid.layers.data(name='x', shape=[-1, 2, 2])
+                y = fluid.layers.data(name='y', shape=[-1, 2, 2], dtype='float32')
+                z = fluid.layers.elementwise_add(x, y)

                 feeder = fluid.DataFeeder(['x','y'], fluid.CPUPlace())
                 place_num = 2
@@ -338,11 +335,17 @@ class DataFeeder(object):
                 exe = fluid.Executor(fluid.CPUPlace())
                 exe.run(fluid.default_startup_program())
                 program = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(places=places)

-                for item in reader():
-                    data.append(item)
-                    if place_num == len(data):
-                        exe.run(program=program, feed=list(feeder.feed_parallel(data, place_num)), fetch_list=[])
-                        data = []
+                # print sample feed_parallel result
+                # for item in list(feeder.feed_parallel([generate_reader(5, 0, 1), generate_reader(3, 10, 2)], 2)):
+                #     print(item['x'])
+                #     print(item['y'])
+                reader_list = [generate_reader(5, 0, 1), generate_reader(3, 10, 2)]
+                res = exe.run(program=program, feed=list(feeder.feed_parallel(reader_list, 2)), fetch_list=[z])
+                print(res)
         """
         if isinstance(self.place, core.CUDAPlace):
             places = [
@@ -383,52 +386,64 @@ class DataFeeder(object):
                         num_places=None,
                         drop_last=True):
         """
-        Converter the input data into a data that returned by reader into
-        multiple mini-batches. Each mini-batch will be feed on each device.
+        Decorate the reader (generator) to fit multiple devices. The reader generate
+        multiple mini-batches. Each mini-batch will be fed into a single device.

-        Args:
-            reader(function): the reader is the function which can generate data.
-            multi_devices(bool): whether to use multiple devices or not.
-            num_places(int): if multi_devices is True, you can specify the number
-                of GPU to use, if multi_devices is None, the function will use all the
-                GPU of the current machine. Default None.
-            drop_last(bool): whether to drop the last batch if the
-                size of the last batch is less than batch_size. Default True.
+        Parameters:
+            reader(generator): a user defined python generator used to get :code:`mini-batch` of data.
+                A :code:`mini-batch` can be regarded as a python generator that returns batchs of input
+                entities, just like the below :code:`_mini_batch` in the code example.
+            multi_devices(bool): indicate whether to use multiple devices or not.
+            num_places(int, optional): if :code:`multi_devices` is True, you can specify the number
+                of devices(CPU|GPU) to use, if multi_devices is None, the function will use all the
+                devices of the current machine. Default None.
+            drop_last(bool, optional): whether to drop the last round of data if it is not enough to
+                feed all devices. Default True.

         Returns:
-            dict: the result of conversion.
+            :code:`generator`: a new :code:`generator` which return converted dicts that can be fed into Executor

         Raises:
-            ValueError: If drop_last is False and the data batch cannot fit for devices.
+            :code:`ValueError`: If drop_last is False and the data cannot fit devices perfectly.

-        Examples:
+        Example:
             .. code-block:: python

-                import numpy.random as random
+                import numpy as np
                 import paddle
                 import paddle.fluid as fluid
                 import paddle.fluid.compiler as compiler

-                def reader(limit=10):
-                    for i in range(limit):
-                        yield (random.random([784]).astype('float32'), random.random([1]).astype('int64')),
+                def reader():
+                    def _mini_batch(batch_size):
+                        for i in range(batch_size):
+                            yield np.random.random([16]).astype('float32'), np.random.randint(10, size=[1])
+                    for _ in range(10):
+                        yield _mini_batch(np.random.randint(1, 10))

-                place=fluid.CUDAPlace(0)
-                data = fluid.layers.data(name='data', shape=[1, 28, 28], dtype='float32')
-                label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+                place_num = 3
+                places = [fluid.CPUPlace() for _ in range(place_num)]
+                # a simple network sample
+                data = fluid.layers.data(name='data', shape=[-1, 4, 4], dtype='float32')
+                label = fluid.layers.data(name='label', shape=[-1, 1], dtype='int64')
                 hidden = fluid.layers.fc(input=data, size=10)

-                feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
-                reader = feeder.decorate_reader(reader, multi_devices=True)
+                feeder = fluid.DataFeeder(place=places[0], feed_list=[data, label])
+                reader = feeder.decorate_reader(reader, multi_devices=True, num_places=3, drop_last=True)

-                exe = fluid.Executor(place)
+                exe = fluid.Executor(places[0])
                 exe.run(fluid.default_startup_program())
                 compiled_prog = compiler.CompiledProgram(
-                         fluid.default_main_program()).with_data_parallel()
+                         fluid.default_main_program()).with_data_parallel(places=places)

                 for i,data in enumerate(reader()):
-                    print('iteration : ', i + 1)
+                    # print data if you like
+                    # print(i, data)
                     ret = exe.run(compiled_prog, feed=data, fetch_list=[hidden])
+                    print(ret)
         """
         def __reader_creator__():
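To make the drop_last note above concrete, here is a hedged sketch in the style of the new decorate_reader example (again assuming the 1.6-era fluid API, not taken from the commit): with three places and ten mini-batches, the decorated reader yields three full rounds and, because drop_last=True, silently discards the leftover mini-batch; with drop_last=False the documented ValueError would be raised instead.

    import numpy as np
    import paddle.fluid as fluid

    place_num = 3
    places = [fluid.CPUPlace() for _ in range(place_num)]

    data = fluid.layers.data(name='data', shape=[-1, 4], dtype='float32')
    feeder = fluid.DataFeeder(place=places[0], feed_list=[data])

    def reader():
        # 10 mini-batches; 10 is not a multiple of place_num, so one is left over.
        def _mini_batch(batch_size):
            for _ in range(batch_size):
                yield (np.random.random([4]).astype('float32'),)
        for _ in range(10):
            yield _mini_batch(4)

    multi_reader = feeder.decorate_reader(
        reader, multi_devices=True, num_places=place_num, drop_last=True)

    for round_id, feed_data in enumerate(multi_reader()):
        # feed_data holds one converted feed dict per place and can be passed
        # directly as `feed=` when running a data-parallel CompiledProgram.
        print(round_id, len(feed_data))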