未验证 提交 e32f4c4f 编写于 作者: J jiaqi 提交者: GitHub

examples use code-block in dataset.py (#17451)

* examples use code-block in dataset.py
test=develop
test=document_preview

* add QueueDataset example
test=develop
test=document_preview
上级 5a6ab380
...@@ -25,7 +25,11 @@ class DatasetFactory(object): ...@@ -25,7 +25,11 @@ class DatasetFactory(object):
the default is "QueueDataset". the default is "QueueDataset".
Example: Example:
dataset = paddle.fluid.DatasetFactory.create_dataset("InMemoryDataset") .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
""" """
def __init__(self): def __init__(self):
...@@ -42,8 +46,11 @@ class DatasetFactory(object): ...@@ -42,8 +46,11 @@ class DatasetFactory(object):
Default is QueueDataset. Default is QueueDataset.
Examples: Examples:
.. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset() dataset = fluid.DatasetFactory().create_dataset()
""" """
try: try:
dataset = globals()[datafeed_class]() dataset = globals()[datafeed_class]()
...@@ -70,8 +77,12 @@ class DatasetBase(object): ...@@ -70,8 +77,12 @@ class DatasetBase(object):
Set pipe command of current dataset Set pipe command of current dataset
A pipe command is a UNIX pipeline command that can be used only A pipe command is a UNIX pipeline command that can be used only
Example: Examples:
>>> dataset.set_pipe_command("python my_script.py") .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_pipe_command("python my_script.py")
Args: Args:
pipe_command(str): pipe command pipe_command(str): pipe command
...@@ -83,8 +94,12 @@ class DatasetBase(object): ...@@ -83,8 +94,12 @@ class DatasetBase(object):
""" """
Set batch size. Will be effective during training Set batch size. Will be effective during training
Example: Examples:
>>> dataset.set_batch_size(128) .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_batch_size(128)
Args: Args:
batch_size(int): batch size batch_size(int): batch size
...@@ -96,8 +111,12 @@ class DatasetBase(object): ...@@ -96,8 +111,12 @@ class DatasetBase(object):
""" """
Set thread num, it is the num of readers. Set thread num, it is the num of readers.
Example: Examples:
>>> dataset.set_thread(12) .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_thread(12)
Args: Args:
thread_num(int): thread num thread_num(int): thread num
...@@ -109,8 +128,12 @@ class DatasetBase(object): ...@@ -109,8 +128,12 @@ class DatasetBase(object):
""" """
Set file list in current worker. Set file list in current worker.
Example: Examples:
>>> dataset.set_filelist(['a.txt', 'b.txt']) .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_filelist(['a.txt', 'b.txt'])
Args: Args:
filelist(list): file list filelist(list): file list
...@@ -121,8 +144,12 @@ class DatasetBase(object): ...@@ -121,8 +144,12 @@ class DatasetBase(object):
""" """
Set Variables which you will use. Set Variables which you will use.
Example: Examples:
>>> dataset.set_use_var([data, label]) .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var([data, label])
Args: Args:
var_list(list): variable list var_list(list): variable list
...@@ -148,8 +175,12 @@ class DatasetBase(object): ...@@ -148,8 +175,12 @@ class DatasetBase(object):
""" """
Set hdfs config: fs name and ugi Set hdfs config: fs name and ugi
Example: Examples:
>>> dataset.set_hdfs_config("my_fs_name", "my_fs_ugi") .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_hdfs_config("my_fs_name", "my_fs_ugi")
Args: Args:
fs_name(str): fs name fs_name(str): fs name
...@@ -168,8 +199,12 @@ class DatasetBase(object): ...@@ -168,8 +199,12 @@ class DatasetBase(object):
""" """
Returns a protobuf message for this DataFeedDesc Returns a protobuf message for this DataFeedDesc
Example: Examples:
>>> print(dataset.desc()) .. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset()
print(dataset.desc())
Returns: Returns:
A string message A string message
...@@ -184,7 +219,7 @@ class InMemoryDataset(DatasetBase): ...@@ -184,7 +219,7 @@ class InMemoryDataset(DatasetBase):
This class should be created by DatasetFactory This class should be created by DatasetFactory
Example: Example:
dataset = paddle.fluid.DatasetFactory.create_dataset("InMemoryDataset") dataset = paddle.fluid.DatasetFactory().create_dataset("InMemoryDataset")
""" """
def __init__(self): def __init__(self):
...@@ -196,12 +231,14 @@ class InMemoryDataset(DatasetBase): ...@@ -196,12 +231,14 @@ class InMemoryDataset(DatasetBase):
""" """
Load data into memory Load data into memory
Example: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> dataset = fluid.DatasetFactory.create_dataset("InMemoryDataset")
>>> filelist = ["a.txt", "b.txt"] import paddle.fluid as fluid
>>> dataset.set_filelist(filelist) dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
>>> dataset.load_into_memory() filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
""" """
self._prepare_to_run() self._prepare_to_run()
self.dataset.load_into_memory() self.dataset.load_into_memory()
...@@ -210,13 +247,15 @@ class InMemoryDataset(DatasetBase): ...@@ -210,13 +247,15 @@ class InMemoryDataset(DatasetBase):
""" """
Local shuffle Local shuffle
Example: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> dataset = fluid.DatasetFactory.create_dataset("InMemoryDataset")
>>> filelist = ["a.txt", "b.txt"] import paddle.fluid as fluid
>>> dataset.set_filelist(filelist) dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
>>> dataset.load_into_memory() filelist = ["a.txt", "b.txt"]
>>> dataset.local_shuffle() dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.local_shuffle()
""" """
self.dataset.local_shuffle() self.dataset.local_shuffle()
...@@ -228,13 +267,15 @@ class InMemoryDataset(DatasetBase): ...@@ -228,13 +267,15 @@ class InMemoryDataset(DatasetBase):
If you run in distributed mode, you should pass fleet instead of None. If you run in distributed mode, you should pass fleet instead of None.
Examples: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset = fluid.DatasetFactory.create_dataset("InMemoryDataset") import paddle.fluid as fluid
>>> filelist = ["a.txt", "b.txt"] from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset.set_filelist(filelist) dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
>>> dataset.load_into_memory() filelist = ["a.txt", "b.txt"]
>>> dataset.global_shuffle(fleet) dataset.set_filelist(filelist)
dataset.load_into_memory()
dataset.global_shuffle(fleet)
Args: Args:
fleet(Fleet): fleet singleton. Default None. fleet(Fleet): fleet singleton. Default None.
...@@ -258,18 +299,21 @@ class InMemoryDataset(DatasetBase): ...@@ -258,18 +299,21 @@ class InMemoryDataset(DatasetBase):
""" """
Release InMemoryDataset memory data, when data will not be used again. Release InMemoryDataset memory data, when data will not be used again.
Example: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset = fluid.DatasetFactory.create_dataset("InMemoryDataset") import paddle.fluid as fluid
>>> filelist = ["a.txt", "b.txt"] from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset.set_filelist(filelist) dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
>>> dataset.load_into_memory() filelist = ["a.txt", "b.txt"]
>>> dataset.global_shuffle(fleet) dataset.set_filelist(filelist)
>>> exe = fluid.Executor(fluid.CPUPlace()) dataset.load_into_memory()
>>> exe.run(fluid.default_startup_program()) dataset.global_shuffle(fleet)
>>> exe.train_from_dataset(fluid.default_main_program(), dataset) exe = fluid.Executor(fluid.CPUPlace())
>>> dataset.release_memory() exe.run(fluid.default_startup_program())
exe.train_from_dataset(fluid.default_main_program(), dataset)
dataset.release_memory()
""" """
self.dataset.release_memory() self.dataset.release_memory()
...@@ -287,14 +331,16 @@ class InMemoryDataset(DatasetBase): ...@@ -287,14 +331,16 @@ class InMemoryDataset(DatasetBase):
Returns: Returns:
The size of memory data. The size of memory data.
Example: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset = fluid.DatasetFactory.create_dataset("InMemoryDataset") import paddle.fluid as fluid
>>> filelist = ["a.txt", "b.txt"] from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset.set_filelist(filelist) dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
>>> dataset.load_into_memory() filelist = ["a.txt", "b.txt"]
>>> print dataset.get_memory_data_size(fleet) dataset.set_filelist(filelist)
dataset.load_into_memory()
print(dataset.get_memory_data_size(fleet))
""" """
import numpy as np import numpy as np
...@@ -322,15 +368,17 @@ class InMemoryDataset(DatasetBase): ...@@ -322,15 +368,17 @@ class InMemoryDataset(DatasetBase):
Returns: Returns:
The size of shuffle data. The size of shuffle data.
Example: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset = fluid.DatasetFactory.create_dataset("InMemoryDataset") import paddle.fluid as fluid
>>> filelist = ["a.txt", "b.txt"] from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
>>> dataset.set_filelist(filelist) dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
>>> dataset.load_into_memory() filelist = ["a.txt", "b.txt"]
>>> dataset.global_shuffle(fleet) dataset.set_filelist(filelist)
>>> print dataset.get_shuffle_data_size(fleet) dataset.load_into_memory()
dataset.global_shuffle(fleet)
print(dataset.get_shuffle_data_size(fleet))
""" """
import numpy as np import numpy as np
...@@ -348,9 +396,12 @@ class QueueDataset(DatasetBase): ...@@ -348,9 +396,12 @@ class QueueDataset(DatasetBase):
""" """
QueueDataset, it will process data in a streaming fashion. QueueDataset, it will process data in a streaming fashion.
Example: Examples:
.. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
dataset = fluid.DatasetFactory.create_dataset("QueueDataset") dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
""" """
def __init__(self): def __init__(self):
...@@ -363,10 +414,18 @@ class QueueDataset(DatasetBase): ...@@ -363,10 +414,18 @@ class QueueDataset(DatasetBase):
def local_shuffle(self): def local_shuffle(self):
""" """
Local shuffle Local shuffle data.
Local shuffle is not supported in QueueDataset Local shuffle is not supported in QueueDataset
NotImplementedError will be raised NotImplementedError will be raised
Examples:
.. code-block:: python
import paddle.fluid as fluid
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
dataset.local_shuffle()
""" """
raise NotImplementedError( raise NotImplementedError(
"QueueDataset does not support local shuffle, " "QueueDataset does not support local shuffle, "
...@@ -374,8 +433,19 @@ class QueueDataset(DatasetBase): ...@@ -374,8 +433,19 @@ class QueueDataset(DatasetBase):
def global_shuffle(self, fleet=None): def global_shuffle(self, fleet=None):
""" """
Global shuffle data.
Global shuffle is not supported in QueueDataset Global shuffle is not supported in QueueDataset
NotImplementedError will be raised NotImplementedError will be raised
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
dataset.global_shuffle(fleet)
""" """
raise NotImplementedError( raise NotImplementedError(
"QueueDataset does not support global shuffle, " "QueueDataset does not support global shuffle, "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册