Merge pull request #11580 from chengduoZH/fix_doc_data_reader

Refine doc of data reader

Merge pull request #11580 from chengduoZH/fix_doc_data_reader
Refine doc of data reader
3f8d9b0a · chengduo · GitHub · 4b7ae145 · 10cee7ed · 3f8d9b0a
隐藏空白更改
内联并排

Showing with 104 addition and 3 deletion

python/paddle/fluid/data_feeder.py python/paddle/fluid/data_feeder.py +97 -0

python/paddle/fluid/executor.py python/paddle/fluid/executor.py +7 -3

未找到文件。
--- a/python/paddle/fluid/data_feeder.py
+++ b/python/paddle/fluid/data_feeder.py
@@ -79,6 +79,61 @@ class DataToLoDTensorConverter(object):


 class DataFeeder(object):
+    """
+    DataFeeder converts the data that returned by a reader into a data
+    structure that can feed into Executor and ParallelExecutor. The reader
+    usually returns a list of mini-batch data entries. Each data entry in
+    the list is one sample. Each sample is a list or a tuple with one
+    feature or multiple features.
+
+    The simple usage shows below:
+
+    ..  code-block:: python
+
+        place = fluid.CPUPlace()
+        img = fluid.layers.data(name='image', shape=[1, 28, 28])
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
+        result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
+
+
+    If you want to feed data into GPU side separately in advance when you
+    use multi-GPU to train a model, you can use `decorate_reader` function.
+
+    ..  code-block:: python
+
+        place=fluid.CUDAPlace(0)
+        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
+        reader = feeder.decorate_reader(
+            paddle.batch(flowers.train(), batch_size=16))
+
+    Args:
+        feed_list(list): The Variables or Variables'name that will
+            feed into model.
+        place(Place): place indicates feed data into CPU or GPU, if you want to
+            feed data into GPU, please using `fluid.CUDAPlace(i)` (`i` represents
+            the GPU id), or if you want to feed data into CPU, please using
+            `fluid.CPUPlace()`.
+        program(Program): The Program that will feed data into, if program
+            is None, it will use default_main_program(). Default None.
+
+    Raises:
+        ValueError: If some Variable is not in this Program.
+
+    Examples:
+        .. code-block:: python
+
+            # ...
+            place = fluid.CPUPlace()
+            feed_list = [
+                main_program.global_block().var(var_name) for var_name in feed_vars_name
+            ] # feed_vars_name is a list of variables' name.
+            feeder = fluid.DataFeeder(feed_list, place)
+            for data in reader():
+                outs = exe.run(program=main_program,
+                               feed=feeder.feed(data))
+    """
+
    def __init__(self, feed_list, place, program=None):
        self.feed_dtypes = []
        self.feed_names = []
@@ -108,6 +163,16 @@ class DataFeeder(object):
        self.place = place

    def feed(self, iterable):
+        """
+        According to feed_list and iterable, converters the input into
+        a data structure that can feed into Executor and ParallelExecutor.
+
+        Args:
+            iterable(list|tuple): the input data.
+
+        Returns:
+            dict: the result of conversion.
+        """
        converter = []
        for lod_level, shape, dtype in six.zip(
                self.feed_lod_level, self.feed_shapes, self.feed_dtypes):
@@ -130,6 +195,20 @@ class DataFeeder(object):
        return ret_dict

    def feed_parallel(self, iterable, num_places=None):
+        """
+        Takes multiple mini-batches. Each mini-batch will be feed on each
+        device in advance.
+
+        Args:
+            iterable(list|tuple): the input data.
+            num_places(int): the number of devices. Default None.
+
+        Returns:
+            dict: the result of conversion.
+
+        Notes:
+            The number of devices and number of mini-batches must be same.
+        """
        if isinstance(self.place, core.CUDAPlace):
            places = [
                core.CUDAPlace(i)
@@ -168,6 +247,24 @@ class DataFeeder(object):
                        multi_devices,
                        num_places=None,
                        drop_last=True):
+        """
+        Converter the input data into a data that returned by reader into
+        multiple mini-batches. Each mini-batch will be feed on each device.
+
+        Args:
+            reader(fun): the input data.
+            multi_devices(bool): the number of places. Default None.
+            num_places(int): the number of places. Default None.
+            drop_last(bool): the number of places. Default None.
+
+        Returns:
+            dict: the result of conversion.
+
+        Raises:
+            ValueError: If drop_last is False and the data batch which cannot
+            fit for devices.
+        """
+
        def __reader_creator__():
            if not multi_devices:
                for item in reader():

--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -135,14 +135,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name):

 def fetch_var(name, scope=None, return_numpy=True):
    """
-    Fetch the value of the variable with the given name from the given scope
+    Fetch the value of the variable with the given name from the
+    given scope.
+
    Args:
        name(str): name of the variable. Typically, only persistable variables
            can be found in the scope used for running the program.
        scope(core.Scope|None): scope object. It should be the scope where
            you pass to Executor.run() when running your program.
-            If None, global_scope() will be used.
-        return_numpy(bool): whether convert the tensor to numpy.ndarray
+            If None, global_scope() will be used. Default None.
+        return_numpy(bool): whether convert the tensor to numpy.ndarray.
+            Default True.
+
    Returns:
       LodTensor|numpy.ndarray
    """