From 75cda4d9df83615a196866fa76cbe58a17d40990 Mon Sep 17 00:00:00 2001 From: jiaqi <173596896@qq.com> Date: Sat, 18 May 2019 16:55:29 +0800 Subject: [PATCH] fix data_feed_desc.py example run error (#17452) * fix data_feed_desc.py example run error test=develop test=test=document_preview * fix data_feed_desc.py example display error test=develop test=document_preview * update API.spec for DataFeedDesc test=develop test=document_preview --- paddle/fluid/API.spec | 8 +- python/paddle/fluid/data_feed_desc.py | 157 ++++++++++++++++++++------ 2 files changed, 128 insertions(+), 37 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index f3cb3a1f6fd..eb39e021784 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -36,10 +36,10 @@ paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', ' paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff')) paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb')) paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4294493e31c4bc9fc4bd48753044235f')) -paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21')) -paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766')) -paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690')) +paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '75283b5f03ec7b6f74bfca9881a37428')) +paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '68df53d3ea0f24063bf7689e82c2b82e')) +paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'd5a78553cd94fe64148399797055d8ad')) +paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '88d229ea9f892ce8d2922cf028c8bb3a')) paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from', 'places'], varargs=None, keywords=None, defaults=(None, None, None, None, None)), ('document', '0e17773521634ef798fddd7d2ea3ef96')) paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8')) diff --git a/python/paddle/fluid/data_feed_desc.py b/python/paddle/fluid/data_feed_desc.py index 80745aac830..5ed38f9999f 100644 --- a/python/paddle/fluid/data_feed_desc.py +++ b/python/paddle/fluid/data_feed_desc.py @@ -24,28 +24,32 @@ class DataFeedDesc(object): currently only used for AsyncExecutor (See comments for class AsyncExecutor for a brief introduction) - DataFeedDesc shall be initialized from a valid protobuf message from disk: - >>> data_feed = fluid.DataFeedDesc('data.proto') + DataFeedDesc shall be initialized from a valid protobuf message from disk. See :code:`paddle/fluid/framework/data_feed.proto` for message definition. A typical message might look like: - >>> name: "MultiSlotDataFeed" - >>> batch_size: 2 - >>> multi_slot_desc { - >>> slots { - >>> name: "words" - >>> type: "uint64" - >>> is_dense: false - >>> is_used: true - >>> } - >>> slots { - >>> name: "label" - >>> type: "uint64" - >>> is_dense: false - >>> is_used: true - >>> } - >>> } + .. code-block:: python + + f = open("data.proto", "w") + print >> f, 'name: "MultiSlotDataFeed"' + print >> f, 'batch_size: 2' + print >> f, 'multi_slot_desc {' + print >> f, ' slots {' + print >> f, ' name: "words"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, ' slots {' + print >> f, ' name: "label"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, '}' + f.close() + data_feed = fluid.DataFeedDesc('data.proto') However, users usually shouldn't care about the message format; instead, they are encouragd to use :code:`Data Generator` as a tool to generate a @@ -54,16 +58,23 @@ class DataFeedDesc(object): DataFeedDesc can also be changed during runtime. Once you got familiar with what each field mean, you can modify it to better suit your need. E.g.: - >>> data_feed.set_batch_size(128) - >>> data_feed.set_dense_slots('wd') # The slot named 'wd' will be dense - >>> data_feed.set_use_slots('wd') # The slot named 'wd' will be used + + .. code-block:: python + + data_feed = fluid.DataFeedDesc('data.proto') + data_feed.set_batch_size(128) + data_feed.set_dense_slots('wd') # The slot named 'wd' will be dense + data_feed.set_use_slots('wd') # The slot named 'wd' will be used Finally, the content can be dumped out for debugging purpose: - >>> print(data_feed.desc()) + + .. code-block:: python + + print(data_feed.desc()) Args: proto_file(string): Disk file containing a data feed description. - + """ def __init__(self, proto_file): @@ -82,8 +93,28 @@ class DataFeedDesc(object): Set batch size. Will be effective during training Example: - >>> data_feed = fluid.DataFeedDesc('data.proto') - >>> data_feed.set_batch_size(128) + .. code-block:: python + + f = open("data.proto", "w") + print >> f, 'name: "MultiSlotDataFeed"' + print >> f, 'batch_size: 2' + print >> f, 'multi_slot_desc {' + print >> f, ' slots {' + print >> f, ' name: "words"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, ' slots {' + print >> f, ' name: "label"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, '}' + f.close() + data_feed = fluid.DataFeedDesc('data.proto') + data_feed.set_batch_size(128) Args: batch_size: batch size @@ -98,8 +129,28 @@ class DataFeedDesc(object): sparse slot will be fed into a LoDTensor Example: - >>> data_feed = fluid.DataFeedDesc('data.proto') - >>> data_feed.set_dense_slots(['words']) + .. code-block:: python + + f = open("data.proto", "w") + print >> f, 'name: "MultiSlotDataFeed"' + print >> f, 'batch_size: 2' + print >> f, 'multi_slot_desc {' + print >> f, ' slots {' + print >> f, ' name: "words"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, ' slots {' + print >> f, ' name: "label"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, '}' + f.close() + data_feed = fluid.DataFeedDesc('data.proto') + data_feed.set_dense_slots(['words']) Args: dense_slots_name: a list of slot names which will be set dense @@ -109,7 +160,7 @@ class DataFeedDesc(object): """ if self.proto_desc.name != "MultiSlotDataFeed": raise ValueError( - "Only MultiSlotDataFeed need set_dense_slots, pls check your datafeed.proto" + "Only MultiSlotDataFeed needs set_dense_slots, please check your datafeed.proto" ) for name in dense_slots_name: self.proto_desc.multi_slot_desc.slots[self.__name_to_index[ @@ -122,8 +173,28 @@ class DataFeedDesc(object): ones will be used for a specific model. Example: - >>> data_feed = fluid.DataFeedDesc('data.proto') - >>> data_feed.set_use_slots(['words']) + .. code-block:: python + + f = open("data.proto", "w") + print >> f, 'name: "MultiSlotDataFeed"' + print >> f, 'batch_size: 2' + print >> f, 'multi_slot_desc {' + print >> f, ' slots {' + print >> f, ' name: "words"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, ' slots {' + print >> f, ' name: "label"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, '}' + f.close() + data_feed = fluid.DataFeedDesc('data.proto') + data_feed.set_use_slots(['words']) Args: use_slots_name: a list of slot names which will be used in training @@ -133,7 +204,7 @@ class DataFeedDesc(object): """ if self.proto_desc.name != "MultiSlotDataFeed": raise ValueError( - "Only MultiSlotDataFeed need set_use_slots, pls check your datafeed.proto" + "Only MultiSlotDataFeed needs set_use_slots, please check your datafeed.proto" ) for name in use_slots_name: self.proto_desc.multi_slot_desc.slots[self.__name_to_index[ @@ -144,8 +215,28 @@ class DataFeedDesc(object): Returns a protobuf message for this DataFeedDesc Example: - >>> data_feed = fluid.DataFeedDesc('data.proto') - >>> print(data_feed.desc()) + .. code-block:: python + + f = open("data.proto", "w") + print >> f, 'name: "MultiSlotDataFeed"' + print >> f, 'batch_size: 2' + print >> f, 'multi_slot_desc {' + print >> f, ' slots {' + print >> f, ' name: "words"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, ' slots {' + print >> f, ' name: "label"' + print >> f, ' type: "uint64"' + print >> f, ' is_dense: false' + print >> f, ' is_used: true' + print >> f, ' }' + print >> f, '}' + f.close() + data_feed = fluid.DataFeedDesc('data.proto') + print(data_feed.desc()) Returns: A string message -- GitLab