fix infer_from_dataset and train_from_dataset (#17243)

* fix train_from_dataset and infer_from_dataset example * add inductive dim for data_reader, example: shape=[-1, 1], then -1 will be inducted through run-time reading of number of elements

fix infer_from_dataset and train_from_dataset (#17243)
* fix train_from_dataset and infer_from_dataset example * add inductive dim for data_reader, example: shape=[-1, 1], then -1 will be inducted through run-time reading of number of elements
5d6a1fcf · guru4elephant · GitHub · 516317cf · 5d6a1fcf · 5d6a1fcf
5 changed file
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -16,9 +16,9 @@ paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, ke
 paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'eddb7a1f0083dcc70e9f6c71ee003cb9'))
 paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a584496aa1343f36eebf3c46b323a74'))
-paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
+paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', 'bedc29ad01c1b911e99032ee1e19ac59'))
 paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', '4cfcd9c15b766a51b584cc46d38f1ad8'))
-paddle.fluid.Executor.train_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', 'd521011d79e71080fe9b5bb179b43518'))
+paddle.fluid.Executor.train_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '28f50904a0213f110947a30e0438529c'))
 paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f65788d9ead293ada47551339df12203'))
 paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', '6e19f92e2f185320a3a86b77e85eb3b3'))
 paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))

--- a/paddle/fluid/framework/data_feed.cc
+++ b/paddle/fluid/framework/data_feed.cc
@@ -455,6 +455,8 @@ void MultiSlotDataFeed::Init(
  all_slots_.resize(all_slot_num);
  all_slots_type_.resize(all_slot_num);
  use_slots_index_.resize(all_slot_num);
+  total_dims_without_inductive_.resize(all_slot_num);
+  inductive_shape_index_.resize(all_slot_num);
  use_slots_.clear();
  use_slots_is_dense_.clear();
  for (size_t i = 0; i < all_slot_num; ++i) {
@@ -462,14 +464,20 @@ void MultiSlotDataFeed::Init(
    all_slots_[i] = slot.name();
    all_slots_type_[i] = slot.type();
    use_slots_index_[i] = slot.is_used() ? use_slots_.size() : -1;
+    total_dims_without_inductive_[i] = 1;
+    inductive_shape_index_[i] = -1;
    if (slot.is_used()) {
      use_slots_.push_back(all_slots_[i]);
      use_slots_is_dense_.push_back(slot.is_dense());
      std::vector<int> local_shape;
      if (slot.is_dense()) {
-        // for batch size holder if is_dense
-        if (slot.shape(0) > 0) {
-          local_shape.push_back(0);
+        for (size_t i = 0; i < slot.shape_size(); ++i) {
+          if (slot.shape(i) > 0) {
+            total_dims_without_inductive_[i] *= slot.shape(i);
+          }
+          if (slot.shape(i) == -1) {
+            inductive_shape_index_[i] = i;
+          }
        }
      }
      for (size_t i = 0; i < slot.shape_size(); ++i) {
@@ -762,7 +770,10 @@ void MultiSlotDataFeed::PutToFeedVec(
    LoD data_lod{offset};
    feed_vec_[i]->set_lod(data_lod);
    if (use_slots_is_dense_[i]) {
-      use_slots_shape_[i][0] = batch_size_;
+      if (inductive_shape_index_[i] != -1) {
+        use_slots_shape_[i][inductive_shape_index_[i]] =
+            total_instance / total_dims_without_inductive_[i];
+      }
      feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
    }
  }
@@ -785,6 +796,8 @@ void MultiSlotInMemoryDataFeed::Init(
  all_slots_.resize(all_slot_num);
  all_slots_type_.resize(all_slot_num);
  use_slots_index_.resize(all_slot_num);
+  total_dims_without_inductive_.resize(all_slot_num);
+  inductive_shape_index_.resize(all_slot_num);
  use_slots_.clear();
  use_slots_is_dense_.clear();
  for (size_t i = 0; i < all_slot_num; ++i) {
@@ -797,8 +810,13 @@ void MultiSlotInMemoryDataFeed::Init(
      use_slots_is_dense_.push_back(slot.is_dense());
      std::vector<int> local_shape;
      if (slot.is_dense()) {
-        if (slot.shape(0) > 0) {
-          local_shape.push_back(0);
+        for (size_t i = 0; i < slot.shape_size(); ++i) {
+          if (slot.shape(i) > 0) {
+            total_dims_without_inductive_[i] *= slot.shape(i);
+          }
+          if (slot.shape(i) == -1) {
+            inductive_shape_index_[i] = i;
+          }
        }
      }
      for (size_t i = 0; i < slot.shape_size(); ++i) {
@@ -960,7 +978,10 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
    LoD data_lod{offset};
    feed_vec_[i]->set_lod(data_lod);
    if (use_slots_is_dense_[i]) {
-      use_slots_shape_[i][0] = batch_size_;
+      if (inductive_shape_index_[i] != -1) {
+        use_slots_shape_[i][inductive_shape_index_[i]] =
+            total_instance / total_dims_without_inductive_[i];
+      }
      feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
    }
  }

--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -143,6 +143,8 @@ class DataFeed {
  std::vector<std::string> all_slots_;
  std::vector<std::string> all_slots_type_;
  std::vector<std::vector<int>> use_slots_shape_;
+  std::vector<int> inductive_shape_index_;
+  std::vector<int> total_dims_without_inductive_;
  std::vector<int>
      use_slots_index_;  // -1: not used; >=0: the index of use_slots_


--- a/paddle/fluid/framework/downpour_worker.cc
+++ b/paddle/fluid/framework/downpour_worker.cc
@@ -425,6 +425,7 @@ void DownpourWorker::TrainFiles() {
      }

      VLOG(3) << "push dense gradient done.";
+
      // the following code should be more precise and clean
      // TODO(guru4elephant)
      int32_t tmp_push_dense_wait_times = -1;

--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -789,13 +789,15 @@ class Executor(object):
            .. code-block:: python

                import paddle.fluid as fluid
-                place = fluid.CPUPlace()
+
+                place = fluid.CPUPlace() # you can set place = fluid.CUDAPlace(0) to use gpu
                exe = fluid.Executor(place)
-                x = fluid.layers.data(name="x", type="int64")
-                y = fluid.layers.data(name="y", type="int64")
+                x = fluid.layers.data(name="x", shape=[10, 10], dtype="int64")
+                y = fluid.layers.data(name="y", shape=[1], dtype="int64", lod_level=1)
                dataset = fluid.DatasetFactory().create_dataset()
                dataset.set_use_var([x, y])
-                filelist = ["dataA.txt", "dataB.txt"]
+                dataset.set_thread(1)
+                filelist = [] # you should set your own filelist, e.g. filelist = ["dataA.txt"]
                dataset.set_filelist(filelist)
                exe.run(fluid.default_startup_program())
                exe.infer_from_dataset(program=fluid.default_main_program(),
@@ -868,14 +870,15 @@ class Executor(object):
            .. code-block:: python

              import paddle.fluid as fluid
-              place = fluid.CPUPlace()
+
+              place = fluid.CPUPlace() # you can set place = fluid.CUDAPlace(0) to use gpu
              exe = fluid.Executor(place)
-              x = fluid.layers.data(name="x", type="int64")
-              y = fluid.layers.data(name="y", type="int64")
+              x = fluid.layers.data(name="x", shape=[10, 10], dtype="int64")
+              y = fluid.layers.data(name="y", shape=[1], dtype="int64", lod_level=1)
              dataset = fluid.DatasetFactory().create_dataset()
              dataset.set_use_var([x, y])
-              dataset.set_thread(2)
-              filelist = ["dataA.txt", "dataB.txt"]
+              dataset.set_thread(1)
+              filelist = [] # you should set your own filelist, e.g. filelist = ["dataA.txt"]
              dataset.set_filelist(filelist)
              exe.run(fluid.default_startup_program())
              exe.train_from_dataset(program=fluid.default_main_program(),