diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc
index b7f666cb3663411a8956d2e6924effc8cec6738c..d590986841d817538e9d1f6e73dd8777172312a1 100644
--- a/paddle/fluid/framework/downpour_worker.cc
+++ b/paddle/fluid/framework/downpour_worker.cc
@@ -211,13 +211,16 @@ void DownpourWorker::TrainFilesWithProfiler() {
                                      &feature_values_[tid], table.fea_dim());
       timeline.Pause();
       pull_sparse_time += timeline.ElapsedSec();
+      total_time += timeline.ElapsedSec();
       CollectLabelInfo(i);
       timeline.Pause();
       collect_label_time += timeline.ElapsedSec();
+      total_time += timeline.ElapsedSec();
       timeline.Start();
       FillSparseValue(i);
       timeline.Pause();
       fill_sparse_time += timeline.ElapsedSec();
+      total_time += timeline.ElapsedSec();
     }
     VLOG(3) << "Fill sparse value for all sparse table done.";
 
@@ -257,6 +260,7 @@ void DownpourWorker::TrainFilesWithProfiler() {
           &feature_grads_[tid], &push_sparse_status_);
       timeline.Pause();
       push_sparse_time += timeline.ElapsedSec();
+      total_time += timeline.ElapsedSec();
     }
 
     timeline.Start();
@@ -269,7 +273,7 @@ void DownpourWorker::TrainFilesWithProfiler() {
     }
     timeline.Pause();
     push_dense_time += timeline.ElapsedSec();
-
+    total_time += timeline.ElapsedSec();
     VLOG(3) << "push sparse and dense gradient done.";
     int32_t tmp_push_dense_wait_times = -1;
     int32_t tmp_push_sparse_wait_times = -1;
@@ -324,6 +328,7 @@ void DownpourWorker::TrainFilesWithProfiler() {
         fprintf(stderr, "IO percent: %f\n", read_time / total_time * 100);
       }
     }
+    timeline.Start();
   }
 }
 
diff --git a/paddle/fluid/framework/hogwild_worker.cc b/paddle/fluid/framework/hogwild_worker.cc
index 148893fafc18953327c09511ac7d9fbab1265bd0..4c51067abfba6b077d6f3484648e21c58cd43b13 100644
--- a/paddle/fluid/framework/hogwild_worker.cc
+++ b/paddle/fluid/framework/hogwild_worker.cc
@@ -110,12 +110,7 @@ void HogwildWorker::TrainFilesWithProfiler() {
                   op_name[i].c_str(), op_total_time[i] / batch_cnt);
         }
         fprintf(stderr, "mean read time: %fs\n", read_time / batch_cnt);
-        /*
-        int fetch_var_num = fetch_var_names_.size();
-        for (int i = 0; i < fetch_var_num; ++i) {
-          print_fetch_var(thread_scope_, fetch_var_names_[i]);
-        }
-        */
+        fprintf(stderr, "IO percent: %f\n", read_time / total_time * 100);
       }
     }
     timeline.Start();
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 1314a324063e8fcb93e3f24846b9bed86445a7a9..bf4edf5be20d8d0bc35ddeee17c46e1d6fd625b2 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -636,21 +636,28 @@ class Executor(object):
         if not compiled:
             trainer = TrainerFactory().create_trainer(program._fleet_opt)
             trainer.set_program(program)
-            with open("fleet_desc.prototxt", "w") as fout:
-                fout.write(str(program._fleet_opt["fleet_desc"]))
         else:
             trainer = TrainerFactory().create_trainer(
                 program.program._fleet_opt)
             trainer.set_program(program.program)
         if thread <= 0:
-            trainer.set_thread(dataset.thread_num)
+            if dataset.thread_num <= 0:
+                raise RuntimeError(
+                    "You should set thread num first, either in Dataset or in Executor.train_from_dataset"
+                )
+            else:
+                trainer.set_thread(dataset.thread_num)
         else:
             trainer.set_thread(thread)
         trainer.set_debug(debug)
         trainer.gen_trainer_desc()
         dataset._prepare_to_run()
-        with open("trainer_desc.prototxt", "w") as fout:
-            fout.write(trainer._desc())
+        if debug:
+            with open("train_desc.prototxt", "w") as fout:
+                fout.write(trainer._desc())
+            if program._fleet_opt:
+                with open("fleet_desc.prototxt", "w") as fout:
+                    fout.write(str(program._fleet_opt["fleet_desc"]))
         self._default_executor.run_from_dataset(program.desc, scope,
                                                 dataset.dataset,
                                                 trainer._desc())