diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index 3ac752388f9df87a6c80c144d8722714e8af85d6..bbc7f77a94cfb8a921bf2660cf637d810f10ab64 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -56,13 +56,12 @@ Executor::~Executor() {
   }
 }
 
-void Executor::Run(const ProgramDesc& pdesc, Scope* scope) {
+void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
   // TODO(tonyyang-svail):
-  //    - only runs the first block (i.e. no RNN support)
   //    - only runs on the first device (i.e. no interdevice communication)
   //    - will change to use multiple blocks for RNN op and Cond Op
-  PADDLE_ENFORCE_GT(pdesc.blocks_size(), 0);
-  auto& block = pdesc.blocks(0);
+  PADDLE_ENFORCE_GT(pdesc.blocks_size(), block_id);
+  auto& block = pdesc.blocks(block_id);
   auto& device = device_contexts_[0];
 
   // Instantiate all the vars in the global scope
@@ -72,7 +71,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope) {
 
   Scope& local_scope = scope->NewScope();
 
-  std::vector<bool> should_run = Prune(pdesc);
+  std::vector<bool> should_run = Prune(pdesc, block_id);
   PADDLE_ENFORCE_EQ(should_run.size(), block.ops_size());
   for (size_t i = 0; i < should_run.size(); ++i) {
     if (should_run[i]) {
@@ -92,12 +91,11 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope) {
   //  - Destroy local_scope
 }
 
-std::vector<bool> Executor::Prune(const ProgramDesc& pdesc) {
+std::vector<bool> Executor::Prune(const ProgramDesc& pdesc, int block_id) {
   // TODO(tonyyang-svail):
-  //    - only runs the first block
   //    - will change to use multiple blocks for RNN op and Cond Op
 
-  auto& block = pdesc.blocks(0);
+  auto& block = pdesc.blocks(block_id);
   auto& ops = block.ops();
 
   bool expect_feed = true;
@@ -144,8 +142,10 @@ std::vector<bool> Executor::Prune(const ProgramDesc& pdesc) {
         }
       }
 
+      VLOG(3) << "1 " << op_desc.type();
       should_run.push_back(true);
     } else {
+      VLOG(3) << "0 " << op_desc.type();
       should_run.push_back(false);
     }
   }
diff --git a/paddle/framework/executor.h b/paddle/framework/executor.h
index f832b0d7d65426ec67f17d0c4aed48d840d3f98d..7fac4f4f46450e2cedac44d75dbd86a4cffeab08 100644
--- a/paddle/framework/executor.h
+++ b/paddle/framework/executor.h
@@ -34,7 +34,7 @@ class Executor {
    *  ProgramDesc
    *  Scope
    */
-  void Run(const ProgramDesc&, Scope*);
+  void Run(const ProgramDesc&, Scope*, int block_id);
 
  protected:
   /* @Brief
@@ -46,7 +46,7 @@ class Executor {
    * @return
    *  vector<bool> Same size as ops. Indicates whether an op should be run.
    */
-  std::vector<bool> Prune(const ProgramDesc& pdesc);
+  std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id);
 
  private:
   std::vector<platform::DeviceContext*> device_contexts_;
diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc
index f28651e809cced71249c62c4047b8d431f2cbfc8..b64ba1c98f381e03c8ed96ec6ea13fc09e6090e7 100644
--- a/paddle/framework/executor_test.cc
+++ b/paddle/framework/executor_test.cc
@@ -104,50 +104,40 @@ class ExecutorTesterRandom : public ::testing::Test {
   virtual void SetUp() override {
     int input_dim = 5, batch_size = 2, embed_dim = 5;
 
-    // init pdesc
-    auto temp_init_root_block = init_pdesc_.add_blocks();
-    temp_init_root_block->set_idx(0);
-    temp_init_root_block->set_parent_idx(-1);
-
-    // wrap to BlockDescBind
-    paddle::framework::ProgramDescBind& init_program =
-        paddle::framework::ProgramDescBind::Instance(&init_pdesc_);
-    paddle::framework::BlockDescBind* init_root_block = init_program.Block(0);
+    auto temp_root_block = pdesc_.add_blocks();
+    temp_root_block->set_idx(0);
+    temp_root_block->set_parent_idx(-1);
+    paddle::framework::ProgramDescBind& program =
+        paddle::framework::ProgramDescBind::Instance(&pdesc_);
+    paddle::framework::BlockDescBind* root_block = program.Block(0);
 
+    // block[0]
     AddOp("gaussian_random", {}, {{"Out", {"w1"}}},
-          {{"dims", std::vector<int>{input_dim, embed_dim}}}, init_root_block);
+          {{"dims", std::vector<int>{input_dim, embed_dim}}}, root_block);
     AddOp("gaussian_random", {}, {{"Out", {"w2"}}},
-          {{"dims", std::vector<int>{embed_dim, input_dim}}}, init_root_block);
+          {{"dims", std::vector<int>{embed_dim, input_dim}}}, root_block);
     AddOp("fetch", {{"Input", {"w1"}}}, {},
           {{"dims", std::vector<int>{input_dim, embed_dim}}, {"col", 0}},
-          init_root_block);
+          root_block);
     AddOp("fetch", {{"Input", {"w2"}}}, {},
           {{"dims", std::vector<int>{embed_dim, input_dim}}, {"col", 1}},
-          init_root_block);
-    // flush
-    init_program.Proto();
-
-    // run pdesc
-    auto temp_root_block = pdesc_.add_blocks();
-    temp_root_block->set_idx(0);
-    temp_root_block->set_parent_idx(-1);
-
-    // wrap to BlockDescBind
-    paddle::framework::ProgramDescBind& program =
-        paddle::framework::ProgramDescBind::Instance(&pdesc_);
-    paddle::framework::BlockDescBind* root_block = program.Block(0);
+          root_block);
 
+    // block[1]
+    paddle::framework::BlockDescBind* run_block =
+        program.AppendBlock(*root_block);
     AddOp("gaussian_random", {}, {{"Out", {"a"}}},
-          {{"dims", std::vector<int>{batch_size, input_dim}}}, root_block);
+          {{"dims", std::vector<int>{batch_size, input_dim}}}, run_block);
     AddOp("mul", {{"X", {"a"}}, {"Y", {"w1"}}}, {{"Out", {"b"}}}, {},
-          root_block);
+          run_block);
     AddOp("mul", {{"X", {"b"}}, {"Y", {"w2"}}}, {{"Out", {"a_out"}}}, {},
-          root_block);
+          run_block);
     AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}},
           {{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {},
-          root_block);
+          run_block);
     AddOp("fetch", {{"Input", {"l2_distance"}}}, {},
-          {{"dims", std::vector<int>{batch_size}}, {"col", 1}}, root_block);
+          {{"dims", std::vector<int>{batch_size}}, {"col", 1}}, run_block);
+
     // flush
     program.Proto();
 
@@ -157,7 +147,6 @@ class ExecutorTesterRandom : public ::testing::Test {
 
  protected:
   ProgramDesc pdesc_;
-  ProgramDesc init_pdesc_;
 };
 
 class ExecutorTesterFeedAndFetch : public ::testing::Test {
@@ -211,8 +200,8 @@ TEST_F(ExecutorTesterRandom, CPU) {
 
   std::unique_ptr<Executor> executor(new Executor(places));
 
-  executor->Run(init_pdesc_, GetGlobalScope());
-  executor->Run(pdesc_, GetGlobalScope());
+  executor->Run(pdesc_, GetGlobalScope(), 0);
+  executor->Run(pdesc_, GetGlobalScope(), 1);
   std::vector<std::vector<float>> result = GetFetchVariable<float>();
 }
 
@@ -231,7 +220,7 @@ TEST_F(ExecutorTesterFeedAndFetch, CPU) {
 
   for (int batch_id = 0; batch_id < 3; batch_id++) {
     SetFeedVariable<float>(inputs_);
-    executor->Run(pdesc_, GetGlobalScope());
+    executor->Run(pdesc_, GetGlobalScope(), 0);
     std::vector<std::vector<float>> result = GetFetchVariable<float>();
     PADDLE_ENFORCE_EQ(result.size(), inputs_.size());
     for (size_t i = 0; i < result.size(); ++i) {
@@ -259,8 +248,8 @@ TEST_F(ExecutorTesterRandom, GPU) {
 
   std::unique_ptr<Executor> executor(new Executor(places));
 
-  executor->Run(init_pdesc_, GetGlobalScope());
-  executor->Run(pdesc_, GetGlobalScope());
+  executor->Run(pdesc_, GetGlobalScope(), 0);
+  executor->Run(pdesc_, GetGlobalScope(), 1);
   std::vector<std::vector<float>> result = GetFetchVariable<float>();
 }
 
@@ -281,7 +270,7 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) {
 
   for (int batch_id = 0; batch_id < 3; batch_id++) {
     SetFeedVariable<float>(inputs_);
-    executor->Run(pdesc_, GetGlobalScope());
+    executor->Run(pdesc_, GetGlobalScope(), 0);
     std::vector<std::vector<float>> result = GetFetchVariable<float>();
     PADDLE_ENFORCE_EQ(result.size(), inputs_.size());
     for (size_t i = 0; i < result.size(); ++i) {