diff --git a/paddle/contrib/float16/float16_transpiler.py b/paddle/contrib/float16/float16_transpiler.py
index 8d95dc0591e1d6bd815cc697528191c2ee8c1cfe..500f64bed9898fa874cbad2ea69aae05df58023e 100644
--- a/paddle/contrib/float16/float16_transpiler.py
+++ b/paddle/contrib/float16/float16_transpiler.py
@@ -60,7 +60,7 @@ class Float16Transpiler:
             raise TypeError("place should be as CPUPlace/CUDAPlace type")
         if scope is None:
             scope = global_scope()
-        if not isinstance(scope, core.Scope):
+        if not isinstance(scope, core._Scope):
             raise TypeError("scope should be as Scope type or None")
 
         self.scope = scope
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index b6974c6af290438f827c16bb478eb43e3cf42247..9872631553056fbabd8be8162ecee16250b33862 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -464,11 +464,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
 paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
 paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
-paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
-paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
-paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
-paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
-paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
+paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
 paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
 paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
 paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 2d66dcbf5c01166b68f1c34e7492e9f64ad50a34..45faae977097c7522cb37575e674e8ce0dd7259a 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -88,6 +88,7 @@ endif()
 cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
 
 cc_library(scope SRCS scope.cc DEPS glog threadpool var_type_traits)
+cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)
 
diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h
index d9c76881b7e98d0b7cd29024b98c8f7720398c66..67054eccb3397ea40f0fb3e2ff2530ee1ea64736 100644
--- a/paddle/fluid/framework/attribute.h
+++ b/paddle/fluid/framework/attribute.h
@@ -165,7 +165,7 @@ template <typename T>
 class GreaterThanChecker {
  public:
   explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
-  void operator()(T& value) const {
+  void operator()(const T& value) const {
     PADDLE_ENFORCE(value > lower_bound_, "larger_than check fails.");
   }
 
@@ -177,7 +177,7 @@ template <typename T>
 class EqualGreaterThanChecker {
  public:
   explicit EqualGreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
-  void operator()(T& value) const {
+  void operator()(const T& value) const {
     PADDLE_ENFORCE_GE(value, lower_bound_, "equal_larger_than check fails.");
   }
 
@@ -193,7 +193,7 @@ class DefaultValueSetter {
  public:
   explicit DefaultValueSetter(T default_value)
       : default_value_(default_value) {}
-  void operator()(T& value) const { value = default_value_; }  // NOLINT
+  void operator()(T* value) const { *value = default_value_; }
 
  private:
   T default_value_;
@@ -203,7 +203,7 @@ template <typename T>
 class EnumInContainer {
  public:
   explicit EnumInContainer(const std::unordered_set<T>& c) : container_(c) {}
-  void operator()(T& val) const {
+  void operator()(const T& val) const {
     PADDLE_ENFORCE(container_.find(val) != container_.end(),
                    "Value %s is not in enum container %s", val,
                    ContainerDebugString());
@@ -232,7 +232,8 @@ class EnumInContainer {
 // an attribute can have more than one limits
 template <typename T>
 class TypedAttrChecker {
-  typedef std::function<void(T&)> ValueChecker;
+  typedef std::function<void(T*)> DefaultValueChecker;
+  typedef std::function<void(const T&)> ValueChecker;
 
  public:
   explicit TypedAttrChecker(const std::string& attr_name)
@@ -268,17 +269,17 @@ class TypedAttrChecker {
     return *this;
   }
 
-  void operator()(AttributeMap& attr_map) const {  // NOLINT
-    if (!attr_map.count(attr_name_)) {
+  void operator()(AttributeMap* attr_map) const {
+    if (!attr_map->count(attr_name_)) {
       // user do not set this attr
       PADDLE_ENFORCE(!default_value_setter_.empty(),
                      "Attribute '%s' is required!", attr_name_);
       // default_value_setter_ has no more than one element
       T val;
-      (default_value_setter_[0])(val);
-      attr_map[attr_name_] = val;
+      (default_value_setter_[0])(&val);
+      (*attr_map)[attr_name_] = val;
     }
-    Attribute& attr = attr_map.at(attr_name_);
+    Attribute& attr = attr_map->at(attr_name_);
     ExtractAttribute<T> extract_attr(attr_name_);
     T* attr_value = extract_attr(attr);
     for (const auto& checker : value_checkers_) {
@@ -289,12 +290,12 @@ class TypedAttrChecker {
  private:
   std::string attr_name_;
   std::vector<ValueChecker> value_checkers_;
-  std::vector<ValueChecker> default_value_setter_;
+  std::vector<DefaultValueChecker> default_value_setter_;
 };
 
 // check whether op's all attributes fit their own limits
 class OpAttrChecker {
-  typedef std::function<void(AttributeMap&)> AttrChecker;
+  typedef std::function<void(AttributeMap*)> AttrChecker;
 
  public:
   template <typename T>
@@ -304,7 +305,7 @@ class OpAttrChecker {
     return *(checker.target<TypedAttrChecker<T>>());
   }
 
-  void Check(AttributeMap& attr_map) const {  // NOLINT
+  void Check(AttributeMap* attr_map) const {
     for (const auto& checker : attr_checkers_) {
       checker(attr_map);
     }
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 2fe1c94ec02e8ff0a4acb81868ba2124ea89e506..0e7b0cbeb98f3b6bbf0b37f507fc6022be692bb1 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -643,7 +643,7 @@ void OpDesc::CheckAttrs() {
     // not by users.
     return;
   }
-  checker->Check(attrs_);
+  checker->Check(&attrs_);
 }
 
 void OpDesc::InferShape(const BlockDesc &block) const {
diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc
index ca31303f77c4a30eb64c43404e214779ea78aeaf..2311614c335a56501ac777d787f6653659294765 100644
--- a/paddle/fluid/framework/op_proto_maker.cc
+++ b/paddle/fluid/framework/op_proto_maker.cc
@@ -82,6 +82,10 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
   AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.")
       .SetDefault("");
 
+  AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
+                                    "Callstack for Op Creatation.")
+      .SetDefault({});
+
   Validate();
 }
 
diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h
index 4c59c73d8779eceb267ad532aabccabbd54b0df2..0a0f8f4655bc34cdb25205ff6eaec9f96c801ebd 100644
--- a/paddle/fluid/framework/op_proto_maker.h
+++ b/paddle/fluid/framework/op_proto_maker.h
@@ -47,6 +47,7 @@ class OpProtoAndCheckerMaker {
   static const char *OpRoleAttrName() { return "op_role"; }
   static const char *OpRoleVarAttrName() { return "op_role_var"; }
   static const char *OpNamescopeAttrName() { return "op_namescope"; }
+  static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
 
   void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
 
diff --git a/paddle/fluid/framework/op_registry.cc b/paddle/fluid/framework/op_registry.cc
index bfc411ca2c4a483e344b368da089392d8e4a87c1..346d14d408ea1ed2cfbdbed5f48e56902e6e95b2 100644
--- a/paddle/fluid/framework/op_registry.cc
+++ b/paddle/fluid/framework/op_registry.cc
@@ -24,7 +24,7 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const VariableNameMap& outputs, AttributeMap attrs) {
   auto& info = OpInfoMap::Instance().Get(type);
   if (info.Checker() != nullptr) {
-    info.Checker()->Check(attrs);
+    info.Checker()->Check(&attrs);
   }
   auto op = info.Creator()(type, inputs, outputs, attrs);
   return std::unique_ptr<OperatorBase>(op);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index d5992354da646e1e1015ad83a116348dedaf7656..f06478982c1fe86a5d3b1c20beab4b3222cc855a 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -16,10 +16,15 @@ limitations under the License. */
 #include <glog/logging.h>
 
 #include <algorithm>
-
+#include <sstream>
+#include <string>
+#include <vector>
+#include "gflags/gflags.h"
+#include "glog/logging.h"
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
@@ -157,27 +162,59 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
 }
 
 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  VLOG(4) << place << " " << DebugStringEx(&scope);
-  if (platform::is_gpu_place(place)) {
+  try {
+    if (VLOG_IS_ON(4)) {
+      VLOG(4) << place << " " << DebugStringEx(&scope);
+    }
+    if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
-    PADDLE_THROW("Cannot run operator on place %s", place);
+      PADDLE_THROW("Cannot run operator on place %s", place);
 #else
-    auto dev_id = boost::get<platform::CUDAPlace>(place).device;
-    platform::SetDeviceId(dev_id);
+      auto dev_id = boost::get<platform::CUDAPlace>(place).device;
+      platform::SetDeviceId(dev_id);
 #endif
-  }
+    }
 
-  // The profile has a process-wide mutex, results in serious performance issue
-  // in concurrency scenerio. Here use an `if` to fix this issue.
-  // Please not remove the `if`, ask @Superjomn if there are any concern.
-  if (platform::IsProfileEnabled()) {
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    platform::RecordEvent record_event(Type(), pool.Get(place));
-    RunImpl(scope, place);
-  } else {
-    RunImpl(scope, place);
+    // The profile has a process-wide mutex, results in serious performance
+    // issue
+    // in concurrency scenerio. Here use an `if` to fix this issue.
+    // Please not remove the `if`, ask @Superjomn if there are any concern.
+    if (platform::IsProfileEnabled()) {
+      platform::DeviceContextPool& pool =
+          platform::DeviceContextPool::Instance();
+      platform::RecordEvent record_event(Type(), pool.Get(place));
+      RunImpl(scope, place);
+    } else {
+      RunImpl(scope, place);
+    }
+
+    if (VLOG_IS_ON(3)) {
+      VLOG(3) << place << " " << DebugStringEx(&scope);
+    }
+  } catch (platform::EnforceNotMet exception) {
+    if (Attrs().count("sub_block") != 0) {
+      throw exception;
+    }
+
+    auto& callstack = Attr<std::vector<std::string>>(
+        OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
+
+    if (callstack.empty()) {
+      throw exception;
+    }
+    std::ostringstream sout;
+    sout << "Invoke operator " << Type() << " error.\n";
+    sout << "Python Callstacks: \n";
+    for (auto& line : callstack) {
+      sout << line;
+    }
+    sout << "C++ Callstacks: \n";
+    sout << exception.err_str_;
+    exception.err_str_ = sout.str();
+    throw exception;
+  } catch (...) {
+    std::rethrow_exception(std::current_exception());
   }
-  VLOG(3) << place << " " << DebugStringEx(&scope);
 }
 
 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -1061,8 +1098,8 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
           t = &(var->Get<SelectedRows>().value());
         }
         if (t != nullptr) {
-          PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized: %s",
-                         ipt_name, DebugString());
+          PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized",
+                         ipt_name);
           int tmp = static_cast<int>(t->type());
           PADDLE_ENFORCE(
               tmp == data_type || data_type == -1,
diff --git a/paddle/fluid/framework/scope_pool.cc b/paddle/fluid/framework/scope_pool.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5cb241a7a341d793d8450f0c9cde3929acef8965
--- /dev/null
+++ b/paddle/fluid/framework/scope_pool.cc
@@ -0,0 +1,65 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/scope_pool.h"
+#include "paddle/fluid/framework/threadpool.h"
+
+namespace paddle {
+namespace framework {
+
+// Returns the process-wide pool, constructed on first use.
+ScopePool &ScopePool::Instance() {  // NOLINT
+  static ScopePool pool;
+  return pool;
+}
+
+// Destroys one scope; every pooled scope is freed through here.
+void ScopePool::DeleteScope(Scope *scope) { delete scope; }
+
+// Takes ownership of `s` and records it in the pool.
+void ScopePool::Insert(std::unique_ptr<Scope> &&s) {
+  std::lock_guard<std::mutex> guard(mtx_);
+  // Give up ownership only after the pointer is stored, so the scope is not
+  // leaked if the set insertion throws.
+  scopes_.insert(s.get());
+  static_cast<void>(s.release());
+}
+
+// Removes `s` from the pool and deletes it. Raises via PADDLE_ENFORCE if `s`
+// is not in the pool.
+void ScopePool::Remove(Scope *s) {
+  size_t has_scope;
+  {
+    // Hold the lock only for the set update; the delete runs unlocked.
+    std::lock_guard<std::mutex> guard(mtx_);
+    has_scope = scopes_.erase(s);
+  }
+  PADDLE_ENFORCE(has_scope > 0, "Delete non-existing global scope");
+  DeleteScope(s);
+}
+
+// Frees any scopes still registered when the pool is torn down.
+ScopePool::~ScopePool() { Clear(); }
+
+// Deletes every pooled scope and empties the set, under the lock.
+void ScopePool::Clear() {
+  std::lock_guard<std::mutex> guard(mtx_);
+  for (auto *s : scopes_) {
+    DeleteScope(s);
+  }
+  scopes_.clear();
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/scope_pool.h b/paddle/fluid/framework/scope_pool.h
new file mode 100644
index 0000000000000000000000000000000000000000..a8b468699abe148d44a395cf888158aefab4380b
--- /dev/null
+++ b/paddle/fluid/framework/scope_pool.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <mutex>  // NOLINT
+#include <unordered_set>
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+// Process-wide registry of heap-allocated Scopes. The pool owns every scope
+// handed to Insert() and deletes it on Remove(), Clear() or destruction.
+class ScopePool {
+ public:
+  // Returns the singleton pool (function-local static).
+  static ScopePool &Instance();  // NOLINT
+
+  // Takes ownership of `s` and tracks it until it is removed or cleared.
+  void Insert(std::unique_ptr<Scope> &&s);
+
+  // Stops tracking `s` and deletes it. Fails a PADDLE_ENFORCE check if `s`
+  // is not currently held by the pool.
+  void Remove(Scope *s);
+
+  // Deletes every scope currently held and empties the pool.
+  void Clear();
+
+  // Deletes any scopes still held (calls Clear()).
+  ~ScopePool();
+
+ private:
+  ScopePool() = default;
+
+  // Single point where pooled scopes are destroyed.
+  static void DeleteScope(Scope *scope);
+
+  std::unordered_set<Scope *> scopes_;  // raw pointers owned by the pool
+  std::mutex mtx_;                      // guards scopes_
+};
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 95bbc74a5961eb28a0d8fbd7c680c0740fc68d8a..9aa9db031cd46c9d537bd686f0b23f4c9ae71de6 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -108,6 +108,10 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
 inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
   "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")
 
+# seq_pool1
+inference_analysis_api_test_with_fake_data(test_analyzer_seq_pool1
+"${INFERENCE_DEMO_INSTALL_DIR}/seq_pool1" analyzer_seq_pool1_tester.cc "seq_pool1.tar.gz")
+
 # mobilenet with depthwise_conv op
 inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
   "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz")
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2ae840fd11f627d845a20a59ab14118516311d22
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -0,0 +1,117 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include <iostream>
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+void SetConfig(AnalysisConfig *cfg) {  // fills *cfg for the seq_pool1 test
+  cfg->param_file = FLAGS_infer_model + "/params";
+  cfg->prog_file = FLAGS_infer_model + "/model";
+  cfg->use_gpu = false;  // run on CPU
+  cfg->device = 0;
+  cfg->enable_ir_optim = true;
+  cfg->specify_input_name = true;  // tensors carry feed names (see SetInput)
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  std::vector<std::string> feed_names = {  // names given to the fake inputs
+      "slot10000_embed", "slot10001_embed", "slot10004_embed",
+      "slot10005_embed", "slot10008_embed", "slot10009_embed",
+      "slot10012_embed", "slot10013_embed", "slot10108_embed",
+      "slot13324_embed", "slot13325_embed", "slot13326_embed",
+      "slot13327_embed", "slot13328_embed", "slot13329_embed",
+      "slot13330_embed", "slot13331_embed", "slot15501_embed",
+      "slot15502_embed", "slot15503_embed", "slot15504_embed",
+      "slot15505_embed", "slot15506_embed", "slot15507_embed",
+      "slot15508_embed", "slot15516_embed", "slot15519_embed",
+      "slot15523_embed", "slot15531_embed", "slot15533_embed",
+      "slot15548_embed", "slot15564_embed", "slot15565_embed",
+      "slot15566_embed", "slot15570_embed", "slot15571_embed",
+      "slot15572_embed", "slot15573_embed", "slot15574_embed",
+      "slot15575_embed", "slot15576_embed", "slot15577_embed",
+      "slot15579_embed", "slot15581_embed", "slot15582_embed",
+      "slot15583_embed", "slot15584_embed", "slot5016_embed",
+      "slot5021_embed",  "slot6002_embed",  "slot6003_embed",
+      "slot6004_embed",  "slot6005_embed",  "slot6006_embed",
+      "slot6007_embed",  "slot6008_embed",  "slot6009_embed",
+      "slot6011_embed",  "slot6014_embed",  "slot6015_embed",
+      "slot6023_embed",  "slot6024_embed",  "slot6025_embed",
+      "slot6027_embed",  "slot6029_embed",  "slot6031_embed",
+      "slot6034_embed",  "slot6035_embed",  "slot6036_embed",
+      "slot6037_embed",  "slot6039_embed",  "slot6048_embed",
+      "slot6050_embed",  "slot6058_embed",  "slot6059_embed",
+      "slot6060_embed",  "slot6066_embed",  "slot6067_embed",
+      "slot6068_embed",  "slot6069_embed",  "slot6070_embed",
+      "slot6071_embed",  "slot6072_embed",  "slot6073_embed",
+      "slot6182_embed",  "slot6183_embed",  "slot6184_embed",
+      "slot6185_embed",  "slot6186_embed",  "slot6188_embed",
+      "slot6189_embed",  "slot6190_embed",  "slot6201_embed",
+      "slot6202_embed",  "slot6203_embed",  "slot6247_embed",
+      "slot6248_embed",  "slot6250_embed",  "slot6251_embed",
+      "slot6807_embed",  "slot6808_embed",  "slot6809_embed",
+      "slot6810_embed",  "slot6811_embed",  "slot6812_embed",
+      "slot6813_embed",  "slot6814_embed",  "slot6815_embed",
+      "slot6816_embed",  "slot6817_embed",  "slot6818_embed",
+      "slot6819_embed",  "slot6820_embed",  "slot6822_embed",
+      "slot6823_embed",  "slot6826_embed",  "slot7002_embed",
+      "slot7003_embed",  "slot7004_embed",  "slot7005_embed",
+      "slot7006_embed",  "slot7008_embed",  "slot7009_embed",
+      "slot7010_embed",  "slot7011_embed",  "slot7013_embed",
+      "slot7014_embed",  "slot7015_embed",  "slot7016_embed",
+      "slot7017_embed",  "slot7019_embed",  "slot7100_embed",
+      "slot7506_embed",  "slot7507_embed",  "slot7514_embed",
+      "slot7515_embed",  "slot7516_embed"};
+  SetFakeImageInput(inputs, FLAGS_infer_model, true, "model", "params",
+                    &feed_names);  // helper checks names match feed count
+}
+
+// Feeds the fake inputs to TestPrediction; standalone for easy profiling.
+void profile(bool use_mkldnn = false) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  if (use_mkldnn) {
+    cfg.EnableMKLDNN();
+  }
+  std::vector<PaddleTensor> outputs;
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
+}
+
+TEST(Analyzer_seq_pool1, profile) { profile(); }  // use_mkldnn = false
+
+// Checks that GetFuseStatis reports 314 ops for the created predictor.
+TEST(Analyzer_seq_pool1, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;  // out-parameter, passed to GetFuseStatis below
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  LOG(INFO) << "num_ops: " << num_ops;
+  EXPECT_EQ(num_ops, 314);
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index b0c8f395ce05fbfceaec3d8b69367292eca714e4..ef7e2198c5db4e723406fe1a31d2c3dde5121931 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -132,7 +132,8 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
                        const std::string &dirname, bool is_combined = true,
                        std::string model_filename = "model",
-                       std::string params_filename = "params") {
+                       std::string params_filename = "params",
+                       const std::vector<std::string> *feed_names = nullptr) {
   // Set fake_image_data
   PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
   std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(
@@ -146,26 +147,32 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
     os << "}\n";
   }
   LOG(INFO) << os.str();
-
-  int dim1 = feed_target_shapes[0][1];
-  int dim2 = feed_target_shapes[0][2];
-  int dim3 = feed_target_shapes[0][3];
-
-  PaddleTensor input;
-  std::vector<int> shape({FLAGS_batch_size, dim1, dim2, dim3});
-  input.shape = shape;
-  input.dtype = PaddleDType::FLOAT32;
-
-  // fill input data, for profile easily, do not use random data here.
-  size_t size = FLAGS_batch_size * dim1 * dim2 * dim3;
-  input.data.Resize(size * sizeof(float));
-  float *input_data = static_cast<float *>(input.data.data());
-  for (size_t i = 0; i < size; i++) {
-    *(input_data + i) = static_cast<float>(i) / size;
+  if (feed_names) {
+    PADDLE_ENFORCE_EQ(feed_names->size(), feed_target_shapes.size());
+  }
+  std::vector<PaddleTensor> input_slots(feed_target_shapes.size());
+  for (size_t i = 0; i < feed_target_shapes.size(); ++i) {
+    const auto &feed_shape = feed_target_shapes[i];
+    auto &input = input_slots[i];
+    std::vector<int> shape({FLAGS_batch_size});
+    for (size_t s = 1; s < feed_shape.size(); ++s) {
+      shape.push_back(static_cast<int>(feed_shape[s]));
+    }
+    if (feed_names) {
+      input.name = (*feed_names)[i];
+    }
+    input.shape = shape;
+    input.dtype = PaddleDType::FLOAT32;
+    size_t len = std::accumulate(shape.begin(), shape.end(), 1,
+                                 [](int a, int b) { return a * b; });
+    input.data.Resize(len * sizeof(float));
+    input.lod.assign({{0, static_cast<size_t>(FLAGS_batch_size)}});
+    float *input_data = static_cast<float *>(input.data.data());
+    // fill input data, for profile easily, do not use random data here.
+    for (size_t j = 0; j < len; ++j) {
+      *(input_data + j) = static_cast<float>(j) / len;
+    }
   }
-
-  std::vector<PaddleTensor> input_slots;
-  input_slots.assign({input});
   (*inputs).emplace_back(input_slots);
 }
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h
index 8fceed3558b4357b7863368c18add329ea9922b3..57d6f4b3ea98d7437f7fa72ed724384a19bcea4a 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h
@@ -52,7 +52,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {
                  "The maximum length of the sequence. If maxlen < 0, maxlen "
                  "= max(Input(X)).")
         .SetDefault(-1)
-        .AddCustomChecker([](int &v) {
+        .AddCustomChecker([](const int &v) {
           PADDLE_ENFORCE(v < 0 || v >= 1,
                          "Attr(maxlen) must be less than 0 or larger than 1");
         });
diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc
index 767449cde981e5925b7144ff1038560c67651f3e..5ede972c71ff3ef8ff00756b97662aabb54d6349 100644
--- a/paddle/fluid/operators/split_lod_tensor_op.cc
+++ b/paddle/fluid/operators/split_lod_tensor_op.cc
@@ -63,7 +63,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
     }
     auto *mask_data = cpu_mask->data<bool>();
 
-    std::vector<std::vector<CopyRange>> copy_ranges(mask_dim[0]);
+    std::vector<std::vector<CopyRange>> copy_ranges(2);
 
     // set out_true/out_false lod
     for (size_t t = 0; t < 2; t++) {
diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc
index c17d1afc309c65035063348d4934ea1783b018ed..9e77f7252de1545e04bd2feaff27374c189dfc48 100644
--- a/paddle/fluid/operators/top_k_op.cc
+++ b/paddle/fluid/operators/top_k_op.cc
@@ -21,7 +21,7 @@ class TopkOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
-  void InferShape(framework::InferShapeContext *ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of TopkOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -44,12 +44,25 @@ class TopkOp : public framework::OperatorWithKernel {
     ctx->ShareLoD("X", "Out");
     ctx->ShareLoD("X", "Indices");
   }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    const auto input_dtype = ctx.Input<Tensor>("X")->type();  // dtype of X
+    return framework::OpKernelType(input_dtype, ctx.device_context(),
+                                   framework::DataLayout::kAnyLayout,
+                                   framework::LibraryType::kPlain);
+  }
 };
 
 class TopkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X", "(Tensor) The input of Topk op");
+    AddInput("K",
+             "(Tensor)  Number of top elements to look for along "
+             "the last dimension (along each row for matrices).")
+        .AsDispensable();
     AddOutput("Out", "(Tensor) The output tensor of Topk op");
     AddOutput("Indices", "(Tensor) The indices of Topk elements of input");
     AddComment(R"DOC(
diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu
index 99a4b1b7b0b33aebd9a1a49b0b771fe6fd134bb3..c27039dd0a55549fd7ecdc3260154ae90b1a29be 100644
--- a/paddle/fluid/operators/top_k_op.cu
+++ b/paddle/fluid/operators/top_k_op.cu
@@ -327,6 +327,17 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
     auto* indices = ctx.Output<Tensor>("Indices");
     size_t k = static_cast<int>(ctx.Attr<int>("k"));
 
+    auto* k_t = ctx.Input<Tensor>("K");
+    if (k_t) {
+      Tensor k_host;
+      framework::TensorCopySync(*k_t, platform::CPUPlace(), &k_host);
+      k = k_host.data<int>()[0];
+      framework::DDim output_dims = output->dims();
+      output_dims[output_dims.size() - 1] = k;
+      output->Resize(output_dims);
+      indices->Resize(output_dims);
+    }
+
     const T* input_data = input->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
     // FIXME(typhoonzero): data is always converted to type T?
diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h
index 76ece57b39919148da04caecaa43ea9d2b9d95df..f7bac67300bd56b89d5b08238e78d625f4a773a6 100644
--- a/paddle/fluid/operators/top_k_op.h
+++ b/paddle/fluid/operators/top_k_op.h
@@ -37,8 +37,16 @@ class TopkKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("X");
     auto* output = ctx.Output<Tensor>("Out");
     auto* indices = ctx.Output<Tensor>("Indices");
-    // k is determined by Attr
-    const size_t k = static_cast<int>(ctx.Attr<int>("k"));
+
+    size_t k = static_cast<int>(ctx.Attr<int>("k"));
+    auto* k_t = ctx.Input<Tensor>("K");
+    if (k_t) {
+      k = k_t->data<int>()[0];
+      framework::DDim output_dims = output->dims();
+      output_dims[output_dims.size() - 1] = k;
+      output->Resize(output_dims);
+      indices->Resize(output_dims);
+    }
 
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
     int64_t* indices_data = indices->mutable_data<int64_t>(ctx.GetPlace());
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index fb8bcb190bda59e23d118547f451be46c963cce9..72b0f216d3aafbb95931590935b0bf967a8d5be8 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -1,5 +1,5 @@
 
-set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer)
+set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer scope_pool)
 if(WITH_PYTHON)
   list(APPEND PYBIND_DEPS py_func_op)
 endif()
diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc
index 06d8b65fb1480d9f621ca937c1d66ab7e910f010..f8ded9f94ecaf3df1e14aead60ae12abcf8c34a9 100644
--- a/paddle/fluid/pybind/const_value.cc
+++ b/paddle/fluid/pybind/const_value.cc
@@ -49,6 +49,9 @@ void BindConstValue(pybind11::module* m) {
   op_proto_and_checker_maker.def(
       "kOpNameScopeAttrName",
       framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
+  op_proto_and_checker_maker.def(
+      "kOpCreationCallstackAttrName",
+      framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
 }
 
 }  // namespace pybind
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 88a2a5276ab52e25b4f790e3a2f1386ed0715b4e..81d63aace04a44b8ab589f212d83d3c2f5c2d522 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -32,6 +32,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/parallel_executor.h"
 #include "paddle/fluid/framework/prune.h"
 #include "paddle/fluid/framework/reader.h"
+#include "paddle/fluid/framework/scope_pool.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/imperative/layer.h"
@@ -117,6 +118,9 @@ PYBIND11_MODULE(core, m) {
         return paddle::operators::AppendPythonCallableObjectAndReturnId(py_obj);
       });
 
+  m.add_object("_cleanup",  // lambda below is the capsule's destructor callback
+               py::capsule([]() { ScopePool::Instance().Clear(); }));
+
   py::class_<imperative::VarBase, PyVarBase>(m, "VarBase", R"DOC()DOC")
       .def(py::init<>())
       .def("_run_backward",
@@ -454,7 +458,7 @@ All parameter, weight, gradient are variables in Paddle.
             },
         py::return_value_policy::copy);
 
-  py::class_<Scope>(m, "Scope", R"DOC(
+  py::class_<Scope>(m, "_Scope", R"DOC(
     Scope is an association of a name to Variable. All variables belong to Scope.
 
     Variables in a parent scope can be retrieved from local scope.
@@ -474,17 +478,26 @@ All parameter, weight, gradient are variables in Paddle.
           param.set(param_array, place)
 
         )DOC")
+      .def("_remove_from_pool",
+           [](Scope &self) { ScopePool::Instance().Remove(&self); })
       .def("var",
            [](Scope &self, const std::string &name) -> Variable * {
              return self.Var(name);
            },
            py::return_value_policy::reference)
       .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
-      .def(py::init<>())
       .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
            py::return_value_policy::reference)
       .def("drop_kids", &Scope::DropKids);
 
+  m.def("Scope",  // factory function; replaces the removed py::init<>() constructor
+        []() -> Scope * {
+          auto *s = new Scope();
+          ScopePool::Instance().Insert(std::unique_ptr<Scope>(s));  // hand ownership to the pool
+          return s;
+        },
+        py::return_value_policy::reference);  // Python receives a non-owning reference
+
   //! @note: Be careful! PyBind will return std::string as an unicode, not
   //! Python str. If you want a str object, you should cast them in Python.
   m.def("get_all_op_protos", []() -> std::vector<py::bytes> {
diff --git a/paddle/scripts/installation_validate.py b/paddle/scripts/installation_validate.py
new file mode 100644
index 0000000000000000000000000000000000000000..f84e2f4b176609dec28a8e29afea74d3654e9e4c
--- /dev/null
+++ b/paddle/scripts/installation_validate.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid  # not used below; presumably imported only to check it loads - confirm
+import paddle as pd
+
+print(pd.__version__)  # reaching this line means both imports above succeeded
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 2e6b40148d4675cc35c77673b7ed93070f31b854..418dc1346840f707f38836e09a14ca2c19c063ff 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -79,6 +79,7 @@ function cmake_gen() {
                 PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7
             -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/2.7/include/python2.7
             -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/2.7/lib/libpython2.7.dylib"
+            pip install --user -r ${PADDLE_ROOT}/python/requirements.txt
             else
                 exit 1
             fi
@@ -91,6 +92,7 @@ function cmake_gen() {
             -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
             -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
                 WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
+                pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt
             else
                 exit 1
             fi
@@ -103,6 +105,7 @@ function cmake_gen() {
             -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/
             -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib"
                 WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
+                pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt
             else
                 exit 1
             fi
@@ -115,6 +118,7 @@ function cmake_gen() {
             -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/
             -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib"
                 WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
+                pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt
             else
                 exit 1
             fi
@@ -441,7 +445,9 @@ EOF
         # make install should also be test when unittest
         make install -j 8
         if [ "$1" == "cp27-cp27m" ]; then
+            set -e
             pip install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
+            python ${PADDLE_ROOT}/paddle/scripts/installation_validate.py
         elif [ "$1" == "cp35-cp35m" ]; then
             pip3.5 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
         elif [ "$1" == "cp36-cp36m" ]; then
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 8f3660ca387ba10309f829bd04ac2ffdc573f3d6..e0078e53141ac7834fd00e4f2dbd8a6c8a1d6b1b 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -46,7 +46,7 @@ from . import transpiler
 from . import distribute_lookup_table
 from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
-from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
+from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope, _Scope
 from .transpiler import DistributeTranspiler, \
     memory_optimize, release_memory, DistributeTranspilerConfig
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index f2886090d75f87654b33cf7aa6f98ebf6f2e27d1..5a9e908b61eeeea3fdfdfcc54d1f150f59a3973b 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -191,7 +191,7 @@ def _fetch_var(name, scope=None, return_numpy=True):
     assert isinstance(name, str)
     if scope is None:
         scope = global_scope()
-    assert isinstance(scope, core.Scope)
+    assert isinstance(scope, core._Scope)
 
     var = scope.find_var(name)
     assert var is not None, (
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index de30ed2fc5858187d2ecede299832701304e4198..3427fb0c4ae4dad0323b39cf85bfb5b04f796996 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -20,6 +20,7 @@ import os
 import re
 import six
 import sys
+import traceback
 
 import numpy as np
 
@@ -604,6 +605,10 @@ class Operator(object):
         if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
             del op_attrs[role_var_name]
 
+        callstack_var_name = op_maker.kOpCreationCallstackAttrName()
+        op_attrs[callstack_var_name] = list(
+            reversed(traceback.format_stack()))[1:]
+
         if len(self.desc.type()) != 0:
             return
         if type is None:
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 8ac7efee50d2df5084f1b93acdbc6708872e46b2..cc1fdbd285611379cc4fa44d2373748aa6e24faf 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4530,7 +4530,7 @@ def topk(input, k, name=None):
     Args:
         input(Variable): The input variable which can be a vector or Tensor with
             higher rank.
-        k(int):  The number of top elements to look for along the last dimension
+        k(int | Variable):  The number of top elements to look for along the last dimension
                  of input.
         name(str|None): A name for this layer(optional). If set None, the layer
                        will be named automatically.
@@ -4553,12 +4553,18 @@ def topk(input, k, name=None):
     helper = LayerHelper("top_k", **locals())
     values = helper.create_variable_for_type_inference(dtype=input.dtype)
     indices = helper.create_variable_for_type_inference(dtype="int64")
+    inputs = {"X": [input]}
+    attrs = None
+    if isinstance(k, Variable):
+        inputs['K'] = k
+    else:
+        attrs = {'k': k}
     helper.append_op(
         type="top_k",
-        inputs={"X": [input]},
+        inputs=inputs,
         outputs={"Out": [values],
                  "Indices": [indices]},
-        attrs={"k": k})
+        attrs=attrs)
     values.stop_gradient = True
     indices.stop_gradient = True
     return values, indices
diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py
index 4153394c1da776d0a41e1415a09fa7d6f4b14d6d..37b9a9188ab44df81029ae6d9925ae21c1929cff 100644
--- a/python/paddle/fluid/tests/unittests/test_operator_desc.py
+++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
             set(mul_op.attr_names),
             set([
                 "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
-                "op_namescope"
+                "op_namescope", "op_callstack"
             ]))
         self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
         self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py
index 943ad3ed22480193dc51375cdcca5ed36ce35158..655378f7f8c18f5936643f6c178d0e6b3dd8cac8 100644
--- a/python/paddle/fluid/tests/unittests/test_py_func_op.py
+++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py
@@ -26,7 +26,7 @@ os.environ['CPU_NUM'] = str(dev_cnt)
 
 
 def dummy_func_with_no_input():
-    return float(1.0)
+    return np.array([0], dtype='float32')
 
 
 def dummy_func_with_no_output(x):
@@ -105,7 +105,7 @@ def simple_fc_net(img, label, use_py_func_op):
             name='test_tmp_var', dtype='float32', shape=[1])
         fluid.layers.py_func(
             func=dummy_func_with_no_input, x=None, out=dummy_var)
-
+        loss += dummy_var
         fluid.layers.py_func(func=dummy_func_with_no_output, x=loss, out=None)
 
     loss = fluid.layers.mean(loss)
@@ -174,7 +174,7 @@ class TestPyFuncOpUseExecutor(unittest.TestCase):
             self.assertAlmostEqual(max_diff, 0, delta=1e-3)
 
 
-class TestPyFuncOpUseParallelExecutor(unittest.TestCase):
+class TestPyFuncOpUseParallelExecutor(TestPyFuncOpUseExecutor):
     def setUp(self):
         self.use_parallel_executor = True
 
diff --git a/python/paddle/fluid/tests/unittests/test_top_k_op.py b/python/paddle/fluid/tests/unittests/test_top_k_op.py
index 21b5a62baf96bfb2d76a8c59133e8f5d1cb35aea..9fbf59ed669766077a456b3d83b7162e495ae8ae 100644
--- a/python/paddle/fluid/tests/unittests/test_top_k_op.py
+++ b/python/paddle/fluid/tests/unittests/test_top_k_op.py
@@ -21,6 +21,7 @@ from op_test import OpTest
 
 class TestTopkOp(OpTest):
     def setUp(self):
+        self.variable_k = False
         self.set_args()
         self.op_type = "top_k"
         self.dtype = np.float32
@@ -30,9 +31,12 @@ class TestTopkOp(OpTest):
         input = np.random.random((self.row, k)).astype(self.dtype)
         output = np.ndarray((self.row, k))
         indices = np.ndarray((self.row, k)).astype("int64")
-
         self.inputs = {'X': input}
-        self.attrs = {'k': k}
+
+        if self.variable_k:
+            self.inputs['K'] = np.array([k]).astype("int32")
+        else:
+            self.attrs = {'k': k}
 
         for rowid in range(self.row):
             row = input[rowid]
@@ -118,5 +122,12 @@ class TestTopkOp4(TestTopkOp):
         self.top_k = 1
 
 
+class TestTopkOp5(TestTopkOp):
+    def set_args(self):
+        self.row = 40000
+        self.top_k = 3
+        self.variable_k = True  # setUp then passes k through inputs['K'] instead of attrs
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py
index ccf7af334d091dff5cf6a94e5875cdf582fa0c5c..cc7f5ec90c26c87b7c5514c900e853be9e16d6eb 100644
--- a/python/paddle/fluid/transpiler/inference_transpiler.py
+++ b/python/paddle/fluid/transpiler/inference_transpiler.py
@@ -57,7 +57,7 @@ class InferenceTranspiler(object):
             raise TypeError("place should be as CPUPlace/CUDAPlace type")
         if scope is None:
             scope = global_scope()
-        if not isinstance(scope, core.Scope):
+        if not isinstance(scope, core._Scope):
             raise TypeError("scope should be as Scope type or None")
         use_mkldnn = bool(os.getenv("FLAGS_use_mkldnn", False))