diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h
index 094ba5b8d79501ed08673b2e63c290b52f8dade8..146556756af7e0b56ae38b5303e622c97dfe58af 100644
--- a/lite/api/cxx_api.h
+++ b/lite/api/cxx_api.h
@@ -43,16 +43,7 @@ class LITE_API Predictor {
  public:
   // Create an empty predictor.
   Predictor() { scope_ = std::make_shared<Scope>(); }
-  ~Predictor() {
-#ifdef LITE_WITH_OPENCL
-    CLRuntime::Global()->ReleaseResources();
-#endif
-    scope_.reset();
-    exec_scope_ = nullptr;
-    program_.reset();
-    input_names_.clear();
-    output_names_.clear();
-  }
+
   // Create a predictor with the weight variable scope set.
   explicit Predictor(const std::shared_ptr<lite::Scope>& root_scope)
       : scope_(root_scope) {}
diff --git a/lite/api/light_api.cc b/lite/api/light_api.cc
index f61e2f35241bb2a361e665f37fb58e3bc5226090..d82869dbef00929b70a87e05b91ef4a82630bbbe 100644
--- a/lite/api/light_api.cc
+++ b/lite/api/light_api.cc
@@ -14,6 +14,7 @@
 
 #include "lite/api/light_api.h"
 #include <algorithm>
+#include <unordered_map>
 #include "paddle_use_kernels.h"  // NOLINT
 #include "paddle_use_ops.h"      // NOLINT
 
@@ -135,7 +136,15 @@ void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
   // 1. Create op first
   Program program(prog, scope_, {});
 
-  // 2. Create Instructs
+// 2. Create Instructs
+#ifdef LITE_WITH_OPENCL
+  // Set up a single OpenCL context for this program. Each OpenCL kernel
+  // created below receives its own KernelContext populated from this one
+  // through CopySharedTo(), instead of a fresh context from the scheduler.
+  using OpenCLContext = Context<TargetType::kOpenCL>;
+  std::unique_ptr<KernelContext> local_ctx(new KernelContext());
+  local_ctx->As<OpenCLContext>().InitOnce();
+#endif
 
   // Create the kernels of the target places, and filter out the specific
   // kernel with the target alias.
@@ -151,7 +160,18 @@ void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
           return it->alias() == alias;
         });
     CHECK(it != kernels.end());
+
+#ifdef LITE_WITH_OPENCL
+    if ((*it)->target() == TARGET(kOpenCL)) {
+      std::unique_ptr<KernelContext> ctx(new KernelContext());
+      (*local_ctx).As<OpenCLContext>().CopySharedTo(&ctx->As<OpenCLContext>());
+      (*it)->SetContext(std::move(ctx));
+    } else {
+      (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
+    }
+#else
     (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
+#endif
 
     insts.emplace_back(op, std::move(*it));
   }
diff --git a/lite/api/light_api.h b/lite/api/light_api.h
index e21618449ca65f86f389cfff20d8e619de7c316d..aa25ea81c7b62238211f96265a4edc49f2d065a1 100644
--- a/lite/api/light_api.h
+++ b/lite/api/light_api.h
@@ -107,8 +107,6 @@ class LightPredictorImpl : public lite_api::PaddlePredictor {
  public:
   LightPredictorImpl() = default;
 
-  ~LightPredictorImpl();
-
   std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
 
   std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc
index c5ec042dfa7864f78b780eab05f3a2b4b132e4b3..cdf5b7fb06df35b2e7fb72fc4e33ccb721a0f7f7 100644
--- a/lite/api/light_api_impl.cc
+++ b/lite/api/light_api_impl.cc
@@ -21,13 +21,6 @@
 namespace paddle {
 namespace lite {
 
-LightPredictorImpl::~LightPredictorImpl() {
-  raw_predictor_.reset();
-#ifdef LITE_WITH_OPENCL
-  CLRuntime::Global()->ReleaseResources();
-#endif
-}
-
 void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
   // LightPredictor Only support NaiveBuffer backend in publish lib
   if (config.lite_model_file().empty()) {
diff --git a/lite/backends/opencl/cl_context.cc b/lite/backends/opencl/cl_context.cc
index 153c0620035377afac065e8049a9ebbc0a6f0c15..0edb83acc4772b2f878b22f2ea16b3175b14a7ba 100644
--- a/lite/backends/opencl/cl_context.cc
+++ b/lite/backends/opencl/cl_context.cc
@@ -1,11 +1,8 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -13,7 +10,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "lite/backends/opencl/cl_context.h"
-#include <algorithm>
 #include <memory>
 #include <string>
 #include <utility>
@@ -36,10 +32,8 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
   STL::stringstream program_key_ss;
   program_key_ss << file_name << options;
   std::string program_key = program_key_ss.str();
-
-  auto &programs = CLRuntime::Global()->programs();
-  auto it = programs.find(program_key);
-  if (it != programs.end()) {
+  auto it = programs_.find(program_key);
+  if (it != programs_.end()) {
     VLOG(3) << " --- program -> " << program_key << " has been built --- ";
     return *(it->second);
   }
@@ -50,9 +44,9 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
   CLRuntime::Global()->BuildProgram(program.get(), options);
   VLOG(3) << " --- end build program -> " << program_key << " --- ";
 
-  programs[program_key] = std::move(program);
+  programs_[program_key] = std::move(program);
 
-  return *(programs[program_key]);
+  return *(programs_[program_key]);
 }
 
 void CLContext::AddKernel(const std::string &kernel_name,
@@ -68,30 +62,25 @@ void CLContext::AddKernel(const std::string &kernel_name,
       new cl::Kernel(program, kernel_name.c_str(), &status));
   CL_CHECK_FATAL(status);
   VLOG(3) << " --- end create kernel --- ";
-
-  auto &kernels = CLRuntime::Global()->kernels();
-  auto &kernel_offset_map = CLRuntime::Global()->kernel_offset();
-  kernels.emplace_back(std::move(kernel));
+  kernels_.emplace_back(std::move(kernel));
   STL::stringstream kernel_key;
   kernel_key << kernel_name << options << time_stamp;
-  kernel_offset_map[kernel_key.str()] = kernels.size() - 1;
+  kernel_offset_[kernel_key.str()] = kernels_.size() - 1;
 }
 
 cl::Kernel &CLContext::GetKernel(const int index) {
-  auto &kernels = CLRuntime::Global()->kernels();
-  VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
-  CHECK(static_cast<size_t>(index) < kernels.size())
+  VLOG(3) << " --- kernel count: " << kernels_.size() << " --- ";
+  CHECK(static_cast<size_t>(index) < kernels_.size())
       << "The index must be less than the size of kernels.";
-  CHECK(kernels[index] != nullptr)
+  CHECK(kernels_[index] != nullptr)
       << "The target kernel pointer cannot be null.";
-  return *(kernels[index]);
+  return *(kernels_[index]);
 }
 
 cl::Kernel &CLContext::GetKernel(const std::string &name) {
-  auto &kernel_offset_map = CLRuntime::Global()->kernel_offset();
-  auto it = kernel_offset_map.find(name);
-  CHECK(it != kernel_offset_map.end()) << "Cannot find the kernel function: "
-                                       << name;
+  auto it = kernel_offset_.find(name);
+  CHECK(it != kernel_offset_.end()) << "Cannot find the kernel function: "
+                                    << name;
   return GetKernel(it->second);
 }
 
diff --git a/lite/backends/opencl/cl_context.h b/lite/backends/opencl/cl_context.h
index b12473ccf5b4238f4ee95b7848a0842ee5b2ffe0..586dc3df1267e47c6cdaad1d362cd9ed2df2770e 100644
--- a/lite/backends/opencl/cl_context.h
+++ b/lite/backends/opencl/cl_context.h
@@ -27,6 +27,20 @@ namespace lite {
 
 class CLContext {
  public:
+  // Destroys the cached kernels first, then the cached programs.
+  // cl::Kernel and cl::Program are reference-counting wrappers whose
+  // destructors already release the underlying OpenCL handle, so calling
+  // clReleaseKernel / clReleaseProgram on ->get() here as well would drop
+  // each reference twice.
+  ~CLContext() {
+    // Destroying the unique_ptrs runs the wrapper destructors, which
+    // release every handle exactly once.
+    kernels_.clear();
+    kernel_offset_.clear();
+    programs_.clear();
+    LOG(INFO) << "release cl::Program, cl::Kernel finished.";
+  }
+
   cl::CommandQueue &GetCommandQueue();
 
   cl::Context &GetContext();
@@ -52,6 +66,10 @@ class CLContext {
                                 int divitor = 2);
   //  cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size,
   //                                   size_t max_work_size);
+ private:
+  std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
+  std::vector<std::unique_ptr<cl::Kernel>> kernels_;
+  std::map<std::string, int> kernel_offset_;
 };
 
 }  // namespace lite
diff --git a/lite/backends/opencl/cl_runtime.cc b/lite/backends/opencl/cl_runtime.cc
index dc6a16861212cf5a5693ae1779ed2b1c2c26f1ee..8a6b026367986548b017aee263a70d4df33381b5 100644
--- a/lite/backends/opencl/cl_runtime.cc
+++ b/lite/backends/opencl/cl_runtime.cc
@@ -1,11 +1,8 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -14,7 +11,6 @@ limitations under the License. */
 
 #include "lite/backends/opencl/cl_runtime.h"
 #include <string>
-#include <unordered_map>
 #include <utility>
 #include <vector>
 #include "lite/utils/cp_logging.h"
@@ -29,38 +25,16 @@ CLRuntime* CLRuntime::Global() {
 }
 
 CLRuntime::~CLRuntime() {
-  LOG(INFO) << "CLRuntime::~CLRuntime()";
-  // Note: do ReleaseResources() in predictor
-  command_queue_&& clReleaseCommandQueue(command_queue_->get());
-  command_queue_.reset();
-  context_&& clReleaseContext(context_->get());
-  context_.reset();
-  device_.reset();
-  platform_.reset();
-  initialized_ = false;
-}
-
-void CLRuntime::ReleaseResources() {
-  //  if (is_resources_released_) {
-  //    return;
-  //  }
-
   if (command_queue_ != nullptr) {
     command_queue_->flush();
     command_queue_->finish();
   }
-  for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) {
-    clReleaseKernel(kernels_[kidx]->get());
-    kernels_[kidx].reset();
-  }
-  kernels_.clear();
-  kernel_offset_.clear();
-  for (auto& p : programs_) {
-    clReleaseProgram(p.second->get());
-  }
-  programs_.clear();
-  LOG(INFO) << "release resources finished.";
-  is_resources_released_ = true;
+  // For controlling the destruction order:
+  command_queue_.reset();
+  context_.reset();
+  device_.reset();
+  platform_.reset();
+  LOG(INFO) << "release ~CLRuntime() ";
 }
 
 bool CLRuntime::Init() {
@@ -98,14 +72,14 @@ cl::CommandQueue& CLRuntime::command_queue() {
   return *command_queue_;
 }
 
-std::shared_ptr<cl::Program> CLRuntime::CreateProgram(
+std::unique_ptr<cl::Program> CLRuntime::CreateProgram(
     const cl::Context& context, std::string file_name) {
   auto cl_file = opencl_kernels_files.find(file_name);
   std::string content(cl_file->second.begin(), cl_file->second.end());
   cl::Program::Sources sources;
   sources.push_back(content);
   auto prog =
-      std::shared_ptr<cl::Program>(new cl::Program(context, sources, &status_));
+      std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_));
   VLOG(4) << "OpenCL kernel file name: " << file_name;
   VLOG(4) << "Program source size: " << content.size();
   CL_CHECK_FATAL(status_);
diff --git a/lite/backends/opencl/cl_runtime.h b/lite/backends/opencl/cl_runtime.h
index 69f9e3e371d5b55429dd727bb79ad1a9595ab5c5..2a8996b066a480d9c0a6db67fa5fd60142885046 100644
--- a/lite/backends/opencl/cl_runtime.h
+++ b/lite/backends/opencl/cl_runtime.h
@@ -1,11 +1,8 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,7 +15,6 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <string>
-#include <unordered_map>
 #include <vector>
 #include "lite/backends/opencl/cl_include.h"
 #include "lite/backends/opencl/cl_utility.h"
@@ -33,8 +29,6 @@ class CLRuntime {
  public:
   static CLRuntime* Global();
 
-  void ReleaseResources();
-
   bool Init();
 
   cl::Platform& platform();
@@ -45,7 +39,7 @@ class CLRuntime {
 
   cl::CommandQueue& command_queue();
 
-  std::shared_ptr<cl::Program> CreateProgram(const cl::Context& context,
+  std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context,
                                              std::string file_name);
 
   std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);
@@ -60,12 +54,6 @@ class CLRuntime {
 
   std::map<std::string, size_t>& GetDeviceInfo();
 
-  std::unordered_map<std::string, std::shared_ptr<cl::Program>>& programs() {
-    return programs_;
-  }
-  std::vector<std::unique_ptr<cl::Kernel>>& kernels() { return kernels_; }
-  std::map<std::string, int>& kernel_offset() { return kernel_offset_; }
-
  private:
   CLRuntime() = default;
 
@@ -107,19 +95,11 @@ class CLRuntime {
 
   std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
 
-  std::unordered_map<std::string, std::shared_ptr<cl::Program>> programs_{};
-
-  std::vector<std::unique_ptr<cl::Kernel>> kernels_{};
-
-  std::map<std::string, int> kernel_offset_{};
-
   cl_int status_{CL_SUCCESS};
 
   bool initialized_{false};
 
   bool is_init_success_{false};
-
-  bool is_resources_released_{false};
 };
 
 }  // namespace lite
diff --git a/lite/core/mir/runtime_context_assign_pass.cc b/lite/core/mir/runtime_context_assign_pass.cc
index 97c4819eaf6734ba9b374444166d17cb15e8ae65..3cbe602f31a87c6ddb42d36fe75e52e8347695d8 100644
--- a/lite/core/mir/runtime_context_assign_pass.cc
+++ b/lite/core/mir/runtime_context_assign_pass.cc
@@ -24,11 +24,31 @@ class RuntimeContextAssignPass : public StmtPass {
   RuntimeContextAssignPass() {}
 
   void Apply(const std::unique_ptr<SSAGraph>& graph) override {
+#ifdef LITE_WITH_OPENCL
+    // A single OpenCL context is initialized up front; every OpenCL kernel
+    // visited below gets a KernelContext filled from it via CopySharedTo()
+    // instead of a fresh context from the scheduler.
+    using OpenCLContext = Context<TargetType::kOpenCL>;
+    std::unique_ptr<KernelContext> shared_cl_ctx(new KernelContext());
+    shared_cl_ctx->As<OpenCLContext>().InitOnce();
+#endif
     for (auto& node : graph->mutable_nodes()) {
       if (!node.IsStmt()) continue;
       auto& inst = node.AsStmt();
+#ifdef LITE_WITH_OPENCL
+      if (inst.picked_kernel().target() == TARGET(kOpenCL)) {
+        std::unique_ptr<KernelContext> kernel_ctx(new KernelContext());
+        shared_cl_ctx->As<OpenCLContext>().CopySharedTo(
+            &kernel_ctx->As<OpenCLContext>());
+        inst.picked_kernel().SetContext(std::move(kernel_ctx));
+        continue;
+      }
+#endif
+
+      // Kernels of any other target (and every kernel in builds without
+      // OpenCL) take a new context from the global scheduler.
       inst.picked_kernel().SetContext(
           ContextScheduler::Global().NewContext(inst.picked_kernel().target()));
     }
   }
 };
diff --git a/lite/kernels/opencl/io_copy_buffer_compute.cc b/lite/kernels/opencl/io_copy_buffer_compute.cc
index 6a49cc2577a58690e5e0b6a6ede82df0bdc99bb1..f76f667923fa8d39847db5dae8e07d7398f25f99 100644
--- a/lite/kernels/opencl/io_copy_buffer_compute.cc
+++ b/lite/kernels/opencl/io_copy_buffer_compute.cc
@@ -106,6 +106,7 @@ class IoCopykOpenCLToHostCompute
 
     auto& context = ctx_->As<OpenCLContext>();
     auto* wait_list = context.cl_wait_list();
+
     auto it = wait_list->find(x_ptr);
     if (it != wait_list->end()) {
 #ifndef LITE_SHUTDOWN_LOG
@@ -113,6 +114,9 @@ class IoCopykOpenCLToHostCompute
 #endif
       auto& event = *(it->second);
       event.wait();
+      // By reference: plain `auto` would copy the cl::CommandQueue wrapper.
+      auto& command_queue = CLRuntime::Global()->command_queue();
+      command_queue.finish();  // clFinish also issues all queued commands.
     } else {
       LOG(FATAL) << "Could not find the sync event for the target cl tensor.";
     }