diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 510fc8d7db0d957b619e4dbeb25fc14b9768327c..480f48290cc1bbf4888832d76187a13a4915ec40 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -26,9 +26,6 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif  // _OPENMP
 #ifdef PADDLE_EXECUTOR_MULTITHREAD
 #include <queue>
 #include <utility>
@@ -407,14 +404,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
   return result_vector;
 }
 
-template <typename Dtype, Precision P>
-void Executor<Dtype, P>::SetThreadNum(int num) {
-#ifdef _OPENMP
-  // omp_set_dynamic(0);
-  omp_set_num_threads(num);
-#endif
-}
-
 template class Executor<CPU, Precision::FP32>;
 template class Executor<FPGA, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP32>;
diff --git a/src/io/executor.h b/src/io/executor.h
index 28b0d65181355fd76e4ec09aa5964130aee2ab68..f8f2a8ad5657fdb3cf6cb249e32537bd5e866913 100644
--- a/src/io/executor.h
+++ b/src/io/executor.h
@@ -58,8 +58,6 @@ class Executor {
   std::vector<Ptype> Predict(const std::vector<Ptype> &input,
                              const std::vector<int64_t> &dims);
 
-  void SetThreadNum(int num);
-
  protected:
   Executor() = default;
   void InitMemory();
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index 3d5735f8da66db6f4b5f139f8261a4cd9cf0f796..cabdd799a0e7d561d8bc56c0913f1389c38f8907 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -16,6 +16,14 @@ limitations under the License. */
 
 namespace paddle_mobile {
 
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
+#ifdef _OPENMP
+  // omp_set_dynamic(0);
+  omp_set_num_threads(num);
+#endif
+}
+
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
                                   int batch_size) {
@@ -81,7 +89,9 @@ PaddleMobile<Dtype, P>::~PaddleMobile() {
 }
 
 template class PaddleMobile<CPU, Precision::FP32>;
+
 template class PaddleMobile<FPGA, Precision::FP32>;
+
 template class PaddleMobile<GPU_MALI, Precision::FP32>;
 
 }  // namespace paddle_mobile
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 3ce39e0ae1ffc7e193f6f4308a911875fdf95076..74c11471566c3db8a37ea2d62e0496e5d40cb3b7 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -17,6 +17,9 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
+#ifdef _OPENMP
+#include <omp.h>
+#endif  // _OPENMP
 
 #include "common/types.h"
 #include "framework/tensor.h"
@@ -44,6 +47,7 @@ class PaddleMobile {
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
             bool optimize = false, int batch_size = 1);
+  void SetThreadNum(int num);
 
   /*
    * @b to predict
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 1851f2668dee3a10e72b5dbeeadb9f51827a2729..2ab24736397c1e71350335561abbcabcba6e27a4 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -17,26 +17,21 @@ limitations under the License.
 */
 #include "../test_include.h"
 int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
-  //  auto program = loader.Load(g_googlenet, optimize);
-  auto program = loader.Load(g_googlenet_combine + "/model",
-                             g_googlenet_combine + "/params", optimize);
-  auto time2 = time();
-  DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, optimize);
-  executor.SetThreadNum(4);
-  std::vector<float> input;
-  std::vector<int64_t> dims{1, 3, 224, 224};
-  GetInput<float>(g_test_image_1x3x224x224, &input, dims);
-  auto time3 = time();
-  int count = 1;
-  for (int i = 0; i < count; ++i) {
-    executor.Predict(input, dims);
-  }
+  if (paddle_mobile.Load(g_googlenet, optimize)) {
+    auto time2 = time();
+    DLOG << "load cost :" << time_diff(time1, time2) << "ms";
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+    auto time3 = time();
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    auto time4 = time();
 
-  auto time4 = time();
-  DLOG << "predict cost :" << time_diff(time3, time4) / count << "ms\n";
+    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
   return 0;
 }
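For context, below is a minimal, self-contained sketch (not part of the patch) of the mechanism that PaddleMobile::SetThreadNum wraps: omp_set_num_threads() caps the team size of subsequent OpenMP parallel regions, and the #ifdef _OPENMP guard keeps the same source building when OpenMP is disabled. The free function SetThreadNum here is a hypothetical stand-in for the member function; compile with -fopenmp (or your compiler's equivalent) to exercise the parallel path.

#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif  // _OPENMP

// Hypothetical stand-in for PaddleMobile<Dtype, P>::SetThreadNum.
static void SetThreadNum(int num) {
#ifdef _OPENMP
  omp_set_num_threads(num);  // caps threads used by later parallel regions
#endif
}

int main() {
  SetThreadNum(4);
#ifdef _OPENMP
  // Each thread in the (at most 4-wide) team reports its id.
#pragma omp parallel
  std::printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
#else
  std::printf("built without OpenMP; running single-threaded\n");
#endif
  return 0;
}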