Unverified · Commit c5f1cea6 authored by WangLiu, committed by GitHub

Merge pull request #500 from codeWorm2015/develop

fix #499 modify cmake add paddle mobile interface
...@@ -40,8 +40,6 @@ else()
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/*.h)
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/*.cc)
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/*.cpp)
endif()
if(FPGA)
...@@ -103,6 +101,10 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
# NET default
set(NET "defult" CACHE STRING "select net type")
set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
# if (IS_IOS)
...@@ -118,4 +120,3 @@ if(DEBUGING)
    add_subdirectory(test)
endif()
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "io/io.h"
+#include "io/executor.h"
#include <algorithm>
#include <vector>
#include "common/enforce.h"
...@@ -39,7 +39,7 @@ char *Get_binary_data(std::string filename) {
  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                        filename.c_str());
  fseek(file, 0, SEEK_END);
-  long size = ftell(file);
+  int64_t size = ftell(file);
  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
  rewind(file);
  char *data = new char[size];
...@@ -50,116 +50,6 @@ char *Get_binary_data(std::string filename) {
  return data;
}
static size_t ReadBuffer(const char *file_name, uint8_t **out) {
printf("%s \n", file_name);
FILE *fp;
fp = fopen(file_name, "rb");
PADDLE_MOBILE_ENFORCE(fp != NULL, " %s open failed !", file_name);
fseek(fp, 0, SEEK_END);
size_t size = ftell(fp);
rewind(fp);
DLOG << "model size: " << size;
*out = reinterpret_cast<uint8_t *>(malloc(size));
size_t cur_len = 0;
size_t nread;
while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
cur_len += nread;
}
fclose(fp);
return cur_len;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &dirname, bool optimize, bool can_add_split) {
auto program =
this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
program.model_path = dirname;
return program;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &model_path, const std::string &para_path,
bool optimize) {
auto program = this->LoadProgram(model_path, optimize);
program.para_path = para_path;
program.combined = true;
return program;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
const std::string &model_path, bool optimize, bool can_add_split) {
std::string model_filename = model_path;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = NULL;
size_t read_size = ReadBuffer(model_filename.c_str(), &buf);
PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null");
c_program = paddle_mobile__framework__proto__program_desc__unpack(
NULL, read_size, buf);
//
PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null");
//
DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
//
auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program);
framework::Program<Dtype, P> program;
program.originProgram = originProgramDesc;
auto scope = std::make_shared<framework::Scope>();
program.scope = scope;
for (const auto &block : originProgramDesc->Blocks()) {
for (auto var_desc : block->Vars()) {
auto var = scope->Var(var_desc->Name());
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable() &&
var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH &&
var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) {
auto dim = var_desc->Tensor_desc().Dims();
auto tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(dim));
} else {
auto dim = var_desc->Tensor_desc().Dims();
PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
dim[0] = 1;
auto tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(dim));
}
} else {
// TODO(codeWorm): some.
}
}
}
if (optimize) {
framework::ProgramOptimize program_optimize;
program.optimizeProgram =
program_optimize.FusionOptimize(originProgramDesc, can_add_split);
}
if (optimize) {
program.optimizeProgram->Description("optimize: ");
} else {
originProgramDesc->Description("program: ");
}
paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
return program;
}
template class Loader<CPU, Precision::FP32>;
template class Loader<FPGA, Precision::FP32>;
template class Loader<GPU_MALI, Precision::FP32>;
#pragma mark - executor
template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
...@@ -209,30 +99,30 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
template <typename Dtype, Precision P>
void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
-                                    framework::LoDTensor *tensor, char *&data) {
+                                    framework::LoDTensor *tensor, char **data) {
  // 1. version
-  uint32_t version = *(uint32_t *)data;
-  data += sizeof(uint32_t);
+  uint32_t version = *reinterpret_cast<uint32_t *>(*data);
+  (*data) += sizeof(uint32_t);
  // 2 Lod information
  uint64_t *lod_level_ptr = new uint64_t();
-  memcpy(lod_level_ptr, data, sizeof(uint64_t));
+  memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
  uint64_t lod_level = *lod_level_ptr;
  delete lod_level_ptr;
-  data += sizeof(uint64_t);
+  (*data) += sizeof(uint64_t);
  auto &lod = *tensor->mutable_lod();
  lod.resize(lod_level);
  for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size = *(uint64_t *)data;
-    data += sizeof(uint64_t);
+    uint64_t size = *reinterpret_cast<uint64_t *>(*data);
+    (*data) += sizeof(uint64_t);
    DLOG << "lod size: " << i << size;
    std::vector<size_t> tmp(size / sizeof(size_t));
    for (int k = 0; k < tmp.size(); ++k) {
-      tmp[k] = *(size_t *)data;
-      DLOG << "tmp[k]: " << k << *(size_t *)data;
-      data += sizeof(size_t);
+      tmp[k] = *reinterpret_cast<size_t *>(*data);
+      (*data) += sizeof(size_t);
    }
    for (auto j : tmp) {
...@@ -242,18 +132,18 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
  }
  // 3. tensor version
-  uint32_t tensor_version = *(uint32_t *)data;
-  data += sizeof(uint32_t);
+  uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
+  (*data) += sizeof(uint32_t);
  // 4. tensor desc
-  int32_t size = *(int32_t *)data;
-  data += sizeof(int32_t);
+  int32_t size = *reinterpret_cast<int32_t *>(*data);
+  (*data) += sizeof(int32_t);
  std::unique_ptr<char[]> buf(new char[size]);
  for (int m = 0; m < size; ++m) {
-    buf.get()[m] = data[m];
+    buf.get()[m] = (*data)[m];
  }
-  data += (sizeof(char) * size);
+  (*data) += (sizeof(char) * size);
  const framework::TensorDesc &desc = var_desc.Tensor_desc();
  int memory_size = 1;
...@@ -290,9 +180,9 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
  }
  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = data[n];
+    static_cast<char *>(memory)[n] = (*data)[n];
  }
-  data += (sizeof(char) * memory_size * type_size);
+  (*data) += (sizeof(char) * memory_size * type_size);
}
template <typename Dtype, Precision P>
...@@ -309,7 +199,7 @@ void Executor<Dtype, P>::InitMemory() {
        char *origin_data =
            Get_binary_data(program_.model_path + "/" + var_desc->Name());
        char *data = origin_data;
-        LoadMemory(*var_desc, tensor, data);
+        LoadMemory(*var_desc, tensor, &data);
        delete origin_data;
      } else {
        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
...@@ -335,7 +225,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
      if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
        continue;
      }
-      LoadMemory(*var_desc, tensor, data);
+      LoadMemory(*var_desc, tensor, &data);
    } else {
      if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
        auto tensor = var->template GetMutable<framework::LoDTensor>();
...@@ -442,7 +332,8 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
                               *(program_.scope));
#ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_EXECUTOR_MULTITHREAD
-  // TODO expose profile info as an interface, user can get them to analysis
+  // TODO(haipeng): expose profile info as an interface, user can get them to
+  // analysis
  // the performance of their deepnet.
  FILE *df = fopen("net.dot", "w");
  fprintf(df, "digraph {\n");
...@@ -480,8 +371,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
  std::sort(_tv.begin(), _tv.end(), compf);
  _tv.push_back(std::make_pair("total", _ptotal));
  for (auto const &p : _tv) {
-    printf("%-16s\t%-10.0f\t%-2.4f\n", p.first.c_str(), (float)p.second,
-           (float)p.second / _ptotal * 100.0);
+    printf("%-16s\t%-10.0f\t%-2.4f\n", p.first.c_str(),
+           static_cast<float>(p.second),
+           static_cast<float>(p.second) / _ptotal * 100.0);
  }
  printf("====================[---------]======================\n");
#endif
......
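The hunks above switch LoadMemory's cursor parameter from char *&data to char **data, so the function advances the caller's read position through the double pointer; that is why the InitMemory and InitCombineMemory hunks now pass &data. A minimal standalone sketch of the same convention, not code from this commit (ReadU32 is a hypothetical helper):

#include <cstdint>
#include <cstring>

// Read one field from a byte buffer and advance the caller's cursor,
// mirroring the char **data convention used by Executor::LoadMemory above.
static uint32_t ReadU32(char **data) {
  uint32_t value;
  std::memcpy(&value, *data, sizeof(uint32_t));  // memcpy sidesteps unaligned-read issues
  (*data) += sizeof(uint32_t);                   // the caller's pointer now points past the field
  return value;
}

// Usage: char *cursor = origin_data; uint32_t version = ReadU32(&cursor);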
...@@ -18,6 +18,7 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
...@@ -32,31 +33,6 @@ limitations under the License. */
namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Loader {
public:
/*
* @b load separate format fluid model
* @b i.e. load a fluid model whose parameters are stored as separate files
* */
const framework::Program<Dtype, P> Load(const std::string &dirname,
bool optimize = false,
bool can_add_split = false);
/*
* @b load combined format fluid model
* @b i.e. load a fluid model whose parameters are combined into a single file
* */
const framework::Program<Dtype, P> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false);
private:
const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
bool optimize = false,
bool can_add_split = false);
};
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Executor {
 public:
...@@ -86,7 +62,7 @@ class Executor {
  Executor() = default;
  void InitMemory();
  void LoadMemory(const framework::VarDesc var_desc,
-                  framework::LoDTensor *tensor, char *&data);
+                  framework::LoDTensor *tensor, char **data);
  void InitCombineMemory();
  framework::Program<Dtype> program_;
  int batch_size_ = 1;
......
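After the split, Loader produces a framework::Program and Executor consumes it, matching the Executor constructor shown above. A rough sketch of how the two classes fit together, following the pattern the pre-change tests used (the model path is a placeholder):

paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load("./models/googlenet", /*optimize=*/true);
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, /*batch_size=*/1,
                                                     /*use_optimize=*/true);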
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "io/loader.h"
#include "framework/lod_tensor.h"
#include "framework/program/program-optimize/program_optimize.h"
namespace paddle_mobile {
using framework::Variable;
static size_t ReadBuffer(const char *file_name, uint8_t **out) {
printf("%s \n", file_name);
FILE *fp;
fp = fopen(file_name, "rb");
PADDLE_MOBILE_ENFORCE(fp != NULL, " %s open failed !", file_name);
fseek(fp, 0, SEEK_END);
size_t size = ftell(fp);
rewind(fp);
DLOG << "model size: " << size;
*out = reinterpret_cast<uint8_t *>(malloc(size));
size_t cur_len = 0;
size_t nread;
while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
cur_len += nread;
}
fclose(fp);
return cur_len;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &dirname, bool optimize, bool can_add_split) {
auto program =
this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
program.model_path = dirname;
return program;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &model_path, const std::string &para_path,
bool optimize) {
auto program = this->LoadProgram(model_path, optimize);
program.para_path = para_path;
program.combined = true;
return program;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
const std::string &model_path, bool optimize, bool can_add_split) {
std::string model_filename = model_path;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = NULL;
size_t read_size = ReadBuffer(model_filename.c_str(), &buf);
PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null");
c_program = paddle_mobile__framework__proto__program_desc__unpack(
NULL, read_size, buf);
//
PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null");
//
DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
//
auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program);
framework::Program<Dtype, P> program;
program.originProgram = originProgramDesc;
auto scope = std::make_shared<framework::Scope>();
program.scope = scope;
for (const auto &block : originProgramDesc->Blocks()) {
for (auto var_desc : block->Vars()) {
auto var = scope->Var(var_desc->Name());
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable() &&
var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH &&
var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) {
auto dim = var_desc->Tensor_desc().Dims();
auto tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(dim));
} else {
auto dim = var_desc->Tensor_desc().Dims();
PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
dim[0] = 1;
auto tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(dim));
}
} else {
// TODO(codeWorm): some.
}
}
}
if (optimize) {
framework::ProgramOptimize program_optimize;
program.optimizeProgram =
program_optimize.FusionOptimize(originProgramDesc, can_add_split);
}
if (optimize) {
program.optimizeProgram->Description("optimize: ");
} else {
originProgramDesc->Description("program: ");
}
paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
return program;
}
template class Loader<CPU, Precision::FP32>;
template class Loader<FPGA, Precision::FP32>;
template class Loader<GPU_MALI, Precision::FP32>;
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "common/types.h"
#include "framework/program/program.h"
namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Loader {
public:
/*
* @b load separate format fluid model
* @b i.e. load a fluid model whose parameters are stored as separate files
* */
const framework::Program<Dtype, P> Load(const std::string &dirname,
bool optimize = false,
bool can_add_split = false);
/*
* @b load combined format fluid model
* @b i.e. load a fluid model whose parameters are combined into a single file
* */
const framework::Program<Dtype, P> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false);
private:
const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
bool optimize = false,
bool can_add_split = false);
};
} // namespace paddle_mobile
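A brief usage sketch for the two Load overloads declared above; the directory layout follows what the tests later in this diff use, and the paths are placeholders rather than files shipped with the library:

paddle_mobile::Loader<paddle_mobile::CPU> loader;

// separate format: a directory containing __model__ plus one file per parameter
auto separate = loader.Load("./models/googlenet", /*optimize=*/true);

// combined format: a single model file plus a single params file
auto combined = loader.Load("./models/googlenet_combine/model",
                            "./models/googlenet_combine/params",
                            /*optimize=*/true);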
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//
// Created by liuRuiLong on 2018/7/2.
//
#include "io/paddle_mobile.h"
namespace paddle_mobile {
template <typename Dtype, Precision P>
bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
int batch_size) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<Loader<Dtype, P>>();
} else {
LOG(kLOG_INFO) << "loader inited";
}
if (executor_.get() == nullptr) {
executor_ = std::make_shared<Executor<Dtype, P>>(
loader_->Load(dirname, optimize), batch_size, optimize);
} else {
LOG(kLOG_INFO) << "executor inited";
}
return true;
}
template <typename Dtype, Precision P>
bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
const std::string &para_path, bool optimize,
int batch_size) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<Loader<Dtype, P>>();
} else {
LOG(kLOG_INFO) << "loader inited";
}
if (executor_.get() == nullptr) {
executor_ = std::make_shared<Executor<Dtype, P>>(
loader_->Load(model_path, para_path, optimize), batch_size, optimize);
} else {
LOG(kLOG_INFO) << "executor inited";
}
return true;
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::Predict(
const framework::Tensor &t) {
return executor_->Predict(t);
}
template <typename Dtype, Precision P>
std::vector<typename PaddleMobile<Dtype, P>::Ptype>
PaddleMobile<Dtype, P>::Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims) {
return executor_->Predict(input, dims);
}
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Clear() {
executor_ = nullptr;
loader_ = nullptr;
}
template class PaddleMobile<CPU, Precision::FP32>;
template class PaddleMobile<FPGA, Precision::FP32>;
template class PaddleMobile<GPU_MALI, Precision::FP32>;
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
#include "framework/tensor.h"
#include "io/executor.h"
#include "io/loader.h"
namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32>
class PaddleMobile {
typedef typename PrecisionTrait<P>::ptype Ptype;
public:
PaddleMobile() {}
/*
* @b load separate format fluid model
* @b i.e. load a fluid model whose parameters are stored as separate files
* */
bool Load(const std::string &dirname, bool optimize = false,
int batch_size = 1);
/*
* @b load combined format fluid model
* @b i.e. load a fluid model whose parameters are combined into a single file
* */
bool Load(const std::string &model_path, const std::string &para_path,
bool optimize = false, int batch_size = 1);
/*
* @b to predict
* */
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
/*
* @b to predict with vector and dim
*
* @b i.e. run prediction from an input vector and its dimension information
* */
std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims);
void Clear();
private:
std::shared_ptr<Loader<Dtype, P>> loader_;
std::shared_ptr<Executor<Dtype, P>> executor_;
};
} // namespace paddle_mobile
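A short usage sketch of the PaddleMobile facade declared above, following the pattern of the updated tests below (the model path is a placeholder and the input is zero-filled dummy data):

paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
if (paddle_mobile.Load("./models/mobilenet", /*optimize=*/true, /*batch_size=*/1)) {
  std::vector<int64_t> dims{1, 3, 224, 224};
  std::vector<float> input(1 * 3 * 224 * 224, 0.f);  // dummy image-sized input
  auto result = paddle_mobile.Predict(input, dims);  // returns std::vector<Ptype>
}
paddle_mobile.Clear();  // releases the internal loader and executor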
...@@ -15,6 +15,10 @@ limitations under the License. */
#ifdef ANDROID
#include "paddle_mobile_jni.h"
#include "common/log.h"
#include "framework/tensor.h"
#include "io/paddle_mobile.h"
#ifdef __cplusplus
extern "C" {
#endif
...@@ -28,17 +32,16 @@ using std::string;
extern const char *ANDROID_LOG_TAG =
    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
-static Executor<CPU> *shared_executor_instance = nullptr;
+static PaddleMobile<CPU> *shared_paddle_mobile_instance = nullptr;
// toDo mutex lock
// static std::mutex shared_mutex;
-Executor<CPU> *getExecutorInstance(const Program<CPU> p, int batch_size,
-                                   bool use_optimize) {
-  if (nullptr == shared_executor_instance) {
-    shared_executor_instance = new Executor<CPU>(p, batch_size, use_optimize);
+PaddleMobile<CPU> *getPaddleMobileInstance() {
+  if (nullptr == shared_paddle_mobile_instance) {
+    shared_paddle_mobile_instance = new PaddleMobile<CPU>();
  }
-  return shared_executor_instance;
+  return shared_paddle_mobile_instance;
}
string jstring2cppstring(JNIEnv *env, jstring jstr) {
...@@ -51,11 +54,9 @@ string jstring2cppstring(JNIEnv *env, jstring jstr) {
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
                                                          jclass thiz,
                                                          jstring modelPath) {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
  bool optimize = true;
-  auto program = loader.Load(jstring2cppstring(env, modelPath), optimize);
-  shared_executor_instance = getExecutorInstance(program, 1, optimize);
-  return shared_executor_instance != nullptr ? JNI_TRUE : JNI_FALSE;
+  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                         optimize);
}
JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
...@@ -73,7 +74,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
  for (int i = 0; i < framework::product(ddim); i++) {
    input_ptr[i] = dataPointer[i];
  }
-  auto output = shared_executor_instance->Predict(input);
+  auto output = shared_paddle_mobile_instance->Predict(input);
  count = output->numel();
  result = env->NewFloatArray(count);
  env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...@@ -81,7 +82,9 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
}
JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
-                                                       jclass thiz) {}
+                                                       jclass thiz) {
+  getPaddleMobileInstance()->Clear();
+}
}  // namespace jni
}  // namespace paddle_mobile
......
...@@ -15,9 +15,6 @@ limitations under the License. */
#pragma once
#ifdef ANDROID
#include <jni.h>
-#include "common/log.h"
-#include "framework/tensor.h"
-#include "io/io.h"
#ifdef __cplusplus
extern "C" {
......
set(dir ${CMAKE_CURRENT_SOURCE_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
-if (googlenet)
+if (NET STREQUAL "googlenet")
    # gen test
    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-googlenet paddle-mobile)
-elseif (mobilenet)
+elseif (NET STREQUAL "mobilenet")
    # gen test
    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-mobilenet paddle-mobile)
-elseif (yolo)
+elseif (NET STREQUAL "yolo")
    # gen test
    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-yolo paddle-mobile)
-elseif (squeezenet)
+elseif (NET STREQUAL "squeezenet")
    # gen test
    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-squeezenet paddle-mobile)
-elseif(resnet)
+elseif(NET STREQUAL "resnet")
    # gen test
    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
    target_link_libraries(test-resnet paddle-mobile)
......
...@@ -19,7 +19,7 @@ limitations under the License. */
#include "common/log.h"
#include "framework/op_registry.h"
-#include "io/io.h"
+#include "io/executor.h"
#include "operators/conv_op.h"
#include "operators/elementwise_add_op.h"
#include "operators/pool_op.h"
......
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
-#include "io/io.h"
+#include "io/loader.h"
int main() {
  paddle_mobile::Loader<paddle_mobile::CPU> loader;
......
...@@ -15,7 +15,7 @@ limitations under the License. */
#include "../test_helper.h"
#include "framework/program/program-optimize/node.h"
#include "framework/program/program-optimize/program_optimize.h"
-#include "io/io.h"
+#include "io/loader.h"
int main() {
  paddle_mobile::Loader<paddle_mobile::CPU> loader;
......
...@@ -17,25 +17,25 @@ limitations under the License. */
#include "../test_include.h"
int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  bool optimize = true;
  auto time1 = time();
  // auto program = loader.Load(g_googlenet, optimize);
-  auto program = loader.Load(g_googlenet_combine + "/model",
-                             g_googlenet_combine + "/params", optimize);
+  if (paddle_mobile.Load(g_googlenet_combine + "/model",
+                         g_googlenet_combine + "/params", optimize)) {
  auto time2 = time();
  DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, optimize);
-  std::vector<float> input;
-  std::vector<int64_t> dims{1, 3, 224, 224};
-  GetInput<float>(g_test_image_1x3x224x224, &input, dims);
-  auto time3 = time();
+  std::vector<float> input;
+  std::vector<int64_t> dims{1, 3, 224, 224};
+  GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+  auto time3 = time();
  for (int i = 0; i < 10; ++i) {
-    executor.Predict(input, dims);
+    paddle_mobile.Predict(input, dims);
  }
  auto time4 = time();
  DLOG << "predict cost :" << time_diff(time3, time4) << "ms\n";
+  }
  return 0;
}
...@@ -17,23 +17,23 @@ limitations under the License. */
#include "../test_include.h"
int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  auto time1 = time();
-  auto program = loader.Load(g_mobilenet_ssd, true);
+  if (paddle_mobile.Load(g_mobilenet_ssd, true)) {
  auto time2 = time();
  DLOG << "load cost :" << time_diff(time1, time1) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true);
  std::vector<int64_t> dims{1, 3, 300, 300};
  Tensor input_tensor;
  SetupTensor<float>(&input_tensor, {1, 3, 300, 300}, static_cast<float>(0),
                     static_cast<float>(1));
  std::vector<float> input(input_tensor.data<float>(),
                           input_tensor.data<float>() + input_tensor.numel());
  auto time3 = time();
-  executor.Predict(input, dims);
+  paddle_mobile.Predict(input, dims);
  auto time4 = time();
  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
  return 0;
}
...@@ -17,24 +17,25 @@ limitations under the License. */
#include "../test_include.h"
int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  auto time1 = time();
-  auto program = loader.Load(g_mobilenet, true);
+  if (paddle_mobile.Load(g_mobilenet, true)) {
  auto time2 = time();
  DLOG << "load cost :" << time_diff(time1, time1) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true);
-  std::vector<int64_t> dims{1, 3, 224, 224};
-  Tensor input_tensor;
-  SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
-                     static_cast<float>(1));
-  std::vector<float> input(input_tensor.data<float>(),
-                           input_tensor.data<float>() + input_tensor.numel());
-  auto time3 = time();
-  auto vec_result = executor.Predict(input, dims);
-  auto time4 = time();
-  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  std::vector<int64_t> dims{1, 3, 224, 224};
+  Tensor input_tensor;
+  SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
+                     static_cast<float>(1));
+  std::vector<float> input(input_tensor.data<float>(),
+                           input_tensor.data<float>() + input_tensor.numel());
+  auto time3 = time();
+  auto vec_result = paddle_mobile.Predict(input, dims);
+  auto time4 = time();
+  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
  return 0;
}
...@@ -17,23 +17,23 @@ limitations under the License. */
#include "../test_include.h"
int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  auto time1 = time();
-  auto program = loader.Load(g_resnet, false);
+  if (paddle_mobile.Load(g_resnet, false)) {
  auto time2 = time();
  DLOG << "load cost :" << time_diff(time1, time1) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
-  std::vector<int64_t> dims{1, 3, 32, 32};
-  Tensor input_tensor;
-  SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
-                     static_cast<float>(1));
-  std::vector<float> input(input_tensor.data<float>(),
-                           input_tensor.data<float>() + input_tensor.numel());
-  auto time3 = time();
-  executor.Predict(input, dims);
-  auto time4 = time();
-  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  std::vector<int64_t> dims{1, 3, 32, 32};
+  Tensor input_tensor;
+  SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
+                     static_cast<float>(1));
+  std::vector<float> input(input_tensor.data<float>(),
+                           input_tensor.data<float>() + input_tensor.numel());
+  auto time3 = time();
+  paddle_mobile.Predict(input, dims);
+  auto time4 = time();
+  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
  return 0;
}
...@@ -17,25 +17,25 @@ limitations under the License. */
#include "../test_include.h"
int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  // ../../../test/models/googlenet
  // ../../../test/models/mobilenet
  auto time1 = time();
-  auto program = loader.Load(g_squeezenet, false);
+  if (paddle_mobile.Load(g_squeezenet, false)) {
  auto time2 = time();
  DLOG << "load cost :" << time_diff(time1, time1) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
-  std::vector<int64_t> dims{1, 3, 227, 227};
-  Tensor input_tensor;
-  SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
-                     static_cast<float>(1));
-  std::vector<float> input(input_tensor.data<float>(),
-                           input_tensor.data<float>() + input_tensor.numel());
-  auto time3 = time();
-  executor.Predict(input, dims);
-  auto time4 = time();
-  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  std::vector<int64_t> dims{1, 3, 227, 227};
+  Tensor input_tensor;
+  SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
+                     static_cast<float>(1));
+  std::vector<float> input(input_tensor.data<float>(),
+                           input_tensor.data<float>() + input_tensor.numel());
+  auto time3 = time();
+  paddle_mobile.Predict(input, dims);
+  auto time4 = time();
+  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
  return 0;
}
...@@ -17,25 +17,25 @@ limitations under the License. */
#include "../test_include.h"
int main() {
-  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  // ../../../test/models/googlenet
  // ../../../test/models/mobilenet
  auto time1 = time();
-  auto program = loader.Load(g_yolo, false);
+  if (paddle_mobile.Load(g_yolo, false)) {
  auto time2 = time();
  DLOG << "load cost :" << time_diff(time1, time1) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
  std::vector<int64_t> dims{1, 3, 227, 227};
  Tensor input_tensor;
  SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
                     static_cast<float>(1));
  std::vector<float> input(input_tensor.data<float>(),
                           input_tensor.data<float>() + input_tensor.numel());
  auto time3 = time();
-  executor.Predict(input, dims);
+  paddle_mobile.Predict(input, dims);
  auto time4 = time();
  DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+  }
  return 0;
}
...@@ -14,7 +14,7 @@ limitations under the License. */
#include "../../src/operators/kernel/sigmoid_kernel.h"
#include "../test_helper.h"
-#include "io/io.h"
+#include "io/executor.h"
int main() {
  paddle_mobile::framework::Tensor input;
......
...@@ -30,4 +30,4 @@ limitations under the License. */
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
-#include "io/io.h"
+#include "io/paddle_mobile.h"
...@@ -60,7 +60,6 @@ build_for_android() {
    TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
    ANDROID_ARM_MODE="arm"
    if [ $# -eq 1 ]; then
-        NET=$1
        cmake .. \
            -B"../build/release/${PLATFORM}" \
            -DANDROID_ABI="${ABI}" \
...@@ -70,7 +69,7 @@ build_for_android() {
            -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
            -DANDROID_STL=c++_static \
            -DANDROID=true \
-            -D"${NET}=true" \
+            -DNET=$1 \
            -D"${ARM_PLATFORM}"=true
    else
...@@ -99,7 +98,6 @@ build_for_ios() {
    CXX_FLAGS="-fobjc-abi-version=2 -fobjc-arc -std=gnu++14 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT}"
    mkdir -p "${BUILD_DIR}"
    if [ $# -eq 1 ]; then
-        NET=$1
        cmake .. \
            -B"${BUILD_DIR}" \
            -DCMAKE_BUILD_TYPE="${MODE}" \
...@@ -107,7 +105,7 @@ build_for_ios() {
            -DIOS_PLATFORM=OS \
            -DCMAKE_C_FLAGS="${C_FLAGS}" \
            -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-            -D"${NET}"=true \
+            -DNET=$1 \
            -DIS_IOS="true"
    else
        cmake .. \
...@@ -129,16 +127,12 @@ build_error() {
if [ $# -lt 1 ]; then
    echo "error: target missing!"
-    echo "available targets: mac|linux|ios|android"
-    echo "sample usage: ./build.sh mac"
+    echo "available targets: ios|android"
+    echo "sample usage: ./build.sh android"
else
    if [ $# -eq 2 ]; then
        if [ $2 != "googlenet" -a $2 != "mobilenet" -a $2 != "yolo" -a $2 != "squeezenet" -a $2 != "resnet" ]; then
-            if [ $1 = "mac" ]; then
-                build_for_mac
-            elif [ $1 = "linux" ]; then
-                build_for_linux
-            elif [ $1 = "android" ]; then
+            if [ $1 = "android" ]; then
                build_for_android
            elif [ $1 = "ios" ]; then
                build_for_ios
...@@ -146,11 +140,7 @@ else
                build_error
            fi
        else
-            if [ $1 = "mac" ]; then
-                build_for_mac $2
-            elif [ $1 = "linux" ]; then
-                build_for_linux $2
-            elif [ $1 = "android" ]; then
+            if [ $1 = "android" ]; then
                build_for_android $2
            elif [ $1 = "ios" ]; then
                build_for_ios $2
...@@ -159,11 +149,7 @@ else
            fi
        fi
    else
-        if [ $1 = "mac" ]; then
-            build_for_mac
-        elif [ $1 = "linux" ]; then
-            build_for_linux
-        elif [ $1 = "android" ]; then
+        if [ $1 = "android" ]; then
            build_for_android
        elif [ $1 = "ios" ]; then
            build_for_ios
......
set(NET "googlenet" CACHE STRING "select net type") if (NET STREQUAL "googlenet")
set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
if (NET EQUAL "googlenet")
set(CONCAT_OP ON) set(CONCAT_OP ON)
set(CONV_OP ON) set(CONV_OP ON)
set(LRN_OP ON) set(LRN_OP ON)
...@@ -12,7 +9,7 @@ if (NET EQUAL "googlenet") ...@@ -12,7 +9,7 @@ if (NET EQUAL "googlenet")
set(RELU_OP ON) set(RELU_OP ON)
set(FUSION_CONVADD_OP ON) set(FUSION_CONVADD_OP ON)
set(FUSION_CONVADD_RELU_OP ON) set(FUSION_CONVADD_RELU_OP ON)
elseif (NET EQUAL "mobilenet") elseif (NET STREQUAL "mobilenet")
set(CONV_OP ON) set(CONV_OP ON)
set(ELEMENTWISEADD_OP ON) set(ELEMENTWISEADD_OP ON)
set(RELU_OP ON) set(RELU_OP ON)
...@@ -23,12 +20,12 @@ elseif (NET EQUAL "mobilenet") ...@@ -23,12 +20,12 @@ elseif (NET EQUAL "mobilenet")
set(POOL_OP ON) set(POOL_OP ON)
set(RESHAPE_OP ON) set(RESHAPE_OP ON)
set(FUSION_CONVADDBNRELU_OP) set(FUSION_CONVADDBNRELU_OP)
elseif (NET EQUAL "yolo") elseif (NET STREQUAL "yolo")
set(BATCHNORM_OP ON) set(BATCHNORM_OP ON)
set(CONV_OP ON) set(CONV_OP ON)
set(RELU_OP ON) set(RELU_OP ON)
set(ELEMENTWISEADD_OP ON) set(ELEMENTWISEADD_OP ON)
elseif (NET EQUAL "squeezenet") elseif (NET STREQUAL "squeezenet")
set(CONCAT_OP ON) set(CONCAT_OP ON)
set(CONV_OP ON) set(CONV_OP ON)
set(RELU_OP ON) set(RELU_OP ON)
...@@ -36,7 +33,7 @@ elseif (NET EQUAL "squeezenet") ...@@ -36,7 +33,7 @@ elseif (NET EQUAL "squeezenet")
set(POOL_OP ON) set(POOL_OP ON)
set(RESHAPE_OP ON) set(RESHAPE_OP ON)
set(SOFTMAX_OP ON) set(SOFTMAX_OP ON)
elseif (NET EQUAL "resnet") elseif (NET STREQUAL "resnet")
set(CONV_OP ON) set(CONV_OP ON)
set(BATCHNORM_OP ON) set(BATCHNORM_OP ON)
set(ELEMENTWISEADD_OP ON) set(ELEMENTWISEADD_OP ON)
......