add some function

b2e33f33 · yangfei · de4b0ffb · b2e33f33 · b2e33f33 · b2e33f33
23 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ option(DEBUGING "enable debug mode" ON)
 option(USE_EXCEPTION "use std exception" OFF)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
-option(CPU "armv7 with neon" ON)
+option(CPU "armv7 with neon" OFF)
 option(GPU_MALI "mali gpu" OFF)
 option(GPU_CL "opencl gpu" ON)
 option(FPGA "fpga" OFF)

--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -27,38 +27,62 @@ class CLImage {
  CLImage() = default;
  void Init(cl_context context, float *tensorInput, DDim ddim) {
+    tensorDims_ = ddim;
    cl_image_format cf = {
      .image_channel_order = CL_RGBA,
      .image_channel_data_type = CL_HALF_FLOAT
    };
    // NCHW -> [W * (C+3)/4, H * N]
-    size_t N = tensorDims_[0];
+    DLOG<<tensorDims_;
-    size_t C = tensorDims_[1];
+      size_t N,C,H,W;
-    size_t H = tensorDims_[2];
+    if(tensorDims_.size()==4){
-    size_t W = tensorDims_[3];
+        N = tensorDims_[0];
+        if(N<0){
+            N = 1;
+        }
+        C = tensorDims_[1];
+        H = tensorDims_[2];
+        W = tensorDims_[3];
+    }else if(tensorDims_.size()==1){
+        N = 1;
+        C = tensorDims_[0];
+        H = 1;
+        W = 1;
+    }
+      DLOG<<"-------InitMemory-------";
    size_t width = W * ((C + 3) / 4);
    size_t height = H * N;
    std::unique_ptr<half_t[]> imageData{};
+      int count = 0;
    if (tensorInput != nullptr) {
      imageData.reset(new half_t[width * height * 4]);
-      float *p = tensorInput;
+                  float *p = tensorInput;
-      size_t i0 = 0;
+                  size_t i0 = 0;
-      for (int n = 0; n < N; n++) {
+                  for (int n = 0; n < N; n++) {
-        for (int c = 0; c < C; c++) {
+                    for (int c = 0; c < C; c++) {
-          size_t i1 = i0;
+                      size_t i1 = i0;
-          for (int h = 0; h < H; h++) {
+                      for (int h = 0; h < H; h++) {
-            size_t i2 = i1 << 2 + c % 4;
+                        size_t i2 = (i1<<2) + c % 4;
-            for (int w = 0; w < W; w++) {
+                        for (int w = 0; w < W; w++) {
-              imageData[i2] = float2half(*p);
+                            if (i2 >= width * height * 4) {
-              i2 += 4;
+                                printf("%d > %d ----> %d, %d, %d, %d --- %d, %d, %d\n", i2, width*height*4, n, c, h, w, i0, i1, i2);
-              p++;
+                            }
-            }
+                            assert(i2 < width * height * 4);
-            i1 += width;
-          }
+                            imageData[i2] = float2half(*p);
-        }
+                          i2 += 4;
-        i0 += width * H;
+                          p++;
-      }
+            //              count++;
+            //              DLOG<<count;
+                        }
+                        i1 += width;
+                      }
+                    }
+                    i0 += width * H;
+                  }
    }
+      DLOG<<"-------InitMemory-------";
    cl_int err;
    cl_image_ = clCreateImage2D(
      context, // cl_context context

--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -801,6 +801,106 @@ void Executor<Dtype, P>::Predict_To(int end) {
 #endif
 #ifdef PADDLE_MOBILE_CL
+/**
+ * Decodes one serialized parameter tensor from a raw byte stream into a
+ * caller-provided float buffer and advances *data past everything consumed.
+ *
+ * Stream layout, in the order it is consumed here:
+ *   1. uint32 version                         (skipped)
+ *   2. uint64 LoD level count, then per level a uint64 byte size followed by
+ *      the level's size_t entries              (skipped)
+ *   3. uint32 tensor version                  (skipped)
+ *   4. int32 descriptor size + descriptor     (skipped)
+ *   5. tensor payload
+ * The element count comes from var_desc's dims, not from the serialized
+ * descriptor, and the payload is always decoded as 32-bit floats: the data
+ * type recorded in the descriptor is not consulted.
+ *
+ * @param var_desc     variable description supplying the tensor dims.
+ * @param tensorInput  destination; must have room for product(dims) floats.
+ * @param data         in/out cursor into the serialized bytes.
+ */
+template <>
+void Executor<GPU_CL, Precision::FP32>::LoadMemory(
+    const framework::VarDesc var_desc, float *tensorInput, char **data) {
+  // 1. version: the value is not used, so just step over it.
+  (*data) += sizeof(uint32_t);
+  // 2. LoD information. Header fields are copied out with memcpy because the
+  //    cursor is a plain byte pointer with no alignment guarantee.
+  uint64_t lod_level = 0;
+  memcpy(&lod_level, *data, sizeof(uint64_t));
+  (*data) += sizeof(uint64_t);
+  for (uint64_t i = 0; i < lod_level; ++i) {
+    uint64_t lod_bytes = 0;
+    memcpy(&lod_bytes, *data, sizeof(uint64_t));
+    (*data) += sizeof(uint64_t);
+    // The LoD entries themselves are never used on this path. Step over
+    // whole size_t entries only, so the cursor ends where an entry-by-entry
+    // read would have left it.
+    (*data) += (lod_bytes / sizeof(size_t)) * sizeof(size_t);
+  }
+  // 3. tensor version: unused, skipped.
+  (*data) += sizeof(uint32_t);
+  // 4. tensor desc: only its length matters here; the bytes are skipped.
+  int32_t desc_size = 0;
+  memcpy(&desc_size, *data, sizeof(int32_t));
+  (*data) += sizeof(int32_t);
+  if (desc_size > 0) {
+    // A non-positive size (corrupt input) skips nothing instead of moving
+    // the cursor backwards.
+    (*data) += desc_size;
+  }
+  // 5. payload: element count is the product of the declared dims.
+  const framework::TensorDesc &desc = var_desc.Tensor_desc();
+  int memory_size = 1;
+  for (auto l : desc.Dims()) {
+    memory_size *= l;
+  }
+  const int type_size = 4;  // bytes per stored element (32-bit float)
+  if (program_.quantification) {
+    // Quantized payload: float min, float max, then one uint8 per element
+    // that is mapped linearly back onto [min, max].
+    float min_value;
+    float max_value;
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+    const float factor = (max_value - min_value) / 255.0;
+    const uint8_t *uint8_data = reinterpret_cast<const uint8_t *>(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      tensorInput[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size; n++) {
+      float value;
+      memcpy(&value, *data + n * type_size, type_size);
+      // Magnitudes below 1e-30 are flushed to exactly zero.
+      if (value < 1e-30 && value > -1e-30) {
+        tensorInput[n] = 0.0;
+      } else {
+        tensorInput[n] = value;
+      }
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
+  }
+}
 template <>
 void Executor<GPU_CL, Precision::FP32>::InitMemory() {
@@ -812,26 +912,38 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
          continue;
        }
        char *origin_data =
            Get_binary_data(program_.model_path + "/" + var_desc->Name());
+          char *data = origin_data;
        cl_context context = program_.scope->GetCLScpoe()->Context();
+          const framework::TensorDesc &desc = var_desc->Tensor_desc();
-        float *tensorInput = (float *)origin_data;
+          int numel = 1;
+          for (auto l : desc.Dims()) {
-        const framework::TensorDesc &desc = var_desc->Tensor_desc();
+              numel *= l;
-        framework::DDim ddim = cl_image->dims();
+          }
+          DLOG<<var_desc->Name();
+        float *tensorInput = static_cast<float *>(
+                paddle_mobile::memory::Alloc(sizeof(float) * numel));
+        LoadMemory(*var_desc,tensorInput,&data);
+        framework::DDim ddim = framework::make_ddim(desc.Dims());
        cl_image->Init(context, tensorInput, ddim);
-        delete origin_data;
+          delete origin_data;
+          paddle_mobile::memory::Free(tensorInput);
      }else{
-        auto cl_image = var->template GetMutable<framework::CLImage>();
+          if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
-        cl_context context = program_.scope->GetCLScpoe()->Context();
+              auto cl_image = var->template GetMutable<framework::CLImage>();
+              cl_context context = program_.scope->GetCLScpoe()->Context();
-        const framework::TensorDesc &desc = var_desc->Tensor_desc();
+              const framework::TensorDesc &desc = var_desc->Tensor_desc();
-        framework::DDim ddim = cl_image->dims();
+              framework::DDim ddim = framework::make_ddim(desc.Dims());
+              DLOG<<var_desc->Name();
-        cl_image->Init(context, ddim);
+              cl_image->Init(context, ddim);
+          }
      }
    }
@@ -863,21 +975,23 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
        cl_context context = program_.scope->GetCLScpoe()->Context();
        const framework::TensorDesc &desc = var_desc->Tensor_desc();
-        framework::DDim ddim = cl_image->dims();
+        framework::DDim ddim = framework::make_ddim(desc.Dims());
        int numel = 1;
        for (int i = 0; i < ddim.size(); i++) {
          numel = numel * ddim[i];
        }
-        float *tensorInput = data;
+          float *tensorInput = static_cast<float *>(
+                  paddle_mobile::memory::Alloc(sizeof(float) * numel));
+          LoadMemory(*var_desc,tensorInput,&origin_data);
        cl_image->Init(context, tensorInput, ddim);
-        data += numel;
+          paddle_mobile::memory::Free(tensorInput);
      }else{
        auto cl_image = var->template GetMutable<framework::CLImage>();
        cl_context context = program_.scope->GetCLScpoe()->Context();
        const framework::TensorDesc &desc = var_desc->Tensor_desc();
-        framework::DDim ddim = cl_image->dims();
+        framework::DDim ddim = framework::make_ddim(desc.Dims());
        cl_image->Init(context, ddim);
      }

--- a/src/framework/executor.h
+++ b/src/framework/executor.h
@@ -73,6 +73,8 @@ class Executor {
  void LoadMemory(const framework::VarDesc var_desc,
                  framework::LoDTensor *tensor, char **data);
+  void LoadMemory(const framework::VarDesc var_desc,
+                    float * tensorInput, char **data);
  void InitCombineMemory();

--- a/src/framework/loader.cpp
+++ b/src/framework/loader.cpp
@@ -16,6 +16,9 @@ limitations under the License. */
 #include "framework/lod_tensor.h"
 #include "framework/program/program-optimize/program_optimize.h"
+#ifdef PADDLE_MOBILE_CL
+#include "framework/cl/cl_image.h"
+#endif
 namespace paddle_mobile {
 namespace framework {
@@ -26,7 +29,8 @@ namespace framework {
 * @param originProgramDesc
 * @param scope
 */
-void InitMemoryFromProgram(
+template<typename Dtype, Precision P>
+void Loader<Dtype, P>::InitMemoryFromProgram(
        std::shared_ptr<ProgramDesc> &originProgramDesc,
        std::shared_ptr<Scope> &scope) {
  for (const auto &block : originProgramDesc.get()->Blocks()) {
@@ -51,6 +55,35 @@ void InitMemoryFromProgram(
  }
 }
+#ifdef PADDLE_MOBILE_CL
+        /**
+         * GPU_CL specialization: walks every variable of every block in the
+         * program, creates it in the scope, and for LoD-tensor variables
+         * resizes the backing CLImage to the dims declared in the descriptor.
+         * Non-persistable variables get their first dim forced to 1
+         * (presumably the batch dimension - confirm against the model format).
+         *
+         * @param originProgramDesc program whose blocks and variables are visited
+         * @param scope             scope in which the variables are created
+         */
+        template<>
+        void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
+                std::shared_ptr<ProgramDesc> &originProgramDesc,
+                std::shared_ptr<Scope> &scope) {
+          for (const auto &block : originProgramDesc->Blocks()) {
+            for (const auto &var_desc : block->Vars()) {
+              // The variable is created in the scope whatever its type is.
+              auto var = scope->Var(var_desc->Name());
+              if (var_desc->Type() != VARTYPE_TYPE_LOD_TENSOR) {
+                // TODO(codeWorm): other variable types are not handled here.
+                continue;
+              }
+              // Work on a copy of the declared dims so dim[0] can be patched.
+              auto dim = var_desc->Tensor_desc().Dims();
+              if (!var_desc->Persistable()) {
+                PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
+                dim[0] = 1;
+              }
+              auto cl_image = var->GetMutable<framework::CLImage>();
+              cl_image->Resize(make_ddim(dim));
+            }
+          }
+        }
+#endif
 /**
 * fusion and print someinfos
 * @tparam Dtype

--- a/src/framework/loader.h
+++ b/src/framework/loader.h
@@ -53,6 +53,9 @@ class Loader {
                                                 bool optimize = false,
                                                 bool quantification = false,
                                                 bool can_add_split = false);
+  void InitMemoryFromProgram(std::shared_ptr<ProgramDesc> &originProgramDesc,
+                             std::shared_ptr<Scope> &scope);
 };
 }

--- a/src/operators/batchnorm_op.cpp
+++ b/src/operators/batchnorm_op.cpp
@@ -40,4 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp);
 #ifdef PADDLE_MOBILE_FPGA
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(batch_norm, ops::BatchNormOp);
+#endif
 #endif
--- a/src/operators/batchnorm_op.h
+++ b/src/operators/batchnorm_op.h
@@ -54,5 +54,8 @@ USE_OP_MALI_GPU(batch_norm);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
+#ifdef PADDLE_MOBILE_CL
+USE_OP_CL(batch_norm);
+#endif
 #endif
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -77,7 +77,6 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
    void Init() {}
    void RunImpl() {
    }
 #else
    void Init() {}

--- a/src/operators/fetch_op.cpp
+++ b/src/operators/fetch_op.cpp
@@ -43,3 +43,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(fetch, ops::FetchOp);
+#endif
--- a/src/operators/fetch_op.h
+++ b/src/operators/fetch_op.h
@@ -54,3 +54,6 @@ USE_OP_MALI_GPU(fetch);
 #ifdef PADDLE_MOBILE_FPGA
 USE_OP_FPGA(fetch);
 #endif
+#ifdef PADDLE_MOBILE_CL
+USE_OP_CL(fetch);
+#endif
--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -15,22 +15,21 @@ limitations under the License. */
 #ifdef CONV_OP
 #include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_arm_func.h"
 namespace paddle_mobile {
 namespace operators {
 template <>
 bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
-  this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
+//  this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
  return true;
 }
 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
-  auto kernel = this->cl_helper_.KernelAt(0);
+//  auto kernel = this->cl_helper_.KernelAt(0);
-  size_t global_work_size[3] = {1, 2, 3};
+//  size_t global_work_size[3] = {1, 2, 3};
-  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL);
+//  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL);
 }
 template class ConvKernel<GPU_CL, float>;

--- a/src/operators/kernel/cl/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/cl/elementwise_add_kernel.cpp
@@ -21,7 +21,7 @@ namespace paddle_mobile {
        template <>
        bool ElementwiseAddKernel<GPU_CL, float>::Init(ElementwiseAddParam<GPU_CL> *param) {
-            this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
+//            this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
            return true;
        }

--- a/src/operators/kernel/cl/softmax_kernel.cpp
+++ b/src/operators/kernel/cl/softmax_kernel.cpp
@@ -12,22 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef SOFTMAX_OP
-#include "operators/kernel/pool_kernel.h"
+#include "operators/kernel/softmax_kernel.h"
 namespace paddle_mobile {
-namespace operators {
+    namespace operators {
-template <>
+        template <>
-bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) {
+        bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) {
-  return true;
+            return true;
-}
+        }
-template <>
+        template <>
-void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {}
+        void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {}
-template class SoftmaxKernel<GPU_CL, float>;
+        template class SoftmaxKernel<GPU_CL, float>;
-}  // namespace operators
+    }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/pool_op.cpp
+++ b/src/operators/pool_op.cpp
@@ -68,5 +68,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp);
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp);
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(pool2d, ops::PoolOp);
+#endif
 #endif
--- a/src/operators/pool_op.h
+++ b/src/operators/pool_op.h
@@ -54,5 +54,8 @@ USE_OP_MALI_GPU(pool2d);
 #ifdef PADDLE_MOBILE_FPGA
 USE_OP_FPGA(pool2d);
 #endif
+#ifdef PADDLE_MOBILE_CL
+USE_OP_CL(pool2d);
+#endif
 #endif
--- a/src/operators/relu_op.cpp
+++ b/src/operators/relu_op.cpp
@@ -41,5 +41,8 @@ REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(relu, ops::ReluOp);
+#endif
 #endif
--- a/src/operators/relu_op.h
+++ b/src/operators/relu_op.h
@@ -57,5 +57,8 @@ USE_OP_MALI_GPU(relu);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
+#ifdef PADDLE_MOBILE_CL
+USE_OP_CL(relu);
+#endif
 #endif
--- a/src/operators/reshape_op.cpp
+++ b/src/operators/reshape_op.cpp
@@ -40,5 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(reshape, ops::ReshapeOp);
+#endif
 #endif
--- a/src/operators/reshape_op.h
+++ b/src/operators/reshape_op.h
@@ -56,5 +56,8 @@ USE_OP_MALI_GPU(reshape);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
+#ifdef PADDLE_MOBILE_CL
+USE_OP_CL(reshape);
+#endif
 #endif
--- a/src/operators/softmax_op.cpp
+++ b/src/operators/softmax_op.cpp
@@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp);
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp);
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(softmax, ops::SoftmaxOp);
+#endif
 #endif
--- a/src/operators/softmax_op.h
+++ b/src/operators/softmax_op.h
@@ -52,5 +52,8 @@ USE_OP_MALI_GPU(softmax);
 #ifdef PADDLE_MOBILE_FPGA
 USE_OP_FPGA(softmax);
 #endif
+#ifdef PADDLE_MOBILE_CL
+USE_OP_CL(softmax);
+#endif
 #endif
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -83,175 +83,175 @@ elseif("genet" IN_LIST NET)
    target_link_libraries(test-genet paddle-mobile)
 else ()
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-resnet paddle-mobile)
+#    target_link_libraries(test-resnet paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-squeezenet paddle-mobile)
+#    target_link_libraries(test-squeezenet paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-yolo paddle-mobile)
+#    target_link_libraries(test-yolo paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-googlenet paddle-mobile)
+#    target_link_libraries(test-googlenet paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-conv-op  operators/test_cov_op.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-conv-op  operators/test_cov_op.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-conv-op paddle-mobile)
+#    target_link_libraries(test-conv-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-mul-op  operators/test_mul_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-mul-op  operators/test_mul_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-mul-op paddle-mobile)
+#    target_link_libraries(test-mul-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-elementwiseadd-op paddle-mobile)
+#    target_link_libraries(test-elementwiseadd-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-concat-op paddle-mobile)
+#    target_link_libraries(test-concat-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-lrn-op  operators/test_lrn_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-lrn-op  operators/test_lrn_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-lrn-op paddle-mobile)
+#    target_link_libraries(test-lrn-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-batchnorm-op  operators/test_batchnorm_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-batchnorm-op  operators/test_batchnorm_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-batchnorm-op paddle-mobile)
+#    target_link_libraries(test-batchnorm-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-priorbox-op  operators/test_prior_box_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-priorbox-op  operators/test_prior_box_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-priorbox-op paddle-mobile)
+#    target_link_libraries(test-priorbox-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-boxcoder-op  operators/test_box_coder_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-boxcoder-op  operators/test_box_coder_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-boxcoder-op paddle-mobile)
+#    target_link_libraries(test-boxcoder-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-transpose-op  operators/test_transpose_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-transpose-op  operators/test_transpose_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-transpose-op paddle-mobile)
+#    target_link_libraries(test-transpose-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-multiclassnms-op  operators/test_multiclass_nms_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-multiclassnms-op  operators/test_multiclass_nms_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-multiclassnms-op paddle-mobile)
+#    target_link_libraries(test-multiclassnms-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-reshape-op  operators/test_reshape_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-reshape-op  operators/test_reshape_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-reshape-op paddle-mobile)
+#    target_link_libraries(test-reshape-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-relu-op  operators/test_relu_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-relu-op  operators/test_relu_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-relu-op paddle-mobile)
+#    target_link_libraries(test-relu-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-fc-op  operators/test_fusion_fc_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-fc-op  operators/test_fusion_fc_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-fc-op paddle-mobile)
+#    target_link_libraries(test-fc-op paddle-mobile)
+#
-    # gen test log
+#    # gen test log
-    ADD_EXECUTABLE(test-log common/test_log.cpp)
+#    ADD_EXECUTABLE(test-log common/test_log.cpp)
-    target_link_libraries(test-log paddle-mobile)
+#    target_link_libraries(test-log paddle-mobile)
+#
-    # gen test log
+#    # gen test log
-    ADD_EXECUTABLE(test-load framework/test_load.cpp)
+#    ADD_EXECUTABLE(test-load framework/test_load.cpp)
-    target_link_libraries(test-load paddle-mobile)
+#    target_link_libraries(test-load paddle-mobile)
+#
-    # gen test log
+#    # gen test log
-    ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp)
+#    ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp)
-    target_link_libraries(test-loadmemory paddle-mobile)
+#    target_link_libraries(test-loadmemory paddle-mobile)
+#
-    ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp)
+#    ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp)
-    target_link_libraries(test-inference-api paddle-mobile)
+#    target_link_libraries(test-inference-api paddle-mobile)
+#
+#
-    # gen test log
+#    # gen test log
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
+#    ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
-    target_link_libraries(test-optimize paddle-mobile)
+#    target_link_libraries(test-optimize paddle-mobile)
+#
+#
-    #gen test
+#    #gen test
-    ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-pool paddle-mobile)
+#    target_link_libraries(test-pool paddle-mobile)
+#
-    #gen test
+#    #gen test
-    ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-softmax paddle-mobile)
+#    target_link_libraries(test-softmax paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp)
+#    ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp)
-    target_link_libraries(test-gemm-accuracy paddle-mobile)
+#    target_link_libraries(test-gemm-accuracy paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp)
+#    ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp)
-    target_link_libraries(test-gemm-perf paddle-mobile)
+#    target_link_libraries(test-gemm-perf paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-enforce common/test_enforce.cpp)
+#    ADD_EXECUTABLE(test-enforce common/test_enforce.cpp)
-    target_link_libraries(test-enforce paddle-mobile)
+#    target_link_libraries(test-enforce paddle-mobile)
+#
-    # gen test - test if openmp works
+#    # gen test - test if openmp works
-    ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-openmp paddle-mobile)
+#    target_link_libraries(test-openmp paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-mobilenetssd paddle-mobile)
+#    target_link_libraries(test-mobilenetssd paddle-mobile)
+#
-     # gen test
+#     # gen test
-    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-mobilenet-combine paddle-mobile)
+#    target_link_libraries(test-mobilenet-combine paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-genet paddle-mobile)
+#    target_link_libraries(test-genet paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp  test_include.h)
+#    ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp  test_include.h)
-    target_link_libraries(test-sigmoid paddle-mobile)
+#    target_link_libraries(test-sigmoid paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-depthwise-conv-op paddle-mobile)
+#    target_link_libraries(test-depthwise-conv-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-mobilenet paddle-mobile)
+#    target_link_libraries(test-mobilenet paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-conv-add-relu-op paddle-mobile)
+#    target_link_libraries(test-conv-add-relu-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
+#    target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-nlp paddle-mobile)
+#    target_link_libraries(test-nlp paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-gru-op  operators/test_gru_op.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-gru-op  operators/test_gru_op.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-gru-op paddle-mobile)
+#    target_link_libraries(test-gru-op paddle-mobile)
+#
-    # gen test
+#    # gen test
+#
-    ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-inceptionv4 paddle-mobile)
+#    target_link_libraries(test-inceptionv4 paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h  test_include.h executor_for_test.h)
+#    ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-alexnet paddle-mobile)
+#    target_link_libraries(test-alexnet paddle-mobile)
+#
-    ADD_EXECUTABLE(test-googlenetv1  net/test_googlenetv1_combine.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-googlenetv1  net/test_googlenetv1_combine.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-googlenetv1 paddle-mobile)
+#    target_link_libraries(test-googlenetv1 paddle-mobile)
+#
-    # gen test
+#    # gen test
-    ADD_EXECUTABLE(test-fssd  net/test_mobilenet_025_fssd.cpp test_helper.h  test_include.h)
+#    ADD_EXECUTABLE(test-fssd  net/test_mobilenet_025_fssd.cpp test_helper.h  test_include.h)
-    target_link_libraries(test-fssd paddle-mobile)
+#    target_link_libraries(test-fssd paddle-mobile)
    # gen test
    ADD_EXECUTABLE(test-mobilenetgpu  net/test_mobilenet_GPU.cpp test_helper.h  test_include.h)