diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd44f8041cfe0c27094c554bd65c9f6703289c88..f824a25efb870556d88e62bc198f3afd3954de79 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.6)
+cmake_minimum_required(VERSION 3.0)
 option(USE_OPENMP "openmp support" OFF)
 
 project(paddle-mobile)
@@ -30,6 +30,7 @@ else()
     set(CMAKE_BUILD_TYPE Release)
     set(CMAKE_CXX_FLAGS "-Os ${CMAKE_CXX_FLAGS}")
     set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
+    add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
 endif()
 
 if(USE_EXCEPTION)
diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a6ed8d400ede11a09c4e10ac4dd84273dcf079dc
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
@@ -0,0 +1,209 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import MetalKit
+import Foundation
+
+@objc public enum Platform: Int{
+  case CPU, GPU
+}
+
+class ScaleKernel: CusomKernel {
+  init(device: MTLDevice, shape: Shape) {
+    if computePrecision == .Float32 {
+      super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
+    } else if computePrecision == .Float16 {
+      super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
+    } else {
+      fatalError(" unsupport ")
+    }
+  }
+  
+}
+
+public class Runner: NSObject {
+  var program: Program?
+  var executor: Executor<Float32>?
+  var queue: MTLCommandQueue?
+  var textureLoader: MTKTextureLoader?
+  public let net: Net
+  let device: MTLDevice?
+  let platform: Platform
+  var cpuPaddleMobile: PaddleMobileCPU?
+  let numel: Int
+  let meansNumber: [NSNumber]
+  
+  // dims num nchw
+  let dimsNum: [NSNumber]
+  /**
+   * inNet:        需要运行的网络
+   * commandQueue: GPU 是需要传入
+   * inPlatform:   需要使用的平台, GPU or CPU
+   */
+  @objc public init(inNet: Net, commandQueue: MTLCommandQueue?, inPlatform: Platform) {
+    net = inNet
+    queue = commandQueue
+    device = queue?.device
+    platform = inPlatform
+    if let inDevice = device {
+      textureLoader = MTKTextureLoader.init(device: inDevice)
+    }
+    if platform == .CPU {
+      cpuPaddleMobile = PaddleMobileCPU.init()
+    }
+    numel = net.dim.n * net.dim.c * net.dim.h * net.dim.w
+    meansNumber = net.means.map { NSNumber.init(value: $0) }
+    dimsNum = [NSNumber.init(value: net.dim.n),
+               NSNumber.init(value: net.dim.c),
+               NSNumber.init(value: net.dim.h),
+               NSNumber.init(value: net.dim.w)]
+  }
+  
+  /**
+   * load 模型, 返回 true 可进行预测
+   */
+  @objc public func load() -> Bool {
+    if platform == .GPU {
+      guard let inDevice = device, let inQueue = queue else {
+        print(" paddle mobile gpu load error, need MTLCommandQueue")
+        return false
+      }
+      let loader = Loader<Float32>.init()
+      do {
+//        program = try loader.load(device: inDevice, paramPointer: net.paramPointer!, paramSize: net.paramSize,modePointer:net.modelPointer!,modelSize:net.modelSize)
+        program = try loader.load(device: inDevice, modelPath: net.modelPath, paraPath: net.paramPath)
+        net.updateProgram(program: program!)
+
+        executor = try Executor<Float32>.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!)
+      } catch let error {
+        print(error)
+        return false
+      }
+    } else {
+      return cpuPaddleMobile?.load(net.modelPath, andWeightsPath: net.paramPath) ?? false
+    }
+    return true
+  }
+  
+  @objc public func predict(inputPointer: UnsafeMutablePointer<Float32>, completion: @escaping ( _ success: Bool, _ result: PaddleMobileCPUResult?) -> Void) {
+    
+    guard let res = cpuPaddleMobile?.predictInput(inputPointer, dim: dimsNum) else {
+      completion(false, nil)
+      return
+    }
+    completion(true, res)
+  }
+  
+  /**
+   * GPU 版本 predict
+   * texture: 需要预测的 texture 需要做过预处理
+   * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
+   */
+  @objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) {
+    do {
+      try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in
+        guard let SSelf = self else {
+          fatalError( " self nil " )
+        }
+        let result = SSelf.net.fetchResult(paddleMobileRes: res)
+        completion(true, result)
+      }, preProcessKernle: self.net.preprocessKernel, except: self.net.except)
+    } catch let error {
+      print(error)
+      completion(false, nil)
+      return
+    }
+  }
+  
+  /**
+   * CPU GPU 通用版本 predict
+   * cgImage: 需要预测的图片
+   * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
+   */
+//  @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
+//    if platform == .GPU {
+//      getTexture(image: cgImage) { [weak self] (texture) in
+//        guard let SSelf = self else {
+//          fatalError( "" )
+//        }
+//        SSelf.predict(texture: texture, completion: completion)
+//      }
+//    } else if platform == .CPU {
+//      let input = preproccess(image: cgImage)
+//      predict(inputPointer: input, completion: completion)
+//      input.deinitialize(count: numel)
+//      input.deallocate()
+//    }
+//  }
+  
+  /*
+   * 清理内存, 调用此函数后, 不能再使用, 需重新 load
+   */
+  @objc public func clear() {
+    if platform == .GPU {
+      executor?.clear()
+      executor = nil
+      program = nil
+    } else if platform == .CPU {
+      cpuPaddleMobile?.clear()
+    }
+  }
+  
+  @objc public func preproccess(image: CGImage) -> UnsafeMutablePointer<Float> {
+    let output = UnsafeMutablePointer<Float>.allocate(capacity: numel)
+    let means = net.means.map { NSNumber.init(value: $0) }
+    let dims = [NSNumber.init(value: net.dim.n),
+                NSNumber.init(value: net.dim.c),
+                NSNumber.init(value: net.dim.h),
+                NSNumber.init(value: net.dim.w)]
+    cpuPaddleMobile?.preprocess(image, output: output, means: means, scale: net.scale, dim: dims)
+    return output
+  }
+  
+  /*
+   * 获取 texture, 对 texture 进行预处理, GPU 预测时使用
+   */
+  @objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
+    let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
+    scaleTexture(input: texture!, complete: getTexture)
+  }
+  
+  public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) {
+    
+    guard let inQueue = queue, let inDevice = device else {
+      fatalError( " queue or devcie nil " )
+    }
+    
+    guard let buffer = inQueue.makeCommandBuffer() else {
+      fatalError( " make buffer error" )
+    }
+    
+    let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3))
+    
+    do {
+      try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
+    } catch let error {
+      print(error)
+      fatalError()
+    }
+    
+    buffer.addCompletedHandler { (buffer) in
+      complete(scaleKernel.outputTexture)
+    }
+    buffer.commit()
+  }
+}
+
+
diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp
index d1014ff87a86efeeefec731ebac05a8a30abe3b1..ec91946d95be9b4e4384606fd67a69c552166a5e 100644
--- a/src/fpga/api.cpp
+++ b/src/fpga/api.cpp
@@ -218,6 +218,8 @@ int get_aligned_filter_num(int num) {
 
 void format_filter(framework::Tensor *filter_tensor, float max_value,
                    int group_num) {
+  filter_tensor->scale[0] = float(max_value / 127.0);
+  filter_tensor->scale[1] = float(127.0 / max_value);
   auto dims = filter_tensor->dims();
   auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3];
   auto data_ptr = filter_tensor->mutable_data<float>();
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index edec033162c9b1679192ec983592609700bf8780..1acfd37fb6821a82888fc0eebd932358536ba675 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -393,6 +393,77 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
   return result_vector;
 }
 
+#ifdef PADDLE_MOBILE_FPGA
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
+                                        string var_name) {
+  framework::Variable *g_feed_value = program_.scope->Var(var_name);
+  framework::Tensor *feed_tensor =
+      g_feed_value->GetMutable<framework::LoDTensor>();
+  feed_tensor->Resize(t.dims());
+  feed_tensor->ShareDataWith(t);
+};
+
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
+  InjectVariable(t, "feed");
+};
+
+template <typename Dtype, Precision P>
+std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
+  std::shared_ptr<framework::BlockDesc> to_predict_block =
+      to_predict_program_->Block(0);
+  auto &ops = ops_of_block_[*to_predict_block.get()];
+
+  PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range");
+  auto op = id < 0 ? ops[ops.size() - 1] : ops[id];
+  auto output_map = op->Outputs();
+  std::vector<std::string> out_keys = op->GetOutKeys();
+  PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "this op contains no output");
+  auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
+      out_keys[0], output_map, *(program_.scope));
+  return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
+};
+
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::Predict_From_To(int start, int end) {
+  std::shared_ptr<framework::BlockDesc> to_predict_block =
+      to_predict_program_->Block(0);
+  auto &ops = ops_of_block_[*to_predict_block.get()];
+  end = end < 0 ? (int)ops.size() : end;
+  PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
+                        "start or end parameter is wrong");
+
+#ifdef PADDLE_MOBILE_PROFILE
+  std::vector<ProfInfo> profile(ops.size());
+#endif
+  for (int i = start; i < end; i++) {
+#ifdef PADDLE_MOBILE_PROFILE
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
+#endif
+    DLOG << "Running op: " << i << "  " << ops[i]->Type();
+    ops[i]->Run();
+
+#ifdef PADDLE_MOBILE_PROFILE
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
+#endif
+  }
+};
+
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::Predict_From(int start) {
+  Predict_From_To(start);
+};
+
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::Predict_To(int end) {
+  Predict_From_To(0, end);
+};
+#endif
+
 template class Executor<CPU, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP32>;
 template class Executor<FPGA, Precision::FP32>;
diff --git a/src/jni/PML.java b/src/jni/PML.java
index 717d9ebb972a2ba36aec33ff59868ff8f0530c5b..9cbea253ff54ca82cb5059ea096d5a436018119a 100644
--- a/src/jni/PML.java
+++ b/src/jni/PML.java
@@ -9,6 +9,14 @@ public class PML {
      */
     public static native boolean load(String modelDir);
 
+    /**
+     * load seperated model
+     *
+     * @param modelDir model dir
+     * @return isloadsuccess
+     */
+    public static native boolean loadnlp(String modelDir);
+
     /**
      * load combined model
      *
diff --git a/src/jni/paddle_mobile_jni.cpp b/src/jni/paddle_mobile_jni.cpp
index 111ec35def78afc52360f163450ab8003430121b..8f3350e17435ec3843525d446d10078753cd1e8f 100644
--- a/src/jni/paddle_mobile_jni.cpp
+++ b/src/jni/paddle_mobile_jni.cpp
@@ -74,6 +74,28 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
   return static_cast<jboolean>(isLoadOk);
 }
 
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_PML_loadnlp(JNIEnv *env, jclass thiz, jstring modelPath) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  ANDROIDLOGI("load invoked");
+  bool optimize = true;
+  bool isLoadOk = false;
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), optimize, false, true);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             optimize, false, true);
+#endif
+  return static_cast<jboolean>(isLoadOk);
+}
+
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadQualified(
     JNIEnv *env, jclass thiz, jstring modelPath) {
   std::lock_guard<std::mutex> lock(shared_mutex);
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index 98d80a1226787b380d4df13014eaa2671e538d55..974405e04779fe322a5ee70b4f1ce042ec149126 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -45,8 +45,8 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
 
   void RunImpl() const {
     auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
-    auto input_ptr = input->data<float>();
     fpga::format_image(input);
+    auto input_ptr = input->data<float>();
     Tensor *output = param_.Out();
     auto output_ptr = output->mutable_data<half>();
 
diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp
index 9de1511746f70c225e2d978a43b43cb34ad9143f..ec7f04e3760f805cc51fd20c13913d13a286a96b 100644
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -47,7 +47,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
   concatArgs.image_num = (uint32_t)image_num;
   concatArgs.images_in = images_in;
   concatArgs.scales_in = scales_in;
-  concatArgs.image_out = (half *)out->mutable_data<float>();
+  concatArgs.image_out = (half *)out->data<float>();
   concatArgs.scale_out = out->scale;
   concatArgs.channel_num = channel_num;
   concatArgs.height = (uint32_t)height;
diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp
index 20c86a5c73bc9c35b8f8fd430013bb97d269fb4a..a7f7b0cf57f3c1498a6e9f36cb7196cf9f8b4ceb 100644
--- a/src/operators/kernel/fpga/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/softmax_kernel.cpp
@@ -24,19 +24,24 @@ namespace operators {
 
 template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
-  const Tensor *input = param->InputX();
-
+  auto input = const_cast<Tensor *>(param->InputX());
   auto input_ptr = input->data<float>();
-  auto output = param->Out();
-  auto output_ptr = output->mutable_data<float>();
-  fpga::BypassArgs args;
-  args.convert_type = fpga::DATA_FP16_TO_FP32;
-  args.layout_type = fpga::LAYOUT_NO_CONVERT;
-  args.image.address = (void *)(input_ptr);
-  args.image.height = (uint32_t)input->dims()[0];
-  args.image.width = (uint32_t)input->dims()[1];
-  args.image.channels = 1;
-  args.output.address = output_ptr;
+  auto float_input = new Tensor;
+  float_input->mutable_data<float>(input->dims());
+  fpga::format_fp32_ofm(float_input);
+
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+  args.input_layout_type = fpga::LAYOUT_HWC;
+  args.output_layout_type = fpga::LAYOUT_CHW;
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.image.address = input_ptr;
+  args.image.height = 1;
+  args.image.width = 1;
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.output.address = float_input->data<float>();
+  args.output.scale_address = float_input->scale;
+  param->SetFloatInput(float_input);
   param->SetFpgaArgs(args);
 
   return true;
diff --git a/src/operators/sigmoid_op.cpp b/src/operators/sigmoid_op.cpp
index 1bb42792e760cf02c16a0ea38f759fbb52827fcf..04410ece583b63f5b8d9a04342f6418a85475561 100644
--- a/src/operators/sigmoid_op.cpp
+++ b/src/operators/sigmoid_op.cpp
@@ -18,6 +18,7 @@ limitations under the License. */
 
 namespace paddle_mobile {
 namespace operators {
+
 template <typename DeviceType, typename T>
 void SigmoidOp<DeviceType, T>::InferShape() const {
   this->param_.Out()->Resize(this->param_.InputX()->dims());
diff --git a/src/operators/sigmoid_op.h b/src/operators/sigmoid_op.h
index 406db6db114775460b4af616b372cfb7285d7ac1..62fc65dce1025fff629dd81ea4a7f797ded1a1d6 100644
--- a/src/operators/sigmoid_op.h
+++ b/src/operators/sigmoid_op.h
@@ -17,13 +17,13 @@ limitations under the License. */
 #pragma once
 
 #include <string>
-
 #include "framework/operator.h"
 #include "operators/kernel/sigmoid_kernel.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
+
 template <typename DeviceType, typename T>
 class SigmoidOp : public framework::OperatorWithKernel<
                       DeviceType, SigmoidParam<DeviceType>,
@@ -43,6 +43,7 @@ class SigmoidOp : public framework::OperatorWithKernel<
 
   void InferShape() const override;
 };
+
 }  // namespace operators
 }  // namespace paddle_mobile
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 03bf4c50bb3726b109bf38bfa34e1c7a000a23f8..d68a8c1fb1a2cd0584d80d5afa8ed8f439d5d5d4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,58 +1,321 @@
 set(dir ${CMAKE_CURRENT_SOURCE_DIR})
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
+set(FOUND_MATCH OFF)
 
-if ("googlenet" IN_LIST NET)
+set(CON -1)
+
+message(STATUS "nets :${NET}")
+
+list(FIND NET "googlenet" CON)
+if (CON GREATER -1)
     # gen test
-    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-googlenet paddle-mobile)
-elseif ("mobilenet" IN_LIST NET)
+
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "mobilenet" CON)
+if (CON GREATER -1)
     # gen test
-    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet paddle-mobile)
 
     # gen test
-    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet-combine paddle-mobile)
+    set(FOUND_MATCH ON)
+
+endif ()
 
-elseif ("yolo" IN_LIST NET)
+list(FIND NET "yolo" CON)
+if (CON GREATER -1)
     # gen test
-    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-yolo paddle-mobile)
-elseif ("squeezenet" IN_LIST NET)
     # gen test
-    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test_yolo_combined paddle-mobile)
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "squeezenet" CON)
+if (CON GREATER -1)
+    # gen test
+    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-squeezenet paddle-mobile)
-elseif("resnet" IN_LIST NET)
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "resnet" CON)
+if (CON GREATER -1)
     # gen test
-    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
-elseif("FPGAnets" IN_LIST NET)
-    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "FPGAnets" CON)
+if (CON GREATER -1)
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
 
-    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet50 paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-EW paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-conv paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-pooling paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-bypass paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-softmax paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-concat paddle-mobile)
+
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-tensor-quant paddle-mobile)
 
-    ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h  test_include.h)
+    ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h test_include.h)
     target_link_libraries(test-fpga-concat-op paddle-mobile)
 
-    ADD_EXECUTABLE(test-format-data fpga/test_format_data.cpp test_helper.h  test_include.h)
+    ADD_EXECUTABLE(test-format-data fpga/test_format_data.cpp test_helper.h test_include.h)
     target_link_libraries(test-format-data paddle-mobile)
-elseif("mobilenetssd" IN_LIST NET)
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "mobilenetssd" CON)
+if (CON GREATER -1)
     # gen test
-    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenetssd paddle-mobile)
 
-elseif("nlp" IN_LIST NET)
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "nlp" CON)
+if (CON GREATER -1)
     # gen test
-    ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-nlp paddle-mobile)
 
     # gen test
-    ADD_EXECUTABLE(test-gru-op  operators/test_gru_op.cpp test_helper.h  test_include.h)
+    ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h)
     target_link_libraries(test-gru-op paddle-mobile)
-else ()
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "mobilenetfssd" CON)
+if (CON GREATER -1)
+    # gen test
+    ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
+    target_link_libraries(test-fssd paddle-mobile)
+
+    set(FOUND_MATCH ON)
+
+endif ()
+
+list(FIND NET "genet" CON)
+if (CON GREATER -1)
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+    set(FOUND_MATCH ON)
+
+endif ()
+
+if (NOT FOUND_MATCH)
+    # gen test
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-squeezenet paddle-mobile)
+
     # gen test
-    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-yolo paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test_yolo_combined paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-googlenet paddle-mobile)
-endif()
+
+    # gen test
+    ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-conv-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-mul-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-elementwiseadd-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-concat-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-lrn-op operators/test_lrn_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-lrn-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-batchnorm-op operators/test_batchnorm_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-batchnorm-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-priorbox-op operators/test_prior_box_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-priorbox-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-boxcoder-op operators/test_box_coder_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-boxcoder-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-transpose-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-multiclassnms-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-reshape-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-relu-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-fc-op paddle-mobile)
+
+    # gen test log
+    ADD_EXECUTABLE(test-log common/test_log.cpp)
+    target_link_libraries(test-log paddle-mobile)
+
+    # gen test log
+    ADD_EXECUTABLE(test-load framework/test_load.cpp)
+    target_link_libraries(test-load paddle-mobile)
+
+    # gen test log
+    ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp)
+    target_link_libraries(test-loadmemory paddle-mobile)
+
+    ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp)
+    target_link_libraries(test-inference-api paddle-mobile)
+
+
+    # gen test log
+    # gen test
+    ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
+    target_link_libraries(test-optimize paddle-mobile)
+
+
+    #gen test
+    ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-pool paddle-mobile)
+
+    #gen test
+    ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-softmax paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp)
+    target_link_libraries(test-gemm-accuracy paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp)
+    target_link_libraries(test-gemm-perf paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-enforce common/test_enforce.cpp)
+    target_link_libraries(test-enforce paddle-mobile)
+
+    # gen test - test if openmp works
+    ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-openmp paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenetssd paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
+    target_link_libraries(test-sigmoid paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-depthwise-conv-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-conv-add-relu-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-nlp paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-gru-op paddle-mobile)
+
+    # gen test
+
+    ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-inceptionv4 paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-alexnet paddle-mobile)
+
+    ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h)
+    target_link_libraries(test-googlenetv1 paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
+    target_link_libraries(test-fssd paddle-mobile)
+
+
+    #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
+
+
+endif ()
diff --git a/test/fpga/test_format_data.cpp b/test/fpga/test_format_data.cpp
index 0fa3c23d2af6220959d434a6805adc9a7ae984a5..1d67c3110ff86dc6fba2d49412edb70ab1c9c16d 100644
--- a/test/fpga/test_format_data.cpp
+++ b/test/fpga/test_format_data.cpp
@@ -22,7 +22,7 @@ namespace fpga = paddle_mobile::fpga;
 using std::cout;
 using std::endl;
 
-int main() {
+void test_format_image() {
   std::vector<int> dims{1, 1, 3, 3};
   std::vector<float> elements{1, 2, 3, 4, 5, 6, 7, 8, 9};
   frame::DDim ddim = frame::make_ddim(dims);
@@ -44,6 +44,50 @@ int main() {
   cout << endl;
   auto dd = image.dims();
   cout << dims[0] << dims[1] << dims[2] << dims[3] << endl;
+}
+
+void test_fill_conv_arg() {
+  Tensor input, out, filter;
+  DLOG << "Setup input";
+  SetupTensor<int16_t>(&input, {1, 250, 32, 30}, static_cast<int16_t>(0),
+                       static_cast<int16_t>(1));
+
+  DLOG << "Setup filter";
+  SetupTensor<float>(&filter, {1001, 250, 3, 3}, static_cast<float>(0),
+                     static_cast<float>(1));
+
+  DLOG << "Setup output";
+  SetupTensor<int16_t>(&out, {1, 1001, 32, 30}, static_cast<int16_t>(0),
+                       static_cast<int16_t>(1));
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * 1001 * sizeof(float));
+
+  DLOG << "find max";
+  float max_value = fpga::filter_find_max(&filter);
+  DLOG << "format filter";
+  fpga::format_filter(&filter, max_value, 1);
+
+  DLOG << "format bs_ptr";
+  int element_num_per_div = fpga::get_filter_num_per_div(&filter, 1);
+  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, 1001);
 
+  DLOG << "format ofm";
+  fpga::format_fp16_ofm(&out);
+  DLOG << "Build arg";
+
+  fpga::WrapperConvArgs arg;
+  fpga::fill_conv_arg(&arg, &input, &out, &filter, true, 1, 1, 1, 1, 1, bs_ptr);
+  DLOG << "splitNum: " << arg.split_num << "  group_num:" << arg.group_num
+       << "  filter_num:" << arg.filter_num;
+
+  for (int i = 0; i < arg.split_num; i++) {
+    DLOG << arg.conv_args[i].filter_num << "   " << arg.conv_args[i].sb_address
+         << "   " << arg.conv_args[i].filter_address << "   "
+         << arg.conv_args[i].filter_scale_address;
+  }
+}
+
+int main() {
+  test_format_image();
+  test_fill_conv_arg();
   return 0;
 }
diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cca6793f10da5a0784cf8a3ba2d0104f3508028d
--- /dev/null
+++ b/test/fpga/test_resnet50.cpp
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+static const char *g_resnet_combine = "../models/resnet50";
+
+int main() {
+  DLOG << paddle_mobile::fpga::open_device();
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model",
+                         std::string(g_resnet_combine) + "/params", true)) {
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+
+    paddle_mobile.FeedData(input_tensor);
+    paddle_mobile.Predict_To(-1);
+    //    paddle_mobile.Predict_From(73);
+    //    paddle_mobile.Predict_From_To(72, 73);
+
+    DLOG << "Computation done";
+    return 0;
+  }
+}
diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp
index 25cad4feaa706899122902dee2a8f0c915e78975..64fa42658be6b39fabe9bb26296a426949d31197 100644
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -21,6 +21,7 @@ int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
+
   //  auto program = loader.Load(g_googlenet, true);
   //  auto program = loader.Load(g_mobilenet_ssd, true);
 
diff --git a/test/net/test_alexnet.cpp b/test/net/test_alexnet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..50053fe82f95177fd786c1c8f8f5c9b7a521b888
--- /dev/null
+++ b/test/net/test_alexnet.cpp
@@ -0,0 +1,59 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
+  //                     std::string(g_mobilenet_detect) + "/params", true);
+
+  auto isok = paddle_mobile.Load(g_alexnet, true);
+  if (isok) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    DLOG << vec_result;
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+
+  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
+               "是否存在?"
+            << std::endl;
+  return 0;
+}
diff --git a/test/net/test_genet_combine.cpp b/test/net/test_genet_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6b0505a670f1a58ed7d09cc4854ef52b05b0649
--- /dev/null
+++ b/test/net/test_genet_combine.cpp
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
+                         std::string(g_genet_combine) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 128, 128};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    // 预热一次
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  std::cout
+      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
+      << std::endl;
+  return 0;
+}
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 0d21f3032c58302cf8bf655c406e8ee8a5b0e077..a2f030eeac5c2584b33fad2b082b9d5513707260 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -17,7 +17,14 @@ limitations under the License. */
 #include "../test_include.h"
 
 int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+
+#ifdef PADDLE_MOBILE_CPU
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
+
   paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
diff --git a/test/net/test_googlenetv1_combine.cpp b/test/net/test_googlenetv1_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9aab25afd2aa6ece4e6b99bbd368b8a5be2e3106
--- /dev/null
+++ b/test/net/test_googlenetv1_combine.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_googlenetv1_combined) + "/model",
+                         std::string(g_googlenetv1_combined) + "/params",
+                         false)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 160, 160};
+    GetInput<float>(g_img, &input, dims);
+
+    for (int i = 0; i < input.size(); i += 1000) {
+      std::cout << input[i] << std::endl;
+    }
+    //    auto vec_result = paddle_mobile.Predict(input, dims);
+    //    std::vector<float>::iterator biggest =
+    //        std::max_element(std::begin(vec_result), std::end(vec_result));
+    //    std::cout << " Max element is " << *biggest << " at position "
+    //              << std::distance(std::begin(vec_result), biggest) <<
+    //              std::endl;
+
+    //    // 预热十次
+    //    for (int i = 0; i < 1; ++i) {
+    //      auto vec_result = paddle_mobile.Predict(input, dims);
+    //    }
+    auto time3 = time();
+
+    auto vec_result = paddle_mobile.Predict(input, dims);
+
+    for (int j = 0; j < vec_result.size(); ++j) {
+      std::cout << j << " : " << vec_result[j] << std::endl;
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
+              << std::endl;
+  }
+
+  return 0;
+}
diff --git a/test/net/test_inceptionv4.cpp b/test/net/test_inceptionv4.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fbbc9dd39e64f7a8ea745cf7489e46f00ffe1413
--- /dev/null
+++ b/test/net/test_inceptionv4.cpp
@@ -0,0 +1,59 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
+  //                     std::string(g_mobilenet_detect) + "/params", true);
+
+  auto isok = paddle_mobile.Load(g_inceptionv4, true);
+  if (isok) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    //        DLOG << vec_result;
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+
+  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
+               "是否存在?"
+            << std::endl;
+  return 0;
+}
diff --git a/test/net/test_mobilenet+ssd.cpp b/test/net/test_mobilenet+ssd.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..85083ca441ad242ffb5b63dd612a0e35e3589f99
--- /dev/null
+++ b/test/net/test_mobilenet+ssd.cpp
@@ -0,0 +1,48 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  auto isok = paddle_mobile.Load(
+      std::string(g_mobilenet_ssd_gesture) + "/model",
+      std::string(g_mobilenet_ssd_gesture) + "/params", true);
+  //  auto isok = paddle_mobile.Load(g_mobilenet_ssd, false);
+  if (isok) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 300, 300};
+    GetInput<float>(g_hand, &input, dims);
+
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      auto output = paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto output = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  return 0;
+}
diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ed7d3b756cfef9554028e1d33f4dd86bf58e4b8
--- /dev/null
+++ b/test/net/test_mobilenet.cpp
@@ -0,0 +1,59 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
+  //                     std::string(g_mobilenet_detect) + "/params", true);
+
+  auto isok = paddle_mobile.Load(g_mobilenet, true);
+  if (isok) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    DLOG << vec_result;
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+
+  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
+               "是否存在?"
+            << std::endl;
+  return 0;
+}
diff --git a/test/net/test_mobilenet_025_fssd.cpp b/test/net/test_mobilenet_025_fssd.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c0d037ceb05f57361f1385cb9959beed66186e4f
--- /dev/null
+++ b/test/net/test_mobilenet_025_fssd.cpp
@@ -0,0 +1,61 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main(int argc, char **argv) {
+  int times = 10;
+  if (argc <= 1) {
+    times = 10;
+    std::cout << "没有输入 , 使用默认10次 " << times << std::endl;
+  } else {
+    std::string arstr = argv[1];
+    times = std::stoi(arstr);
+    std::cout << "input times: " << times << std::endl;
+  }
+
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(1);
+  auto isok =
+      paddle_mobile.Load(std::string(g_fluid_fssd_new) + "/model",
+                         std::string(g_fluid_fssd_new) + "/params", true);
+  if (isok) {
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 160, 160};
+    GetInput<float>(g_imgfssd_ar1, &input, dims);
+    std::cout << "预热10次....." << std::endl;
+
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      auto output = paddle_mobile.Predict(input, dims);
+    }
+    std::cout << "开始....." << std::endl;
+
+    double time_sum = 0;
+
+    for (int i = 0; i < times; ++i) {
+      auto time3 = time();
+      auto output = paddle_mobile.Predict(input, dims);
+      auto time4 = time();
+      double timeDiff = time_diff(time3, time4);
+      time_sum += timeDiff;
+      std::cout << "第" << i << "次"
+                << "predict cost :" << timeDiff << "ms" << std::endl;
+    }
+    std::cout << "平均时间:" << time_sum / times << "ms" << std::endl;
+  }
+  return 0;
+}
diff --git a/test/net/test_mobilenet_combine.cpp b/test/net/test_mobilenet_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..073607795967af09c81bc0a0c492d065bce7ed72
--- /dev/null
+++ b/test/net/test_mobilenet_combine.cpp
@@ -0,0 +1,54 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
+                         std::string(g_mobilenet_combined) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  std::cout
+      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
+      << std::endl;
+  return 0;
+}
diff --git a/test/net/test_nlp.cpp b/test/net/test_nlp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..961e74d23cfd9bfe3649e256ab86cc4ea4ed0c95
--- /dev/null
+++ b/test/net/test_nlp.cpp
@@ -0,0 +1,86 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
+  //                     std::string(g_mobilenet_detect) + "/params", true);
+
+  auto isok = paddle_mobile.Load(g_nlp, true, false, true);
+
+  //  auto isok = paddle_mobile.Load(std::string(g_nlp) + "/model",
+  //                                 std::string(g_nlp) + "/params", false);
+  if (isok) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+    //    1064 1603 644 699 2878 1219 867 1352 8 1 13 312 479
+
+    std::vector<int64_t> ids{1918, 117, 55, 97, 1352, 4272, 1656, 903};
+
+    paddle_mobile::framework::LoDTensor words;
+    auto size = static_cast<int>(ids.size());
+    paddle_mobile::framework::LoD lod{{0, ids.size()}};
+    DDim dims{size, 1};
+    words.Resize(dims);
+    words.set_lod(lod);
+    DLOG << "words lod : " << words.lod();
+    auto *pdata = words.mutable_data<int64_t>();
+    size_t n = words.numel() * sizeof(int64_t);
+    DLOG << "n :" << n;
+    memcpy(pdata, ids.data(), n);
+    DLOG << "words lod 22: " << words.lod();
+    auto time3 = time();
+    for (int i = 0; i < 1; ++i) {
+      auto vec_result = paddle_mobile.PredictLod(words);
+      DLOG << *vec_result;
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
+              << std::endl;
+  }
+
+  auto time2 = time();
+  std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+  //    1064 1603 644 699 2878 1219 867 1352 8 1 13 312 479
+
+  std::vector<int64_t> ids{1791, 656, 1549, 281, 96};
+
+  paddle_mobile::framework::LoDTensor words;
+  auto size = static_cast<int>(ids.size());
+  paddle_mobile::framework::LoD lod{{0, ids.size()}};
+  DDim dims{size, 1};
+  words.Resize(dims);
+  words.set_lod(lod);
+  DLOG << "words lod : " << words.lod();
+  auto *pdata = words.mutable_data<int64_t>();
+  size_t n = words.numel() * sizeof(int64_t);
+  DLOG << "n :" << n;
+  memcpy(pdata, ids.data(), n);
+  DLOG << "words lod 22: " << words.lod();
+  auto time3 = time();
+  for (int i = 0; i < 1; ++i) {
+    auto vec_result = paddle_mobile.PredictLod(words);
+    DLOG << *vec_result;
+  }
+  auto time4 = time();
+  std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
+            << std::endl;
+  return 0;
+}
diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d2a4abbbfd2c023f1e8220e74f815eda44acb6db
--- /dev/null
+++ b/test/net/test_resnet.cpp
@@ -0,0 +1,70 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+
+#ifdef PADDLE_MOBILE_CPU
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(g_resnet, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 32, 32};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+#ifndef PADDLE_MOBILE_FPGA
+    //   预热十次
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+
+#else
+    auto time3 = time();
+    paddle_mobile.FeedData(input_tensor);
+    paddle_mobile.Predict_To(10);
+    paddle_mobile.Predict_From(10);
+    auto tensor_ptr = paddle_mobile.FetchResult(9);
+    std::cout << "Tensor element number for op[9]: " << tensor_ptr->numel()
+              << std::endl;
+    auto result_ptr = paddle_mobile.FetchResult();
+    std::cout << "Result tensor element number: " << result_ptr->numel()
+              << std::endl;
+
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+#endif
+  }
+  return 0;
+}
diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..02ec8691febbad5ec0e811f7d7bebde1bef54a79
--- /dev/null
+++ b/test/net/test_squeezenet.cpp
@@ -0,0 +1,49 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  //  ../../../test/models/googlenet
+  //  ../../../test/models/mobilenet
+  auto time1 = time();
+  if (paddle_mobile.Load(g_squeezenet, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 227, 227};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+
+  return 0;
+}
diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..700eb10cac6f0b80595d8c53866c7f675d2b56fb
--- /dev/null
+++ b/test/net/test_yolo.cpp
@@ -0,0 +1,49 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  //  ../../../test/models/googlenet
+  //  ../../../test/models/mobilenet
+  auto time1 = time();
+  if (paddle_mobile.Load(g_yolo, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<int64_t> dims{1, 3, 227, 227};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+    // 预热十次
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  return 0;
+}
diff --git a/test/net/test_yolo_combined.cpp b/test/net/test_yolo_combined.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88b889daa946cfaef1d86ff36f416b4643532c89
--- /dev/null
+++ b/test/net/test_yolo_combined.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  //  ../../../test/models/googlenet
+  //  ../../../test/models/mobilenet
+  auto time1 = time();
+
+  if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model",
+                         std::string(g_yolo_combined) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    std::vector<int64_t> dims{1, 3, 416, 416};
+    std::vector<float> input;
+
+    GetInput<float>(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims);
+    std::cout << "input.size():  " << input.size() << std::endl;
+    for (int j = 0; j < 100; ++j) {
+      std::cout << j << " :  " << input[j] << std::endl;
+    }
+    //        // 预热十次
+    //        for (int i = 0; i < 10; ++i) {
+    //            paddle_mobile.Predict(input, dims);
+    //        }
+    auto time3 = time();
+    const vector<float> vector_out = paddle_mobile.Predict(input, dims);
+    std::cout << "--------------------------------------------" << std::endl;
+
+    for (float i : vector_out) {
+      std::cout << i << std::endl;
+    }
+
+    std::cout << "--------------------------------------------" << std::endl;
+
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
+
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  return 0;
+}
diff --git a/test/operators/test_batchnorm_op.cpp b/test/operators/test_batchnorm_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ccad8c1512036c2400a09575b3775e75b26acce
--- /dev/null
+++ b/test/operators/test_batchnorm_op.cpp
@@ -0,0 +1,175 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "../test_helper.h"
+#include "../test_include.h"
+#include "operators/batchnorm_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype>
+class TestBatchNormOp {
+ public:
+  explicit TestBatchNormOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "batch_norm" &&
+            op->Input("X")[0] == "conv2d_5.tmp_0") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Input Mean is : " << op->Input("Mean")[0];
+          DLOG << " Input Variance is : " << op->Input("Variance")[0];
+          DLOG << " Input Scale is : " << op->Input("Scale")[0];
+          DLOG << " Input Bias is : " << op->Input("Bias")[0];
+          DLOG << " Output Y is : " << op->Output("Y")[0];
+          DLOG << " epsilon : " << op->GetAttrMap().at("epsilon").Get<float>();
+          std::shared_ptr<operators::BatchNormOp<Dtype, float>> lrn =
+              std::make_shared<operators::BatchNormOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(lrn);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_bn(const Tensor &t1, const Tensor &t2,
+                                     const Tensor &t3, const Tensor &t4,
+                                     const Tensor &t5) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("conv2d_5.tmp_0");
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *mean_feed_value = scope->Var("batch_norm_10.w_1");
+    auto tensor_mean = mean_feed_value->GetMutable<LoDTensor>();
+    tensor_mean->ShareDataWith(t2);
+
+    Variable *scale_feed_value = scope->Var("batch_norm_10.w_0");
+    auto tensor_scale = scale_feed_value->GetMutable<LoDTensor>();
+    tensor_scale->ShareDataWith(t3);
+
+    Variable *variance_feed_value = scope->Var("batch_norm_10.w_2");
+    auto tensor_variance = variance_feed_value->GetMutable<LoDTensor>();
+    tensor_variance->ShareDataWith(t4);
+
+    Variable *bias_feed_value = scope->Var("batch_norm_10.b_0");
+    auto tensor_bias = bias_feed_value->GetMutable<LoDTensor>();
+    tensor_bias->ShareDataWith(t5);
+
+    Variable *output = scope->Var("batch_norm_10.tmp_2");
+    auto *output_tensor = output->GetMutable<LoDTensor>();
+    output_tensor->mutable_data<float>({1, 256, 38, 38});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_bn(t1, t2, t3, t4, t5, 0);
+    return out_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_bn(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+                  const Tensor &t4, const Tensor &t5, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestBatchNormOp<CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run BatchNormOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+
+  /// input x (4,10,2,2)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {1, 256, 38, 38}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+
+  paddle_mobile::framework::Tensor mean;
+  SetupTensor<float>(&mean, {256}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *mean_ptr = mean.data<float>();
+
+  paddle_mobile::framework::Tensor scale;
+  SetupTensor<float>(&scale, {256}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *scale_ptr = scale.data<float>();
+
+  paddle_mobile::framework::Tensor variance;
+  SetupTensor<float>(&variance, {256}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *variance_ptr = variance.data<float>();
+
+  paddle_mobile::framework::Tensor bias;
+  SetupTensor<float>(&bias, {256}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *bias_ptr = bias.data<float>();
+
+  paddle_mobile::framework::TestBatchNormOp<paddle_mobile::CPU> testBatchNormOp(
+      program);
+
+  auto output_bn =
+      testBatchNormOp.predict_bn(inputx1, mean, scale, variance, bias);
+  auto *output_bn_ptr = output_bn->data<float>();
+
+  DLOG << " (" << inputx1_ptr[0] << " - " << mean_ptr[0] << ")/(("
+       << variance_ptr[0] << " + 0.00001"
+       << ")^0.5)* " << scale_ptr[0] << " + " << bias_ptr[0] << " = ";
+  DLOG << output_bn_ptr[0];
+
+  DLOG << "input_ptr 0 : " << inputx1_ptr[0];
+  DLOG << "output_ptr 0 : " << output_bn_ptr[0];
+
+  return 0;
+}
diff --git a/test/operators/test_box_coder_op.cpp b/test/operators/test_box_coder_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dac0d0b8051ec1790d6982a13ea31ef3f4a64242
--- /dev/null
+++ b/test/operators/test_box_coder_op.cpp
@@ -0,0 +1,197 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "../test_include.h"
+#include "operators/box_coder_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype>
+class TestBoxCoderOp {
+ public:
+  explicit TestBoxCoderOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (auto block_desc : blocks) {
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (auto op : ops) {
+        if (op->Type() == "box_coder" &&
+            op->Input("PriorBox")[0] == "concat_0.tmp_0") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input PriorBox is : " << op->Input("PriorBox")[0];
+          DLOG << " Input PriorBoxVar is : " << op->Input("PriorBoxVar")[0];
+          DLOG << " Input TargetBox is : " << op->Input("TargetBox")[0];
+          DLOG << " OutputBox is : " << op->Output("OutputBox")[0];
+          DLOG << " code_type : "
+               << op->GetAttrMap().at("code_type").Get<std::string>();
+          std::shared_ptr<operators::BoxCoderOp<Dtype, float>> boxcoder =
+              std::make_shared<operators::BoxCoderOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(boxcoder);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_boxcoder(const Tensor &t1, const Tensor &t2,
+                                           const Tensor &t3) {
+    // feed
+    auto scope = program_.scope;
+    Variable *prior_box = scope->Var("concat_0.tmp_0");
+    auto tensor_x1 = prior_box->GetMutable<LoDTensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *prior_box_var = scope->Var("concat_1.tmp_0");
+    auto tensor_x2 = prior_box_var->GetMutable<LoDTensor>();
+    tensor_x2->ShareDataWith(t2);
+
+    Variable *target_box = scope->Var("concat_2.tmp_0");
+    auto tensor_x3 = target_box->GetMutable<LoDTensor>();
+    tensor_x3->ShareDataWith(t3);
+
+    Variable *boxes_output = scope->Var("box_coder_0.tmp_0");
+    auto *boxes_output_tensor = boxes_output->GetMutable<LoDTensor>();
+    boxes_output_tensor->mutable_data<float>({1, 1917, 4});
+
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> outbox_tensor = std::make_shared<LoDTensor>();
+    outbox_tensor.reset(boxes_output_tensor);
+
+    predict_boxcoder(t1, t2, t3, 0);
+
+    return outbox_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_boxcoder(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+                        int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestBoxCoderOp<CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run BoxCoderOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+
+  paddle_mobile::framework::Tensor priorbox;
+  SetupTensor<float>(&priorbox, {1917, 4}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *priorbox_ptr = priorbox.data<float>();
+
+  paddle_mobile::framework::Tensor priorboxvar;
+  SetupTensor<float>(&priorboxvar, {1917, 4}, static_cast<float>(0.1),
+                     static_cast<float>(0.2));
+  auto *priorboxvar_ptr = priorboxvar.data<float>();
+
+  paddle_mobile::framework::Tensor targetbox;
+  SetupTensor<float>(&targetbox, {1, 1917, 4}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *targetbox_ptr = targetbox.data<float>();
+
+  paddle_mobile::framework::TestBoxCoderOp<paddle_mobile::CPU> testBoxCoderOp(
+      program);
+
+  auto output_boxcoder =
+      testBoxCoderOp.predict_boxcoder(priorbox, priorboxvar, targetbox);
+  auto output_boxcoder_ptr = output_boxcoder->data<float>();
+
+  for (int i = 0; i < output_boxcoder->numel(); i++) {
+    DLOG << output_boxcoder_ptr[i];
+  }
+  DLOGF("\n");
+  /// testing 25th bbox.
+  DLOG << "PriorBox**************";
+  DLOG << priorbox_ptr[100];
+  DLOG << priorbox_ptr[101];
+  DLOG << priorbox_ptr[102];
+  DLOG << priorbox_ptr[103];
+  DLOG << "PriorBoxVar**************";
+  DLOG << priorboxvar_ptr[100];
+  DLOG << priorboxvar_ptr[101];
+  DLOG << priorboxvar_ptr[102];
+  DLOG << priorboxvar_ptr[103];
+  DLOG << "TargetBox***************";
+  DLOG << targetbox_ptr[100];
+  DLOG << targetbox_ptr[101];
+  DLOG << targetbox_ptr[102];
+  DLOG << targetbox_ptr[103];
+  DLOG << "OutputBox**************";
+  DLOG << output_boxcoder_ptr[100];
+  DLOG << output_boxcoder_ptr[101];
+  DLOG << output_boxcoder_ptr[102];
+  DLOG << output_boxcoder_ptr[103];
+
+  DLOG << "***********----------------------**************";
+  auto priorbox_w = priorbox_ptr[102] - priorbox_ptr[100];
+  auto priorbox_h = priorbox_ptr[103] - priorbox_ptr[101];
+  auto priorbox_center_x = (priorbox_ptr[100] + priorbox_ptr[102]) / 2;
+  auto priorbox_center_y = (priorbox_ptr[101] + priorbox_ptr[103]) / 2;
+  DLOG << "prior box width : " << priorbox_w;
+  DLOG << "prior box height : " << priorbox_h;
+  DLOG << "prior box center x : " << priorbox_center_x;
+  DLOG << "prior box center y : " << priorbox_center_y;
+  auto target_box_center_x =
+      priorboxvar_ptr[100] * targetbox_ptr[100] * priorbox_w +
+      priorbox_center_x;
+  DLOG << "target_box_center_x : " << target_box_center_x;
+  auto target_box_center_y =
+      priorboxvar_ptr[101] * targetbox_ptr[101] * priorbox_h +
+      priorbox_center_y;
+  DLOG << "target_box_center_y : " << target_box_center_y;
+  auto target_box_width =
+      std::exp(priorboxvar_ptr[102] * targetbox_ptr[102]) * priorbox_w;
+  DLOG << "target_box_width : " << target_box_width;
+  auto target_box_height =
+      std::exp(priorboxvar_ptr[103] * targetbox_ptr[103]) * priorbox_h;
+  DLOG << "target_box_height : " << target_box_height;
+  DLOG << "pre x min : " << target_box_center_x - target_box_width / 2;
+  DLOG << "pre y min : " << target_box_center_y - target_box_height / 2;
+  DLOG << "pre x max : " << target_box_center_x + target_box_width / 2;
+  DLOG << "pre y max : " << target_box_center_y + target_box_height / 2;
+  return 0;
+}
diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edaa4ce1ddba251886c90262895333b0a56c3a07
--- /dev/null
+++ b/test/operators/test_concat_op.cpp
@@ -0,0 +1,87 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/concat_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_googlenet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ConcatOp<paddle_mobile::CPU, float>>
+      executor(program, "concat");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {4, 10, 2, 2}, 0, 1);
+  input_tensors.push_back(input1);
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {4, 20, 2, 2}, 0, 1);
+  input_tensors.push_back(input2);
+  Tensor input3;
+  auto input3_data = CreateInput<float>(&input3, {4, 30, 2, 2}, 0, 1);
+  input_tensors.push_back(input3);
+  Tensor input4;
+  auto input4_data = CreateInput<float>(&input4, {4, 40, 2, 2}, 0, 1);
+  input_tensors.push_back(input4);
+  // 2. input_names
+  vector<string> input_names({
+      "conv2d_3.tmp_1",
+      "conv2d_5.tmp_1",
+      "conv2d_7.tmp_1",
+      "conv2d_8.tmp_1",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"concat_0.tmp_0"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  // 5. test one example.
+  int input_n = 1;
+  int input_c = 2;
+  int input_h = 0;
+  int input_w = 1;
+  int stride0 = input3.numel() / input3.dims()[0];
+  int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1];
+  int stride2 = input3.dims()[3];
+  /// inputx1 (4,10,2,2),
+  /// inputx2 (4,20,2,2),
+  /// inputx3 (4,30,2,2),
+  /// inputx4 (4,40,2,2),
+  /// axis = 1
+  /// output (4,100,2,2)
+  int input_index =
+      input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
+  int output_index = input_n * 100 * 2 * 2 +
+                     (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 +
+                     input_h * 2 + input_w;
+
+  DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index];
+  DLOG << " output [1,32,0,1] = " << output0_data[output_index];
+  return 0;
+}
diff --git a/test/operators/test_conv_add_relu_op.cpp b/test/operators/test_conv_add_relu_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..987f52cd62f91b3bc00cc1ef49bd21913e288d75
--- /dev/null
+++ b/test/operators/test_conv_add_relu_op.cpp
@@ -0,0 +1,45 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/fusion_conv_add_relu_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  //  ../models/image_classification_resnet.inference.model
+  auto program = loader.Load(g_googlenet, true);
+
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<
+      paddle_mobile::CPU,
+      paddle_mobile::operators::FusionConvAddReluOp<paddle_mobile::CPU, float>>
+      executor(program, "fusion_conv_add_relu", true);
+
+  paddle_mobile::framework::Tensor input;
+  GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
+  //  // use SetupTensor if not has local input image .
+  //  SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
+  //                     static_cast<float>(1));
+
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112});
+  auto output = executor.Predict(input, "data", "conv2d_0.tmp_2", out_ddim);
+
+  auto output_ptr = output->data<float>();
+  for (int j = 0; j < 25; ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_cov_op.cpp b/test/operators/test_cov_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a85ad9edba5d3e2256b8d7ee7d7d3c5b7200888d
--- /dev/null
+++ b/test/operators/test_cov_op.cpp
@@ -0,0 +1,44 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/conv_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::GPU_MALI> loader;
+  //  ../models/image_classification_resnet.inference.model
+  auto program = loader.Load(g_googlenet);
+
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::GPU_MALI, paddle_mobile::operators::ConvOp<
+                                             paddle_mobile::GPU_MALI, float>>
+      executor(program, "conv2d");
+
+  paddle_mobile::framework::Tensor input;
+  GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
+  //  // use SetupTensor if not has local input image .
+  //  SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
+  //                     static_cast<float>(1));
+
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112});
+  auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim);
+
+  auto output_ptr = output->data<float>();
+  for (int j = 0; j < 20; ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_depthwise_conv_op.cpp b/test/operators/test_depthwise_conv_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bd2aad19eda896bad3da8a47f5b70b1a923dc1a7
--- /dev/null
+++ b/test/operators/test_depthwise_conv_op.cpp
@@ -0,0 +1,45 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/depthwise_conv_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  //  ../models/image_classification_resnet.inference.model
+  auto program = loader.Load(g_mobilenet_ssd);
+
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU, paddle_mobile::operators::DepthwiseConvOp<
+                                        paddle_mobile::CPU, float>>
+      executor(program, "depthwise_conv2d");
+
+  paddle_mobile::framework::LoDTensor input;
+  // GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
+  // use SetupTensor if not has local input image .
+  SetupTensor<float>(&input, {1, 32, 150, 150}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto input_ptr = input.data<float>();
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 150, 150});
+  auto output = executor.Predict(input, "batch_norm_0.tmp_3",
+                                 "depthwise_conv2d_0.tmp_0", out_ddim);
+
+  auto output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_elementwise_add_op.cpp b/test/operators/test_elementwise_add_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a5e9f7e92701e748df51078b21eb46eec90599d
--- /dev/null
+++ b/test/operators/test_elementwise_add_op.cpp
@@ -0,0 +1,62 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU, paddle_mobile::operators::ElementwiseAddOp<
+                                        paddle_mobile::CPU, float>>
+      executor(program, "elementwise_add");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {1, 3, 224, 224}, 0, 1);
+  input_tensors.push_back(input1);
+
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {224}, 0, 1);
+  input_tensors.push_back(input2);
+
+  // 2. input_names
+  vector<string> input_names({
+      "batch_norm_2.tmp_2",
+      "batch_norm_0.tmp_3",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"elementwise_add_0.tmp_0"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 224, 224});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+  /// output (1,3,224,224)
+  DLOG << "output memory size : " << output[0]->memory_size();
+  DLOG << "output numel : " << output[0]->numel();
+
+  DLOG << input1_data[226] << " + " << input2_data[2] << " = "
+       << output0_data[226];
+}
diff --git a/test/operators/test_fusion_conv_add_bn_relu_op.cpp b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7764d95ed72da613459233bd55ddcffdc444318f
--- /dev/null
+++ b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
@@ -0,0 +1,63 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_include.h"
+#include "operators/fusion_conv_add_bn_relu_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  //  ../models/image_classification_resnet.inference.model
+  auto program = loader.Load(g_mobilenet, true);
+
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::FusionConvAddBNReluOp<
+                    paddle_mobile::CPU, float>>
+      executor(program, "fusion_conv_add_bn_relu", true);
+
+  std::cout << "executor 4 test: " << std::endl;
+
+  paddle_mobile::framework::Tensor input;
+  GetInput<float>(g_test_image_1x3x224x224_banana, &input, {1, 3, 224, 224});
+  //  // use SetupTensor if not has local input image .
+  //  SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
+  //                     static_cast<float>(1));
+
+  DLOG << " fuck: " << input;
+
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 112, 112});
+  std::cout << "before predict: " << std::endl;
+  auto output =
+      executor.Predict(input, "data", "conv2_1_dw_bn.tmp_2", out_ddim);
+  std::cout << "after predict " << std::endl;
+  auto output_ptr = output->data<float>();
+
+  int stride = output->numel() / 100;
+  for (int i = 0; i < 100; i++) {
+    DLOG << " index:" << i * stride << " value: " << output_ptr[i * stride];
+  }
+
+  //  for (int i = 0; i < 100; i++) {
+  //    DLOG << " index:" << i << " value: "<< output_ptr[i];
+  //  }
+
+  //  for (int j = 0; j < output->numel(); ++j) {
+  //    std::cout << " (index: " << j << " value: " << output_ptr[j] << ") ";
+  //  }
+  std::cout << std::endl;
+  return 0;
+}
diff --git a/test/operators/test_fusion_fc_op.cpp b/test/operators/test_fusion_fc_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a23bde45cb74f0f75e655821b15e66b1cef4c081
--- /dev/null
+++ b/test/operators/test_fusion_fc_op.cpp
@@ -0,0 +1,160 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <framework/program/program-optimize/program_optimize.h>
+#include "../test_include.h"
+#include "operators/fusion_fc_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype>
+class TestFcOp {
+ public:
+  explicit TestFcOp(const Program<Dtype> p) : program_(p) {
+    use_optimize_ = true;
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "fc" && op->Input("X")[0] == "pool2d_13.tmp_0") {
+          DLOG << " fc attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Input Y is : " << op->Input("Y")[0];
+          DLOG << " Input Y is : " << op->Input("Z")[0];
+          DLOG << " Output Out is : " << op->Output("Out")[0];
+          std::shared_ptr<operators::FusionFcOp<Dtype, float>> testOp =
+              std::make_shared<operators::FusionFcOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(testOp);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict(const Tensor &t1, const Tensor &t2,
+                                  const Tensor &t3) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x_feed_value = scope->Var("pool2d_13.tmp_0");
+    auto tensor_x = x_feed_value->GetMutable<LoDTensor>();
+    tensor_x->ShareDataWith(t1);
+
+    Variable *y_feed_value = scope->Var("loss3_classifier-loc_weights");
+    auto tensor_y = y_feed_value->GetMutable<LoDTensor>();
+    tensor_y->ShareDataWith(t2);
+
+    Variable *z_feed_value = scope->Var("loss3_classifier-loc_biases");
+    auto tensor_z = z_feed_value->GetMutable<LoDTensor>();
+    tensor_z->ShareDataWith(t3);
+
+    Variable *con_output = scope->Var("loss3_classifier-loc.tmp_1");
+    auto *output_tensor = con_output->GetMutable<LoDTensor>();
+    output_tensor->mutable_data<float>({3, 10});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<LoDTensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict(t1, t2, t3, 0);
+    return out_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+               int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestFcOp<CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run Fc Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  //    "../../../test/models/googlenet"
+  auto program = loader.Load(g_googlenet);
+  paddle_mobile::framework::ProgramOptimize optimize;
+  //  program.originProgram->Description("origin");
+  auto optimize_program = optimize.FusionOptimize(program.originProgram);
+
+  program.optimizeProgram = optimize_program;
+
+  if (optimize_program != nullptr) {
+    optimize_program->Description("optimize");
+  } else {
+    LOG(paddle_mobile::kLOG_ERROR) << "optimize_program is null";
+  }
+
+  /// input x (1,3,224,224)
+  paddle_mobile::framework::LoDTensor inputx;
+  SetupTensor<float>(&inputx, {3, 64, 1, 1}, static_cast<float>(1),
+                     static_cast<float>(1));
+  auto *inputx_ptr = inputx.data<float>();
+  /// input y (224,)
+  paddle_mobile::framework::LoDTensor inputy;
+  SetupTensor<float>(&inputy, {64, 10}, static_cast<float>(1.5),
+                     static_cast<float>(1.5));
+  auto *inputy_ptr = inputy.data<float>();
+
+  paddle_mobile::framework::LoDTensor inputz;
+  SetupTensor<float>(&inputz, {10}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputz_ptr = inputz.data<float>();
+
+  paddle_mobile::framework::TestFcOp<paddle_mobile::CPU> testFcOp(program);
+
+  auto output = testFcOp.predict(inputx, inputy, inputz);
+  auto *output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << "value of output: " << output_ptr[j];
+  }
+
+  DLOG << "1 (3,64) * 2 (64,10) = 96(3,10)";
+  DLOG << "output : 96(3,10) + bias(10)";
+
+  return 0;
+}
diff --git a/test/operators/test_gru_op.cpp b/test/operators/test_gru_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..52ab8b54d709391ea263b74a395a635ce50a18af
--- /dev/null
+++ b/test/operators/test_gru_op.cpp
@@ -0,0 +1,29 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/gru_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_nlp);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::GruOp<paddle_mobile::CPU, float>>
+      executor(program, "gru");
+
+  return 0;
+}
diff --git a/test/operators/test_im2sequence_op.cpp b/test/operators/test_im2sequence_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a7512d3bf3cffcb100fe292e50fc7b7b23fa0aa0
--- /dev/null
+++ b/test/operators/test_im2sequence_op.cpp
@@ -0,0 +1,62 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../executor_for_test.h"
+#include "../test_include.h"
+#include "operators/im2sequence_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_ocr_recg);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ReluOp<paddle_mobile::CPU, float>>
+      executor(program, "im2sequence");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {2, 2, 3, 3}, -1, 1);
+  input_tensors.push_back(input1);
+
+  // 2. input_names
+  vector<string> input_names({
+      "conv2d_19.tmp_1",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"im2sequence_0.tmp_0"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({8, 9});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  for (int j = 0; j < input_tensors[0].numel(); ++j) {
+    DLOG << " value of input: " << input1_data[j];
+  }
+
+  for (int j = 0; j < output[0]->numel(); ++j) {
+    DLOG << " value of output: " << output0_data[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_lrn_op.cpp b/test/operators/test_lrn_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d4d9f8da802fc0f5f885a3b2e81cba695776c29e
--- /dev/null
+++ b/test/operators/test_lrn_op.cpp
@@ -0,0 +1,83 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/lrn_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_googlenet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::LrnOp<paddle_mobile::CPU, float>>
+      executor(program, "lrn");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {3, 4, 2, 2}, 0, 1);
+  input_tensors.push_back(input1);
+
+  // 2. input_names
+  vector<string> input_names({
+      "pool2d_0.tmp_0",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"pool1_norm1.tmp_1"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({3, 4, 2, 2});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  DLOG << " LrnOp input: ";
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      for (int c = 0; c < 2; c++) {
+        for (int d = 0; d < 2; d++) {
+          DLOGF("%f ", input1_data[i * 16 + j * 4 + c * 2 + d]);
+        }
+        DLOGF("\n");
+      }
+      DLOGF("\n");
+    }
+    DLOGF("\n");
+  }
+  DLOG << " LrnOp output: ";
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      for (int c = 0; c < 2; c++) {
+        for (int d = 0; d < 2; d++) {
+          DLOGF("%f ", output0_data[i * 16 + j * 4 + c * 2 + d]);
+        }
+        DLOGF("\n");
+      }
+      DLOGF("\n");
+    }
+    DLOGF("\n");
+  }
+  DLOG << input1_data[0] << " / ((1 + 0.00002 * ( " << input1_data[0] << "^2 + "
+       << input1_data[4] << "^2 + " << input1_data[8] << "^2 ))^0.75) = ";
+  DLOG << output0_data[0];
+  return 0;
+}
diff --git a/test/operators/test_mul_op.cpp b/test/operators/test_mul_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ebf0926890497c0ed622b69f163a9f6f5c8612b
--- /dev/null
+++ b/test/operators/test_mul_op.cpp
@@ -0,0 +1,91 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/mul_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::MulOp<paddle_mobile::CPU, float>>
+      executor(program, "mul");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {3, 2, 1, 1}, 0, 1);
+  input_tensors.push_back(input1);
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {2, 3}, 0, 1);
+  input_tensors.push_back(input2);
+
+  // 2. input_names
+  vector<string> input_names({
+      "pool2d_0.tmp_0",
+      "fc_0.w_0",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"fc_0.tmp_0"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({3, 3});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  auto dim_1 = input1.numel() / input1.dims()[0];
+  DLOG << " input1 : ";
+  for (int i = 0; i < input1.dims()[0]; ++i) {
+    for (int j = 0; j < dim_1; ++j) {
+      DLOGF("%f ", input1_data[i * dim_1 + j]);
+    }
+    DLOGF("\n");
+  }
+
+  auto dim_2 = input2.numel() / input2.dims()[0];
+  DLOG << " input2 : ";
+  for (int i = 0; i < input2.dims()[0]; ++i) {
+    for (int j = 0; j < dim_2; ++j) {
+      DLOGF("%f ", input2_data[i * dim_2 + j]);
+    }
+    DLOGF("\n");
+  }
+
+  auto dim_output0 = output[0]->numel() / output[0]->dims()[0];
+  DLOG << " output : ";
+  for (int i = 0; i < output[0]->dims()[0]; ++i) {
+    for (int j = 0; j < dim_output0; ++j) {
+      DLOGF("%f ", output0_data[i * dim_2 + j]);
+    }
+    DLOGF("\n");
+  }
+
+  /// output (3,3)
+  DLOG << "output memory size : " << output[0]->memory_size();
+  DLOG << "output numel : " << output[0]->numel();
+
+  DLOG << input1_data[0] << " x " << input2_data[0] << " + " << input1_data[1]
+       << " x " << input2_data[0 + 3] << " = " << output0_data[0];
+  return 0;
+}
diff --git a/test/operators/test_multiclass_nms_op.cpp b/test/operators/test_multiclass_nms_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6c41bd4b3bb241964a23accf4633e65818465be
--- /dev/null
+++ b/test/operators/test_multiclass_nms_op.cpp
@@ -0,0 +1,153 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "../test_include.h"
+#include "operators/multiclass_nms_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype>
+class TestMultiClassNMSOp {
+ public:
+  explicit TestMultiClassNMSOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (auto block_desc : blocks) {
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (auto op : ops) {
+        if (op->Type() == "multiclass_nms" &&
+            op->Input("BBoxes")[0] == "box_coder_0.tmp_0") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " BBoxes is : " << op->Input("BBoxes")[0];
+          DLOG << " Scores is : " << op->Input("Scores")[0];
+          DLOG << " Out is : " << op->Output("Out")[0];
+          DLOG << " keep_top_k : "
+               << op->GetAttrMap().at("keep_top_k").Get<int>();
+          DLOG << " background_label : "
+               << op->GetAttrMap().at("background_label").Get<int>();
+          DLOG << " nms_eta : " << op->GetAttrMap().at("nms_eta").Get<float>();
+          DLOG << " nms_threshold : "
+               << op->GetAttrMap().at("nms_threshold").Get<float>();
+          DLOG << " nms_top_k : "
+               << op->GetAttrMap().at("nms_top_k").Get<int>();
+          DLOG << " score_threshold : "
+               << op->GetAttrMap().at("score_threshold").Get<float>();
+          //                            DLOG << " variances : " <<
+          //                            op->GetAttrMap().at("variances").Get<std::vector<float>>();
+          //                            DLOG << " aspect_ratios : " <<
+          //                            op->GetAttrMap().at("aspect_ratios").Get<std::vector<float>>();
+          //                            DLOG << " min_sizes : " <<
+          //                            op->GetAttrMap().at("min_sizes").Get<std::vector<float>>();
+          //                            DLOG << " max_sizes : " <<
+          //                            op->GetAttrMap().at("max_sizes").Get<std::vector<float>>();
+          std::shared_ptr<operators::MultiClassNMSOp<Dtype, float>> priorbox =
+              std::make_shared<operators::MultiClassNMSOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(priorbox);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict(const Tensor &t1, const Tensor &t2) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("box_coder_0.tmp_0");
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *x2_feed_value = scope->Var("transpose_12.tmp_0");
+    auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
+    tensor_x2->ShareDataWith(t2);
+
+    Variable *output = scope->Var("detection_output_0.tmp_0");
+    auto *output_tensor = output->GetMutable<LoDTensor>();
+    output_tensor->mutable_data<float>({1917, 6});
+
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict(t1, t2, 0);
+
+    return out_tensor;
+    // return outvars_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict(const Tensor &t1, const Tensor &t2, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestMultiClassNMSOp<CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run MulticlassNMS Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
+
+  /// input x (1,3,300,300)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {10, 1917, 4}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+
+  paddle_mobile::framework::Tensor inputx2;
+  SetupTensor<float>(&inputx2, {10, 21, 1917}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx2_ptr = inputx2.data<float>();
+
+  paddle_mobile::framework::TestMultiClassNMSOp<paddle_mobile::CPU>
+      testMultiClassNMSOp(program);
+
+  auto output = testMultiClassNMSOp.predict(inputx1, inputx2);
+  auto *output_ptr = output->data<float>();
+
+  for (int i = 0; i < output->numel(); i++) {
+    DLOG << output_ptr[i];
+  }
+  return 0;
+}
diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2daecd7b4c1a50c612bc784c801208d2e6f31482
--- /dev/null
+++ b/test/operators/test_pool_op.cpp
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/pool_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_googlenet));
+  if (program.originProgram == nullptr) {
+    DLOG << "program read file";
+  }
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::PoolOp<paddle_mobile::CPU, float>>
+      executor(program, "pool2d");
+
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {1, 64, 112, 112}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 56, 56});
+  auto output =
+      executor.Predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim);
+
+  float *output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_prelu_op.cpp b/test/operators/test_prelu_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e93d8732d18496721b24cfba1df296250169f8b2
--- /dev/null
+++ b/test/operators/test_prelu_op.cpp
@@ -0,0 +1,58 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../executor_for_test.h"
+#include "../test_include.h"
+#include "operators/prelu_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::PReluOp<paddle_mobile::CPU, float>>
+      executor(program, "prelu");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {1, 2, 3, 4}, -1, 1);
+  input_tensors.push_back(input1);
+
+  // 2. input_names
+  vector<string> input_names({
+      "batch_norm_0.tmp_2",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"batch_norm_0.tmp_3"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  for (int j = 0; j < output[0]->numel(); ++j) {
+    DLOG << " value of output: " << output0_data[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_prior_box_op.cpp b/test/operators/test_prior_box_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c697a9a7982f05b71caa5bb5f4d12e50dc9d418
--- /dev/null
+++ b/test/operators/test_prior_box_op.cpp
@@ -0,0 +1,153 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "../test_include.h"
+#include "operators/prior_box_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype>
+class TestPriorBoxOp {
+ public:
+  explicit TestPriorBoxOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (auto block_desc : blocks) {
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (auto op : ops) {
+        if (op->Type() == "prior_box" &&
+            op->Input("Input")[0] == "batch_norm_26.tmp_3") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input is : " << op->Input("Input")[0];
+          DLOG << " Image is : " << op->Input("Image")[0];
+          DLOG << " Output Boxes is : " << op->Output("Boxes")[0];
+          DLOG << " Output Variances is : " << op->Output("Variances")[0];
+          DLOG << " offset : " << op->GetAttrMap().at("offset").Get<float>();
+          DLOG << " step_h : " << op->GetAttrMap().at("step_h").Get<float>();
+          DLOG << " step_w : " << op->GetAttrMap().at("step_w").Get<float>();
+          DLOG << " flip : " << op->GetAttrMap().at("flip").Get<bool>();
+          DLOG << " clip : " << op->GetAttrMap().at("clip").Get<bool>();
+          //                            DLOG << " variances : " <<
+          //                            op->GetAttrMap().at("variances").Get<std::vector<float>>();
+          //                            DLOG << " aspect_ratios : " <<
+          //                            op->GetAttrMap().at("aspect_ratios").Get<std::vector<float>>();
+          //                            DLOG << " min_sizes : " <<
+          //                            op->GetAttrMap().at("min_sizes").Get<std::vector<float>>();
+          //                            DLOG << " max_sizes : " <<
+          //                            op->GetAttrMap().at("max_sizes").Get<std::vector<float>>();
+          std::shared_ptr<operators::PriorBoxOp<Dtype, float>> priorbox =
+              std::make_shared<operators::PriorBoxOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(priorbox);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_priorbox(const Tensor &t1, const Tensor &t2) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("image");
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *x2_feed_value = scope->Var("batch_norm_26.tmp_3");
+    auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
+    tensor_x2->ShareDataWith(t2);
+
+    Variable *boxes_output = scope->Var("prior_box_1.tmp_0");
+    auto *boxes_output_tensor = boxes_output->GetMutable<LoDTensor>();
+    boxes_output_tensor->mutable_data<float>({10, 10, 6, 4});
+
+    Variable *variances_output = scope->Var("prior_box_1.tmp_1");
+    auto *variances_output_tesnor = variances_output->GetMutable<LoDTensor>();
+    variances_output_tesnor->mutable_data<float>({10, 10, 6, 4});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> outboxes_tensor = std::make_shared<LoDTensor>();
+    outboxes_tensor.reset(boxes_output_tensor);
+
+    std::shared_ptr<Tensor> outvars_tensor = std::make_shared<LoDTensor>();
+    outvars_tensor.reset(variances_output_tesnor);
+    predict_priorbox(t1, t2, 0);
+
+    return outboxes_tensor;
+    // return outvars_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_priorbox(const Tensor &t1, const Tensor &t2, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestPriorBoxOp<CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run PriorBoxOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+
+  /// input x (1,3,300,300)
+  paddle_mobile::framework::Tensor input_image;
+  SetupTensor<float>(&input_image, {1, 3, 300, 300}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *input_image_ptr = input_image.data<float>();
+
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {1, 1024, 10, 10}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+
+  paddle_mobile::framework::TestPriorBoxOp<paddle_mobile::CPU> testPriorBoxOp(
+      program);
+
+  auto output_priorbox = testPriorBoxOp.predict_priorbox(input_image, inputx1);
+  auto *output_priorbox_ptr = output_priorbox->data<float>();
+
+  for (int i = 0; i < output_priorbox->numel(); i++) {
+    DLOG << output_priorbox_ptr[i];
+  }
+  return 0;
+}
diff --git a/test/operators/test_relu_op.cpp b/test/operators/test_relu_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fad0d0c30a126cc2730e4aa8b87364eee9fc8209
--- /dev/null
+++ b/test/operators/test_relu_op.cpp
@@ -0,0 +1,57 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/relu_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ReluOp<paddle_mobile::CPU, float>>
+      executor(program, "relu");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {1, 2, 3, 4}, -1, 1);
+  input_tensors.push_back(input1);
+
+  // 2. input_names
+  vector<string> input_names({
+      "batch_norm_0.tmp_2",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"batch_norm_0.tmp_3"});
+
+  // 4. out_dims;
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  for (int j = 0; j < output[0]->numel(); ++j) {
+    DLOG << " value of output: " << output0_data[j];
+  }
+  return 0;
+}
diff --git a/test/operators/test_reshape_op.cpp b/test/operators/test_reshape_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3541151d8a1a286527e715f402df381d2efc094c
--- /dev/null
+++ b/test/operators/test_reshape_op.cpp
@@ -0,0 +1,47 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/reshape_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+  if (program.originProgram == nullptr) {
+    DLOG << "program read file";
+  }
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ReshapeOp<paddle_mobile::CPU, float>>
+      executor(program, "reshape");
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {2, 3, 3, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto input_ptr = input.data<float>();
+  auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2});
+  auto output =
+      executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
+  auto *output_ptr = output->data<float>();
+
+  DLOG << "input : ";
+  for (int j = 0; j < input.numel(); ++j) {
+    DLOG << " index " << j << " : " << input_ptr[j];
+  }
+
+  DLOG << "output : ";
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " index " << j << " : " << output_ptr[j];
+  }
+
+  return 0;
+}
diff --git a/test/operators/test_resize_op.cpp b/test/operators/test_resize_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f4dcaa6885d92a727e8c97d5106c3b6913a4ab33
--- /dev/null
+++ b/test/operators/test_resize_op.cpp
@@ -0,0 +1,47 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/resize_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+  if (program.originProgram == nullptr) {
+    DLOG << "program read file";
+  }
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ResizeOp<paddle_mobile::CPU, float>>
+      executor(program, "resize");
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {2, 3, 3, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto input_ptr = input.data<float>();
+  auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2});
+  auto output =
+      executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
+  auto *output_ptr = output->data<float>();
+
+  DLOG << "input : ";
+  for (int j = 0; j < input.numel(); ++j) {
+    DLOG << " index " << j << " : " << input_ptr[j];
+  }
+
+  DLOG << "output : ";
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " index " << j << " : " << output_ptr[j];
+  }
+
+  return 0;
+}
diff --git a/test/operators/test_scale_op.cpp b/test/operators/test_scale_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..574779d71e5ebc5f06fe5cd8fb33422726f39464
--- /dev/null
+++ b/test/operators/test_scale_op.cpp
@@ -0,0 +1,18 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/scale_op.h"
+
+int main() {}
diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..37d05a44b5b66f2428eedd8e8719cd127747ee08
--- /dev/null
+++ b/test/operators/test_sigmoid_op.cpp
@@ -0,0 +1,35 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../../src/operators/kernel/sigmoid_kernel.h"
+#include "../../src/operators/kernel/central-arm-func/sigmoid_arm_func.h"
+#include "../test_helper.h"
+#include "io/executor.h"
+
+int main() {
+  paddle_mobile::framework::Tensor input;
+  paddle_mobile::framework::Tensor output;
+  SetupTensor<float>(&input, {1, 4, 60, 60}, static_cast<float>(0),
+                     static_cast<float>(1));
+
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 4, 60, 60});
+  output.Resize(out_ddim);
+  paddle_mobile::operators::sigmoid(&input, &output);
+  auto *output_ptr = output.data<float>();
+  for (int j = 0; j < output.numel(); ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  DLOG << 5;
+  return 0;
+}
diff --git a/test/operators/test_slice_op.cpp b/test/operators/test_slice_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9306bc53c6ae23b10c27a71071c11c9ddf1c0d25
--- /dev/null
+++ b/test/operators/test_slice_op.cpp
@@ -0,0 +1,18 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/slice_op.h"
+
+int main() {}
diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a0184729a8bc5e6b0ba952923eecd5242cfe36d4
--- /dev/null
+++ b/test/operators/test_softmax_op.cpp
@@ -0,0 +1,40 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+
+#include "operators/softmax_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet));
+  if (program.originProgram == nullptr) {
+    DLOG << "program read file";
+  }
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::SoftmaxOp<paddle_mobile::CPU, float>>
+      executor(program, "softmax");
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {1, 1000}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 1000});
+  auto output =
+      executor.Predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim);
+  auto *output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+
+  return 0;
+}
diff --git a/test/operators/test_transpose_op.cpp b/test/operators/test_transpose_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f83ee23c25d8f2588e0fe40d5fabc6114129b995
--- /dev/null
+++ b/test/operators/test_transpose_op.cpp
@@ -0,0 +1,49 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_helper.h"
+#include "../test_include.h"
+#include "operators/transpose_op.h"
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+  if (program.originProgram == nullptr) {
+    DLOG << "program read file";
+  }
+  Executor4Test<paddle_mobile::CPU, paddle_mobile::operators::TransposeOp<
+                                        paddle_mobile::CPU, float>>
+      executor(program, "transpose");
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {1, 2, 3, 4}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto input_ptr = input.data<float>();
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 4, 2});
+  auto output =
+      executor.Predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim);
+  auto *output_ptr = output->data<float>();
+
+  DLOG << "input : ";
+  for (int j = 0; j < input.numel(); ++j) {
+    DLOG << " index " << j << " : " << input_ptr[j];
+  }
+
+  DLOG << "output : ";
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " index " << j << " : " << output_ptr[j];
+  }
+  DLOG << " for example : ";
+  DLOG << " you can check if input[16] == output[9] ";
+  DLOG << " you can check if input[12] == output[1] ";
+  return 0;
+}
diff --git a/test/test_helper.h b/test/test_helper.h
index 7581405c3d9f14e7e997e73be91cb624ad6d9798..ecbc251a815e343f75b1247ffc430e9c52d6abfd 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -41,13 +41,18 @@ static const char *g_resnet_50 = "../models/resnet_50";
 static const char *g_resnet = "../models/resnet";
 static const char *g_googlenet_combine = "../models/googlenet_combine";
 static const char *g_yolo = "../models/yolo";
+static const char *g_yolo_combined = "../models/yolo_combined";
 static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
 
 static const char *g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
 static const char *g_test_image_1x3x224x224_banana =
     "../images/input_3x224x224_banana";
+static const char *g_test_image_desktop_1_3_416_416_nchw_float =
+    "../images/in_put_1_3_416_416_2";
 static const char *g_hand = "../images/hand_image";
+static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
+static const char *g_imgfssd_ar1 = "../images/003_0001.txt";
 static const char *g_img = "../images/img.bin";
 
 using paddle_mobile::framework::DDim;
diff --git a/tools/build.sh b/tools/build.sh
index 5cf3b0454658c764ff99989de9dca1530c3b55d9..3b7204baefe6d843cbb4d0a237cf5e96f0c28373 100755
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -92,6 +92,47 @@ build_for_android() {
     make -j 8
 }
 
+
+build_for_arm_linux() {
+    MODE="Release"
+    ARM_LINUX="arm-linux"
+
+    if [ "${#NETS}" -gt 1 ]; then
+        cmake .. \
+            -B"../build/release/arm-linux" \
+            -DCMAKE_BUILD_TYPE="${MODE}" \
+            -DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
+            -DCMAKE_CXX_FLAGS="-std=c++14 -mcpu=cortex-a53 -mtune=cortex-a53 -mfpu=neon-vfpv4 -mfloat-abi=hard -ftree-vectorize -funsafe-math-optimizations  -pipe -mlittle-endian -munaligned-access" \
+            -DNET="${NETS}" \
+            -D"V7"=true
+    else
+        cmake .. \
+            -B"../build/release/arm-linux" \
+            -DCMAKE_BUILD_TYPE="${MODE}" \
+            -DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
+            -DCMAKE_CXX_FLAGS="-std=c++14 -mcpu=cortex-a53 -mtune=cortex-a53 -mfpu=neon-vfpv4 -mfloat-abi=hard -ftree-vectorize -funsafe-math-optimizations  -pipe -mlittle-endian -munaligned-access" \
+            -DNET="${NETS}" \
+            -D"V7"=true
+    fi
+
+    cd "../build/release/arm-linux"
+    make -j 8
+    cd "../../../test/"
+    DIRECTORY="models"
+    if [ "`ls -A $DIRECTORY`" = "" ]; then
+        echo "$DIRECTORY is indeed empty pull images"
+        wget http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip
+        unzip paddle-mobile%2FmodelsAndImages.zip
+        mv modelsAndImages/images/ images
+        mv modelsAndImages/models/ models
+        rm -rf paddle-mobile%2FmodelsAndImages.zip
+        rm -rf __MACOS
+    else
+        echo "$DIRECTORY is indeed not empty, DONE!"
+    fi
+
+}
+
 build_for_ios() {
 #    rm -rf "../build"
     PLATFORM="ios"
@@ -135,7 +176,7 @@ if [ $# -lt 1 ]; then
     echo "sample usage: ./build.sh android"
 else
     params=($@)
-    for(( i=1; i<$#; i++ )); do  
+    for(( i=1; i<$#; i++ )); do
         if [ ${i} != 1 ]; then
             NETS=$NETS$";"
         fi
@@ -162,6 +203,8 @@ else
 
     if [ $1 = "android" ]; then
         build_for_android
+    elif [ $1 = "arm_linux" ]; then
+        build_for_arm_linux
     elif [ $1 = "ios" ]; then
         build_for_ios
     else
diff --git a/tools/op.cmake b/tools/op.cmake
index 3f27f7fc4ae0d00394b9df63c214b30f98cdd31b..e17afb445dcb2ccb12c1cce4e05e4531c3e8cde9 100644
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -1,5 +1,10 @@
 set(FOUND_MATCH OFF)
-if ("googlenet" IN_LIST NET)
+set(CON -1)
+
+message(STATUS "nets :${NET}")
+
+list(FIND NET "googlenet" CON)
+if (CON GREATER -1)
   message("googlenet enabled")
   set(CONCAT_OP ON)
   set(CONV_OP ON)
@@ -15,7 +20,8 @@ if ("googlenet" IN_LIST NET)
   set(FOUND_MATCH ON)
 endif()
 
-if ("mobilenet" IN_LIST NET)
+list(FIND NET "mobilenet" CON)
+if (CON GREATER -1)
   message("mobilenet enabled")
   set(CONV_OP ON)
   set(ELEMENTWISEADD_OP ON)
@@ -33,7 +39,8 @@ if ("mobilenet" IN_LIST NET)
 endif()
 
 
-if ("mobilenetssd" IN_LIST NET)
+list(FIND NET "mobilenetssd" CON)
+if (CON GREATER -1)
   message("mobilenetssd enabled")
   set(FUSION_CONVBNRELU_OP ON)
   set(FUSION_CONVBNRELU_OP ON)
@@ -55,7 +62,8 @@ if ("mobilenetssd" IN_LIST NET)
 endif()
 
 
-if ("yolo" IN_LIST NET)
+list(FIND NET "yolo" CON)
+if (CON GREATER -1)
   message("yolo enabled")
   set(BATCHNORM_OP ON)
   set(CONV_OP ON)
@@ -65,7 +73,8 @@ if ("yolo" IN_LIST NET)
   set(FOUND_MATCH ON)
 endif()
 
-if ("squeezenet" IN_LIST NET)
+list(FIND NET "squeezenet" CON)
+if (CON GREATER -1)
   message("squeezenet enabled")
   set(CONCAT_OP ON)
   set(CONV_OP ON)
@@ -79,7 +88,8 @@ if ("squeezenet" IN_LIST NET)
 endif()
 
 
-if ("resnet" IN_LIST NET)
+list(FIND NET "resnet" CON)
+if (CON GREATER -1)
   message("resnet enabled")
   set(CONCAT_OP ON)
   set(CONV_OP ON)
@@ -95,7 +105,8 @@ if ("resnet" IN_LIST NET)
   set(FOUND_MATCH ON)
 endif()
 
-if ("FPGAnets" IN_LIST NET)
+list(FIND NET "FPGAnets" CON)
+if (CON GREATER -1)
   message("FPGAnets enabled")
   set(FUSION_CONVADDRELU_OP ON)
   set(FUSION_CONVADDBNRELU_OP ON)
@@ -114,7 +125,8 @@ if ("FPGAnets" IN_LIST NET)
   set(FOUND_MATCH ON)
 endif()
 
-if ("nlp" IN_LIST NET)
+list(FIND NET "nlp" CON)
+if (CON GREATER -1)
   message("nlp enabled")
   set(FUSION_FC_OP ON)
   set(LOOKUP_OP ON)
@@ -127,6 +139,43 @@ if ("nlp" IN_LIST NET)
   set(FOUND_MATCH ON)
 endif()
 
+list(FIND NET "mobilenetfssd" CON)
+if (CON GREATER -1)
+  message("mobilenetfssd enabled")
+  set(FUSION_CONVADDRELU_OP ON)
+  set(FUSION_CONVADDBNRELU_OP ON)
+  set(FUSION_CONVADD_OP ON)
+  set(SOFTMAX_OP ON)
+  set(RESHAPE_OP ON)
+  set(BILINEAR_INTERP_OP ON)
+  set(TRANSPOSE_OP ON)
+  set(CONCAT_OP ON)
+  set(PRIORBOX_OP ON)
+  set(BATCHNORM_OP ON)
+  set(BOXCODER_OP ON)
+  set(MULTICLASSNMS_OP ON)
+  set(FLATTEN_OP ON)
+  set(SPLIT_OP ON)
+  set(SHAPE_OP ON)
+
+  set(FOUND_MATCH ON)
+endif()
+
+list(FIND NET "genet" CON)
+if (CON GREATER -1)
+  message("genet enabled")
+  set(FUSION_CONVADDPRELU_OP ON)
+  set(FUSION_CONVADDADDPRELU_OP ON)
+  set(FUSION_CONVADD_OP ON)
+  set(CONV_TRANSPOSE_OP ON)
+  set(FUSION_CONVADDRELU_OP ON)
+  set(ELEMENTWISEADD_OP ON)
+  set(PRELU_OP ON)
+  set(POOL_OP ON)
+  set(CONCAT_OP ON)
+
+  set(FOUND_MATCH ON)
+endif()
 
 if(NOT FOUND_MATCH)
   message("--default--")
@@ -336,4 +385,4 @@ endif()
 
 if (SHAPE_OP)
   add_definitions(-DSHAPE_OP)
-endif()
\ No newline at end of file
+endif()
diff --git a/tools/toolchains/arm-linux-gnueabihf.cmake b/tools/toolchains/arm-linux-gnueabihf.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..7db42c7e73f4cfabce670bb2bc691e4b5bd314a2
--- /dev/null
+++ b/tools/toolchains/arm-linux-gnueabihf.cmake
@@ -0,0 +1,11 @@
+# CMake toolchain file for building ARM software on Linux environment
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR arm)
+set(CMAKE_SYSTEM_VERSION 1)
+
+message("if U build on platform . this is right.")
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
\ No newline at end of file