Merge remote-tracking branch 'upstream/develop' into develop

1226253e · 朔-望 · 8dd98c17 · cd30eb8a · 1226253e · 1226253e
42 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,6 @@ option(LOG_PROFILE "log profile" ON)
 option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
-set(DEBUGING ON)
 if (ARM_LINUX)
 include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
@@ -134,6 +133,9 @@ else ()
 endif ()
 if(DEBUGING)
-    add_subdirectory(test)
+    # Add the test subdirectory unless IS_IOS is set.
+    if(NOT IS_IOS)
+        add_subdirectory(test)
+    endif()
 endif()
--- a/demo/android/PaddleMobile_Android/app/src/main/java/com/baidu/paddle/MainActivity.java
+++ b/demo/android/PaddleMobile_Android/app/src/main/java/com/baidu/paddle/MainActivity.java
@@ -121,7 +121,14 @@ public class MainActivity extends Activity {
                String assetPath = "pml_demo";
                String sdcardPath = Environment.getExternalStorageDirectory()
                        + File.separator + assetPath + File.separator + type;
-                PML.load(sdcardPath);
+                //PML.load(sdcardPath);
+                String modelPath = Environment.getExternalStorageDirectory()
+                                                           + File.separator + assetPath +
+                                                           File.separator + "googlenet_combine" + File.separator + "model";
+                String paramPath = Environment.getExternalStorageDirectory()
+                                                           + File.separator + assetPath +
+                                                           File.separator + "googlenet_combine" + File.separator + "params";
+                PML.loadCombined(modelPath, paramPath);
            }
        });

--- a/demo/android/PaddleMobile_Android/app/src/main/java/com/baidu/paddle/PML.java
+++ b/demo/android/PaddleMobile_Android/app/src/main/java/com/baidu/paddle/PML.java
@@ -8,6 +8,14 @@ public class PML {
     */
    public static native boolean load(String modelPath);
+    /**
+     * Load a combined model, given as one model file plus one params file.
+     * @param modelPath path of the model file
+     * @param paramPath path of the params file
+     * @return the result reported by the native load call (presumably true on success)
+     */
+    public static native boolean loadCombined(String modelPath,String paramPath);
    /**
     * object detection

--- a/demo/ios/PaddleMobileDemo/PaddleMobileDemo.xcodeproj/project.pbxproj
+++ b/demo/ios/PaddleMobileDemo/PaddleMobileDemo.xcodeproj/project.pbxproj
@@ -15,9 +15,9 @@
 		FC12E94120EB6B2900807EF4 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = FC12E94020EB6B2900807EF4 /* main.m */; };
 		FC12E94A20EB6B6800807EF4 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC12E94820EB6B6800807EF4 /* libpaddle-mobile.a */; };
 		FC12E94D20EB6BBB00807EF4 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC12E94C20EB6BBB00807EF4 /* libstdc++.tbd */; };
-		FC12E95120EB6BED00807EF4 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC12E94F20EB6BED00807EF4 /* params */; };
-		FC12E95220EB6BED00807EF4 /* model in Resources */ = {isa = PBXBuildFile; fileRef = FC12E95020EB6BED00807EF4 /* model */; };
 		FC12E95420EB6C0D00807EF4 /* apple.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC12E95320EB6C0D00807EF4 /* apple.jpg */; };
+		FC51640120EF758D00636C28 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC5163FF20EF758D00636C28 /* params */; };
+		FC51640220EF758D00636C28 /* model in Resources */ = {isa = PBXBuildFile; fileRef = FC51640020EF758D00636C28 /* model */; };
 /* End PBXBuildFile section */
 /* Begin PBXFileReference section */
@@ -34,9 +34,9 @@
 		FC12E94820EB6B6800807EF4 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; };
 		FC12E94920EB6B6800807EF4 /* PaddleMobile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobile.h; sourceTree = "<group>"; };
 		FC12E94C20EB6BBB00807EF4 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
-		FC12E94F20EB6BED00807EF4 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
-		FC12E95020EB6BED00807EF4 /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
 		FC12E95320EB6C0D00807EF4 /* apple.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = apple.jpg; sourceTree = "<group>"; };
+		FC5163FF20EF758D00636C28 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
+		FC51640020EF758D00636C28 /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 /* Begin PBXFrameworksBuildPhase section */
@@ -72,9 +72,9 @@
 		FC12E93020EB6B2800807EF4 /* PaddleMobileDemo */ = {
 			isa = PBXGroup;
 			children = (
-				FC12E95320EB6C0D00807EF4 /* apple.jpg */,
-				FC12E94E20EB6BED00807EF4 /* googlenet_combine */,
 				FC12E94720EB6B6800807EF4 /* PaddleMobile */,
+				FC5163FE20EF758D00636C28 /* googlenet_combine */,
+				FC12E95320EB6C0D00807EF4 /* apple.jpg */,
 				FC12E93120EB6B2800807EF4 /* AppDelegate.h */,
 				FC12E93220EB6B2800807EF4 /* AppDelegate.m */,
 				FC12E93420EB6B2800807EF4 /* ViewController.h */,
@@ -105,11 +105,11 @@
 			name = Frameworks;
 			sourceTree = "<group>";
 		};
-		FC12E94E20EB6BED00807EF4 /* googlenet_combine */ = {
+		FC5163FE20EF758D00636C28 /* googlenet_combine */ = {
 			isa = PBXGroup;
 			children = (
-				FC12E94F20EB6BED00807EF4 /* params */,
+				FC5163FF20EF758D00636C28 /* params */,
-				FC12E95020EB6BED00807EF4 /* model */,
+				FC51640020EF758D00636C28 /* model */,
 			);
 			path = googlenet_combine;
 			sourceTree = "<group>";
@@ -171,10 +171,10 @@
 			isa = PBXResourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				FC51640220EF758D00636C28 /* model in Resources */,
+				FC51640120EF758D00636C28 /* params in Resources */,
 				FC12E93E20EB6B2900807EF4 /* LaunchScreen.storyboard in Resources */,
-				FC12E95220EB6BED00807EF4 /* model in Resources */,
 				FC12E93B20EB6B2900807EF4 /* Assets.xcassets in Resources */,
-				FC12E95120EB6BED00807EF4 /* params in Resources */,
 				FC12E95420EB6C0D00807EF4 /* apple.jpg in Resources */,
 				FC12E93920EB6B2800807EF4 /* Main.storyboard in Resources */,
 			);

--- a/demo/ios/PaddleMobileDemo/PaddleMobileDemo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
+++ b/demo/ios/PaddleMobileDemo/PaddleMobileDemo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
--- a/demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/PaddleMobile.h
+++ b/demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/PaddleMobile.h
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#pragma once
 #import <CoreImage/CoreImage.h>
 #import <Foundation/Foundation.h>
 @interface PaddleMobile : NSObject
+/*
+    Create the object.
+*/
 - (instancetype)init;
+/*
+    Load the model and allocate memory.
+*/
 - (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
+/*
+  Load a model stored in separated form; the model directory must be passed in.
+*/
+- (BOOL)load:(NSString *)modelAndWeightPath;
+/*
+    Run prediction. means and scale are the preprocessing parameters that were
+    used when the model was trained; if no such preprocessing was done during
+    training, use predict:dim: directly.
+*/
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
+/*
+    Run prediction.
+*/
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
+/*
+    Release memory.
+*/
 - (void)clear;
 @end
--- a/doc/development_doc.md
+++ b/doc/development_doc.md
@@ -4,9 +4,11 @@
 ## 编译
-### 一. 使用 build.sh 编译
 ```sh
+# 在 paddle-mobile 目录下:
+cd tools
 sh build.sh ios
 # 如果只想编译某个特定模型的 op, 则需执行以下命令
@@ -17,9 +19,7 @@ cd ../build/release/ios/build
 ```
-### 二. 集成
+## 集成
-#### 如使用 oc 接口
 ```
 将上一步生成的:
@@ -28,7 +28,11 @@ libpaddle-mobile.a
 /src/ios_io/ 下的
 PaddleMobile.h
 ```
-拖入工程, 接口如下:
+拖入工程
+#### oc 接口
+接口如下:
 ```
 /*

--- a/src/ios_io/PaddleMobile.h
+++ b/src/ios_io/PaddleMobile.h
@@ -19,10 +19,34 @@
 @interface PaddleMobile : NSObject
+/*
+    Create the object.
+*/
 - (instancetype)init;
+/*
+    Load the model and allocate memory.
+*/
 - (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
+/*
+  Load a model stored in separated form; the model directory must be passed in.
+*/
+- (BOOL)load:(NSString *)modelAndWeightPath;
+/*
+    Run prediction. means and scale are the preprocessing parameters that were
+    used when the model was trained; if no such preprocessing was done during
+    training, use predict:dim: directly.
+*/
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
+/*
+    Run prediction.
+*/
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
+/*
+    Release memory.
+*/
 - (void)clear;
 @end
--- a/src/ios_io/PaddleMobile.mm
+++ b/src/ios_io/PaddleMobile.mm
@@ -62,6 +62,15 @@ static std::mutex shared_mutex;
  }
 }
+// Loads a model stored in separated form from the directory
+// `modelAndWeightPath` and records the outcome in loaded_.
+// Returns YES when the underlying Load call reports success, NO otherwise
+// (including when no path is supplied).
+- (BOOL)load:(NSString *)modelAndWeightPath{
+  // -UTF8String on nil yields NULL, and building a std::string from NULL is
+  // undefined behaviour, so reject a missing path up front.
+  if (modelAndWeightPath == nil) {
+    return NO;
+  }
+  std::string model_path_str = std::string([modelAndWeightPath UTF8String]);
+  loaded_ = pam_->Load(model_path_str);
+  return loaded_ ? YES : NO;
+}
 -(void)preprocess:(const UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray<NSNumber *> *)means scale:(float)scale dim:(std::vector<int64_t>)dim{
  if (means == nil) {
    means = @[@0, @0, @0];

--- a/src/jni/paddle_mobile_jni.cpp
+++ b/src/jni/paddle_mobile_jni.cpp
@@ -60,6 +60,15 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
                                         optimize);
 }
+// JNI entry point for PML.loadCombined: loads a combined model from a model
+// file path and a params file path, with optimization enabled, and returns the
+// result of the underlying Load call.
+JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
+    JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
+  // Log under this function's own name so it can be told apart from load().
+  ANDROIDLOGI("loadCombined invoked");
+  bool optimize = true;
+  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                         jstring2cppstring(env, paramPath),
+                                         optimize);
+}
 JNIEXPORT jfloatArray JNICALL
 Java_com_baidu_paddle_PML_predict(JNIEnv *env, jclass thiz, jfloatArray buf) {
  jfloatArray result = NULL;

--- a/src/jni/paddle_mobile_jni.h
+++ b/src/jni/paddle_mobile_jni.h
@@ -22,11 +22,16 @@ extern "C" {
 namespace paddle_mobile {
 namespace jni {
 /**
- * load model & params of the net for android
+ * load separated model for android
 */
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
                                                          jclass thiz,
                                                          jstring modelPath);
+/**
+ * load combined model (one model file plus one params file) for android
+ */
+JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
+    JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath);
 /**
 * object detection for anroid

--- a/src/operators/fusion_conv_add.cpp
+++ b/src/operators/fusion_conv_add.cpp
@@ -45,28 +45,6 @@ void FusionConvAddOp<Dtype, T>::InferShape() const {
  this->param_.Output()->Resize(ddim);
 }
-#ifdef PADDLE_MOBILE_CPU
-#ifndef CONV_ADD_REGISTER
-framework::FusionOpRegistrar convadd_registrar(new FusionConvAddMatcher());
-#define CONV_ADD_REGISTER
-#endif
-#endif
-#ifdef PADDLE_MOBILE_MALI_GPU
-#ifndef CONV_ADD_REGISTER
-static framework::FusionOpRegistrar convadd_registrar(
-    new FusionConvAddMatcher());
-#define CONV_ADD_REGISTER
-#endif
-#endif
-#ifdef PADDLE_MOBILE_FPGA
-#endif
 template class FusionConvAddOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/fusion_conv_add.h
+++ b/src/operators/fusion_conv_add.h
@@ -69,7 +69,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
 #ifdef PADDLE_MOBILE_CPU
 #ifndef CONV_ADD_REGISTER
-extern framework::FusionOpRegistrar convadd_registrar(
+static framework::FusionOpRegistrar convadd_registrar(
    new FusionConvAddMatcher());
 #define CONV_ADD_REGISTER
 #endif
@@ -82,6 +82,7 @@ extern framework::FusionOpRegistrar convadd_registrar(
 static framework::FusionOpRegistrar convadd_registrar(
    new FusionConvAddMatcher());
 #define CONV_ADD_REGISTER
 #endif
 #endif

--- a/src/operators/fusion_fc_op.cpp
+++ b/src/operators/fusion_fc_op.cpp
@@ -50,27 +50,6 @@ void FusionFcOp<Dtype, T>::InferShape() const {
  this->param_.Out()->Resize(ddim);
 }
-#ifdef PADDLE_MOBILE_CPU
-#ifndef CONV_CPU_REGISTER
-#define CONV_CPU_REGISTER
-framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
-#endif
-#endif
-#ifdef PADDLE_MOBILE_MALI_GPU
-#ifndef CONV_CPU_REGISTER
-#define CONV_CPU_REGISTER
-framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
-#endif
-#endif
-#ifdef PADDLE_MOBILE_FPGA
-#endif
 template class FusionFcOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -69,7 +69,7 @@ class FusionFcOp
 #ifndef CONV_CPU_REGISTER
 #define CONV_CPU_REGISTER
-extern framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 #endif
@@ -78,7 +78,7 @@ extern framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #ifndef CONV_CPU_REGISTER
 #define CONV_CPU_REGISTER
-extern framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 #endif

--- a/src/operators/kernel/arm/prelu_kernel.cpp
+++ b/src/operators/kernel/arm/prelu_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PRELU_OP
+#include "operators/kernel/prelu_kernel.h"
+#include <operators/math/transform.h>
+namespace paddle_mobile {
+namespace operators {
+// Element-wise PReLU functor: inputs greater than zero pass through unchanged,
+// every other input is multiplied by the stored slope.
+template <typename T>
+struct PReluFunctor {
+  explicit PReluFunctor(float slope) : slope_(slope) {}
+  inline T operator()(T in) const {
+    if (in > 0) {
+      return in;
+    }
+    return in * slope_;
+  }
+  float slope_ = 0.0f;
+};
+/*
+ * @b CPU specialization of the PReLU kernel; param is passed in from the op
+ * layer.
+ *
+ * With exactly one slope, that slope is applied to every element. With more
+ * than one slope, the slope is selected by the index along dimension 0 for
+ * inputs of rank 1, 2 and 3, and by the index along dimension 1 for inputs of
+ * rank 4 or higher. Nothing is written when there are no slopes or the input
+ * has rank 0.
+ * */
+template <>
+void PReluKernel<CPU, float>::Compute(const PReluParam &param) const {
+  const auto *input_x = param.InputX();
+  auto *input_x_ptr = input_x->data<float>();
+  auto *out = param.Out();
+  auto *out_ptr = out->mutable_data<float>();
+  const auto &slopes = param.Slopes();
+  math::Transform trans;
+  if (slopes.size() == 1) {
+    PReluFunctor<float> func_(slopes[0]);
+    trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
+  } else if (slopes.size() > 1) {
+    const int dim_size = input_x->dims().size();
+    switch (dim_size) {
+      case 0:
+        break;
+      case 1: {
+        const int input_width = input_x->dims()[0];
+        #pragma omp parallel for
+        for (int w = 0; w < input_width; ++w) {
+          // PReLU only scales non-positive values; positive values must pass
+          // through unchanged, exactly as PReluFunctor does.
+          const float value = input_x_ptr[w];
+          out_ptr[w] = value > 0 ? value : value * slopes[w];
+        }
+      } break;
+      case 2: {
+        const int input_height = input_x->dims()[0];
+        const int input_width = input_x->dims()[1];
+        #pragma omp parallel for
+        for (int h = 0; h < input_height; ++h) {
+          PReluFunctor<float> func_(slopes[h]);
+          const float *ptr = input_x_ptr + h * input_width;
+          float *optr = out_ptr + h * input_width;
+          trans(ptr, ptr + input_width, optr, func_);
+        }
+      } break;
+      case 3: {
+        const int chan_size = input_x->dims()[0];
+        const int input_height = input_x->dims()[1];
+        const int input_width = input_x->dims()[2];
+        const int size = input_height * input_width;
+        #pragma omp parallel for
+        for (int c = 0; c < chan_size; ++c) {
+          PReluFunctor<float> func_(slopes[c]);
+          const float *ptr = input_x_ptr + c * size;
+          float *optr = out_ptr + c * size;
+          trans(ptr, ptr + size, optr, func_);
+        }
+      } break;
+      case 4:
+      default: {
+        // NOTE(review): inputs of rank > 4 are handled as if they were 4-D;
+        // only the first four dimensions are read -- confirm this is intended.
+        const int batch_size = input_x->dims()[0];
+        const int chan_size = input_x->dims()[1];
+        const int input_height = input_x->dims()[2];
+        const int input_width = input_x->dims()[3];
+        const int size = input_height * input_width;
+        #pragma omp parallel for
+        for (int b = 0; b < batch_size; ++b) {
+          for (int c = 0; c < chan_size; ++c) {
+            PReluFunctor<float> func_(slopes[c]);
+            // Plane (b, c) starts at (b * chan_size + c) * size; the product
+            // b * c would alias different planes and skip others.
+            const int offset = (b * chan_size + c) * size;
+            const float *ptr = input_x_ptr + offset;
+            float *optr = out_ptr + offset;
+            trans(ptr, ptr + size, optr, func_);
+          }
+        }
+      }  // case 4,default
+      break;
+    }
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/resize_kernel.cpp
+++ b/src/operators/kernel/arm/resize_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef RESIZE_OP
+#include "operators/kernel/resize_kernel.h"
+#include <cmath>
+namespace paddle_mobile {
+namespace operators {
+// Bilinearly resamples one src_height x src_width plane of floats (`src`) into
+// a dst_height x dst_width plane (`dst`). Each destination pixel maps to the
+// source position (dst index * src size / dst size); neighbours that would fall
+// past the last source row or column contribute nothing to the sum.
+void BiLinearResizeTensor(const float* src, const int src_height,
+                          const int src_width, float* dst, const int dst_height,
+                          const int dst_width) {
+  const float ratio_x = src_width / (float)dst_width;
+  const float ratio_y = src_height / (float)dst_height;
+  for (int y = 0; y < dst_height; ++y) {
+    // Split the source row position into integer row and fractional part.
+    const float pos_y = y * ratio_y;
+    const int row0 = std::floor(pos_y);
+    const float frac_y = pos_y - row0;
+    const float top_weight = std::abs(1.0f - frac_y);
+    const float bottom_weight = std::abs(frac_y);
+    const bool has_row_below = row0 + 1 < src_height;
+    const int src_row_base = row0 * src_width;
+    const int dst_row_base = y * dst_width;
+    for (int x = 0; x < dst_width; ++x) {
+      // Same split for the source column position.
+      const float pos_x = x * ratio_x;
+      const int col0 = std::floor(pos_x);
+      const float frac_x = pos_x - col0;
+      const float left_weight = std::abs(1.0f - frac_x);
+      const float right_weight = std::abs(frac_x);
+      const bool has_col_right = col0 + 1 < src_width;
+      const int top_left = src_row_base + col0;
+      float acc = 0;
+      acc += (top_weight * left_weight * src[top_left]);
+      if (has_col_right) {
+        acc += (top_weight * right_weight * src[top_left + 1]);
+      }
+      if (has_row_below) {
+        acc += (bottom_weight * left_weight * src[top_left + src_width]);
+      }
+      // The diagonal neighbour exists only when both neighbours above do.
+      if (has_col_right && has_row_below) {
+        acc += (bottom_weight * right_weight * src[top_left + src_width + 1]);
+      }
+      dst[dst_row_base + x] = acc;
+    }
+  }
+}
+// Resizes the single plane (src_n, src_c) of `src` into plane (dst_n, dst_c)
+// of `dst` with BiLinearResizeTensor. Dimensions 1, 2 and 3 of each tensor are
+// read as channel count, height and width, so both tensors are assumed to have
+// at least four dimensions.
+void ResizeTensor(const Tensor* src, const int src_n, const int src_c,
+                  Tensor* dst, const int dst_n, const int dst_c) {
+  framework::DDim in_dims = src->dims();
+  const int src_chans = in_dims[1];
+  const int src_height = in_dims[2];
+  const int src_width = in_dims[3];
+  const int src_offset = (src_n * src_chans + src_c) * src_height * src_width;
+  framework::DDim out_dims = dst->dims();
+  const int dst_chans = out_dims[1];
+  const int dst_height = out_dims[2];
+  const int dst_width = out_dims[3];
+  const int dst_offset = (dst_n * dst_chans + dst_c) * dst_height * dst_width;
+  const auto* src_ptr = src->data<float>();
+  // The kernel resizes dst right before calling in here, so obtain the output
+  // pointer through mutable_data, the accessor the other kernels use for
+  // output tensors, rather than through the read accessor.
+  auto* dst_ptr = dst->mutable_data<float>();
+  const auto* src_data = &(src_ptr[src_offset]);
+  auto* dst_data = &(dst_ptr[dst_offset]);
+  BiLinearResizeTensor(src_data, src_height, src_width, dst_data, dst_height,
+                       dst_width);
+}
+// Resizes every (batch, channel) plane of `src` into the matching plane of
+// `dst`. Both tensors must agree on dimension 0 (batch) and dimension 1
+// (channel); this is enforced before any plane is processed.
+void ResizeTensor(const Tensor* src, Tensor* dst) {
+  framework::DDim src_dims = src->dims();
+  framework::DDim dst_dims = dst->dims();
+  PADDLE_MOBILE_ENFORCE(src_dims[0] == dst_dims[0],
+                        "src tensor batch num not equal to dst tensor");
+  PADDLE_MOBILE_ENFORCE(src_dims[1] == dst_dims[1],
+                        "src tensor channel num not equal to dst tensor");
+  const int batch_count = src_dims[0];
+  const int channel_count = src_dims[1];
+  for (int batch = 0; batch < batch_count; ++batch) {
+    for (int channel = 0; channel < channel_count; ++channel) {
+      ResizeTensor(src, batch, channel, dst, batch, channel);
+    }
+  }
+}
+// CPU specialization of the resize kernel: resizes the output tensor to the
+// shape returned by CalOutputShape, then fills it from the input tensor.
+template <>
+void ResizeKernel<CPU, float>::Compute(const ResizeParam& param) const {
+  const auto* source = param.InputX();
+  auto* target = param.Out();
+  target->Resize(CalOutputShape(param));
+  ResizeTensor(source, target);
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/scale_kernel.cpp
+++ b/src/operators/kernel/arm/scale_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef SCALE_OP
+#include "operators/kernel/scale_kernel.h"
+namespace paddle_mobile {
+namespace operators {
+/*
+ * @b CPU specialization of the scale kernel; param is passed in from the op
+ * layer.
+ *
+ * Computes out = x * scale, plus bias when param.HasBias() is true. The scale
+ * (and bias) entry is selected by the innermost index for rank 1 and rank 2
+ * inputs, by the index along dimension 0 for rank 3 inputs, and by the index
+ * along dimension 1 for rank 4 inputs. Other ranks write nothing.
+ * NOTE(review): rank 2 indexes scales by column while rank 3 indexes by the
+ * leading dimension -- confirm this asymmetry is intended.
+ * */
+template <>
+void ScaleKernel<CPU, float>::Compute(const ScaleParam &param) const {
+  const auto *input_x = param.InputX();
+  auto *input_x_ptr = input_x->data<float>();
+  auto *out = param.Out();
+  auto *out_ptr = out->mutable_data<float>();
+  const vector<float> scales = param.Scales();
+  bool has_bias = param.HasBias();
+  const int dim_size = input_x->dims().size();
+  switch (dim_size) {
+    case 1: {
+      const int input_width = input_x->dims()[0];
+      if (has_bias) {
+        const vector<float> biases = param.Biases();
+        #pragma omp parallel for
+        for (int w = 0; w < input_width; w++) {
+          out_ptr[w] = input_x_ptr[w] * scales[w] + biases[w];
+        }
+      } else {
+        #pragma omp parallel for
+        for (int w = 0; w < input_width; w++) {
+          out_ptr[w] = input_x_ptr[w] * scales[w];
+        }
+      }
+    } break;
+    case 2: {
+      const int input_height = input_x->dims()[0];
+      const int input_width = input_x->dims()[1];
+      if (has_bias) {
+        const vector<float> biases = param.Biases();
+        #pragma omp parallel for
+        for (int h = 0; h < input_height; ++h) {
+          const float *iptr = input_x_ptr + h * input_width;
+          float *optr = out_ptr + h * input_width;
+          for (int w = 0; w < input_width; ++w) {
+            optr[w] = iptr[w] * scales[w] + biases[w];
+          }
+        }
+      } else {
+        #pragma omp parallel for
+        for (int h = 0; h < input_height; ++h) {
+          const float *iptr = input_x_ptr + h * input_width;
+          float *optr = out_ptr + h * input_width;
+          for (int w = 0; w < input_width; ++w) {
+            optr[w] = iptr[w] * scales[w];
+          }
+        }
+      }
+    } break;
+    case 3: {
+      const int chan_size = input_x->dims()[0];
+      const int input_height = input_x->dims()[1];
+      const int input_width = input_x->dims()[2];
+      int size = input_width * input_height;
+      if (has_bias) {
+        const vector<float> biases = param.Biases();
+        #pragma omp parallel for
+        for (int c = 0; c < chan_size; ++c) {
+          const float *iptr = input_x_ptr + c * size;
+          float *optr = out_ptr + c * size;
+          for (int i = 0; i < size; ++i) {
+            optr[i] = iptr[i] * scales[c] + biases[c];
+          }
+        }
+      } else {
+        #pragma omp parallel for
+        for (int c = 0; c < chan_size; ++c) {
+          const float *iptr = input_x_ptr + c * size;
+          float *optr = out_ptr + c * size;
+          for (int i = 0; i < size; ++i) {
+            optr[i] = iptr[i] * scales[c];
+          }
+        }
+      }
+    } break;
+    case 4: {
+      // Dimensions are read as (batch, channel, height, width); each of the
+      // four extents comes from its own dimension index.
+      const int batch_size = input_x->dims()[0];
+      const int chan_size = input_x->dims()[1];
+      const int input_height = input_x->dims()[2];
+      const int input_width = input_x->dims()[3];
+      int size = input_width * input_height;
+      if (has_bias) {
+        const vector<float> biases = param.Biases();
+        #pragma omp parallel for
+        for (int b = 0; b < batch_size; ++b) {
+          for (int c = 0; c < chan_size; ++c) {
+            // Plane (b, c) starts at (b * chan_size + c) * size; the product
+            // b * c would alias different planes and skip others.
+            const int offset = (b * chan_size + c) * size;
+            const float *iptr = input_x_ptr + offset;
+            float *optr = out_ptr + offset;
+            for (int i = 0; i < size; ++i) {
+              optr[i] = iptr[i] * scales[c] + biases[c];
+            }
+          }
+        }
+      } else {
+        #pragma omp parallel for
+        for (int b = 0; b < batch_size; ++b) {
+          for (int c = 0; c < chan_size; ++c) {
+            const int offset = (b * chan_size + c) * size;
+            const float *iptr = input_x_ptr + offset;
+            float *optr = out_ptr + offset;
+            for (int i = 0; i < size; ++i) {
+              optr[i] = iptr[i] * scales[c];
+            }
+          }
+        }
+      }
+    } break;
+    default:
+      break;
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/slice_kernel.cpp
+++ b/src/operators/kernel/arm/slice_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef SLICE_OP
+#include "operators/kernel/slice_kernel.h"
+namespace paddle_mobile {
+// No definitions here: the operators namespace is left empty in this file.
+namespace operators {}
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/prelu_kernel.h
+++ b/src/operators/kernel/prelu_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// PReLU kernel declaration, parameterised by device type and element type.
+// Compute is only declared here; a specialization for a concrete device must
+// be supplied in a source file.
+template <typename DeviceType, typename T>
+class PReluKernel : public framework::OpKernelBase<DeviceType, PReluParam> {
+ public:
+  // Runs the kernel on the tensors described by `param`.
+  void Compute(const PReluParam& param) const;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
--- a/src/operators/kernel/resize_kernel.h
+++ b/src/operators/kernel/resize_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef RESIZE_OP
+#pragma once
+#include <vector>
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// Computes the output shape of the resize op.
+//
+// When param.InputShape() is null, the current dims of the output tensor are
+// returned unchanged. Otherwise the batch and channel sizes are taken from the
+// input (dimensions 0 and 1) and the spatial size is either
+// param.Height() x param.Width() (when IsPyramidTest() is false) or the input
+// height/width multiplied by OutHeightScale()/OutWidthScale() and truncated to
+// int. The requested sizes and scales are enforced to be positive.
+// NOTE(review): the shape tensor only gates this branch; its values are not
+// read -- confirm that is intended.
+inline framework::DDim CalOutputShape(const ResizeParam &param) {
+  const auto *input_x = param.InputX();
+  auto *out = param.Out();
+  framework::DDim out_dims = out->dims();
+  const auto *input_shape = param.InputShape();
+  if (input_shape) {
+    const int in_batch_size = input_x->dims()[0];
+    const int in_chan_size = input_x->dims()[1];
+    const int in_height = input_x->dims()[2];
+    const int in_width = input_x->dims()[3];
+    int out_height = 0;
+    int out_width = 0;
+    bool is_pyramid_test = param.IsPyramidTest();
+    if (is_pyramid_test == false) {
+      out_height = param.Height();
+      out_width = param.Width();
+      PADDLE_MOBILE_ENFORCE(out_height > 0, "output height is required");
+      PADDLE_MOBILE_ENFORCE(out_width > 0, "output width is required");
+    } else {
+      float out_height_scale = param.OutHeightScale();
+      float out_width_scale = param.OutWidthScale();
+      PADDLE_MOBILE_ENFORCE(out_height_scale > 0,
+                            "output height scale is required");
+      PADDLE_MOBILE_ENFORCE(out_width_scale > 0,
+                            "output width scale is required");
+      out_height = int(out_height_scale * in_height);
+      out_width = int(out_width_scale * in_width);
+    }
+    // Use the computed output size; building the shape from the input
+    // height/width would make the resize a same-size copy.
+    out_dims = framework::make_ddim(
+        {in_batch_size, in_chan_size, out_height, out_width});
+  }
+  return out_dims;
+}
+// Resize kernel declaration, parameterised by device type and element type.
+// Compute is only declared here; a specialization for a concrete device must
+// be supplied in a source file.
+template <typename DeviceType, typename T>
+class ResizeKernel : public framework::OpKernelBase<DeviceType, ResizeParam> {
+ public:
+  // Runs the kernel on the tensors described by `param`.
+  void Compute(const ResizeParam &param) const;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/scale_kernel.h
+++ b/src/operators/kernel/scale_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// Scale kernel declaration, parameterised by device type and element type.
+// Compute is only declared here; a specialization for a concrete device must
+// be supplied in a source file.
+template <typename DeviceType, typename T>
+class ScaleKernel : public framework::OpKernelBase<DeviceType, ScaleParam> {
+ public:
+  // Runs the kernel on the tensors described by `param`.
+  void Compute(const ScaleParam& param) const;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
--- a/src/operators/kernel/slice_kernel.h
+++ b/src/operators/kernel/slice_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// Slice kernel, parameterised by device type and element type.
+template <typename DeviceType, typename T>
+class SliceKernel : public framework::OpKernelBase<DeviceType, SliceParam> {
+ public:
+  // Currently a no-op: the body is empty, so nothing is read from or written
+  // through `param`.
+  void Compute(const SliceParam& param) const {}
+};
+}  // namespace operators
+}  // namespace paddle_mobile
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
--- a/src/operators/math/gemm.h
+++ b/src/operators/math/gemm.h
@@ -19,12 +19,8 @@ limitations under the License. */
 #define B(i, j) B[(i)*ldb + (j)]
 #define C(i, j) C[(i)*ldc + (j)]
-// 分块计算的块大小，mc 与 kc 分别对应分块计算时的 m 与 k
-#define MC 128
-#define KC 128
-#define NC 1024
 #define MR 4
-#define NR 4
+#define NR 8
 #define s_min(i, j) ((i) < (j) ? (i) : (j))
@@ -49,28 +45,66 @@ void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb,
                  float *buffer);
 // 分块矩阵乘法
-void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda,
+void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
-                 const float *B, int ldb, float beta, float *C, int ldc,
+                 float beta, float *c, float *C, int ldc, bool relu);
-                 int first_time);
+void InnerKernelWithBn(int mc, int nc, float alpha, const float *a,
+                       const float *b, float beta, float *c, float *C, int ldc,
+                       bool relu, float *new_scale, float *new_bias);
 // 向量矩阵乘法 (M = 1)
 void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
-                  const float *B, int ldb, float beta, float *C, int ldc);
+                  const float *B, int ldb, float beta, float *C, int ldc,
+                  bool relu);
-// 计算一个更小的 4 * 4 的 C 矩阵分块
-void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B,
+void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A,
-               int ldb, float beta, float *C, int ldc, int mc, int nc);
+                        int lda, const float *B, int ldb, float beta, float *C,
+                        int ldc, bool relu, float *new_scale, float *new_bias);
-void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b,
-                    int ldb, float beta, float *C, int ldc, int mc, int nc,
+// 计算一个更小的 C 矩阵分块
-                    bool relu);
+void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc);
+void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc);
+// 分块矩阵乘法结果回写
+// C = A * B
+void WriteBasic(int mc, int nc, float *c, float *C, int ldc);
+// C = alpha * A * B + beta * C
+void WriteWithAlphaBeta(int mc, int nc, float *c, float *C, int ldc);
+// C = A * B + C
+void WriteWithAdd(int mc, int nc, float *c, float *C, int ldc);
+// C = A * B + C, relu(C)
+void WriteWithAddRelu(int mc, int nc, float *c, float *C, int ldc);
+// C = A * B, batchnorm(C)
+void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
+                 float *new_bias);
+// C = A * B, batchnorm(C), relu(C)
+void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc,
+                     float *new_scale, float *new_bias);
+// 向量矩阵乘法结果回写
+// C = A * B
+void VecWriteBasic(int n, float *c, float *C, int ldc);
+// C = alpha * A * B + beta * C
+void VecWriteWithAlphaBeta(int n, float *c, float *C, int ldc);
+// C = A * B + C
+void VecWriteWithAdd(int n, float *c, float *C, int ldc);
+// C = A * B + C, relu(C)
+void VecWriteWithAddRelu(int n, float *c, float *C, int ldc);
+// C = A * B, batchnorm(C)
+void VecWriteWithBn(int n, float *c, float *C, int ldc, float *new_scale,
+                    float *new_bias);
+// C = A * B, batchnorm(C), relu(C)
+void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *new_scale,
+                        float *new_bias);
 // 32位 float 矩阵乘法
-void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
+void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
-           const float *B, int ldb, float beta, float *C, int ldc);
+           const float *B, int ldb, float beta, float *C, int ldc, bool relu);
-void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda,
+// 32位 float 矩阵乘法, 并对结果进行 batchnorm
-                const float *B, int ldb, float beta, float *C, int ldc);
+void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda,
+                 const float *B, int ldb, float beta, float *C, int ldc,
+                 bool relu, float *new_scale, float *new_bias);
 // 64位 double 矩阵乘法
 void dgemm(int m, int n, int k, float alpha, const double *A, int lda,

--- a/src/operators/math/math_function.cpp
+++ b/src/operators/math/math_function.cpp
@@ -39,22 +39,18 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
  int M = dim_out[0];
  int N = dim_out[1];
-  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+  int K = (!trans_a) ? dim_a[1] : dim_a[0];
-  if (relu) {
+  Sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
-    sgemm_relu(M, N, K, alpha, matrix_a.data<float>(), K,
+        beta, matrix_out->data<float>(), N, relu);
-               matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N);
-  } else {
-    sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
-          beta, matrix_out->data<float>(), N);
-  }
 }
 template <>
-void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
+void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
-                    const framework::Tensor &matrix_b, bool trans_b,
+                         const framework::Tensor &matrix_b, bool trans_b,
-                    double alpha, framework::Tensor *matrix_out, double beta,
+                         float alpha, framework::Tensor *matrix_out, float beta,
-                    bool relu) {
+                         bool relu, framework::Tensor *new_scale,
+                         framework::Tensor *new_bias) {
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
@@ -71,7 +67,11 @@ void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
  int M = dim_out[0];
  int N = dim_out[1];
-  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+  int K = (!trans_a) ? dim_a[1] : dim_a[0];
+  SgemmWithBn(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(),
+              N, beta, matrix_out->data<float>(), N, relu,
+              new_scale->data<float>(), new_bias->data<float>());
 }
 }  // namespace math

--- a/src/operators/math/math_function.h
+++ b/src/operators/math/math_function.h
@@ -26,6 +26,12 @@ template <typename T>
 void matmul(const framework::Tensor &matrix_a, bool trans_a,
            const framework::Tensor &matrix_b, bool trans_b, T alpha,
            framework::Tensor *matrix_out, T beta, bool relu = false);
+// Variant of matmul that additionally receives batch-norm scale and bias
+// tensors. The float specialization passes their data pointers, together
+// with the relu flag, on to SgemmWithBn.
+template <typename T>
+void matmulWithBn(const framework::Tensor &matrix_a, bool trans_a,
+                  const framework::Tensor &matrix_b, bool trans_b, T alpha,
+                  framework::Tensor *matrix_out, T beta, bool relu,
+                  framework::Tensor *new_scale, framework::Tensor *new_bias);
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -715,6 +715,123 @@ class ReshapeParam : public OpParam {
 };
 #endif
+#ifdef SCALE_OP
+/*
+ * Parameters of the scale op. Tensors are looked up through InputXFrom,
+ * InputBiasFrom and OutFrom; the "inplace", "has_bias", "scales" and
+ * "biases" attributes are read with GetAttr.
+ */
+class ScaleParam : public OpParam {
+ public:
+  // Members are initialised in declaration order, which is also the order in
+  // which the lookups are performed.
+  ScaleParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+             const AttributeMap &attrs, const Scope &scope)
+      : input_x_(InputXFrom<LoDTensor>(inputs, scope)),
+        input_bias_(InputBiasFrom<framework::LoDTensor>(inputs, scope)),
+        out_(OutFrom<LoDTensor>(outputs, scope)),
+        inplace_(GetAttr<bool>("inplace", attrs)),
+        has_bias_(GetAttr<bool>("has_bias", attrs)),
+        scales_(GetAttr<vector<float>>("scales", attrs)),
+        biases_(GetAttr<vector<float>>("biases", attrs)) {}
+  // Tensor accessors.
+  const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputBias() const { return input_bias_; }
+  Tensor *Out() const { return out_; }
+  // Attribute accessors.
+  const bool &Inplace() const { return inplace_; }
+  const bool &HasBias() const { return has_bias_; }
+  const vector<float> &Scales() const { return scales_; }
+  const vector<float> &Biases() const { return biases_; }
+ private:
+  Tensor *input_x_;
+  Tensor *input_bias_;
+  Tensor *out_;
+  bool inplace_;
+  bool has_bias_;
+  vector<float> scales_;
+  vector<float> biases_;
+};
+#endif
+#ifdef SLICE_OP
+/*
+ * Parameters of the slice op. Tensors are looked up through InputXFrom,
+ * InputShapeFrom and OutFrom; the "axis", "slice_points" and "inplace"
+ * attributes are read with GetAttr.
+ */
+class SliceParam : public OpParam {
+ public:
+  // Members are initialised in declaration order, which is also the order in
+  // which the lookups are performed.
+  SliceParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+             const AttributeMap &attrs, const Scope &scope)
+      : input_x_(InputXFrom<LoDTensor>(inputs, scope)),
+        input_shape_(InputShapeFrom<LoDTensor>(inputs, scope)),
+        out_(OutFrom<LoDTensor>(outputs, scope)),
+        axis_(GetAttr<int>("axis", attrs)),
+        slice_points_(GetAttr<vector<int>>("slice_points", attrs)),
+        inplace_(GetAttr<bool>("inplace", attrs)) {}
+  // Tensor accessors.
+  const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputShape() const { return input_shape_; }
+  Tensor *Out() const { return out_; }
+  // Attribute accessors.
+  const int &Axis() const { return axis_; }
+  const vector<int> &SlicePoints() const { return slice_points_; }
+  const bool &Inplace() const { return inplace_; }
+ private:
+  Tensor *input_x_;
+  Tensor *input_shape_;
+  Tensor *out_;
+  int axis_;
+  vector<int> slice_points_;
+  bool inplace_;
+};
+#endif
+#ifdef RESIZE_OP
+/*
+ * Parameters of the resize op. Tensors are looked up through InputXFrom,
+ * InputShapeFrom and OutFrom; the "is_pyramid_test", "height", "width",
+ * "out_height_scale" and "out_width_scale" attributes are read with GetAttr.
+ */
+class ResizeParam : public OpParam {
+ public:
+  // Members are initialised in declaration order, which is also the order in
+  // which the lookups are performed.
+  ResizeParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+              const AttributeMap &attrs, const Scope &scope)
+      : input_x_(InputXFrom<LoDTensor>(inputs, scope)),
+        input_shape_(InputShapeFrom<LoDTensor>(inputs, scope)),
+        out_(OutFrom<LoDTensor>(outputs, scope)),
+        is_pyramid_test_(GetAttr<bool>("is_pyramid_test", attrs)),
+        height_(GetAttr<int>("height", attrs)),
+        width_(GetAttr<int>("width", attrs)),
+        out_height_scale_(GetAttr<float>("out_height_scale", attrs)),
+        out_width_scale_(GetAttr<float>("out_width_scale", attrs)) {}
+  // Tensor accessors.
+  const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputShape() const { return input_shape_; }
+  Tensor *Out() const { return out_; }
+  // Attribute accessors.
+  const bool &IsPyramidTest() const { return is_pyramid_test_; }
+  const int &Height() const { return height_; }
+  const int &Width() const { return width_; }
+  const float &OutHeightScale() const { return out_height_scale_; }
+  const float &OutWidthScale() const { return out_width_scale_; }
+ private:
+  Tensor *input_x_;
+  Tensor *input_shape_;
+  Tensor *out_;
+  bool is_pyramid_test_;
+  int height_;
+  int width_;
+  float out_height_scale_;
+  float out_width_scale_;
+};
+#endif
 #ifdef RELU_OP
 /*
 * @b op 层实例化好这个 param 传递给 kernel 层使用
@@ -737,6 +854,27 @@ class ReluParam : public OpParam {
 };
 #endif
+#ifdef PRELU_OP
+/*
+ * Parameters of the prelu op. Tensors are looked up through InputXFrom and
+ * OutFrom; the "slopes" attribute is read with GetAttr.
+ */
+class PReluParam : public OpParam {
+ public:
+  // Members are initialised in declaration order, which is also the order in
+  // which the lookups are performed.
+  PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+             const AttributeMap &attrs, const Scope &scope)
+      : input_x_(InputXFrom<LoDTensor>(inputs, scope)),
+        out_(OutFrom<LoDTensor>(outputs, scope)),
+        slopes_(GetAttr<vector<float>>("slopes", attrs)) {}
+  // Tensor accessors.
+  const Tensor *InputX() const { return input_x_; }
+  Tensor *Out() const { return out_; }
+  // Attribute accessor.
+  const vector<float> &Slopes() const { return slopes_; }
+ private:
+  Tensor *input_x_;
+  Tensor *out_;
+  vector<float> slopes_;
+};
+#endif
 #ifdef FUSION_FC_OP
 class FusionFcParam : public OpParam {
 public:

--- a/src/operators/prelu_op.cpp
+++ b/src/operators/prelu_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PRELU_OP
+#include "operators/prelu_op.h"
+namespace paddle_mobile {
+namespace operators {
+// Resizes the output tensor to the dimensions of input X.
+template <typename Dtype, typename T>
+void PReluOp<Dtype, T>::InferShape() const {
+  const auto &param = this->param_;
+  // Take a copy of the dims before resizing the output.
+  auto x_dims = param.InputX()->dims();
+  param.Out()->Resize(x_dims);
+}
+// Explicit instantiation for float on CPU.
+template class PReluOp<CPU, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+/*
+ * @b Every op has to be registered. The argument of USE_OP and the first
+ *    argument of REGISTER_OPERATOR must both correspond to the op type
+ *    used in the model.
+ * */
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(prelu);
+REGISTER_OPERATOR_CPU(prelu, ops::PReluOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(prelu);
+REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp);
+#endif
+// No FPGA registration yet.
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+#endif
--- a/src/operators/prelu_op.h
+++ b/src/operators/prelu_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PRELU_OP
+#pragma once
+#include <memory>
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/prelu_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using paddle_mobile::framework::Tensor;
+// Operator class that combines PReluParam with PReluKernel through
+// framework::OperatorWithKernel.
+template <typename DeviceType, typename T>
+class PReluOp
+    : public framework::OperatorWithKernel<
+          DeviceType, PReluParam, operators::PReluKernel<DeviceType, T>> {
+ public:
+  // Forwards every argument unchanged to the OperatorWithKernel constructor.
+  PReluOp(const std::string &type, const VariableNameMap &inputs,
+          const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+          std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, PReluParam,
+                                      operators::PReluKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  // Also expose the constructors of the base class.
+  using framework::OperatorWithKernel<
+      DeviceType, PReluParam,
+      operators::PReluKernel<DeviceType, T>>::OperatorWithKernel;
+  // Shape inference hook; only declared in this header.
+  void InferShape() const override;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/resize_op.cpp
+++ b/src/operators/resize_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef RESIZE_OP
+#include "operators/resize_op.h"
+#include <vector>
+namespace paddle_mobile {
+namespace operators {
+// Resizes the output tensor to whatever CalOutputShape returns for the
+// current parameters.
+template <typename Dtype, typename T>
+void ResizeOp<Dtype, T>::InferShape() const {
+  const auto &param = this->param_;
+  auto target_dims = CalOutputShape(param);
+  param.Out()->Resize(target_dims);
+}
+// Explicit instantiation for float on CPU.
+template class ResizeOp<CPU, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+// Operator registration for the enabled back ends.
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(resize);
+REGISTER_OPERATOR_CPU(resize, ops::ResizeOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(resize);
+REGISTER_OPERATOR_MALI_GPU(resize, ops::ResizeOp);
+#endif
+// No FPGA registration yet.
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+#endif
--- a/src/operators/resize_op.h
+++ b/src/operators/resize_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef RESIZE_OP
+#pragma once
+#include <memory>
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/resize_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using paddle_mobile::framework::Tensor;
+// Operator class that combines ResizeParam with ResizeKernel through
+// framework::OperatorWithKernel.
+template <typename DeviceType, typename T>
+class ResizeOp
+    : public framework::OperatorWithKernel<
+          DeviceType, ResizeParam, operators::ResizeKernel<DeviceType, T>> {
+ public:
+  // Forwards every argument unchanged to the OperatorWithKernel constructor.
+  // `attrs` is taken by const reference, like the other operators in this
+  // directory, so the attribute map is not copied for the call.
+  ResizeOp(const std::string &type, const VariableNameMap &inputs,
+           const VariableNameMap &outputs,
+           const framework::AttributeMap &attrs,
+           std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, ResizeParam,
+                                      operators::ResizeKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  // Also expose the constructors of the base class.
+  using framework::OperatorWithKernel<
+      DeviceType, ResizeParam,
+      operators::ResizeKernel<DeviceType, T>>::OperatorWithKernel;
+  // Shape inference hook; only declared in this header.
+  void InferShape() const override;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/scale_op.cpp
+++ b/src/operators/scale_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef SCALE_OP
+#include "operators/scale_op.h"
+#include <vector>
+namespace paddle_mobile {
+namespace operators {
+// Resizes the output tensor to the dimensions of input X.
+template <typename Dtype, typename T>
+void ScaleOp<Dtype, T>::InferShape() const {
+  const auto &param = this->param_;
+  // Take a copy of the dims before resizing the output.
+  auto x_dims = param.InputX()->dims();
+  param.Out()->Resize(x_dims);
+}
+// Explicit instantiation for float on CPU.
+template class ScaleOp<CPU, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+// Operator registration for the enabled back ends.
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(scale);
+REGISTER_OPERATOR_CPU(scale, ops::ScaleOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(scale);
+REGISTER_OPERATOR_MALI_GPU(scale, ops::ScaleOp);
+#endif
+// No FPGA registration yet.
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+#endif
--- a/src/operators/scale_op.h
+++ b/src/operators/scale_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef SCALE_OP
+#pragma once
+#include <memory>
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/scale_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using paddle_mobile::framework::Tensor;
+// Operator class that combines ScaleParam with ScaleKernel through
+// framework::OperatorWithKernel.
+template <typename DeviceType, typename T>
+class ScaleOp
+    : public framework::OperatorWithKernel<
+          DeviceType, ScaleParam, operators::ScaleKernel<DeviceType, T>> {
+ public:
+  // Forwards every argument unchanged to the OperatorWithKernel constructor.
+  ScaleOp(const std::string &type, const VariableNameMap &inputs,
+          const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+          std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, ScaleParam,
+                                      operators::ScaleKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  // Also expose the constructors of the base class.
+  using framework::OperatorWithKernel<
+      DeviceType, ScaleParam,
+      operators::ScaleKernel<DeviceType, T>>::OperatorWithKernel;
+  // Shape inference hook; only declared in this header.
+  void InferShape() const override;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/slice_op.cpp
+++ b/src/operators/slice_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef SLICE_OP
+#include "operators/slice_op.h"
+#include <vector>
+namespace paddle_mobile {
+namespace operators {
+// Shape inference for the slice op. Currently a stub: the body is empty, so
+// no tensor is resized here.
+template <typename Dtype, typename T>
+void SliceOp<Dtype, T>::InferShape() const {
+  // TODO: add InputShape() detection.
+}
+// Explicit instantiation for float on CPU.
+template class SliceOp<CPU, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+// Operator registration for the enabled back ends.
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(slice);
+REGISTER_OPERATOR_CPU(slice, ops::SliceOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(slice);
+REGISTER_OPERATOR_MALI_GPU(slice, ops::SliceOp);
+#endif
+// No FPGA registration yet.
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+#endif
--- a/src/operators/slice_op.h
+++ b/src/operators/slice_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef SLICE_OP
+#pragma once
+#include <memory>
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/slice_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using paddle_mobile::framework::Tensor;
+// Operator class that combines SliceParam with SliceKernel through
+// framework::OperatorWithKernel.
+template <typename DeviceType, typename T>
+class SliceOp
+    : public framework::OperatorWithKernel<
+          DeviceType, SliceParam, operators::SliceKernel<DeviceType, T>> {
+ public:
+  // Forwards every argument unchanged to the OperatorWithKernel constructor.
+  SliceOp(const std::string &type, const VariableNameMap &inputs,
+          const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+          std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, SliceParam,
+                                      operators::SliceKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  // Also expose the constructors of the base class.
+  using framework::OperatorWithKernel<
+      DeviceType, SliceParam,
+      operators::SliceKernel<DeviceType, T>>::OperatorWithKernel;
+  // Shape inference hook; only declared in this header.
+  void InferShape() const override;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/test/operators/test_prelu_op.cpp
+++ b/test/operators/test_prelu_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "../executor_for_test.h"
+#include "../test_include.h"
+#include "operators/prelu_op.h"
+int main() {
+  using PReluCpuOp =
+      paddle_mobile::operators::PReluOp<paddle_mobile::CPU, float>;
+  // Load the model; the enforce stops the test if it could not be read.
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(g_resnet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+  Executor4Test<paddle_mobile::CPU, PReluCpuOp> executor(program, "prelu");
+  // 1. input tensors: one {1, 2, 3, 4} tensor set up through CreateInput.
+  vector<Tensor> input_tensors;
+  Tensor x;
+  auto x_data = CreateInput<float>(&x, {1, 2, 3, 4}, -1, 1);
+  input_tensors.push_back(x);
+  // 2. name of the input variable
+  vector<string> input_names = {"batch_norm_0.tmp_2"};
+  // 3. name of the output variable
+  vector<string> output_names = {"batch_norm_0.tmp_3"};
+  // 4. expected output dims
+  vector<DDim> out_ddims;
+  out_ddims.push_back(paddle_mobile::framework::make_ddim({1, 2, 3, 4}));
+  // Run the op and log every element of the first output.
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+  auto result = output[0]->data<float>();
+  for (int idx = 0; idx < output[0]->numel(); ++idx) {
+    DLOG << " value of output: " << result[idx];
+  }
+  return 0;
+}
--- a/test/operators/test_resize_op.cpp
+++ b/test/operators/test_resize_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "../test_include.h"
+#include "operators/resize_op.h"
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
+  // Stop right away when the model could not be loaded instead of handing an
+  // unusable program to the executor.
+  if (program.originProgram == nullptr) {
+    DLOG << "program file read fail";
+    return -1;
+  }
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ResizeOp<paddle_mobile::CPU, float>>
+      executor(program, "resize");
+  // Input tensor of shape {2, 3, 3, 2}, set up by SetupTensor with the
+  // arguments 0 and 1.
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {2, 3, 3, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto input_ptr = input.data<float>();
+  // NOTE(review): the variable names and output dims below look like they
+  // were taken from a reshape test - confirm they match a resize op in the
+  // model.
+  auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2});
+  auto output =
+      executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
+  auto *output_ptr = output->data<float>();
+  // Log the input and the output element by element.
+  DLOG << "input : ";
+  for (int j = 0; j < input.numel(); ++j) {
+    DLOG << " index " << j << " : " << input_ptr[j];
+  }
+  DLOG << "output : ";
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " index " << j << " : " << output_ptr[j];
+  }
+  return 0;
+}
--- a/test/operators/test_scale_op.cpp
+++ b/test/operators/test_scale_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "../test_include.h"
+#include "operators/scale_op.h"
+// Placeholder: the scale op has no test logic yet; this only checks that the
+// headers above compile and link.
+int main() { return 0; }
--- a/test/operators/test_slice_op.cpp
+++ b/test/operators/test_slice_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "../test_include.h"
+#include "operators/slice_op.h"
+// Placeholder: the slice op has no test logic yet; this only checks that the
+// headers above compile and link.
+int main() { return 0; }
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -119,6 +119,7 @@ build_for_ios() {
    fi
    cd "${BUILD_DIR}"
    make -j 8
+    cp ../../../src/ios_io/PaddleMobile.h ./build/PaddleMobile.h
    cd ./build
    # 生成符号表
    ranlib *.a
@@ -160,4 +161,4 @@ else
 		    build_error
 	    fi
 	fi
 fi
\ No newline at end of file
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -64,6 +64,10 @@ else ()
  set(TRANSPOSE_OP ON)
  set(FUSION_CONVADD_RELU_OP ON)
  set(FUSION_CONVADDBNRELU_OP ON)
+  set(PRELU_OP ON)
+  set(RESIZE_OP ON)
+  set(SCALE_OP ON)
+  set(SLICE_OP ON)
  set(DROPOUT_OP ON)
  set(IM2SEQUENCE_OP ON)
  # option(BATCHNORM_OP "" ON)
@@ -151,6 +155,18 @@ endif()
 if (FUSION_CONVADDBNRELU_OP)
  add_definitions(-DFUSION_CONVADDBNRELU_OP)
 endif()
+# Export every enabled operator switch below as a compile definition of the
+# same name (e.g. PRELU_OP -> -DPRELU_OP).
+foreach (op_switch PRELU_OP RESIZE_OP SCALE_OP SLICE_OP)
+  if (${op_switch})
+    add_definitions(-D${op_switch})
+  endif()
+endforeach()
 if (DROPOUT_OP)
  add_definitions(-DDROPOUT_OP)
 endif()