diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b3e6227acb58b3d8b3bf132fecf60c5298f172f..7d06636b587e9c001990fe9a2aae13117fd899da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,7 @@ option(DEBUGING "enable debug mode" ON) option(USE_EXCEPTION "use std exception" OFF) option(LOG_PROFILE "log profile" OFF) # select the platform to build +option(X86 "x86" OFF) option(CPU "armv7 with neon" ON) option(MALI_GPU "mali gpu" OFF) option(FPGA "fpga" OFF) @@ -18,44 +19,46 @@ include_directories(src/) if(IS_IOS) set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}") else() - set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") endif() -if (DEBUGING) +if(DEBUGING) message(STATUS "debug") - set(CMAKE_BUILD_TYPE Release) - set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") + set(CMAKE_BUILD_TYPE debug) + set(CMAKE_CXX_FLAGS "-O0 -g ${CMAKE_CXX_FLAGS}") add_definitions(-DPADDLE_MOBILE_DEBUG) -else () +else() set(CMAKE_BUILD_TYPE Release) + set(CMAKE_CXX_FLAGS "-Os ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") - add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden) -endif () +endif() -if (USE_EXCEPTION) +if(USE_EXCEPTION) message(STATUS "use exception") add_definitions(-DENABLE_EXCEPTION) add_definitions(-fexceptions) else() add_definitions(-fno-exceptions) -endif () +endif() -if (LOG_PROFILE) +if(LOG_PROFILE) add_definitions(-DPADDLE_MOBILE_PROFILE) endif() -if(USE_OPENMP) +if(USE_OPENMP AND NOT IS_IOS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") add_definitions(-DPADDLE_MOBILE_USE_OPENMP) endif() # platform control -if (ARM_LINUX) +if(ARM_LINUX) include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake") -endif () +endif() -if (CPU) - add_definitions(-DPADDLE_MOBILE_CPU) +if(CPU) + add_definitions(-DPADDLE_MOBILE_CPU) +elseif(X86) + add_definitions(-DPADDLE_MOBILE_X86) else() 
file(GLOB_RECURSE _tmp_list src/operators/kernel/arm/*.cpp src/operators/kernel/arm/*.cc) foreach(f ${_tmp_list}) @@ -68,7 +71,7 @@ else() endforeach() endif() -if (MALI_GPU) +if(MALI_GPU) add_definitions(-DPADDLE_MOBILE_MALI_GPU) add_definitions(-DUSE_ACL=1) add_definitions(-DUSE_OPENCL) @@ -120,20 +123,20 @@ else() endforeach() endif() -if (ANDROID_NDK_TOOLCHAIN_INCLUDED) +if(ANDROID_NDK_TOOLCHAIN_INCLUDED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog") else() list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h) list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h) -endif () +endif() -if (IS_IOS) +if(IS_IOS) else() - list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.h) - list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.mm) + list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h) + list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h) -endif () +endif() set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -142,7 +145,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build) # NET default -if (FPGA) +if(FPGA) set(NET "FPGAnets" CACHE STRING "select net type") else() set(NET "default" CACHE STRING "select net type") @@ -153,7 +156,7 @@ include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake") # build library -if (ANDROID_NDK_TOOLCHAIN_INCLUDED) +if(ANDROID_NDK_TOOLCHAIN_INCLUDED) list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS) add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) elseif(IS_IOS) @@ -168,9 +171,9 @@ elseif(IS_IOS) else() add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) 
endif() -else () - add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) -endif () +else() + add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) +endif() # unit test if(DEBUGING) diff --git a/README.md b/README.md index de7dd530c94b4a3055cbf07a4a19a55c21457ed0..fd5222655821e36fe194225a4d71a3b60b8a89d5 100644 --- a/README.md +++ b/README.md @@ -69,18 +69,8 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平 - **苹果设备的GPU Metal实现** -|mobilenetfssd|速度| -|------------|-----| -|A9(ms)|33.78| -|A10(ms)|24.05| -|A11(ms)|17.15| -||| -|genet|速度| -|A9(ms) |3.49| -|A10(ms)|2.54| -|A11(ms)|1.43| - - + 基于Metal实现的苹果设备的GPU预测库,也已经在实现中,近期也会有相应可运行版本。 + - **FPGA** FPGA实现正在进行中,是基于Xilinx的ZU5目标开发板。 diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj index d6114880efcaf528bd26fcda11e08ec68d943575..f3ab9fc66a072cd5b0bbba56ae99258f04be3612 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj @@ -8,29 +8,22 @@ /* Begin PBXBuildFile section */ 30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; }; - C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = C2E67E5D21524E460013F575 /* LoadPointerViewController.m */; }; FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; }; FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; }; FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; }; 
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; }; FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; }; FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; }; - FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; }; - FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; }; - FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; }; + FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602C72108580600FACB58 /* MetalHelper.swift */; }; + FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; }; + FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; }; + FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; }; + FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; }; + FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */; }; + FCDFD41B211D91C7005AB38B /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD41A211D91C7005AB38B /* synset.txt */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* 
paddle_mobile.framework */; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; - FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; }; - FCFE9B692152858600DECA15 /* hand.jpg.zip in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B632152858600DECA15 /* hand.jpg.zip */; }; - FCFE9B6A2152858600DECA15 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B642152858600DECA15 /* synset.txt */; }; - FCFE9B6B2152858600DECA15 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B652152858600DECA15 /* banana.jpeg */; }; - FCFE9B6C2152858600DECA15 /* hand.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B662152858600DECA15 /* hand.jpg */; }; - FCFE9B6D2152858600DECA15 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B672152858600DECA15 /* iphone.JPG */; }; - FCFE9B6E2152858600DECA15 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B682152858600DECA15 /* paddle-mobile.png */; }; - FCFE9C512152859600DECA15 /* genet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B752152859500DECA15 /* genet_params */; }; - FCFE9C522152859600DECA15 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B762152859500DECA15 /* genet_model */; }; - FCFE9D232152859600DECA15 /* ar_model in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9C4C2152859500DECA15 /* ar_model */; }; - FCFE9D242152859600DECA15 /* ar_params in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9C4D2152859500DECA15 /* ar_params */; }; + FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCEEE7D3210627A000444BEC /* banana.jpeg */; }; /* End PBXBuildFile section */ /* Begin 
PBXCopyFilesBuildPhase section */ @@ -51,8 +44,6 @@ 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.release.xcconfig"; sourceTree = ""; }; 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_demo.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = ""; }; - C2E67E5C21524E460013F575 /* LoadPointerViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LoadPointerViewController.h; sourceTree = ""; }; - C2E67E5D21524E460013F575 /* LoadPointerViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LoadPointerViewController.m; sourceTree = ""; }; FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = ""; }; FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; @@ -61,23 +52,15 @@ FC039B8820E11C560081E9F8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; 
sourceTree = ""; }; FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; - FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = ""; }; - FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; }; - FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = ""; }; - FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; - FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = ""; }; + FC3602C72108580600FACB58 /* MetalHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-unit-test/paddle-mobile-unit-test/MetalHelper.swift"; sourceTree = ""; }; + FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = ""; }; + FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = ""; }; + FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = ""; }; + FCD04E6220F3146A0007374F /* model */ = 
{isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = ""; }; + FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelHelper.swift; sourceTree = ""; }; + FCDFD41A211D91C7005AB38B /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = ""; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = ""; }; - FCFE9B632152858600DECA15 /* hand.jpg.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = hand.jpg.zip; sourceTree = ""; }; - FCFE9B642152858600DECA15 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = ""; }; - FCFE9B652152858600DECA15 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = ""; }; - FCFE9B662152858600DECA15 /* hand.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = hand.jpg; sourceTree = ""; }; - FCFE9B672152858600DECA15 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = ""; }; - FCFE9B682152858600DECA15 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = ""; }; - FCFE9B752152859500DECA15 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = ""; }; - FCFE9B762152859500DECA15 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = ""; }; - FCFE9C4C2152859500DECA15 /* ar_model */ = {isa = 
PBXFileReference; lastKnownFileType = file; path = ar_model; sourceTree = ""; }; - FCFE9C4D2152859500DECA15 /* ar_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = ar_params; sourceTree = ""; }; + FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -105,7 +88,6 @@ 7B7DED984E9EE7BFB45E24E8 /* Frameworks */ = { isa = PBXGroup; children = ( - FC4FD97B2140EE250073E130 /* libc++.tbd */, 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */, ); name = Frameworks; @@ -133,82 +115,49 @@ FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = { isa = PBXGroup; children = ( - FCFE9B6F2152859500DECA15 /* models */, - FCFE9B622152858600DECA15 /* images */, - FC803BCA214D27920094B8E5 /* VideoCapture */, - FC8CFED2213519540094D569 /* Net */, + FC0E2C2020EDC03B009C1FAC /* models */, + FC0E2C1D20EDC030009C1FAC /* images */, FC039B8120E11C550081E9F8 /* AppDelegate.swift */, + FC013927210204A3008100E3 /* PreProcessKernel.metal */, FC039B8320E11C550081E9F8 /* ViewController.swift */, FC039B8520E11C550081E9F8 /* Main.storyboard */, FC039B8820E11C560081E9F8 /* Assets.xcassets */, FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */, FC039B8D20E11C560081E9F8 /* Info.plist */, - FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */, - FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */, - C2E67E5C21524E460013F575 /* LoadPointerViewController.h */, - C2E67E5D21524E460013F575 /* LoadPointerViewController.m */, + FC3602C72108580600FACB58 /* MetalHelper.swift */, + FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */, ); path = "paddle-mobile-demo"; sourceTree = ""; }; - FC803BCA214D27920094B8E5 /* VideoCapture */ = { - isa = PBXGroup; - children = ( - FC803BCB214D27920094B8E5 /* FPSCounter.swift */, - FC803BCC214D27920094B8E5 /* VideoCapture.swift */, - ); - path = 
VideoCapture; - sourceTree = ""; - }; - FC8CFED2213519540094D569 /* Net */ = { - isa = PBXGroup; - children = ( - FC013927210204A3008100E3 /* PreProcessKernel.metal */, - FCBCCC542122EF5400D94F7E /* MetalHelper.swift */, - ); - path = Net; - sourceTree = ""; - }; - FCFE9B622152858600DECA15 /* images */ = { + FC0E2C1D20EDC030009C1FAC /* images */ = { isa = PBXGroup; children = ( - FCFE9B632152858600DECA15 /* hand.jpg.zip */, - FCFE9B642152858600DECA15 /* synset.txt */, - FCFE9B652152858600DECA15 /* banana.jpeg */, - FCFE9B662152858600DECA15 /* hand.jpg */, - FCFE9B672152858600DECA15 /* iphone.JPG */, - FCFE9B682152858600DECA15 /* paddle-mobile.png */, + FC918192211DC70500B6F354 /* iphone.JPG */, + FC918190211DBC3500B6F354 /* paddle-mobile.png */, + FCDFD41A211D91C7005AB38B /* synset.txt */, + FCEEE7D3210627A000444BEC /* banana.jpeg */, ); name = images; path = ../../images; sourceTree = ""; }; - FCFE9B6F2152859500DECA15 /* models */ = { + FC0E2C2020EDC03B009C1FAC /* models */ = { isa = PBXGroup; children = ( - FCFE9B742152859500DECA15 /* genet */, - FCFE9C4B2152859500DECA15 /* fluid_fssd_new_ar */, + FCD04E6020F3146A0007374F /* mobilenet */, ); name = models; path = ../../models; sourceTree = ""; }; - FCFE9B742152859500DECA15 /* genet */ = { - isa = PBXGroup; - children = ( - FCFE9B752152859500DECA15 /* genet_params */, - FCFE9B762152859500DECA15 /* genet_model */, - ); - path = genet; - sourceTree = ""; - }; - FCFE9C4B2152859500DECA15 /* fluid_fssd_new_ar */ = { + FCD04E6020F3146A0007374F /* mobilenet */ = { isa = PBXGroup; children = ( - FCFE9C4C2152859500DECA15 /* ar_model */, - FCFE9C4D2152859500DECA15 /* ar_params */, + FCD04E6120F3146A0007374F /* params */, + FCD04E6220F3146A0007374F /* model */, ); - path = fluid_fssd_new_ar; + path = mobilenet; sourceTree = ""; }; /* End PBXGroup section */ @@ -246,7 +195,6 @@ TargetAttributes = { FC039B7D20E11C550081E9F8 = { CreatedOnToolsVersion = 9.3.1; - LastSwiftMigration = 0940; }; }; }; @@ -273,18 +221,14 @@ isa = 
PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - FCFE9D232152859600DECA15 /* ar_model in Resources */, + FCD04E6320F3146B0007374F /* params in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, - FCFE9C522152859600DECA15 /* genet_model in Resources */, - FCFE9D242152859600DECA15 /* ar_params in Resources */, - FCFE9B6E2152858600DECA15 /* paddle-mobile.png in Resources */, - FCFE9C512152859600DECA15 /* genet_params in Resources */, - FCFE9B692152858600DECA15 /* hand.jpg.zip in Resources */, + FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, - FCFE9B6A2152858600DECA15 /* synset.txt in Resources */, - FCFE9B6B2152858600DECA15 /* banana.jpeg in Resources */, - FCFE9B6D2152858600DECA15 /* iphone.JPG in Resources */, - FCFE9B6C2152858600DECA15 /* hand.jpg in Resources */, + FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */, + FC918193211DC70500B6F354 /* iphone.JPG in Resources */, + FCDFD41B211D91C7005AB38B /* synset.txt in Resources */, + FCD04E6420F3146B0007374F /* model in Resources */, FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -336,13 +280,10 @@ buildActionMask = 2147483647; files = ( FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */, - FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */, + FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */, FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */, - FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */, - FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */, - FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */, - C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */, FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */, + FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; 
@@ -487,23 +428,19 @@ baseConfigurationReference = 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; - CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = A798K58VVL; - ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "com.baidu.paddle-mobile"; + PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE = ""; PROVISIONING_PROFILE_SPECIFIER = ""; - SWIFT_OBJC_BRIDGING_HEADER = "paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h"; - SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -514,22 +451,19 @@ baseConfigurationReference = 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; - CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = A798K58VVL; - ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = "com.baidu.paddle-mobile"; + PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE = ""; PROVISIONING_PROFILE_SPECIFIER = ""; - SWIFT_OBJC_BRIDGING_HEADER = "paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h"; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift index 537fb06ed9e5b9100bea43b7acae9c014e0f4a78..54dad2b5bf721f3d132bad2502d30b34ca0773ab 100644 --- 
a/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift @@ -19,6 +19,7 @@ class AppDelegate: UIResponder, UIApplicationDelegate { var window: UIWindow? + func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { // Override point for customization after application launch. return true diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard index d72694fdacf5b46821ba6422fa77e095f92382b9..a5efadeb97ccc41449dc32a2c1dfcdfcf9fceac5 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard @@ -11,34 +11,6 @@ - - - - - - - - - - - - - - - - - - - - - - @@ -48,11 +20,12 @@ - + - - @@ -190,12 +159,11 @@ - + - @@ -207,12 +175,10 @@ - - @@ -229,81 +195,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.h b/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.h deleted file mode 100644 index a876c236219817bf146cfa4a77eb9421f8472971..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.h +++ /dev/null @@ -1,13 +0,0 @@ -// -// LoadPointerViewController.h -// paddle-mobile-demo -// -// Created by Xiao,Haichun on 2018/9/19. -// Copyright © 2018年 orange. All rights reserved. 
-// - -#import - -@interface LoadPointerViewController : UIViewController - -@end diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.m b/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.m deleted file mode 100644 index 857745686fbe750de08e8be357ccf5a4159eaae8..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.m +++ /dev/null @@ -1,171 +0,0 @@ -// -// LoadPointerViewController.m -// paddle-mobile-demo -// -// Created by Xiao,Haichun on 2018/9/19. -// Copyright © 2018年 orange. All rights reserved. -// - -#import "LoadPointerViewController.h" -#import -#import "paddle-mobile-demo-Bridging-Header.h" - -@interface LoadPointerViewController () - -@property (strong, nonatomic) id device; -@property (strong, nonatomic) id texture; -@property (strong, nonatomic) id queue; -@property (strong, nonatomic) PaddleMobileGPU *runner; -@property (strong, nonatomic) ModelConfig *modelConfig; - -@end - -@implementation LoadPointerViewController - -- (void)viewDidLoad { - [super viewDidLoad]; - - - self.device = MTLCreateSystemDefaultDevice(); - - self.queue = [self.device newCommandQueue]; - - // Do any additional setup after loading the view. 
-// NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"genet_model" withExtension:nil].path; -// NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"genet_params" withExtension:nil].path; - - NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"ar_model" withExtension:nil].path; - NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"ar_params" withExtension:nil].path; - - long fileSize; - FILE *fp; - fp = fopen([modelPath UTF8String], "rb"); - fseek(fp, 0, SEEK_END); - fileSize = ftell(fp); - rewind(fp); - void *buffer = malloc(fileSize); - fread(buffer, 1, fileSize, fp); - fclose(fp); - - long paramfileSize; - FILE *parmaFilePointer; - parmaFilePointer = fopen([paramPath UTF8String], "rb"); - fseek(parmaFilePointer, 0, SEEK_END); - paramfileSize = ftell(parmaFilePointer); - rewind(parmaFilePointer); - void *parmaBuffer = malloc(paramfileSize); - fread(parmaBuffer, 1, paramfileSize, parmaFilePointer); - fclose(parmaFilePointer); - - _modelConfig = [[ModelConfig alloc] init]; -// _modelConfig.means = @[[NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0]]; -// _modelConfig.scale = 0.017; -// _modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:128.], [NSNumber numberWithFloat:128.0],[NSNumber numberWithFloat:3.0]]; - _modelConfig.means = @[[NSNumber numberWithFloat:103.94], [NSNumber numberWithFloat:116.78], [NSNumber numberWithFloat:123.68]]; - _modelConfig.scale = 1; - _modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:160.], [NSNumber numberWithFloat:160.0],[NSNumber numberWithFloat:3.0]]; - _modelConfig.modelPointer = buffer; - _modelConfig.modelSize = (int)fileSize; - _modelConfig.paramPointer = parmaBuffer; - _modelConfig.paramSize = (int)paramfileSize; -} -- (IBAction)loaderButtonPressed:(id)sender { -// _runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:GenetType modelConfig:_modelConfig]; - _runner = 
[[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:MobileNetSSDType modelConfig:_modelConfig]; - - [_runner load]; -} -- (IBAction)predictButtonPressed:(id)sender { - [self predict]; -} - -- (id) createTextureFromImage:(UIImage*) image device:(id) device -{ - image =[UIImage imageWithCGImage:[image CGImage] - scale:[image scale] - orientation: UIImageOrientationLeft]; - - NSLog(@"orientation and size and stuff %ld %f %f", (long)image.imageOrientation, image.size.width, image.size.height); - - CGImageRef imageRef = image.CGImage; - - size_t width = self.view.frame.size.width; - size_t height = self.view.frame.size.height; - - size_t bitsPerComponent = CGImageGetBitsPerComponent(imageRef); - size_t bitsPerPixel = CGImageGetBitsPerPixel(imageRef); - - CGColorSpaceRef colorSpace = CGImageGetColorSpace(imageRef); - - CGImageAlphaInfo alphaInfo = CGImageGetAlphaInfo(imageRef); - - // NSLog(@"%@ %u", colorSpace, alphaInfo); - - CGBitmapInfo bitmapInfo = kCGBitmapByteOrderDefault | alphaInfo; - // NSLog(@"bitmap info %u", bitmapInfo); - - - CGContextRef context = CGBitmapContextCreate( NULL, width, height, bitsPerComponent, (bitsPerPixel / 8) * width, colorSpace, bitmapInfo); - - if( !context ) - { - NSLog(@"Failed to load image, probably an unsupported texture type"); - return nil; - } - - CGContextDrawImage( context, CGRectMake( 0, 0, width, height ), image.CGImage); - - - MTLPixelFormat format = MTLPixelFormatRGBA8Unorm; - - MTLTextureDescriptor *texDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format - width:width - height:height - mipmapped:NO]; - id texture = [device newTextureWithDescriptor:texDesc]; - - [texture replaceRegion:MTLRegionMake2D(0, 0, width, height) - mipmapLevel:0 - withBytes:CGBitmapContextGetData(context) - bytesPerRow:4 * width]; - - return texture; -} - -- (void)predict { - _texture = [self createTextureFromImage:[UIImage imageNamed:@"hand.jpg"] device:self.device]; - NSTimeInterval startTime = [[NSDate date] 
timeIntervalSince1970]; - NSInteger max = 428; - for (int i = 0;i < max; i ++) { - [_runner predict:_texture withCompletion:^(BOOL success , NSArray *result) { - if (success) { - if (i == max -1) { - double time = [[NSDate date] timeIntervalSince1970] - startTime; - time = (time/max)*1000; - NSLog(@"gap ==== %fms",time); - } -// for (int i = 0; i < result.count; i ++) { -// NSNumber *number = result[i]; -// NSLog(@"result %d = %f:",i, [number floatValue]); -// } - } - }]; - } -} - -- (void)didReceiveMemoryWarning { - [super didReceiveMemoryWarning]; - // Dispose of any resources that can be recreated. -} - -/* -#pragma mark - Navigation - -// In a storyboard-based application, you will often want to do a little preparation before navigation -- (void)prepareForSegue:(UIStoryboardSegue *)segue sender:(id)sender { - // Get the new view controller using [segue destinationViewController]. - // Pass the selected object to the new view controller. -} -*/ - -@end diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift new file mode 100644 index 0000000000000000000000000000000000000000..74fa89d93e042f90fe1b590a596ec584fff67f6d --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift @@ -0,0 +1,48 @@ +// +// MetalHelper.swift +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/7/25. +// Copyright © 2018年 orange. All rights reserved. +// + +import Metal +import MetalKit +import Foundation +import paddle_mobile +import MetalPerformanceShaders + +class MetalHelper { + let device: MTLDevice + let queue: MTLCommandQueue + let textureLoader: MTKTextureLoader + static let shared: MetalHelper = MetalHelper.init() + private init(){ + device = MTLCreateSystemDefaultDevice()! + queue = device.makeCommandQueue()! 
+ textureLoader = MTKTextureLoader.init(device: device) + } + + static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) { + let tmpTextureDes = MTLTextureDescriptor.init() + tmpTextureDes.width = size.width + tmpTextureDes.height = size.height + tmpTextureDes.depth = 1 + tmpTextureDes.usage = [.shaderRead, .shaderWrite] + tmpTextureDes.pixelFormat = .rgba32Float + tmpTextureDes.textureType = .type2D + tmpTextureDes.storageMode = .shared + tmpTextureDes.cpuCacheMode = .defaultCache + let dest = MetalHelper.shared.device.makeTexture(descriptor: tmpTextureDes) + + let scale = MPSImageLanczosScale.init(device: MetalHelper.shared.device) + + let buffer = queue.makeCommandBuffer() + scale.encode(commandBuffer: buffer!, sourceTexture: input, destinationTexture: dest!) + buffer?.addCompletedHandler({ (buffer) in + complete(dest!) + }) + buffer?.commit() + } +} + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift new file mode 100644 index 0000000000000000000000000000000000000000..7e1f66855e45453eee9fdbe034a309aee44ff960 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift @@ -0,0 +1,89 @@ +// +// ModelHelper.swift +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/8/10. +// Copyright © 2018年 orange. All rights reserved. 
+// + +import UIKit +import MetalKit +import Foundation +import paddle_mobile +import MetalPerformanceShaders + +class PreProccess: CusomKernel { + init(device: MTLDevice) { + let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) + super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false) + } +} + +let modelHelperMap: [SupportModel : ModelHelper] = [.mobilenet : MobileNetHelper.init()] + +enum SupportModel: String{ + case mobilenet = "mobilenet" + static func supportedModels() -> [SupportModel] { + return [.mobilenet] + } +} + +protocol ModelHelper { + var dim: [Int] { get } + var modelPath: String { get } + var paramPath: String { get } + var modelDir: String { get } + var preprocessKernel: CusomKernel { get } + func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) + func resultStr(res: [Float]) -> String +} + +extension ModelHelper { + func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { + let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" + MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in + getTexture(resTexture) + } + } +} + +struct MobileNetHelper: ModelHelper{ + class PreWords { + var contents: [String] = [] + init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { + if let filePath = inBundle.path(forResource: fileName, ofType: type) { + let string = try! 
String.init(contentsOfFile: filePath) + contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ + String($0[$0.index($0.startIndex, offsetBy: 10)...]) + } + }else{ + fatalError("no file call \(fileName)") + } + } + subscript(index: Int) -> String{ + return contents[index] + } + } + let labels = PreWords.init(fileName: "synset") + + func resultStr(res: [Float]) -> String { + var s: [String] = [] + res.top(r: 5).enumerated().forEach{ + s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) + } + return s.joined(separator: "\n") + } + + var preprocessKernel: CusomKernel + let dim = [1, 224, 224, 3] + let modelPath: String + let paramPath: String + let modelDir: String + + init() { + modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" + paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null" + modelDir = "" + preprocessKernel = PreProccess.init(device: MetalHelper.shared.device) + } +} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/MultiPredictViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/MultiPredictViewController.swift deleted file mode 100644 index bd07da61d0215b243372c27addf60efc3b2ad7d6..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/MultiPredictViewController.swift +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -import UIKit -import paddle_mobile - -class MultiPredictViewController: UIViewController { - var runner1: Runner! - var runner2: Runner! - override func viewDidLoad() { - super.viewDidLoad() - let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device) - let genet = Genet.init(device: MetalHelper.shared.device) - runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) - let queue2 = MetalHelper.shared.device.makeCommandQueue() - - runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) - } - - @IBAction func predictAct(_ sender: Any) { - let success = self.runner2.load() -// DispatchQueue.global().async { - let image1 = UIImage.init(named: "hand.jpg") -// let success = self.runner2.load() -// if success { -// for i in 0..<10000 { -// print(i) -// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in -// print("result1: ") -//// print(res) -// }) -// } -// } else { -// print("load failed") -// } -// self.runner1.clear() -// } -// return -// DispatchQueue.global().async { -//// sleep(1) -// let image1 = UIImage.init(named: "banana.jpeg") -//// if success { -// for _ in 0..<10 { -// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in -// print("result2: ") -// print(res) -// }) -// } -//// } else { -//// print("load failed") -//// } -//// self.runner2.clear() -// } - } -} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MetalHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MetalHelper.swift deleted file mode 100644 index d314e8b3f8845ef95b36b4b25e61809d353f0f24..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MetalHelper.swift +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Metal -import MetalKit -import Foundation -import paddle_mobile - -class MetalHelper { - let device: MTLDevice - let queue: MTLCommandQueue - let textureLoader: MTKTextureLoader - static let shared: MetalHelper = MetalHelper.init() - private init(){ - device = MTLCreateSystemDefaultDevice()! - queue = device.makeCommandQueue()! - textureLoader = MTKTextureLoader.init(device: device) - } - - -} - diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift deleted file mode 100644 index a954328acae3a80643ad849d58cd6ac86bf7865e..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift +++ /dev/null @@ -1,9 +0,0 @@ -// -// PaddleMobile.swift -// paddle-mobile-demo -// -// Created by liuRuiLong on 2018/9/5. -// Copyright © 2018年 orange. All rights reserved. -// - -import Foundation diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal deleted file mode 100644 index ac07e449bc5919a37a57143aa6881f79507a45b4..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - - -kernel void mobilenet_preprocess( - texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], - uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); - const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void mobilenet_preprocess_half( - texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], - uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); - const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void mobilenet_ssd_preprocess( - texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], - uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); - const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void 
mobilenet_ssd_preprocess_half( - texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], - uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); - const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void genet_preprocess(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f); - const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void genet_preprocess_half(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f); - const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void mobilent_ar_preprocess(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f); - const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void mobilent_ar_preprocess_half(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - 
gid.y >= outTexture.get_height()) { - return; - } - const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f); - const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void scale(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) return; - float w_stride = inTexture.get_width() / outTexture.get_width(); - float h_stride = inTexture.get_height() / outTexture.get_height(); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0); - outTexture.write(input, gid); -} - -kernel void scale_half(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) return; - float w_stride = inTexture.get_width() / outTexture.get_width(); - float h_stride = inTexture.get_height() / outTexture.get_height(); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0); - outTexture.write(half4(input), gid); -} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal b/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..f359ab39ac5fbc18febfb6f0da367e72b61b959c --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal @@ -0,0 +1,44 @@ +// +// PreProcessKernel.metal +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/7/20. +// Copyright © 2018年 orange. All rights reserved. 
+// + +#include +using namespace metal; + + +kernel void preprocess( + texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], + uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); + const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void preprocess_half( + texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], + uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); + const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + + + + + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift deleted file mode 100644 index f9e841f9c2a3060e775726023b6d5cfc3eeb679d..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift +++ /dev/null @@ -1,31 +0,0 @@ - - -import Foundation -import QuartzCore - -public class FPSCounter { - private(set) public var fps: Double = 0 - - var frames = 0 - var startTime: CFTimeInterval = 0 - - public func start() { - frames = 0 - startTime = CACurrentMediaTime() - } - - public func frameCompleted() { - frames += 1 - let now = CACurrentMediaTime() - let elapsed = now - startTime - if elapsed > 0.1 { - let current = Double(frames) / elapsed - let smoothing = 0.75 - fps = smoothing*fps + (1 - smoothing)*current - if elapsed > 1 { - frames = 0 - startTime = CACurrentMediaTime() - } - } - } -} diff --git 
a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift deleted file mode 100644 index c235ed2f0391bdc97e9e182c0e9897814a0518fa..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift +++ /dev/null @@ -1,218 +0,0 @@ - -import UIKit -import Metal -import CoreVideo -import AVFoundation - -@available(iOS 10.0, *) -@objc public protocol VideoCaptureDelegate: NSObjectProtocol { - @objc optional func videoCapture(_ capture: VideoCapture, didCaptureSampleBuffer sampleBuffer: CMSampleBuffer, timestamp: CMTime) - @objc optional func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) - @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhoto previewImage: UIImage?) - @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhotoTexture texture: MTLTexture?) -} - -/** - Simple interface to the iPhone's camera. -*/ -@available(iOS 10.0, *) -public class VideoCapture: NSObject { - public var previewLayer: AVCaptureVideoPreviewLayer? - public weak var delegate: VideoCaptureDelegate? - public var fps = -1 - private let device: MTLDevice? - private let videoOrientation: AVCaptureVideoOrientation - private var textureCache: CVMetalTextureCache? - private let captureSession = AVCaptureSession() - private let videoOutput = AVCaptureVideoDataOutput() - private let photoOutput = AVCapturePhotoOutput() - private let queue = DispatchQueue(label: "net.machinethink.camera-queue") - private var lastTimestamp = CMTime() - private let cameraPosition: AVCaptureDevice.Position - public init(device: MTLDevice? 
= nil, orientation: AVCaptureVideoOrientation = .portrait, position: AVCaptureDevice.Position = .back) { - self.device = device - self.videoOrientation = orientation - self.cameraPosition = position - super.init() - } - - public func setUp(sessionPreset: AVCaptureSession.Preset = .medium, - completion: @escaping (Bool) -> Void) { - queue.async { - let success = self.setUpCamera(sessionPreset: sessionPreset) - DispatchQueue.main.async { - completion(success) - } - } - } - - func fontCamera() -> AVCaptureDevice? { - let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices - return deveices.first - - } - - func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { - if let inDevice = device{ - guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, inDevice, nil, &textureCache) == kCVReturnSuccess else { - print("Error: could not create a texture cache") - return false - } - } - - captureSession.beginConfiguration() - captureSession.sessionPreset = sessionPreset - - var oCaptureDevice: AVCaptureDevice? - switch cameraPosition { - case .back: - oCaptureDevice = AVCaptureDevice.default(for: AVMediaType.video) - break - case .front: - oCaptureDevice = fontCamera() - break - default: - break - } - - guard let captureDevice = oCaptureDevice else { - print("Error: no video devices available") - return false - } - - guard let videoInput = try? 
AVCaptureDeviceInput(device: captureDevice) else { - print("Error: could not create AVCaptureDeviceInput") - return false - } - - if captureSession.canAddInput(videoInput) { - captureSession.addInput(videoInput) - } - - let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) - previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect - previewLayer.connection?.videoOrientation = self.videoOrientation - self.previewLayer = previewLayer - - let settings: [String : Any] = [ - kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) - ] - - videoOutput.videoSettings = settings - videoOutput.alwaysDiscardsLateVideoFrames = true - videoOutput.setSampleBufferDelegate(self, queue: queue) - if captureSession.canAddOutput(videoOutput) { - captureSession.addOutput(videoOutput) - } - - // We want the buffers to be in portrait orientation otherwise they are - // rotated by 90 degrees. Need to set this _after_ addOutput()! - videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation - - if captureSession.canAddOutput(photoOutput) { - captureSession.addOutput(photoOutput) - } - - captureSession.commitConfiguration() - return true - } - - public func start() { - if !captureSession.isRunning { - captureSession.startRunning() - } - } - - public func stop() { - if captureSession.isRunning { - captureSession.stopRunning() - } - } - - /* Captures a single frame of the camera input. */ - public func capturePhoto() { - let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)]) - settings.previewPhotoFormat = [ - kCVPixelBufferPixelFormatTypeKey as String: settings.__availablePreviewPhotoPixelFormatTypes[0], - kCVPixelBufferWidthKey as String: 480, - kCVPixelBufferHeightKey as String: 360, - ] - photoOutput.capturePhoto(with: settings, delegate: self) - } - - func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? 
{ - if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { - let width = CVPixelBufferGetWidth(imageBuffer) - let height = CVPixelBufferGetHeight(imageBuffer) - var texture: CVMetalTexture? - CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache, imageBuffer, nil, .bgra8Unorm, width, height, 0, &texture) - if let texture = texture { - return CVMetalTextureGetTexture(texture) - } - } - return nil - } - - func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? { - if let sampleBuffer = sampleBuffer, - let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { - let width = CVPixelBufferGetWidth(imageBuffer) - let height = CVPixelBufferGetHeight(imageBuffer) - let rect = CGRect(x: 0, y: 0, width: CGFloat(width), height: CGFloat(height)) - let ciImage = CIImage(cvPixelBuffer: imageBuffer) - let ciContext = CIContext(options: nil) - if let cgImage = ciContext.createCGImage(ciImage, from: rect) { - return UIImage(cgImage: cgImage) - } - } - return nil - } -} - - -@available(iOS 10.0, *) -extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { - public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - // Because lowering the capture device's FPS looks ugly in the preview, - // we capture at full speed but only call the delegate at its desired - // framerate. If `fps` is -1, we run at the full framerate. - let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) - let deltaTime = timestamp - lastTimestamp - if fps == -1 || deltaTime >= CMTimeMake(1, Int32(fps)) { - lastTimestamp = timestamp - self.delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp) - if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))) ?? 
false{ - let texture = convertToMTLTexture(sampleBuffer: sampleBuffer) - delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp) - } - } - } - - public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - print("dropped frame") - } -} - -@available(iOS 10.0, *) -extension VideoCapture: AVCapturePhotoCaptureDelegate { - public func photoOutput(_ captureOutput: AVCapturePhotoOutput, - didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?, - previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?, - resolvedSettings: AVCaptureResolvedPhotoSettings, - bracketSettings: AVCaptureBracketedStillImageSettings?, - error: Error?) { - var imageTexture: MTLTexture? - var previewImage: UIImage? - if error == nil { - if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))) ?? false{ - imageTexture = convertToMTLTexture(sampleBuffer: photoSampleBuffer) - self.delegate?.videoCapture?(self, didCapturePhotoTexture: imageTexture) - } - - if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))) ?? 
false{ - previewImage = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer) - self.delegate?.videoCapture?(self, didCapturePhoto: previewImage) - } - } - } -} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index 1c6d0a91c9bf1d202091282e43859270a238edaa..30fdaf078556bdc4546aec4f27e153f469d9e5ac 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -14,292 +14,164 @@ import UIKit import MetalKit -import CoreMedia import paddle_mobile import MetalPerformanceShaders -var platform: Platform = .GPU -let threadSupport: [(Platform, String)] = [(.GPU, "GPU"), (.CPU, "CPU")] - -//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), -let modelHelperMap: [SupportModel : Runner] = [ - .genet : Runner.init(inNet: Genet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), - .mobilenet_ssd_ar : Runner.init(inNet: MobileNet_ssd_AR.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)] -//, .genet : Genet.init() -//let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()] - -let netSupport: [SupportModel : Net] = [.genet : Genet.init(device: MetalHelper.shared.device), .mobilenet_ssd_ar : MobileNet_ssd_AR.init(device: MetalHelper.shared.device)] - -enum SupportModel: String{ - // case mobilenet = "mobilenet" -// case mobilenet_ssd = "mobilenetssd" - case genet = "genet" - case mobilenet_ssd_ar = "mobilenetssd_ar" - - static func supportedModels() -> [SupportModel] { - // .mobilenet, - // .mobilenet_ssd, - return [.genet, .mobilenet_ssd_ar] - } -} +let threadSupport = [1] class ViewController: UIViewController { - @IBOutlet 
weak var resultTextView: UITextView! - @IBOutlet weak var selectImageView: UIImageView! - @IBOutlet weak var elapsedTimeLabel: UILabel! - @IBOutlet weak var modelPickerView: UIPickerView! - @IBOutlet weak var threadPickerView: UIPickerView! - @IBOutlet weak var videoView: UIView! -// var videoCapture: VideoCapture! - - var selectImage: UIImage? - var inputPointer: UnsafeMutablePointer? - var modelType: SupportModel = SupportModel.supportedModels()[0] - var toPredictTexture: MTLTexture? - - var runner: Runner! - - var threadNum = 1 - - @IBAction func loadAct(_ sender: Any) { - runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue, inPlatform: platform) + @IBOutlet weak var resultTextView: UITextView! + @IBOutlet weak var selectImageView: UIImageView! + @IBOutlet weak var elapsedTimeLabel: UILabel! + @IBOutlet weak var modelPickerView: UIPickerView! + @IBOutlet weak var threadPickerView: UIPickerView! + var selectImage: UIImage? + var program: Program? + var executor: Executor? + var modelType: SupportModel = .mobilenet + var toPredictTexture: MTLTexture? + var modelHelper: ModelHelper { + return modelHelperMap[modelType] ?! " has no this type " + } + var threadNum = 1 - if platform == .CPU { - if inputPointer == nil { - inputPointer = runner.preproccess(image: selectImage!.cgImage!) - - } - } else if platform == .GPU { - if self.toPredictTexture == nil { - runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in - self?.toPredictTexture = texture + @IBAction func loadAct(_ sender: Any) { + let inModelHelper = modelHelper + let queue = MetalHelper.shared.queue + let loader = Loader.init() + do { + let modelPath = inModelHelper.modelPath + let paraPath = inModelHelper.paramPath + + program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath) + executor = try Executor.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!) 
+ } catch let error { + print(error) } - } - } else { - fatalError( " unsupport " ) } - if runner.load() { - print(" load success ! ") - } else { - print(" load error ! ") + @IBAction func selectImageAct(_ sender: Any) { + let imagePicker = UIImagePickerController() + imagePicker.sourceType = .camera + imagePicker.delegate = self + self.present(imagePicker, animated: true, completion: nil) } - } - - @IBAction func selectImageAct(_ sender: Any) { - let imagePicker = UIImagePickerController() - imagePicker.sourceType = .camera - imagePicker.delegate = self - self.present(imagePicker, animated: true, completion: nil) - } - - @IBAction func clearAct(_ sender: Any) { - runner.clear() - } - - @IBAction func predictAct(_ sender: Any) { - let max = 50 - switch platform { - case .GPU: - guard let inTexture = toPredictTexture else { - resultTextView.text = "请选择图片 ! " - return - } - - for _ in 0..<10{ - runner.predict(texture: inTexture) { (success, resultHolder) in - resultHolder?.releasePointer() - } - } - - let startDate = Date.init() - for i in 0.. 
Int { - if pickerView == modelPickerView { - return 1 - } else if pickerView == threadPickerView { - return 1 - } else { - fatalError() + func numberOfComponents(in pickerView: UIPickerView) -> Int { + if pickerView == modelPickerView { + return 1 + } else if pickerView == threadPickerView { + return 1 + } else { + fatalError() + } } - } - - func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int { - if pickerView == modelPickerView { - return SupportModel.supportedModels().count - } else if pickerView == threadPickerView { - return threadSupport.count - } else { - fatalError() + + func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int { + if pickerView == modelPickerView { + return SupportModel.supportedModels().count + } else if pickerView == threadPickerView { + return threadSupport.count + } else { + fatalError() + } } - } - - public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? { - if pickerView == modelPickerView { - return SupportModel.supportedModels()[row].rawValue - } else if pickerView == threadPickerView { - return threadSupport[row].1 - } else { - fatalError() + + public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? 
{ + if pickerView == modelPickerView { + return SupportModel.supportedModels()[row].rawValue + } else if pickerView == threadPickerView { + return "\(threadSupport[row])" + } else { + fatalError() + } } - } - - public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) { - if pickerView == modelPickerView { - self.modelType = SupportModel.supportedModels()[row] - } else if pickerView == threadPickerView { - - platform = threadSupport[row].0 - } else { - fatalError() + + public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) { + if pickerView == modelPickerView { + self.modelType = SupportModel.supportedModels()[row] + } else if pickerView == threadPickerView { + self.threadNum = threadSupport[row] + } else { + fatalError() + } } - } } extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate { - func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { - picker.dismiss(animated: true){[weak self] in - guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? 
UIImage else{ - fatalError("no image") - } - sSelf.selectImage = image - sSelf.selectImageView.image = image - sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in - sSelf.toPredictTexture = texture - }) - } - } -} - -var bool1 = false -extension ViewController: VideoCaptureDelegate{ - func predictTexture(texture: MTLTexture){ - runner.scaleTexture(input: texture) { (scaledTexture) in - self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in -// print(resultHolder!.result![0]) - resultHolder?.releasePointer() - }) + func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { + picker.dismiss(animated: true){[weak self] in + guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{ + fatalError("no image") + } + sSelf.selectImage = image + sSelf.selectImageView.image = image + sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in + sSelf.toPredictTexture = texture + }) + } } - } - - -// @available(iOS 10.0, *) -// func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) { -//// if !bool1 { -//// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) { -// self.predictTexture(texture: texture!) -//// } -// -// -//// bool1 = true -//// } -// -// } - } - - diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h b/metal/paddle-mobile-demo/paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h deleted file mode 100644 index 92de82860ccd372ba0eae962edd1b271986f1862..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h +++ /dev/null @@ -1,5 +0,0 @@ -// -// Use this file to import your target's public headers that you would like to expose to Swift. 
-// - -#import diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift index 7817befaedf1aff04b75abd39cc6f7f06bc935d3..6ab6f7c05e30049e850170409efcd6f049c73abe 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift @@ -1,16 +1,10 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +// +// AppDelegate.swift +// paddle-mobile-unit-test +// +// Created by liuRuiLong on 2018/8/10. +// Copyright © 2018年 orange. All rights reserved. +// import UIKit diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift index 98f03affa2a230b2698edf6bafe5e06def8986b6..d57b610e4d10f02d2eace4892a6d55eda8f2c9b9 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift @@ -1,34 +1,18 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +// +// ViewController.swift +// paddle-mobile-unit-test +// +// Created by liuRuiLong on 2018/8/10. +// Copyright © 2018年 orange. All rights reserved. +// import UIKit -import Metal -//import MetalKit import paddle_mobile class ViewController: UIViewController { + override func viewDidLoad() { super.viewDidLoad() - let device = Metal.MTLCreateSystemDefaultDevice()! - let queue = device.makeCommandQueue()! - let test = PaddleMobileUnitTest.init( - inDevice: device, - inQueue: queue - ) - test.testConcat() -// test.testReshape() -// test.testTranspose() print(" done ") } diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj index 34d45528542d0d6a9d5ac153a7d6f818d962cbfd..6bceab43210c42ef83a2152463caf3bc8917b8c8 100644 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj @@ -7,31 +7,7 @@ objects = { /* Begin PBXBuildFile section */ - 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; }; - 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; }; - 4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; }; - 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 
4AA1EA8B2146640900D0F791 /* SplitOp.swift */; }; - 4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */; }; - 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8F214664CD00D0F791 /* Split.metal */; }; - 4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA91214665D700D0F791 /* ShapeOp.swift */; }; - 4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA932146661500D0F791 /* ShapeKernel.swift */; }; - 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA972146666500D0F791 /* FlattenOp.swift */; }; - 4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */ = {isa = PBXBuildFile; fileRef = 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */; }; - 4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */; }; - 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */; }; - 4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */; }; - 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA5214B5F6800D0F791 /* Shape.metal */; }; - 4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */; }; - 4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */; }; - 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */; }; - 
4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */; }; - 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928762133F1DB005B6C3A /* BoxCoder.metal */; }; - 4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; }; - 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* ConcatKernel.metal */; }; - 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; }; D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; }; - FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC0226552138F33800F395E2 /* TransposeKernel.metal */; }; - FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC0226572138F38D00F395E2 /* PoolKernel.metal */; }; FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; }; FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9520E11C9A0081E9F8 /* Errors.swift */; }; @@ -59,54 +35,17 @@ FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; }; FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* 
ElementwiseAddKernel.swift */; }; FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; }; - FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */; settings = {ATTRIBUTES = (Public, ); }; }; - FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC292C5521421B4600CF622F /* PaddleMobileGPU.m */; }; - FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7C214255BC00CF622F /* CPUCompute.mm */; }; - FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7E214255BC00CF622F /* MobileNetSSD.swift */; }; - FC292C85214257CB00CF622F /* CPUCompute.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C7D214255BC00CF622F /* CPUCompute.h */; settings = {ATTRIBUTES = (Public, ); }; }; - FC292C872142624800CF622F /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C862142624800CF622F /* Genet.swift */; }; - FC33B0F02147659000714A93 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC33B0EF2147659000714A93 /* MobileNet.swift */; }; + FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1B186520ECF1C600678B91 /* ResizeKernel.swift */; }; FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; }; FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; }; FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; }; - FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* 
PaddleMobile.swift */; }; - FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; }; - FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; }; - FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; }; FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; }; FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; }; - FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; }; - FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */; }; - FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */; }; - FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */; }; - FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; }; - FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; }; FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; }; - FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = 
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */; }; FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; }; FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; }; FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; }; FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; }; - FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */; }; - FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1642132A5EB00084FE5 /* Common.metal */; }; - FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */; }; - FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD42138272900BD58AA /* ConvAddMetal.metal */; }; - FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; }; - FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; }; - FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; }; - FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; }; - FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; }; - 
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */; }; - FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */; }; - FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */; }; - FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */; }; - FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC642122FCD700D94F7E /* TransposeOp.swift */; }; - FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC66212306B000D94F7E /* ConcatOp.swift */; }; - FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC68212306D300D94F7E /* ConcatKernel.swift */; }; - FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */; }; - FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; }; - FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; }; - FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; }; FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; }; FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; }; FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in 
Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; }; @@ -116,55 +55,15 @@ FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; }; FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; }; FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; }; - FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */; }; - FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */; }; - FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */; }; - FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; }; - FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; }; - FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; }; - FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */; }; - FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */; }; - FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */; }; - FCE3A1AF2153E8EE00C37CDE /* 
ElementwiseAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */; }; - FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */; }; - FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */; }; - FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; }; - FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; }; - FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; }; - FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; }; FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; }; FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; }; FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ - 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = ""; }; - 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = 
BilinearInterpKernel.swift; sourceTree = ""; }; - 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = ""; }; - 4AA1EA8B2146640900D0F791 /* SplitOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SplitOp.swift; sourceTree = ""; }; - 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SplitKernel.swift; sourceTree = ""; }; - 4AA1EA8F214664CD00D0F791 /* Split.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Split.metal; sourceTree = ""; }; - 4AA1EA91214665D700D0F791 /* ShapeOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ShapeOp.swift; sourceTree = ""; }; - 4AA1EA932146661500D0F791 /* ShapeKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ShapeKernel.swift; sourceTree = ""; }; - 4AA1EA972146666500D0F791 /* FlattenOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FlattenOp.swift; sourceTree = ""; }; - 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = ConcatKernel.inc.metal; sourceTree = ""; }; - 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = ReshapeKernel.inc.metal; sourceTree = ""; }; - 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FlattenKernel.swift; sourceTree = ""; }; - 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = 
Split.inc.metal; sourceTree = ""; }; - 4AA1EAA5214B5F6800D0F791 /* Shape.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Shape.metal; sourceTree = ""; }; - 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.inc.metal; sourceTree = ""; }; - 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.inc.metal; sourceTree = ""; }; - 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.inc.metal; sourceTree = ""; }; - 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = TransposeKernel.inc.metal; sourceTree = ""; }; - 4AF928762133F1DB005B6C3A /* BoxCoder.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.metal; sourceTree = ""; }; - 4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = ""; }; - 4AF928812135673D005B6C3A /* ConcatKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = ConcatKernel.metal; sourceTree = ""; }; - 4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = ""; }; CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = ""; }; DD2E06330A1E7129C918DB46 /* 
Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = ""; }; - FC0226552138F33800F395E2 /* TransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = TransposeKernel.metal; sourceTree = ""; }; - FC0226572138F38D00F395E2 /* PoolKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PoolKernel.metal; sourceTree = ""; }; FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = paddle_mobile.h; sourceTree = ""; }; FC039B6E20E11C3C0081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; @@ -194,54 +93,17 @@ FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = ""; }; FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = ""; }; FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = ""; }; - FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
PaddleMobileGPU.h; sourceTree = ""; }; - FC292C5521421B4600CF622F /* PaddleMobileGPU.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = ""; }; - FC292C7C214255BC00CF622F /* CPUCompute.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = ""; }; - FC292C7D214255BC00CF622F /* CPUCompute.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = ""; }; - FC292C7E214255BC00CF622F /* MobileNetSSD.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = ""; }; - FC292C862142624800CF622F /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = ""; }; - FC33B0EF2147659000714A93 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = ""; }; + FC1B186520ECF1C600678B91 /* ResizeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ResizeKernel.swift; sourceTree = ""; }; FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = ""; }; FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = ""; }; FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = ""; }; - FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = ""; }; - FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = ""; }; - FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = ""; }; - FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; }; FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = ""; }; FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = ""; }; - FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluOp.swift; sourceTree = ""; }; - FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluKernel.swift; sourceTree = ""; }; - FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPreluKernel.metal; sourceTree = ""; }; - FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPrelu.inc.metal; sourceTree = ""; }; - FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = ""; }; - FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = ""; }; FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = ""; }; - FC9A19E22148C31300CD9CBF /* 
MobilenetSSD_AR.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = ""; }; FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = ""; }; FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = ""; }; FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = ""; }; FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = ""; }; - FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReshapeKernel.metal; sourceTree = ""; }; - FCA3A1642132A5EB00084FE5 /* Common.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Common.metal; sourceTree = ""; }; - FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvTransposeKernel.metal; sourceTree = ""; }; - FCA67CD42138272900BD58AA /* ConvAddMetal.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddMetal.metal; sourceTree = ""; }; - FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = ""; }; - FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = ""; }; - FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = ""; }; - FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = ""; }; - FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = ""; }; - FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DepthwiseConvOp.swift; sourceTree = ""; }; - FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PriorBoxOp.swift; sourceTree = ""; }; - FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PriorBoxKernel.swift; sourceTree = ""; }; - FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TransposeKernel.swift; sourceTree = ""; }; - FCBCCC642122FCD700D94F7E /* TransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TransposeOp.swift; sourceTree = ""; }; - FCBCCC66212306B000D94F7E /* ConcatOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConcatOp.swift; sourceTree = ""; }; - FCBCCC68212306D300D94F7E /* ConcatKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConcatKernel.swift; sourceTree = ""; }; - FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderOp.swift; sourceTree = ""; }; - FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = ""; }; - FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = ""; }; - FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = ""; }; FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = ""; }; FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = ""; }; FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = ""; }; @@ -251,25 +113,9 @@ FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = ""; }; FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = ""; }; FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = ""; }; - FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluKernel.swift; sourceTree = ""; }; - FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeKernel.swift; sourceTree = ""; }; - FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BatchNormKernel.metal; sourceTree = ""; }; - FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = ""; }; - FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = ""; }; - FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = ""; }; - FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluOp.swift; sourceTree = ""; }; - FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluKernel.swift; sourceTree = ""; }; - FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluOp.swift; sourceTree = ""; }; - FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluKernel.swift; sourceTree = ""; }; - FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.inc.metal; sourceTree = ""; }; - FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.metal; sourceTree = ""; }; - FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = ""; }; - FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = ""; }; - FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = ""; }; - FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = ""; }; FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = 
ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; }; FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = ""; }; - FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; }; + FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -277,9 +123,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */, D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */, - FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -289,7 +133,6 @@ 336CBE234BF5DE48658DE65F /* Frameworks */ = { isa = PBXGroup; children = ( - FC4FD97D2140F2C30073E130 /* libstdc++.tbd */, DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */, ); name = Frameworks; @@ -325,19 +168,10 @@ FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = { isa = PBXGroup; children = ( - FCE9D7B6214F869000B520C3 /* Net.swift */, - FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */, - FC33B0EF2147659000714A93 /* MobileNet.swift */, - FC292C862142624800CF622F /* Genet.swift */, - FC292C7E214255BC00CF622F /* MobileNetSSD.swift */, - FC292C7C214255BC00CF622F /* CPUCompute.mm */, - FC292C7D214255BC00CF622F /* CPUCompute.h */, - FC292C5521421B4600CF622F /* PaddleMobileGPU.m */, - FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */, - FC4FD9762140E4920073E130 /* CPU */, - 
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */, FC039BAE20E11CC20081E9F8 /* Program */, FC039BA320E11CBC0081E9F8 /* Operators */, + FC039BA120E11CB70081E9F8 /* Loader.swift */, + FC039B9A20E11CA00081E9F8 /* Executor.swift */, FC039B9C20E11CB20081E9F8 /* framework */, FC039B9320E11C9A0081E9F8 /* Common */, FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */, @@ -362,8 +196,6 @@ FC039B9C20E11CB20081E9F8 /* framework */ = { isa = PBXGroup; children = ( - FC039BA120E11CB70081E9F8 /* Loader.swift */, - FC039B9A20E11CA00081E9F8 /* Executor.swift */, FC039B9D20E11CB20081E9F8 /* Tensor.swift */, FC039B9E20E11CB20081E9F8 /* Dim.swift */, FC9D038320E23B01000F735A /* Texture.swift */, @@ -387,23 +219,6 @@ FCD04E6920F319EC0007374F /* SoftmaxOp.swift */, FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */, FCD04E7120F343420007374F /* ConvAddOp.swift */, - FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */, - FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */, - FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */, - FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */, - FCBCCC642122FCD700D94F7E /* TransposeOp.swift */, - FCBCCC66212306B000D94F7E /* ConcatOp.swift */, - FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */, - 4AA1EA8B2146640900D0F791 /* SplitOp.swift */, - 4AA1EA91214665D700D0F791 /* ShapeOp.swift */, - 4AA1EA972146666500D0F791 /* FlattenOp.swift */, - 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */, - FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */, - FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */, - FCEB684B212F093800D2448E /* PreluOp.swift */, - FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */, - FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */, - FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */, ); path = Operators; sourceTree = ""; @@ -428,46 +243,24 @@ FC086BA520E67E8500D85EF7 /* Kernels */ = { isa = PBXGroup; children = ( - FCDDC6CD212FE02100E5EF74 /* Base */, - FCEB6837212F00B100D2448E /* metal */, - FCDDC6C7212FA3CA00E5EF74 /* 
ConvTransposeKernel.swift */, FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */, + FCF2D73720E64E70007AC5F5 /* Kernel.swift */, + FC1B16B220EC9A4F00678B91 /* Kernels.metal */, + FC1B186520ECF1C600678B91 /* ResizeKernel.swift */, FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */, FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */, FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */, FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */, + FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */, FCD04E6720F315020007374F /* PoolKernel.swift */, FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */, FCD04E6F20F31B720007374F /* ReshapeKernel.swift */, - 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */, FCD04E7320F3437E0007374F /* ConvAddKernel.swift */, - FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */, - FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */, - FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */, - FCBCCC68212306D300D94F7E /* ConcatKernel.swift */, - FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */, - 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */, - 4AA1EA932146661500D0F791 /* ShapeKernel.swift */, - 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */, - FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */, - FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */, - FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */, - FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */, - FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */, ); path = Kernels; sourceTree = ""; }; - FC4FD9762140E4920073E130 /* CPU */ = { - isa = PBXGroup; - children = ( - FC4FD9782140E4980073E130 /* libpaddle-mobile.a */, - FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */, - ); - path = CPU; - sourceTree = ""; - }; FCD592FA20E248EC00252966 /* Base */ = { isa = PBXGroup; children = ( @@ -478,56 +271,6 @@ path = Base; sourceTree = ""; }; - FCDDC6CD212FE02100E5EF74 /* Base */ = { - isa 
= PBXGroup; - children = ( - FCF2D73720E64E70007AC5F5 /* Kernel.swift */, - ); - path = Base; - sourceTree = ""; - }; - FCEB6837212F00B100D2448E /* metal */ = { - isa = PBXGroup; - children = ( - 4AF928812135673D005B6C3A /* ConcatKernel.metal */, - 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */, - 4AF9288321357BE3005B6C3A /* Elementwise.metal */, - FC1B16B220EC9A4F00678B91 /* Kernels.metal */, - FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, - 4AF928762133F1DB005B6C3A /* BoxCoder.metal */, - 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */, - 4AA1EAA5214B5F6800D0F791 /* Shape.metal */, - 4AA1EA8F214664CD00D0F791 /* Split.metal */, - 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */, - 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */, - 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */, - 4AF9287821341661005B6C3A /* Softmax.metal */, - 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */, - FCEB6849212F00DB00D2448E /* PreluKernel.metal */, - FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */, - FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */, - FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */, - FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */, - 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */, - FCA3A1642132A5EB00084FE5 /* Common.metal */, - FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */, - FCA67CD42138272900BD58AA /* ConvAddMetal.metal */, - FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */, - FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */, - FC0226552138F33800F395E2 /* TransposeKernel.metal */, - 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */, - FC0226572138F38D00F395E2 /* PoolKernel.metal */, - FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */, - FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */, - FC803BC6214CBA820094B8E5 /* Macro.metal */, - FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */, - FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */, - FCE3A1B02153E90F00C37CDE /* 
ElementwiseAddPreluKernel.inc.metal */, - FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */, - ); - path = metal; - sourceTree = ""; - }; /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ @@ -535,10 +278,6 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( - FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */, - FC292C85214257CB00CF622F /* CPUCompute.h in Headers */, - FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */, - 4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */, FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -576,7 +315,6 @@ TargetAttributes = { FC039B6920E11C3C0081E9F8 = { CreatedOnToolsVersion = 9.3.1; - LastSwiftMigration = 0940; }; }; }; @@ -634,124 +372,53 @@ buildActionMask = 2147483647; files = ( FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */, - 4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */, FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, - FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */, - FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */, - 4AF9287921341661005B6C3A /* Softmax.metal in Sources */, - 4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */, FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */, FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */, - FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */, - FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */, - 4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */, - 4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */, - FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, - 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, - FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in 
Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, - FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, + FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */, FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */, - FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */, - FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */, - FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */, - FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */, - FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, - 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */, - FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */, FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */, - 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, - 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */, - FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */, - FC33B0F02147659000714A93 /* MobileNet.swift in Sources */, - FCEB684C212F093800D2448E /* PreluOp.swift in Sources */, - 4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */, - FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */, FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */, FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */, - 4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */, - FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */, FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */, 
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */, FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */, - FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */, - FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */, - FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */, FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */, FC9D038420E23B01000F735A /* Texture.swift in Sources */, - FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */, - 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, - 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */, - FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */, - 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */, FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */, FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */, FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */, - 4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */, FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */, FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */, - FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */, FCD04E6620F314C50007374F /* PoolOp.swift in Sources */, - FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */, FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, - FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */, FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, - FC292C872142624800CF622F /* Genet.swift in Sources */, - FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */, - 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */, - FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */, - FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */, FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */, FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */, - 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */, - 
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */, - FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */, - FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */, FC82735920E3C04200BE430A /* OpCreator.swift in Sources */, - FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */, - 4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */, - FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */, - FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */, - FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */, - FCE9D7B7214F869000B520C3 /* Net.swift in Sources */, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */, FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */, - FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */, - FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */, - FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */, FC9D038220E2312E000F735A /* FetchOp.swift in Sources */, - FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */, FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */, FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */, - FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */, FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */, - FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */, - 4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */, - FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */, - FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, - FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, - FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */, FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, - 
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */, - 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */, FCD04E6820F315020007374F /* PoolKernel.swift in Sources */, - FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */, FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */, - FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */, FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */, - 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */, FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -883,7 +550,6 @@ isa = XCBuildConfiguration; baseConfigurationReference = CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */; buildSettings = { - CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = ""; CODE_SIGN_STYLE = Automatic; DEFINES_MODULE = YES; @@ -891,7 +557,6 @@ DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; - ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile/Info.plist"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; @@ -900,16 +565,10 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - LIBRARY_SEARCH_PATHS = ( - "$(inherited)", - "$(PROJECT_DIR)/paddle-mobile/CPU", - ); - MACH_O_TYPE = mh_dylib; MTL_LANGUAGE_REVISION = UseDeploymentTarget; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; SKIP_INSTALL = YES; - SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -919,7 +578,6 @@ isa = XCBuildConfiguration; baseConfigurationReference = E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */; buildSettings = { - CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = ""; CODE_SIGN_STYLE = Automatic; DEFINES_MODULE = YES; @@ -927,7 +585,6 @@ DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; - ENABLE_BITCODE = 
NO; INFOPLIST_FILE = "paddle-mobile/Info.plist"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; @@ -936,11 +593,6 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); - LIBRARY_SEARCH_PATHS = ( - "$(inherited)", - "$(PROJECT_DIR)/paddle-mobile/CPU", - ); - MACH_O_TYPE = mh_dylib; MTL_LANGUAGE_REVISION = UseDeploymentTarget; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; diff --git a/metal/paddle-mobile/paddle-mobile/CPUCompute.h b/metal/paddle-mobile/paddle-mobile/CPUCompute.h deleted file mode 100644 index ed12dd60df4ea06944fdf4ff9b635fc12a99120e..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/CPUCompute.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#pragma once - -#import - - -@interface CPUResult: NSObject -@property (assign, nonatomic) float *output; -@property (assign, nonatomic) int outputSize; -@end - -@interface NMSCompute: NSObject - -@property (assign, nonatomic) float scoreThredshold; - -@property (assign, nonatomic) int nmsTopK; - -@property (assign, nonatomic) int keepTopK; - -@property (assign, nonatomic) float nmsEta; - -@property (assign, nonatomic) float nmsThreshold; - -@property (assign, nonatomic) int background_label; - -@property (strong, nonatomic) NSArray *scoreDim; - -@property (strong, nonatomic) NSArray *bboxDim; - --(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox; - -@end diff --git a/metal/paddle-mobile/paddle-mobile/CPUCompute.mm b/metal/paddle-mobile/paddle-mobile/CPUCompute.mm deleted file mode 100644 index b97153765b46bb63d604d8845eee08d91283481d..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/CPUCompute.mm +++ /dev/null @@ -1,322 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - - -#import "CPUCompute.h" - -#import -#import -#import -#import - - - - -struct NMSParam { - - float *score_data; - - float *box_data; - - float *output; - - int output_size; - - std::vector score_dim; - - std::vector box_dim; - - float scoreThredshold; - - int nmsTopK; - - int keepTopK; - - float nmsEta; - - float nmsThreshold; - - int background_label; -}; - - -constexpr int kOutputDim = 6; -constexpr int kBBoxSize = 4; - -template -bool SortScorePairDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -template -static inline void GetMaxScoreIndex( - const std::vector& scores, const T threshold, int top_k, - std::vector>* sorted_indices) { - for (size_t i = 0; i < scores.size(); ++i) { - if (scores[i] > threshold) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), sorted_indices->end(), - SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -template -static inline T BBoxArea(const T* box, const bool normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. xmax < xmin or ymax < ymin), return 0. - return static_cast(0.); - } else { - const T w = box[2] - box[0]; - const T h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. 
- return (w + 1) * (h + 1); - } - } -} - -template -static inline T JaccardOverlap(const T* box1, const T* box2, - const bool normalized) { - if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || - box2[3] < box1[1]) { - return static_cast(0.); - } else { - const T inter_xmin = std::max(box1[0], box2[0]); - const T inter_ymin = std::max(box1[1], box2[1]); - const T inter_xmax = std::min(box1[2], box2[2]); - const T inter_ymax = std::min(box1[3], box2[3]); - const T inter_w = inter_xmax - inter_xmin; - const T inter_h = inter_ymax - inter_ymin; - const T inter_area = inter_w * inter_h; - const T bbox1_area = BBoxArea(box1, normalized); - const T bbox2_area = BBoxArea(box2, normalized); - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - -template -static inline void NMSFast( - const T *bbox_data, - std::vector bbox_dim, - const T *score_data, - const T score_threshold, const T nms_threshold, - const T eta, const int top_k, - std::vector* selected_indices) { - // The total boxes for each instance. 
- int num_boxes = bbox_dim[0]; - // 4: [xmin ymin xmax ymax] - int box_size = bbox_dim[1]; - - std::vector scores_data(num_boxes); - std::copy_n(score_data, num_boxes, scores_data.begin()); - std::vector> sorted_indices; - GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices); - - selected_indices->clear(); - T adaptive_threshold = nms_threshold; - - while (sorted_indices.size() != 0) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (size_t k = 0; k < selected_indices->size(); ++k) { - if (keep) { - const int kept_idx = (*selected_indices)[k]; - T overlap = JaccardOverlap(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, true); - keep = overlap <= adaptive_threshold; - } else { - break; - } - } - if (keep) { - selected_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } -} - -template -void MultiClassNMS(const T *boxes_data, - const std::vector &box_dim, - const T *scores_data, - const std::vector &score_dim, - std::map>* indices, int* num_nmsed_out, - const int& background_label, const int& nms_top_k, - const int& keep_top_k, const T& nms_threshold, - const T& nms_eta, const T& score_threshold) { - - int64_t class_num = score_dim[0]; - int64_t predict_dim = score_dim[1]; - int num_det = 0; - for (int c = 0; c < class_num; ++c) { - if (c == background_label) continue; - const T *score_data = scores_data + c * predict_dim; - - /// [c] is key - NMSFast(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta, - nms_top_k, &((*indices)[c])); - num_det += (*indices)[c].size(); - } - - *num_nmsed_out = num_det; - if (keep_top_k > -1 && num_det > keep_top_k) { - std::vector>> score_index_pairs; - for (const auto& it : *indices) { - int label = it.first; - const T* sdata = scores_data + label * predict_dim; - const std::vector& label_indices = it.second; - for (size_t j = 0; j < 
label_indices.size(); ++j) { - int idx = label_indices[j]; - // PADDLE_ENFORCE_LT(idx, predict_dim); - score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx))); - } - } - // Keep top k results per image. - std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), - SortScorePairDescend>); - score_index_pairs.resize(keep_top_k); - - // Store the new indices. - std::map> new_indices; - for (size_t j = 0; j < score_index_pairs.size(); ++j) { - int label = score_index_pairs[j].second.first; - int idx = score_index_pairs[j].second.second; - new_indices[label].push_back(idx); - } - new_indices.swap(*indices); - *num_nmsed_out = keep_top_k; - } -} - -template -void MultiClassOutput(const T *scores_data, - const std::vector &score_dim, - const T *bboxes_data, - T *outputs_data, - const std::map>& selected_indices) { - int predict_dim = score_dim[1]; - int count = 0; - for (const auto& it : selected_indices) { - /// one batch - int label = it.first; - const T* sdata = scores_data + label * predict_dim; - const std::vector& indices = it.second; - for (size_t j = 0; j < indices.size(); ++j) { - int idx = indices[j]; - const T* bdata = bboxes_data + idx * kBBoxSize; - outputs_data[count * kOutputDim] = label; // label - outputs_data[count * kOutputDim + 1] = sdata[idx]; // score - // xmin, ymin, xmax, ymax - std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T)); - count++; - } - } -} - -void MultiClassNMSCompute(NMSParam *param) { - assert(param->score_dim[0] == 1); - assert(param->box_dim[0] == 1); - assert (param->score_dim.size() == 3); - assert(param->box_dim.size() == 3); - - float* outputs; - auto background_label = param->background_label; - auto nms_top_k = param->nmsTopK; - auto keep_top_k = param->keepTopK; - auto nms_threshold = param->nmsThreshold; - auto nms_eta = param->nmsEta; - auto score_threshold = param->scoreThredshold; - - std::vector score_dim_one_batch = {param->score_dim[1], param->score_dim[2]}; 
- std::vector box_dim_one_batch = {param->box_dim[1], param->box_dim[2]}; - - std::vector batch_starts = {0}; - - std::map> indices; - int num_nmsed_out = 0; - - MultiClassNMS(param->box_data, box_dim_one_batch, param->score_data, score_dim_one_batch, &indices, &num_nmsed_out, - background_label, nms_top_k, keep_top_k, nms_threshold, - nms_eta, score_threshold); - batch_starts.push_back(batch_starts.back() + num_nmsed_out); - - int output_size = 0; - int num_kept = batch_starts.back(); - if (num_kept == 0) { - outputs = new float[1]; - outputs[0] = -1; - output_size = 1; - } else { - outputs = new float[num_kept * kOutputDim]; - int64_t s = batch_starts[0]; - int64_t e = batch_starts[1]; - if (e > s) { - MultiClassOutput(param->score_data, score_dim_one_batch, param->box_data, outputs, indices); - } - output_size = num_kept * kOutputDim; - } - param->output = outputs; - param->output_size = output_size; -} - -@implementation CPUResult -@end - -@implementation NMSCompute - --(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox { - NMSParam param; - param.box_data = bbox; - param.score_data = score; - param.background_label = self.background_label; - param.scoreThredshold = self.scoreThredshold; - param.nmsTopK = self.nmsTopK; - param.keepTopK = self.keepTopK; - param.nmsEta = self.nmsEta; - param.nmsThreshold = self.nmsThreshold; - std::vector score_dim; - for (int i = 0; i < self.scoreDim.count; ++i) { - score_dim.push_back(self.scoreDim[i].intValue); - } - param.score_dim = score_dim; - - std::vector box_dim; - for (int i = 0; i < self.bboxDim.count; ++i) { - box_dim.push_back(self.bboxDim[i].intValue); - } - param.box_dim = box_dim; - MultiClassNMSCompute(¶m); - CPUResult *cr = [[CPUResult alloc] init]; - cr.output = param.output; - cr.outputSize = param.output_size; - return cr; -} - -@end - - diff --git a/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift index 
4c38a1b7b42e21f88b3b1c8825c181bb83293a54..62954ede17d493ae12aa104d13a75dbc062e98a0 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift @@ -16,110 +16,95 @@ import Foundation // 自定义 ?! 如果 ?! 前的返回值为一个可选值, 则进行隐式解包, 如果有值则返回这个值, 如果为nil 则fatalError 传入的信息 precedencegroup ExecutedOrFatalError{ - associativity: left - higherThan: AssignmentPrecedence + associativity: left + higherThan: AssignmentPrecedence } infix operator ?!: ExecutedOrFatalError public func ?!(option: T?, excuteOrError: @autoclosure () -> String) -> T{ - if let inOpt = option { - return inOpt - }else{ - print(excuteOrError()) - fatalError(excuteOrError()) - } + if let inOpt = option { + return inOpt + }else{ + print(excuteOrError()) + fatalError(excuteOrError()) + } } //Lense struct Lense { - let from: (A) -> B - let to: (B, A) -> A + let from: (A) -> B + let to: (B, A) -> A } precedencegroup CombineLense{ - associativity: left - higherThan: AssignmentPrecedence + associativity: left + higherThan: AssignmentPrecedence } infix operator >>>: CombineLense func >>>(left: Lense, right: Lense) -> Lense { - return Lense.init(from: { (a) -> C in - left.from(right.from(a)) - }, to: { (c, a) -> A in - right.to( left.to(c, right.from(a)),a) - }) + return Lense.init(from: { (a) -> C in + left.from(right.from(a)) + }, to: { (c, a) -> A in + right.to( left.to(c, right.from(a)),a) + }) } protocol CIntIndex { - associatedtype T; - subscript(index: CInt) -> T { get set}; + associatedtype T; + subscript(index: CInt) -> T { get set}; } extension Array: CIntIndex{ - typealias T = Element - subscript(index: CInt) -> T { - get{ - guard Int64(Int.max) >= Int64(index) else{ - fatalError("cint index out of Int range") - } - return self[Int(index)] - } - set{ - guard Int64(Int.max) >= Int64(index) else{ - fatalError("cint index out of Int range") - } - self[Int(index)] = newValue + typealias T = Element + subscript(index: CInt) -> T { + get{ + guard 
Int64(Int.max) >= Int64(index) else{ + fatalError("cint index out of Int range") + } + return self[Int(index)] + } + set{ + guard Int64(Int.max) >= Int64(index) else{ + fatalError("cint index out of Int range") + } + self[Int(index)] = newValue + } + } - - } } extension Array where Element: AnyObject{ - mutating func remove(element: Element) { - if let index = index(where: { (node) -> Bool in - return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self) - }) { - remove(at: index) + mutating func remove(element: Element) { + if let index = index(where: { (node) -> Bool in + return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self) + }) { + remove(at: index) + } } - } - + } //MARK: Array extension extension Array where Element: Comparable{ - - /// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回 - /// - /// - Parameter r: 前 r 个元素 - /// - Returns: [(原有位置, 排好位置的元素)] - public func top(r: Int) -> [(Int, Element)] { - precondition(r <= self.count) - return Array<(Int, Element)>(zip(0.. $1.1 }.prefix(through: r - 1)) - } -} - -extension Array { - public func strideArray(inCount: Int = 20) -> [(Int, Element)] { - if count < inCount { - return (0.. [(Int, Element)] { + precondition(r <= self.count) + return Array<(Int, Element)>(zip(0.. $1.1 }.prefix(through: r - 1)) } - } } extension String{ - func cStr() -> UnsafePointer? { - return (self as NSString).utf8String - } + func cStr() -> UnsafePointer? 
{ + return (self as NSString).utf8String + } } func address(o: T) -> String { - return String.init(format: "%018p", unsafeBitCast(o, to: Int.self)) + return String.init(format: "%018p", unsafeBitCast(o, to: Int.self)) } diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index 3be8c118613b3e9d6a9247fd731cc74392392d5b..b750018260f64ae89f5b3aab5cc987eee9a11415 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -18,588 +18,263 @@ fileprivate var defaultMetalLibrary: MTLLibrary? fileprivate var paddleMobileMetalLibrary: MTLLibrary? extension MTLDevice { - func defaultLibrary() -> MTLLibrary { - if defaultMetalLibrary == nil { - defaultMetalLibrary = makeDefaultLibrary() - } - if let inDefaultLib = defaultMetalLibrary { - return inDefaultLib - } else { - fatalError(" default metal libary is nil") - } - } - - func paddleMobileLibrary() -> MTLLibrary { - if paddleMobileMetalLibrary == nil { - guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else { - fatalError("Counld't find paddle mobile library") - } - do { - paddleMobileMetalLibrary = try makeLibrary(filepath: path) - } catch _ { - fatalError("Counld't load paddle mobile library") - } - } - - if let inPaddleMobileLib = paddleMobileMetalLibrary { - return inPaddleMobileLib - } else { - fatalError("PaddleMobile metal libary is nil") - } - } - - func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState { - let useLib = inPaddleMobileLib ? 
paddleMobileLibrary() : defaultLibrary() - guard let function = useLib.makeFunction(name: funcName) else { - fatalError(" function " + funcName + " not found") - } - do { - let pipLine = try makeComputePipelineState(function: function) - return pipLine - } catch let error { - print(error) - fatalError("make pip line error occured : \(error)") - } - - } - - func makeBuffer

(value: [P]) -> MTLBuffer { - let buffer = makeBuffer(length: value.count * MemoryLayout

.size, options: MTLResourceOptions.storageModeShared) - let contents = buffer?.contents().bindMemory(to: P.self, capacity: value.count * MemoryLayout

.size) - for i in 0..(texture: MTLTexture, cb: ([Int], P)->Void) -> Void { - let bpR = texture.width * 4 * MemoryLayout

.size - let bpI = texture.height * bpR - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: 1)) - for i in 0.. = UnsafeMutablePointer

.allocate(capacity: bpI) - texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i) - for tx in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { - var tdim: [Int] = [1, 1, 1, 1] - for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { - var tdim: [Int] = [1, 1, 1, 1] - for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { - var tdim: [Int] = [1, 1, 1, 1] - for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { - if dim.count == 3 { - return texture2tensor_3(texture: texture, dim: dim, transpose: transpose) - } else if dim.count == 2 { - return texture2tensor_2(texture: texture, dim: dim, transpose: transpose) - } else if dim.count == 1 { - return texture2tensor_1(texture: texture, dim: dim, transpose: transpose) - } - var tdim: [Int] = [1, 1, 1, 1] - for i in 0.. MTLLibrary { + if defaultMetalLibrary == nil { + defaultMetalLibrary = makeDefaultLibrary() + } + if let inDefaultLib = defaultMetalLibrary { + return inDefaultLib + } else { + fatalError(" default metal libary is nil") + } } - let count = dim.reduce(1) { $0 * $1 } - var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count) - let ndim: [Int] = transpose.map { tdim[$0] } - assert(texture.width == ndim[2]) - assert(texture.height == ndim[1]) - assert(texture.arrayLength == (ndim[0] * ndim[3] + 3) / 4) - - texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in - var tg: [Int] = [0, 0, 0, 0] - tg[1] = xyzn[1] - tg[2] = xyzn[0] - tg[0] = (xyzn[2] * 4 + xyzn[3]) / ndim[3] - tg[3] = (xyzn[2] * 4 + xyzn[3]) % ndim[3] - var ig: [Int] = [0, 0, 0, 0] - for k in 0..<4 { - ig[transpose[k]] = tg[k] - } - let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3] - if ix < count { - tensor[ix] = v - } - } - return tensor - } - - func tensor2texture

(value: [P], dim: [Int], transpose: [Int] = [0, 1, 2, 3], inComputePrecision: ComputePrecision = .Float32) -> MTLTexture { - if value.count > 0 { - assert(value.count == dim.reduce(1) { $0 * $1 }) + func paddleMobileLibrary() -> MTLLibrary { + if paddleMobileMetalLibrary == nil { + guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else { + fatalError("Counld't find paddle mobile library") + } + do { + paddleMobileMetalLibrary = try makeLibrary(filepath: path) + } catch _ { + fatalError("Counld't load paddle mobile library") + } + } + + if let inPaddleMobileLib = paddleMobileMetalLibrary { + return inPaddleMobileLib + } else { + fatalError("PaddleMobile metal libary is nil") + } } - var tdim: [Int] = [1, 1, 1, 1] - for i in 0.. MTLComputePipelineState { + let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary() + guard let function = useLib.makeFunction(name: funcName) else { + fatalError(" function " + funcName + " not found") + } + do { + let pipLine = try makeComputePipelineState(function: function) + return pipLine + } catch _ { + fatalError("make pip line error occured") + } + } - let ndim: [Int] = transpose.map { tdim[$0] } - let textureDesc = MTLTextureDescriptor.init() - textureDesc.width = ndim[2] - textureDesc.height = ndim[1] - textureDesc.depth = 1 - textureDesc.usage = [.shaderRead, .shaderWrite] - - if inComputePrecision == .Float16 { - textureDesc.pixelFormat = .rgba16Float - } else if inComputePrecision == .Float32 { - textureDesc.pixelFormat = .rgba32Float + func makeBuffer

(value: [P]) -> MTLBuffer { + let buffer = makeBuffer(length: value.count * MemoryLayout

.size, options: MTLResourceOptions.storageModeShared) + let contents = buffer?.contents().bindMemory(to: P.self, capacity: value.count * MemoryLayout

.size) + for i in 0.. 0 { - var rcount: Int = (ndim[0] * ndim[3] + 3) / 4 - rcount = rcount * 4 * ndim[1] * ndim[2] - var nvalue: [Float32] = .init(repeating: 0.0, count: rcount) - - for i0 in 0..(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{ + + let textureDesc = MTLTextureDescriptor.init() + textureDesc.width = textureWidth + textureDesc.height = textureHeight + textureDesc.depth = 1 + textureDesc.usage = [.shaderRead, .shaderWrite] + textureDesc.pixelFormat = .rgba32Float + textureDesc.textureType = .type2DArray + textureDesc.storageMode = .shared + textureDesc.cpuCacheMode = .defaultCache + textureDesc.arrayLength = arrayLength + let texture = makeTexture(descriptor: textureDesc)! + + if arrayLength == 1 && value.count >= 4{ + let pointer: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: value.count * MemoryLayout

.size) + for i in 0.. = UnsafeMutablePointer(mutating: nvalue) - let outputP: UnsafeMutablePointer = UnsafeMutablePointer(mutating: xvalue) - float32ToFloat16(input: pointer, output: outputP, count: rcount) - let bpR = ndim[2] * 4 * 2 - let bpI = ndim[1] * bpR - for i in 0.. = UnsafeMutablePointer(mutating: nvalue) - let bpR = ndim[2] * 4 * MemoryLayout

.size - let bpI = ndim[1] * bpR - for i in 0...size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth)) + texture.replace(region: region, mipmapLevel: 0, withBytes: pointer, bytesPerRow: bytesPerRow) + } else { + + + } - } - } - return texture - } - - func makeFloatTexture

(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{ - - let textureDesc = MTLTextureDescriptor.init() - textureDesc.width = textureWidth - textureDesc.height = textureHeight - textureDesc.depth = 1 - textureDesc.usage = [.shaderRead, .shaderWrite] - textureDesc.pixelFormat = .rgba32Float - textureDesc.textureType = .type2DArray - textureDesc.storageMode = .shared - textureDesc.cpuCacheMode = .defaultCache - textureDesc.arrayLength = arrayLength - let texture = makeTexture(descriptor: textureDesc)! - - if value.count >= 4{ - let counts = arrayLength * 4 * textureWidth * textureHeight - let pointer: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: counts * MemoryLayout

.size) - for i in 0...size - let bytesPerImage = texture.height * bytesPerRow - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth)) - for i in 0..(stridable: Bool = true) -> [(index: Int, value: P)] { - var arr: [P] = floatArray { (p: P) -> P in - return p; - } - var result: [(index: Int, value: P)] = [] - if arr.count > 100 && stridable { - for j in stride(from: 0, to: arr.count , by: arr.count / 100){ - result.append((j, arr[j])) - } - } else { - for j in 0..(res: (P) -> T) -> [T] { - var fArr: [T] = [] - if textureType == .type2DArray { - for i in 0...size, alignment: MemoryLayout

.alignment) - let bytesPerRow = width * depth * 4 * MemoryLayout

.size - let bytesPerImage = width * height * depth * 4 * MemoryLayout

.size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) - getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) - let p = bytes.assumingMemoryBound(to: P.self) - - for j in 0...size, alignment: MemoryLayout

.alignment) - let bytesPerRow = width * depth * 4 * MemoryLayout

.size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) - getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) - let p = bytes.assumingMemoryBound(to: P.self) - - for j in 0.. [Float32] { - if pixelFormat == .rgba32Float { - let float32Array = floatArray { (f: Float32) -> Float32 in - return f - } - return float32Array - } else if pixelFormat == .rgba16Float { - - var float16Array = floatArray { (f: Float16) -> Float16 in - return f - } - return float16To32(input: &float16Array, count: float16Array.count) - } else { - fatalError() - } - } - - func logDesc(header: String = "", stridable: Bool = true) -> T? { - print(header) - print("texture: \(self)") - // let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable) - // print(res) - if textureType == .type2DArray { - for i in 0...size, alignment: MemoryLayout.alignment) - let bytesPerRow = width * depth * 4 * MemoryLayout.size - let bytesPerImage = width * height * depth * 4 * MemoryLayout.size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) - getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) - let p = bytes.assumingMemoryBound(to: T.self) - str += "2d array count : \(width * height * depth * 4) \n" - if stridable && width * height * depth * 4 > 20 { - for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 20){ - str += " index \(j): \(p[j])" - } - } else { - for j in 0...size, alignment: MemoryLayout.alignment) - let bytesPerRow = width * depth * 4 * MemoryLayout.size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) - getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) - let p = 
bytes.assumingMemoryBound(to: T.self) - str += "2d count : \(width * width * 4) \n" - - if stridable { - for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 20){ - str += "index \(j): \(p[j]) " - } - } else { - for j in 0..(stridable: Bool = true) -> [(index: Int, value: P)] { + var arr: [P] = floatArray { (p: P) -> P in + return p; } - } - - print(str) - bytes.deallocate() - } - return nil - - } - - // n c h w - dim - func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] { - var textureArray: [Float32] - if pixelFormat == .rgba32Float { - textureArray = floatArray { (i : Float32) -> Float32 in - return i - } - } else if pixelFormat == .rgba16Float { - - var textureFloat16Array = floatArray { (i : Float16) -> Float16 in - return i - } - textureArray = float16To32(input: &textureFloat16Array, count: textureFloat16Array.count) - } else { - fatalError(" 目前还不支持其他类型 ") - } - - var output: [Float32] = [] - for s in 0.. 100 && stridable { + for j in stride(from: 0, to: arr.count , by: arr.count / 100){ + result.append((j, arr[j])) + } + } else { + for j in 0.. [Float32] { -// print("origin dim: \(dim)") -// print("texture: ") -// print(self) - - var textureArray: [Float32] - if pixelFormat == .rgba32Float { - textureArray = floatArray { (i : Float32) -> Float32 in - return i - } - } else if pixelFormat == .rgba16Float { - var textureFloat16Array = floatArray { (i : Float16) -> Float16 in - return i - } - textureArray = float16To32(input: &textureFloat16Array, count: textureFloat16Array.count) - } else { - fatalError(" 目前还不支持其他类型 ") + return result } - var output: [Float32] = [] - let numOfASlice = dim.h * dim.w * 4 - for h in 0.. dim.c { - for i in 0..<(4 - ((sliceIndex * 4 + 4) - dim.c)) { - let value = textureArray[sliceIndex * numOfASlice + h * dim.w * 4 + w * 4 + i] - output.append(value) + func floatArray(res: (P) -> T) -> [T] { + var fArr: [T] = [] + if textureType == .type2DArray { + for i in 0...size, alignment: MemoryLayout

.alignment) + let bytesPerRow = width * depth * 4 * MemoryLayout

.size + let bytesPerImage = width * height * depth * 4 * MemoryLayout

.size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) + getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) + let p = bytes.assumingMemoryBound(to: P.self) + + for j in 0...size, alignment: MemoryLayout

.alignment) + let bytesPerRow = width * depth * 4 * MemoryLayout

.size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) + getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) + let p = bytes.assumingMemoryBound(to: P.self) + + for j in 0..(header: String = "", stridable: Bool = true) -> T? { + print(header) + print("texture: \(self)") + let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable) + print(res) +// if textureType == .type2DArray { +// for i in 0...size, alignment: MemoryLayout.alignment) +// let bytesPerRow = width * depth * 4 * MemoryLayout.size +// let bytesPerImage = width * height * depth * 4 * MemoryLayout.size +// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) +// getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) +// let p = bytes.assumingMemoryBound(to: T.self) +// str += "2d array count : \(width * height * depth * 4) \n" +// if stridable && width * height * depth * 4 > 100 { +// for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){ +// str += " index \(j): \(p[j])" +// } +// } else { +// for j in 0...size, alignment: MemoryLayout.alignment) +// let bytesPerRow = width * depth * 4 * MemoryLayout.size +// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) +// getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) +// let p = bytes.assumingMemoryBound(to: T.self) +// str += "2d count : \(width * width * 4) \n" +// +// if stridable { +// for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){ +// str += "index \(j): \(p[j]) " +// } +// } else { +// for j in 0..(header: String = "", stridable: Bool = true) -> T? 
{ - print(header) - print("MTLBuffer: \(self) ") - var str = "" - if stridable && length/MemoryLayout.stride > 1000{ - for j in stride(from: 0, to: length, by: length/MemoryLayout.stride / 100){ - str += " \(contents().assumingMemoryBound(to: T.self)[j])" - } - } else { - for i in 0...size { - str += " \(contents().assumingMemoryBound(to: T.self)[i])" - } + func logDesc(header: String = "", stridable: Bool = true) -> T? { + print(header) + print("MTLBuffer: \(self) ") + var str = "" + if stridable && length/MemoryLayout.stride > 1000{ + for j in stride(from: 0, to: length, by: length/MemoryLayout.stride / 100){ + str += " \(contents().assumingMemoryBound(to: T.self)[j])" + } + } else { + for i in 0...size { + str += " \(contents().assumingMemoryBound(to: T.self)[i])" + } + } + print(str) + return nil } - print(str) - return nil - } - - func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture { - let textureDesc = MTLTextureDescriptor.init() - textureDesc.width = textureWidth - textureDesc.height = textureHeight - textureDesc.depth = 1 - textureDesc.usage = [.shaderRead, .shaderWrite] - textureDesc.pixelFormat = .rgba32Float - textureDesc.textureType = .type2DArray - textureDesc.storageMode = .shared - textureDesc.cpuCacheMode = .defaultCache - textureDesc.arrayLength = arrayLength - let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)! 
- return texture - } - - func array() -> [T] { - var array: [T] = [] - let pointer = contents().bindMemory(to: T.self, capacity: length) - for i in 0..<(length / MemoryLayout.size) { - array.append(pointer[i]) + + func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture { + let textureDesc = MTLTextureDescriptor.init() + textureDesc.width = textureWidth + textureDesc.height = textureHeight + textureDesc.depth = 1 + textureDesc.usage = [.shaderRead, .shaderWrite] + textureDesc.pixelFormat = .rgba32Float + textureDesc.textureType = .type2DArray + textureDesc.storageMode = .shared + textureDesc.cpuCacheMode = .defaultCache + textureDesc.arrayLength = arrayLength + let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)! + return texture } - return array; - } + + + } + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift index 91afae6f6415d187a69063381f3a27a6bbe92b81..a2927c4693c35fd8181d891cc33fa27c2c4cf0b9 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift @@ -1,16 +1,10 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +// +// TestConvAddBatchNormRelu.swift +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/7/25. 
+// Copyright © 2018年 orange. All rights reserved. +// import Metal import Foundation @@ -23,204 +17,6 @@ public class PaddleMobileUnitTest { queue = inQueue } - private func indentPrintTensor(tensor: [Float32], dim: [Int], ix: [Int], indentLevel: Int) { - let indent = Array.init(repeating: " ", count: indentLevel).joined(separator: "") - var tx = ix - if dim.count == indentLevel + 1 { - var log: String = indent + "[" - for i in 0.. 0 { - log += ", " - } - log += tensor[c].description - } - log += "]" - if (indentLevel > 0) && (ix[indentLevel - 1] < dim[indentLevel - 1] - 1) { - log += "," - } - print(log) - } else { - print(indent + "[") - for i in 0.. 0) && (ix[indentLevel - 1] < dim[indentLevel - 1] - 1) { - print(indent + "],") - } else { - print(indent + "]") - } - } - } - - private func tensorPrint(tensor: [Float32], dim: [Int]) { - var detectPos = -1 - var odim = 1 - var ndim = dim - for i in 0..= -1) - if (detectPos == -1) { - assert(tensor.count == odim) - } else { - assert(tensor.count % odim == 0) - ndim[detectPos] = tensor.count / odim - } - indentPrintTensor(tensor: tensor, dim: ndim, ix: dim.map { $0 * 0 }, indentLevel: 0) - } - - public func testConcat() { -// let buffer = queue.makeCommandBuffer() ?! 
"buffer is nil" -// var it: [[Float32]] = [] -// for _ in 0..<7 { -// it.append((0..<12).map { Float32($0) }) -// } -// let input = it.map { device.tensor2texture(value: $0, dim: [3, 4]) } -// let output = device.tensor2texture(value: [Float32](), dim: [3, 28]) -// -// let param = ConcatTestParam.init( -// input: input, -// output: output, -// dims: [[3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4]], -// axis: 1, -// odim: [3, 28] -// ) -// let concatKernel = ConcatKernel.init(device: device, testParam: param) -// concatKernel.test(cmdBuffer: buffer, param: param) -// buffer.addCompletedHandler { (buffer) in -// for i in 0...init(device: device, testParam: param) -// reshapeKernel.test(commandBuffer: buffer, testParam: param) -// buffer.addCompletedHandler { (buffer) in -// let _: Float32? = inTexture.logDesc() -// let _: Float32? = outTexture.logDesc() -// self.tensorPrint(tensor: input, dim: [2, 3, 4]) -// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [4, 6]) -// self.tensorPrint(tensor: tx, dim: [4, 6]) -// } - -// let input: [Float32] = (0..<24).map { Float32($0) } -// let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) -// let outTexture = device.tensor2texture(value: [Float32](), dim: [24]) -// let mp = ReshapeMetalParam.init( -// idim: (1, 2, 3, 4), -// itrans: (0, 1, 2, 3), -// odim: (1, 1, 1, 24), -// otrans: (0, 1, 2, 3) -// ) -// let param = ReshapeTestParam.init( -// inputTexture: inTexture, -// outputTexture: outTexture, -// param: mp -// ) -// let reshapeKernel = ReshapeKernel.init(device: device, testParam: param) -// reshapeKernel.test(commandBuffer: buffer, testParam: param) -// buffer.addCompletedHandler { (buffer) in -// let _: Float32? = inTexture.logDesc() -// let _: Float32? 
= outTexture.logDesc() -// self.tensorPrint(tensor: input, dim: [2, 3, 4]) -// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24]) -// self.tensorPrint(tensor: tx, dim: [24]) -// } -// -// -// buffer.commit() - } - - public func testTranspose() { - - let buffer = queue.makeCommandBuffer() ?! "buffer is nil" -// var input: [Float32] = [] -// for i in 0..<72 { -// input.append(Float32(i)) -// } -//// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3) -// let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]); -// // group 1 -// let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4]) -// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0]) -//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1]) -//// // group 2 -//// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6) -//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1]) -//// -// let transposeKernel = TransposeKernel.init(device: device, testParam: param) -// -// transposeKernel.test(commandBuffer: buffer, param: param) -// -// buffer.addCompletedHandler { (buffer) in -// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false) -// let _: Float32? 
= outputTexture.logDesc(header: "output texture", stridable: false) -// self.tensorPrint(tensor: input, dim: [4, 3, 2, 3]) -// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4]) -// self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4]) -// } -// -// let input: [Float32] = (0..<24).map { Float32($0) } -// let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) -// let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2]) -// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1]) -// let transposeKernel = TransposeKernel.init(device: device, testParam: param) -// -// transposeKernel.test(commandBuffer: buffer, param: param) -// -// buffer.addCompletedHandler { (buffer) in -// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false) -// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false) -// self.tensorPrint(tensor: input, dim: [2, 3, 4]) -// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2]) -// self.tensorPrint(tensor: tx, dim: [3, 4, 2]) -// } -// - buffer.commit() - } - public func testConvAddBnRelu() { let buffer = queue.makeCommandBuffer() ?! 
" buffer is nil " @@ -320,7 +116,7 @@ public class PaddleMobileUnitTest { let offsetX = filterSize.width/2 - paddings.0 let offsetY = filterSize.height/2 - paddings.1 - let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), dilationX: UInt16(1), dilationY: UInt16(1)) + let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0)) let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize) @@ -336,6 +132,16 @@ public class PaddleMobileUnitTest { } buffer.commit() + + +// let inputTexture = device.makeFloatTexture(value: <#T##[P]#>, textureWidth: <#T##Int#>, textureHeight: <#T##Int#>, arrayLength: <#T##Int#>) + + +// let param = ConvAddBatchNormReluTestParam.init(inInputTexture: <#T##MTLTexture#>, inOutputTexture: <#T##MTLTexture#>, inMetalParam: <#T##MetalConvParam#>, inFilterBuffer: <#T##MTLBuffer#>, inBiaseBuffer: <#T##MTLBuffer#>, inNewScaleBuffer: <#T##MTLBuffer#>, inNewBiaseBuffer: <#T##MTLBuffer#>, inFilterSize: <#T##(width: Int, height: Int, channel: Int)#>) + +// ConvAddBatchNormReluKernel.init(device: <#T##MTLDevice#>, testParam: <#T##ConvAddBatchNormReluTestParam#>) + + } } diff --git a/metal/paddle-mobile/paddle-mobile/Common/Tools.swift b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift index 23ad7113971de3d0843abe17accfe3d67f0caaa9..930198fbf9c2cbfd917ddcb9ecb1fe02767c21f9 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Tools.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift @@ -1,16 +1,10 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +// +// Tools.swift +// paddle-mobile +// +// Created by liuRuiLong on 2018/7/26. +// Copyright © 2018年 orange. All rights reserved. +// import Foundation diff --git a/metal/paddle-mobile/paddle-mobile/Common/Types.swift b/metal/paddle-mobile/paddle-mobile/Common/Types.swift index a1197ed2188a263af3c0819fec09b584af501dd3..98353617f5090f1eeac0c644c17548555638a6ca 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Types.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Types.swift @@ -13,228 +13,80 @@ limitations under the License. */ import Foundation -import Accelerate public protocol SummableMultipliable: Equatable { - static func +(lhs: Self, rhs: Self) -> Self - static func *(lhs: Self, rhs: Self) -> Self - static func -(lhs: Self, rhs: Self) -> Self + static func +(lhs: Self, rhs: Self) -> Self + static func *(lhs: Self, rhs: Self) -> Self + static func -(lhs: Self, rhs: Self) -> Self } public protocol PrecisionType: SummableMultipliable{ - init(inFloat: Float32) - init(inFloat16: Float16) - init(_ inP: P) - static var bitSize: UInt { get } + init(inFloat: Float32) + init(inFloat16: Float16) + init(_ inP: P) + static var bitSize: UInt { get } } public typealias Float16 = Int16 extension Float16: PrecisionType { - public static func * (prefix: Float16, postfix: Float16) { - return prefix * postfix - } - - public init

(_ inP: P) where P : PrecisionType { - if P.bitSize == Float32.bitSize { - self = Float16(inFloat: inP as! Float32) - } else if P.bitSize == Float16.bitSize { - self = inP as! Float16 - } else { - fatalError() + public static func * (prefix: Float16, postfix: Float16) { + return prefix * postfix } - } - - public static var bitSize: UInt { - return 16 - } - - public init(inFloat16: Float16) { - self = inFloat16 - } - public init(inFloat: Float32) { - self = Int16(inFloat) - } -} - -extension Float32: PrecisionType { - public init

(_ inP: P) where P : PrecisionType { - if P.bitSize == Float32.bitSize { - self = inP as! Float32 - } else if P.bitSize == Float16.bitSize { - self = Float32.init(inP as! Float16) - } else { - fatalError() - } - } - - public init(inFloat: Float32) { - self = inFloat - } - - public init(inFloat16: Float16) { - self = Float32.init(inFloat16) - } - - public static var bitSize: UInt { - return 32 - } -} - -public func float32ToFloat16(input: UnsafeMutablePointer, output: UnsafeMutableRawPointer, count: Int) { - var float32Buffer = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 4) - var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2) - guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else { - fatalError(" float 32 to float 16 error ! ") - } -} - -public func float16To32(input: UnsafeMutablePointer, count: Int) -> [Float32] { - var output = Array.init(repeating: 0.0, count: count) - float16to32(input: input, output: &output, count: count) - return output -} - -public func float16to32(input: UnsafeMutablePointer, output: UnsafeMutablePointer, count: Int) { - var bufferFloat16 = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 2) - var bufferFloat32 = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 4) - if vImageConvert_Planar16FtoPlanarF(&bufferFloat16, &bufferFloat32, 0) != kvImageNoError { - fatalError(" convert float16 to float32 error") - } -} - -// N - 0 C - 1 H - 2 W - 3 -struct DataLayout { - - static func NCHW(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout { - return DataLayout.init([(.N, dim[0]), (.C, dim[1]), (.H, dim[2]), (.W, dim[3])]) - } - - static func NHWC(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout { - return DataLayout.init([(.N, dim[0]), (.H, dim[1]), (.W, dim[2]), (.C, dim[3])]) - } - - func count() -> Int { - return layoutWithDim.count - } - - var N: Int? 
{ - get { - for layoutDim in layoutWithDim { - if layoutDim.0 == .N { - return layoutDim.1 + + public init

(_ inP: P) where P : PrecisionType { + if P.bitSize == Float32.bitSize { + self = Float16(inFloat: inP as! Float32) + } else if P.bitSize == Float16.bitSize { + self = inP as! Float16 + } else { + fatalError() } - } - return nil } - set { - var newN = (Layout.N, newValue) - if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in - return layout == .N - }) { - fatalError() - } + + public static var bitSize: UInt { + return 16 } - } - var C: Int? { - get { - for layoutDim in layoutWithDim { - if layoutDim.0 == .C { - return layoutDim.1 - } - } - return nil + + public init(inFloat16: Float16) { + self = inFloat16 } - set { - var newN = (Layout.C, newValue) - if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in - return layout == .N - }) { - fatalError() - } + public init(inFloat: Float32) { + self = Int16(inFloat) } - } - var H: Int? { - get { - for layoutDim in layoutWithDim { - if layoutDim.0 == .H { - return layoutDim.1 + + + +} + +extension Float32: PrecisionType { + public init

(_ inP: P) where P : PrecisionType { + if P.bitSize == Float32.bitSize { + self = inP as! Float32 + } else if P.bitSize == Float16.bitSize { + self = Float32.init(inP as! Float16) + } else { + fatalError() } - } - return nil } - set { - var newN = (Layout.H, newValue) - if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in - return layout == .H - }) { - fatalError() - } - } - } - var W: Int? { - get { - for layoutDim in layoutWithDim { - if layoutDim.0 == .W { - return layoutDim.1 - } - } - return nil + + public init(inFloat: Float32) { + self = inFloat } - set { - var newN = (Layout.W, newValue) - if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in - return layout == .W - }) { - fatalError() - } + + public init(inFloat16: Float16) { + self = Float32.init(inFloat16) } - } - - - init(_ inLayout: [(Layout, Int)]) { - layoutWithDim = inLayout - } - - func layout() -> [Layout] { - return layoutWithDim.map({ (layout: Layout, dim: Int) -> Layout in - return layout - }) - } - - var layoutWithDim: [(Layout, Int)] = [(.N, 0), (.C, 0), (.H, 0), (.W, 0)] - - func convertTo(inLayout: [Layout]) { - } - - enum Layout: Int{ - case N = 0 - case C = 1 - case H = 2 - case W = 3 - static func defaultLayout() -> [Layout] { - return [N, C, H, W] + public static var bitSize: UInt { + return 32 } - } } -extension DataLayout: Equatable { - public static func == (lhs: DataLayout, rhs: DataLayout) -> Bool { - if lhs.layoutWithDim.count == rhs.layoutWithDim.count { - var result = true - for i in 0.. 
{ - guard let inResultBuffer = resultBuffer else { - fatalError() - } - return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity) - } - -} - -extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible { - var description: String { - fatalError() -// return "\(result)" - } - - var debugDescription: String { - fatalError() -// return "\(result)" - } - - + } - - - diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift new file mode 100644 index 0000000000000000000000000000000000000000..0dcb3151e21cc0f3968a07da39366d4ba5fd5813 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Executor.swift @@ -0,0 +1,153 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +public class ResultHolder { + public let dim: [Int] + public let resultArr: [P] + public let elapsedTime: Double + public init(inDim: [Int], inResult: [P], inElapsedTime: Double) { + dim = inDim + resultArr = inResult + elapsedTime = inElapsedTime + } +} + +extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible { + public var debugDescription: String { + var str = "" + str += "Dim: \(dim) \n value:[ " + if resultArr.count < 20 { + for d in resultArr { + str += " \(d) " + } + } else { + for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) { + str += " \(resultArr[d]) " + } + } + str += " ]" + return str + } + + public var description: String { + return debugDescription + } +} + +public class Executor { + var ops: [Runable & InferShaperable] = [] + let program: Program + let device: MTLDevice + let queue: MTLCommandQueue + public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws { + program = inProgram + device = inDevice + queue = inQueue + for block in inProgram.programDesc.blocks { + //block.ops.count + for i in 0...shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) + op.inferShape() + ops.append(op) + } catch let error { + throw error + } + } + +// for op in block.ops { +// do { +// let op = try OpCreator

.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) +// op.inferShape() +// ops.append(op) +// } catch let error { +// throw error +// } +// } + } + } + + public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder

) -> Void, preProcessKernle: CusomKernel? = nil) throws { + guard let buffer = queue.makeCommandBuffer() else { + throw PaddleMobileError.predictError(message: "CommandBuffer is nil") + } + let resInput: MTLTexture + if let inPre = preProcessKernle { + do { + try inPre.compute(inputTexuture: input, commandBuffer: buffer) + resInput = inPre.outputTexture + } catch let error { + throw error + } + } else { + resInput = input + } + + let beforeDate = Date.init() + let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect)) + program.scope.setInput(input: inputTexture) + + for op in ops { + do { + try op.run(device: device, buffer: buffer) + } catch let error { + throw error + } + } + + buffer.addCompletedHandler { (commandbuffer) in +// let inputArr = resInput.floatArray(res: { (p:P) -> P in +// return p +// }) +// print(inputArr) + +// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray() +// print(stridableInput) + +// let _: Flo? = input.logDesc(header: "input: ", stridable: true) +// for op in self.ops { +// op.delogOutput() +// } +// return + +// self.ops[2].delogOutput() + + + let afterDate = Date.init() + + guard let outputVar = self.program.scope.output() else { + fatalError("output nil") + } + + guard let output = outputVar as? Texture

else { + fatalError("output var type error") + } + let resultHodlder = ResultHolder

.init(inDim: output.dim.dims, inResult: output.metalTexture.floatArray(res: { (p:P) -> P in + return p + }), inElapsedTime: afterDate.timeIntervalSince(beforeDate)) + completionHandle(resultHodlder) + } + buffer.commit() + } + + public func clear() { + program.scope.clear() + } + +} + +//public let paddle_executor: Executor = Executor.init() diff --git a/metal/paddle-mobile/paddle-mobile/Genet.swift b/metal/paddle-mobile/paddle-mobile/Genet.swift deleted file mode 100644 index d803d1e99537e3a24d1fae5a5653d680bd811ac2..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Genet.swift +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -public class Genet: Net { - @objc public override init(device: MTLDevice) { - super.init(device: device) - means = [128.0, 128.0, 128.0] - scale = 0.017 - except = 0 - modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null" - paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! 
"para null" - modelDir = "" - preprocessKernel = GenetPreProccess.init(device: device) - dim = (n: 1, h: 128, w: 128, c: 3) - } - - @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { - super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) - means = [128.0, 128.0, 128.0] - scale = 0.017 - except = 0 - modelPath = "" - paramPath = "" - modelDir = "" - preprocessKernel = GenetPreProccess.init(device: device) - dim = (n: 1, h: 128, w: 128, c: 3) - } - - class GenetPreProccess: CusomKernel { - init(device: MTLDevice) { - let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3) - super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false) - } - } - - override public func resultStr(res: ResultHolder) -> String { -// fatalError() - return " \(res.result![0]) ... " - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Loader.swift b/metal/paddle-mobile/paddle-mobile/Loader.swift new file mode 100644 index 0000000000000000000000000000000000000000..c68b68e1caffcadc2adb2b4ddf245c89b2c5a223 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Loader.swift @@ -0,0 +1,187 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation +import SwiftProtobuf + +public class Loader { + class ParaLoader { + let file: UnsafeMutablePointer + let fileSize: Int + var nowIndex: Int + init(paramPath: String) throws { + guard let tmpFile = fopen(paramPath, "rb") else { + throw PaddleMobileError.loaderError(message: "open param file error" + paramPath) + } + file = tmpFile + fseek(file, 0, SEEK_END) + fileSize = ftell(file) + guard fileSize > 0 else { + throw PaddleMobileError.loaderError(message: "param file size is too small") + } + rewind(file) + nowIndex = 0 + } + + func read(tensor: Tensor

) throws { + guard nowIndex <= fileSize else { + throw PaddleMobileError.loaderError(message: "out of the file range") + } + + func pointerReader(type: T.Type) -> T { + let ptr = UnsafeMutablePointer.allocate(capacity: MemoryLayout.size) + fread(ptr, 1, MemoryLayout.size, file) + nowIndex += MemoryLayout.size + let pointee = ptr.pointee + ptr.deinitialize(count: MemoryLayout.size) + ptr.deallocate() + return pointee + } + + let _ = pointerReader(type: UInt32.self) + let lodLevel = pointerReader(type: UInt64.self) + for _ in 0...size)){ + _ = pointerReader(type: size_t.self) + } + } + + let _ = pointerReader(type: UInt32.self) + + let tensorDescSize = pointerReader(type: Int32.self) + + fseek(file, Int(tensorDescSize), SEEK_CUR) + nowIndex += Int(tensorDescSize) + + /* + 这里没有根据 Data Type 去判断, 而是从外部泛型直接指定了精度 + */ + + //现在模型传入模型为 Float 类型, 这块应该根据模型来 +// let tmpCapacity = MemoryLayout.size * tensor.numel() +// let tmpPointer = UnsafeMutablePointer.allocate(capacity: tmpCapacity); + let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) + + guard bytesRead == tensor.data.size else { + throw PaddleMobileError.loaderError(message: "param read size error") + } + + // TODO: use script to convert +// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) +// for i in 0.. Program{ + guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else { + throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !") + } + + do { + let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( + serializedData: modelData) + + let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) + let programDesc = ProgramOptimize

.init().optimize(originProgramDesc: originProgramDesc) + print(programDesc) + + guard let paraLoader = try? ParaLoader.init(paramPath: paraPath) else { + throw PaddleMobileError.loaderError(message: "load para error") + } + + guard programDesc.blocks.count > 0 else { + throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0") + } + + // to get feed key and fetch key + let block = programDesc.blocks[0] + guard let firstOp = block.ops.first, let lastOp = block.ops.last else { + throw PaddleMobileError.loaderError(message: "at least two operator") + } + guard firstOp.type == gFeedType, lastOp.type == gFetchType else { + throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch") + } + + guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else { + throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found") + } + guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else { + throw PaddleMobileError.loaderError(message: "feed key or fetch key not found") + } + + let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey) + + // to load memory + for block in programDesc.blocks { + for varDesc in block.vars { + if (varDesc.type == .LodTensor) { + guard let tensorDesc = varDesc.tensorDesc else { + throw PaddleMobileError.loaderError(message: "get tensor desc failed") + } + +// guard (try? tensorDesc.dataType.dataTypeSize()) == MemoryLayout

.size else { +// throw PaddleMobileError.memoryError(message: "PrecisionType not support") +// } + + if (varDesc.persistable + && varDesc.type != .FeedMiniBatch + && varDesc.type != .FetchList) { + let dimArr = tensorDesc.dims + + guard dimArr.count > 0 else { + throw PaddleMobileError.loaderError(message: "tensor desc dim size error") + } + + let dim = Dim.init(inDim: dimArr) + let tensor = Tensor

.init(inDim: dim, inLayout: tensorDesc.dataLayout) + do { + try paraLoader.read(tensor: tensor) + } catch let error { + throw error + } + tensor.convert(to: .NHWC) +// tensor.initBuffer(device: device) + scope[varDesc.name] = tensor + } else { + let dim = Dim.init(inDim: tensorDesc.NHWCDim) + scope[varDesc.name] = Texture

.init(device: device, inDim: dim) + } + } else { + if varDesc.name == fetchKey { + scope[varDesc.name] = ResultHolder

.init(inDim: [], inResult: [], inElapsedTime: 0.0) + } else if varDesc.name == feedKey { + } + } + } + } + + let program = Program.init(inProgramDesc: programDesc, inParamPath: paraPath, inScope: scope) + + return program + } catch _ { + throw PaddleMobileError.loaderError(message: "protobuf decoder error") + } + } +} diff --git a/metal/paddle-mobile/paddle-mobile/MobileNet.swift b/metal/paddle-mobile/paddle-mobile/MobileNet.swift deleted file mode 100644 index 7d10a920d15e751f29fce7f9f6be71cd6a2d6b69..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/MobileNet.swift +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class MobileNet: Net{ - - class MobilenetPreProccess: CusomKernel { - init(device: MTLDevice) { - let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) - super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false) - } - } - - class PreWords { - var contents: [String] = [] - init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { - if let filePath = inBundle.path(forResource: fileName, ofType: type) { - let string = try! 
String.init(contentsOfFile: filePath) - contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ - String($0[$0.index($0.startIndex, offsetBy: 10)...]) - } - }else{ - fatalError("no file call \(fileName)") - } - } - subscript(index: Int) -> String { - return contents[index] - } - } - - let labels = PreWords.init(fileName: "synset") - - override public func resultStr(res: ResultHolder) -> String { - guard let resPointer = res.result else { - fatalError() - } - var s: [String] = [] - (0.. String { - return " \(res)" - } - - override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { - -// guard let interRes = paddleMobileRes.intermediateResults else { -// fatalError(" need have inter result ") -// } -// -// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture else { -// fatalError(" need score ") -// } -// -// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture else { -// fatalError() -// } -// -// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) -//// print("score: ") -//// print(scoreFormatArr.strideArray()) -//// -// var bboxArr = bbox.metalTexture.float32Array() -//// print("bbox: ") -//// print(bboxArr.strideArray()) -// -// let nmsCompute = NMSCompute.init() -// nmsCompute.scoreThredshold = 0.01 -// nmsCompute.nmsTopK = 400 -// nmsCompute.keepTopK = 200 -// nmsCompute.nmsEta = 1.0 -// nmsCompute.nmsThreshold = 0.45 -// nmsCompute.background_label = 0; -// -// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])] -// -// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])] -// guard let result = nmsCompute.compute(withScore: 
&scoreFormatArr, andBBoxs: &bboxArr) else { -// fatalError( " result error " ) -// } -// -// let output: [Float32] = result.map { $0.floatValue } -// -// -// return output - fatalError() - } - - - - -} diff --git a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift deleted file mode 100644 index 6c7bd9b9c6ae4f55327a370ceb1e682a8e5e7658..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -public class MobileNet_ssd_AR: Net{ - @objc public override init(device: MTLDevice) { - super.init(device: device) - means = [103.94, 116.78, 123.68] - scale = 1 - except = 2 - modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null" - paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! 
"para null" - modelDir = "" - preprocessKernel = MobilenetssdPreProccess.init(device: device) - dim = (n: 1, h: 160, w: 160, c: 3) - } - - @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { - super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) - means = [103.94, 116.78, 123.68] - scale = 1 - except = 2 - modelPath = "" - paramPath = "" - modelDir = "" - preprocessKernel = MobilenetssdPreProccess.init(device: device) - dim = (n: 1, h: 160, w: 160, c: 3) - } - - class MobilenetssdPreProccess: CusomKernel { - init(device: MTLDevice) { - let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3) - super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false) - } - } - - override public func resultStr(res: ResultHolder) -> String { - return " \(res.result![0])" - } - - override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { - guard let interRes = paddleMobileRes.intermediateResults else { - fatalError(" need have inter result ") - } - - guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else { - fatalError(" need score ") - } - - guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else { - fatalError() - } - -// let startDate = Date.init() - -// print("scoreFormatArr: ") -//print((0.. 
- originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7]) - - originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7]) - - originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7]) - - program.scope[output] = originTexture - - if i == 99 { - opDesc.attrs["axis"] = 0 - } else { - opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } - } - } - - for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] { - let opDesc = program.programDesc.blocks[0].ops[i] - let output = opDesc.outputs["Out"]!.first! - let v = program.scope[output]! - - - - let originTexture = v as! Texture - originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) - opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } - } - - for i in [60, 101, 90, 97, 70, 80] { - let opDesc = program.programDesc.blocks[0].ops[i] - let output = opDesc.outputs["Out"]!.first! - let v = program.scope[output]! - let originTexture = v as! Texture - originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) - opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 - } - - for i in [102] { - let opDesc = program.programDesc.blocks[0].ops[i] - for output in opDesc.outputs["Out"]! { - let v = program.scope[output]! - let originTexture = v as! Texture - originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) - } - opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! 
Int) - 1 - print(" split axis \(opDesc.attrs["axis"])") - } - // 99 - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Net.swift b/metal/paddle-mobile/paddle-mobile/Net.swift deleted file mode 100644 index ce9ec98a66e685eec3a688a5a29402a76567b0e2..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Net.swift +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - - -import Foundation - -public class ResultHolder: NSObject { - @objc public let result: UnsafeMutablePointer? - @objc public let capacity: Int - - init(inResult: UnsafeMutablePointer?, inCapacity: Int) { - result = inResult - capacity = inCapacity - } - - @objc public func releasePointer() { - result?.deinitialize(count: capacity) - result?.deallocate() - } -} - -public class Net: NSObject { - var except: Int = 0 - var means: [Float] = [] - var scale: Float = 0.0 - var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0) - var preprocessKernel: CusomKernel? = nil - var paramPointer: UnsafeMutableRawPointer? = nil - var paramSize: Int = 0 - var modelPointer: UnsafeMutableRawPointer? 
= nil - var modelSize: Int = 0 - var modelPath: String = "" - var paramPath: String = "" - var modelDir: String = "" - @objc public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { - self.paramPointer = paramPointer - self.paramSize = paramSize - self.modelPointer = modePointer - self.modelSize = modelSize - super.init() - } - - - public func resultStr(res: ResultHolder) -> String { - fatalError() - } - - func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { - return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity) - } - - @objc public init(device: MTLDevice) { - super.init() - } - - func updateProgram(program: Program) { - - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift index 9806042e9eb339d6d15f2cbfebe924b548d29922..0ba02af1c51ba218982cc116e2cf8500cfa14db0 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift @@ -43,31 +43,14 @@ class OpCreator { [gConvType : ConvOp

.creat, gBatchNormType : BatchNormOp

.creat, gReluType : ReluOp

.creat, - gElementwiseAddType : ElementwiseAddOp

.creat, + gElementwiseAdd : ElementwiseAddOp

.creat, gFeedType : FeedOp

.creat, gFetchType : FetchOp

.creat, gConvAddBatchNormReluType : ConvAddBatchNormReluOp

.creat, gPooType : PoolOp

.creat, gSoftmaxType : SoftmaxOp

.creat, gReshapeType : ReshapeOp

.creat, - gConvAddType : ConvAddOp

.creat, - gDepthConvType : DepthConvOp

.creat, - gConcatType : ConcatOp

.creat, - gBoxcoderType : BoxcoderOp

.creat, - gConvBnReluType : ConvBNReluOp

.creat, - gDwConvBnReluType : DwConvBNReluOp

.creat, - gMulticlassNMSType : MulticlassNMSOp

.creat, - gTransposeType : TransposeOp

.creat, - gPriorBoxType : PriorBoxOp

.creat, - gPreluType : PreluOp

.creat, - gConv2dTransposeType : ConvTransposeOp

.creat, - gBilinearInterpType : BilinearInterpOp

.creat, - gSplit : SplitOp

.creat, - gShape : ShapeOp

.creat, - gFlatten : FlattenOp

.creat, - gConvAddPreluType : ConvAddPreluOp

.creat, - gConvAddAddPreluType : ConvAddAddPreluOp

.creat, - gElementwiseAddPreluType: ElementwiseAddPreluOp

.creat] - + gConvAddType : ConvAddOp

.creat] + private init(){} } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift index 9f868e35864d59be5711c4ac0a02787638eeae8f..43f095d7008ad14ac71d610728e19ac6f6817800 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift @@ -22,199 +22,147 @@ import Foundation */ protocol OpParam { - associatedtype OutputType: Variant - var output: OutputType { get set } - func outputDesc() -> String - - associatedtype ParamPrecisionType: PrecisionType - init(opDesc: OpDesc, inScope: Scope) throws - static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType - static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType - static func input(inputs: [String : [String]], from: Scope) throws -> VarType - static func output(outputs: [String : [String]], from: Scope) throws -> VarType - static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType - static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType - - static func inputImage(inputs: [String : [String]], from: Scope) throws -> VarType - - static func outputBoxes(outputs: [String : [String]], from: Scope) throws -> VarType - - static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType - - static func outputVariances(outputs: [String : [String]], from: Scope) throws -> VarType - - static func getAttr(key: String, 
attrs: [String : Attr]) throws -> T - - static func paramInputAlpha(inputs: [String : [String]], from: Scope) throws -> VarType - + associatedtype OutputType: Variant + var output: OutputType { get set } + func outputDesc() -> String + + associatedtype ParamPrecisionType: PrecisionType + init(opDesc: OpDesc, inScope: Scope) throws + static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType + static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType + static func input(inputs: [String : [String]], from: Scope) throws -> VarType + static func output(outputs: [String : [String]], from: Scope) throws -> VarType + static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType + static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType + static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType + static func getAttr(key: String, attrs: [String : Attr]) throws -> T } extension OpParam { - func outputDesc() -> String { - return output.debugDescription - } - - static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType { - guard let mapKeys = map[key], mapKeys.count > 0 else { - throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty") - } - guard let variant = from[mapKeys[0]] else { - throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope") + func outputDesc() -> String { + return output.debugDescription } - guard let v = 
variant as? VarType else { - throw PaddleMobileError.paramError(message: " type error") - - } - return v - } - - static func outputVariances(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorVariances: VarType = try getFirstTensor(key: "Variances", map: outputs, from: from) - return tensorVariances - } catch let error { - throw error - } - } - - static func paramInputAlpha(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let alphaTensor: VarType = try getFirstTensor(key: "Alpha", map: inputs, from: from) - return alphaTensor - } catch let error { - throw error - } - } - - - static func inputImage(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorImage: VarType = try getFirstTensor(key: "Image", map: inputs, from: from) - return tensorImage - } catch let error { - throw error - } - } - - static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) - return tensorX - } catch let error { - throw error - } - } - - static func outputBoxes(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorBox: VarType = try getFirstTensor(key: "Boxes", map: outputs, from: from) - return tensorBox - } catch let error { - throw error - } - } - - static func input(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) - return tensorInput - } catch let error { - throw error - } - } - - static func output(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) - return tensorOutput - } catch let error { - throw error + static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType { + guard let mapKeys = map[key], mapKeys.count > 0 else { 
+ throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty") + } + guard let variant = from[mapKeys[0]], let v = variant as? VarType else { + throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope") + } + return v } - } - static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from) - return tensorOutputY - } catch let error { - throw error - } - } - static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from) - return tensorY - } catch let error { - throw error + + static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) + + return tensorX + } catch let error { + throw error + } } - } - - static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) - return out - } catch let error { - throw error + + static func input(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) + return tensorInput + } catch let error { + throw error + } } - } - static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from) - return tensorFilter - } catch let error { - throw error + + static func output(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) + return tensorOutput + } catch let error { + throw error + } + } + static func outputY(outputs: [String : 
[String]], from: Scope) throws -> VarType { + do { + let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from) + return tensorOutputY + } catch let error { + throw error + } + } + static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from) + return tensorY + } catch let error { + throw error + } } - } - - static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) - return tensorBias - } catch let error { - throw error + + static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) + return out + } catch let error { + throw error + } + } + static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from) + return tensorFilter + } catch let error { + throw error + } } - } - - static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) - return tensorMean - } catch let error { - throw error + + static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) + return tensorBias + } catch let error { + throw error + } } - } - - static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) - return tensorScale - } catch let error { - throw error + + static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorMean: 
VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) + return tensorMean + } catch let error { + throw error + } } - } - - static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) - return tensorVariance - } catch let error { - throw error + + static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) + return tensorScale + } catch let error { + throw error + } } - } - - static func getAttr(key: String, attrs: [String : Attr]) throws -> T{ - guard let attr = attrs[key] else { - throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" ) + + static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) + return tensorVariance + } catch let error { + throw error + } } - guard let tAttr = attr as? T else { - throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" ) + static func getAttr(key: String, attrs: [String : Attr]) throws -> T{ + guard let attr = attrs[key] else { + throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" ) + } + + guard let tAttr = attr as? 
T else { + throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" ) + } + return tAttr } - return tAttr - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift index 40698da5ecb047dbf557cea18556616020ee9750..bc95f84d8ae98cb8e4e7151f0cf69a574699dc80 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift @@ -16,118 +16,100 @@ import Metal import Foundation protocol Fusion { - static func fusionNode() -> Node - static func change() -> [String : [(from: String, to: String)]] - static func fusionType() -> String - static func needCheck() -> [(Int, String)] -} -extension Fusion { - static func needCheck() -> [(Int, String)] { - return [] - } + static func fusionNode() -> Node + static func change() -> [String : [(from: String, to: String)]] + static func fusionType() -> String } protocol Runable { - func run(device: MTLDevice, buffer: MTLCommandBuffer) throws - func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws - func delogOutput() - func inputVariant() -> [String : [Variant]] - func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) + func run(device: MTLDevice, buffer: MTLCommandBuffer) throws + func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws + func delogOutput() } extension Runable where Self: OperatorProtocol{ - func run(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try runImpl(device: device, buffer: buffer) - } catch let error { - throw error + func run(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try runImpl(device: device, buffer: buffer) + } catch let error { + throw error + } +// print(type + ": " + para.outputDesc()) } - } - - func inputVariant() -> [String : [Variant]] { -// return [:] - fatalError(" op \(type) need implement inputVariant") - } - - func 
computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) { - fatalError(" need implement ") - } - - func delogOutput() { - print(type + ": has no implementation" ) - } + func delogOutput() { + print(type + ": has no implementation" ) + } } protocol Creator where Self: OperatorProtocol{ - associatedtype OpType: OperatorProtocol & Runable & InferShaperable - static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType + associatedtype OpType: OperatorProtocol & Runable & InferShaperable + static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType } extension Creator where Self: OperatorProtocol { - static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType { - do { - return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope) - } catch let error { - throw error + static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType { + do { + return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope) + } catch let error { + throw error + } } - } } protocol InferShaperable { - func inferShape() + func inferShape() } protocol OperatorProtocol { - associatedtype ParamType - associatedtype KerType: Computable where Self.KerType.ParamType == ParamType - var type: String { get } - var scope: Scope { get } - var inputs: [String : [String]] { get } - var paraInputs: [String : [String]] { get set } - var outpus: [String : [String]] { get } - var attrs: [String : Attr] { get } - var para: ParamType { get } - var kernel: KerType { get } - init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws + associatedtype ParamType + associatedtype KerType: Computable where Self.KerType.ParamType == ParamType + var type: String { get } + var scope: Scope { get } + var inputs: [String : [String]] { get } + var paraInputs: [String : [String]] { get set } + var outpus: [String : [String]] { get } + var attrs: [String : Attr] { get } + var para: ParamType 
{ get } + var kernel: KerType { get } + init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws } extension OperatorProtocol { - static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self { - do { - return try Self.init(device: device, opDesc: opDesc, inScope: inScope) - } catch let error { - throw error + static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self { + do { + return try Self.init(device: device, opDesc: opDesc, inScope: inScope) + } catch let error { + throw error + } } - } } class Operator : OperatorProtocol where KernelType.ParamType == ParameterType { - typealias ParamType = ParameterType - typealias KerType = KernelType - let type: String - let inputs: [String : [String]] - var paraInputs: [String : [String]] - let outpus: [String : [String]] - let attrs: [String : Attr] - let para: ParamType - let scope: Scope - var kernel: KerType - required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { -// print("create op: \(opDesc.type)") - type = opDesc.type - scope = inScope - inputs = opDesc.inputs - outpus = opDesc.outputs - attrs = opDesc.attrs - paraInputs = opDesc.paraInputs - do { - para = try ParamType.init(opDesc:opDesc, inScope: inScope) - } catch let error { - throw error + typealias ParamType = ParameterType + typealias KerType = KernelType + let type: String + let inputs: [String : [String]] + var paraInputs: [String : [String]] + let outpus: [String : [String]] + let attrs: [String : Attr] + let para: ParamType + let scope: Scope + var kernel: KerType + required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { + type = opDesc.type + scope = inScope + inputs = opDesc.inputs + outpus = opDesc.outputs + attrs = opDesc.attrs + paraInputs = opDesc.paraInputs + do { + para = try ParamType.init(opDesc:opDesc, inScope: inScope) + } catch let error { + throw error + } + kernel = KernelType.init(device: device, param: para) } - kernel = 
KernelType.init(device: device, param: para) - } } // op infos @@ -136,57 +118,22 @@ let gFeedType = "feed" let gConvType = "conv2d" let gBatchNormType = "batch_norm" let gReluType = "relu" -let gElementwiseAddType = "elementwise_add" +let gElementwiseAdd = "elementwise_add" let gConvAddBatchNormReluType = "conv_add_batchnorm_relu" let gPooType = "pool2d" let gSoftmaxType = "softmax" let gReshapeType = "reshape" let gConvAddType = "conv_add" -let gDepthConvType = "depthwise_conv2d" -let gPriorBoxType = "prior_box" -let gTransposeType = "transpose" -let gConcatType = "concat" -let gBoxcoderType = "box_coder" -let gMulticlassNMSType = "multiclass_nms" -let gConvBnReluType = "conv_bn_relu" -let gDwConvBnReluType = "depth_conv_bn_relu" -let gPreluType = "prelu" -let gConv2dTransposeType = "conv2d_transpose" -let gBilinearInterpType = "bilinear_interp" -let gSplit = "split" -let gShape = "shape" -let gFlatten = "flatten" -let gConvAddPreluType = "conv_add_prelu" -let gConvAddAddPreluType = "conv_add_add_prelu" -let gElementwiseAddPreluType = "elementwise_add_prelu" let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]), gBatchNormType : (inputs: ["X"], outputs: ["Y"]), gReluType : (inputs: ["X"], outputs: ["Out"]), - gElementwiseAddType : (inputs: ["X"], outputs: ["Out"]), + gElementwiseAdd : (inputs: ["X"], outputs: ["Out"]), gFeedType : (inputs: ["X"], outputs: ["Out"]), gFetchType : (inputs: ["X"], outputs: ["Out"]), gConvAddBatchNormReluType : (inputs: ["Input"], outputs: ["Out"]), gPooType : (inputs: ["X"], outputs: ["Out"]), gSoftmaxType : (inputs: ["X"], outputs: ["Out"]), gReshapeType : (inputs: ["X"], outputs: ["Out"]), - gConvAddType : (inputs: ["Input"], outputs: ["Out"]), - gDepthConvType : (inputs: ["Input"], outputs: ["Output"]), - gConcatType : (inputs: ["X"], outputs: ["Out"]), - gBoxcoderType : (inputs: ["PriorBox", "PriorBoxVar", "TargetBox"], outputs: ["OutputBox"]), - gTransposeType : (inputs: ["X"], outputs: ["Out"]), - gConvBnReluType 
: (inputs: ["Input"], outputs: ["Out"]), - gDwConvBnReluType : (inputs: ["Input"], outputs: ["Out"]), - gMulticlassNMSType : (inputs: ["BBoxes", "Scores"], outputs: ["Out"]), - gPriorBoxType : (inputs: ["Input", "Image"], outputs: ["Boxes", "Variances"]), - gPreluType : (inputs: ["X"], outputs: ["Out"]), - gConv2dTransposeType : (inputs: ["Input"], outputs: ["Output"]), - gBilinearInterpType : (inputs: ["X"], outputs: ["Out"]), - gSplit : (inputs: ["X"], outputs: ["Out"]), - gShape : (inputs: ["Input"], outputs: ["Out"]), - gFlatten : (inputs: ["X"], outputs: ["Out"]), - gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]), - gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]), - gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"]) - ] + gConvAddType : (inputs: ["Input"], outputs: ["Out"])] diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift index 9fc20f8a597d39d3b628c5e1033f9c5cceac45ed..3761dad60f0f8b20e3f95168445317a3e627ada9 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift @@ -1,66 +1,62 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. */ import Foundation class BatchNormParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) - if input.transpose != [0, 2, 3, 1] { - fatalError("batch norm only accepts NHWC") - } - output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope) - bias = try BatchNormParam.getFirstTensor(key: "Bias", map: opDesc.paraInputs, from: inScope) - mean = try BatchNormParam.getFirstTensor(key: "Mean", map: opDesc.paraInputs, from: inScope) - scale = try BatchNormParam.getFirstTensor(key: "Scale", map: opDesc.paraInputs, from: inScope) - variance = try BatchNormParam.getFirstTensor(key: "Variance", map: opDesc.paraInputs, from: inScope) - epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs) - momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope) + inputBias = try BatchNormParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) + inputMean = try BatchNormParam.inputMean(inputs: opDesc.paraInputs, from: inScope) + inputScale = try 
BatchNormParam.inputScale(inputs: opDesc.paraInputs, from: inScope) + inputVariance = try BatchNormParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) + epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs) + momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs) + is_test = try BatchNormParam.getAttr(key: "is_test", attrs: opDesc.attrs) + } catch let error { + throw error + } } - } - let input: Texture

- var output: Texture

- let bias: Tensor

- let mean: Tensor

- let scale: Tensor

- let variance: Tensor

- let epsilon: Float - let momentum: Float + let input: Texture

+ var output: Texture

+ let inputBias: Tensor + let inputMean: Tensor + let inputScale: Tensor + let inputVariance: Tensor + let epsilon: Float + let momentum: Float + let is_test: Bool } class BatchNormOp: Operator, BatchNormParam

>, Runable, Creator, InferShaperable{ - typealias OpType = BatchNormOp

- - func inferShape() { - para.output.dim = para.input.dim - } - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + func inferShape() { + para.output.dim = para.input.dim + } + typealias OpType = BatchNormOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } } - } - - func delogOutput() { - print(" \(type) output: ") - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(outputArray.strideArray()) - } } + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift deleted file mode 100644 index 8db64ac3a473fe59e7821f11abeb3437c337459d..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -class BilinearInterpParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope) - out_h = try BilinearInterpParam.getAttr(key: "out_h", attrs: opDesc.attrs) - out_w = try BilinearInterpParam.getAttr(key: "out_w", attrs: opDesc.attrs) - } catch let error { - throw error - } - if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) { - fatalError() - } - } - let input: Texture

- var output: Texture

- let out_h: Int - let out_w: Int -} - -class BilinearInterpOp: Operator, BilinearInterpParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = BilinearInterpOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) -// print(outputArray) - print(outputArray.strideArray()) - } - -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift deleted file mode 100644 index 0e1d5f0c53128bbc2f0b5e94d2075eecdef0fcc6..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -class BoxcoderParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope) - priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope) - targetBox = try BoxcoderParam.getFirstTensor(key: "TargetBox", map: opDesc.inputs, from: inScope) - output = try BoxcoderParam.getFirstTensor(key: "OutputBox", map: opDesc.outputs, from: inScope) - codeType = try BoxcoderParam.getAttr(key: "code_type", attrs: opDesc.attrs) - boxNormalized = try BoxcoderParam.getAttr(key: "box_normalized", attrs: opDesc.attrs) - } catch let error { - throw error - } - assert(priorBox.tensorDim.cout() == 2) - assert(priorBoxVar.tensorDim.cout() == 2) - assert(targetBox.tensorDim.cout() == 3) - assert(output.tensorDim.cout() == 3) - assert(priorBox.transpose == [0, 1, 2, 3]) - assert(priorBoxVar.transpose == [0, 1, 2, 3]) - assert(targetBox.transpose == [0, 1, 2, 3]) - assert(codeType == "decode_center_size") // encode_center_size is not implemented - assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1) - } - let priorBox: Texture

- let priorBoxVar: Texture

- let targetBox: Texture

- var output: Texture

- let codeType: String - let boxNormalized: Bool -} - -class BoxcoderOp: Operator, BoxcoderParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = BoxcoderOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - let device = para.output.metalTexture!.device - let pbv : [Float32] = device.texture2tensor(texture: para.priorBoxVar.metalTexture!, dim: para.priorBoxVar.tensorDim.dims, transpose: para.priorBoxVar.transpose) - let pb : [Float32] = device.texture2tensor(texture: para.priorBox.metalTexture!, dim: para.priorBox.tensorDim.dims, transpose: para.priorBox.transpose) - let tb : [Float32] = device.texture2tensor(texture: para.targetBox.metalTexture!, dim: para.targetBox.tensorDim.dims, transpose: para.targetBox.transpose) - let out : [Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(" prior box var ") - print(pbv.strideArray()) - print(" target box ") - print(tb.strideArray()) - print(" prior box ") - print(pb.strideArray()) - print(" output ") - print(out.strideArray()) - } - -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift deleted file mode 100644 index 8ba74a1c31456d7cb6e9ad67974bc02055313958..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift +++ /dev/null @@ -1,75 +0,0 @@ -// -// CNNConvAddBatchNormReluOp.swift -// paddle-mobile - -import Foundation - -class CNNMPSConvTestParam: TestParam { - var outputTexture: MTLTexture? 
- var metalParam: MetalConvParam - let filterPointer: UnsafeMutableRawPointer - let biasePointer: UnsafeMutablePointer - let filterSize: (width: Int, height: Int, channel: Int) - init(inMetalParam: MetalConvParam, inFilter: [Float], inBiase: [Float], inFilterSize: (width: Int, height: Int, channel: Int)) { - metalParam = inMetalParam - filterPointer = UnsafeMutableRawPointer.init(mutating: inFilter) - biasePointer = UnsafeMutablePointer.init(mutating: inBiase) - filterSize = inFilterSize - } -} - -@available(iOS 10.0, *) -class CNNMPSConvOp: Operator, CNNConvParam

>, Runable, Creator, InferShaperable, Fusion { - - typealias OpType = CNNMPSConvOp

- - required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { - fatalError() - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - } - - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode-->Node.init(inType: gElementwiseAdd); - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gMPSCNNConvType - } - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0..: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - guard let xlist = opDesc.inputs["X"] else { - fatalError() - } - for x in xlist { - guard let variant = inScope[x], let v = variant as? Texture

else { - fatalError() - } - if transpose.count == 0 { - transpose = v.transpose - } - if v.transpose != transpose { - fatalError() - } - - input.append(v) - } - axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs) - output = try ConcatParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error - } - } - var input: [Texture

] = [] - var output: Texture

- var transpose: [Int] = [] - let axis: Int -} - -class ConcatOp: Operator, ConcatParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = ConcatOp

- - func inferShape() { - // let dim = para.input.reduce([0, 0]) {[$0[0] + $1.dim[0], $1.dim[1]]} - // para.output.dim = Dim.init(inDim: dim) - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(outputArray.strideArray()) - } - -} - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddAddPreluOp.swift deleted file mode 100644 index e5bded65a1a8944d337fea65995af79cab580105..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddAddPreluOp.swift +++ /dev/null @@ -1,108 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -class ConvAddAddPreluParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvAddAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvAddAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvAddAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvAddAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs) - groups = try ConvAddAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs) - alpha = try ConvAddAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) - mode = try ConvAddAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) - y = try ConvAddAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch let error { - throw error - } - } - - let input: Texture

- let y: Tensor - let filter: Tensor - let mode: String - let alpha: Tensor

- var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int -} - -class ConvAddAddPreluOp: Operator, ConvAddAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ - typealias OpType = ConvAddAddPreluOp

- - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gConvAddAddPreluType - } - - static func needCheck() -> [(Int, String)] { - return [(2, "Y"), (2, "X")] - } - - - - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0..: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - - filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) - epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) - - groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs) - variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) - bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) - - scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) - mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) - y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: 
inScope) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) + epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) + + groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs) + variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) + bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) + scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) + mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) + y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } } - } - - let input: Texture

- - let variance: Tensor - let bias: Tensor - let mean: Tensor - let scale: Tensor - let y: Tensor - let filter: Tensor - let epsilon: Float32 - var newScale: MTLBuffer? - var newBiase: MTLBuffer? - - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int + + let input: Texture

+ + let variance: Tensor + let bias: Tensor + let mean: Tensor + let scale: Tensor + let y: Tensor + let filter: Tensor + let epsilon: Float32 + var newScale: MTLBuffer? + var newBiase: MTLBuffer? + + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int } class ConvAddBatchNormReluOp: Operator, ConvAddBatchNormReluParam

>, Runable, Creator, InferShaperable, Fusion{ - - typealias OpType = ConvAddBatchNormReluOp

- - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations + typealias OpType = ConvAddBatchNormReluOp

- var outDim = [inDims[0]] - for i in 0.. Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAdd) + --> Node.init(inType: gBatchNormType) + --> Node.init(inType: gReluType) + return beginNode } - } - - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gElementwiseAddType) - --> Node.init(inType: gBatchNormType) - --> Node.init(inType: gReluType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gConvAddBatchNormReluType - } - - func delogOutput() { - print(" conv add batchnorm relu output ") - print(para.output.toTensor().strideArray()) - // let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false) - // para.filter.logDataPointer(header: "filter data pointer: ") - // print("filter: \(para.filter)") - // print("biase: \(para.y)") - // print("padding: \(para.paddings)") - // print("stride: \(para.stride)") + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } - // let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false) - // let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false) - // let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false) + static func fusionType() -> String { + return gConvAddBatchNormReluType + } - // let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false) - } + func delogOutput() { + +// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false) +// para.filter.logDataPointer(header: "filter data pointer: ") +// print("filter: \(para.filter)") + +// print("biase: \(para.y)") +// print("padding: \(para.paddings)") +// print("stride: \(para.stride)") + +// let _: P? 
= para.y.buffer?.logDesc(header: " biase: ", stridable: false) +// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false) +// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false) + + let output = para.output.metalTexture.floatArray { (p: P) -> P in + return p + } +// + writeToLibrary(fileName: "output_112x112x32_2", array: output) + print(" write done") + +// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift index 5e184844d886beb19ac5ff297f8a270af8a076fa..40069f6550ea00e986926f40c5fc2a2d4bf22a83 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift @@ -15,102 +15,79 @@ import Foundation class ConvAddParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs) - groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs) - - y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddParam.outputOut(outputs: 
opDesc.outputs, from: inScope) + stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs) + y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } } - } - - let input: Texture

- let y: Tensor - let filter: Tensor - - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int + + let input: Texture

+ let y: Tensor + let filter: Tensor + + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int } class ConvAddOp: Operator, ConvAddParam

>, Runable, Creator, InferShaperable, Fusion{ - typealias OpType = ConvAddOp

- - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gElementwiseAddType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gConvAddType - } - - func inferShape() { + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAdd) + return beginNode + } - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } - var outDim = [inDims[0]] - for i in 0.. String { + return gConvAddType } - outDim.append(filterDim[0]) - para.output.dim = Dim.init(inDim: outDim) - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + typealias OpType = ConvAddOp

+ + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs) - groups = try ConvAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs) - alpha = try ConvAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) - mode = try ConvAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) - y = try ConvAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch let error { - throw error - } - } - - let input: Texture

- let y: Tensor - let filter: Tensor - let mode: String - let alpha: Tensor

- var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int -} - -class ConvAddPreluOp: Operator, ConvAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ - typealias OpType = ConvAddPreluOp

- - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gConvAddPreluType - } - - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0..: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvBNReluParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvBNReluParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvBNReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvBNReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) - epsilon = try ConvBNReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) - - groups = try ConvBNReluParam.getAttr(key: "groups", attrs: opDesc.attrs) - variance = try ConvBNReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) - bias = try ConvBNReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) - scale = try ConvBNReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) - mean = try ConvBNReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) - } catch let error { - throw error - } - } - - let input: Texture

- - let variance: Tensor - let bias: Tensor - let mean: Tensor - let scale: Tensor - let filter: Tensor - let epsilon: Float32 - var newScale: MTLBuffer? - var newBiase: MTLBuffer? - - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int -} - -class ConvBNReluOp: Operator, ConvBNReluParam

>, Runable, Creator, InferShaperable, Fusion{ - typealias OpType = ConvBNReluOp

- - func inputs() -> [Variant] { - return [para.input, para.variance, para.bias, para.mean, para.scale, para.filter] - } - - - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0.. Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gBatchNormType) - --> Node.init(inType: gReluType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gConvBnReluType - } - - func delogOutput() { - print(" \(type) output: ") - print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift index e82eb1f4753f0ebfdb5a949c85181a0ae52ea2da..29b0c4246e728dbc3d3b865a189c7063ac1bbdcf 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift @@ -15,67 +15,74 @@ import Foundation class ConvParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvParam.output(outputs: opDesc.outputs, from: inScope) - stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) - groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs) - - } catch let error { - throw error + 
typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvParam.output(outputs: opDesc.outputs, from: inScope) + stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs) + + } catch let error { + throw error + } } - } - - let input: Texture

- let filter: Tensor - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int + + let input: Texture

+ let filter: Tensor + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int } class ConvOp: Operator, ConvParam

>, Runable, Creator, InferShaperable { - typealias OpType = ConvOp

- - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations + required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { + do { + try super.init(device: device, opDesc: opDesc, inScope: inScope) + } catch let error { + throw error + } + + } + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0.. + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } } - outDim.append(filterDim[0]) - para.output.dim = Dim.init(inDim: outDim) - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + func delogOutput() { + print("conv output : ") + print(para.output.metalTexture) +// let _: Float16? = para.output.metalTexture.logDesc() } - } - - func delogOutput() { - print("conv output : ") - print(para.output.toTensor().strideArray()) - // let _: Float16? = para.output.metalTexture.logDesc() - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvTransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvTransposeOp.swift deleted file mode 100644 index 9ec6f7bd60260b5808e469129e9c292ff9837f7c..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvTransposeOp.swift +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class ConvTransposeParam: ConvParam

{ - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - try super.init(opDesc: opDesc, inScope: inScope) - } catch let error { - throw error - } - } -} - -class ConvTransposeOp: Operator, ConvTransposeParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = ConvTransposeOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - - print(" \(type) output: ") - let padToFourDim = para.output.padToFourDim - if para.output.transpose == [0, 1, 2, 3] { - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - print(outputArray.strideArray()) - } else if para.output.transpose == [0, 2, 3, 1] { - let output = para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])) - print(output.strideArray()) - } else { - print(" not implement") - } - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/DepthwiseConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/DepthwiseConvOp.swift deleted file mode 100644 index ec76eecf1fc9736d9dff6a4cf0d69a314a9b1e0d..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/DepthwiseConvOp.swift +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class DepthConvOp: Operator, ConvParam

>, Runable, Creator, InferShaperable { - - typealias OpType = DepthConvOp

- - required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { - do { - try super.init(device: device, opDesc: opDesc, inScope: inScope) - } catch let error { - throw error - } - } - - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0..: Operator, ConvBNReluParam

>, Runable, Creator, InferShaperable, Fusion{ - typealias OpType = ConvBNReluOp

- - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0.. Node { - let beginNode = Node.init(inType: gDepthConvType) - _ = beginNode - --> Node.init(inType: gBatchNormType) - --> Node.init(inType: gReluType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gDwConvBnReluType - } - - func delogOutput() { - print(" \(type) output: ") - print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift index ae040dd65f74fc222275bc579338107f2ea188fd..5ed36f86d79ffd639dc2ba76da74d24a532b1bd1 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift @@ -15,80 +15,33 @@ import Foundation class ElementwiseAddParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) - axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) + inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + + output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) 
+ axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs) + } catch let error { + throw error + } } - do { - inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch _ { - let tensorY: Tensor

= try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) - let device = inputX.metalTexture!.device - inputY = Texture.init(device: device, inDim: tensorY.dim) - let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) - inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) - } - -// required init(device: MTLDevice, param: ElementwiseAddParam

) { -// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) -// if computePrecision == .Float32 { -// super.init(device: device, inFunctionName: "elementwise_add") -// } else if computePrecision == .Float16 { -// super.init(device: device, inFunctionName: "elementwise_add_half") -// } else { -// fatalError() -// } -// } - - var offset = axis - if axis == -1 { - offset = inputX.tensorDim.cout() - inputY.tensorDim.cout() - } - for i in 0..<(inputY.tensorDim.cout()) { - assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i]) - } - } - - var inputX: Texture

- var inputY: Texture

- var output: Texture

- var axis: Int + let input: Texture

+ let inputY: Tensor

+ var output: Texture

+ let axis: Int } class ElementwiseAddOp: Operator, ElementwiseAddParam

>, Runable, Creator, InferShaperable{ - typealias OpType = ElementwiseAddOp

- - func inferShape() { -// para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + func inferShape() { + para.output.dim = para.input.dim } - } - - func delogOutput() { - print(" \(type) output: ") - print(para.output) - let padToFourDim = para.output.padToFourDim - if para.output.transpose == [0, 1, 2, 3] { - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - print(outputArray.strideArray()) - } else if para.output.transpose == [0, 2, 3, 1] { - print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) - } else { - print(" not implement") + typealias OpType = ElementwiseAddOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { } - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddPreluOp.swift deleted file mode 100644 index 333303e9bb7c1224ff50d69b5523edabe0fc81a6..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddPreluOp.swift +++ /dev/null @@ -1,119 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class ElementwiseAddPreluParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) - mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) - inputX = try ElementwiseAddPreluParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try ElementwiseAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) - axis = try ElementwiseAddPreluParam.getAttr(key: "axis", attrs: opDesc.attrs) - } catch let error { - throw error - } - do { - inputY = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch _ { - let tensorY: Tensor

= try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) - let device = inputX.metalTexture!.device - inputY = Texture.init(device: device, inDim: tensorY.dim) - let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) - inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) - } - - // required init(device: MTLDevice, param: ElementwiseAddParam

) { - // param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) - // if computePrecision == .Float32 { - // super.init(device: device, inFunctionName: "elementwise_add") - // } else if computePrecision == .Float16 { - // super.init(device: device, inFunctionName: "elementwise_add_half") - // } else { - // fatalError() - // } - // } - - var offset = axis - if axis == -1 { - offset = inputX.tensorDim.cout() - inputY.tensorDim.cout() - } - for i in 0..<(inputY.tensorDim.cout()) { - assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i]) - } - } - - let mode: String - let alpha: Tensor

- var inputX: Texture

- var inputY: Texture

- var output: Texture

- var axis: Int -} - -class ElementwiseAddPreluOp: Operator, ElementwiseAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gElementwiseAddType) - _ = beginNode - --> Node.init(inType: gPreluType) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gElementwiseAddPreluType - } - - typealias OpType = ElementwiseAddPreluOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - - - func delogOutput() { - print(" \(type) output: ") - print(para.output) - - let padToFourDim = para.output.padToFourDim - if para.output.transpose == [0, 1, 2, 3] { - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - print(outputArray.strideArray()) - } else if para.output.transpose == [0, 2, 3, 1] { - print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) - } else { - print(" not implement") - } - } -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift index 382ea58b844b25bb855ed7cdc155a860bca45da5..c81d9e786c91408d2412b30eaec089904df75751 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift @@ -15,53 +15,54 @@ import Foundation class FeedParam: OpParam{ - var output: Texture

- var input: InputTexture { - return scope.input() as! InputTexture - } - let scope: Scope - - required init(opDesc: OpDesc, inScope: Scope) throws { - scope = inScope - do { - output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error + var output: Texture

+ var input: InputTexture { + return scope.input() as! InputTexture } - } - - typealias ParamPrecisionType = P + let scope: Scope + + required init(opDesc: OpDesc, inScope: Scope) throws { + scope = inScope + do { + output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error + } + } + + typealias ParamPrecisionType = P } class FeedOp: Operator, FeedParam

>, Runable, Creator, InferShaperable { - typealias OpType = FeedOp

- - func inferShape() { - // print("feed input: \(para.input.expectDim)") - print("feed output: \(para.output.dim)") - // para.output.dim = - // para.output.dim = para.input.expectDim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + typealias OpType = FeedOp

+ + func inferShape() { + // print("feed input: \(para.input.expectDim)") + print("feed output: \(para.output.dim)") + // para.output.dim = + // para.output.dim = para.input.expectDim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + +// let resizeKernel = ResizeKernel

.init(device: device) +// let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim) +// do { +// try resizeKernel.compute(commandBuffer: buffer, param: resizeParam) +// } catch let error { +// throw error +// } } - // let resizeKernel = ResizeKernel

.init(device: device) - // let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim) - // do { - // try resizeKernel.compute(commandBuffer: buffer, param: resizeParam) - // } catch let error { - // throw error - // } - } - - func delogOutput() { - print(" \(type) output: ") - print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) - } + func delogOutput() { +// para.input.mtlTexture.logDesc() +// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true) +// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift index ade5b09099b69f4784b33a3b108cfcfe1aa1ea7f..2964b89e5ddabbbbd4f2df032efa5ef2db82ec96 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift @@ -15,73 +15,40 @@ import Foundation class FetchParam: OpParam{ - var output: FetchHolder - let input: Texture

- let scope: Scope - required init(opDesc: OpDesc, inScope: Scope) throws { - scope = inScope - do { - input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) - output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims) - scope.setOutput(output: output) - } catch let error { - throw error + var output: Texture

+ let input: Texture

+ let scope: Scope + required init(opDesc: OpDesc, inScope: Scope) throws { + scope = inScope + do { + input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) + output = input + } catch let error { + throw error + } } - } - - typealias ParamPrecisionType = P + + typealias ParamPrecisionType = P } class FetchKernel: Kernel, Computable { - - func compute(commandBuffer: MTLCommandBuffer, param: FetchParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") + + func compute(commandBuffer: MTLCommandBuffer, param: FetchParam

) throws { } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: FetchParam

) { - param.output.initBuffer(device: device) - if computePrecision == .Float16 { - if param.input.transpose == [0, 2, 3, 1] { - super.init(device: device, inFunctionName: "fetch_half") - } else { -// fatalError(" not support ") - super.init(device: device, inFunctionName: "fetch_placeholder_half") - print(" not support ") - } - } else if computePrecision == .Float32 { - if param.input.transpose == [0, 2, 3, 1] { - super.init(device: device, inFunctionName: "fetch") - } else { - print(" not support ") - super.init(device: device, inFunctionName: "fetch_placeholder") -// fatalError(" not support ") - } - } else { - fatalError(" not support ") + + required init(device: MTLDevice, param: FetchParam

) { + super.init(device: device, inFunctionName: "texture2d_to_2d_array") } - } } -class FetchOp: Operator< FetchKernel

, FetchParam

>, Runable, Creator, InferShaperable { - - typealias OpType = FetchOp

- - func inferShape() { - print(para.input.dim) - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error +class FetchOp: Operator< FetchKernel

, FetchParam

>, Runable, Creator, InferShaperable{ + func inferShape() { + print(para.input.dim) + } + + typealias OpType = FetchOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + scope.setOutput(output: para.output) } - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FlattenOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FlattenOp.swift deleted file mode 100644 index 4fc5f222932ce98c4bf3e29bdf6cd8c666f5f9f1..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/FlattenOp.swift +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class FlattenParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope) - axis = try FlattenParam.getAttr(key: "axis", attrs: opDesc.attrs) - } catch let error { - throw error - } - } - let input: Texture

- var output: Texture

- let axis: Int -} - - -class FlattenOp: Operator, FlattenParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = FlattenOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(outputArray.strideArray()) - } - -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift deleted file mode 100644 index f58358761f820809685510fa4e9b5ff237567b3c..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Metal -import Foundation - -public protocol TestParam { -} - -public protocol Testable { - associatedtype TestParamType: TestParam - func test(commandBuffer: MTLCommandBuffer, param: TestParamType) - init(device: MTLDevice, testParam: TestParamType) -} - - -protocol Computable { - associatedtype ParamType: OpParam - func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws - init(device: MTLDevice, param: ParamType) -} - -protocol KernelProtocol { - var pipline: MTLComputePipelineState { get set } - var functionName: String { get set } - -} - -open class Kernel { - let pipline: MTLComputePipelineState - let functionName: String - public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) { - pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib) - functionName = inFunctionName - } -} - -open class CusomKernel: Kernel { - public struct Shape { - public let width: Int - public let height: Int - public let channel: Int - public init(inWidth: Int, inHeight: Int, inChannel: Int){ - width = inWidth - height = inHeight - channel = inChannel - } - } - public let outputTexture: MTLTexture - public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) { - let textureDesc = MTLTextureDescriptor.init() - textureDesc.textureType = .type2D - textureDesc.width = outputDim.width - textureDesc.height = outputDim.height - textureDesc.depth = (outputDim.channel + 3) / 4 - - if computePrecision == .Float16 { - textureDesc.pixelFormat = .rgba16Float - } else if computePrecision == .Float32 { - textureDesc.pixelFormat = .rgba32Float - } else { - fatalError() - } - - textureDesc.usage = [.shaderRead, .shaderWrite] - textureDesc.storageMode = .shared - outputTexture = device.makeTexture(descriptor: textureDesc) ?! 
" make texture error " - - super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib) - } - - public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(inputTexuture, index: 0) - encoder.setTexture(outputTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: outputTexture) - encoder.endEncoding() - } - -} - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift index dad8d0c6ac2e5a93273573473c700179f8b90a37..bae452dec331957ceda5a6f503802352f63a6dbe 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift @@ -15,39 +15,53 @@ import Foundation class BatchNormKernel: Kernel, Computable { - required init(device: MTLDevice, param: BatchNormParam

) { - let count = param.variance.dim.numel() - let varianceP = param.variance.data.pointer - let meanP = param.mean.data.pointer - let scaleP = param.scale.data.pointer - let biasP = param.bias.data.pointer - for i in 0..) { + guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else { + fatalError() + } + guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else { + fatalError() + } + self.newScale = newScale + self.newBias = newBias + + super.init(device: device, inFunctionName: "batchnorm") + + let varianceBuffer : MTLBuffer = param.inputVariance.buffer + + var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length) + let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self) + for i in 0..<(varianceBuffer.length / MemoryLayout

.stride) { + invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot() + } + + let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self) + let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self) + let scale : MTLBuffer = param.inputScale.buffer + let scaleContents = scale.contents().assumingMemoryBound(to: P.self) + let bias : MTLBuffer = param.inputBias.buffer + let biasContents = bias.contents().assumingMemoryBound(to: P.self) + let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0..<(newScale.length / MemoryLayout

.stride) { + newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i])) + newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i])) + } } - - param.bias.initBuffer(device: device, precision: computePrecision) - param.scale.initBuffer(device: device, precision: computePrecision) - param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "batchnorm") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "batchnorm_half") - } else { - fatalError() - } - } - - func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") + + func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + print("BatchNorm compute") + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBuffer(newScale, offset: 0, index: 0) + encoder.setBuffer(newBias, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBuffer(param.scale.buffer, offset: 0, index: 0) - encoder.setBuffer(param.bias.buffer, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift deleted file mode 100644 index fca5719553038732b1646fb8b15885bd03bd5624..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift +++ /dev/null @@ -1,91 +0,0 @@ -// -// BatchNormRelu.swift -// paddle-mobile -// -// Created by zhangxinjun on 2018/8/23. -// Copyright © 2018年 orange. All rights reserved. -// - -import Foundation - - -class BatchNormReluParam: BatchNormParam

{ - -} - -class BatchNormReluKernel: Kernel, Computable{ - - - typealias ParamType = BatchNormReluParam

- var newScale: MTLBuffer - var newBias: MTLBuffer - - required init(device: MTLDevice, testParam: BatchNormReluTestParam) { - - newScale = testParam.newScaleBuffer - newBias = testParam.newBiaseBuffer - - super.init(device: device, inFunctionName: "batch_norm_relu_3x3") - } - - required init(device: MTLDevice, param: BatchNormReluParam

) { - guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else { - fatalError() - } - guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else { - fatalError() - } - self.newScale = newScale - self.newBias = newBias - - super.init(device: device, inFunctionName: "batch_norm_relu_3x3") - - - let varianceBuffer : MTLBuffer = param.inputVariance.buffer - - var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length) - let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self) - for i in 0..<(varianceBuffer.length / MemoryLayout

.stride) { - invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot() - } - - let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self) - let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self) - let scale : MTLBuffer = param.inputScale.buffer - let scaleContents = scale.contents().assumingMemoryBound(to: P.self) - let bias : MTLBuffer = param.inputBias.buffer - let biasContents = bias.contents().assumingMemoryBound(to: P.self) - let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self) - - for i in 0..<(newScale.length / MemoryLayout

.stride) { - newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i])) - newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i])) - } - } - - func compute(commandBuffer: MTLCommandBuffer, param: BatchNormReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - fatalError() - } - encoder.setTexture(param.input as? MTLTexture, index: 0) - encoder.setTexture(param.output as? MTLTexture, index: 1) - encoder.setBuffer(newScale, offset: 0, index: 1) - encoder.setBuffer(newBias, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output as! MTLTexture) - encoder.endEncoding() - } - - func testCompute(commandBuffer: MTLCommandBuffer, testParam: BatchNormReluTestParam) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - fatalError() - } - encoder.setTexture(testParam.inputTexture, index: 0) - encoder.setTexture(testParam.outputTexture, index: 1) - encoder.setBuffer(newScale, offset: 0, index: 0) - encoder.setBuffer(newBias, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture) - encoder.endEncoding() - } - - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift deleted file mode 100644 index 7f3e7433760cc1fa4d093b08027bce7c79172532..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -struct BilinearInterpMetalParam { - var ratio_h: Float32 - var ratio_w: Float32 -} - -class BilinearInterpKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: BilinearInterpParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - var ratio_h: Float32 = 0 - var ratio_w: Float32 = 0 - if param.output.tensorDim.dims[2] > 1 { - ratio_h = Float32(param.input.tensorDim.dims[2]-1) / Float32(param.output.tensorDim.dims[2]-1) - } - if param.output.tensorDim.dims[3] > 1 { - ratio_w = Float32(param.input.tensorDim.dims[3]-1) / Float32(param.output.tensorDim.dims[3]-1) - } - var p = BilinearInterpMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w) - encoder.setBytes(&p, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: BilinearInterpParam

) { - param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "bilinear_interp_float") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "bilinear_interp_half") - } else { - fatalError() - } - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift deleted file mode 100644 index c084d9b28e1dc7019a14d3ae317ddf8a64547830..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct BoxcoderMetalParam { -} - -class BoxcoderKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: BoxcoderParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.priorBox.metalTexture, index: 0) - encoder.setTexture(param.priorBoxVar.metalTexture, index: 1) - encoder.setTexture(param.targetBox.metalTexture, index: 2) - encoder.setTexture(param.output.metalTexture, index: 3) - var bmp = BoxcoderMetalParam.init() - encoder.setBytes(&bmp, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: BoxcoderParam

) { - param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: computePrecision) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "boxcoder_float") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "boxcoder_half") - } else { - fatalError() - } - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift deleted file mode 100644 index 14a5bd521455632c8a67e4c1a8ebdedc6c460aa5..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift +++ /dev/null @@ -1,176 +0,0 @@ -// -// CNNConvKernel.swift -// paddle-mobile -// - -import Foundation -import Metal -import Accelerate -import MetalPerformanceShaders - -@available(iOS 10.0, *) -class WeightsDataSource: NSObject, MPSCNNConvolutionDataSource { - - let desc: MPSCNNConvolutionDescriptor - let weight:UnsafeMutableRawPointer - let bias:UnsafeMutablePointer - - - - init(inDesc: MPSCNNConvolutionDescriptor, inWeight: UnsafeMutableRawPointer, inBias: UnsafeMutablePointer) { - desc = inDesc - weight = inWeight - bias = inBias - } - - - func dataType() -> MPSDataType { - return .float32 - } - - func descriptor() -> MPSCNNConvolutionDescriptor { - return desc - } - - func weights() -> UnsafeMutableRawPointer { - return self.weight - } - - func biasTerms() -> UnsafeMutablePointer? { - return self.bias - } - - func load() -> Bool { - return true - } - - func purge() { - } - - func label() -> String? 
{ - return "Conv" - } - - -} - -@available(iOS 10.0, *) -class CNNConvParam: OpParam{ - - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try CNNConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try CNNConvParam.input(inputs: opDesc.inputs, from: inScope) - output = try CNNConvParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try CNNConvParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try CNNConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) - // 暂时不用关心 - dilations = try CNNConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) - // 暂时不用关心 - groups = try CNNConvParam.getAttr(key: "groups", attrs: opDesc.attrs) - - variance = try CNNConvParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) - // bias - y = try CNNConvParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch let error { - throw error - } - } - - var input: Texture

- let variance: Tensor - let y: Tensor - let filter: Tensor - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int -} - -@available(iOS 10.0, *) -class CNNConvKernel: Kernel, Computable { - - typealias ParamType = CNNConvParam

- - var mpsImageCreator: MpsImageCreator

? - var activation:MPSCNNNeuron? - var conv:MPSCNNConvolution? - var weightDataSource:WeightsDataSource? - var param: CNNConvParam

? - var device: MTLDevice? - - - required init(device:MTLDevice, testParam:CNNMPSConvTestParam) { - self.device = device - - let desc = MPSCNNConvolutionDescriptor(kernelWidth: testParam.filterSize.width, kernelHeight: testParam.filterSize.height, inputFeatureChannels: testParam.filterSize.channel, outputFeatureChannels: testParam.filterSize.channel, neuronFilter: activation) - - desc.strideInPixelsX = Int(testParam.metalParam.offsetX) - desc.strideInPixelsY = Int(testParam.metalParam.offsetY) - - - weightDataSource = WeightsDataSource(inDesc: desc, inWeight:testParam.filterPointer, inBias:testParam.biasePointer) - - if #available(iOS 11.0, *) { - conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!) - } else { - // Fallback on earlier versions - } - - super.init(device: device, inFunctionName: "") - } - - required init(device:MTLDevice, param:CNNConvParam

) { - - self.device = device - - let inChannels: Int - let outChannels: Int - - if param.y.dim.cout() == 4 { - inChannels = (param.y.dim[3]) - outChannels = inChannels - } else { - inChannels = 0 - outChannels = inChannels - } - - let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.width, kernelHeight: param.filter.height, inputFeatureChannels: inChannels, outputFeatureChannels: outChannels, neuronFilter: activation) - - desc.strideInPixelsX = Int(param.stride[0]) - desc.strideInPixelsY = Int(param.stride[1]) - - - weightDataSource = WeightsDataSource(inDesc: desc, inWeight:param.filter.data.pointer as! UnsafeMutablePointer, inBias: param.y.data.pointer as! UnsafeMutablePointer) - - if #available(iOS 11.0, *) { - conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!) - } else { - // Fallback on earlier versions - } - - super.init(device: device, inFunctionName: "") - } - - func compute(commandBuffer: MTLCommandBuffer, param: CNNConvParam

) throws { - let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))! - var outputImage = (mpsImageCreator?.createMPSImage(device: device!))! - - // 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用 - conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage) - - param.input = outputImage.texture as! Texture

- } - - func testCompute(commandBuffer: MTLCommandBuffer, testParam: CNNMPSConvTestParam) throws { - let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))! - var outputImage = (mpsImageCreator?.createMPSImage(device: device!))! - - // 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用 - conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage) - - testParam.outputTexture = outputImage.texture - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Concat.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Concat.swift deleted file mode 100644 index 25f0a21bfff420566d06a59dca626805dd0ce6e0..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Concat.swift +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class ConcatKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: ConcatParam

) { - super.init(device: device, inFunctionName: "concat") - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift deleted file mode 100644 index 81ef46c0b3e919615d07f667851007e95b02d54f..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift +++ /dev/null @@ -1,147 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct ConcatTestParam: TestParam { - var input: [MTLTexture] - var output: MTLTexture - var dims: [[Int]] - var axis: Int - var odim: [Int] -} - -struct ConcatMetalParam { - var odim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1) - var axis: Int32 = 0 - var offset: Int32 = 0 - var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) - var vdim: (Int32, Int32, Int32, Int32, Int32, Int32) = (0, 0, 0, 0, 0, 0) -} - -class ConcatKernel: Kernel, Computable{ - var v = "normal" - var pm = ConcatMetalParam.init() - func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam

) throws { - - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - let num = param.input.count - for i in 0...size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: ConcatParam

) { - param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: computePrecision) - let orank = param.output.tensorDim.cout() - let num = param.input.count - assert(num <= 6) - var axis = 4 - param.output.tensorDim.cout() + param.axis - for i in 0..<4 { - if param.transpose[i] == axis { - axis = i - break - } - } - pm.axis = Int32(axis) - pm.odim = (Int32(param.output.dim[0]), Int32(param.output.dim[1]), Int32(param.output.dim[2]), Int32(param.output.dim[3])) - pm.trans = (Int32(param.output.transpose[0]), Int32(param.output.transpose[1]), Int32(param.output.transpose[2]), Int32(param.output.transpose[3])) - var vdim: [Int] = [0, 0, 0, 0, 0, 0] - for i in 0..: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvAddAddPreluParam

) { - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) - param.filter.initBuffer(device: device, precision: computePrecision) - param.y.initBuffer(device: device, precision: computePrecision) - param.alpha.initBuffer(device: device, precision: computePrecision) - - if computePrecision == .Float16 { - if param.filter.width == 1 && param.filter.height == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") - } - - } else if param.filter.channel == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") - } - } else if param.filter.width == 3 && param.filter.height == 3 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") - } - - } else if param.filter.width == 1 && param.filter.height == 5 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") - } - } else if param.filter.width == 5 && param.filter.height == 1 { - if param.mode == 
"channel" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") - } - } else { - fatalError(" unsupport yet ") - } - } else if computePrecision == .Float32 { - if param.filter.width == 1 && param.filter.height == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") - } - } else if param.filter.channel == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") - } - } else if param.filter.width == 3 && param.filter.height == 3 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") - } - - } else if param.filter.width == 1 && param.filter.height == 5 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") - } - } else if param.filter.width == 5 && 
param.filter.height == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") - } - } else { - fatalError(" unsupport yet ") - } - } else { - fatalError() - } - - let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) - - let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) - - // print(" function: \(functionName)") - // print("offset x: \(offsetX)") - // print("offset y: \(offsetY)") - - let offsetZ = 0.0 - let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) - // print("metal param: ") - // print(inMetalParam) - - metalParam = inMetalParam - } - - func compute(commandBuffer: MTLCommandBuffer, param: ConvAddAddPreluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift index 66324dd47086fd7c1ccffb674c0f8b8623416e0d..0ffe90272fe36fa30d58c7c6bd1e287d49f0e92a 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift @@ -15,165 +15,124 @@ import Foundation struct ConvAddBatchNormReluTestParam: TestParam { - let inputTexture: MTLTexture - let outputTexture: MTLTexture - var metalParam: MetalConvParam - let filterBuffer: MTLBuffer - let biaseBuffer: MTLBuffer - let newScaleBuffer: MTLBuffer - let newBiaseBuffer: MTLBuffer - let filterSize: (width: Int, height: Int, channel: Int) - init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { - inputTexture = inInputTexture - outputTexture = inOutputTexture - metalParam = inMetalParam - filterBuffer = inFilterBuffer - biaseBuffer = inBiaseBuffer - newScaleBuffer = inNewScaleBuffer - newBiaseBuffer = inNewBiaseBuffer - filterSize = inFilterSize - } + let 
inputTexture: MTLTexture + let outputTexture: MTLTexture + var metalParam: MetalConvParam + let filterBuffer: MTLBuffer + let biaseBuffer: MTLBuffer + let newScaleBuffer: MTLBuffer + let newBiaseBuffer: MTLBuffer + let filterSize: (width: Int, height: Int, channel: Int) + init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { + inputTexture = inInputTexture + outputTexture = inOutputTexture + metalParam = inMetalParam + filterBuffer = inFilterBuffer + biaseBuffer = inBiaseBuffer + newScaleBuffer = inNewScaleBuffer + newBiaseBuffer = inNewBiaseBuffer + filterSize = inFilterSize + } } class ConvAddBatchNormReluKernel: Kernel, Computable, Testable { - required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) { - if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") - } else if testParam.filterSize.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") - } else { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") - } - } - - var metalParam: MetalConvParam! - - required init(device: MTLDevice, param: ConvAddBatchNormReluParam

) { - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) - param.filter.initBuffer(device: device, precision: computePrecision) - param.y.initBuffer(device: device, precision: computePrecision) - param.variance.initBuffer(device: device, precision: .Float32) - param.mean.initBuffer(device: device, precision: .Float32) - param.scale.initBuffer(device: device, precision: .Float32) - param.bias.initBuffer(device: device, precision: .Float32) - - if computePrecision == .Float32 { - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") - } else { - fatalError(" unsupport ") - } - } else if computePrecision == .Float16 { - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half") - } else { - fatalError(" unsupport ") - } - } else { - fatalError() - } - - let offsetX = param.filter.width/2 - Int(param.paddings[0]) - let offsetY = param.filter.height/2 - Int(param.paddings[1]) - - print("offset x: \(offsetX)") - print("offset y: \(offsetY)") - - let offsetZ = 0.0 - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) - - var 
invs: [P] = [] - let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) - - for i in 0...stride { - let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) - invs.append(P(inv)) - } - - let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) - let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) - - let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) - let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) - let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) - for i in 0...stride { - newScale[i] = invs[i] * scaleContents[i] - newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] + required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) { + if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") + } else if testParam.filterSize.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") + } else { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") + } } -// var newScaleFP16: UnsafeMutableRawPointer -// -// float32ToFloat16(input: newScale as! UnsafeMutablePointer, output: newScaleFP16, count: param.scale.buffer.length / MemoryLayout

.size) - - -// let newBiaseFloat16 = device.makeBuffer(length: <#T##Int#>, options: <#T##MTLResourceOptions#>) - - var newBiaseBuffer: MTLBuffer - var newScaleBuffer: MTLBuffer - - if computePrecision == .Float32 { - newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! - newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! - } else if computePrecision == .Float16 { - - newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! - newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! - - float32ToFloat16(input: newBiase as! UnsafeMutablePointer, output: newBiaseBuffer.contents(), count: param.bias.buffer.length / MemoryLayout

.size) - - float32ToFloat16(input: newScale as! UnsafeMutablePointer, output: newScaleBuffer.contents(), count: param.scale.buffer.length / MemoryLayout

.size) - } else { - fatalError(" unsupport ") + var metalParam: MetalConvParam! + + required init(device: MTLDevice, param: ConvAddBatchNormReluParam

) { + + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") + } else { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") + } + + param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + + param.variance.initBuffer(device: device) + param.mean.initBuffer(device: device) + param.scale.initBuffer(device: device) + param.bias.initBuffer(device: device) + + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + + print("offset x: \(offsetX)") + print("offset y: \(offsetY)") + + let offsetZ = 0.0 + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + + var invs: [P] = [] + let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0...stride { + let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) + invs.append(P(inv)) + } + + let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) + let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) + + let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) + let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) + let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) + for i in 0...stride { + newScale[i] = invs[i] * scaleContents[i] + newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] + } + param.newBiase = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length) + param.newScale = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length) + + newScale.deinitialize(count: param.scale.buffer.length) + newScale.deallocate() + + newBiase.deinitialize(count: param.bias.buffer.length) + newBiase.deallocate() } - param.newBiase = newBiaseBuffer - param.newScale = newScaleBuffer - - newScale.deinitialize(count: param.scale.buffer.length) - newScale.deallocate() - - newBiase.deinitialize(count: param.bias.buffer.length) - newBiase.deallocate() - } - - func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.setBuffer(param.newScale!, offset: 0, index: 3) + encoder.setBuffer(param.newBiase!, offset: 0, index: 4) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.setBuffer(param.newScale!, offset: 0, index: 3) - encoder.setBuffer(param.newBiase!, offset: 0, index: 4) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - fatalError() + public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + + encoder.setTexture(param.inputTexture, index: 0) + encoder.setTexture(param.outputTexture, index: 1) + var inMetalParam = param.metalParam + encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) + encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) + encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) + 
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) + encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) + encoder.endEncoding() } - - encoder.setTexture(param.inputTexture, index: 0) - encoder.setTexture(param.outputTexture, index: 1) - var inMetalParam = param.metalParam - encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) - encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) - encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) - encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) - encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) - encoder.endEncoding() - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift index d5aa98d2606ceda5cbcf0f3f4c1fc0ed2adeed25..81f3aacba8dded3341237e05f9afbc1e04f70596 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift @@ -15,73 +15,33 @@ import Foundation class ConvAddKernel: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvAddParam

) { - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) - param.filter.initBuffer(device: device, precision: computePrecision) - param.y.initBuffer(device: device, precision: computePrecision) - - if computePrecision == .Float16 { - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_1x1_half") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_add_3x3_half") - } else if param.filter.width == 1 && param.filter.height == 5 { - super.init(device: device, inFunctionName: "conv_add_5x1_half") - } else if param.filter.width == 5 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_1x5_half") - } else { - fatalError(" unsupport yet ") - } - } else if computePrecision == .Float32 { - if param.filter.width == 1 && param.filter.height == 1 { + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvAddParam

) { super.init(device: device, inFunctionName: "conv_add_1x1") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3") - } else if param.filter.width == 1 && param.filter.height == 5 { - super.init(device: device, inFunctionName: "conv_add_5x1") - } else if param.filter.width == 5 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_1x5") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_add_3x3") - } else { - fatalError(" unsupport yet ") - } - } else { - fatalError() + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + + param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + + print("offset x: \(offsetX)") + print("offset y: \(offsetY)") + + let offsetZ = 0.0 + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) } - - - let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) - - let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) - -// print(" function: \(functionName)") -// print("offset x: \(offsetX)") -// print("offset y: \(offsetY)") - - let offsetZ = 0.0 - let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) -// print("metal param: ") -// print(inMetalParam) - - metalParam = inMetalParam - } - - func compute(commandBuffer: 
MTLCommandBuffer, param: ConvAddParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift deleted file mode 100644 index 35d49953c656364799e8ca7400ef4bac445200a0..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -class ConvAddPreluKernel: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvAddPreluParam

) { - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) - param.filter.initBuffer(device: device, precision: computePrecision) - param.y.initBuffer(device: device, precision: computePrecision) - param.alpha.initBuffer(device: device, precision: computePrecision) - - if computePrecision == .Float16 { - if param.filter.width == 1 && param.filter.height == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") - } - - } else if param.filter.channel == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") - } - } else if param.filter.width == 3 && param.filter.height == 3 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") - } - - } else if param.filter.width == 1 && param.filter.height == 5 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") - } - } else if param.filter.width == 5 && param.filter.height == 1 { - if param.mode == 
"channel" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") - } else { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") - } - } else { - fatalError(" unsupport yet ") - } - } else if computePrecision == .Float32 { - if param.filter.width == 1 && param.filter.height == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") - } - } else if param.filter.channel == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") - } - } else if param.filter.width == 3 && param.filter.height == 3 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") - } - - } else if param.filter.width == 1 && param.filter.height == 5 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") - } - } else if param.filter.width == 5 && 
param.filter.height == 1 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") - } else { - super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") - } - } else { - fatalError(" unsupport yet ") - } - } else { - fatalError() - } - - let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) - - let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) - - // print(" function: \(functionName)") - // print("offset x: \(offsetX)") - // print("offset y: \(offsetY)") - - let offsetZ = 0.0 - let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) - // print("metal param: ") - // print(inMetalParam) - - metalParam = inMetalParam - } - - func compute(commandBuffer: MTLCommandBuffer, param: ConvAddPreluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift deleted file mode 100644 index e79f8f9be37c2575b28aef2e9169ab814c9587fe..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift +++ /dev/null @@ -1,180 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation -import MetalPerformanceShaders - -struct ConvBNReluTestParam: TestParam { - let inputTexture: MTLTexture - let outputTexture: MTLTexture - var metalParam: MetalConvParam - let filterBuffer: MTLBuffer - let biaseBuffer: MTLBuffer - let newScaleBuffer: MTLBuffer - let newBiaseBuffer: MTLBuffer - let filterSize: (width: Int, height: Int, channel: Int) - init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { - - inputTexture = inInputTexture - outputTexture = inOutputTexture - metalParam = inMetalParam - filterBuffer = inFilterBuffer - biaseBuffer = inBiaseBuffer - newScaleBuffer = inNewScaleBuffer - newBiaseBuffer = inNewBiaseBuffer - filterSize = inFilterSize - } -} - -class ConvBNReluKernel: Kernel, Computable, Testable { - required init(device: MTLDevice, testParam: ConvBNReluTestParam) { - if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { - super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") - } else if testParam.filterSize.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") - } else { - super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") - } - } - - var metalParam: MetalConvParam! - - required init(device: MTLDevice, param: ConvBNReluParam

) { - - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) - param.filter.initBuffer(device: device, precision: computePrecision) - param.variance.initBuffer(device: device, precision: .Float32) - param.mean.initBuffer(device: device, precision: .Float32) - param.scale.initBuffer(device: device, precision: .Float32) - param.bias.initBuffer(device: device, precision: .Float32) - - if computePrecision == .Float32 { - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") - } else { - fatalError(" unsupport ") - } - } else if computePrecision == .Float16 { - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half") - } else { - fatalError(" unsupport ") - } - } else { - fatalError() - } - - - - let offsetX = param.filter.width/2 - Int(param.paddings[0]) - let offsetY = param.filter.height/2 - Int(param.paddings[1]) - -// print(" param filter width: \(param.filter.width)") -// print(" param filter height: \(param.filter.height)") -// -// print(" param paddings: \(param.paddings)") -// -// print("ConvBNReluKernel offset x: \(offsetX)") -// print("ConvBNReluKernel offset y: \(offsetY)") - - let offsetZ = 0.0 - - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: 
UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) - - var invs: [P] = [] - let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) - - for i in 0...stride { - let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) - invs.append(P(inv)) - } - - let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) - let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) - - let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) - let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) - let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) - for i in 0...stride { - newScale[i] = invs[i] * scaleContents[i] - newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] - } - - var newBiaseBuffer: MTLBuffer - var newScaleBuffer: MTLBuffer - - if computePrecision == .Float32 { - newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! - newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! - } else if computePrecision == .Float16 { - - newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! - newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! - - float32ToFloat16(input: newBiase as! UnsafeMutablePointer, output: newBiaseBuffer.contents(), count: param.bias.buffer.length / MemoryLayout

.size) - - float32ToFloat16(input: newScale as! UnsafeMutablePointer, output: newScaleBuffer.contents(), count: param.scale.buffer.length / MemoryLayout

.size) - } else { - fatalError(" unsupport ") - } - - param.newBiase = newBiaseBuffer - param.newScale = newScaleBuffer - - newScale.deinitialize(count: param.scale.buffer.length) - newScale.deallocate() - - newBiase.deinitialize(count: param.bias.buffer.length) - newBiase.deallocate() - } - - func compute(commandBuffer: MTLCommandBuffer, param: ConvBNReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.newScale!, offset: 0, index: 2) - encoder.setBuffer(param.newBiase!, offset: 0, index: 3) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - public func test(commandBuffer: MTLCommandBuffer, param: ConvBNReluTestParam) { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - fatalError() - } - - encoder.setTexture(param.inputTexture, index: 0) - encoder.setTexture(param.outputTexture, index: 1) - var inMetalParam = param.metalParam - encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) - encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 2) - encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 3) - encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..9d0c6de35ed23b14a05a9c3e6398931556d535a0 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal @@ -0,0 +1,400 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +struct MetalConvParam { + short offsetX; + short offsetY; + short offsetZ; + ushort strideX; + ushort strideY; +}; + + +kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = half4(0.0); + + half4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + half4 
weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + + output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0)); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = half4(0.0); + + half4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = inTexture.sample(sample, 
float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0)); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_add_1x1_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = half4(0.0); + + half4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + half4 weight_z 
= weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + +kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + half4 output = half4(0.0); + half4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = 
inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + half4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0)); + outTexture.write(output, gid.xy, gid.z); +} + + +/*---------------------------------------------*/ + + + +kernel void conv_add_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + 
output.z += dot(input, weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = 
inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_add_1x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + float4 
weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + +kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(0.0); + float4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] 
= inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + float4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift index 345136a503d8eda6ad23f85ef01eb53fa539d453..92c43fe3218aa0c3ecfabd9a8d85c8107ecad273 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift @@ -14,49 +14,38 @@ import Foundation + public struct MetalConvParam { - let offsetX: Int16 - let offsetY: Int16 - let offsetZ: Int16 - let strideX: UInt16 - let strideY: UInt16 - let dilationX: UInt16 - let dilationY: UInt16 + let offsetX: Int16 + let offsetY: Int16 + let offsetZ: Int16 + let strideX: UInt16 + let strideY: UInt16 + let paddedZ: UInt16 } class ConvKernel: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvParam

) { - param.filter.initBuffer(device: device, precision: ComputePrecision.Float32) - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_1x1") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_3x3") - } else if param.filter.width == 3 && param.filter.height == 3 { - super.init(device: device, inFunctionName: "conv_3x3") - } else { - fatalError(" unsupport ") + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvParam

) { + super.init(device: device, inFunctionName: "conv_add_1x1") + let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) + let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) + let offsetZ = 0.0 + param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) } - - let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) - let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) - let offsetZ = 0.0 - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) - } - - func compute(commandBuffer: MTLCommandBuffer, param: ConvParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") + func compute(commandBuffer: MTLCommandBuffer, param: ConvParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } } - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift deleted file mode 100644 index 435776c850854f2fc4259e8a2089299da825f463..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -struct MetalConvTransposeParam { - let kernelW: UInt16; - let kernelH: UInt16; - - let strideX: UInt16; - let strideY: UInt16; - - let paddingX: UInt16; - let paddingY: UInt16; - - let dilationX: UInt16; - let dilationY: UInt16; -} - -class ConvTransposeKernel: Kernel, Computable{ - var metalParam: MetalConvTransposeParam! - required init(device: MTLDevice, param: ConvTransposeParam

) { - param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - param.filter.initBuffer(device: device, precision: computePrecision, convertToNHWC: false, withTranspose: true) - if computePrecision == .Float32 { - if param.stride == [2, 2] && param.stride == [2, 2] { - super.init(device: device, inFunctionName: "conv_transpose2x2_stride2") - } else { - fatalError(" -- conv transpose unsupported yet -- ") - } - } else if computePrecision == .Float16 { - if param.stride == [2, 2] && param.stride == [2, 2] { - super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half") - } else { - fatalError(" -- conv transpose unsupported yet -- ") - } - } else { - fatalError() - } - -// let filter: [Float32] = param.filter.buffer.array() -// print(" conv transpose filter") -// print(filter) - let kernelWidth = UInt16(param.filter.width) - let kernelHeight = UInt16(param.filter.height) - - let strideX = UInt16(param.stride[0]) - let strideY = UInt16(param.stride[1]) - let paddingX = UInt16(param.paddings[0]) - let paddingY = UInt16(param.paddings[1]) - let dilationX = UInt16(param.dilations[0]) - let dilationY = UInt16(param.dilations[1]) - - metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY) - - } - - func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift index 16774a85492d2e21ca5575ed661674824319db28..361e77950841f2fa2b54884a2fbf394714f10902 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift @@ -14,60 +14,13 @@ import Foundation -struct ElementwiseAddMetalParam { - var fast: Int32 = 0 - var axis: Int32 = 0 - var ylen: Int32 = 0 - var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) - var xtrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) - var ydim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) - var ytrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) -} class ElementwiseAddKernel: Kernel, Computable { - var metalParam: ElementwiseAddMetalParam - required init(device: MTLDevice, param: ElementwiseAddParam

) { - param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) - - metalParam = ElementwiseAddMetalParam.init() - - let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } - let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } - let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } - let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } - - metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) - metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) - metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) - metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) - if param.axis == -1 { - metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout()) - } else { - metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) + required init(device: MTLDevice, param: ElementwiseAddParam

) { + super.init(device: device, inFunctionName: "elementwise_add") } - metalParam.ylen = Int32(param.inputY.tensorDim.cout()) - if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { - // print("===> elementwise_add fast!!!") - metalParam.fast = 1 - } - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "elementwise_add") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "elementwise_add_half") - } else { - fatalError() - } - } - - func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") + + func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam

) throws { + } - encoder.setTexture(param.inputX.metalTexture, index: 0) - encoder.setTexture(param.inputY.metalTexture, index: 1) - encoder.setTexture(param.output.metalTexture, index: 2) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddPreluKernel.swift deleted file mode 100644 index 91589864b07f10754c860d038e754e09874db54e..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddPreluKernel.swift +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - - -class ElementwiseAddPreluKernel: Kernel, Computable { - var metalParam: ElementwiseAddMetalParam - required init(device: MTLDevice, param: ElementwiseAddPreluParam

) { - param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) - param.alpha.initBuffer(device: device, precision: computePrecision) - - metalParam = ElementwiseAddMetalParam.init() - - let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } - let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } - let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } - let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } - - metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) - metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) - metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) - metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) - if param.axis == -1 { - metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout()) - } else { - metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) - } - metalParam.ylen = Int32(param.inputY.tensorDim.cout()) - if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { - // print("===> elementwise_add fast!!!") - metalParam.fast = 1 - } - - if computePrecision == .Float32 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "elementwise_add_channel_float") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "elementwise_add_element_float") - } else { - super.init(device: device, inFunctionName: "elementwise_add_prelu_float") - } - } else if computePrecision == .Float16 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "elementwise_add_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "elementwise_add_channel_half") - } else { - super.init(device: device, inFunctionName: "elementwise_add_channel_half") - } - } else { - fatalError() - } - } - - func compute(commandBuffer: MTLCommandBuffer, param: 
ElementwiseAddPreluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.inputX.metalTexture, index: 0) - encoder.setTexture(param.inputY.metalTexture, index: 1) - encoder.setTexture(param.output.metalTexture, index: 2) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.alpha.buffer, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/FlattenKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/FlattenKernel.swift deleted file mode 100644 index 090c55b16160dca19bfcdc4f3467cacdbc9a20c2..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/FlattenKernel.swift +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct FlattenMetalParam { - var idim: (Int32, Int32, Int32, Int32) - var itrans: (Int32, Int32, Int32, Int32) - var odim: (Int32, Int32, Int32, Int32) - var otrans: (Int32, Int32, Int32, Int32) -} - - -class FlattenKernel: Kernel, Computable{ - - var metalParam: FlattenMetalParam - - required init(device: MTLDevice, param: FlattenParam

) { - param.output.initTexture(device: device, computePrecision: computePrecision) - var id: [Int32] = [1, 1, 1, 1] - for i in 0..) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..8f97d61e83fc71efca8a4d41705b3eb56d7dbdb3 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift @@ -0,0 +1,86 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Metal +import Foundation + +public protocol TestParam { +} + +public protocol Testable { + associatedtype TestParamType: TestParam + func test(commandBuffer: MTLCommandBuffer, param: TestParamType) + init(device: MTLDevice, testParam: TestParamType) +} + + +protocol Computable { + associatedtype ParamType: OpParam + func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws + init(device: MTLDevice, param: ParamType) +} + +protocol KernelProtocol { + var pipline: MTLComputePipelineState { get set } + var functionName: String { get set } + +} + +open class Kernel { + let pipline: MTLComputePipelineState + let functionName: String + public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) { + pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib) + functionName = inFunctionName + } +} + +open class CusomKernel: Kernel { + public struct Shape { + public let width: Int + public let height: Int + public let channel: Int + public init(inWidth: Int, inHeight: Int, inChannel: Int){ + width = inWidth + height = inHeight + channel = inChannel + } + } + let outputTexture: MTLTexture + public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) { + let textureDesc = MTLTextureDescriptor.init() + textureDesc.textureType = .type2D + textureDesc.width = outputDim.width + textureDesc.height = outputDim.height + textureDesc.depth = (outputDim.channel + 3) / 4 + textureDesc.pixelFormat = .rgba32Float + textureDesc.usage = [.shaderRead, .shaderWrite] + textureDesc.storageMode = .shared + outputTexture = device.makeTexture(descriptor: textureDesc) ?! 
" make texture error " + + super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib) + } + + func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(inputTexuture, index: 0) + encoder.setTexture(outputTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: outputTexture) + encoder.endEncoding() + } + +} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal new file mode 100644 index 0000000000000000000000000000000000000000..92ee1184520d7b1df2577c1fc52cc3257de7be79 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal @@ -0,0 +1,252 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include +using namespace metal; + +struct OutputDim { + ushort width; + ushort height; + ushort strideX; + ushort strideY; +}; + +kernel void resize(texture2d inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant OutputDim ¶ms [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + + constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint2 pos = gid.xy * uint2(params.strideX, params.strideY); + const half4 input = inTexture.read(pos); + outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z); +} + +kernel void relu(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); + const half4 input = inTexture.read(gid.xy, gid.z); + const float4 relu = fmax((float4)input, 0.0); + outTexture.write(half4(relu), gid.xy, gid.z); +} + +kernel void elementwise_add(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + const device half4 *biasTerms [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); + const half4 input = inTexture.read(gid.xy, gid.z); + outTexture.write(input, gid.xy, gid.z); +} + +kernel void batchnorm(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + const device half4 * newScale [[buffer(0)]], + const device half4 * newBias [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= 
outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + const half4 input = inTexture.read(gid.xy, gid.z); + half4 output = input * newScale[gid.z] + newBias[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + +//kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], +// texture2d_array outTexture [[texture(1)]], +// uint3 gid [[thread_position_in_grid]]) { +// if (gid.x >= inTexture.get_width() || +// gid.y >= inTexture.get_height()){ +// return; +// } +// const half4 input = inTexture.read(gid.xy); +// outTexture.write(input, gid.xy, 0); +//} + +kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height()){ + return; + } + const float4 input = inTexture.read(gid.xy); + outTexture.write(input, gid.xy, 0); +} + + +kernel void texture2d_to_2d_array_half(texture2d inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height()){ + return; + } + const half4 input = inTexture.read(gid.xy); + outTexture.write(input, gid.xy, 0); +} + +struct PoolParam { + int ksizeX; + int ksizeY; + int strideX; + int strideY; + int paddingX; + int paddingY; + int poolType; +}; + +kernel void pool(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant PoolParam &pm [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + int xmin = gid.x * pm.strideX - pm.paddingX; + int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width())); + xmin = max(xmin, 0); + int ymin = gid.y * pm.strideX - pm.paddingX; + int ymax = min(ymin + pm.ksizeX, 
int(inTexture.get_height())); + ymin = max(ymin, 0); + + float4 r = 0; + if (pm.poolType == 0) { + r = inTexture.read(uint2(xmin, ymin), gid.z); + for (int x = xmin; x < xmax; x++) { + for (int y = ymin; y < ymax; y++) { + r = fmax(r, inTexture.read(uint2(x, y), gid.z)); + } + } + } else if (pm.poolType == 1) { + for (int x = xmin; x < xmax; x++) { + for (int y = ymin; y < ymax; y++) { + r += inTexture.read(uint2(x, y), gid.z); + } + } + r /= pm.ksizeX * pm.ksizeY; + } + outTexture.write(r, gid.xy, gid.z); +} + + +kernel void pool_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant PoolParam &pm [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + int xmin = gid.x * pm.strideX - pm.paddingX; + int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width())); + xmin = max(xmin, 0); + int ymin = gid.y * pm.strideX - pm.paddingX; + int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height())); + ymin = max(ymin, 0); + + half4 r = 0; + if (pm.poolType == 0) { + r = inTexture.read(uint2(xmin, ymin), gid.z); + for (int x = xmin; x < xmax; x++) { + for (int y = ymin; y < ymax; y++) { + r = fmax(r, inTexture.read(uint2(x, y), gid.z)); + } + } + } else if (pm.poolType == 1) { + for (int x = xmin; x < xmax; x++) { + for (int y = ymin; y < ymax; y++) { + r += inTexture.read(uint2(x, y), gid.z); + } + } + r /= pm.ksizeX * pm.ksizeY; + } + outTexture.write(r, gid.xy, gid.z); +} + +kernel void reshape(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + + float4 r = inTexture.read(uint2(0, 0), gid.z); + outTexture.write(r, gid.xy, gid.z); +} + +kernel void reshape_half(texture2d_array inTexture 
[[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + + half4 r = inTexture.read(uint2(0, 0), gid.z); + outTexture.write(r, gid.xy, gid.z); +} + +kernel void softmax(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + int zsize = inTexture.get_array_size(); + float maxv = inTexture.read(uint2(0, 0), 0)[0]; + for (int z = 0; z < zsize; z++) { + float4 r = inTexture.read(uint2(0, 0), z); + maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); + } + float sum = 0; + for (int z = 0; z < zsize; z++) { + float4 r = inTexture.read(uint2(0, 0), z); + sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); + } + float4 rr = inTexture.read(gid.xy, gid.z); + rr = exp(rr - maxv) / sum; + outTexture.write(rr, gid.xy, gid.z); +} + + +kernel void softmax_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + int zsize = inTexture.get_array_size(); + half maxv = inTexture.read(uint2(0, 0), 0)[0]; + for (int z = 0; z < zsize; z++) { + half4 r = inTexture.read(uint2(0, 0), z); + maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); + } + float sum = 0; + for (int z = 0; z < zsize; z++) { + half4 r = inTexture.read(uint2(0, 0), z); + sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); + } + half4 rr = inTexture.read(gid.xy, gid.z); + rr = exp(rr - maxv) / sum; + outTexture.write(rr, gid.xy, gid.z); +} diff --git 
a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift deleted file mode 100644 index 3f78efb89e47197ae0af6a1bb53955bc4a937eda..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class MulticlassNMSKernel: Kernel, Computable{ - let pipline1: MTLComputePipelineState - - required init(device: MTLDevice, param: MulticlassNMSParam

) { - - param.middleOutput.initBuffer(device: device) - param.bboxOutput.initBuffer(device: device) - if computePrecision == .Float32 { - pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true) - super.init(device: device, inFunctionName: "nms_fetch_result") - } else if computePrecision == .Float16 { - pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true) - super.init(device: device, inFunctionName: "nms_fetch_result_half") - } else { - fatalError( " unsupport precision " ) - } - - } - - func compute(commandBuffer: MTLCommandBuffer, param: MulticlassNMSParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.scores.metalTexture, index: 0) - encoder.setBuffer(param.middleOutput.resultBuffer!, offset: 0, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.scores.metalTexture) - encoder.endEncoding() - - guard let encoderBox = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoderBox.setTexture(param.bboxes.metalTexture, index: 0) - encoderBox.setBuffer(param.bboxOutput.resultBuffer!, offset: 0, index: 0) - encoderBox.dispatch(computePipline: pipline1, outTexture: param.bboxes.metalTexture) - encoderBox.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift index 1d66e420e236f2e0a7734838a293215807caa968..983a3acb9943f2e549b07d095c7dd4a23c1e96d9 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift @@ -15,57 +15,46 @@ import Foundation struct PoolMetalParam { - let ksizeX: Int32 - let ksizeY: Int32 - let strideX: Int32 - let strideY: Int32 - let paddingX: Int32 - let paddingY: Int32 - let poolType: Int32 + let ksizeX: Int32 + let ksizeY: Int32 + let strideX: Int32 + let strideY: Int32 + let paddingX: Int32 + let paddingY: Int32 + let poolType: Int32 } class PoolKernel: Kernel, Computable{ - var metalParam: PoolMetalParam - required init(device: MTLDevice, param: PoolParam

) { - param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - - var poolType: Int32 - switch param.poolType { - case "max": - poolType = 0 - case "avg": - poolType = 1 - default: - fatalError() + func compute(commandBuffer: MTLCommandBuffer, param: PoolParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + var poolType: Int32 + switch param.poolType { + case "max": + poolType = 0 + case "avg": + poolType = 1 + default: + throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType) + } + var pmp = PoolMetalParam.init( + ksizeX: param.ksize[0], + ksizeY: param.ksize[1], + strideX: param.stride[0], + strideY: param.stride[1], + paddingX: param.padding[0], + paddingY: param.padding[1], + poolType: poolType + ) + encoder.setBytes(&pmp, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - metalParam = PoolMetalParam.init( - ksizeX: param.ksize[0], - ksizeY: param.ksize[1], - strideX: param.stride[0], - strideY: param.stride[1], - paddingX: param.padding[0], - paddingY: param.padding[1], - poolType: poolType - ) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "pool") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "pool_half") - } else { - fatalError() - } - } - - func compute(commandBuffer: MTLCommandBuffer, param: PoolParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") + required init(device: MTLDevice, param: PoolParam

) { + super.init(device: device, inFunctionName: "pool") } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift deleted file mode 100644 index 4ee25888f06048bfe696028ea2338a56fd06053e..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class PreluKernel: Kernel, Computable{ - required init(device: MTLDevice, param: PreluParam

) { - param.alpha.initBuffer(device: device, precision: computePrecision) - param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - if computePrecision == .Float32 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "prelu_channel") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "prelu_element") - } else { - super.init(device: device, inFunctionName: "prelu_other") - } - } else if computePrecision == .Float16 { - if param.mode == "channel" { - super.init(device: device, inFunctionName: "prelu_channel_half") - } else if param.mode == "element" { - super.init(device: device, inFunctionName: "prelu_element_half") - } else { - super.init(device: device, inFunctionName: "prelu_other_half") - } - } else { - fatalError() - } - } - - func compute(commandBuffer: MTLCommandBuffer, param: PreluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBuffer(param.alpha.buffer, offset: 0, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift deleted file mode 100644 index be18c4411ffbef704dff61bb2aa82bc338daf163..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct PriorBoxMetalParam { - let offset: Float32 - let stepWidth: Float32 - let stepHeight: Float32 - let minSize: Float32 - let maxSize: Float32 - let imageWidth: Float32 - let imageHeight: Float32 - let clip: Bool - let numPriors: uint - let aspecRatiosSize: uint - let minSizeSize: uint - let maxSizeSize: uint -} - -class PriorBoxKernel: Kernel, Computable{ - var metalParam: PriorBoxMetalParam! - - required init(device: MTLDevice, param: PriorBoxParam

) { - - let originDim = param.output.tensorDim; - - param.output.tensorDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]]) - param.output.padToFourDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]]) - - param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: computePrecision) - param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision) - - - if computePrecision == .Float32 { - if param.min_max_aspect_ratios_order { - super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder") - } else { - super.init(device: device, inFunctionName: "prior_box") - } - - } else if computePrecision == .Float16 { - if param.min_max_aspect_ratios_order { - super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half") - } else { - super.init(device: device, inFunctionName: "prior_box_half") - } - } else { - fatalError() - } - - - guard param.minSizes.count == 1 else { - fatalError(" need implement ") - } - -// let n = 1 -// let h = param.output.dim[1] -// let w = param.output.dim[2] -// let c = param.output.dim[3] * param.output.dim[0] -// -// param.output.dim = Dim.init(inDim: [n, h, w, c]) -// param.output.transpose = [0, 1, 2, 3] - - let imageWidth = Float32(param.inputImage.padToFourDim[3]) - let imageHeight = Float32(param.inputImage.padToFourDim[2]) - - let featureWidth = param.input.padToFourDim[3] - let featureHeight = param.input.padToFourDim[2] - - if param.stepW == 0 || param.stepH == 0 { - param.stepW = Float32(imageWidth) / Float32(featureWidth) - param.stepH = Float32(imageHeight) / Float32(featureHeight) - } - - var outputAspectRatior: [Float32] = [] - outputAspectRatior.append(1.0) - - let epsilon = 1e-6 - for ar in param.aspectRatios { - var alreadyExist = false - for outputAr in outputAspectRatior { - if fabs(Double(ar) - Double(outputAr)) < Double(epsilon) { - alreadyExist = true - 
break - } - } - - if !alreadyExist { - outputAspectRatior.append(ar) - } - if param.flip { - outputAspectRatior.append(1.0 / ar) - } - } - - if computePrecision == .Float16 { - let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout.size) - float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count) - param.newAspectRatios = buffer - - } else if computePrecision == .Float32 { - let buffer = device.makeBuffer(bytes: outputAspectRatior, length: outputAspectRatior.count * MemoryLayout.size, options: []) - param.newAspectRatios = buffer - } else { - fatalError() - } - - let aspectRatiosSize = uint(outputAspectRatior.count) - - let maxSizeSize: uint = uint(param.maxSizes.count) - let minSizeSize: uint = uint(param.minSizes.count) - - let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize - - let minSize = param.minSizes.last ?? 0.0 - let maxSize = param.maxSizes.last ?? 0.0 - - metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize) - - } - - func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setTexture(param.outputVariances.metalTexture, index: 2) - - encoder.setBuffer(param.newAspectRatios!, offset: 0, index: 0) - - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 1) - - encoder.setBytes(param.variances, length: MemoryLayout.size * param.variances.count, index: 2) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift index 18f279e9f3c5226d6eea5b5e6f0a42502173071e..3c669cf4d965f7842070c4d38427f6d1d7440db5 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift @@ -15,23 +15,17 @@ import Foundation class ReluKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: ReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") + func compute(commandBuffer: MTLCommandBuffer, param: ReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: ReluParam

) { - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "relu") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "relu_half") - } else { - fatalError() + + required init(device: MTLDevice, param: ReluParam

) { + super.init(device: device, inFunctionName: "relu") } - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift index 4114d3c3c62054235cd57fe37fe9cd83c5bb58cb..438c89e59eb7e9a2ef315997b9d8d1f3a44a5462 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift @@ -14,84 +14,18 @@ import Foundation -struct ReshapeMetalParam { - var idim: (Int32, Int32, Int32, Int32) - var itrans: (Int32, Int32, Int32, Int32) - var odim: (Int32, Int32, Int32, Int32) - var otrans: (Int32, Int32, Int32, Int32) -} - -struct ReshapeTestParam: TestParam { - let inputTexture: MTLTexture - let outputTexture: MTLTexture - let param: ReshapeMetalParam -} - class ReshapeKernel: Kernel, Computable{ - - var metalParam: ReshapeMetalParam - - required init(device: MTLDevice, param: ReshapeParam

) { - param.output.initTexture(device: device, computePrecision: computePrecision) - var id: [Int32] = [1, 1, 1, 1] - for i in 0..) { + super.init(device: device, inFunctionName: "reshape") } - let it: [Int32] = param.input.transpose.map { Int32($0) } - var od: [Int32] = [1, 1, 1, 1] - for i in 0..) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - let ot: [Int32] = param.output.transpose.map { Int32($0) } - metalParam = ReshapeMetalParam.init( - idim: (id[0], id[1], id[2], id[3]), - itrans: (it[0], it[1], it[2], it[3]), - odim: (od[0], od[1], od[2], od[3]), - otrans: (ot[0], ot[1], ot[2], ot[3]) - ) - let irank = param.input.tensorDim.cout() - let orank = param.output.tensorDim.cout() - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_float") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_half") - } else { - fatalError() - } - } - - required init(device: MTLDevice, testParam: ReshapeTestParam) { - metalParam = ReshapeMetalParam.init( - idim: (0, 0, 0, 0), - itrans: (0, 0, 0, 0), - odim: (0, 0, 0, 0), - otrans: (0, 0, 0, 0) - ) - super.init(device: device, inFunctionName: "reshape") - } - - func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - -// func test(commandBuffer: MTLCommandBuffer, testParam: ReshapeTestParam) { -// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { -// fatalError() -// } -// encoder.setTexture(testParam.inputTexture, index: 0) -// encoder.setTexture(testParam.outputTexture, index: 1) -// var pm: ReshapeMetalParam = testParam.param -// encoder.setBytes(&pm, length: MemoryLayout.size, index: 0) -// encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture) -// encoder.endEncoding() -// } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeBilinearKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeBilinearKernel.swift deleted file mode 100644 index e5cbce1d1e196f88bb7a3b38d3e92c330774f3ba..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeBilinearKernel.swift +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -struct ResizeBilinearMetalParam { - var ratio_h: Float32 - var ratio_w: Float32 -} - -class ResizeBilinearKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: ResizeBilinearParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - let ratio_h: Float32 = Float32(param.input.tensorDim.dims[2]) / Float32(param.output.tensorDim.dims[2]) - let ratio_w: Float32 = Float32(param.input.tensorDim.dims[3]) / Float32(param.output.tensorDim.dims[3]) - var p = ResizeBilinearMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w) - encoder.setBytes(&p, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: ResizeBilinearParam

) { - param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "resize_bilinear") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "resize_bilinear_half") - } else { - fatalError() - } - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..d2795111ad1f43c759b95aa52ed34085a4ac147a --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift @@ -0,0 +1,62 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ +// +//import Foundation +//import MetalPerformanceShaders +// +// +//struct ResizeParam: OpParam{ +// typealias OutputType = <#type#> +// +// typealias ParamPrecisionType = <#type#> +// +// let input: MTLTexture +// let output: MTLTexture +// let expectDim: Dim +//} +// +//struct OutputDim { +// let width: UInt16 +// let height: UInt16 +// let strideX: UInt16 +// let strideY: UInt16 +//} +// +//class ResizeKernel: Kernel, Computable{ +// var lanczos: MPSImageLanczosScale +// required init(device: MTLDevice, param: ResizeParam) { +// lanczos = MPSImageLanczosScale.init(device: device) +// super.init(device: device, inFunctionName: "resize") +// } +// func compute(commandBuffer: MTLCommandBuffer, param: ResizeParam) throws { +//// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { +//// throw PaddleMobileError.predictError(message: " encode is nil") +//// } +// lanczos.encode(commandBuffer: commandBuffer, sourceTexture: param.input, destinationTexture: param.output) +// +//// encoder.setTexture(param.input, index: 0) +//// encoder.setTexture(param.output, index: 1) +//// let strideX = param.input.width/param.expectDim[2] +//// let strideY = param.input.height/param.expectDim[1] +//// var outputDim = OutputDim.init(width: UInt16(param.expectDim[1]), height: UInt16(param.expectDim[2]), strideX: UInt16(strideX), strideY: UInt16(strideY)) +//// encoder.setBytes(&outputDim, length: MemoryLayout.size, index: 0) +//// encoder.dispatch(computePipline: pipline, outTexture: param.output) +//// encoder.endEncoding() +// } +// +// +// +// +//} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift deleted file mode 100644 index feb052a44fdc7c6134cc90f07f3fc94ad0a497df..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle 
Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct ShapeMetalParam { -} - -class ShapeKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: ShapeParam

) throws { -// print("shape compute") -// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { -// throw PaddleMobileError.predictError(message: " encode is nil") -// } -// encoder.setTexture(param.output.metalTexture, index: 0) -// encoder.endEncoding() - } - - required init(device: MTLDevice, param: ShapeParam

) { - param.output.initTexture(device: device, computePrecision: computePrecision) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "shape") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "shape_half") - } else { - fatalError() - } - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift index 5d6874da151b64fd58c2016865515778d6267551..b94f0286f43ec482353ff278c6c104da77f47315 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift @@ -14,38 +14,19 @@ import Foundation -struct SoftmaxMetalParam { - let N: Int32 - let K: Int32 -} - class SoftmaxKernel: Kernel, Computable{ - - var metalParam: SoftmaxMetalParam - required init(device: MTLDevice, param: SoftmaxParam

) { - param.output.initTexture(device: device, computePrecision: computePrecision) - metalParam = SoftmaxMetalParam.init( - N: Int32(param.input.tensorDim[0]), - K: Int32(param.input.tensorDim[1]) - ) - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "softmax_float") - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "softmax_half") - } else { - fatalError() + + func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } - } - - func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") + + required init(device: MTLDevice, param: SoftmaxParam

) { + super.init(device: device, inFunctionName: "softmax") } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SplitKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SplitKernel.swift deleted file mode 100644 index 67e1cd9ab85c3c60d89846bab89ef10bbe513305..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SplitKernel.swift +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct SplitMetalParam { - var idim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1) - var axis: Int32 = 0 - var offset: Int32 = 0 - var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) - var vdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) -} - -class SplitKernel: Kernel, Computable{ - var smp: SplitMetalParam - func compute(commandBuffer: MTLCommandBuffer, param: SplitParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - for i in 0...size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: SplitParam

) { - // param.output.initTexture(device: device, computePrecision: computePrecision) - let num = param.outputList.count - let rank = param.input.tensorDim.cout() - assert(num >= 2 && num <= 4) - for output in param.outputList { - output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) - } - smp = SplitMetalParam.init() - smp.idim = (Int32(param.input.dim[0]), Int32(param.input.dim[1]), Int32(param.input.dim[2]), Int32(param.input.dim[3])) - smp.axis = Int32(param.axis + param.input.dim.cout() - param.input.tensorDim.cout()) - for i in 0..<4 { - if param.input.transpose[i] == smp.axis { - smp.axis = Int32(i) - break - } - } - smp.trans = (Int32(param.input.transpose[0]), Int32(param.input.transpose[1]), Int32(param.input.transpose[2]), Int32(param.input.transpose[3])) - var vdim: [Int32] = [0, 0, 0, 0] - for i in 0..: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: FeedParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.input.mtlTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: FeedParam

) { - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) - if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "texture2d_to_2d_array_half") - } else if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "texture2d_to_2d_array") - } else { - fatalError() + func compute(commandBuffer: MTLCommandBuffer, param: FeedParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(param.input.mtlTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture) + encoder.endEncoding() } - } + required init(device: MTLDevice, param: FeedParam

) { + super.init(device: device, inFunctionName: "texture2d_to_2d_array") + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift deleted file mode 100644 index 7b872283d45bca4adb5e90a531c936f2ad5534f8..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -struct TransposeMetalParam { - var iC: Int32 = 0 - var oC: Int32 = 0 - var axis: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) -} - -class TransposeKernel: Kernel, Computable { - var metalParam: TransposeMetalParam = TransposeMetalParam.init() - required init(device: MTLDevice, param: TransposeParam

) { - param.output.initTexture(device: device, computePrecision: computePrecision) - let rank = param.input.tensorDim.cout() - var axis: [Int] = [0, 1, 2, 3] - for i in 0..", kernelFunc) - print(metalParam) - super.init(device: device, inFunctionName: kernelFunc) - } - - func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() - } - - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal deleted file mode 100644 index 96333a07a9669ecb2b5bfe901d71be729e37b533..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -using namespace metal; - -kernel void batchnorm(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device float4 * nscale [[buffer(0)]], - const device float4 * nbias [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - const float4 input = inTexture.read(gid.xy, gid.z); - float4 output = input * nscale[gid.z] + nbias[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - -kernel void batchnorm_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 * newScale [[buffer(0)]], - const device half4 * newBias [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - const half4 input = inTexture.read(gid.xy, gid.z); - half4 output = input * newScale[gid.z] + newBias[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal deleted file mode 100644 index eb94408c8ac664be5cf62bc28bfb02825856ebd4..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal +++ /dev/null @@ -1,36 +0,0 @@ -// -// BatchNormRelu.metal -// paddle-mobile -// - -#include -using namespace metal; - -struct MetalConvParam { - short offsetX; - short offsetY; - short offsetZ; - ushort strideX; - ushort strideY; -}; - -kernel void batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device float4 *new_scale [[buffer(0)]], - const device float4 *new_biase [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= 
outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - float4 input; - float4 output; - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - input = inTexture.sample(sample, gid.x, gid.y, gid.z); - output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); - -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal deleted file mode 100644 index a590f8089890f2fab1af4c1f736f3bfc5708aecf..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b - -#define FUNC(f, p) CONCAT2_(f, p) -#define VECTOR(p, n) CONCAT2(p, n) - -kernel void FUNC(bilinear_interp, P)(texture2d_array input [[texture(0)]], - texture2d_array output [[texture(1)]], - constant bilinear_interp_param & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - VECTOR(P, 4) r; - if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) { - r = input.read(gid.xy, gid.z); - } else { - P w = gid.x * pm.ratio_w; - P h = gid.y * pm.ratio_h; - uint w0 = w, h0 = h; - uint w1 = w0 + 1, h1 = h0 + 1; - P w1lambda = w - w0, h1lambda = h - h0; - P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda; - if (w1 >= input.get_width()) w1 = w0; - if (h1 >= input.get_height()) h1 = h0; - VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z); - VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z); - VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z); - VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z); - r = h2lambda * (w2lambda * r0 + w1lambda * r1) - + h1lambda * (w2lambda * r2 + w1lambda * r3); - } - output.write(r, gid.xy, gid.z); -} - -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal deleted file mode 100644 index 394cf89db09d47b0d3c87ff124c21a93962c0972..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -struct bilinear_interp_param { - float ratio_h; - float ratio_w; -}; - -#define P float -#include "BilinearInterp.inc.metal" -#undef P - -#define P half -#include "BilinearInterp.inc.metal" -#undef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.inc.metal deleted file mode 100644 index 918fbac1a713d7b0442a1eb1f07abea3616bec96..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.inc.metal +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b - -#define FUNC(f, p) CONCAT2_(f, p) -#define VECTOR(p, n) CONCAT2(p, n) -kernel void FUNC(boxcoder, P)(texture2d_array priorBox [[texture(0)]], - texture2d_array priorBoxVar [[texture(1)]], - texture2d_array targetBox [[texture(2)]], - texture2d_array output[[texture(3)]], - uint3 gid [[thread_position_in_grid]]) { - VECTOR(P, 4) p = priorBox.read(uint2(0, gid.x), gid.z); - VECTOR(P, 4) pv = priorBoxVar.read(uint2(0, gid.x), gid.z); - VECTOR(P, 4) t; - t[0] = targetBox.read(uint2(0, gid.x), gid.z)[0]; - t[1] = targetBox.read(uint2(1, gid.x), gid.z)[0]; - t[2] = targetBox.read(uint2(2, gid.x), gid.z)[0]; - t[3] = targetBox.read(uint2(3, gid.x), gid.z)[0]; - - P px = (p.x + p.z) / 2; - P py = (p.y + p.w) / 2; - P pw = p.z - p.x; - P ph = p.w - p.y; - - P tx = pv.x * t.x * pw + px; - P ty = pv.y * t.y * ph + py; - P tw = exp(pv.z * t.z) * pw; - P th = exp(pv.w * t.w) * ph; - - VECTOR(P, 4) r; - r.x = tx - tw / 2; - r.y = ty - th / 2; - r.z = tx + tw / 2; - r.w = ty + th / 2; - - output.write(r, gid.xy, gid.z); -} - -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.metal deleted file mode 100644 index 4009e213d51d0a9c33c70aea22b015df49e347dc..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.metal +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -#define P float -#include "BoxCoder.inc.metal" -#undef P -#define P half -#include "BoxCoder.inc.metal" -#undef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal deleted file mode 100644 index 40bae035c097b5ab386d78520b6b04f074eb2fee..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -using namespace metal; - - -inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) { - abcd[0] = abcd[1] = abcd[2] = 0; - abcd[3] = xyzn[0] * 4 + xyzn[3]; -} -inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) { - abcd[0] = abcd[1] = 0; - abcd[2] = xyzn[1]; - abcd[3] = xyzn[0] * 4 + xyzn[3]; -} -inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) { - abcd[0] = 0; - abcd[3] = xyzn[0]; - abcd[2] = xyzn[1]; - abcd[1] = xyzn[2] * 4 + xyzn[3]; -} -inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) { - abcd[2] = xyzn[0]; - abcd[1] = xyzn[1]; - uint t = xyzn[2] * 4 + xyzn[3]; - abcd[0] = t / C; - abcd[3] = t % C; -} - -inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) { - xyzn[1] = xyzn[2] = 0; - xyzn[0] = abcd[3] / 4; - xyzn[1] = abcd[3] % 4; -} -inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) { - xyzn[2] = 0; - xyzn[1] = abcd[2]; - xyzn[0] = abcd[3] / 4; - xyzn[3] = abcd[3] % 4; -} -inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) { - xyzn[0] = abcd[3]; - xyzn[1] = abcd[2]; - xyzn[2] = abcd[1] / 4; - xyzn[3] = abcd[1] % 4; -} -inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) { - xyzn[0] = abcd[2]; - xyzn[1] = abcd[1]; - uint t = abcd[0] * C + abcd[3]; - xyzn[2] = t / 4; - xyzn[3] = t % 4; -} - -inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) { - abcd[2] = xyzn[0]; - abcd[1] = xyzn[1]; - uint t = xyzn[2] * 4 + xyzn[3]; - abcd[0] = t / C; - abcd[3] = t % C; -} - -inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) { - xyzn[0] = abcd[2]; - xyzn[1] = abcd[1]; - uint t = abcd[0] * C + abcd[3]; - xyzn[2] = t / 4; - xyzn[3] = t % 4; -} - -inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) { - int32_t r = abcd[0]; - r = r * dim[1] + abcd[1]; - r = r * dim[2] + abcd[2]; - r = r * dim[3] + abcd[3]; - return r; -} - -inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) { - abcd[3] = ind % dim[3]; ind /= dim[3]; - abcd[2] = ind % dim[2]; ind /= dim[2]; - abcd[1] = ind % dim[1]; ind /= dim[1]; - abcd[0] = ind; -} - -inline 
void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) { - for (int i = 0; i < 4; i++) { - opos[i] = ipos[trans[i]]; - } -} - -inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) { - for (int i = 0; i < 4; i++) { - opos[trans[i]] = ipos[i]; - } -} - - -struct MetalConvParam { - short offsetX; - short offsetY; - short offsetZ; - ushort strideX; - ushort strideY; - ushort dilationX; - ushort dilationY; -}; - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal deleted file mode 100644 index 2b070fc48b78391e96b93823eeff7f936de2ff7d..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal +++ /dev/null @@ -1,318 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b -#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c -#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d -#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e - -#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p) -#define VECTOR(p, n) CONCAT2(p, n) -#define FUNC_R(f, r) CONCAT2_(f, r) - -#if V == VX -#define VV x -#elif V == VY -#define VV y -#elif V == VZ -#define VV z -#else -#define VV normal -#endif - -#if V == VNORMAL -//kernel void FUNC(concat, R, N, normal, P)(array, N> in [[texture(0)]], -// texture2d_array out_x [[texture(N)]], -// texture2d_array out [[texture(N+1)]], -// constant ConcatParam & pm [[buffer(0)]], -// uint3 gid [[thread_position_in_grid]]) { -//} -kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[texture(0)]], - texture2d_array in1 [[texture(1)]], -#if N >= 3 - texture2d_array in2 [[texture(2)]], -#endif -#if N >= 4 - texture2d_array in3 [[texture(3)]], -#endif -#if N >= 5 - texture2d_array in4 [[texture(4)]], -#endif -#if N >= 6 - texture2d_array in5 [[texture(5)]], -#endif - texture2d_array inx [[texture(N)]], - texture2d_array out [[texture(N+1)]], - constant ConcatParam & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - ConcatParam cp = pm; - int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4]; - VECTOR(P, 4) r = inx.read(gid.xy, gid.z); - for (int i = 0; i < 4; i++) { - xyzn[3] = i; -#if R == 4 - xyzn2abcd_4(cp.odim[3], xyzn, abcd); -#else - FUNC_R(xyzn2abcd, R)(xyzn, abcd); -#endif - int k = abcd[cp.axis] - cp.offset; - if (k < 0) continue; - int j = 0; - for (; j < N; j++) { - if (k < cp.vdim[j]) { - break; - } - k -= cp.vdim[j]; - } - if (j == N) { - continue; - } - int ta = cp.odim[cp.axis]; - abcd[cp.axis] = k; - cp.odim[cp.axis] = cp.vdim[j]; -#if R == 4 - abcd2xyzn_4(cp.odim[3], abcd, oxyzn); -#else - FUNC_R(abcd2xyzn, R)(abcd, oxyzn); -#endif - cp.odim[cp.axis] = ta; - 
switch (j) { - case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; -#if N >= 3 - case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; -#endif -#if N >= 4 - case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; -#endif -#if N >= 5 - case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; -#endif -#if N >= 6 - case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; -#endif - } - } - out.write(r, gid.xy, gid.z); -} - -#endif // V == NORMAL - - - -#if V == VX -kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[texture(0)]], - texture2d_array in1 [[texture(1)]], -#if N >= 3 - texture2d_array in2 [[texture(2)]], -#endif // N >= 3 -#if N >= 4 - texture2d_array in3 [[texture(3)]], -#endif // N >= 4 -#if N >= 5 - texture2d_array in4 [[texture(4)]], -#endif // N >= 5 -#if N >= 6 - texture2d_array in5 [[texture(5)]], -#endif // N >= 6 - texture2d_array out [[texture(N)]], - constant ConcatParam & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - int x = gid.x - pm.offset; - if (x < 0) return; - if (x < pm.vdim[0]) { - VECTOR(P, 4) r = in0.read(gid.xy, gid.z); - out.write(r, gid.xy, gid.z); - return; - } - x -= pm.vdim[0]; - if (x < pm.vdim[1]) { - VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#if N >= 3 - x -= pm.vdim[1]; - if (x < pm.vdim[2]) { - VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 3 -#if N >= 4 - x -= pm.vdim[2]; - if (x < pm.vdim[3]) { - VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 4 -#if N >= 5 - x -= pm.vdim[3]; - if (x < pm.vdim[4]) { - VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 5 -#if N 
>= 6 - x -= pm.vdim[4]; - if (x < pm.vdim[5]) { - VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 6 -} -#endif // V == VX - -#if V == VY -kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[texture(0)]], - texture2d_array in1 [[texture(1)]], -#if N >= 3 - texture2d_array in2 [[texture(2)]], -#endif // N >= 3 -#if N >= 4 - texture2d_array in3 [[texture(3)]], -#endif // N >= 4 -#if N >= 5 - texture2d_array in4 [[texture(4)]], -#endif // N >= 5 -#if N >= 6 - texture2d_array in5 [[texture(5)]], -#endif // N >= 6 - texture2d_array out [[texture(N)]], - constant ConcatParam & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - int y = gid.y - pm.offset; - if (y < 0) return; - if (y < pm.vdim[0]) { - VECTOR(P, 4) r = in0.read(gid.xy, gid.z); - out.write(r, gid.xy, gid.z); - return; - } - y -= pm.vdim[0]; - if (y < pm.vdim[1]) { - VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#if N >= 3 - y -= pm.vdim[1]; - if (y < pm.vdim[2]) { - VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 3 -#if N >= 4 - y -= pm.vdim[2]; - if (y < pm.vdim[3]) { - VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 4 -#if N >= 5 - y -= pm.vdim[3]; - if (y < pm.vdim[4]) { - VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 5 -#if N >= 6 - y -= pm.vdim[4]; - if (y < pm.vdim[5]) { - VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 6 -} -#endif // V == VY - -#if V == VZ -kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[texture(0)]], - texture2d_array in1 [[texture(1)]], -#if N >= 3 - texture2d_array in2 [[texture(2)]], -#endif // N >= 3 -#if N >= 4 - texture2d_array in3 [[texture(3)]], -#endif // N >= 4 -#if N >= 5 - 
texture2d_array in4 [[texture(4)]], -#endif // N >= 5 -#if N >= 6 - texture2d_array in5 [[texture(5)]], -#endif // N >= 6 - texture2d_array out [[texture(N)]], - constant ConcatParam & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - int z = gid.z - pm.offset; - if (z < 0) return; - if (z < pm.vdim[0]) { - VECTOR(P, 4) r = in0.read(gid.xy, gid.z); - out.write(r, gid.xy, gid.z); - return; - } - z -= pm.vdim[0]; - if (z < pm.vdim[1]) { - VECTOR(P, 4) r = in1.read(gid.xy, z); - out.write(r, gid.xy, gid.z); - return; - } -#if N >= 3 - z -= pm.vdim[1]; - if (z < pm.vdim[2]) { - VECTOR(P, 4) r = in2.read(gid.xy, z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 3 -#if N >= 4 - z -= pm.vdim[2]; - if (z < pm.vdim[3]) { - VECTOR(P, 4) r = in3.read(gid.xy, z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 4 -#if N >= 5 - z -= pm.vdim[3]; - if (z < pm.vdim[4]) { - VECTOR(P, 4) r = in4.read(gid.xy, z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 5 -#if N >= 6 - z -= pm.vdim[4]; - if (z < pm.vdim[5]) { - VECTOR(P, 4) r = in5.read(gid.xy, z); - out.write(r, gid.xy, gid.z); - return; - } -#endif // N >= 6 -} -#endif // V == VZ - - -#undef VV -#endif // #ifdef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.metal deleted file mode 100644 index b7d17f2d25de544e4ce938c577e0d04f536da9af..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.metal +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -#include "Common.metal" - -using namespace metal; - -struct ConcatParam { - int32_t odim[4]; - int32_t axis; - int32_t offset; - int32_t trans[4]; - int32_t vdim[6]; -}; - -#define VNORMAL 1 -#define VX 2 -#define VY 3 -#define VZ 4 - -// >> fast mode -// only support concat_{2,3,4}_{2,3,4,5,6}_y_{float,half} -// only support concat_{3,4}_{2,3,4,5,6}_x_{float,half} -// only support concat_{1,2,3,4}_{2,3,4,5,6}_z_{float,half} -// >> normal mode (loop mode) -// ssd-ar: (R=4, N=3, V=z), (R=3, N=2, V=y), (R=2, N=5, V=x), (R=3, N=5, V=x) -// ssd: (R=2, N=6, V=y), (R=3, N=6, V=y) -// genet: (R=4, N=2, V=normal) - -// ssd-ar: (R=3, N=5, V=x) -#define V VX - #define R 3 - #define N 5 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - -// ssd-ar: (R=2, N=5, V=x) -#define V VX - #define R 2 - #define N 5 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - - -// ssd-ar: (R=3, N=2, V=y) -#define V VY - #define R 3 - #define N 2 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - -// ssd-ar: (R=4, N=3, V=z) -#define V VZ - #define R 4 - #define N 3 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - - -// ssd: (R=2, N=6, V=y) 
-#define V VY - #define R 2 - #define N 6 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - -// ssd: (R=3, N=6, V=y) -#define V VY - #define R 3 - #define N 6 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - -#define V VNORMAL - #define R 4 - #define N 2 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - - -#define V VY - #define R 2 - #define N 2 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - - -#define V VY - #define R 2 - #define N 5 - #define P float - #include "ConcatKernel.inc.metal" - #undef P - #define P half - #include "ConcatKernel.inc.metal" - #undef P - #undef N - #undef R -#undef V - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal deleted file mode 100644 index 87b60a64fc48ab89af274e0b24897e0b411599e0..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal +++ /dev/null @@ -1,310 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -#include -#include "Common.metal" -using namespace metal; - - -kernel void conv_add_batch_norm_relu_1x1_half( - texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device half4 *new_scale [[buffer(3)]], - const device half4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - half4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); - outTexture.write(half4(output), gid.xy, gid.z); -} - -kernel void conv_add_batch_norm_relu_3x3_half( - texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture 
[[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device half4 *new_scale [[buffer(3)]], - const device half4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - half4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - half4 weight_z = weights[weithTo + 2 * 
kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); - outTexture.write(half4(output), gid.xy, gid.z); -} - -kernel void depthwise_conv_add_batch_norm_relu_3x3_half( - texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device half4 *new_scale [[buffer(3)]], - const device half4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - half4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - 
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - half4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); - outTexture.write(half4(output), gid.xy, gid.z); -} - - - -/*---------------------------------------------*/ - - - -kernel void conv_add_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - 
output.y += dot(input, weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - 
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - float4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), 
output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - float4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddMetal.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddMetal.metal deleted file mode 100644 index 274e416576743a473ba8931bcd538e9c39415f3c..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddMetal.metal +++ /dev/null @@ -1,622 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -#include "Common.metal" - -using namespace metal; - -#pragma mark - convAdd -kernel void conv_add_1x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = biase[gid.z]; - - float4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } -// output = output + biase[gid.z]; - outTexture.write(output, gid.xy, 
gid.z); -} - -kernel void conv_add_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 9; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = biase[gid.z]; - - ushort dilation_x = param.dilationX; - ushort dilation_y = param.dilationY; - - float4 input[9]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - - input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); - - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - - input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); - - for (int j = 0; j < 9; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * 
input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } -// output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_5x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 5; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = biase[gid.z]; - - ushort dilation_y = param.dilationY; - float4 input[5]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); - - for (int j = 0; j < 5; 
++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } -// output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void conv_add_1x5(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 5; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = biase[gid.z]; - - ushort dilation_x = param.dilationX; - float4 input[5]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - - input[4] = inTexture.sample(sample, 
float2(posInInput.x + 2 * dilation_x, posInInput.y), i); - - for (int j = 0; j < 5; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } -// output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void depthwise_conv_add_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = biase[gid.z]; - float4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, 
posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - float4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } -// output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - - -#pragma mark - half - -kernel void conv_add_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = biase[gid.z]; - - half4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - 
output.y += dot(input, weight_y); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } -// output = output + float4(biase[gid.z]); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = biase[gid.z]; - - ushort dilation_x = param.dilationX; - ushort dilation_y = param.dilationY; - - half4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), 
i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); - for (int j = 0; j < 9; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(float4(input[j]), float4(weight_x)); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(float4(input[j]), float4(weight_y)); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(float4(input[j]), float4(weight_z)); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(float4(input[j]), float4(weight_w)); - } - } -// output = output + float4(biase[gid.z]); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void depthwise_conv_add_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - half4 output = biase[gid.z]; - half4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, 
float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - half4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } -// output = output + float4(biase[gid.z]); - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void conv_add_5x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 5; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = biase[gid.z]; - - ushort dilation_y = param.dilationY; - half4 input[5]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = 
inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); - - for (int j = 0; j < 5; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } -// output = output + float4(biase[gid.z]); - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void conv_add_1x5_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 5; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = biase[gid.z]; - - ushort dilation_x = 
param.dilationX; - half4 input[5]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i); - - for (int j = 0; j < 5; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } -// output = output + float4(biase[gid.z]); - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void test_conv_add_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - if (gid.x > 0 || gid.y > 0 || gid.z > 0) { return; } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 9; - - uint 
input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - ushort dilation_x = param.dilationX; - ushort dilation_y = param.dilationY; - - float4 input[9]; - - for (uint i = 0; i < input_arr_size; ++i) { - - input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - - input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); - - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - - input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); - - for (int j = 0; j < 9; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - // output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPrelu.inc.metal 
b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPrelu.inc.metal deleted file mode 100644 index 069daa20e875eb00c0d518e0463987248ca8dce5..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPrelu.inc.metal +++ /dev/null @@ -1,447 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#ifdef P - -#include "Macro.metal" - - -#pragma mark - convAdd -kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device VECTOR(P, 4) *weights [[buffer(1)]], - const device VECTOR(P, 4) *biase [[buffer(2)]], -#ifdef PRELU_CHANNEL - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_ELEMENT - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_OTHER - const device P *alpha [[buffer(3)]], -#endif - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - 
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - VECTOR(P, 4) output = biase[gid.z]; - - VECTOR(P, 4) input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i); - VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - -// output = output + float4(biase[gid.z]); - -#ifdef PRELU_CHANNEL - VECTOR(P, 4) alpha_value = alpha[gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_ELEMENT - int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); - VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_OTHER - P alpha_value = alpha[0]; - output.x = output.x > 0 ? output.x : (alpha_value * output.x); - output.y = output.y > 0 ? output.y : (alpha_value * output.y); - output.z = output.z > 0 ? output.z : (alpha_value * output.z); - output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); -#endif - outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); -} - -kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device VECTOR(P, 4) *weights [[buffer(1)]], - const device VECTOR(P, 4) *biase [[buffer(2)]], -#ifdef PRELU_CHANNEL - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_ELEMENT - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_OTHER - const device P *alpha [[buffer(3)]], -#endif - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 9; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - VECTOR(P, 4) output = biase[gid.z]; - - ushort dilation_x = param.dilationX; - ushort dilation_y = param.dilationY; - - VECTOR(P, 4) input[9]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - - input[6] = inTexture.sample(sample, 
float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); - - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - - input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); - - for (int j = 0; j < 9; ++j) { - VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } -// output = output + float4(biase[gid.z]); - -#ifdef PRELU_CHANNEL - VECTOR(P, 4) alpha_value = alpha[gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_ELEMENT - int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); - VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_OTHER - P alpha_value = alpha[0]; - output.x = output.x > 0 ? output.x : (alpha_value * output.x); - output.y = output.y > 0 ? output.y : (alpha_value * output.y); - output.z = output.z > 0 ? output.z : (alpha_value * output.z); - output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); -#endif - outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); -} - -kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device VECTOR(P, 4) *weights [[buffer(1)]], - const device VECTOR(P, 4) *biase [[buffer(2)]], -#ifdef PRELU_CHANNEL - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_ELEMENT - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_OTHER - const device P *alpha [[buffer(3)]], -#endif - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 5; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - VECTOR(P, 4) output = biase[gid.z];; - - ushort dilation_y = param.dilationY; - VECTOR(P, 4) input[5]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); - - for (int j = 0; j < 5; ++j) { - VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - VECTOR(P, 4) weight_y = 
weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - -#ifdef PRELU_CHANNEL - VECTOR(P, 4) alpha_value = alpha[gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_ELEMENT - int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); - VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_OTHER - P alpha_value = alpha[0]; - output.x = output.x > 0 ? output.x : (alpha_value * output.x); - output.y = output.y > 0 ? output.y : (alpha_value * output.y); - output.z = output.z > 0 ? output.z : (alpha_value * output.z); - output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); -#endif - outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); -} - - -kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device VECTOR(P, 4) *weights [[buffer(1)]], - const device VECTOR(P, 4) *biase [[buffer(2)]], -#ifdef PRELU_CHANNEL - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_ELEMENT - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_OTHER - const device P *alpha [[buffer(3)]], -#endif - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - const uint kernelHXW = 5; - - uint input_arr_size = inTexture.get_array_size(); - - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - VECTOR(P, 4) output = biase[gid.z]; - - ushort dilation_x = param.dilationX; - VECTOR(P, 4) input[5]; - - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); - - input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); - - input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - - input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); - - input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i); - - for (int j = 0; j < 5; ++j) { - VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - VECTOR(P, 4) weight_y = 
weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - -#ifdef PRELU_CHANNEL - VECTOR(P, 4) alpha_value = alpha[gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_ELEMENT - int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); - VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_OTHER - P alpha_value = alpha[0]; - output.x = output.x > 0 ? output.x : (alpha_value * output.x); - output.y = output.y > 0 ? output.y : (alpha_value * output.y); - output.z = output.z > 0 ? output.z : (alpha_value * output.z); - output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); -#endif - outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); -} - -kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device P *weights [[buffer(1)]], - const device VECTOR(P, 4) *biase [[buffer(2)]], -#ifdef PRELU_CHANNEL - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_ELEMENT - const device VECTOR(P, 4) *alpha [[buffer(3)]], -#endif -#ifdef PRELU_OTHER - const device P *alpha [[buffer(3)]], -#endif - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - VECTOR(P, 4) output = biase[gid.z]; - VECTOR(P, 4) inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, 
float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - VECTOR(P, 4) input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - -#ifdef PRELU_CHANNEL - VECTOR(P, 4) alpha_value = alpha[gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_ELEMENT - int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); - VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_OTHER - P alpha_value = alpha[0]; - output.x = output.x > 0 ? output.x : (alpha_value * output.x); - output.y = output.y > 0 ? output.y : (alpha_value * output.y); - output.z = output.z > 0 ? output.z : (alpha_value * output.z); - output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); -#endif - outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); -} - -#endif - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal deleted file mode 100644 index f03a1d5b625cf01f1f1bc5ac23bebf7dabd968d9..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" -using namespace metal; - -#define P float - - #define PRELU_CHANNEL prelu_channel - #define PRELU_TYPE prelu_channel - #include "ConvAddPrelu.inc.metal" - #undef PRELU_TYPE - #undef PRELU_CHANNEL - - #define PRELU_ELEMENT prelu_element - #define PRELU_TYPE prelu_element - #include "ConvAddPrelu.inc.metal" - #undef PRELU_TYPE - #undef PRELU_ELEMENT - - #define PRELU_OTHER prelu_other - #define PRELU_TYPE prelu_other - #include "ConvAddPrelu.inc.metal" - #undef PRELU_TYPE - #undef PRELU_OTHER - -#undef P - -#define P half - - #define PRELU_CHANNEL prelu_channel - #define PRELU_TYPE prelu_channel - #include "ConvAddPrelu.inc.metal" - #undef PRELU_TYPE - #undef PRELU_CHANNEL - - #define PRELU_ELEMENT prelu_element - #define PRELU_TYPE prelu_element - #include "ConvAddPrelu.inc.metal" - #undef PRELU_TYPE - #undef PRELU_ELEMENT - - #define PRELU_OTHER prelu_other - #define PRELU_TYPE prelu_other - #include "ConvAddPrelu.inc.metal" - #undef PRELU_TYPE - #undef PRELU_OTHER - -#undef P - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvBNReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvBNReluKernel.metal deleted file mode 100644 index 4b97b7829a1fba27704fe7b60a03b2672f4f5953..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvBNReluKernel.metal +++ /dev/null @@ -1,297 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -#include -#include "Common.metal" - -using namespace metal; - -#pragma mark - conv bn relu -kernel void conv_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *new_scale [[buffer(2)]], - const device float4 *new_biase [[buffer(3)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 
*weights [[buffer(1)]], - const device float4 *new_scale [[buffer(2)]], - const device float4 *new_biase [[buffer(3)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = 
weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float *weights [[buffer(1)]], - const device float4 *new_scale [[buffer(2)]], - const device float4 *new_biase [[buffer(3)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - float4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; 
++j) { - float4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -#pragma mark - half -kernel void conv_batch_norm_relu_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *new_scale [[buffer(2)]], - const device half4 *new_biase [[buffer(3)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - half4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(float4(input), float4(weight_x)); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(float4(input), float4(weight_y)); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(float4(input), float4(weight_z)); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(float4(input), float4(weight_w)); - } - output = fmax(output * 
float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); - outTexture.write(half4(output), gid.xy, gid.z); -} - -kernel void conv_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *new_scale [[buffer(2)]], - const device half4 *new_biase [[buffer(3)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - half4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(float4(input[j]), 
float4(weight_x)); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(float4(input[j]), float4(weight_y)); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(float4(input[j]), float4(weight_z)); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(float4(input[j]), float4(weight_w)); - } - } - output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); - outTexture.write(half4(output), gid.xy, gid.z); -} - -kernel void depthwise_conv_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half *weights [[buffer(1)]], - const device half4 *new_scale [[buffer(2)]], - const device half4 *new_biase [[buffer(3)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - half4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = 
inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - half4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); - outTexture.write(half4(output), gid.xy, gid.z); -} - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal deleted file mode 100644 index c07515c13da54c7f8bf698f976e47f7cda6de32b..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal +++ /dev/null @@ -1,280 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" -using namespace metal; - -// conv -#pragma mark -- conv -kernel void conv_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], 
weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - outTexture.write(output, gid.xy, gid.z); -} - -kernel void depthwise_conv_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - float4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - 
float4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_1x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void conv_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { 
- - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - half4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(float4(input[j]), float4(weight_x)); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(float4(input[j]), float4(weight_y)); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(float4(input[j]), float4(weight_z)); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(float4(input[j]), 
float4(weight_w)); - } - } - outTexture.write(half4(output), gid.xy, gid.z); -} - -kernel void depthwise_conv_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - half4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - half4 input = inputs[j]; - output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]); - output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]); - output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]); - 
output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]); - } - outTexture.write(half4(output), gid.xy, gid.z); -} - -kernel void conv_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - half4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(float4(input), float4(weight_x)); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(float4(input), float4(weight_y)); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(float4(input), float4(weight_z)); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(float4(input), float4(weight_w)); - } - outTexture.write(half4(output), gid.xy, gid.z); -} - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal deleted file mode 100644 index baf3f31157a472412bb08ccb3c803f5ec9e25d9c..0000000000000000000000000000000000000000 --- 
a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -struct MetalConvTransposeParam{ - ushort kernelW; - ushort kernelH; - - ushort strideX; - ushort strideY; - - ushort paddingX; - ushort paddingY; - - ushort dilationX; - ushort dilationY; -}; - -kernel void conv_transpose2x2_stride2(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvTransposeParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - int input_array_size = inTexture.get_array_size(); - int kernel_index_x = gid.x % 2; - int kernel_index_y = gid.y % 2; - int kernel_index = kernel_index_y * 2 + kernel_index_x; - int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size); - int input_x = gid.x / 2; - int input_y = gid.y / 2; - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 output = float4(0.0); - for (int i = 0; i < input_array_size; ++i) { - - float4 input = inTexture.sample(sample, float2(input_x, input_y), i); - - float4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + 
i]; - float4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i]; - float4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i]; - float4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i]; - - output.x += dot(input, kernel_slice0); - - output.y += dot(input, kernel_slice1); - - output.z += dot(input, kernel_slice2); - - output.w += dot(input, kernel_slice3); - } - - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_transpose2x2_stride2_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvTransposeParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - int input_array_size = inTexture.get_array_size(); - int kernel_index_x = gid.x % 2; - int kernel_index_y = gid.y % 2; - int kernel_index = kernel_index_y * 2 + kernel_index_x; - int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size); - int input_x = gid.x / 2; - int input_y = gid.y / 2; - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 output = float4(0.0); - for (int i = 0; i < input_array_size; ++i) { - - half4 input = inTexture.sample(sample, float2(input_x, input_y), i); - - half4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i]; - half4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i]; - half4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i]; - half4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i]; - - output.x += dot(float4(input), float4(kernel_slice0)); - - output.y += dot(float4(input), float4(kernel_slice1)); - - output.z += dot(float4(input), float4(kernel_slice2)); - - output.w += dot(float4(input), float4(kernel_slice3)); - } - - 
outTexture.write(half4(output), gid.xy, gid.z); -} - -//kernel void conv_transpose(texture2d_array inTexture [[texture(0)]], -// texture2d_array outTexture [[texture(1)]], -// constant MetalConvTransposeParam ¶m [[buffer(0)]], -// const device float4 *weights [[buffer(1)]], -// uint3 gid [[thread_position_in_grid]]){ -// if (gid.x >= outTexture.get_width() || -// gid.y >= outTexture.get_height() || -// gid.z >= outTexture.get_array_size()) { -// return; -// } -// -// int input_array_size = inTexture.get_array_size(); -// -// uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH; -// -// uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice); -// -// constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); -// -// float4 output; -// -// for (int w = 0; w < param.kernelW; ++w) { -// int top = gid.x - w * param.dilationX + param.paddingX; -// int input_x = top / param.strideX; -// if (top < 0 || input_x >= int(inTexture.get_width())) { -// continue; -// } -// -// for (int h = 0; h < param.kernelH; ++h) { -// int top_y = gid.y - h * param.dilationY + param.paddingY; -// int input_y = top_y / param.strideY; -// if (top_y < 0 || input_y >= int(inTexture.get_height())) { -// continue; -// } -// -// uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size(); -// -// for (int slice = 0; slice < input_array_size; ++slice) { -// -// float4 input; -// float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice]; -// float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice]; -// -// float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice]; -// -// float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice]; -// -// input = inTexture.sample(sample, float2(input_x, input_y), slice); -// output.x += dot(input, kernel_slice); -// output.y += 
dot(input, kernel_slice1); -// output.z += dot(input, kernel_slice2); -// output.w += dot(input, kernel_slice3); -// } -// } -// } -// -// outTexture.write(output, gid.xy, gid.z); -//} -// diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal deleted file mode 100644 index b152df828106acd96171a89f4f636f308e0e9e39..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" - -using namespace metal; - -struct ElementwiseAddParam { - int32_t fast; - int32_t axis; - int32_t ylen; - int32_t xdim[4]; - int32_t xtrans[4]; - int32_t ydim[4]; - int32_t ytrans[4]; -}; - -kernel void elementwise_add(texture2d_array inputX [[texture(0)]], - texture2d_array inputY [[texture(1)]], - texture2d_array outTexture [[texture(2)]], - constant ElementwiseAddParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - float4 rx, ry; - - if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); - ry = inputY.read(gid.xy, gid.z); - } else { - rx = inputX.read(gid.xy, gid.z); - int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; - int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; - int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; - int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; - int32_t yshift = 4 - pm.ylen - pm.axis; - for (int n = 0; n < 4; n++) { - x_xyzn[3] = n; - xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); - invtrans(xtrans, x_abcd, t_abcd); - for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) { - y_abcd[yshift+k] = t_abcd[k]; - } - trans(ytrans, y_abcd, t_abcd); - abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); - ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]]; - } - } - float4 r = rx + ry; - outTexture.write(r, gid.xy, gid.z); -} - -kernel void elementwise_add_half(texture2d_array inputX [[texture(0)]], - texture2d_array inputY [[texture(1)]], - texture2d_array outTexture [[texture(2)]], - constant ElementwiseAddParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - half4 rx, ry; - - if (pm.fast == 1) { - rx = inputX.read(gid.xy, 
gid.z); - ry = inputY.read(gid.xy, gid.z); - } else { - rx = inputX.read(gid.xy, gid.z); - int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; - int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; - int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; - int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; - int32_t yshift = 4 - pm.ylen - pm.axis; - for (int n = 0; n < 4; n++) { - x_xyzn[3] = n; - xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); - invtrans(xtrans, x_abcd, t_abcd); - for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) { - y_abcd[yshift+k] = t_abcd[k]; - } - trans(ytrans, y_abcd, t_abcd); - abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); - ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]]; - } - } - half4 r = rx + ry; - outTexture.write(r, gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.inc.metal deleted file mode 100644 index b1d68d680962c53778d624ab15bfcfeb1d1a3142..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.inc.metal +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#include -#include "Macro.metal" - -using namespace metal; - -kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array inputX [[texture(0)]], - texture2d_array inputY [[texture(1)]], - texture2d_array outTexture [[texture(2)]], - constant ElementwiseAddParam &pm [[buffer(0)]], -#ifdef PRELU_CHANNEL - const device VECTOR(P, 4) *alpha [[buffer(1)]], -#endif -#ifdef PRELU_ELEMENT - const device VECTOR(P, 4) *alpha [[buffer(1)]], -#endif -#ifdef PRELU_OTHER - const device P *alpha [[buffer(1)]], -#endif - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - VECTOR(P, 4) rx, ry; - - if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); - ry = inputY.read(gid.xy, gid.z); - } else { - rx = inputX.read(gid.xy, gid.z); - int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; - int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; - int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; - int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; - int32_t yshift = 4 - pm.ylen - pm.axis; - for (int n = 0; n < 4; n++) { - x_xyzn[3] = n; - xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); - invtrans(xtrans, x_abcd, t_abcd); - for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) { - y_abcd[yshift+k] = t_abcd[k]; - } - trans(ytrans, y_abcd, t_abcd); - abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); - ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]]; - } - } - VECTOR(P, 4) output = rx + ry; - -#ifdef PRELU_CHANNEL - VECTOR(P, 4) alpha_value = alpha[gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? 
output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_ELEMENT - int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); - VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; - output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); - output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); - output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); - output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); -#endif -#ifdef PRELU_OTHER - P alpha_value = alpha[0]; - output.x = output.x > 0 ? output.x : (alpha_value * output.x); - output.y = output.y > 0 ? output.y : (alpha_value * output.y); - output.z = output.z > 0 ? output.z : (alpha_value * output.z); - output.w = output.w > 0 ? output.w : (alpha_value * output.w); -#endif - - outTexture.write(output, gid.xy, gid.z); -} - -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.metal deleted file mode 100644 index 8fd1a9fdab8c86fbc52f6dab9c448b7b0f27d403..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.metal +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" -using namespace metal; - -struct ElementwiseAddParam { - int32_t fast; - int32_t axis; - int32_t ylen; - int32_t xdim[4]; - int32_t xtrans[4]; - int32_t ydim[4]; - int32_t ytrans[4]; -}; - -#define P float - -#define PRELU_CHANNEL prelu_channel -#define PRELU_TYPE channel -#include "ElementwiseAddPreluKernel.inc.metal" -#undef PRELU_TYPE -#undef PRELU_CHANNEL - -#define PRELU_ELEMENT element -#define PRELU_TYPE prelu_element -#include "ElementwiseAddPreluKernel.inc.metal" -#undef PRELU_TYPE -#undef PRELU_ELEMENT - -#define PRELU_OTHER other -#define PRELU_TYPE prelu_other -#include "ElementwiseAddPreluKernel.inc.metal" -#undef PRELU_TYPE -#undef PRELU_OTHER - -#undef P - -#define P half - -#define PRELU_CHANNEL channel -#define PRELU_TYPE channel -#include "ElementwiseAddPreluKernel.inc.metal" -#undef PRELU_TYPE -#undef PRELU_CHANNEL - -#define PRELU_ELEMENT element -#define PRELU_TYPE prelu_element -#include "ElementwiseAddPreluKernel.inc.metal" -#undef PRELU_TYPE -#undef PRELU_ELEMENT - -#define PRELU_OTHER other -#define PRELU_TYPE prelu_other -#include "ElementwiseAddPreluKernel.inc.metal" -#undef PRELU_TYPE -#undef PRELU_OTHER - -#undef P - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal deleted file mode 100644 index b7d7028d46356e0dae21b352161de31b0820ff1a..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -kernel void fetch(texture2d_array inTexture [[texture(0)]], - device float *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height() || - gid.z >= inTexture.get_array_size()) { - return; - } - - int input_width = inTexture.get_width(); - int input_height = inTexture.get_height(); - const float4 input = inTexture.read(gid.xy, gid.z); - int output_to = 4 * input_width * input_height; - output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x; - output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y; -// output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z; -// output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w; -} - - -kernel void fetch_half(texture2d_array inTexture [[texture(0)]], - device float * output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height() || - gid.z >= inTexture.get_array_size()) { - return; - } - - int input_width = inTexture.get_width(); - int input_height = inTexture.get_height(); - const half4 input = inTexture.read(gid.xy, gid.z); - int output_to = 4 * input_width * input_height; - output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x; - output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + 
gid.x] = input.y; -// output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z; -// output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w; - -} - -kernel void fetch_placeholder(texture2d_array inTexture [[texture(0)]], - device float *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - -} - -kernel void fetch_placeholder_half(texture2d_array inTexture [[texture(0)]], - device float *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { -} - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal deleted file mode 100644 index 368509f001aca6361b81b9b7839cf24b2efc5c12..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" -using namespace metal; - -// 占位函数, 啥也没干 -kernel void place_holder(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { -} - -struct OutputDim { - ushort width; - ushort height; - ushort strideX; - ushort strideY; -}; - -kernel void resize(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant OutputDim ¶ms [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint2 pos = gid.xy * uint2(params.strideX, params.strideY); - const half4 input = inTexture.read(pos); - outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z); -} - - -kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height()){ - return; - } - const float4 input = inTexture.read(gid.xy); - outTexture.write(input, gid.xy, 0); -} - -kernel void texture2d_to_2d_array_half(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height()){ - return; - } - const half4 input = inTexture.read(gid.xy); - outTexture.write(input, gid.xy, 0); -} - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal deleted file mode 100644 index 950d7d5f0555b841da57554ff61f2f5cdbcae7aa..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 
2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b -#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c -#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d -#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e - -#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p) -#define VECTOR(p, n) CONCAT2(p, n) - -#define FUNC3_(a, b, c) CONCAT3_(a, b, c) - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal deleted file mode 100644 index 44c57440e1ec138717ad1bc569fd772e0d7ede1a..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -kernel void nms_fetch_result(texture2d_array inTexture [[texture(0)]], - device float *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height() || - gid.z >= inTexture.get_array_size()) { - return; - } - - int input_width = inTexture.get_width(); - const float4 input = inTexture.read(gid.xy, gid.z); - output[gid.y * input_width + gid.x] = input.x; - -} - - -kernel void nms_fetch_result_half(texture2d_array inTexture [[texture(0)]], - device float *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height() || - gid.z >= inTexture.get_array_size()) { - return; - } - - int input_width = inTexture.get_width(); - const half4 input = inTexture.read(gid.xy, gid.z); - output[gid.y * input_width + gid.x] = input.x; -} - -kernel void nms_fetch_bbox(texture2d_array inTexture [[texture(0)]], - device float4 *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height() || - gid.z >= inTexture.get_array_size()) { - return; - } - - int input_width = inTexture.get_width(); -// int input_height = inTexture.get_height(); - const float4 input = inTexture.read(gid.xy, gid.z); - output[gid.y * input_width + gid.x] = input; -} - -kernel void nms_fetch_bbox_half(texture2d_array inTexture [[texture(0)]], - device float4 *output [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height() || - gid.z >= inTexture.get_array_size()) { - return; - } - - int input_width = inTexture.get_width(); -// int input_height = inTexture.get_height(); - const half4 input = inTexture.read(gid.xy, gid.z); - output[gid.y * input_width + gid.x] = 
float4(input); -} - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PoolKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PoolKernel.metal deleted file mode 100644 index 1f2f7240db2ba716090001ed539bddb87dff5117..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PoolKernel.metal +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" -using namespace metal; - -struct PoolParam { - int ksizeX; - int ksizeY; - int strideX; - int strideY; - int paddingX; - int paddingY; - int poolType; -}; - -kernel void pool(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant PoolParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int xmin = gid.x * pm.strideX - pm.paddingX; - int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width())); - xmin = max(xmin, 0); - int ymin = gid.y * pm.strideX - pm.paddingX; - int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height())); - ymin = max(ymin, 0); - - float4 r = 0; - if (pm.poolType == 0) { - r = inTexture.read(uint2(xmin, ymin), gid.z); - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r = fmax(r, inTexture.read(uint2(x, y), gid.z)); - } - } - } else if (pm.poolType == 1) { - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r += inTexture.read(uint2(x, y), gid.z); - } - } - r /= pm.ksizeX * pm.ksizeY; - } - outTexture.write(r, gid.xy, gid.z); -} - -kernel void pool_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant PoolParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int xmin = gid.x * pm.strideX - pm.paddingX; - int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width())); - xmin = max(xmin, 0); - int ymin = gid.y * pm.strideX - pm.paddingX; - int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height())); - ymin = max(ymin, 0); - - half4 r = 0; - if (pm.poolType == 0) { - r = inTexture.read(uint2(xmin, ymin), gid.z); - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) 
{ - r = fmax(r, inTexture.read(uint2(x, y), gid.z)); - } - } - } else if (pm.poolType == 1) { - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r += inTexture.read(uint2(x, y), gid.z); - } - } - r /= pm.ksizeX * pm.ksizeY; - } - outTexture.write(r, gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal deleted file mode 100644 index 597804137743dd253d05d91a5008f558dcaf42e7..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -kernel void prelu_channel(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device float4 *alpha [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z); - float4 alpha_value = alpha[gid.z]; - float4 output; - output.x = input.x > 0 ? input.x : (alpha_value.x * input.x); - output.y = input.y > 0 ? 
input.y : (alpha_value.y * input.y); - output.z = input.z > 0 ? input.z : (alpha_value.z * input.z); - output.w = input.w > 0 ? input.w : (alpha_value.w * input.w); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void prelu_element(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device float4 *alpha [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z); - - int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size(); - float4 alpha_value = alpha[alpha_to + gid.z]; - - float4 output; - output.x = input.x > 0 ? input.x : (alpha_value.x * input.x); - output.y = input.y > 0 ? input.y : (alpha_value.y * input.y); - output.z = input.z > 0 ? input.z : (alpha_value.z * input.z); - output.w = input.w > 0 ? input.w : (alpha_value.w * input.w); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void prelu_other(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device float *alpha [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z); - float alpha_value = alpha[0]; - float4 output; - output.x = input.x > 0 ? input.x : (alpha_value * input.x); - output.y = input.y > 0 ? input.y : (alpha_value * input.y); - output.z = input.z > 0 ? input.z : (alpha_value * input.z); - output.w = input.w > 0 ? 
input.w : (alpha_value * input.w); - outTexture.write(output, gid.xy, gid.z); -} - - -kernel void prelu_channel_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 *alpha [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z); - half4 alpha_value = alpha[gid.z]; - half4 output; - output.x = input.x > 0 ? input.x : (alpha_value.x * input.x); - output.y = input.y > 0 ? input.y : (alpha_value.y * input.y); - output.z = input.z > 0 ? input.z : (alpha_value.z * input.z); - output.w = input.w > 0 ? input.w : (alpha_value.w * input.w); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void prelu_element_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 *alpha [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z); - - int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size(); - half4 alpha_value = alpha[alpha_to + gid.z]; - - half4 output; - output.x = input.x > 0 ? input.x : (alpha_value.x * input.x); - output.y = input.y > 0 ? input.y : (alpha_value.y * input.y); - output.z = input.z > 0 ? input.z : (alpha_value.z * input.z); - output.w = input.w > 0 ? 
input.w : (alpha_value.w * input.w); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void prelu_other_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half *alpha [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z); - half alpha_value = alpha[0]; - half4 output; - output.x = input.x > 0 ? input.x : (alpha_value * input.x); - output.y = input.y > 0 ? input.y : (alpha_value * input.y); - output.z = input.z > 0 ? input.z : (alpha_value * input.z); - output.w = input.w > 0 ? input.w : (alpha_value * input.w); - outTexture.write(output, gid.xy, gid.z); -} - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal deleted file mode 100644 index 7630febf77210bb364f0191e8b10a5a6923d6c95..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal +++ /dev/null @@ -1,367 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -using namespace metal; - -struct PriorBoxMetalParam { - float offset; - float stepWidth; - float stepHeight; - float minSize; - float maxSize; - float imageWidth; - float imageHeight; - - bool clip; - - uint numPriors; - uint aspecRatiosSize; - uint minSizeSize; - uint maxSizeSize; -}; - -kernel void prior_box(texture2d_array inTexture [[texture(0)]], - texture2d_array outBoxTexture [[texture(1)]], - texture2d_array varianceTexture [[texture(2)]], - const device float *aspect_ratios [[buffer(0)]], - constant PriorBoxMetalParam ¶m [[buffer(1)]], - const device float4 *variances [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outBoxTexture.get_width() || - gid.y >= outBoxTexture.get_height() || - gid.z >= outBoxTexture.get_array_size()) return; - - float center_x = (gid.x + param.offset) * param.stepWidth; - float center_y = (gid.y + param.offset) * param.stepHeight; - - float box_width, box_height; - - if (gid.z < param.aspecRatiosSize) { - float ar = aspect_ratios[gid.z]; - box_width = param.minSize * sqrt(ar) / 2; - box_height = param.minSize / sqrt(ar) / 2; - float4 box; - box.x = (center_x - box_width) / param.imageWidth; - box.y = (center_y - box_height) / param.imageHeight; - box.z = (center_x + box_width) / param.imageWidth; - box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = fmin(fmax(box, 0.0), 1.0); - } else { - res = box; - } - - outBoxTexture.write(res, gid.xy, gid.z); - } else if (gid.z >= param.aspecRatiosSize) { - if (param.maxSizeSize > 0) { - box_width = box_height = sqrt(param.minSize * param.maxSize) / 2; - float4 max_box; - max_box.x = (center_x - box_width) / param.imageWidth; - max_box.y = (center_y - box_height) / param.imageHeight; - max_box.z = (center_x + box_width) / param.imageWidth; - max_box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = min(max(max_box, 0.0), 1.0); - } else { - res = max_box; - } - 
outBoxTexture.write(max_box, gid.xy, gid.z); - } - } - - float4 variance = variances[0]; - if (gid.z < param.numPriors) { - float4 variances_output; - variances_output.x = variance.x; - variances_output.y = variance.y; - variances_output.z = variance.z; - variances_output.w = variance.w; - varianceTexture.write(variances_output, gid.xy, gid.z); - } -} - - -kernel void prior_box_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outBoxTexture [[texture(1)]], - texture2d_array varianceTexture [[texture(2)]], - const device half *aspect_ratios [[buffer(0)]], - constant PriorBoxMetalParam ¶m [[buffer(1)]], - const device float4 *variances [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outBoxTexture.get_width() || - gid.y >= outBoxTexture.get_height() || - gid.z >= outBoxTexture.get_array_size()) return; - - float center_x = (gid.x + param.offset) * param.stepWidth; - float center_y = (gid.y + param.offset) * param.stepHeight; - - float box_width, box_height; - - if (gid.z < param.aspecRatiosSize) { - half ar = aspect_ratios[gid.z]; - box_width = param.minSize * sqrt(ar) / 2; - box_height = param.minSize / sqrt(ar) / 2; - float4 box; - box.x = (center_x - box_width) / param.imageWidth; - box.y = (center_y - box_height) / param.imageHeight; - box.z = (center_x + box_width) / param.imageWidth; - box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = fmin(fmax(box, 0.0), 1.0); - } else { - res = box; - } - - outBoxTexture.write(half4(res), gid.xy, gid.z); - } else if (gid.z >= param.aspecRatiosSize) { - if (param.maxSizeSize > 0) { - box_width = box_height = sqrt(param.minSize * param.maxSize) / 2; - float4 max_box; - max_box.x = (center_x - box_width) / param.imageWidth; - max_box.y = (center_y - box_height) / param.imageHeight; - max_box.z = (center_x + box_width) / param.imageWidth; - max_box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = 
min(max(max_box, 0.0), 1.0); - } else { - res = max_box; - } - outBoxTexture.write(half4(max_box), gid.xy, gid.z); - } - } - - float4 variance = variances[0]; - if (gid.z < param.numPriors) { - float4 variances_output; - variances_output.x = variance.x; - variances_output.y = variance.y; - variances_output.z = variance.z; - variances_output.w = variance.w; - varianceTexture.write(half4(variances_output), gid.xy, gid.z); - } -} - - - -kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array inTexture [[texture(0)]], - texture2d_array outBoxTexture [[texture(1)]], - texture2d_array varianceTexture [[texture(2)]], - const device float *aspect_ratios [[buffer(0)]], - constant PriorBoxMetalParam ¶m [[buffer(1)]], - const device float4 *variances [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outBoxTexture.get_width() || - gid.y >= outBoxTexture.get_height() || - gid.z >= outBoxTexture.get_array_size()) return; - - float center_x = (gid.x + param.offset) * param.stepWidth; - float center_y = (gid.y + param.offset) * param.stepHeight; - - float box_width, box_height; - - - - if (gid.z == 0) { - box_width = box_height = param.minSize / 2; - - float4 box; - box.x = (center_x - box_width) / param.imageWidth; - box.y = (center_y - box_height) / param.imageHeight; - box.z = (center_x + box_width) / param.imageWidth; - box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = fmin(fmax(box, 0.0), 1.0); - } else { - res = box; - } - - outBoxTexture.write(res, gid.xy, gid.z); - } - - if (gid.z == 1 && param.maxSizeSize > 0) { - - box_width = box_height = sqrt(param.minSize * param.maxSize) / 2; - float4 max_box; - max_box.x = (center_x - box_width) / param.imageWidth; - max_box.y = (center_y - box_height) / param.imageHeight; - max_box.z = (center_x + box_width) / param.imageWidth; - max_box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = min(max(max_box, 0.0), 1.0); 
- } else { - res = max_box; - } - outBoxTexture.write(res, gid.xy, gid.z); - } - - int aspect_to = 0; - if (param.maxSizeSize > 0) { - aspect_to = gid.z - 2; - } else { - aspect_to = gid.z - 1; - } - - - - - if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) { - - int skip = 0; - for (int i = 0; i < aspect_to + 1; ++i) { - if (fabs(aspect_ratios[i] - 1.) < 1e-6) { - skip += 1; - } - } - aspect_to += skip; - - float ar = aspect_ratios[aspect_to]; - - box_width = param.minSize * sqrt(ar) / 2; - box_height = param.minSize / sqrt(ar) / 2; - float4 box; - box.x = (center_x - box_width) / param.imageWidth; - box.y = (center_y - box_height) / param.imageHeight; - box.z = (center_x + box_width) / param.imageWidth; - box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = fmin(fmax(box, 0.0), 1.0); - } else { - res = box; - } - - outBoxTexture.write(res, gid.xy, gid.z); - } - - float4 variance = variances[0]; - if (gid.z < param.numPriors) { - float4 variances_output; - variances_output.x = variance.x; - variances_output.y = variance.y; - variances_output.z = variance.z; - variances_output.w = variance.w; - varianceTexture.write(variances_output, gid.xy, gid.z); - } -} - - -kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outBoxTexture [[texture(1)]], - texture2d_array varianceTexture [[texture(2)]], - const device half *aspect_ratios [[buffer(0)]], - constant PriorBoxMetalParam ¶m [[buffer(1)]], - const device float4 *variances [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outBoxTexture.get_width() || - gid.y >= outBoxTexture.get_height() || - gid.z >= outBoxTexture.get_array_size()) return; - - float center_x = (gid.x + param.offset) * param.stepWidth; - float center_y = (gid.y + param.offset) * param.stepHeight; - - float box_width, box_height; - - - - if (gid.z == 0) { - box_width = box_height = param.minSize / 2; - - float4 box; - 
box.x = (center_x - box_width) / param.imageWidth; - box.y = (center_y - box_height) / param.imageHeight; - box.z = (center_x + box_width) / param.imageWidth; - box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = fmin(fmax(box, 0.0), 1.0); - } else { - res = box; - } - - outBoxTexture.write(half4(res), gid.xy, gid.z); - } - - if (gid.z == 1 && param.maxSizeSize > 0) { - - box_width = box_height = sqrt(param.minSize * param.maxSize) / 2; - float4 max_box; - max_box.x = (center_x - box_width) / param.imageWidth; - max_box.y = (center_y - box_height) / param.imageHeight; - max_box.z = (center_x + box_width) / param.imageWidth; - max_box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = min(max(max_box, 0.0), 1.0); - } else { - res = max_box; - } - outBoxTexture.write(half4(res), gid.xy, gid.z); - } - - int aspect_to = 0; - if (param.maxSizeSize > 0) { - aspect_to = gid.z - 2; - } else { - aspect_to = gid.z - 1; - } - - if (aspect_to > 0 && aspect_to < int(param.aspecRatiosSize) && fabs(aspect_ratios[aspect_to] - 1.) 
> 1e-6) { - float ar = aspect_ratios[aspect_to]; - - box_width = param.minSize * sqrt(ar) / 2; - box_height = param.minSize / sqrt(ar) / 2; - float4 box; - box.x = (center_x - box_width) / param.imageWidth; - box.y = (center_y - box_height) / param.imageHeight; - box.z = (center_x + box_width) / param.imageWidth; - box.w = (center_y + box_height) / param.imageHeight; - - float4 res; - if (param.clip) { - res = fmin(fmax(box, 0.0), 1.0); - } else { - res = box; - } - - outBoxTexture.write(half4(res), gid.xy, gid.z); - } - - float4 variance = variances[0]; - if (gid.z < param.numPriors) { - float4 variances_output; - variances_output.x = variance.x; - variances_output.y = variance.y; - variances_output.z = variance.z; - variances_output.w = variance.w; - varianceTexture.write(half4(variances_output), gid.xy, gid.z); - } -} - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal deleted file mode 100644 index e725440bbe997d571f1860bce323516144a94da8..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -using namespace metal; - - -kernel void relu_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const half4 input = inTexture.read(gid.xy, gid.z); - const float4 relu = fmax((float4)input, 0.0); - outTexture.write(half4(relu), gid.xy, gid.z); -} - -kernel void relu(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const float4 input = inTexture.read(gid.xy, gid.z); - const float4 relu = fmax((float4)input, 0.0); - outTexture.write(float4(relu), gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal deleted file mode 100644 index 7583537c2b404b7a95eeedfb4c69793a608f18ac..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b -#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c -#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d - -#define FUNC(f, r1, r2, p) CONCAT4_(f, r1, r2, p) -#define VECTOR(p, n) CONCAT2(p, n) -#define FUNC_R(f, r) CONCAT2_(f, r) - -kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant ReshapeParam &rp [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - - int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4]; - ReshapeParam lrp = rp; - int oC = lrp.odim[lrp.otrans[3]]; - int iC = lrp.idim[lrp.itrans[3]]; - int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3]; - VECTOR(P, 4) r; - for (int n = 0; n < 4; n++) { - oxyzn[3] = n; -#if ROUT == 4 - xyzn2abcd_4(oC, oxyzn, oabcd); -#else - FUNC_R(xyzn2abcd, ROUT)(oxyzn, oabcd); -#endif - int tabcd[4]; - invtrans(lrp.otrans, oabcd, tabcd); - int index = abcd2index(lrp.odim, tabcd); - if (index < count) { - index2abcd(lrp.idim, index, tabcd); - trans(lrp.itrans, tabcd, iabcd); -#if RIN == 4 - abcd2xyzn_4(iC, iabcd, ixyzn); -#else - FUNC_R(abcd2xyzn, RIN)(iabcd, ixyzn); -#endif - r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]]; - } else { - r[n] = 0; - } - } - outTexture.write(r, gid.xy, gid.z); -} - -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.metal deleted file mode 100644 index d2f5815d422ec8c4f3e1e3c1992855547e002264..0000000000000000000000000000000000000000 --- 
a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.metal +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONRITIONS OF ANY KINR, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -#include "Common.metal" - -using namespace metal; - -struct ReshapeParam { - int32_t idim[4]; - int32_t itrans[4]; - int32_t odim[4]; - int32_t otrans[4]; -}; - -#define P float -#define RIN 4 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#define RIN 3 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#define RIN 2 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#define RIN 1 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define 
ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#undef P - -#define P half -#define RIN 4 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#define RIN 3 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#define RIN 2 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN - -#define RIN 1 -#define ROUT 4 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 3 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 2 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#define ROUT 1 -#include "ReshapeKernel.inc.metal" -#undef ROUT -#undef RIN -#undef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ResizeBilinear.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ResizeBilinear.metal deleted file mode 100644 index fbb4e12cb82c12f8dc5b94c397e43b8c8c5ae518..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ResizeBilinear.metal +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -struct resize_bilinear_param { -// int32_t out_h; -// int32_t out_w; - float ratio_h; - float ratio_w; -}; - -kernel void resize_bilinear(texture2d_array input [[texture(0)]], - texture2d_array output [[texture(2)]], - constant resize_bilinear_param & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - float4 r; - if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) { - r = input.read(gid.xy, gid.z); - } else { - float w = gid.x * pm.ratio_w; - float h = gid.y * pm.ratio_h; - uint w0 = w, h0 = h; - uint w1 = w0 + 1, h1 = h0 + 1; - float w1lambda = w - w0, h1lambda = h - h0; - float w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda; - if (w1 >= input.get_width()) w1 = w0; - if (h1 >= input.get_height()) h1 = h0; - float4 r0 = input.read(uint2(w0, h0), gid.z); - float4 r1 = input.read(uint2(w1, h0), gid.z); - float4 r2 = input.read(uint2(w0, h1), gid.z); - float4 r3 = input.read(uint2(w1, h1), gid.z); - r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3); - } - output.write(r, gid.xy, gid.z); -} - -kernel void resize_bilinear_half(texture2d_array input [[texture(0)]], - texture2d_array output [[texture(2)]], - constant resize_bilinear_param & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - half4 r; - if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) { - r = input.read(gid.xy, gid.z); - } else { - half w = gid.x * pm.ratio_w; - half h = gid.y * pm.ratio_h; - uint w0 = w, h0 
= h; - uint w1 = w0 + 1, h1 = h0 + 1; - half w1lambda = w - w0, h1lambda = h - h0; - half w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda; - if (w1 >= input.get_width()) w1 = w0; - if (h1 >= input.get_height()) h1 = h0; - half4 r0 = input.read(uint2(w0, h0), gid.z); - half4 r1 = input.read(uint2(w1, h0), gid.z); - half4 r2 = input.read(uint2(w0, h1), gid.z); - half4 r3 = input.read(uint2(w1, h1), gid.z); - r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3); - } - output.write(r, gid.xy, gid.z); - output.write(r, gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Shape.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Shape.metal deleted file mode 100644 index b50d5547193ccc9a1bef1b3ed6bbd1b7a64c3527..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Shape.metal +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -using namespace metal; - -kernel void shape() { -} -kernel void shape_half() { -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.inc.metal deleted file mode 100644 index 455cf1471b5c369fc27040e03b57812e8d6bf0e8..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.inc.metal +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b - -#define FUNC(f, p) CONCAT2_(f, p) -#define VECTOR(p, n) CONCAT2(p, n) - -kernel void FUNC(softmax, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant SoftmaxParam &sp [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; -// int zsize = inTexture.get_array_size(); - P maxv = inTexture.read(uint2(0, gid.y), 0)[0]; - int group = sp.K / 4; - int remain = sp.K % 4; - for (int x = 0; x < group; x++) { - VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0); - maxv = max(maxv, max(r[0], max(r[1], max(r[2], r[3])))); - } - if (remain > 0) { - VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0); - for (int i = 0; i < remain; i++) { - maxv = max(maxv, r[i]); - } - } - VECTOR(P, 4) rsum = {0, 0, 0, 0}; - for (int x = 0; x < group; x++) { - VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0); - rsum += exp(r - maxv); - } - P sum = rsum[0] + rsum[1] + rsum[2] + rsum[3]; - if (remain > 0) { - VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0); - for (int i = 0; i < remain; i++) { - sum += exp(r[i] - maxv); - } - } - VECTOR(P, 4) rr = inTexture.read(gid.xy, gid.z); - rr = exp(rr - maxv) / sum; - outTexture.write(rr, gid.xy, gid.z); -} - -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal deleted file mode 100644 index 67c279a4441095e710985c65d85aac589b7d0f54..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -struct SoftmaxParam { - int N; - int K; -}; - -#define P float -#include "Softmax.inc.metal" -#undef P - -#define P half -#include "Softmax.inc.metal" -#undef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.inc.metal deleted file mode 100644 index 54e3f21e793a9c1474f13fed61857211cb7d117f..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.inc.metal +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b -#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c -#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d -#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e - -#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p) -#define VECTOR(p, n) CONCAT2(p, n) -#define FUNC_R(f, r) CONCAT2_(f, r) - -#if V == VX -#define VV x -#elif V == VY -#define VV y -#elif V == VZ -#define VV z -#else -#define VV normal -#endif - -#if V == VY -kernel void FUNC(split, R, N, VV, P)(texture2d_array input [[texture(0)]], - texture2d_array out1 [[texture(1)]], - texture2d_array out2 [[texture(2)]], -#if N >= 3 - texture2d_array out3 [[texture(3)]], -#endif // N >= 3 -#if N >= 4 - texture2d_array out4 [[texture(4)]], -#endif // N >= 4 - constant SplitParam &sp [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - - VECTOR(P, 4) r = input.read(gid.xy, gid.z); - int y = gid.y - sp.offset; - if (y < sp.vdim[0]) { - out1.write(r, gid.xy, gid.z); - return; - } - y -= sp.vdim[0]; - if (y < sp.vdim[1]) { - out2.write(r, uint2(gid.x, y), gid.z); - return; - } -#if N >= 3 - y -= sp.vdim[1]; - if (y < sp.vdim[2]) { - out3.write(r, uint2(gid.x, y), gid.z); - return; - } -#endif // N >= 3 -#if N >= 4 - y -= sp.vdim[2]; - if (y < sp.vdim[3]) { - out4.write(r, uint2(gid.x, y), gid.z); - return; - } -#endif // N >= 4 -} -#endif // V == VY - - -#if V == VX -kernel void FUNC(split, R, N, VV, P)(texture2d_array input [[texture(0)]], - texture2d_array out1 [[texture(1)]], - texture2d_array out2 [[texture(2)]], -#if N >= 3 - texture2d_array out3 [[texture(3)]], -#endif // N >= 3 -#if N >= 4 - texture2d_array out4 [[texture(4)]], -#endif // N >= 4 - constant SplitParam &sp [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - VECTOR(P, 4) r = input.read(gid.xy, gid.z); - int x = gid.x; - if (x < sp.vdim[0]) { - out1.write(r, gid.xy, gid.z); - return; - } - x -= sp.vdim[0]; - if (x < sp.vdim[1]) 
{ - out2.write(r, uint2(x, gid.y), gid.z); - return; - } -#if N >= 3 - x -= sp.vdim[1]; - if (x < sp.vdim[2]) { - out3.write(r, uint2(x, gid.y), gid.z); - return; - } -#endif // N >= 3 -#if N >= 4 - x -= sp.vdim[2]; - if (x < sp.vdim[3]) { - out4.write(r, uint2(x, gid.y), gid.z); - return; - } -#endif // N >= 4 -} -#endif // V == VX - - - -#undef VV -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.metal deleted file mode 100644 index 4c1e818d2bf5c7266169f406fbfaf8e322685dc4..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.metal +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -#include "Common.metal" - -using namespace metal; - -struct SplitParam { - int32_t idim[4]; - int32_t axis; - int32_t offset; - int32_t trans[4]; - int32_t vdim[4]; -}; - -#define VNORMAL 1 -#define VX 2 -#define VY 3 -#define VZ 4 - -// only support split_{2, 3, 4}_{2, 3, 4}_y_{float, half} -// only support split_{3, 4}_{2, 3, 4}_x_{float, half} - - -//// ssd-ar: (R=3, N=2, V=y) -#define V VY - #define R 3 - #define N 2 - #define P float - #include "Split.inc.metal" - #undef P - #define P half - #include "Split.inc.metal" - #undef P - #undef N - #undef R -#undef V - - -//// ssd-ar: (R=2, N=2, V=y) -#define V VY - #define R 2 - #define N 2 - #define P float - #include "Split.inc.metal" - #undef P - #define P half - #include "Split.inc.metal" - #undef P - #undef N - #undef R -#undef V diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.inc.metal deleted file mode 100644 index 534166e45fc3db49cc5de526ec0d5179ca3f9899..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.inc.metal +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#ifdef P - -#define CONCAT2(a, b) a ## b -#define CONCAT2_(a, b) a ## _ ## b -#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c - -#define FUNC(f, r, p) CONCAT3_(f, r, p) -#define VECTOR(p, n) CONCAT2(p, n) - -kernel void FUNC(transpose, R, P)(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant TransposeParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - VECTOR(P, 4) r; - int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}; - int iabcd[4], oabcd[4], ixyzn[4]; - for (int n = 0; n < 4; n++) { - oxyzn[3] = n; -#if R == 4 - xyzn2abcd_4(pm.oC, oxyzn, iabcd); -#endif // R == 4 -#if R == 3 - xyzn2abcd_3(oxyzn, oabcd); -#endif // R == 3 -#if R == 2 - xyzn2abcd_2(oxyzn, oabcd); -#endif // R == 2 - iabcd[pm.axis[0]] = oabcd[0]; - iabcd[pm.axis[1]] = oabcd[1]; - iabcd[pm.axis[2]] = oabcd[2]; - iabcd[pm.axis[3]] = oabcd[3]; -#if R == 4 - abcd2xyzn_4(pm.iC, iabcd, ixyzn); -#endif // R == 4 -#if R == 3 - abcd2xyzn_3(iabcd, ixyzn); -#endif // R == 3 -#if R == 2 - abcd2xyzn_2(iabcd, ixyzn); -#endif // R == 2 - r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]]; - } - outTexture.write(r, gid.xy, gid.z); -} - -#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.metal deleted file mode 100644 index 321663b9b7f09eba2041cb0932215d291e44aba6..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.metal +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -#include "Common.metal" -using namespace metal; - -struct TransposeParam { - int iC; - int oC; - int axis[4]; -}; - -kernel void transpose_copy_float(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant TransposeParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z); -} -kernel void transpose_copy_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant TransposeParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z); -} - -#define R 4 - #define P float - #include "TransposeKernel.inc.metal" - #undef P - #define P half - #include "TransposeKernel.inc.metal" - #undef P -#undef R - -#define R 3 - #define P float - #include "TransposeKernel.inc.metal" - #undef P - #define P half - #include "TransposeKernel.inc.metal" - #undef P -#undef R - -#define R 2 - #define P float - #include "TransposeKernel.inc.metal" - #undef P - #define P half - #include "TransposeKernel.inc.metal" - #undef P -#undef R diff --git a/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift deleted file mode 100644 index fc1b3164c9cf623a1bc4d350cc8a5f72c369bae4..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class MulticlassNMSParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope) - bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope) - output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope) - - middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims) - - bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims) - } catch let error { - throw error - } - } - var bboxOutput: FetchHolder - var middleOutput: FetchHolder - let scores: Texture

- let bboxes: Texture

- var output: Texture

-} - -class MulticlassNMSOp: Operator, MulticlassNMSParam

>, Runable, Creator, InferShaperable{ - - func inputVariant() -> [String : [Variant]] { - return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]] - } - - func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let _ { - fatalError() - } - } - - func inferShape() { - // para.output.dim = para.input.dim - } - - typealias OpType = MulticlassNMSOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - - } - - func delogOutput() { - print(" nms - output: ") - print(para.bboxes.metalTexture.float32Array().strideArray()) - } -} - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift index 6f42f2aa9f8d0515946ace625ed16c5040fd3099..07676defe71ec18560df4be630cd04008cd1aad6 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift @@ -15,60 +15,54 @@ import Foundation class PoolParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) - poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs) - ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs) - stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs) - padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs) - ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs) - globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs) - assert(input.transpose == [0, 2, 3, 1]) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) + poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs) + ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs) + stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs) + padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs) + ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs) + 
globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs) + } catch let error { + throw error + } +// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self) } - // let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self) - } - let input: Texture

- var output: Texture

- var ksize: [Int32] - var stride: [Int32] - var padding: [Int32] - var poolType: String - var ceilMode: Bool - var globalPooling: Bool + let input: Texture

+ var output: Texture

+ var ksize: [Int32] + var stride: [Int32] + var padding: [Int32] + var poolType: String + var ceilMode: Bool + var globalPooling: Bool } class PoolOp: Operator, PoolParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = PoolOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + func inferShape() { + // para.output.dim = para.input.dim } - } - - func delogOutput() { - print(" \(type) output: ") - print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) - -// print("pool2d delog") -// let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) -// print(para.ksize) -// print(para.stride) -// print(para.padding) -// print(para.poolType) -// let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) - } + typealias OpType = PoolOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print("pool2d delog") + let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) + print(para.ksize) + print(para.stride) + print(para.padding) + print(para.poolType) + let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift deleted file mode 100644 index 2d7987e937b9ddf6410ebb0d23bb89c76c1a13ce..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class PreluParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope) - alpha = try PreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) - mode = try PreluParam.getAttr(key: "mode", attrs: opDesc.attrs) - } catch let error { - throw error - } - } - let mode: String - let alpha: Tensor

- let input: Texture

- var output: Texture

-} - -class PreluOp: Operator, PreluParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = PreluOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) input: ") - print(para.input.metalTexture.toTensor(dim: (n: para.input.padToFourDim[0], c: para.input.padToFourDim[1], h: para.input.padToFourDim[2], w: para.input.padToFourDim[3])).strideArray()) - - print(" \(type) Alpha: ") - let _: Float32? = para.alpha.buffer.logDesc(header: " alpha: ", stridable: false) - - print(" \(type) output: ") - print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) - } - -// print("softmax delog") -// let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) -// let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift deleted file mode 100644 index 2a9f18463483a024545300661e1db33cedce585b..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class PriorBoxParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs) - } catch _ { - } - - do { - input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope) - output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope) - inputImage = try PriorBoxParam.inputImage(inputs: opDesc.inputs, from: inScope) - outputVariances = try PriorBoxParam.outputVariances(outputs: opDesc.outputs, from: inScope) - minSizes = try PriorBoxParam.getAttr(key: "min_sizes", attrs: opDesc.attrs) - maxSizes = try PriorBoxParam.getAttr(key: "max_sizes", attrs: opDesc.attrs) - aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs) - variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs) - flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs) - clip = try PriorBoxParam.getAttr(key: "clip", attrs: opDesc.attrs) - stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs) - stepH = try PriorBoxParam.getAttr(key: "step_h", attrs: opDesc.attrs) - offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs) - } catch let error { - throw error - } - } - - var min_max_aspect_ratios_order: Bool = false - let minSizes: [Float32] - let maxSizes: [Float32] - let aspectRatios: [Float32] - var newAspectRatios: MTLBuffer? - let variances: [Float32] - let flip: Bool - let clip: Bool - var stepW: Float32 - var stepH: Float32 - let offset: Float32 - - let input: Texture

- let inputImage: Texture

- var output: Texture

- let outputVariances: Texture

-} - -class PriorBoxOp: Operator, PriorBoxParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = PriorBoxOp

- - func inferShape() { - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - - print(" \(type) output: ") - // output -// let outputArray = para.output.metalTexture.float32Array() -// print(outputArray.strideArray()) -// let device = para.input.metalTexture!.device -// let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3]) -// let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3]) -// print("boxes: ") -// print(boxes.strideArray()) -// print("variances: ") -// print(variances.strideArray()) - // output - print(" \(type) output: ") - - let box = para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])) - print(" dim: \(para.output.dim)") - print(box.strideArray()) -// print((0.. Float32 in -// return o -// } -// -// print(" output variance: \(outputVarianceArray)") - -// writeToLibrary(fileName: "variance_out", array: outputVarianceArray) - - } -} - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift index 7748df75fef3a2280a51dda159ead0392e146443..f65e402cdd2b6356199a2104f99556cd4fdd3b6a 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift @@ -1,58 +1,47 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - +///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. */ import Foundation class ReluParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error + } } - } - let input: Texture

- var output: Texture

+ let input: Texture

+ var output: Texture

} class ReluOp: Operator, ReluParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = ReluOp

- - func inferShape() { - para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + func inferShape() { + para.output.dim = para.input.dim + } + + typealias OpType = ReluOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } } - } - - func delogOutput() { - print(" \(type) output: ") - print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(outputArray.strideArray()) - } - } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift index ac46baca91bd6eedab9241da68a05d08391ec931..759ffd4b8b46673e5245f8bbc67dbcc0956666aa 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift @@ -15,63 +15,36 @@ import Foundation class ReshapeParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope) - shape = try ReshapeParam.getAttr(key: "shape", attrs: opDesc.attrs) - - var s: [Int] = shape.map { Int($0) } - - var di = -1 - var ml = 1 - for i in 0..= 0 { - s[di] = input.dim.numel() / ml - } - output.tensorDim = Dim.init(inDim: s) - var dim: [Int] = [1, 1, 1, 1] - for i in 0.. - let shape: [Int32] - var output: Texture

+ let input: Texture

+ var output: Texture

} class ReshapeOp: Operator, ReshapeParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = ReshapeOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + func inferShape() { + // para.output.dim = para.input.dim + } + + typealias OpType = ReshapeOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + func delogOutput() { + print("reshape delog") + let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false) + let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false) } - } - func delogOutput() { - print("reshape delog") - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(outputArray.strideArray()) -// print(outputArray) - } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ResizeBilinearOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ResizeBilinearOp.swift deleted file mode 100644 index e0e699cdb8b3a17eb109877f1a7bd986b5e07403..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/ResizeBilinearOp.swift +++ /dev/null @@ -1,64 +0,0 @@ -///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
*/ - -import Foundation - -class ResizeBilinearParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope) -// if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) { -// fatalError() -// } - output = try ResizeBilinearParam.outputOut(outputs: opDesc.outputs, from: inScope) - out_h = try ResizeBilinearParam.getAttr(key: "out_h", attrs: opDesc.attrs) - out_w = try ResizeBilinearParam.getAttr(key: "out_w", attrs: opDesc.attrs) - } catch let error { - throw error - } - } - let input: Texture

- var output: Texture

- let out_h: Int32 - let out_w: Int32 -} - -class ResizeBilinearOp: Operator, ResizeBilinearParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = ResizeBilinearOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - } - -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ShapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ShapeOp.swift deleted file mode 100644 index b37eed0a9d398923bb866444cf224cb79bb2fecc..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/ShapeOp.swift +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class ShapeParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope) - output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error - } - } - var output: Texture

- let input: Texture

-} - -class ShapeOp: Operator, ShapeParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = ShapeOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - } - -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift index 66b5c7b3146d4c433e12b846a971e4b5ae579f79..d323b21cfa7729876a78702d0098c267132b4ab1 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift @@ -15,48 +15,36 @@ import Foundation class SoftmaxParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) - - assert(input.tensorDim.dims.count == 2) - assert(input.transpose == [0, 1, 2, 3]) - - output.dim = input.dim - output.tensorDim = input.tensorDim - output.padToFourDim = input.padToFourDim - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error + } } - } - let input: Texture

- var output: Texture

+ let input: Texture

+ var output: Texture

} class SoftmaxOp: Operator, SoftmaxParam

>, Runable, Creator, InferShaperable{ - typealias OpType = SoftmaxOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error + + func inferShape() { + // para.output.dim = para.input.dim } - } - - func delogOutput() { - print("softmax delog") - print(para.input) - print(para.output) - let padToFourDim = para.output.padToFourDim - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - print(outputArray.strideArray()) - } + typealias OpType = SoftmaxOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + func delogOutput() { + print("softmax delog") + let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) + let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift deleted file mode 100644 index 4495902a46426e2a866ba81a2aa761951605f940..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class SplitParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope) - output = Texture

.init(device: input.metalTexture!.device, inDim: input.dim) - axis = try SplitParam.getAttr(key: "axis", attrs: opDesc.attrs) - sections = try SplitParam.getAttr(key: "sections", attrs: opDesc.attrs) - if axis < 0 { - axis = input.tensorDim.cout() + axis - } - guard let outlist = opDesc.outputs["Out"] else { - fatalError() - } - for out in outlist { - guard let variant = inScope[out], let v = variant as? Texture

else { - fatalError() - } - outputList.append(v) - sections.append(Int32(v.tensorDim.dims[axis])) - } - } catch let error { - throw error - } - } - - var axis: Int - let input: Texture

- var output: Texture

- var outputList: [Texture

] = [] - var sections: [Int32] = [] -} - -class SplitOp: Operator, SplitParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = SplitOp

- - func inferShape() { - // para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - let device = para.input.metalTexture!.device - for out in para.outputList { - let arr: [Float32] = device.texture2tensor(texture: out.metalTexture, dim: out.tensorDim.dims, transpose: out.transpose) - print(arr.strideArray()) - } - } - -} - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift deleted file mode 100644 index 8b695ec76fcd46b46f503e21e70f8aac52cee717..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class TransposeParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope) - axis = try TransposeParam.getAttr(key: "axis", attrs: opDesc.attrs) - } catch let error { - throw error - } - } - let input: Texture

- var output: Texture

- let axis: [Int32] -} - -class TransposeOp: Operator, TransposeParam

>, Runable, Creator, InferShaperable{ - - typealias OpType = TransposeOp

- - func inferShape() { - //para.output.dim = para.input.dim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - - func delogOutput() { - print(" \(type) output: ") - let device = para.output.metalTexture!.device - let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) - print(outputArray.strideArray()) - } -} - - - diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift deleted file mode 100644 index 1d5ca03ecb9c0af1b83412ad44c343267f35a64b..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift +++ /dev/null @@ -1,209 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Metal -import MetalKit -import Foundation - -@objc public enum Platform: Int{ - case CPU, GPU -} - -class ScaleKernel: CusomKernel { - init(device: MTLDevice, shape: Shape) { - if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) - } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false) - } else { - fatalError(" unsupport ") - } - } - -} - -public class Runner: NSObject { - var program: Program? - var executor: Executor? - var queue: MTLCommandQueue? - var textureLoader: MTKTextureLoader? - public let net: Net - let device: MTLDevice? - let platform: Platform - var cpuPaddleMobile: PaddleMobileCPU? - let numel: Int - let meansNumber: [NSNumber] - - // dims num nchw - let dimsNum: [NSNumber] - /** - * inNet: 需要运行的网络 - * commandQueue: GPU 是需要传入 - * inPlatform: 需要使用的平台, GPU or CPU - */ - @objc public init(inNet: Net, commandQueue: MTLCommandQueue?, inPlatform: Platform) { - net = inNet - queue = commandQueue - device = queue?.device - platform = inPlatform - if let inDevice = device { - textureLoader = MTKTextureLoader.init(device: inDevice) - } - if platform == .CPU { - cpuPaddleMobile = PaddleMobileCPU.init() - } - numel = net.dim.n * net.dim.c * net.dim.h * net.dim.w - meansNumber = net.means.map { NSNumber.init(value: $0) } - dimsNum = [NSNumber.init(value: net.dim.n), - NSNumber.init(value: net.dim.c), - NSNumber.init(value: net.dim.h), - NSNumber.init(value: net.dim.w)] - } - - /** - * load 模型, 返回 true 可进行预测 - */ - @objc public func load() -> Bool { - if platform == .GPU { - guard let inDevice = device, let inQueue = queue else { - print(" paddle mobile gpu load error, need MTLCommandQueue") - return false - } - let loader = Loader.init() - do { - program = try loader.load(device: inDevice, paramPointer: net.paramPointer!, paramSize: 
net.paramSize,modePointer:net.modelPointer!,modelSize:net.modelSize) -// program = try loader.load(device: inDevice, modelPath: net.modelPath, paraPath: net.paramPath) - net.updateProgram(program: program!) - - executor = try Executor.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!) - } catch let error { - print(error) - return false - } - } else { - return cpuPaddleMobile?.load(net.modelPath, andWeightsPath: net.paramPath) ?? false - } - return true - } - - @objc public func predict(inputPointer: UnsafeMutablePointer, completion: @escaping ( _ success: Bool, _ result: PaddleMobileCPUResult?) -> Void) { - - guard let res = cpuPaddleMobile?.predictInput(inputPointer, dim: dimsNum) else { - completion(false, nil) - return - } - completion(true, res) - } - - /** - * GPU 版本 predict - * texture: 需要预测的 texture 需要做过预处理 - * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组 - */ - @objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) 
-> Void) { - do { - try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in - guard let SSelf = self else { - fatalError( " self nil " ) - } - let result = SSelf.net.fetchResult(paddleMobileRes: res) - completion(true, result) - }, preProcessKernle: self.net.preprocessKernel, except: self.net.except) - } catch let error { - print(error) - completion(false, nil) - return - } - } - - /** - * CPU GPU 通用版本 predict - * cgImage: 需要预测的图片 - * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组 - */ -// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) { -// if platform == .GPU { -// getTexture(image: cgImage) { [weak self] (texture) in -// guard let SSelf = self else { -// fatalError( "" ) -// } -// SSelf.predict(texture: texture, completion: completion) -// } -// } else if platform == .CPU { -// let input = preproccess(image: cgImage) -// predict(inputPointer: input, completion: completion) -// input.deinitialize(count: numel) -// input.deallocate() -// } -// } - - /* - * 清理内存, 调用此函数后, 不能再使用, 需重新 load - */ - @objc public func clear() { - if platform == .GPU { - executor?.clear() - executor = nil - program = nil - } else if platform == .CPU { - cpuPaddleMobile?.clear() - } - } - - @objc public func preproccess(image: CGImage) -> UnsafeMutablePointer { - let output = UnsafeMutablePointer.allocate(capacity: numel) - let means = net.means.map { NSNumber.init(value: $0) } - let dims = [NSNumber.init(value: net.dim.n), - NSNumber.init(value: net.dim.c), - NSNumber.init(value: net.dim.h), - NSNumber.init(value: net.dim.w)] - cpuPaddleMobile?.preprocess(image, output: output, means: means, scale: net.scale, dim: dims) - return output - } - - /* - * 获取 texture, 对 texture 进行预处理, GPU 预测时使用 - */ - @objc public func getTexture(image: CGImage, getTexture: @escaping 
(MTLTexture) -> Void) { - let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error" - scaleTexture(input: texture!, complete: getTexture) - } - - public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) { - - guard let inQueue = queue, let inDevice = device else { - fatalError( " queue or devcie nil " ) - } - - guard let buffer = inQueue.makeCommandBuffer() else { - fatalError( " make buffer error" ) - } - - let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3)) - - do { - try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer) - } catch let error { - print(error) - fatalError() - } - - buffer.addCompletedHandler { (buffer) in - complete(scaleKernel.outputTexture) - } - buffer.commit() - } -} - - diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.h b/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.h deleted file mode 100644 index 00149053dfe6891f07f816feef524db35474a18b..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#import -#import - -typedef enum : NSUInteger { - MobileNetType, - MobileNetSSDType, - GenetType, -} NetType; - -@interface PaddleMobileGPUResult: NSObject - -@property (assign, nonatomic) float *output; - -@property (assign, nonatomic) int outputSize; - --(void)releaseOutput; - -@end - -@interface ModelConfig: NSObject - -/* - * 预处理需要用到的值 (三个) - */ -@property (strong, nonatomic) NSArray *means; -/* - * 预处理需要用到的 scale 值 - */ -@property (assign, nonatomic) float scale; - -/* - * 输出维度信息 [n c h w] - */ -@property (strong, nonatomic) NSArray *dims; - - -/* - * 模型参数内存地址 - */ -@property (assign, nonatomic) void *paramPointer; - -/* - * 模型参数占用内存大小 (kb) - */ -@property (assign, nonatomic) int paramSize; - -/* - * 模型内存地址 - */ -@property (assign, nonatomic) void *modelPointer; - -/* - * 模型占用内存大小 (kb) - */ -@property (assign, nonatomic) int modelSize; - -@end - -@interface PaddleMobileGPU: NSObject - -/* - * 初始化 - */ --(instancetype)initWithCommandQueue:(id)queue net:(NetType)netType modelConfig:(ModelConfig *)config; - -/* - * paramPointer 模型参数内存地址 - * paramSize 模型参数占用内存大小 (kb) - * modelPointer 模型内存地址 - * modelSize 模型占用内存大小 (kb) - */ --(BOOL)load; - -/* - * texture: 需要进行预测的图像转换的 texture - * completion: 预测完成回调 - */ --(void)predict:(id)texture withCompletion:(void (^)(BOOL, NSArray *))completion; - -/* - * texture: 需要进行预测的图像转换的 texture - * completion: 预测完成回调 - */ --(void)predict:(id)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion; - -/* - * 清理内存 - */ --(void)clear; - -@end - - diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.m b/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.m deleted file mode 100644 index 4e56bf2f98db9cda0d36587bef576e90b3ee6553..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.m +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#import -#import "PaddleMobileGPU.h" -#import "paddle_mobile.h" -#import - -@implementation ModelConfig -@end - -@interface PaddleMobileGPUResult () - -@property (strong, nonatomic) ResultHolder *resultHolder; - -- (void)setOutputResult:(ResultHolder *)resultHolder; - -@end - -@implementation PaddleMobileGPUResult -- (void)setOutputResult:(ResultHolder *)resultHolder { - self.resultHolder = resultHolder; - self.output = resultHolder.result; - self.outputSize = resultHolder.capacity; -} - --(void)releaseOutput { - [self.resultHolder releasePointer]; -} -@end - -@interface PaddleMobileGPU () -{ - Runner *runner; -} -@end - -@implementation PaddleMobileGPU - --(instancetype)initWithCommandQueue:(id)queue net:(NetType)netType modelConfig:(ModelConfig *)config { - self = [super init]; - if (self) { - Net *net = nil; - if (netType == GenetType) { - net = [[Genet alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; - } else if (netType == MobileNetSSDType) { - net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; - } else if (netType == MobileNetType) { - - } - runner = [[Runner alloc] initInNet:net commandQueue:queue inPlatform:PlatformGPU]; - } - return self; -} - --(BOOL)load { - return [runner load]; -} - 
--(void)predict:(id)texture withCompletion:(void (^)(BOOL, NSArray *))completion { - [runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { - NSMutableArray *resultArray = [NSMutableArray arrayWithCapacity:result.capacity]; - for (int i = 0; i < result.capacity; ++i) { - [resultArray addObject:[NSNumber numberWithFloat:result.result[i]]]; - } - completion(success, resultArray); - [result releasePointer]; - - }]; -} - --(void)predict:(id)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion { - [runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { - PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init]; - [gpuResult setOutputResult:result]; - completion(success, gpuResult); - }]; -} - --(void)clear { - [runner clear]; -} - -@end diff --git a/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift index 98dd7ff39a71cadfe6cc33f3d468448ac5155242..8e1915a4975d5e444c2a5c0d0ee9e19d3cbe7577 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift @@ -14,7 +14,7 @@ import Foundation -class BlockDesc { +struct BlockDesc { let index: Int let parentIndex: Int let vars: [VarDesc] @@ -48,10 +48,8 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { var description: String { var str = "" - for i in 0.. 
Bool) -> [String : [String]] in - var map: [String : [String]] = [:] - for opDescVar in vars { - if (canAdd(opDescVar.parameter)) { - map[opDescVar.parameter] = opDescVar.arguments +struct OpDesc { + let inputs: [String : [String]] + var paraInputs: [String : [String]] + var outputs: [String : [String]] + let unusedOutputs: [String : [String]] + var attrs: [String : Attr] = [:] + var type: String + init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) { + type = protoOpDesc.type + let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in + var map: [String : [String]] = [:] + for opDescVar in vars { + if (canAdd(opDescVar.parameter)) { + map[opDescVar.parameter] = opDescVar.arguments + } + } + return map + } + + inputs = creator(protoOpDesc.inputs) { + opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false + } + + paraInputs = creator(protoOpDesc.inputs) { + !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false) + } + + outputs = creator(protoOpDesc.outputs) { + opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false + } + + unusedOutputs = creator(protoOpDesc.outputs) { + !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false) + } + + for attr in protoOpDesc.attrs { + if (attr.type != .block) { + attrs[attr.name] = attrWithProtoDesc(attrDesc: attr) + } } - } - return map - } - - inputs = creator(protoOpDesc.inputs) { - opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false - } - - paraInputs = creator(protoOpDesc.inputs) { - !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false) } - - outputs = creator(protoOpDesc.outputs) { - opInfos[protoOpDesc.type]?.outputs.contains($0) ?? 
false +} + +extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { + var description: String { + var str = "" + str += "op type: \(type): \n" + str += " op inputs: \n" + str += " \(inputs) \n" + str += " op para inputs: \n" + str += " \(paraInputs) \n" + str += " op para outputs: \n" + str += " \(outputs) \n" + str += " op attrs: \n" + str += " \(attrs) \n" + + return str } - unusedOutputs = creator(protoOpDesc.outputs) { - !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false) + var debugDescription: String { + return description } - for attr in protoOpDesc.attrs { - if (attr.type != .block) { - attrs[attr.name] = attrWithProtoDesc(attrDesc: attr) - } - } - } -} - -extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { - var description: String { - var str = "" - str += "op type: \(type): \n" - str += " op inputs: \n" - str += " \(inputs) \n" - str += " op para inputs: \n" - str += " \(paraInputs) \n" - str += " op para outputs: \n" - str += " \(outputs) \n" - str += " op attrs: \n" - str += " \(attrs) \n" - return str - } - - var debugDescription: String { - return description - } - - } diff --git a/metal/paddle-mobile/paddle-mobile/Program/Program.swift b/metal/paddle-mobile/paddle-mobile/Program/Program.swift index 464705d6db2b87945029de1bfcebddb1bfb4d092..1481677b198f802cd5f29a967513b2df2107bc47 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/Program.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/Program.swift @@ -14,7 +14,7 @@ import Foundation -public class Program { +public struct Program { let paramPath: String let programDesc: ProgramDesc let scope: Scope @@ -23,9 +23,4 @@ public class Program { paramPath = inParamPath scope = inScope } - init(inProgramDesc: ProgramDesc, inScope: Scope) { - programDesc = inProgramDesc - scope = inScope - paramPath = "" - } } diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift index 
ad472e5a7d1fe9db248e47f4417d7c61fb01eaa9..ef094a8a20790b4e0cf47eaea04bb7d4f7a2d046 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift @@ -14,7 +14,7 @@ import Foundation -public class ProgramDesc { +public struct ProgramDesc { var blocks: [BlockDesc] = [] init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) { for block in protoProgram.blocks { diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift index 87aced32c0c2cd576f023eeb5a3daad15daf1ce8..d819cdad533e444c327e95baff7bf87e902d6bff 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift @@ -15,285 +15,204 @@ import Foundation precedencegroup ChainNode { - associativity: left - higherThan: MultiplicationPrecedence + associativity: left + higherThan: MultiplicationPrecedence } infix operator --> : ChainNode class Node { - var inputs: [Node] = [] - var outputs: [Node] = [] - var type: String - var opDesc: OpDesc? - init(inOpDesc: OpDesc) { - type = inOpDesc.type - opDesc = inOpDesc - } - - init(inType: String) { - type = inType - } - - subscript(index: Int) -> [Node] { - var nodes: [Node] = [] - getNodesWithLocation(index: index, nowIndex: 0, nodes: &nodes) - return nodes - } - - func getNodesWithLocation(index: Int, nowIndex: Int, nodes: inout [Node]) { - if index == nowIndex { - nodes.append(self) + var inputs: [Node] = [] + var outputs: [Node] = [] + var type: String + var opDesc: OpDesc? 
+ init(inOpDesc: OpDesc) { + type = inOpDesc.type + opDesc = inOpDesc } - for output in outputs { - output.getNodesWithLocation(index: index, nowIndex: nowIndex + 1, nodes: &nodes) - } - } - - static func -->(lNode: Node, rNode: Node) -> Node { - lNode.outputs.append(rNode) - rNode.inputs.append(lNode) - return rNode - } - - func depth(begin: UInt = 1) -> UInt { - var beginMax: UInt = 1 - for output in outputs { - let subDepth = output.depth(begin: begin + 1) - beginMax = max(begin, subDepth) - } - beginMax = max(begin, beginMax) - return beginMax - } - - func to(depth: UInt) -> Node { - let beginNode = Node.init(inType: type) - beginNode.opDesc = opDesc - to(depth: depth - 1, withNode: beginNode) - return beginNode - } - - func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) { - let fusionNode = fusion.fusionNode() - let change = fusion.change() - let inOutputs = outputs - outputs.removeAll() - opDesc?.outputs.removeAll() - for i in 0..(lNode: Node, rNode: Node) -> Node { + lNode.outputs.append(rNode) + rNode.inputs.append(lNode) + return rNode } - for paraInput in inOpdesc.paraInputs { - if let inChanges = change[type] { - for keyChange in inChanges { - if keyChange.from == paraInput.key { - beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value - } else { - beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value - } + func depth(begin: UInt = 1) -> UInt { + var beginMax: UInt = 1 + for output in outputs { + let subDepth = output.depth(begin: begin + 1) + beginMax = max(begin, subDepth) } - } else { - beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value - } + beginMax = max(begin, beginMax) + return beginMax } - if matchNode.outputs.count == 0 { - beginNode.outputs.append(contentsOf: outputs) - beginNode.opDesc?.outputs = inOpdesc.outputs - + func to(depth: UInt) -> Node { + let beginNode = Node.init(inType: type) + to(depth: depth - 1, withNode: beginNode) + return beginNode } - removedNodes.append(self) - for i in 0.. 
[String : Node]{ - var map: [String : Node] = [:] - relationship(map: &map) - return map - } - - private func relationship(map: inout [String : Node]) { - guard let inOpDesc = opDesc else { - return + private func to(depth: UInt, withNode: Node) { + if depth < 1 { + return + } + + for output in outputs { + let node = Node.init(inType: output.type) + withNode.outputs.append(node) + output.to(depth: depth - 1, withNode: node) + } } - for output in inOpDesc.outputs { - for outputKey in output.value { - map[outputKey] = self - } - } - for output in outputs { - output.relationship(map: &map) - } - } - } extension Node: Equatable { - static func == (lhs: Node, rhs: Node) -> Bool { - if lhs.outputs.count != rhs.outputs.count { - return false - } - - if lhs.type != rhs.type { - return false + static func == (lhs: Node, rhs: Node) -> Bool { + if lhs.outputs.count != rhs.outputs.count { + return false + } + + if lhs.type != rhs.type { + return false + } + + for i in 0.. { - // register fusion - let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp

.self, -// ConvAddAddPreluOp

.self, - ConvAddPreluOp

.self, - ConvAddOp

.self, - ConvBNReluOp

.self, - DwConvBNReluOp

.self, - ElementwiseAddPreluOp

.self - ] - - func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { - - guard originProgramDesc.blocks.count == 1 else { - fatalError(" not support yet") - } - - var mapForNodeChain: [String : Node] = [:] - var nodes: [Node] = [] - var typeMapNodes: [String : [(node: Node, output: [String : Node])]] = [:] - let block = originProgramDesc.blocks[0] - for opDesc in block.ops { - guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { - fatalError() - } - - let node = Node.init(inOpDesc: opDesc) - for inputKey in opInputKeys { - if let inputs = opDesc.inputs[inputKey] { - for input in inputs { - if let inputNode = mapForNodeChain[input] { - _ = inputNode --> node - } - } + let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp

.self, ConvAddOp

.self] + func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { + + guard originProgramDesc.blocks.count == 1 else { + fatalError(" not support yet") } - } - - for outputKey in outputKeys { - if let outputs = opDesc.outputs[outputKey] { - for output in outputs { - mapForNodeChain[output] = node - } - } - } - - nodes.append(node) - - if var inNodes = typeMapNodes[opDesc.type] { - inNodes.append((node, mapForNodeChain)) - typeMapNodes[opDesc.type] = inNodes - } else { - typeMapNodes[opDesc.type] = [(node, mapForNodeChain)] - } - } - - for fusion in fusionOps { - let fusionNode = fusion.fusionNode() - let depth = fusionNode.depth() - if let toMatchNodes = typeMapNodes[fusionNode.type] { - for node in toMatchNodes { - - let toNode = node.node.to(depth: depth) - if toNode == fusionNode { // match - var canFolder = true - let relationshipMap = toNode.relationship() - - for toCheck in fusion.needCheck() { - // let nodes = toCheck - let checkNodes = toNode[toCheck.0] - - for checkNode in checkNodes { - let inputToChecks = checkNode.opDesc?.inputs[toCheck.1] ?? [] - for inputToCheck in inputToChecks { - if node.output[inputToCheck] == nil { - if relationshipMap[inputToCheck] == nil { - canFolder = false + + var mapForNodeChain: [String : Node] = [:] + var nodes: [Node] = [] + var typeMapNodes: [String : [Node]] = [:] + let block = originProgramDesc.blocks[0] + for opDesc in block.ops { + guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { + fatalError() + } + + let node = Node.init(inOpDesc: opDesc) + for inputKey in opInputKeys { + if let inputs = opDesc.inputs[inputKey] { + for input in inputs { + if let inputNode = mapForNodeChain[input] { + _ = inputNode --> node + } + } } - } } - let paramInputToChecks = checkNode.opDesc?.paraInputs[toCheck.1] ?? 
[] - for paramInputToCheck in paramInputToChecks { - if node.output[paramInputToCheck] == nil { - if relationshipMap[paramInputToCheck] == nil { - canFolder = false + for outputKey in outputKeys { + if let outputs = opDesc.outputs[outputKey] { + for output in outputs { + mapForNodeChain[output] = node + } } - } } - } - } - - if !canFolder { - continue + + nodes.append(node) + + if var inNodes = typeMapNodes[opDesc.type] { + inNodes.append(node) + typeMapNodes[opDesc.type] = inNodes + } else { + typeMapNodes[opDesc.type] = [node] + } } - var removeNodes: [Node] = [] - node.node.folderWith(fusion: fusion, removedNodes: &removeNodes) - for removeNode in removeNodes { - nodes.remove(element: removeNode) + for fusion in fusionOps { + let fusionNode = fusion.fusionNode() + let depth = fusionNode.depth() + if let toMatchNodes = typeMapNodes[fusionNode.type] { + for node in toMatchNodes { + let toNode = node.to(depth: depth) + if toNode == fusionNode { // match + var removeNodes: [Node] = [] + node.folderWith(fusion: fusion, removedNodes: &removeNodes) + for removeNode in removeNodes { + nodes.remove(element: removeNode) + } + } + } + } } - } + + var ops: [OpDesc] = [] + for node in nodes { + ops.append(node.opDesc!) } - } + + var newProgramDesc = ProgramDesc.init() + let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) + newProgramDesc.blocks.append(newBlock) + return newProgramDesc } - - var ops: [OpDesc] = [] - for node in nodes { - ops.append(node.opDesc!) 
- } - - var newProgramDesc = ProgramDesc.init() - let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) - newProgramDesc.blocks.append(newBlock) - return newProgramDesc - } } diff --git a/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift index 1a72f5ef717063136c4708c881befd789a57219c..e564821ab6a68fc96f00aeb10f3b2fba26d9600e 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift @@ -14,18 +14,18 @@ import Foundation -class TensorDesc { +struct TensorDesc { let dims: [Int] let dataType: VarTypeType - let dataLayout: DataLayout = DataLayout.NCHW() + let dataLayout: DataLayout = .NCHW var NCHWDim: [Int] { get { if dims.count != 4 { return dims } - if dataLayout == DataLayout.NCHW() { + if dataLayout == .NCHW { return dims - } else if dataLayout == DataLayout.NHWC() { + } else if dataLayout == .NHWC{ var resultDims = dims resultDims.swapAt(1, 3) return resultDims @@ -40,9 +40,9 @@ class TensorDesc { if dims.count != 4 { return dims } - if dataLayout == DataLayout.NHWC() { + if dataLayout == .NHWC { return dims - } else if dataLayout == DataLayout.NCHW() { + } else if dataLayout == .NCHW{ var resultDims = dims resultDims.swapAt(1, 3) return resultDims @@ -53,7 +53,7 @@ class TensorDesc { } init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) { - dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) } + dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : 1 } dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? 
.ErrorType } diff --git a/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift index f29169598f69ec568bd9d08af8fa4738fe8f5eea..58411828c0c94316da089fc1e2442c87bd154594 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift @@ -56,7 +56,7 @@ enum VarTypeType: Int { } } -class VarDesc { +struct VarDesc { let name: String let persistable: Bool let type: VarTypeType diff --git a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift index 7e4a05a8dcfc17be10f183de36575342383bb560..672484cd9d055bbe65a61d41017199dd79d6cdb2 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift @@ -31,14 +31,15 @@ public struct Dim { return dims.reduce(1) { $0 * $1 } } - public static func ==(left: Dim, right: Dim) -> Bool { + static func ==(left: Dim, right: Dim) -> Bool { return left.dims == right.dims; } - public subscript(index: Int) -> Int { + subscript(index: Int) -> Int { return dims[index]; } + private(set) var dims: [Int] private init(){ fatalError() diff --git a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift deleted file mode 100644 index bdaf8d0973ad3fa6c70e04ad84fd1b14bcb8b39a..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift +++ /dev/null @@ -1,201 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - - -let testTo = 81 - -var isTest = false - -let computePrecision: ComputePrecision = .Float16 - -public class GPUResultHolder { - public let dim: [Int] - public let capacity: Int - public var resultPointer: UnsafeMutablePointer? - public var intermediateResults: [String : [Variant]]? - public let elapsedTime: Double - public init(inDim: [Int], inPointer: UnsafeMutablePointer?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) { - dim = inDim - capacity = inCapacity - - if let inInPointer = inPointer { - resultPointer = UnsafeMutablePointer.allocate(capacity: inCapacity) - resultPointer?.initialize(from: inInPointer, count: inCapacity) - } - - elapsedTime = inElapsedTime - intermediateResults = inIntermediateResults - } - -} - -extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible { - public var debugDescription: String { -// var str = "" -// str += "Dim: \(dim) \n value:[ " -// if resultArr.count < 20 { -// for d in resultArr { -// str += " \(d) " -// } -// } else { -// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) { -// str += " \(resultArr[d]) " -// } -// } -// str += " ]" -// return str - fatalError() - } - - public var description: String { - return debugDescription - } -} - -public class Executor { - var ops: [Runable & InferShaperable] = [] - let program: Program - let device: MTLDevice - let inflightSemaphore: DispatchSemaphore - let queue: MTLCommandQueue - public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws 
{ - self.inflightSemaphore = DispatchSemaphore(value: 3) - program = inProgram - device = inDevice - queue = inQueue -// print("before for ") -//print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"]) - - - for block in inProgram.programDesc.blocks { - //block.ops.count - for i in 0...shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope) - ops.append(op) - } catch let error { - throw error - } - } - } - } - - public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws { - guard let buffer = queue.makeCommandBuffer() else { - throw PaddleMobileError.predictError(message: "CommandBuffer is nil") - } - inflightSemaphore.wait() - - let resInput: MTLTexture - if let inPre = preProcessKernle { - do { - try inPre.compute(inputTexuture: input, commandBuffer: buffer) - resInput = inPre.outputTexture - } catch let error { - throw error - } - } else { - resInput = input - } - - let beforeDate = Date.init() - let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim)) - program.scope.setInput(input: inputTexture) - //(ops.count - except) - for i in 0..<(ops.count - except) { - let op = ops[i] - do { - try op.run(device: device, buffer: buffer) - } catch let error { - throw error - } - } - - var outputTextures: [String : [Variant]]? 
- if except > 0 { - ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer) - outputTextures = ops[ops.count - except].inputVariant() - } - - buffer.addCompletedHandler { [weak self] (commandbuffer) in -// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) -// print(inputArr.strideArray()) -// -//// print(dim) -// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr) -// print(" write done ") - -// print("write to library done") -// return -// print(inputArr) -// -// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray() -// print(stridableInput) -// -// let _: Flo? = input.logDesc(header: "input: ", stridable: true) -// for i in 0.. 0 { - resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures) - } else { - let outputVar: Variant = SSelf.program.scope.output()! - let output: FetchHolder = outputVar as! FetchHolder -// let beforeToTensorDate = Date.init() - - resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate)) - -// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate) -// print(timeToTensor) - } - - completionHandle(resultHolder) - SSelf.inflightSemaphore.signal() - } - buffer.commit() - } - - public func clear() { - program.scope.clear() - } - -} diff --git a/metal/paddle-mobile/paddle-mobile/framework/Loader.swift b/metal/paddle-mobile/paddle-mobile/framework/Loader.swift deleted file mode 100644 index ee640ddf1163bb1f41da49fe9089964321792d9f..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/framework/Loader.swift +++ /dev/null @@ -1,259 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation -import SwiftProtobuf - -public class Loader { - class ParaLoader { - let file: UnsafeMutablePointer - let fileSize: Int - var nowIndex: Int - init(paramPath: String) throws { - guard let tmpFile = fopen(paramPath, "rb") else { - throw PaddleMobileError.loaderError(message: "open param file error" + paramPath) - } - file = tmpFile - fseek(file, 0, SEEK_END) - fileSize = ftell(file) - guard fileSize > 0 else { - throw PaddleMobileError.loaderError(message: "param file size is too small") - } - rewind(file) - nowIndex = 0 - } - - func read(tensor: Tensor

) throws { - guard nowIndex <= fileSize else { - throw PaddleMobileError.loaderError(message: "out of the file range") - } - - func pointerReader(type: T.Type) -> T { - let ptr = UnsafeMutablePointer.allocate(capacity: MemoryLayout.size) - fread(ptr, 1, MemoryLayout.size, file) - nowIndex += MemoryLayout.size - let pointee = ptr.pointee - ptr.deinitialize(count: MemoryLayout.size) - ptr.deallocate() - return pointee - } - - let _ = pointerReader(type: UInt32.self) - let lodLevel = pointerReader(type: UInt64.self) - for _ in 0...size)){ - _ = pointerReader(type: size_t.self) - } - } - - let _ = pointerReader(type: UInt32.self) - - let tensorDescSize = pointerReader(type: Int32.self) - - fseek(file, Int(tensorDescSize), SEEK_CUR) - nowIndex += Int(tensorDescSize) - - /* - 这里没有根据 Data Type 去判断, 而是从外部泛型直接指定了精度 - */ - - //现在模型传入模型为 Float 类型, 这块应该根据模型来 - // let tmpCapacity = MemoryLayout.size * tensor.numel() - // let tmpPointer = UnsafeMutablePointer.allocate(capacity: tmpCapacity); - let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) - - guard bytesRead == tensor.data.size else { - throw PaddleMobileError.loaderError(message: "param read size error") - } - - // TODO: use script to convert - // let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) - // for i in 0..) 
throws { - guard nowIndex <= paramSize else { - throw PaddleMobileError.loaderError(message: "out of the file range") - } - var readerIndex: Int = 0 - func pointerReader(type: T.Type) -> T { - let ptr = UnsafeMutablePointer.allocate(capacity: MemoryLayout.size) - memcpy(ptr, paramPointer.advanced(by: Int(readerIndex)), MemoryLayout.size) - nowIndex += MemoryLayout.size - readerIndex += MemoryLayout.size - let pointee = ptr.pointee - ptr.deinitialize(count: MemoryLayout.size) - ptr.deallocate() - - return pointee - } - let _ = pointerReader(type: UInt32.self) - let lodLevel = pointerReader(type: UInt64.self) - for _ in 0...size)){ - _ = pointerReader(type: size_t.self) - } - } - - let _ = pointerReader(type: UInt32.self) - let tensorDescSize = pointerReader(type: Int32.self) - - paramPointer = paramPointer.advanced(by: Int(readerIndex)) - paramPointer = paramPointer.advanced(by: Int(tensorDescSize)) - nowIndex += Int(tensorDescSize) - - let _ = memcpy(tensor.data.pointer, paramPointer, tensor.data.size) - paramPointer = paramPointer.advanced(by: Int(tensor.data.size)) - nowIndex += tensor.data.size - } - deinit { - } - } - public init(){} - func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program { - do { - let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( - serializedData: modelData) - - let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) - let programDesc = ProgramOptimize

.init().optimize(originProgramDesc: originProgramDesc) - print(programDesc) - - guard programDesc.blocks.count > 0 else { - throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0") - } - - // to get feed key and fetch key - let block = programDesc.blocks[0] - guard let firstOp = block.ops.first, let lastOp = block.ops.last else { - throw PaddleMobileError.loaderError(message: "at least two operator") - } - - guard firstOp.type == gFeedType, lastOp.type == gFetchType else { - throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch") - } - - guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else { - throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found") - } - guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else { - throw PaddleMobileError.loaderError(message: "feed key or fetch key not found") - } - - let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey) - - // to load memory - for block in programDesc.blocks { - for varDesc in block.vars { - if (varDesc.type == .LodTensor) { - guard let tensorDesc = varDesc.tensorDesc else { - throw PaddleMobileError.loaderError(message: "get tensor desc failed") - } - - if (varDesc.persistable - && varDesc.type != .FeedMiniBatch - && varDesc.type != .FetchList) { - let dimArr = tensorDesc.dims - - guard dimArr.count > 0 else { - throw PaddleMobileError.loaderError(message: "tensor desc dim size error") - } - - let dim = Dim.init(inDim: dimArr) - let tensor = Tensor

.init(inDim: dim, inLayout: tensorDesc.dataLayout) - do { - if paraLoaderPointer != nil { - try paraLoaderPointer!.read(tensor: tensor) - } - - if paraLoader != nil { - try paraLoader!.read(tensor: tensor) - } - } catch let error { - throw error - } - // tensor.convert(to: DataLayout.NHWC()) - // tensor.initBuffer(device: device) - scope[varDesc.name] = tensor - } else { - let dim = Dim.init(inDim: tensorDesc.dims) - scope[varDesc.name] = Texture

.init(device: device, inDim: dim) - } - } else { - if varDesc.name == fetchKey { -// scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0) - } else if varDesc.name == feedKey { - } - } - } - } - - let program = Program.init(inProgramDesc: programDesc, inScope: scope) - - return program - } catch _ { - throw PaddleMobileError.loaderError(message: "protobuf decoder error") - } - } - public func load(device:MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) throws -> Program { - let modelData = Data.init(bytes:modePointer, count:modelSize) - guard let paraLoader = try? ParaLoaderWithPointer.init(pPointer: paramPointer,pSize: paramSize) else { - throw PaddleMobileError.loaderError(message: "load para error") - } - do { - let program = try loadModelandParam(device,modelData,paraLoader,nil) - return program - } catch let error { - throw error - } - } - - public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{ - guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else { - throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !") - } - guard let paraLoader = try? 
ParaLoader.init(paramPath: paraPath) else { - throw PaddleMobileError.loaderError(message: "load para error") - } - - do { - let program = try loadModelandParam(device,modelData,nil,paraLoader) - return program - } catch let error { - throw error - } - } -} diff --git a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift index c5ee1414521e7eb92011d4f4b608ad326b005531..7ffcd97f4418f17cd7085c5d03e8b58b45c623fd 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift @@ -12,308 +12,251 @@ See the License for the specific language governing permissions and limitations under the License. */ +import Accelerate import Foundation protocol Tensorial: CustomStringConvertible, CustomDebugStringConvertible{ - var dim: Dim { get set } - func numel() -> Int - var layout: DataLayout { get } + var dim: Dim { get set } + func numel() -> Int + var layout: DataLayout { get } } extension Tensorial { - func numel() -> Int { - return dim.numel() - } -} - -public enum ComputePrecision { - case Float32, Float16 + func numel() -> Int { + return dim.numel() + } } class Tensor: Tensorial { - - var data: Data - var dim: Dim - var buffer: MTLBuffer! - private(set) var layout: DataLayout - - class Data { - init(inSize: Int, inPointer: UnsafeMutablePointer

) { - size = inSize - pointer = inPointer - } - let size: Int - var pointer: UnsafeMutablePointer

- subscript(index: Int) -> P{ - get { - return pointer[index] - } - set { - pointer[index] = newValue - } - } - func release() { - pointer.deinitialize(count: size) - pointer.deallocate() - } - deinit { - // release() - } - } - - required init(inDim: Dim, inLayout: DataLayout = DataLayout.NCHW()) { - dim = inDim - let size = inDim.numel() * MemoryLayout

.size - let pointer = UnsafeMutablePointer

.allocate(capacity: size) - data = Data.init(inSize: size, inPointer: pointer) - layout = inLayout - } - - func convert(to: DataLayout) { - guard to != layout else { - return + enum BufferPrecision { + case Float32, Float16 } - guard dim.cout() == 4 else { - return - } + var data: Data + var dim: Dim + var buffer: MTLBuffer! + private(set) var layout: DataLayout - guard layout == DataLayout.NCHW() && to == DataLayout.NHWC() else { - // other not support - return + class Data { + init(inSize: Int, inPointer: UnsafeMutablePointer

) { + size = inSize + pointer = inPointer + } + let size: Int + var pointer: UnsafeMutablePointer

+ subscript(index: Int) -> P{ + get { + return pointer[index] + } + set { + pointer[index] = newValue + } + } + func release() { + pointer.deinitialize(count: size) + pointer.deallocate() + } + deinit { +// release() + } } - let newPointer = UnsafeMutablePointer

.allocate(capacity: data.size) - - if layout == DataLayout.NCHW() { - NCHW2NHWC(newPtr: newPointer) + + required init(inDim: Dim, inLayout: DataLayout = .NCHW) { + dim = inDim + let size = inDim.numel() * MemoryLayout

.size + let pointer = UnsafeMutablePointer

.allocate(capacity: size) + data = Data.init(inSize: size, inPointer: pointer) + layout = inLayout } - data.release() - data.pointer = newPointer - layout = to - } - - - - func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, convertToNHWC: Bool = true, withTranspose: Bool = false) { - if convertToNHWC { -// print(layout) - convert(to: DataLayout.NHWC()) + func convert(to: DataLayout) { + guard to != layout else { + return + } + + guard dim.cout() == 4 else { + return + } + + guard layout == .NCHW && to == .NHWC else { + // other not support + return + } + let newPointer = UnsafeMutablePointer

.allocate(capacity: data.size) + + if layout == .NCHW { + NCHW2NHWC(newPtr: newPointer) + } + + data.release() + data.pointer = newPointer + layout = to } - if withTranspose { - let transposePointer = UnsafeMutablePointer

.allocate(capacity: numel()) - let n = dim[0] - let hwc = numel()/n - for j in 0.., output: UnsafeMutableRawPointer, count: Int) { + var float32Buffer = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 4) + var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2) + guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else { + fatalError(" float 32 to float 16 error ! ") } - } - - dim.swapeDimAt(index1: 0, index2: 3) - data.release() - data.pointer = transposePointer } - guard let floatPointer = data.pointer as? UnsafeMutablePointer else { - fatalError(" not support yet ") + func initBuffer(device: MTLDevice, precision: BufferPrecision = .Float32) { + guard let floatPointer = data.pointer as? UnsafeMutablePointer else { + fatalError(" not support yet ") + } + + + let precisionSize: Int + switch precision { + case .Float32: + precisionSize = 4 + case .Float16: + precisionSize = 2 + } + + if dim.cout() == 4 { + if layout == .NHWC { + let C = dim[3] + let cSlices = (C + 3) / 4 + let paddedC = cSlices * 4 + let count = paddedC * dim[0] * dim[1] * dim[2] + if C == paddedC { + buffer = device.makeBuffer(length: count * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) + } + } else if C == 1 { + buffer = device.makeBuffer(length: numel() * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) + } + } else { + buffer = device.makeBuffer(length: count * precisionSize) + let convertedPointer = UnsafeMutablePointer.allocate(capacity: count) + var tmpPointer = floatPointer + var dstPtr = convertedPointer + for _ in 0...stride) + case .Float16: + float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count) + } + + convertedPointer.deinitialize(count: count) + convertedPointer.deallocate() + } + } + } else if dim.cout() == 1 { + buffer = device.makeBuffer(length: numel() * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) + } + } else { + fatalError(" not support !") + } + //TODO: release + data.release() } - let precisionSize: Int - switch precision { - case .Float32: - precisionSize = 4 - case .Float16: - precisionSize = 2 + var width: Int { + get { + if dim.cout() == 4 { + return dim[1] + } else { + fatalError() + } + } } - if dim.cout() == 4 { - if layout == DataLayout.NHWC() { - let C = dim[3] - let cSlices = (C + 3) / 4 - let paddedC = cSlices * 4 - let count = paddedC * dim[0] * dim[1] * dim[2] - if C == paddedC { - buffer = device.makeBuffer(length: count * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) - } - } else if C == 1 { - buffer = device.makeBuffer(length: numel() * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) - } - } else { - buffer = device.makeBuffer(length: count * precisionSize) - let convertedPointer = UnsafeMutablePointer.allocate(capacity: count) - var tmpPointer = floatPointer - var dstPtr = convertedPointer - for _ in 0...stride) - case .Float16: - float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count) - } - - convertedPointer.deinitialize(count: count) - convertedPointer.deallocate() } - } else { - let C = dim[3] - let cSlices = (C + 3) / 4 - let paddedC = cSlices * 4 - let count = paddedC * dim[0] * dim[1] * dim[2] - if C == paddedC { - buffer = device.makeBuffer(length: count * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) - } - } else if C == 1 { - fatalError(" not support ") - } else { - buffer = device.makeBuffer(length: count * precisionSize) - let convertedPointer = UnsafeMutablePointer.allocate(capacity: count) - var tmpPointer = floatPointer - var dstPtr = convertedPointer - for _ in 0...stride) - case .Float16: - float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count) - } - convertedPointer.deinitialize(count: count) - convertedPointer.deallocate() } - } - } else if dim.cout() == 1 { - let num = ((numel() + 3) / 4) * 4 - buffer = device.makeBuffer(length: num * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: num * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: num) - } - } else { - fatalError(" not support !") - } - //TODO: release - data.release() - } - - var width: Int { - get { - if dim.cout() == 4 { - return dim[1] - } else { - fatalError() - } - } - } - - var height: Int { - get { - if dim.cout() == 4 { - return dim[2] - } else { - fatalError() - } } - } - - var channel: Int { - get { - if dim.cout() == 4 { - return dim[3] - } else { - fatalError() - } - } - } - - - func NCHW2NHWC(newPtr: UnsafeMutablePointer

) { - let N = dim[0] - let C = dim[1] - let H = dim[2] - let W = dim[3] - let HXW = H * W - let CXHXW = C * H * W + - var index: Int = 0 - for n in 0..) { + let N = dim[0] + let C = dim[1] + let H = dim[2] + let W = dim[3] + let HXW = H * W + let CXHXW = C * H * W + + var index: Int = 0 + for n in 0...size { - str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" + + var debugDescription: String { + var str = "dim: \(dim) \n" + str += "MTLBuffer: \(self.buffer) \n" + for i in 0...size { + str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" + } + return str } - return str - } - - func logDataPointer(header: String = "") { - print(header) - var str = "" - str += "data size: \(data.size) \n" - str += "dim: \(dim) \n" - for i in 0..: Tensorial { + var dim: Dim + let textureDesc: MTLTextureDescriptor + var metalTexture: MTLTexture + + init(device: MTLDevice, inDim: Dim, inLayout: DataLayout = .NHWC) { + dim = inDim + layout = inLayout + let tmpTextureDes = MTLTextureDescriptor.init() + if inDim.cout() == 1 { + tmpTextureDes.width = inDim[0] + tmpTextureDes.textureType = .type1D + } else if inDim.cout() == 4 { + tmpTextureDes.height = inDim[1] + tmpTextureDes.width = inDim[2] +// print("n : \(inDim[0])") +// print(inDim[3] * inDim[0]) + tmpTextureDes.depth = 1 + tmpTextureDes.arrayLength = (inDim[3] * inDim[0] + 3)/4 + tmpTextureDes.textureType = .type2DArray + } else if inDim.cout() == 2 { + tmpTextureDes.height = 1 + tmpTextureDes.width = 1 + tmpTextureDes.depth = 1 + tmpTextureDes.arrayLength = (inDim[0] * inDim[1] + 3)/4 + tmpTextureDes.textureType = .type2DArray + } else { + fatalError(" not suuprt ") + } + + if MemoryLayout

.size == 1 { + tmpTextureDes.pixelFormat = .rgba8Unorm + } else if MemoryLayout

.size == 2 { + tmpTextureDes.pixelFormat = .rgba16Float + } else if MemoryLayout

.size == 4 { +// tmpTextureDes.pixelFormat = .r32Float + tmpTextureDes.pixelFormat = .rgba32Float -/* - 4 维 tensor 存储 texture,要考虑 transpose - transpose 之后的维度是 [a, b, c, d],对应的texture_2darray - .width = c - .height = b - .len = a * d + 3 / 4 - -低于 4 维的 tensor,transpose 必须为 [0, 1, 2, 3] 既不考虑 transpose - -// TODO transpose 对于低维 tensor 的扩展原则。。。 -// [a, b] -> [1, 1, a, b] transpose 必须为 [0, 1, x, x] -// [a] -> [1, 1, 1, a] transpose 必须为 [0, 1, 2, 3] -// [a, b, c] -> [1, a, b, c] tranpose 必须为 [0, x, x, x] - -3 维 tensor [a, b, c] 对应的 texture_2darray, -.width = c -.height = b -.len = a + 3 / 4 - - 2 维 tensor [a, b] 对应的 texture_2darray - .width = b + 3 / 4 - .height = a - .len = 1 - - 1 维 tensor [a] 对应的 texture_2darray - .width = a + 3 / 4 - .height = 1 - .len = 1 - */ - + } +// tmpTextureDes.pixelFormat = .rgba16Float -public class Texture: Tensorial { - var dim: Dim - public var tensorDim: Dim - public var padToFourDim: Dim - private var textureDesc: MTLTextureDescriptor! - public var metalTexture: MTLTexture! - var transpose: [Int] = [0, 1, 2, 3] - - func toTensor() -> [Float32] { - guard padToFourDim.cout() == 4 else { - fatalError("- not support -") + tmpTextureDes.usage = [.shaderRead, .shaderWrite] + tmpTextureDes.storageMode = .shared + textureDesc = tmpTextureDes + metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! 
" texture nil " } - return metalTexture.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) - } - - func realNHWC() -> [Float32] { - guard padToFourDim.cout() == 4 else { - fatalError(" - not support - ") - } - return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - } - - func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) { - transpose = inTranspose - for i in 0..<(4 - tensorDim.cout()) { - if i != inTranspose[i] { - fatalError() - } - } - let newDim = transpose.map { padToFourDim[$0] } - - let newLayout = transpose.map { layout.layoutWithDim[$0] } - layout = DataLayout.init(newLayout) - dim = Dim.init(inDim: newDim) +// required public init(inDim: Dim, inLayout: DataLayout = .NHWC, inTexture: MTLTexture) { +// dim = inDim +// layout = inLayout +// metalTexture = inTexture +// let tmpTextureDes = MTLTextureDescriptor.init() +// +// if inDim.cout() == 1 { +// tmpTextureDes.width = inDim[0] +// tmpTextureDes.textureType = .type1D +// } else if inDim.cout() == 2 { +// tmpTextureDes.height = inDim[0] +// tmpTextureDes.width = inDim[1] +// tmpTextureDes.textureType = .type2D +// } else if inDim.cout() == 3 { +// fatalError(" not support texture dim 3") +// } else if inDim.cout() == 4 { +// tmpTextureDes.height = inDim[1] +// tmpTextureDes.width = inDim[2] +// tmpTextureDes.depth = inDim[3] * inDim[1] +// tmpTextureDes.textureType = .type2DArray +// } +// +// tmpTextureDes.pixelFormat = .r32Float +// tmpTextureDes.storageMode = .shared +// textureDesc = tmpTextureDes +// let device = MTLCreateSystemDefaultDevice() +// metalTexture = device!.makeTexture(descriptor: tmpTextureDes)! 
+// } - let tmpTextureDes = MTLTextureDescriptor.init() - tmpTextureDes.textureType = .type2DArray - tmpTextureDes.depth = 1 - - switch tensorDim.cout() { - case 4: - tmpTextureDes.width = newDim[2] - tmpTextureDes.height = newDim[1] - tmpTextureDes.arrayLength = ((newDim[0]) * (newDim[3]) + 3) / 4 - case 3: - tmpTextureDes.width = newDim[3] - tmpTextureDes.height = newDim[2] - tmpTextureDes.arrayLength = (newDim[1] + 3) / 4 - case 2, 1: - tmpTextureDes.width = (newDim[3] + 3) / 4 - tmpTextureDes.height = newDim[2] - tmpTextureDes.arrayLength = 1 - default: - fatalError("unreachable") - } - - if computePrecision == .Float16 { - tmpTextureDes.pixelFormat = .rgba16Float - } else if computePrecision == .Float32 { - tmpTextureDes.pixelFormat = .rgba32Float - } +// init() { +// dim = Dim.init(inDim: []) +// layout = .NCHW +// let device = MTLCreateSystemDefaultDevice() +// textureDesc = MTLTextureDescriptor.init() +// metalTexture = device!.makeTexture(descriptor: textureDesc)! +// } - tmpTextureDes.usage = [.shaderRead, .shaderWrite] - tmpTextureDes.storageMode = .shared - textureDesc = tmpTextureDes - metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! 
" texture nil " - } - - init(device: MTLDevice, inDim: Dim) { - var fourDim: Dim - if inDim.cout() == 4 { - fourDim = inDim - } else if inDim.cout() < 4 { - var fourDimNum: [Int] = [] - for _ in 0..<(4 - inDim.cout()) { - fourDimNum.append(1) - } - fourDimNum.append(contentsOf: inDim.dims) - fourDim = Dim.init(inDim: fourDimNum) - } else { - fatalError(" not support ") - } - tensorDim = inDim - dim = fourDim - padToFourDim = fourDim - layout = DataLayout.init([(.N, fourDim[0]), (.C, fourDim[1]), (.H, fourDim[2]), (.W, fourDim[3])]) - } - - private(set) var layout: DataLayout + private(set) var layout: DataLayout } extension Texture { - public var description: String { - return debugDescription - } - - public var debugDescription: String{ - var str = "" - str += "Dim: \(dim) \n value:[ " - str += "\(metalTexture)" - str += " ]" - return str - } - + public var description: String { + return debugDescription + } + + public var debugDescription: String{ + var str = "" + str += "Dim: \(dim) \n value:[ " + str += "\(metalTexture)" + str += " ]" + return str + } + } diff --git a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h index 50b60e9fe6c973b675a97e16c3c15af2b72e3fc4..ffa44be38a4c3a1f3109c51b3d15506591f2de2e 100644 --- a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h +++ b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h @@ -14,15 +14,12 @@ #pragma once -#import "PaddleMobileCPU.h" -#import "CPUCompute.h" -#import "PaddleMobileGPU.h" #import //! Project version number for paddle_mobile. -//FOUNDATION_EXPORT double paddle_mobileVersionNumber; +FOUNDATION_EXPORT double paddle_mobileVersionNumber; //! Project version string for paddle_mobile. 
-//FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[]; +FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[]; diff --git a/python/tools/imagetools/imagetools.py b/python/tools/imagetools/imagetools.py deleted file mode 100644 index 2a4432858007d6858f2728815670cfd1ed5ec786..0000000000000000000000000000000000000000 --- a/python/tools/imagetools/imagetools.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 -import cv2 -from array import array - - -def resize_take_rgbs(path, shape_h_w): - print '--------------resize_take_rgbs-----------------begin' - image = cv2.imread(path) - # print image.shape - cv2.imshow("before", image) - - print_rgb(image[0, 0]) - # image len may be for .just check it - # image.resize(shape_h_w) - - image = cv2.resize(image, (shape_h_w[0], shape_h_w[1])) - - cv2.imshow("after", image) - print image.shape - height = shape_h_w[0] - width = shape_h_w[1] - - rs_ = [] - gs_ = [] - bs_ = [] - for h in range(0, height): - for w in range(0, width): - bs_.append(image[h, w, 0]) - gs_.append(image[h, w, 1]) - rs_.append(image[h, w, 2]) - - # print image[2, 2, 0]/255. 
- print len(bs_) - print len(gs_) - print len(rs_) - print '--------------resize_take_rgbs-----------------end' - return bs_, gs_, rs_ - - -def print_rgb((b, g, r)): - print "像素 - R:%d,G:%d,B:%d" % (r, g, b) # 显示像素值 - # - # image[0, 0] = (100, 150, 200) # 更改位置(0,0)处的像素 - # - # (b, g, r) = image[0, 0] # 再次读取(0,0)像素 - # print "位置(0,0)处的像素 - 红:%d,绿:%d,蓝:%d" % (r, g, b) # 显示更改后的像素值 - # - # corner = image[0:100, 0:100] # 读取像素块 - # cv2.imshow("Corner", corner) # 显示读取的像素块 - # - # image[0:100, 0:100] = (0, 255, 0); # 更改读取的像素块 - # - # cv2.imshow("Updated", image) # 显示图像 - # - # cv2.waitKey(0) # 程序暂停 - - -def save_to_file(to_file_name, array): - to_file = open(to_file_name, "wb") - array.tofile(to_file) - to_file.close() diff --git a/python/tools/imagetools/img2nchw.py b/python/tools/imagetools/img2nchw.py deleted file mode 100644 index 70ca456a1b1b5d20b92d0aaa51b01abb352c1d54..0000000000000000000000000000000000000000 --- a/python/tools/imagetools/img2nchw.py +++ /dev/null @@ -1,69 +0,0 @@ -# coding=utf-8 -import cv2 -from array import array -import imagetools as tools -from enum import Enum - - -class ChannelType(Enum): - RGB = 0, - BGR = 1 - - -def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR): - print '--------------combine_bgrs_nchw-----------------begin' - print "scale: %f" % scale - print means_b_g_r - # print len(bgrs) - bs = bgrs[0] - gs = bgrs[1] - rs = bgrs[2] - - assert len(bs) == len(gs) == len(rs) - print len(bs) - bgrs_float_array = array('f') - - if channel_type == ChannelType.BGR: - print 'bgr' - for i in range(0, len(bs)): - bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale) # b - for i in range(0, len(gs)): - bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale) # g - for i in range(0, len(rs)): - bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale) # r - elif channel_type == ChannelType.RGB: - print 'rgb' - - for i in range(0, len(rs)): - bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale) # r - for i in 
range(0, len(gs)): - bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale) # g - for i in range(0, len(bs)): - bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale) # b - - print len(bgrs_float_array) - - print '------------------' - print bgrs_float_array[0] - print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2] - - # for i in range(0, 9): - # print'bs %d' % i - # print bs[i] / 255. - - print bs[416 * 2 + 2] / 255. - print '--------------combine_bgrs_nchw-----------------end' - - return bgrs_float_array - - -# bgrs = tools.resize_take_rgbs('banana.jpeg', (224, 224, 3)) -# array = combine_bgrs_nchw(bgrs, (103.94, 116.78, 123.68), 0.017, array,ChannelType.BGR) -# tools.save_to_file('banana_1_3_224_224_nchw_float') - -# cv2.waitKey(0) - - -bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3)) -array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB) -tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array) diff --git a/python/tools/imagetools/img2nhwc.py b/python/tools/imagetools/img2nhwc.py deleted file mode 100644 index c982fe303ecde08a9de1827ca67024567322d47f..0000000000000000000000000000000000000000 --- a/python/tools/imagetools/img2nhwc.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding=utf-8 -import cv2 -from array import array -import imagetools as tools - - -def combine_bgrs_nhwc(bgrs, means_b_g_r, scale): - print "scale: %f" % scale - print means_b_g_r - # print len(bgrs) - bs = bgrs[0] - gs = bgrs[1] - rs = bgrs[2] - assert len(bs) == len(gs) == len(rs) - # print len(bs) - bgrs_float_array = array('f') - for i in range(0, len(bs)): - bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale) # r - bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale) # g - bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale) # b - - print len(bgrs_float_array) - - print '------------------' - print bgrs_float_array[0] - print bgrs_float_array[999] - return bgrs_float_array - - -bgrs = tools.resize_take_rgbs('newyolo_1.jpg', (416, 
416, 3)) -array = combine_bgrs_nhwc(bgrs, (0, 0, 0), 1.0 / 255) -tools.save_to_file('desktop_1_3_416_416_nhwc_float', array) - -cv2.waitKey(0) diff --git a/python/tools/imagetools/numpy2binary.py b/python/tools/imagetools/numpy2binary.py deleted file mode 100644 index dd4bc6e10074183b8dcee4122860c4140ff54229..0000000000000000000000000000000000000000 --- a/python/tools/imagetools/numpy2binary.py +++ /dev/null @@ -1,47 +0,0 @@ -# coding=utf-8 - -# 这个脚本是可以将numpy合并到二进制 -import cv2 -import numpy as np -import imagetools as tools -from array import array - -# -# image = cv2.imread(path) -# print image.shape -# -# print_rgb(image[0, 0]) -# # image len may be for .just check it -# image.resize(shape_h_w) - - -data = np.fromfile('datas/img.res') -print data.size -print data[0] - -data.reshape(1, 3, 416, 416) -out_array = array('f') -print'--------------------' -print data.size -print data[0] - -print '如果是nhwc --------' -# rgb rgb rgb rgb rgb -print data[416 * 3 * 2 + 3 * 2 + 2] -# print data[2] - -print '如果是nchw --------' -# rgb rgb rgb rgb rgb -print data[416 * 416 * 2 + 416 * 2 + 2] -# print data[2] - -# 明明是nchw - -for i in range(0, data.size): - out_array.append(data[i]) - -print len(out_array) - -print out_array[416 * 416 * 2 + 416 * 2 + 2] - -tools.save_to_file('datas/in_put_1_3_416_416_2', out_array) diff --git a/python/tools/mdl2fluid/float2halffloat.py b/python/tools/mdl2fluid/float2halffloat.py deleted file mode 100644 index 3df8d43f9548429cef5d49f72fb07f3cef264834..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/float2halffloat.py +++ /dev/null @@ -1,70 +0,0 @@ -# encoding:utf-8 -import math -import re - - -def Real2HalfFloat(data): - MINNUM = -65536 - MAXNUM = 65535 - FloatVal = 0 - if data: - if data < MINNUM: - data = MINNUM - if data > MAXNUM: - data = MAXNUM - - sign = 0 - if data < 0: - sign = 1 - data = -data - - exp = math.floor((math.log2(data))) - expout = exp + 16 - - Mantial = round(data / pow(2, exp - 10)) - 1024 - - if expout <= 
0: - FloatVal = 0 - else: - FloatVal = sign * 32768 + expout * 1024 + Mantial - return FloatVal - - -def ReadCfloatData(sourcefile): - input = [] - with open(sourcfile, 'r') as f: - for line in f.readlines(): - line = line.strip() - line = re.sub('\s+', ' ', line) # 两个数字间多个空格 - input.append(line.split(' ')) - destfile = sourcefile.replace('.dat', '') - destfile = destfile.replace('.txt', '') - destfile += 'Out.dat' - with open(destfile, 'w') as fw: - for i in range(len(input)): - if len(input[i]) == 2: - real = Real2HalfFloat(float(input[i][0])) - imag = Real2HalfFloat(float(input[i][1])) - result = real * 65536 + imag - if imag and not real: - fw.write('0x0000' + "%X" % result + '\n') - elif not imag and not real: - fw.write('0x00000000' + '\n') - else: - fw.write('0x' + "%X" % result + '\n') - elif len(input[i]) == 1: - result = Real2HalfFloat(float(input[i][0])) - if result: - fw.write('0x' + "%X" % result + '\n') - else: - fw.write('0x0000' + '\n') - - -if __name__ == '__main__': - print('Tips: Input number 0 if you want to exit!\n') - while True: - sourcfile = input("input source file:\n") - if sourcfile is '0': - break - ReadCfloatData(sourcfile) - print('Transfer Success!') diff --git a/python/tools/mdl2fluid/framework.proto b/python/tools/mdl2fluid/framework.proto deleted file mode 100644 index 07bfef1c2a69c236ac86732b2dbc00d8abb6334b..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/framework.proto +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -syntax = "proto2"; -option optimize_for = LITE_RUNTIME; -package paddle_mobile.framework.proto; - -enum AttrType { - INT = 0; - FLOAT = 1; - STRING = 2; - INTS = 3; - FLOATS = 4; - STRINGS = 5; - BOOLEAN = 6; - BOOLEANS = 7; - BLOCK = 8; - LONG = 9; -} - -// OpDesc describes an instance of a C++ framework::OperatorBase -// derived class type. -message OpDesc { - - message Attr { - required string name = 1; - required AttrType type = 2; - optional int32 i = 3; - optional float f = 4; - optional string s = 5; - repeated int32 ints = 6; - repeated float floats = 7; - repeated string strings = 8; - optional bool b = 10; - repeated bool bools = 11; - optional int32 block_idx = 12; - optional int64 l = 13; - }; - - message Var { - required string parameter = 1; - repeated string arguments = 2; - }; - - required string type = 3; - repeated Var inputs = 1; - repeated Var outputs = 2; - repeated Attr attrs = 4; - optional bool is_target = 5 [ default = false ]; -}; - -// OpProto describes a C++ framework::OperatorBase derived class. -message OpProto { - - // VarProto describes the C++ type framework::Variable. - message Var { - required string name = 1; - required string comment = 2; - - optional bool duplicable = 3 [ default = false ]; - optional bool intermediate = 4 [ default = false ]; - optional bool dispensable = 5 [ default = false ]; - } - - // AttrProto describes the C++ type Attribute. - message Attr { - required string name = 1; - required AttrType type = 2; - required string comment = 3; - // If that attribute is generated, it means the Paddle third - // language binding has responsibility to fill that - // attribute. End-User should not set that attribute. 
- optional bool generated = 4 [ default = false ]; - } - - required string type = 1; - repeated Var inputs = 2; - repeated Var outputs = 3; - repeated Attr attrs = 4; - required string comment = 5; -} - -message VarType { - enum Type { - // Pod Types - BOOL = 0; - INT16 = 1; - INT32 = 2; - INT64 = 3; - FP16 = 4; - FP32 = 5; - FP64 = 6; - - // Other types that may need additional descriptions - LOD_TENSOR = 7; - SELECTED_ROWS = 8; - FEED_MINIBATCH = 9; - FETCH_LIST = 10; - STEP_SCOPES = 11; - LOD_RANK_TABLE = 12; - LOD_TENSOR_ARRAY = 13; - PLACE_LIST = 14; - READER = 15; - CHANNEL = 16; - // Any runtime decided variable type is raw - // raw variables should manage their own allocations - // in operators like nccl_op - RAW = 17; - TUPLE = 18; - } - - required Type type = 1; - - message TensorDesc { - // Should only be PODType. Is enforced in C++ - required Type data_type = 1; - repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] - } - optional TensorDesc selected_rows = 2; - - message LoDTensorDesc { - required TensorDesc tensor = 1; - optional int32 lod_level = 2 [ default = 0 ]; - } - optional LoDTensorDesc lod_tensor = 3; - - message LoDTensorArrayDesc { - required TensorDesc tensor = 1; - optional int32 lod_level = 2 [ default = 0 ]; - } - optional LoDTensorArrayDesc tensor_array = 4; - - message ReaderDesc { repeated LoDTensorDesc lod_tensor = 1; } - optional ReaderDesc reader = 5; - - message ChannelDesc { - required Type data_type = 1; - required int64 capacity = 2; - } - optional ChannelDesc channel = 6; - - message Tuple { repeated Type element_type = 1; } - optional Tuple tuple = 7; -} - -message VarDesc { - required string name = 1; - required VarType type = 2; - optional bool persistable = 3 [ default = false ]; -} - -message BlockDesc { - required int32 idx = 1; - required int32 parent_idx = 2; - repeated VarDesc vars = 3; - repeated OpDesc ops = 4; - optional int32 forward_block_idx = 5 [ default = -1 ]; -} - -// Please refer to -// 
https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md -// for more details. -// TODO(panyx0718): A model can have multiple programs. Need a -// way to distinguish them. Maybe ID or name? -message ProgramDesc { repeated BlockDesc blocks = 1; } diff --git a/python/tools/mdl2fluid/framework_pb2.py b/python/tools/mdl2fluid/framework_pb2.py deleted file mode 100644 index 3a43deebc91d42e9eb38cf9940020238041d81da..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/framework_pb2.py +++ /dev/null @@ -1,1141 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: framework.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='framework.proto', - package='paddle_mobile.framework.proto', - syntax='proto2', - serialized_pb=_b('\n\x0f\x66ramework.proto\x12\x1dpaddle_mobile.framework.proto\"\xe5\x03\n\x06OpDesc\x12\x0c\n\x04type\x18\x03 \x02(\t\x12\x39\n\x06inputs\x18\x01 \x03(\x0b\x32).paddle_mobile.framework.proto.OpDesc.Var\x12:\n\x07outputs\x18\x02 \x03(\x0b\x32).paddle_mobile.framework.proto.OpDesc.Var\x12\x39\n\x05\x61ttrs\x18\x04 \x03(\x0b\x32*.paddle_mobile.framework.proto.OpDesc.Attr\x12\x18\n\tis_target\x18\x05 \x01(\x08:\x05\x66\x61lse\x1a\xd3\x01\n\x04\x41ttr\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x35\n\x04type\x18\x02 \x02(\x0e\x32\'.paddle_mobile.framework.proto.AttrType\x12\t\n\x01i\x18\x03 \x01(\x05\x12\t\n\x01\x66\x18\x04 \x01(\x02\x12\t\n\x01s\x18\x05 \x01(\t\x12\x0c\n\x04ints\x18\x06 
\x03(\x05\x12\x0e\n\x06\x66loats\x18\x07 \x03(\x02\x12\x0f\n\x07strings\x18\x08 \x03(\t\x12\t\n\x01\x62\x18\n \x01(\x08\x12\r\n\x05\x62ools\x18\x0b \x03(\x08\x12\x11\n\tblock_idx\x18\x0c \x01(\x05\x12\t\n\x01l\x18\r \x01(\x03\x1a+\n\x03Var\x12\x11\n\tparameter\x18\x01 \x02(\t\x12\x11\n\targuments\x18\x02 \x03(\t\"\xcf\x03\n\x07OpProto\x12\x0c\n\x04type\x18\x01 \x02(\t\x12:\n\x06inputs\x18\x02 \x03(\x0b\x32*.paddle_mobile.framework.proto.OpProto.Var\x12;\n\x07outputs\x18\x03 \x03(\x0b\x32*.paddle_mobile.framework.proto.OpProto.Var\x12:\n\x05\x61ttrs\x18\x04 \x03(\x0b\x32+.paddle_mobile.framework.proto.OpProto.Attr\x12\x0f\n\x07\x63omment\x18\x05 \x02(\t\x1ax\n\x03Var\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x0f\n\x07\x63omment\x18\x02 \x02(\t\x12\x19\n\nduplicable\x18\x03 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0cintermediate\x18\x04 \x01(\x08:\x05\x66\x61lse\x12\x1a\n\x0b\x64ispensable\x18\x05 \x01(\x08:\x05\x66\x61lse\x1av\n\x04\x41ttr\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x35\n\x04type\x18\x02 \x02(\x0e\x32\'.paddle_mobile.framework.proto.AttrType\x12\x0f\n\x07\x63omment\x18\x03 \x02(\t\x12\x18\n\tgenerated\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xb9\n\n\x07VarType\x12\x39\n\x04type\x18\x01 \x02(\x0e\x32+.paddle_mobile.framework.proto.VarType.Type\x12H\n\rselected_rows\x18\x02 \x01(\x0b\x32\x31.paddle_mobile.framework.proto.VarType.TensorDesc\x12H\n\nlod_tensor\x18\x03 \x01(\x0b\x32\x34.paddle_mobile.framework.proto.VarType.LoDTensorDesc\x12O\n\x0ctensor_array\x18\x04 \x01(\x0b\x32\x39.paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc\x12\x41\n\x06reader\x18\x05 \x01(\x0b\x32\x31.paddle_mobile.framework.proto.VarType.ReaderDesc\x12\x43\n\x07\x63hannel\x18\x06 \x01(\x0b\x32\x32.paddle_mobile.framework.proto.VarType.ChannelDesc\x12;\n\x05tuple\x18\x07 \x01(\x0b\x32,.paddle_mobile.framework.proto.VarType.Tuple\x1aZ\n\nTensorDesc\x12>\n\tdata_type\x18\x01 \x02(\x0e\x32+.paddle_mobile.framework.proto.VarType.Type\x12\x0c\n\x04\x64ims\x18\x02 
\x03(\x03\x1ah\n\rLoDTensorDesc\x12\x41\n\x06tensor\x18\x01 \x02(\x0b\x32\x31.paddle_mobile.framework.proto.VarType.TensorDesc\x12\x14\n\tlod_level\x18\x02 \x01(\x05:\x01\x30\x1am\n\x12LoDTensorArrayDesc\x12\x41\n\x06tensor\x18\x01 \x02(\x0b\x32\x31.paddle_mobile.framework.proto.VarType.TensorDesc\x12\x14\n\tlod_level\x18\x02 \x01(\x05:\x01\x30\x1aV\n\nReaderDesc\x12H\n\nlod_tensor\x18\x01 \x03(\x0b\x32\x34.paddle_mobile.framework.proto.VarType.LoDTensorDesc\x1a_\n\x0b\x43hannelDesc\x12>\n\tdata_type\x18\x01 \x02(\x0e\x32+.paddle_mobile.framework.proto.VarType.Type\x12\x10\n\x08\x63\x61pacity\x18\x02 \x02(\x03\x1aJ\n\x05Tuple\x12\x41\n\x0c\x65lement_type\x18\x01 \x03(\x0e\x32+.paddle_mobile.framework.proto.VarType.Type\"\x8e\x02\n\x04Type\x12\x08\n\x04\x42OOL\x10\x00\x12\t\n\x05INT16\x10\x01\x12\t\n\x05INT32\x10\x02\x12\t\n\x05INT64\x10\x03\x12\x08\n\x04\x46P16\x10\x04\x12\x08\n\x04\x46P32\x10\x05\x12\x08\n\x04\x46P64\x10\x06\x12\x0e\n\nLOD_TENSOR\x10\x07\x12\x11\n\rSELECTED_ROWS\x10\x08\x12\x12\n\x0e\x46\x45\x45\x44_MINIBATCH\x10\t\x12\x0e\n\nFETCH_LIST\x10\n\x12\x0f\n\x0bSTEP_SCOPES\x10\x0b\x12\x12\n\x0eLOD_RANK_TABLE\x10\x0c\x12\x14\n\x10LOD_TENSOR_ARRAY\x10\r\x12\x0e\n\nPLACE_LIST\x10\x0e\x12\n\n\x06READER\x10\x0f\x12\x0b\n\x07\x43HANNEL\x10\x10\x12\x07\n\x03RAW\x10\x11\x12\t\n\x05TUPLE\x10\x12\"i\n\x07VarDesc\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x34\n\x04type\x18\x02 \x02(\x0b\x32&.paddle_mobile.framework.proto.VarType\x12\x1a\n\x0bpersistable\x18\x03 \x01(\x08:\x05\x66\x61lse\"\xb5\x01\n\tBlockDesc\x12\x0b\n\x03idx\x18\x01 \x02(\x05\x12\x12\n\nparent_idx\x18\x02 \x02(\x05\x12\x34\n\x04vars\x18\x03 \x03(\x0b\x32&.paddle_mobile.framework.proto.VarDesc\x12\x32\n\x03ops\x18\x04 \x03(\x0b\x32%.paddle_mobile.framework.proto.OpDesc\x12\x1d\n\x11\x66orward_block_idx\x18\x05 \x01(\x05:\x02-1\"G\n\x0bProgramDesc\x12\x38\n\x06\x62locks\x18\x01 
\x03(\x0b\x32(.paddle_mobile.framework.proto.BlockDesc*}\n\x08\x41ttrType\x12\x07\n\x03INT\x10\x00\x12\t\n\x05\x46LOAT\x10\x01\x12\n\n\x06STRING\x10\x02\x12\x08\n\x04INTS\x10\x03\x12\n\n\x06\x46LOATS\x10\x04\x12\x0b\n\x07STRINGS\x10\x05\x12\x0b\n\x07\x42OOLEAN\x10\x06\x12\x0c\n\x08\x42OOLEANS\x10\x07\x12\t\n\x05\x42LOCK\x10\x08\x12\x08\n\x04LONG\x10\tB\x02H\x03') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -_ATTRTYPE = _descriptor.EnumDescriptor( - name='AttrType', - full_name='paddle_mobile.framework.proto.AttrType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='INT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FLOAT', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STRING', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='INTS', index=3, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FLOATS', index=4, number=4, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STRINGS', index=5, number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BOOLEAN', index=6, number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BOOLEANS', index=7, number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BLOCK', index=8, number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LONG', index=9, number=9, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2708, - serialized_end=2833, -) -_sym_db.RegisterEnumDescriptor(_ATTRTYPE) - -AttrType = enum_type_wrapper.EnumTypeWrapper(_ATTRTYPE) -INT = 0 -FLOAT = 1 -STRING = 2 -INTS = 3 -FLOATS = 4 -STRINGS = 5 -BOOLEAN = 6 -BOOLEANS = 7 -BLOCK = 8 -LONG = 9 - - -_VARTYPE_TYPE = _descriptor.EnumDescriptor( - name='Type', - 
full_name='paddle_mobile.framework.proto.VarType.Type', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='BOOL', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='INT16', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='INT32', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='INT64', index=3, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FP16', index=4, number=4, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FP32', index=5, number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FP64', index=6, number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LOD_TENSOR', index=7, number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SELECTED_ROWS', index=8, number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FEED_MINIBATCH', index=9, number=9, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FETCH_LIST', index=10, number=10, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STEP_SCOPES', index=11, number=11, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LOD_RANK_TABLE', index=12, number=12, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LOD_TENSOR_ARRAY', index=13, number=13, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PLACE_LIST', index=14, number=14, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='READER', index=15, number=15, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CHANNEL', index=16, number=16, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='RAW', index=17, number=17, - options=None, - type=None), - 
_descriptor.EnumValueDescriptor( - name='TUPLE', index=18, number=18, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2072, - serialized_end=2342, -) -_sym_db.RegisterEnumDescriptor(_VARTYPE_TYPE) - - -_OPDESC_ATTR = _descriptor.Descriptor( - name='Attr', - full_name='paddle_mobile.framework.proto.OpDesc.Attr', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='paddle_mobile.framework.proto.OpDesc.Attr.name', index=0, - number=1, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='paddle_mobile.framework.proto.OpDesc.Attr.type', index=1, - number=2, type=14, cpp_type=8, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='i', full_name='paddle_mobile.framework.proto.OpDesc.Attr.i', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='f', full_name='paddle_mobile.framework.proto.OpDesc.Attr.f', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='s', full_name='paddle_mobile.framework.proto.OpDesc.Attr.s', index=4, - number=5, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='ints', full_name='paddle_mobile.framework.proto.OpDesc.Attr.ints', index=5, - number=6, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='floats', full_name='paddle_mobile.framework.proto.OpDesc.Attr.floats', index=6, - number=7, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='strings', full_name='paddle_mobile.framework.proto.OpDesc.Attr.strings', index=7, - number=8, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='b', full_name='paddle_mobile.framework.proto.OpDesc.Attr.b', index=8, - number=10, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bools', full_name='paddle_mobile.framework.proto.OpDesc.Attr.bools', index=9, - number=11, type=8, cpp_type=7, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='block_idx', full_name='paddle_mobile.framework.proto.OpDesc.Attr.block_idx', index=10, - number=12, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, 
extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='l', full_name='paddle_mobile.framework.proto.OpDesc.Attr.l', index=11, - number=13, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=280, - serialized_end=491, -) - -_OPDESC_VAR = _descriptor.Descriptor( - name='Var', - full_name='paddle_mobile.framework.proto.OpDesc.Var', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='parameter', full_name='paddle_mobile.framework.proto.OpDesc.Var.parameter', index=0, - number=1, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='arguments', full_name='paddle_mobile.framework.proto.OpDesc.Var.arguments', index=1, - number=2, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=493, - serialized_end=536, -) - -_OPDESC = _descriptor.Descriptor( - name='OpDesc', - full_name='paddle_mobile.framework.proto.OpDesc', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='type', full_name='paddle_mobile.framework.proto.OpDesc.type', index=0, - number=3, type=9, cpp_type=9, label=2, - has_default_value=False, 
default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='inputs', full_name='paddle_mobile.framework.proto.OpDesc.inputs', index=1, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='outputs', full_name='paddle_mobile.framework.proto.OpDesc.outputs', index=2, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='attrs', full_name='paddle_mobile.framework.proto.OpDesc.attrs', index=3, - number=4, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='is_target', full_name='paddle_mobile.framework.proto.OpDesc.is_target', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_OPDESC_ATTR, _OPDESC_VAR, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=51, - serialized_end=536, -) - - -_OPPROTO_VAR = _descriptor.Descriptor( - name='Var', - full_name='paddle_mobile.framework.proto.OpProto.Var', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='paddle_mobile.framework.proto.OpProto.Var.name', index=0, - 
number=1, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='comment', full_name='paddle_mobile.framework.proto.OpProto.Var.comment', index=1, - number=2, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='duplicable', full_name='paddle_mobile.framework.proto.OpProto.Var.duplicable', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='intermediate', full_name='paddle_mobile.framework.proto.OpProto.Var.intermediate', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dispensable', full_name='paddle_mobile.framework.proto.OpProto.Var.dispensable', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=762, - serialized_end=882, -) - -_OPPROTO_ATTR = _descriptor.Descriptor( - name='Attr', - full_name='paddle_mobile.framework.proto.OpProto.Attr', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - 
_descriptor.FieldDescriptor( - name='name', full_name='paddle_mobile.framework.proto.OpProto.Attr.name', index=0, - number=1, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='paddle_mobile.framework.proto.OpProto.Attr.type', index=1, - number=2, type=14, cpp_type=8, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='comment', full_name='paddle_mobile.framework.proto.OpProto.Attr.comment', index=2, - number=3, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='generated', full_name='paddle_mobile.framework.proto.OpProto.Attr.generated', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=884, - serialized_end=1002, -) - -_OPPROTO = _descriptor.Descriptor( - name='OpProto', - full_name='paddle_mobile.framework.proto.OpProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='type', full_name='paddle_mobile.framework.proto.OpProto.type', index=0, - number=1, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='inputs', full_name='paddle_mobile.framework.proto.OpProto.inputs', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='outputs', full_name='paddle_mobile.framework.proto.OpProto.outputs', index=2, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='attrs', full_name='paddle_mobile.framework.proto.OpProto.attrs', index=3, - number=4, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='comment', full_name='paddle_mobile.framework.proto.OpProto.comment', index=4, - number=5, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_OPPROTO_VAR, _OPPROTO_ATTR, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=539, - serialized_end=1002, -) - - -_VARTYPE_TENSORDESC = _descriptor.Descriptor( - name='TensorDesc', - full_name='paddle_mobile.framework.proto.VarType.TensorDesc', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='data_type', full_name='paddle_mobile.framework.proto.VarType.TensorDesc.data_type', index=0, - number=1, type=14, 
cpp_type=8, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dims', full_name='paddle_mobile.framework.proto.VarType.TensorDesc.dims', index=1, - number=2, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1501, - serialized_end=1591, -) - -_VARTYPE_LODTENSORDESC = _descriptor.Descriptor( - name='LoDTensorDesc', - full_name='paddle_mobile.framework.proto.VarType.LoDTensorDesc', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='tensor', full_name='paddle_mobile.framework.proto.VarType.LoDTensorDesc.tensor', index=0, - number=1, type=11, cpp_type=10, label=2, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='lod_level', full_name='paddle_mobile.framework.proto.VarType.LoDTensorDesc.lod_level', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1593, - serialized_end=1697, -) - -_VARTYPE_LODTENSORARRAYDESC = _descriptor.Descriptor( - name='LoDTensorArrayDesc', - full_name='paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc', - 
filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='tensor', full_name='paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc.tensor', index=0, - number=1, type=11, cpp_type=10, label=2, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='lod_level', full_name='paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc.lod_level', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1699, - serialized_end=1808, -) - -_VARTYPE_READERDESC = _descriptor.Descriptor( - name='ReaderDesc', - full_name='paddle_mobile.framework.proto.VarType.ReaderDesc', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='lod_tensor', full_name='paddle_mobile.framework.proto.VarType.ReaderDesc.lod_tensor', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1810, - serialized_end=1896, -) - -_VARTYPE_CHANNELDESC = _descriptor.Descriptor( - name='ChannelDesc', - full_name='paddle_mobile.framework.proto.VarType.ChannelDesc', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='data_type', 
full_name='paddle_mobile.framework.proto.VarType.ChannelDesc.data_type', index=0, - number=1, type=14, cpp_type=8, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='capacity', full_name='paddle_mobile.framework.proto.VarType.ChannelDesc.capacity', index=1, - number=2, type=3, cpp_type=2, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1898, - serialized_end=1993, -) - -_VARTYPE_TUPLE = _descriptor.Descriptor( - name='Tuple', - full_name='paddle_mobile.framework.proto.VarType.Tuple', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='element_type', full_name='paddle_mobile.framework.proto.VarType.Tuple.element_type', index=0, - number=1, type=14, cpp_type=8, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1995, - serialized_end=2069, -) - -_VARTYPE = _descriptor.Descriptor( - name='VarType', - full_name='paddle_mobile.framework.proto.VarType', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='type', full_name='paddle_mobile.framework.proto.VarType.type', index=0, - number=1, type=14, cpp_type=8, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='selected_rows', full_name='paddle_mobile.framework.proto.VarType.selected_rows', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='lod_tensor', full_name='paddle_mobile.framework.proto.VarType.lod_tensor', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tensor_array', full_name='paddle_mobile.framework.proto.VarType.tensor_array', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='reader', full_name='paddle_mobile.framework.proto.VarType.reader', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='channel', full_name='paddle_mobile.framework.proto.VarType.channel', index=5, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tuple', full_name='paddle_mobile.framework.proto.VarType.tuple', index=6, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_VARTYPE_TENSORDESC, _VARTYPE_LODTENSORDESC, _VARTYPE_LODTENSORARRAYDESC, _VARTYPE_READERDESC, _VARTYPE_CHANNELDESC, _VARTYPE_TUPLE, ], - enum_types=[ - _VARTYPE_TYPE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1005, - serialized_end=2342, -) - - -_VARDESC = _descriptor.Descriptor( - name='VarDesc', - full_name='paddle_mobile.framework.proto.VarDesc', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='paddle_mobile.framework.proto.VarDesc.name', index=0, - number=1, type=9, cpp_type=9, label=2, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='paddle_mobile.framework.proto.VarDesc.type', index=1, - number=2, type=11, cpp_type=10, label=2, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='persistable', full_name='paddle_mobile.framework.proto.VarDesc.persistable', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2344, - serialized_end=2449, -) - - -_BLOCKDESC = _descriptor.Descriptor( - name='BlockDesc', - full_name='paddle_mobile.framework.proto.BlockDesc', - filename=None, - file=DESCRIPTOR, - 
containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='idx', full_name='paddle_mobile.framework.proto.BlockDesc.idx', index=0, - number=1, type=5, cpp_type=1, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='parent_idx', full_name='paddle_mobile.framework.proto.BlockDesc.parent_idx', index=1, - number=2, type=5, cpp_type=1, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='vars', full_name='paddle_mobile.framework.proto.BlockDesc.vars', index=2, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='ops', full_name='paddle_mobile.framework.proto.BlockDesc.ops', index=3, - number=4, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='forward_block_idx', full_name='paddle_mobile.framework.proto.BlockDesc.forward_block_idx', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=-1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2452, - serialized_end=2633, -) - - -_PROGRAMDESC = _descriptor.Descriptor( - name='ProgramDesc', - full_name='paddle_mobile.framework.proto.ProgramDesc', - 
filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='blocks', full_name='paddle_mobile.framework.proto.ProgramDesc.blocks', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2635, - serialized_end=2706, -) - -_OPDESC_ATTR.fields_by_name['type'].enum_type = _ATTRTYPE -_OPDESC_ATTR.containing_type = _OPDESC -_OPDESC_VAR.containing_type = _OPDESC -_OPDESC.fields_by_name['inputs'].message_type = _OPDESC_VAR -_OPDESC.fields_by_name['outputs'].message_type = _OPDESC_VAR -_OPDESC.fields_by_name['attrs'].message_type = _OPDESC_ATTR -_OPPROTO_VAR.containing_type = _OPPROTO -_OPPROTO_ATTR.fields_by_name['type'].enum_type = _ATTRTYPE -_OPPROTO_ATTR.containing_type = _OPPROTO -_OPPROTO.fields_by_name['inputs'].message_type = _OPPROTO_VAR -_OPPROTO.fields_by_name['outputs'].message_type = _OPPROTO_VAR -_OPPROTO.fields_by_name['attrs'].message_type = _OPPROTO_ATTR -_VARTYPE_TENSORDESC.fields_by_name['data_type'].enum_type = _VARTYPE_TYPE -_VARTYPE_TENSORDESC.containing_type = _VARTYPE -_VARTYPE_LODTENSORDESC.fields_by_name['tensor'].message_type = _VARTYPE_TENSORDESC -_VARTYPE_LODTENSORDESC.containing_type = _VARTYPE -_VARTYPE_LODTENSORARRAYDESC.fields_by_name['tensor'].message_type = _VARTYPE_TENSORDESC -_VARTYPE_LODTENSORARRAYDESC.containing_type = _VARTYPE -_VARTYPE_READERDESC.fields_by_name['lod_tensor'].message_type = _VARTYPE_LODTENSORDESC -_VARTYPE_READERDESC.containing_type = _VARTYPE -_VARTYPE_CHANNELDESC.fields_by_name['data_type'].enum_type = _VARTYPE_TYPE -_VARTYPE_CHANNELDESC.containing_type = _VARTYPE -_VARTYPE_TUPLE.fields_by_name['element_type'].enum_type = 
_VARTYPE_TYPE -_VARTYPE_TUPLE.containing_type = _VARTYPE -_VARTYPE.fields_by_name['type'].enum_type = _VARTYPE_TYPE -_VARTYPE.fields_by_name['selected_rows'].message_type = _VARTYPE_TENSORDESC -_VARTYPE.fields_by_name['lod_tensor'].message_type = _VARTYPE_LODTENSORDESC -_VARTYPE.fields_by_name['tensor_array'].message_type = _VARTYPE_LODTENSORARRAYDESC -_VARTYPE.fields_by_name['reader'].message_type = _VARTYPE_READERDESC -_VARTYPE.fields_by_name['channel'].message_type = _VARTYPE_CHANNELDESC -_VARTYPE.fields_by_name['tuple'].message_type = _VARTYPE_TUPLE -_VARTYPE_TYPE.containing_type = _VARTYPE -_VARDESC.fields_by_name['type'].message_type = _VARTYPE -_BLOCKDESC.fields_by_name['vars'].message_type = _VARDESC -_BLOCKDESC.fields_by_name['ops'].message_type = _OPDESC -_PROGRAMDESC.fields_by_name['blocks'].message_type = _BLOCKDESC -DESCRIPTOR.message_types_by_name['OpDesc'] = _OPDESC -DESCRIPTOR.message_types_by_name['OpProto'] = _OPPROTO -DESCRIPTOR.message_types_by_name['VarType'] = _VARTYPE -DESCRIPTOR.message_types_by_name['VarDesc'] = _VARDESC -DESCRIPTOR.message_types_by_name['BlockDesc'] = _BLOCKDESC -DESCRIPTOR.message_types_by_name['ProgramDesc'] = _PROGRAMDESC -DESCRIPTOR.enum_types_by_name['AttrType'] = _ATTRTYPE - -OpDesc = _reflection.GeneratedProtocolMessageType('OpDesc', (_message.Message,), dict( - - Attr = _reflection.GeneratedProtocolMessageType('Attr', (_message.Message,), dict( - DESCRIPTOR = _OPDESC_ATTR, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.OpDesc.Attr) - )) - , - - Var = _reflection.GeneratedProtocolMessageType('Var', (_message.Message,), dict( - DESCRIPTOR = _OPDESC_VAR, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.OpDesc.Var) - )) - , - DESCRIPTOR = _OPDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.OpDesc) - )) -_sym_db.RegisterMessage(OpDesc) 
-_sym_db.RegisterMessage(OpDesc.Attr) -_sym_db.RegisterMessage(OpDesc.Var) - -OpProto = _reflection.GeneratedProtocolMessageType('OpProto', (_message.Message,), dict( - - Var = _reflection.GeneratedProtocolMessageType('Var', (_message.Message,), dict( - DESCRIPTOR = _OPPROTO_VAR, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.OpProto.Var) - )) - , - - Attr = _reflection.GeneratedProtocolMessageType('Attr', (_message.Message,), dict( - DESCRIPTOR = _OPPROTO_ATTR, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.OpProto.Attr) - )) - , - DESCRIPTOR = _OPPROTO, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.OpProto) - )) -_sym_db.RegisterMessage(OpProto) -_sym_db.RegisterMessage(OpProto.Var) -_sym_db.RegisterMessage(OpProto.Attr) - -VarType = _reflection.GeneratedProtocolMessageType('VarType', (_message.Message,), dict( - - TensorDesc = _reflection.GeneratedProtocolMessageType('TensorDesc', (_message.Message,), dict( - DESCRIPTOR = _VARTYPE_TENSORDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType.TensorDesc) - )) - , - - LoDTensorDesc = _reflection.GeneratedProtocolMessageType('LoDTensorDesc', (_message.Message,), dict( - DESCRIPTOR = _VARTYPE_LODTENSORDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType.LoDTensorDesc) - )) - , - - LoDTensorArrayDesc = _reflection.GeneratedProtocolMessageType('LoDTensorArrayDesc', (_message.Message,), dict( - DESCRIPTOR = _VARTYPE_LODTENSORARRAYDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc) - )) - , - - ReaderDesc = _reflection.GeneratedProtocolMessageType('ReaderDesc', (_message.Message,), dict( - DESCRIPTOR = _VARTYPE_READERDESC, - __module__ = 'framework_pb2' 
- # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType.ReaderDesc) - )) - , - - ChannelDesc = _reflection.GeneratedProtocolMessageType('ChannelDesc', (_message.Message,), dict( - DESCRIPTOR = _VARTYPE_CHANNELDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType.ChannelDesc) - )) - , - - Tuple = _reflection.GeneratedProtocolMessageType('Tuple', (_message.Message,), dict( - DESCRIPTOR = _VARTYPE_TUPLE, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType.Tuple) - )) - , - DESCRIPTOR = _VARTYPE, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarType) - )) -_sym_db.RegisterMessage(VarType) -_sym_db.RegisterMessage(VarType.TensorDesc) -_sym_db.RegisterMessage(VarType.LoDTensorDesc) -_sym_db.RegisterMessage(VarType.LoDTensorArrayDesc) -_sym_db.RegisterMessage(VarType.ReaderDesc) -_sym_db.RegisterMessage(VarType.ChannelDesc) -_sym_db.RegisterMessage(VarType.Tuple) - -VarDesc = _reflection.GeneratedProtocolMessageType('VarDesc', (_message.Message,), dict( - DESCRIPTOR = _VARDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.VarDesc) - )) -_sym_db.RegisterMessage(VarDesc) - -BlockDesc = _reflection.GeneratedProtocolMessageType('BlockDesc', (_message.Message,), dict( - DESCRIPTOR = _BLOCKDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.BlockDesc) - )) -_sym_db.RegisterMessage(BlockDesc) - -ProgramDesc = _reflection.GeneratedProtocolMessageType('ProgramDesc', (_message.Message,), dict( - DESCRIPTOR = _PROGRAMDESC, - __module__ = 'framework_pb2' - # @@protoc_insertion_point(class_scope:paddle_mobile.framework.proto.ProgramDesc) - )) -_sym_db.RegisterMessage(ProgramDesc) - - -DESCRIPTOR.has_options = True -DESCRIPTOR._options = 
_descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) -# @@protoc_insertion_point(module_scope) diff --git a/python/tools/mdl2fluid/loader.py b/python/tools/mdl2fluid/loader.py deleted file mode 100644 index ef2258e365a84003b7b90ac480abbd9798f48f59..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/loader.py +++ /dev/null @@ -1,18 +0,0 @@ -import datetime -import json -import os - -import google.protobuf as pbg -import framework_pb2 as framework_pb2 - - -def loadmdl(json_path): - print('mdl json path : ' + json_path) - with open(json_path, 'r') as f: - json_dick = json.load(f) - # print(json_dick) - layers = (json_dick['layer']) - for layer in layers: - print(layer) - - diff --git a/python/tools/mdl2fluid/mdl2fluid.py b/python/tools/mdl2fluid/mdl2fluid.py deleted file mode 100644 index a57a01d09eaf236fd9f890dcb9e8eead19aa7868..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/mdl2fluid.py +++ /dev/null @@ -1,335 +0,0 @@ -import json -import os - -import framework_pb2 as framework_pb2 -import op_types as types -from swicher import Swichter -import shutil - - -def load_mdl(mdl_json_path): - # print('mdl json path : ' + mdl_json_path) - with open(mdl_json_path, 'r') as f: - return json.load(f) - - -class Converter: - 'convert mdlmodel to fluidmodel' - - def __init__(self, mdl_json_path): - self.mdl_json_path = mdl_json_path - print mdl_json_path - self.mdl_json = load_mdl(self.mdl_json_path) - self.program_desc = framework_pb2.ProgramDesc() - self.weight_list_ = [] - self.deepwise_weight_list_ = [] - # print(json_dick) - # layers = (json_dick['layer']) - # for layer in layers: - # print(layer) - - def convert(self): - print 'convert begin.....' - # add block_desc - block_desc = self.program_desc.blocks.add() - block_desc.idx = 0 - block_desc.parent_idx = -1 - self.package_ops(block_desc) - self.package_vars(block_desc) - print 'blocks: ' - print self.program_desc.blocks - print 'convert end.....' 
- desc_serialize_to_string = self.program_desc.SerializeToString() - shutil.rmtree('newyolo/') - shutil.copytree('multiobjects/float32s_nchw_with_head', 'newyolo/') - - f = open("newyolo/__model__", "wb") - f.write(desc_serialize_to_string) - f.close() - - def package_ops(self, block_desc): - - self.add_op_feed(block_desc) - - # add ops with layer - if 'layer' in self.mdl_json: - - layers_ = self.mdl_json['layer'] - for layer in layers_: - desc_ops_add = block_desc.ops.add() - - # print layer - # for i in layer: - # print i - if 'name' in layer: - l_name = layer['name'] - if 'type' in layer: - self.package_ops_type(desc_ops_add, layer) - - if 'weight' in layer: - self.package_ops_weight2inputs(desc_ops_add, layer) - - if 'output' in layer: - self.package_ops_outputs(desc_ops_add, layer) - - if 'input' in layer: - self.package_ops_inputs(desc_ops_add, layer) - - self.package_ops_attrs(desc_ops_add, layer) - - self.add_op_fetch(block_desc) - - def add_op_feed(self, block_desc): - desc_ops_add = block_desc.ops.add() - inputs_add = desc_ops_add.inputs.add() - inputs_add.parameter = 'X' - inputs_add.arguments.append('feed') - desc_ops_add.type = 'feed' - outputs_add = desc_ops_add.outputs.add() - outputs_add.parameter = 'Out' - outputs_add.arguments.append('data') - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'col' - # boolean - attrs_add.type = 0 - attrs_add.i = 0 - - def add_op_fetch(self, block_desc): - desc_ops_add = block_desc.ops.add() - inputs_add = desc_ops_add.inputs.add() - inputs_add.parameter = 'X' - inputs_add.arguments.append('conv_pred_87') - desc_ops_add.type = 'fetch' - outputs_add = desc_ops_add.outputs.add() - outputs_add.parameter = 'Out' - outputs_add.arguments.append('fetch') - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'col' - # boolean - attrs_add.type = 0 - attrs_add.i = 0 - - @staticmethod - def package_ops_attrs(desc_ops_add, layer): - # print l_params - # print desc_ops_add.type - if desc_ops_add.type == 
types.op_fluid_fusion_conv_add: - Converter.pack_fusion_conv_add_attr(desc_ops_add, layer) - elif desc_ops_add.type == types.op_fluid_relu: - # fusion_conv_add : attrs - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'use_mkldnn' - # boolean - attrs_add.type = 6 - attrs_add.b = 0 - - @staticmethod - def pack_fusion_conv_add_attr(desc_ops_add, layer): - - # fusion_conv_add : attrs - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'workspace_size_MB' - # 0-->INT - attrs_add.type = 0 - attrs_add.i = 4096 - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'data_format' - # 2-->STRING - attrs_add.type = 2 - attrs_add.s = 'AnyLayout' - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'use_mkldnn' - # boolean - attrs_add.type = 6 - attrs_add.b = 0 - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'use_cudnn' - # boolean - attrs_add.type = 6 - attrs_add.b = 1 - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'dilations' - # ints - attrs_add.type = 3 - attrs_add.ints.append(1) - attrs_add.ints.append(1) - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'axis' - # int - attrs_add.type = 0 - attrs_add.i = 1 - - if 'param' in layer: - l_params = layer['param'] - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'paddings' - # ints - attrs_add.type = 3 - attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('paddings')]) - attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('paddings')]) - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'strides' - # ints - attrs_add.type = 3 - attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('strides')]) - attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('strides')]) - - attrs_add = desc_ops_add.attrs.add() - attrs_add.name = 'groups' - # int - attrs_add.type = 0 - attrs_add.i = l_params[types.fusion_conv_add_attrs_dict.get('groups')] - # attrs_add.i = 1 - - # - # op_attrs_tupl = 
types.op_io_dict.get(desc_ops_add.type) \ - # .get(types.mdl_attrs_key) - # - # - # - # - # # group stride padding - # print '----------------------' - # for i, val in enumerate(op_attrs_tupl): - # attrs_add = desc_ops_add.attrs.add() - # attr_name = op_attrs_tupl[i] - # print attr_name - # attrs_add.name = attr_name - # attrs_add.type = types.fluid_attrs_type_dict.get(attr_name) - # attrs_add. - # print l_params[types.fusion_conv_add_attrs_dict.get(attr_name)] - - # for p in l_params: - # attrs_add = desc_ops_add.attrs.add() - - @staticmethod - def package_ops_inputs(desc_ops_add, layer): - l_inputs = layer['input'] - for i in l_inputs: - inputs_add = desc_ops_add.inputs.add() - # print i - inputs_add.parameter = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_inputs_key) - inputs_add.arguments.append(i) - - @staticmethod - def package_ops_outputs(desc_ops_add, layer): - l_outputs = layer['output'] - for o in l_outputs: - # print o - outputs_add = desc_ops_add.outputs.add() - outputs_add.parameter = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_outputs_key) - outputs_add.arguments.append(o) - - def package_ops_weight2inputs(self, desc_ops_add, layer): - l_weights = layer['weight'] - for w in l_weights: - self.weight_list_.append(w) - - if layer['type'] == 'DepthwiseConvolutionLayer': - # print l_weights[0] - self.deepwise_weight_list_.append(l_weights[0]) - - op_weight_tup = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_weight_key) - # print len(op_weight_tup) - for i, val in enumerate(op_weight_tup): - # print i - # print val - inputs_add = desc_ops_add.inputs.add() - inputs_add.parameter = op_weight_tup[i] - inputs_add.arguments.append(l_weights[i]) - - # for w in l_weights: - # inputs_add = desc_ops_add.inputs.add() - # # print w - # inputs_add.parameter = op_weight_tup[0] - # inputs_add.arguments.append(w) - - @staticmethod - def package_ops_type(desc_ops_add, layer): - l_type = layer['type'] - # print l_type - # print 
mdl2fluid_op_layer_dict.get(l_type) - desc_ops_add.type = types.mdl2fluid_op_layer_dict.get(l_type) - - def package_vars(self, block_desc): - vars_add = block_desc.vars.add() - vars_add.name = 'feed' - vars_add.type.type = 9 # 9 is FEED_MINIBATCH - vars_add.persistable = 1 - # fetch - vars_add = block_desc.vars.add() - vars_add.name = 'fetch' - vars_add.type.type = 10 # 10 is fetch list - vars_add.persistable = 1 - - json_matrix_ = self.mdl_json['matrix'] - # print json_matrix_ - for j in json_matrix_: - vars_add = block_desc.vars.add() - vars_add.name = j - vars_add.type.type = 7 # 7 is lodtensor - # print j - tensor = vars_add.type.lod_tensor.tensor - tensor.data_type = 5 # 5 is FP32 - - # print json_matrix_ - - dims_of_matrix = json_matrix_.get(j) - # dims_size = len(dims_of_matrix) - # print dims_size - - # if dims_size == 4: - # tensor.dims.append(dims_of_matrix[0]) # N - # tensor.dims.append(dims_of_matrix[3]) # C - # tensor.dims.append(dims_of_matrix[1]) # H - # tensor.dims.append(dims_of_matrix[2]) # W - # else: - - # issues in mdl model filter swich n and c - if j in self.deepwise_weight_list_ and len(dims_of_matrix) == 4: - print j - tensor.dims.append(dims_of_matrix[1]) - tensor.dims.append(dims_of_matrix[0]) - tensor.dims.append(dims_of_matrix[2]) - tensor.dims.append(dims_of_matrix[3]) - print tensor.dims - else: - for dims in dims_of_matrix: - # print dims - tensor.dims.append(dims) - - if j in self.weight_list_: - vars_add.persistable = 1 - dims_size = len(dims_of_matrix) - # print dims_size - if dims_size == 4: - # convert weight from nhwc to nchw - Swichter().nhwc2nchw_one_slice_add_head( - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/' + j + '.bin', - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/' + j, - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/' + j + '.tmp', - dims_of_matrix[0], 
- dims_of_matrix[1], - dims_of_matrix[2], - dims_of_matrix[3] - ) - else: - Swichter().copy_add_head( - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/' + j + '.bin', - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/' + j, - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/' + j + '.tmp' - ) - else: - vars_add.persistable = 0 - - -mdl_path = "/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/YOLO_Universal.json" -converter = Converter(mdl_path) -converter.convert() diff --git a/python/tools/mdl2fluid/model_combine.py b/python/tools/mdl2fluid/model_combine.py deleted file mode 100644 index ae3ca8a786dc0d4032deda35c33f44d3d96e983d..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/model_combine.py +++ /dev/null @@ -1,19 +0,0 @@ -# coding=utf-8 -import os - -path = "yolo_v2_tofile_source/" # 文件夹目录 -to_file_path = "yolo_v2_tofile_combined/params" -files = os.listdir(path) # 得到文件夹下的所有文件名称 -files.sort(cmp=None, key=str.lower) -to_file = open(to_file_path, "wb") - -for file in files: # 遍历文件夹 - if not os.path.isdir(file): # 判断是否是文件夹,不是文件夹才打开 - f = open(path + "/" + file) # 打开文件 - name = f.name - print 'name: ' + name - from_file = open(name, "rb") - to_file.write(from_file.read()) - from_file.close() - -to_file.close() diff --git a/python/tools/mdl2fluid/model_reader.py b/python/tools/mdl2fluid/model_reader.py deleted file mode 100644 index 8d53350db20739526b77663f791942299d4bc149..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/model_reader.py +++ /dev/null @@ -1,30 +0,0 @@ -import os - -import framework_pb2 as framework_pb2 - - -def read_model(model_path): - print('read_model.') - path_8 = unicode(model_path, 'utf8') - - try: - with open(path_8, "rb") as f_model: - print get_file_size(model_path) - desc = framework_pb2.ProgramDesc() - 
desc.ParseFromString(f_model.read()) - print desc - # print desc.blocks - - except IOError: - print ": File not found. Creating a new file." - - -def get_file_size(file_path): - file_path = unicode(file_path, 'utf8') - fsize = os.path.getsize(file_path) - fsize = fsize / float(1024 * 1024) - return round(fsize, 2) - - -path = "newyolo/__model__" -read_model(path) diff --git a/python/tools/mdl2fluid/op_types.py b/python/tools/mdl2fluid/op_types.py deleted file mode 100644 index ff7d78d20835c605dc581ef14ad2d7d5171fea1d..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/op_types.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding=utf-8 - -# mdl layers -layer_mdl_conv = 'ConvolutionLayer' -layer_mdl_deepwise_conv = 'DepthwiseConvolutionLayer' -layer_mdl_relu = 'ReluLayer' -layer_mdl_pointwise_add = 'PointwiseConvolutionLayer' - -# fluid ops -op_fluid_fusion_conv_add = 'fusion_conv_add' -op_fluid_relu = 'relu' - -# dict mdk layer --- fluid op -mdl2fluid_op_layer_dict = { - layer_mdl_conv: op_fluid_fusion_conv_add, - layer_mdl_deepwise_conv: op_fluid_fusion_conv_add, - layer_mdl_relu: op_fluid_relu, - layer_mdl_pointwise_add: op_fluid_fusion_conv_add -} - -mdl_outputs_key = "outputs" -mdl_inputs_key = "inputs" -mdl_weight_key = "weights" -mdl_attrs_key = "params" - -# dict of mdl-input _out param to fluid input out attrs -fusion_conv_add_dict = { - mdl_inputs_key: 'Input', - mdl_outputs_key: 'Out', - mdl_weight_key: ('Filter', 'Y'), - mdl_attrs_key: ( - # 'workspace_size_MB', 'use_mkldnn', 'use_cudnn', 'data_format','dilations', - # dilations = [1,1] - 'groups', 'paddings', 'strides' - # 'axis' - ) -} - -relu_dict = { - mdl_inputs_key: 'X', - mdl_outputs_key: 'Out', - mdl_weight_key: () - -} -# mdl layers --- fluid ops -op_io_dict = { - 'fusion_conv_add': fusion_conv_add_dict, - 'relu': relu_dict -} - -# fluid attr key --- mdl params key -fusion_conv_add_attrs_dict = { - 'paddings': 'pad', - 'strides': 'stride', - 'groups': 'group' -} -# fluid attr key --- 
mdl params key -fluid_attrs_type_dict = { - 'paddings': 0, - 'strides': 6, - 'groups': 6 -} - -# '': "bias_term", 是不是要add 目前 yolo的模型都是 bias_term = 1 - - -# attrs { -# name: "axis" -# type: INT -# i: 1 -# } - - -# attrs_name = { -# 'name': "workspace_size_MB", -# 'type': 'INT', -# 'i': '4096' -# } -# attrs -# { -# name: "data_format" -# type: STRING -# s: "AnyLayout" -# } -# attrs -# { -# name: "use_mkldnn" -# type: BOOLEAN -# b: false -# } -# attrs -# { -# name: "use_cudnn" -# type: BOOLEAN -# b: true -# } -# attrs -# { -# name: "dilations" -# type: INTS -# ints: 1 -# ints: 1 -# } -# attrs -# { -# name: "groups" -# type: INT -# i: 1 -# } -# attrs -# { -# name: "paddings" -# type: INTS -# ints: 0 -# ints: 0 -# } -# attrs -# { -# name: "strides" -# type: INTS -# ints: 1 -# ints: 1 -# } diff --git a/python/tools/mdl2fluid/swicher.py b/python/tools/mdl2fluid/swicher.py deleted file mode 100644 index bfe0360fd5b32f5e6fa61f6f05a0a384fb3a1e9b..0000000000000000000000000000000000000000 --- a/python/tools/mdl2fluid/swicher.py +++ /dev/null @@ -1,115 +0,0 @@ -from array import array - - -class Swichter: - def __init__(self): - pass - - def nhwc2nchw_one_slice(self, from_file_name, to_file_name, batch, channel, height, width): - from_file = open(from_file_name, "rb") - to_file = open(to_file_name, "wb") - - float_array = array("f") - float_array.fromfile(from_file, width * height * batch * channel) - float_write_array = array("f") - - for b in range(batch): - for c in range(channel): - for h in range(height): - for w in range(width): - float_value = float_array[b * channel * width * height - + channel * (h * width + w) + c] - - float_write_array.append(float_value) - - float_write_array.tofile(to_file) - from_file.close() - to_file.close() - - def copy(self, from_file_name, to_file_name): - from_file = open(from_file_name, "rb") - to_file = open(to_file_name, "wb") - - to_file.write(from_file.read()) - from_file.close() - to_file.close() - - def 
nhwc2nchw_one_slice_add_head(self, from_file_name, to_file_name, tmp_file_name, batch, channel, height, width): - from_file = open(from_file_name, "rb") - tmp_file = open(tmp_file_name, "wb+") - float_array = array("f") - float_array.fromfile(from_file, width * height * batch * channel) - float_write_array = array("f") - - for b in range(batch): - for c in range(channel): - for h in range(height): - for w in range(width): - float_value = float_array[b * channel * width * height - + channel * (h * width + w) + c] - - float_write_array.append(float_value) - - float_write_array.tofile(tmp_file) - tmp_file.close() - from_file.close() - - tmp_file = open(tmp_file_name, "rb") - to_file = open(to_file_name, "wb") - - tmp = tmp_file.read() - head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') - to_file.write(head) - to_file.write(tmp) - tmp_file.close() - to_file.close() - - def read_head(self, head_file): - from_file = open(head_file, "rb") - read = from_file.read(24) - # print read - from_file.close() - # print read - return read - - def copy_add_head(self, from_file_name, to_file_name, tmp_file_name): - from_file = open(from_file_name, "rb") - to_file = open(to_file_name, "wb") - # tmp_file = open(tmp_file_name, "wb") - - head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') - to_file.write(head) - to_file.write(from_file.read()) - from_file.close() - to_file.close() - pass - - def copy_padding_add_head(self, from_file_name, to_file_name, tmp_file_name, padding): - print'padding = %d' % padding - from_file = open(from_file_name, "rb") - # print len(from_file.read()) - from_file.seek(padding, 0) - - read = from_file.read() - print len(read) - - to_file = open(to_file_name, "wb") - # tmp_file = open(tmp_file_name, "wb") - - head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') - to_file.write(head) 
- to_file.write(read) - from_file.close() - to_file.close() - pass - -# Swichter().nhwc2nchw_one_slice_add_head( -# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv1_0.bin', -# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/conv1_0', -# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/.tmp', -# 32, -# 3, 3, 3) - -# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') - -# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '') diff --git a/src/common/dep_core.h b/src/common/dep_core.h index d9873a3896d1ac83cfc45e0666ca8491a645ed8e..a9fdca5b1de0307ed9bde99dcc65ca92fd5aee53 100644 --- a/src/common/dep_core.h +++ b/src/common/dep_core.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once + #ifdef PADDLE_EXECUTOR_MULTITHREAD #include #include @@ -60,6 +61,7 @@ class depCore { std::vector> deps; std::vector> next; }; + } // namespace paddle_mobile #endif diff --git a/src/common/protobuf-c.c b/src/common/protobuf-c.c deleted file mode 100644 index 1092e3f78b02a343d8c8965ea7b2d777a6fac9ae..0000000000000000000000000000000000000000 --- a/src/common/protobuf-c.c +++ /dev/null @@ -1,2098 +0,0 @@ -/* - * Copyright (c) 2008-2015, Dave Benson and the protobuf-c authors. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/*! \file - * Support library for `protoc-c` generated code. - * - * This file implements the public API used by the code generated - * by `protoc-c`. - * - * \authors Dave Benson and the protobuf-c authors - * - * \copyright 2008-2014. Licensed under the terms of the [BSD-2-Clause] license. - */ - -/** - * \todo 64-BIT OPTIMIZATION: certain implementations use 32-bit math - * even on 64-bit platforms (uint64_size, uint64_pack, parse_uint64). - * - * \todo Use size_t consistently. - */ - -#include /* for malloc, free */ -#include /* for strcmp, strlen, memcpy, memmove, memset */ - -#include "protobuf-c.h" - -#define TRUE 1 -#define FALSE 0 - -#define PROTOBUF_C__ASSERT_NOT_REACHED() assert(0) - -/* Workaround for Microsoft compilers. 
*/ -#ifdef _MSC_VER -#define inline __inline -#endif - -/** - * \defgroup internal Internal functions and macros - * - * These are not exported by the library but are useful to developers working - * on `libprotobuf-c` itself. - */ - -/** - * \defgroup macros Utility macros for manipulating structures - * - * Macros and constants used to manipulate the base "classes" generated by - * `protobuf-c`. They also define limits and check correctness. - * - * \ingroup internal - * @{ - */ - -/** The maximum length of a 64-bit integer in varint encoding. */ -#define MAX_UINT64_ENCODED_SIZE 10 - -#ifndef PROTOBUF_C_UNPACK_ERROR -#define PROTOBUF_C_UNPACK_ERROR(...) -#endif - -const char protobuf_c_empty_string[] = ""; - -/** - * Internal `ProtobufCMessage` manipulation macro. - * - * Base macro for manipulating a `ProtobufCMessage`. Used by STRUCT_MEMBER() and - * STRUCT_MEMBER_PTR(). - */ -#define STRUCT_MEMBER_P(struct_p, struct_offset) \ - ((void *)((uint8_t *)(struct_p) + (struct_offset))) - -/** - * Return field in a `ProtobufCMessage` based on offset. - * - * Take a pointer to a `ProtobufCMessage` and find the field at the offset. - * Cast it to the passed type. - */ -#define STRUCT_MEMBER(member_type, struct_p, struct_offset) \ - (*(member_type *)STRUCT_MEMBER_P((struct_p), (struct_offset))) - -/** - * Return field in a `ProtobufCMessage` based on offset. - * - * Take a pointer to a `ProtobufCMessage` and find the field at the offset. Cast - * it to a pointer to the passed type. - */ -#define STRUCT_MEMBER_PTR(member_type, struct_p, struct_offset) \ - ((member_type *)STRUCT_MEMBER_P((struct_p), (struct_offset))) - -/* Assertions for magic numbers. 
*/ - -#define ASSERT_IS_ENUM_DESCRIPTOR(desc) \ - assert((desc)->magic == PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC) - -#define ASSERT_IS_MESSAGE_DESCRIPTOR(desc) \ - assert((desc)->magic == PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) - -#define ASSERT_IS_MESSAGE(message) \ - ASSERT_IS_MESSAGE_DESCRIPTOR((message)->descriptor) - -#define ASSERT_IS_SERVICE_DESCRIPTOR(desc) \ - assert((desc)->magic == PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC) - -/**@}*/ - -/* --- version --- */ - -const char *protobuf_c_version(void) { return PROTOBUF_C_VERSION; } - -uint32_t protobuf_c_version_number(void) { return PROTOBUF_C_VERSION_NUMBER; } - -/* --- allocator --- */ - -static void *system_alloc(void *allocator_data, size_t size) { - return malloc(size); -} - -static void system_free(void *allocator_data, void *data) { free(data); } - -static inline void *do_alloc(ProtobufCAllocator *allocator, size_t size) { - return allocator->alloc(allocator->allocator_data, size); -} - -static inline void do_free(ProtobufCAllocator *allocator, void *data) { - if (data != NULL) allocator->free(allocator->allocator_data, data); -} - -/* - * This allocator uses the system's malloc() and free(). It is the default - * allocator used if NULL is passed as the ProtobufCAllocator to an exported - * function. 
- */ -static ProtobufCAllocator protobuf_c__allocator = { - .alloc = &system_alloc, - .free = &system_free, - .allocator_data = NULL, -}; - -/* === buffer-simple === */ - -void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len, - const uint8_t *data) { - ProtobufCBufferSimple *simp = (ProtobufCBufferSimple *)buffer; - size_t new_len = simp->len + len; - - if (new_len > simp->alloced) { - ProtobufCAllocator *allocator = simp->allocator; - size_t new_alloced = simp->alloced * 2; - uint8_t *new_data; - - if (allocator == NULL) allocator = &protobuf_c__allocator; - while (new_alloced < new_len) new_alloced += new_alloced; - new_data = do_alloc(allocator, new_alloced); - if (!new_data) return; - memcpy(new_data, simp->data, simp->len); - if (simp->must_free_data) - do_free(allocator, simp->data); - else - simp->must_free_data = TRUE; - simp->data = new_data; - simp->alloced = new_alloced; - } - memcpy(simp->data + simp->len, data, len); - simp->len = new_len; -} - -/** - * \defgroup packedsz protobuf_c_message_get_packed_size() implementation - * - * Routines mainly used by protobuf_c_message_get_packed_size(). - * - * \ingroup internal - * @{ - */ - -/** - * Return the number of bytes required to store the tag for the field. Includes - * 3 bits for the wire-type, and a single bit that denotes the end-of-tag. - * - * \param number - * Field tag to encode. - * \return - * Number of bytes required. - */ -static inline size_t get_tag_size(uint32_t number) { - if (number < (1UL << 4)) { - return 1; - } else if (number < (1UL << 11)) { - return 2; - } else if (number < (1UL << 18)) { - return 3; - } else if (number < (1UL << 25)) { - return 4; - } else { - return 5; - } -} - -/** - * Return the number of bytes required to store a variable-length unsigned - * 32-bit integer in base-128 varint encoding. - * - * \param v - * Value to encode. - * \return - * Number of bytes required. 
- */ -static inline size_t uint32_size(uint32_t v) { - if (v < (1UL << 7)) { - return 1; - } else if (v < (1UL << 14)) { - return 2; - } else if (v < (1UL << 21)) { - return 3; - } else if (v < (1UL << 28)) { - return 4; - } else { - return 5; - } -} - -/** - * Return the number of bytes required to store a variable-length signed 32-bit - * integer in base-128 varint encoding. - * - * \param v - * Value to encode. - * \return - * Number of bytes required. - */ -static inline size_t int32_size(int32_t v) { - if (v < 0) { - return 10; - } else if (v < (1L << 7)) { - return 1; - } else if (v < (1L << 14)) { - return 2; - } else if (v < (1L << 21)) { - return 3; - } else if (v < (1L << 28)) { - return 4; - } else { - return 5; - } -} - -/** - * Return the ZigZag-encoded 32-bit unsigned integer form of a 32-bit signed - * integer. - * - * \param v - * Value to encode. - * \return - * ZigZag encoded integer. - */ -static inline uint32_t zigzag32(int32_t v) { - if (v < 0) - return (-(uint32_t)v) * 2 - 1; - else - return (uint32_t)(v)*2; -} - -/** - * Return the number of bytes required to store a signed 32-bit integer, - * converted to an unsigned 32-bit integer with ZigZag encoding, using base-128 - * varint encoding. - * - * \param v - * Value to encode. - * \return - * Number of bytes required. - */ -static inline size_t sint32_size(int32_t v) { return uint32_size(zigzag32(v)); } - -/** - * Return the number of bytes required to store a 64-bit unsigned integer in - * base-128 varint encoding. - * - * \param v - * Value to encode. - * \return - * Number of bytes required. 
- */ -static inline size_t uint64_size(uint64_t v) { - uint32_t upper_v = (uint32_t)(v >> 32); - - if (upper_v == 0) { - return uint32_size((uint32_t)v); - } else if (upper_v < (1UL << 3)) { - return 5; - } else if (upper_v < (1UL << 10)) { - return 6; - } else if (upper_v < (1UL << 17)) { - return 7; - } else if (upper_v < (1UL << 24)) { - return 8; - } else if (upper_v < (1UL << 31)) { - return 9; - } else { - return 10; - } -} - -/** - * Return the ZigZag-encoded 64-bit unsigned integer form of a 64-bit signed - * integer. - * - * \param v - * Value to encode. - * \return - * ZigZag encoded integer. - */ -static inline uint64_t zigzag64(int64_t v) { - if (v < 0) - return (-(uint64_t)v) * 2 - 1; - else - return (uint64_t)(v)*2; -} - -/** - * Return the number of bytes required to store a signed 64-bit integer, - * converted to an unsigned 64-bit integer with ZigZag encoding, using base-128 - * varint encoding. - * - * \param v - * Value to encode. - * \return - * Number of bytes required. - */ -static inline size_t sint64_size(int64_t v) { return uint64_size(zigzag64(v)); } - -/** - * Calculate the serialized size of a single required message field, including - * the space needed by the preceding tag. - * - * \param field - * Field descriptor for member. - * \param member - * Field to encode. - * \return - * Number of bytes required. 
- */ -static size_t required_field_get_packed_size( - const ProtobufCFieldDescriptor *field, const void *member) { - size_t rv = get_tag_size(field->id); - - switch (field->type) { - case PROTOBUF_C_TYPE_SINT32: - return rv + sint32_size(*(const int32_t *)member); - case PROTOBUF_C_TYPE_ENUM: - case PROTOBUF_C_TYPE_INT32: - return rv + int32_size(*(const int32_t *)member); - case PROTOBUF_C_TYPE_UINT32: - return rv + uint32_size(*(const uint32_t *)member); - case PROTOBUF_C_TYPE_SINT64: - return rv + sint64_size(*(const int64_t *)member); - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_UINT64: - return rv + uint64_size(*(const uint64_t *)member); - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - return rv + 4; - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - return rv + 8; - case PROTOBUF_C_TYPE_BOOL: - return rv + 1; - case PROTOBUF_C_TYPE_FLOAT: - return rv + 4; - case PROTOBUF_C_TYPE_DOUBLE: - return rv + 8; - case PROTOBUF_C_TYPE_STRING: { - const char *str = *(char *const *)member; - size_t len = str ? strlen(str) : 0; - return rv + uint32_size(len) + len; - } - case PROTOBUF_C_TYPE_BYTES: { - size_t len = ((const ProtobufCBinaryData *)member)->len; - return rv + uint32_size(len) + len; - } - case PROTOBUF_C_TYPE_MESSAGE: { - const ProtobufCMessage *msg = *(ProtobufCMessage *const *)member; - size_t subrv = msg ? protobuf_c_message_get_packed_size(msg) : 0; - return rv + uint32_size(subrv) + subrv; - } - } - PROTOBUF_C__ASSERT_NOT_REACHED(); - return 0; -} - -/** - * Calculate the serialized size of a single oneof message field, including - * the space needed by the preceding tag. Returns 0 if the oneof field isn't - * selected or is not set. - * - * \param field - * Field descriptor for member. - * \param oneof_case - * Enum value that selects the field in the oneof. - * \param member - * Field to encode. - * \return - * Number of bytes required. 
- */ -static size_t oneof_field_get_packed_size(const ProtobufCFieldDescriptor *field, - uint32_t oneof_case, - const void *member) { - if (oneof_case != field->id) { - return 0; - } - if (field->type == PROTOBUF_C_TYPE_MESSAGE || - field->type == PROTOBUF_C_TYPE_STRING) { - const void *ptr = *(const void *const *)member; - if (ptr == NULL || ptr == field->default_value) return 0; - } - return required_field_get_packed_size(field, member); -} - -/** - * Calculate the serialized size of a single optional message field, including - * the space needed by the preceding tag. Returns 0 if the optional field isn't - * set. - * - * \param field - * Field descriptor for member. - * \param has - * True if the field exists, false if not. - * \param member - * Field to encode. - * \return - * Number of bytes required. - */ -static size_t optional_field_get_packed_size( - const ProtobufCFieldDescriptor *field, const protobuf_c_boolean has, - const void *member) { - if (field->type == PROTOBUF_C_TYPE_MESSAGE || - field->type == PROTOBUF_C_TYPE_STRING) { - const void *ptr = *(const void *const *)member; - if (ptr == NULL || ptr == field->default_value) return 0; - } else { - if (!has) return 0; - } - return required_field_get_packed_size(field, member); -} - -static protobuf_c_boolean field_is_zeroish( - const ProtobufCFieldDescriptor *field, const void *member) { - protobuf_c_boolean ret = FALSE; - - switch (field->type) { - case PROTOBUF_C_TYPE_BOOL: - ret = (0 == *(const protobuf_c_boolean *)member); - break; - case PROTOBUF_C_TYPE_ENUM: - case PROTOBUF_C_TYPE_SINT32: - case PROTOBUF_C_TYPE_INT32: - case PROTOBUF_C_TYPE_UINT32: - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - ret = (0 == *(const uint32_t *)member); - break; - case PROTOBUF_C_TYPE_SINT64: - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_UINT64: - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - ret = (0 == *(const uint64_t *)member); - break; - case PROTOBUF_C_TYPE_FLOAT: - 
ret = (0 == *(const float *)member); - break; - case PROTOBUF_C_TYPE_DOUBLE: - ret = (0 == *(const double *)member); - break; - case PROTOBUF_C_TYPE_STRING: - ret = (NULL == *(const char *const *)member) || - ('\0' == **(const char *const *)member); - break; - case PROTOBUF_C_TYPE_BYTES: - case PROTOBUF_C_TYPE_MESSAGE: - ret = (NULL == *(const void *const *)member); - break; - default: - ret = TRUE; - break; - } - - return ret; -} - -/** - * Calculate the serialized size of a single unlabeled message field, including - * the space needed by the preceding tag. Returns 0 if the field isn't set or - * if it is set to a "zeroish" value (null pointer or 0 for numerical values). - * Unlabeled fields are supported only in proto3. - * - * \param field - * Field descriptor for member. - * \param member - * Field to encode. - * \return - * Number of bytes required. - */ -static size_t unlabeled_field_get_packed_size( - const ProtobufCFieldDescriptor *field, const void *member) { - if (field_is_zeroish(field, member)) return 0; - return required_field_get_packed_size(field, member); -} - -/** - * Calculate the serialized size of repeated message fields, which may consist - * of any number of values (including 0). Includes the space needed by the - * preceding tags (as needed). - * - * \param field - * Field descriptor for member. - * \param count - * Number of repeated field members. - * \param member - * Field to encode. - * \return - * Number of bytes required. 
- */ -static size_t repeated_field_get_packed_size( - const ProtobufCFieldDescriptor *field, size_t count, const void *member) { - size_t header_size; - size_t rv = 0; - unsigned i; - void *array = *(void *const *)member; - - if (count == 0) return 0; - header_size = get_tag_size(field->id); - if (0 == (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) header_size *= count; - - switch (field->type) { - case PROTOBUF_C_TYPE_SINT32: - for (i = 0; i < count; i++) rv += sint32_size(((int32_t *)array)[i]); - break; - case PROTOBUF_C_TYPE_ENUM: - case PROTOBUF_C_TYPE_INT32: - for (i = 0; i < count; i++) rv += int32_size(((int32_t *)array)[i]); - break; - case PROTOBUF_C_TYPE_UINT32: - for (i = 0; i < count; i++) rv += uint32_size(((uint32_t *)array)[i]); - break; - case PROTOBUF_C_TYPE_SINT64: - for (i = 0; i < count; i++) rv += sint64_size(((int64_t *)array)[i]); - break; - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_UINT64: - for (i = 0; i < count; i++) rv += uint64_size(((uint64_t *)array)[i]); - break; - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - case PROTOBUF_C_TYPE_FLOAT: - rv += 4 * count; - break; - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - case PROTOBUF_C_TYPE_DOUBLE: - rv += 8 * count; - break; - case PROTOBUF_C_TYPE_BOOL: - rv += count; - break; - case PROTOBUF_C_TYPE_STRING: - for (i = 0; i < count; i++) { - size_t len = strlen(((char **)array)[i]); - rv += uint32_size(len) + len; - } - break; - case PROTOBUF_C_TYPE_BYTES: - for (i = 0; i < count; i++) { - size_t len = ((ProtobufCBinaryData *)array)[i].len; - rv += uint32_size(len) + len; - } - break; - case PROTOBUF_C_TYPE_MESSAGE: - for (i = 0; i < count; i++) { - size_t len = - protobuf_c_message_get_packed_size(((ProtobufCMessage **)array)[i]); - rv += uint32_size(len) + len; - } - break; - } - - if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) - header_size += uint32_size(rv); - return header_size + rv; -} - -/** - * Calculate the serialized size of 
an unknown field, i.e. one that is passed - * through mostly uninterpreted. This is required for forward compatibility if - * new fields are added to the message descriptor. - * - * \param field - * Unknown field type. - * \return - * Number of bytes required. - */ -static inline size_t unknown_field_get_packed_size( - const ProtobufCMessageUnknownField *field) { - return get_tag_size(field->tag) + field->len; -} - -/**@}*/ - -/* - * Calculate the serialized size of the message. - */ -size_t protobuf_c_message_get_packed_size(const ProtobufCMessage *message) { - unsigned i; - size_t rv = 0; - - ASSERT_IS_MESSAGE(message); - for (i = 0; i < message->descriptor->n_fields; i++) { - const ProtobufCFieldDescriptor *field = message->descriptor->fields + i; - const void *member = ((const char *)message) + field->offset; - const void *qmember = ((const char *)message) + field->quantifier_offset; - - if (field->label == PROTOBUF_C_LABEL_REQUIRED) { - rv += required_field_get_packed_size(field, member); - } else if ((field->label == PROTOBUF_C_LABEL_OPTIONAL || - field->label == PROTOBUF_C_LABEL_NONE) && - (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF))) { - rv += oneof_field_get_packed_size(field, *(const uint32_t *)qmember, - member); - } else if (field->label == PROTOBUF_C_LABEL_OPTIONAL) { - rv += optional_field_get_packed_size( - field, *(protobuf_c_boolean *)qmember, member); - } else if (field->label == PROTOBUF_C_LABEL_NONE) { - rv += unlabeled_field_get_packed_size(field, member); - } else { - rv += repeated_field_get_packed_size(field, *(const size_t *)qmember, - member); - } - } - for (i = 0; i < message->n_unknown_fields; i++) - rv += unknown_field_get_packed_size(&message->unknown_fields[i]); - return rv; -} - -/** - * \defgroup pack protobuf_c_message_pack() implementation - * - * Routines mainly used by protobuf_c_message_pack(). 
- * - * \ingroup internal - * @{ - */ - -/** - * Pack an unsigned 32-bit integer in base-128 varint encoding and return the - * number of bytes written, which must be 5 or less. - * - * \param value - * Value to encode. - * \param[out] out - * Packed value. - * \return - * Number of bytes written to `out`. - */ -static inline size_t uint32_pack(uint32_t value, uint8_t *out) { - unsigned rv = 0; - - if (value >= 0x80) { - out[rv++] = value | 0x80; - value >>= 7; - if (value >= 0x80) { - out[rv++] = value | 0x80; - value >>= 7; - if (value >= 0x80) { - out[rv++] = value | 0x80; - value >>= 7; - if (value >= 0x80) { - out[rv++] = value | 0x80; - value >>= 7; - } - } - } - } - /* assert: value<128 */ - out[rv++] = value; - return rv; -} - -/** - * Pack a 64-bit unsigned integer using base-128 varint encoding and return the - * number of bytes written. - * - * \param value - * Value to encode. - * \param[out] out - * Packed value. - * \return - * Number of bytes written to `out`. - */ -static size_t uint64_pack(uint64_t value, uint8_t *out) { - uint32_t hi = (uint32_t)(value >> 32); - uint32_t lo = (uint32_t)value; - unsigned rv; - - if (hi == 0) return uint32_pack((uint32_t)lo, out); - out[0] = (lo) | 0x80; - out[1] = (lo >> 7) | 0x80; - out[2] = (lo >> 14) | 0x80; - out[3] = (lo >> 21) | 0x80; - if (hi < 8) { - out[4] = (hi << 4) | (lo >> 28); - return 5; - } else { - out[4] = ((hi & 7) << 4) | (lo >> 28) | 0x80; - hi >>= 3; - } - rv = 5; - while (hi >= 128) { - out[rv++] = hi | 0x80; - hi >>= 7; - } - out[rv++] = hi; - return rv; -} - -/** - * Pack a ProtobufCBinaryData and return the number of bytes written. The output - * includes a length delimiter. - * - * \param bd - * ProtobufCBinaryData to encode. - * \param[out] out - * Packed value. - * \return - * Number of bytes written to `out`. 
- */ -static inline size_t binary_data_pack(const ProtobufCBinaryData *bd, - uint8_t *out) { - size_t len = bd->len; - size_t rv = uint32_pack(len, out); - memcpy(out + rv, bd->data, len); - return rv + len; -} - -/** - * Pack a field tag. - * - * Wire-type will be added in required_field_pack(). - * - * \todo Just call uint64_pack on 64-bit platforms. - * - * \param id - * Tag value to encode. - * \param[out] out - * Packed value. - * \return - * Number of bytes written to `out`. - */ -static size_t tag_pack(uint32_t id, uint8_t *out) { - if (id < (1UL << (32 - 3))) - return uint32_pack(id << 3, out); - else - return uint64_pack(((uint64_t)id) << 3, out); -} - -/** - * Given a field type, return the in-memory size. - * - * \todo Implement as a table lookup. - * - * \param type - * Field type. - * \return - * Size of the field. - */ -static inline size_t sizeof_elt_in_repeated_array(ProtobufCType type) { - switch (type) { - case PROTOBUF_C_TYPE_SINT32: - case PROTOBUF_C_TYPE_INT32: - case PROTOBUF_C_TYPE_UINT32: - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - case PROTOBUF_C_TYPE_FLOAT: - case PROTOBUF_C_TYPE_ENUM: - return 4; - case PROTOBUF_C_TYPE_SINT64: - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_UINT64: - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - case PROTOBUF_C_TYPE_DOUBLE: - return 8; - case PROTOBUF_C_TYPE_BOOL: - return sizeof(protobuf_c_boolean); - case PROTOBUF_C_TYPE_STRING: - case PROTOBUF_C_TYPE_MESSAGE: - return sizeof(void *); - case PROTOBUF_C_TYPE_BYTES: - return sizeof(ProtobufCBinaryData); - } - PROTOBUF_C__ASSERT_NOT_REACHED(); - return 0; -} - -static inline int int_range_lookup(unsigned n_ranges, - const ProtobufCIntRange *ranges, int value) { - unsigned n; - unsigned start; - - if (n_ranges == 0) return -1; - start = 0; - n = n_ranges; - while (n > 1) { - unsigned mid = start + n / 2; - - if (value < ranges[mid].start_value) { - n = mid - start; - } else if (value >= - 
ranges[mid].start_value + - (int)(ranges[mid + 1].orig_index - ranges[mid].orig_index)) { - unsigned new_start = mid + 1; - n = start + n - new_start; - start = new_start; - } else - return (value - ranges[mid].start_value) + ranges[mid].orig_index; - } - if (n > 0) { - unsigned start_orig_index = ranges[start].orig_index; - unsigned range_size = ranges[start + 1].orig_index - start_orig_index; - - if (ranges[start].start_value <= value && - value < (int)(ranges[start].start_value + range_size)) { - return (value - ranges[start].start_value) + start_orig_index; - } - } - return -1; -} - -static size_t parse_tag_and_wiretype(size_t len, const uint8_t *data, - uint32_t *tag_out, - ProtobufCWireType *wiretype_out) { - unsigned max_rv = len > 5 ? 5 : len; - uint32_t tag = (data[0] & 0x7f) >> 3; - unsigned shift = 4; - unsigned rv; - - *wiretype_out = data[0] & 7; - if ((data[0] & 0x80) == 0) { - *tag_out = tag; - return 1; - } - for (rv = 1; rv < max_rv; rv++) { - if (data[rv] & 0x80) { - tag |= (data[rv] & 0x7f) << shift; - shift += 7; - } else { - tag |= data[rv] << shift; - *tag_out = tag; - return rv + 1; - } - } - return 0; /* error: bad header */ -} - -/* sizeof(ScannedMember) must be <= (1UL< len) { - PROTOBUF_C_UNPACK_ERROR("data too short after length-prefix of %u", val); - return 0; - } - return hdr_len + val; -} - -static size_t max_b128_numbers(size_t len, const uint8_t *data) { - size_t rv = 0; - while (len--) - if ((*data++ & 0x80) == 0) ++rv; - return rv; -} - -/**@}*/ - -/** - * Merge earlier message into a latter message. - * - * For numeric types and strings, if the same value appears multiple - * times, the parser accepts the last value it sees. For embedded - * message fields, the parser merges multiple instances of the same - * field. That is, all singular scalar fields in the latter instance - * replace those in the former, singular embedded messages are merged, - * and repeated fields are concatenated. 
- * - * The earlier message should be freed after calling this function, as - * some of its fields may have been reused and changed to their default - * values during the merge. - */ -static protobuf_c_boolean merge_messages(ProtobufCMessage *earlier_msg, - ProtobufCMessage *latter_msg, - ProtobufCAllocator *allocator) { - unsigned i; - const ProtobufCFieldDescriptor *fields = latter_msg->descriptor->fields; - for (i = 0; i < latter_msg->descriptor->n_fields; i++) { - if (fields[i].label == PROTOBUF_C_LABEL_REPEATED) { - size_t *n_earlier = - STRUCT_MEMBER_PTR(size_t, earlier_msg, fields[i].quantifier_offset); - uint8_t **p_earlier = - STRUCT_MEMBER_PTR(uint8_t *, earlier_msg, fields[i].offset); - size_t *n_latter = - STRUCT_MEMBER_PTR(size_t, latter_msg, fields[i].quantifier_offset); - uint8_t **p_latter = - STRUCT_MEMBER_PTR(uint8_t *, latter_msg, fields[i].offset); - - if (*n_earlier > 0) { - if (*n_latter > 0) { - /* Concatenate the repeated field */ - size_t el_size = sizeof_elt_in_repeated_array(fields[i].type); - uint8_t *new_field; - - new_field = do_alloc(allocator, (*n_earlier + *n_latter) * el_size); - if (!new_field) return FALSE; - - memcpy(new_field, *p_earlier, *n_earlier * el_size); - memcpy(new_field + *n_earlier * el_size, *p_latter, - *n_latter * el_size); - - do_free(allocator, *p_latter); - do_free(allocator, *p_earlier); - *p_latter = new_field; - *n_latter = *n_earlier + *n_latter; - } else { - /* Zero copy the repeated field from the earlier message */ - *n_latter = *n_earlier; - *p_latter = *p_earlier; - } - /* Make sure the field does not get double freed */ - *n_earlier = 0; - *p_earlier = 0; - } - } else if (fields[i].label == PROTOBUF_C_LABEL_OPTIONAL || - fields[i].label == PROTOBUF_C_LABEL_NONE) { - const ProtobufCFieldDescriptor *field; - uint32_t *earlier_case_p = - STRUCT_MEMBER_PTR(uint32_t, earlier_msg, fields[i].quantifier_offset); - uint32_t *latter_case_p = - STRUCT_MEMBER_PTR(uint32_t, latter_msg, 
fields[i].quantifier_offset); - protobuf_c_boolean need_to_merge = FALSE; - void *earlier_elem; - void *latter_elem; - const void *def_val; - - if (fields[i].flags & PROTOBUF_C_FIELD_FLAG_ONEOF) { - if (*latter_case_p == 0) { - /* lookup correct oneof field */ - int field_index = int_range_lookup( - latter_msg->descriptor->n_field_ranges, - latter_msg->descriptor->field_ranges, *earlier_case_p); - field = latter_msg->descriptor->fields + field_index; - } else { - /* Oneof is present in the latter message, move on */ - continue; - } - } else { - field = &fields[i]; - } - - earlier_elem = STRUCT_MEMBER_P(earlier_msg, field->offset); - latter_elem = STRUCT_MEMBER_P(latter_msg, field->offset); - def_val = field->default_value; - - switch (field->type) { - case PROTOBUF_C_TYPE_MESSAGE: { - ProtobufCMessage *em = *(ProtobufCMessage **)earlier_elem; - ProtobufCMessage *lm = *(ProtobufCMessage **)latter_elem; - if (em != NULL) { - if (lm != NULL) { - if (!merge_messages(em, lm, allocator)) return FALSE; - /* Already merged */ - need_to_merge = FALSE; - } else { - /* Zero copy the message */ - need_to_merge = TRUE; - } - } - break; - } - case PROTOBUF_C_TYPE_BYTES: { - uint8_t *e_data = ((ProtobufCBinaryData *)earlier_elem)->data; - uint8_t *l_data = ((ProtobufCBinaryData *)latter_elem)->data; - const ProtobufCBinaryData *d_bd = (ProtobufCBinaryData *)def_val; - - need_to_merge = - (e_data != NULL && (d_bd == NULL || e_data != d_bd->data)) && - (l_data == NULL || (d_bd != NULL && l_data == d_bd->data)); - break; - } - case PROTOBUF_C_TYPE_STRING: { - char *e_str = *(char **)earlier_elem; - char *l_str = *(char **)latter_elem; - const char *d_str = def_val; - - need_to_merge = e_str != d_str && l_str == d_str; - break; - } - default: { - /* Could be has field or case enum, the logic is - * equivalent, since 0 (FALSE) means not set for - * oneof */ - need_to_merge = (*earlier_case_p != 0) && (*latter_case_p == 0); - break; - } - } - - if (need_to_merge) { - size_t el_size = 
sizeof_elt_in_repeated_array(field->type); - memcpy(latter_elem, earlier_elem, el_size); - /* - * Reset the element from the old message to 0 - * to make sure earlier message deallocation - * doesn't corrupt zero-copied data in the new - * message, earlier message will be freed after - * this function is called anyway - */ - memset(earlier_elem, 0, el_size); - - if (field->quantifier_offset != 0) { - /* Set the has field or the case enum, - * if applicable */ - *latter_case_p = *earlier_case_p; - *earlier_case_p = 0; - } - } - } - } - return TRUE; -} - -/** - * Count packed elements. - * - * Given a raw slab of packed-repeated values, determine the number of - * elements. This function detects certain kinds of errors but not - * others; the remaining error checking is done by - * parse_packed_repeated_member(). - */ -static protobuf_c_boolean count_packed_elements(ProtobufCType type, size_t len, - const uint8_t *data, - size_t *count_out) { - switch (type) { - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - case PROTOBUF_C_TYPE_FLOAT: - if (len % 4 != 0) { - PROTOBUF_C_UNPACK_ERROR( - "length must be a multiple of 4 for fixed-length 32-bit types"); - return FALSE; - } - *count_out = len / 4; - return TRUE; - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - case PROTOBUF_C_TYPE_DOUBLE: - if (len % 8 != 0) { - PROTOBUF_C_UNPACK_ERROR( - "length must be a multiple of 8 for fixed-length 64-bit types"); - return FALSE; - } - *count_out = len / 8; - return TRUE; - case PROTOBUF_C_TYPE_ENUM: - case PROTOBUF_C_TYPE_INT32: - case PROTOBUF_C_TYPE_SINT32: - case PROTOBUF_C_TYPE_UINT32: - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_SINT64: - case PROTOBUF_C_TYPE_UINT64: - *count_out = max_b128_numbers(len, data); - return TRUE; - case PROTOBUF_C_TYPE_BOOL: - *count_out = len; - return TRUE; - case PROTOBUF_C_TYPE_STRING: - case PROTOBUF_C_TYPE_BYTES: - case PROTOBUF_C_TYPE_MESSAGE: - default: - PROTOBUF_C_UNPACK_ERROR("bad protobuf-c 
type %u for packed-repeated", - type); - return FALSE; - } -} - -static inline uint32_t parse_uint32(unsigned len, const uint8_t *data) { - uint32_t rv = data[0] & 0x7f; - if (len > 1) { - rv |= ((uint32_t)(data[1] & 0x7f) << 7); - if (len > 2) { - rv |= ((uint32_t)(data[2] & 0x7f) << 14); - if (len > 3) { - rv |= ((uint32_t)(data[3] & 0x7f) << 21); - if (len > 4) rv |= ((uint32_t)(data[4]) << 28); - } - } - } - return rv; -} - -static inline uint32_t parse_int32(unsigned len, const uint8_t *data) { - return parse_uint32(len, data); -} - -static inline int32_t unzigzag32(uint32_t v) { - if (v & 1) - return -(v >> 1) - 1; - else - return v >> 1; -} - -static inline uint32_t parse_fixed_uint32(const uint8_t *data) { -#if !defined(WORDS_BIGENDIAN) - uint32_t t; - memcpy(&t, data, 4); - return t; -#else - return data[0] | ((uint32_t)(data[1]) << 8) | ((uint32_t)(data[2]) << 16) | - ((uint32_t)(data[3]) << 24); -#endif -} - -static uint64_t parse_uint64(unsigned len, const uint8_t *data) { - unsigned shift, i; - uint64_t rv; - - if (len < 5) return parse_uint32(len, data); - rv = ((uint64_t)(data[0] & 0x7f)) | ((uint64_t)(data[1] & 0x7f) << 7) | - ((uint64_t)(data[2] & 0x7f) << 14) | ((uint64_t)(data[3] & 0x7f) << 21); - shift = 28; - for (i = 4; i < len; i++) { - rv |= (((uint64_t)(data[i] & 0x7f)) << shift); - shift += 7; - } - return rv; -} - -static inline int64_t unzigzag64(uint64_t v) { - if (v & 1) - return -(v >> 1) - 1; - else - return v >> 1; -} - -static inline uint64_t parse_fixed_uint64(const uint8_t *data) { -#if !defined(WORDS_BIGENDIAN) - uint64_t t; - memcpy(&t, data, 8); - return t; -#else - return (uint64_t)parse_fixed_uint32(data) | - (((uint64_t)parse_fixed_uint32(data + 4)) << 32); -#endif -} - -static protobuf_c_boolean parse_boolean(unsigned len, const uint8_t *data) { - unsigned i; - for (i = 0; i < len; i++) - if (data[i] & 0x7f) return TRUE; - return FALSE; -} - -static protobuf_c_boolean parse_required_member( - ScannedMember *scanned_member, 
void *member, ProtobufCAllocator *allocator, - protobuf_c_boolean maybe_clear) { - unsigned len = scanned_member->len; - const uint8_t *data = scanned_member->data; - ProtobufCWireType wire_type = scanned_member->wire_type; - - switch (scanned_member->field->type) { - case PROTOBUF_C_TYPE_ENUM: - case PROTOBUF_C_TYPE_INT32: - if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; - *(int32_t *)member = parse_int32(len, data); - return TRUE; - case PROTOBUF_C_TYPE_UINT32: - if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; - *(uint32_t *)member = parse_uint32(len, data); - return TRUE; - case PROTOBUF_C_TYPE_SINT32: - if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; - *(int32_t *)member = unzigzag32(parse_uint32(len, data)); - return TRUE; - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - case PROTOBUF_C_TYPE_FLOAT: - if (wire_type != PROTOBUF_C_WIRE_TYPE_32BIT) return FALSE; - *(uint32_t *)member = parse_fixed_uint32(data); - return TRUE; - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_UINT64: - if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; - *(uint64_t *)member = parse_uint64(len, data); - return TRUE; - case PROTOBUF_C_TYPE_SINT64: - if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; - *(int64_t *)member = unzigzag64(parse_uint64(len, data)); - return TRUE; - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - case PROTOBUF_C_TYPE_DOUBLE: - if (wire_type != PROTOBUF_C_WIRE_TYPE_64BIT) return FALSE; - *(uint64_t *)member = parse_fixed_uint64(data); - return TRUE; - case PROTOBUF_C_TYPE_BOOL: - *(protobuf_c_boolean *)member = parse_boolean(len, data); - return TRUE; - case PROTOBUF_C_TYPE_STRING: { - char **pstr = member; - unsigned pref_len = scanned_member->length_prefix_len; - - if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE; - - if (maybe_clear && *pstr != NULL) { - const char *def = scanned_member->field->default_value; - if (*pstr != NULL && *pstr != def) 
do_free(allocator, *pstr); - } - *pstr = do_alloc(allocator, len - pref_len + 1); - if (*pstr == NULL) return FALSE; - memcpy(*pstr, data + pref_len, len - pref_len); - (*pstr)[len - pref_len] = 0; - return TRUE; - } - case PROTOBUF_C_TYPE_BYTES: { - ProtobufCBinaryData *bd = member; - const ProtobufCBinaryData *def_bd; - unsigned pref_len = scanned_member->length_prefix_len; - - if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE; - - def_bd = scanned_member->field->default_value; - if (maybe_clear && bd->data != NULL && - (def_bd == NULL || bd->data != def_bd->data)) { - do_free(allocator, bd->data); - } - if (len - pref_len > 0) { - bd->data = do_alloc(allocator, len - pref_len); - if (bd->data == NULL) return FALSE; - memcpy(bd->data, data + pref_len, len - pref_len); - } else { - bd->data = NULL; - } - bd->len = len - pref_len; - return TRUE; - } - case PROTOBUF_C_TYPE_MESSAGE: { - ProtobufCMessage **pmessage = member; - ProtobufCMessage *subm; - const ProtobufCMessage *def_mess; - protobuf_c_boolean merge_successful = TRUE; - unsigned pref_len = scanned_member->length_prefix_len; - - if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE; - - def_mess = scanned_member->field->default_value; - subm = - protobuf_c_message_unpack(scanned_member->field->descriptor, - allocator, len - pref_len, data + pref_len); - - if (maybe_clear && *pmessage != NULL && *pmessage != def_mess) { - if (subm != NULL) - merge_successful = merge_messages(*pmessage, subm, allocator); - /* Delete the previous message */ - protobuf_c_message_free_unpacked(*pmessage, allocator); - } - *pmessage = subm; - if (subm == NULL || !merge_successful) return FALSE; - return TRUE; - } - } - return FALSE; -} - -static protobuf_c_boolean parse_oneof_member(ScannedMember *scanned_member, - void *member, - ProtobufCMessage *message, - ProtobufCAllocator *allocator) { - uint32_t *oneof_case = STRUCT_MEMBER_PTR( - uint32_t, message, scanned_member->field->quantifier_offset); 
- - /* If we have already parsed a member of this oneof, free it. */ - if (*oneof_case != 0) { - /* lookup field */ - int field_index = - int_range_lookup(message->descriptor->n_field_ranges, - message->descriptor->field_ranges, *oneof_case); - const ProtobufCFieldDescriptor *old_field = - message->descriptor->fields + field_index; - size_t el_size = sizeof_elt_in_repeated_array(old_field->type); - - switch (old_field->type) { - case PROTOBUF_C_TYPE_STRING: { - char **pstr = member; - const char *def = old_field->default_value; - if (*pstr != NULL && *pstr != def) do_free(allocator, *pstr); - break; - } - case PROTOBUF_C_TYPE_BYTES: { - ProtobufCBinaryData *bd = member; - const ProtobufCBinaryData *def_bd = old_field->default_value; - if (bd->data != NULL && (def_bd == NULL || bd->data != def_bd->data)) { - do_free(allocator, bd->data); - } - break; - } - case PROTOBUF_C_TYPE_MESSAGE: { - ProtobufCMessage **pmessage = member; - const ProtobufCMessage *def_mess = old_field->default_value; - if (*pmessage != NULL && *pmessage != def_mess) - protobuf_c_message_free_unpacked(*pmessage, allocator); - break; - } - default: - break; - } - - memset(member, 0, el_size); - } - if (!parse_required_member(scanned_member, member, allocator, TRUE)) - return FALSE; - - *oneof_case = scanned_member->tag; - return TRUE; -} - -static protobuf_c_boolean parse_optional_member(ScannedMember *scanned_member, - void *member, - ProtobufCMessage *message, - ProtobufCAllocator *allocator) { - if (!parse_required_member(scanned_member, member, allocator, TRUE)) - return FALSE; - if (scanned_member->field->quantifier_offset != 0) - STRUCT_MEMBER(protobuf_c_boolean, message, - scanned_member->field->quantifier_offset) = TRUE; - return TRUE; -} - -static protobuf_c_boolean parse_repeated_member(ScannedMember *scanned_member, - void *member, - ProtobufCMessage *message, - ProtobufCAllocator *allocator) { - const ProtobufCFieldDescriptor *field = scanned_member->field; - size_t *p_n = 
STRUCT_MEMBER_PTR(size_t, message, field->quantifier_offset); - size_t siz = sizeof_elt_in_repeated_array(field->type); - char *array = *(char **)member; - - if (!parse_required_member(scanned_member, array + siz * (*p_n), allocator, - FALSE)) { - return FALSE; - } - *p_n += 1; - return TRUE; -} - -static unsigned scan_varint(unsigned len, const uint8_t *data) { - unsigned i; - if (len > 10) len = 10; - for (i = 0; i < len; i++) - if ((data[i] & 0x80) == 0) break; - if (i == len) return 0; - return i + 1; -} - -static protobuf_c_boolean parse_packed_repeated_member( - ScannedMember *scanned_member, void *member, ProtobufCMessage *message) { - const ProtobufCFieldDescriptor *field = scanned_member->field; - size_t *p_n = STRUCT_MEMBER_PTR(size_t, message, field->quantifier_offset); - size_t siz = sizeof_elt_in_repeated_array(field->type); - void *array = *(char **)member + siz * (*p_n); - const uint8_t *at = scanned_member->data + scanned_member->length_prefix_len; - size_t rem = scanned_member->len - scanned_member->length_prefix_len; - size_t count = 0; - unsigned i; - - switch (field->type) { - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_FIXED32: - case PROTOBUF_C_TYPE_FLOAT: - count = (scanned_member->len - scanned_member->length_prefix_len) / 4; -#if !defined(WORDS_BIGENDIAN) - goto no_unpacking_needed; -#else - for (i = 0; i < count; i++) { - ((uint32_t *)array)[i] = parse_fixed_uint32(at); - at += 4; - } - break; -#endif - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_FIXED64: - case PROTOBUF_C_TYPE_DOUBLE: - count = (scanned_member->len - scanned_member->length_prefix_len) / 8; -#if !defined(WORDS_BIGENDIAN) - goto no_unpacking_needed; -#else - for (i = 0; i < count; i++) { - ((uint64_t *)array)[i] = parse_fixed_uint64(at); - at += 8; - } - break; -#endif - case PROTOBUF_C_TYPE_ENUM: - case PROTOBUF_C_TYPE_INT32: - while (rem > 0) { - unsigned s = scan_varint(rem, at); - if (s == 0) { - PROTOBUF_C_UNPACK_ERROR("bad packed-repeated int32 
value"); - return FALSE; - } - ((int32_t *)array)[count++] = parse_int32(s, at); - at += s; - rem -= s; - } - break; - case PROTOBUF_C_TYPE_SINT32: - while (rem > 0) { - unsigned s = scan_varint(rem, at); - if (s == 0) { - PROTOBUF_C_UNPACK_ERROR("bad packed-repeated sint32 value"); - return FALSE; - } - ((int32_t *)array)[count++] = unzigzag32(parse_uint32(s, at)); - at += s; - rem -= s; - } - break; - case PROTOBUF_C_TYPE_UINT32: - while (rem > 0) { - unsigned s = scan_varint(rem, at); - if (s == 0) { - PROTOBUF_C_UNPACK_ERROR("bad packed-repeated enum or uint32 value"); - return FALSE; - } - ((uint32_t *)array)[count++] = parse_uint32(s, at); - at += s; - rem -= s; - } - break; - - case PROTOBUF_C_TYPE_SINT64: - while (rem > 0) { - unsigned s = scan_varint(rem, at); - if (s == 0) { - PROTOBUF_C_UNPACK_ERROR("bad packed-repeated sint64 value"); - return FALSE; - } - ((int64_t *)array)[count++] = unzigzag64(parse_uint64(s, at)); - at += s; - rem -= s; - } - break; - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_UINT64: - while (rem > 0) { - unsigned s = scan_varint(rem, at); - if (s == 0) { - PROTOBUF_C_UNPACK_ERROR("bad packed-repeated int64/uint64 value"); - return FALSE; - } - ((int64_t *)array)[count++] = parse_uint64(s, at); - at += s; - rem -= s; - } - break; - case PROTOBUF_C_TYPE_BOOL: - count = rem; - for (i = 0; i < count; i++) { - if (at[i] > 1) { - PROTOBUF_C_UNPACK_ERROR("bad packed-repeated boolean value"); - return FALSE; - } - ((protobuf_c_boolean *)array)[i] = at[i]; - } - break; - default: - PROTOBUF_C__ASSERT_NOT_REACHED(); - } - *p_n += count; - return TRUE; - -#if !defined(WORDS_BIGENDIAN) -no_unpacking_needed: - memcpy(array, at, count * siz); - *p_n += count; - return TRUE; -#endif -} - -static protobuf_c_boolean is_packable_type(ProtobufCType type) { - return type != PROTOBUF_C_TYPE_STRING && type != PROTOBUF_C_TYPE_BYTES && - type != PROTOBUF_C_TYPE_MESSAGE; -} - -static protobuf_c_boolean parse_member(ScannedMember *scanned_member, - 
ProtobufCMessage *message, - ProtobufCAllocator *allocator) { - const ProtobufCFieldDescriptor *field = scanned_member->field; - void *member; - - if (field == NULL) { - ProtobufCMessageUnknownField *ufield = - message->unknown_fields + (message->n_unknown_fields++); - ufield->tag = scanned_member->tag; - ufield->wire_type = scanned_member->wire_type; - ufield->len = scanned_member->len; - ufield->data = do_alloc(allocator, scanned_member->len); - if (ufield->data == NULL) return FALSE; - memcpy(ufield->data, scanned_member->data, ufield->len); - return TRUE; - } - member = (char *)message + field->offset; - switch (field->label) { - case PROTOBUF_C_LABEL_REQUIRED: - return parse_required_member(scanned_member, member, allocator, TRUE); - case PROTOBUF_C_LABEL_OPTIONAL: - case PROTOBUF_C_LABEL_NONE: - if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF)) { - return parse_oneof_member(scanned_member, member, message, allocator); - } else { - return parse_optional_member(scanned_member, member, message, - allocator); - } - case PROTOBUF_C_LABEL_REPEATED: - if (scanned_member->wire_type == PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED && - (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED) || - is_packable_type(field->type))) { - return parse_packed_repeated_member(scanned_member, member, message); - } else { - return parse_repeated_member(scanned_member, member, message, - allocator); - } - } - PROTOBUF_C__ASSERT_NOT_REACHED(); - return 0; -} - -/** - * Initialise messages generated by old code. - * - * This function is used if desc->message_init == NULL (which occurs - * for old code, and which would be useful to support allocating - * descriptors dynamically). 
- */ -static void message_init_generic(const ProtobufCMessageDescriptor *desc, - ProtobufCMessage *message) { - unsigned i; - - memset(message, 0, desc->sizeof_message); - message->descriptor = desc; - for (i = 0; i < desc->n_fields; i++) { - if (desc->fields[i].default_value != NULL && - desc->fields[i].label != PROTOBUF_C_LABEL_REPEATED) { - void *field = STRUCT_MEMBER_P(message, desc->fields[i].offset); - const void *dv = desc->fields[i].default_value; - - switch (desc->fields[i].type) { - case PROTOBUF_C_TYPE_INT32: - case PROTOBUF_C_TYPE_SINT32: - case PROTOBUF_C_TYPE_SFIXED32: - case PROTOBUF_C_TYPE_UINT32: - case PROTOBUF_C_TYPE_FIXED32: - case PROTOBUF_C_TYPE_FLOAT: - case PROTOBUF_C_TYPE_ENUM: - memcpy(field, dv, 4); - break; - case PROTOBUF_C_TYPE_INT64: - case PROTOBUF_C_TYPE_SINT64: - case PROTOBUF_C_TYPE_SFIXED64: - case PROTOBUF_C_TYPE_UINT64: - case PROTOBUF_C_TYPE_FIXED64: - case PROTOBUF_C_TYPE_DOUBLE: - memcpy(field, dv, 8); - break; - case PROTOBUF_C_TYPE_BOOL: - memcpy(field, dv, sizeof(protobuf_c_boolean)); - break; - case PROTOBUF_C_TYPE_BYTES: - memcpy(field, dv, sizeof(ProtobufCBinaryData)); - break; - - case PROTOBUF_C_TYPE_STRING: - case PROTOBUF_C_TYPE_MESSAGE: - /* - * The next line essentially implements a cast - * from const, which is totally unavoidable. - */ - *(const void **)field = dv; - break; - } - } - } -} - -/**@}*/ - -/* - * ScannedMember slabs (an unpacking implementation detail). Before doing real - * unpacking, we first scan through the elements to see how many there are (for - * repeated fields), and which field to use (for non-repeated fields given - * twice). - * - * In order to avoid allocations for small messages, we keep a stack-allocated - * slab of ScannedMembers of size FIRST_SCANNED_MEMBER_SLAB_SIZE (16). After we - * fill that up, we allocate each slab twice as large as the previous one. 
- */ -#define FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2 4 - -/* - * The number of slabs, including the stack-allocated ones; choose the number so - * that we would overflow if we needed a slab larger than provided. - */ -#define MAX_SCANNED_MEMBER_SLAB \ - (sizeof(unsigned int) * 8 - 1 - BOUND_SIZEOF_SCANNED_MEMBER_LOG2 - \ - FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2) - -#define REQUIRED_FIELD_BITMAP_SET(index) \ - (required_fields_bitmap[(index) / 8] |= (1UL << ((index) % 8))) - -#define REQUIRED_FIELD_BITMAP_IS_SET(index) \ - (required_fields_bitmap[(index) / 8] & (1UL << ((index) % 8))) - -ProtobufCMessage *protobuf_c_message_unpack( - const ProtobufCMessageDescriptor *desc, ProtobufCAllocator *allocator, - size_t len, const uint8_t *data) { - ProtobufCMessage *rv; - size_t rem = len; - const uint8_t *at = data; - const ProtobufCFieldDescriptor *last_field = desc->fields + 0; - ScannedMember first_member_slab[1UL << FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2]; - - /* - * scanned_member_slabs[i] is an array of arrays of ScannedMember. - * The first slab (scanned_member_slabs[0] is just a pointer to - * first_member_slab), above. All subsequent slabs will be allocated - * using the allocator. 
- */ - ScannedMember *scanned_member_slabs[MAX_SCANNED_MEMBER_SLAB + 1]; - unsigned which_slab = 0; /* the slab we are currently populating */ - unsigned in_slab_index = 0; /* number of members in the slab */ - size_t n_unknown = 0; - unsigned f; - unsigned j; - unsigned i_slab; - unsigned last_field_index = 0; - unsigned required_fields_bitmap_len; - unsigned char required_fields_bitmap_stack[16]; - unsigned char *required_fields_bitmap = required_fields_bitmap_stack; - protobuf_c_boolean required_fields_bitmap_alloced = FALSE; - - ASSERT_IS_MESSAGE_DESCRIPTOR(desc); - - if (allocator == NULL) allocator = &protobuf_c__allocator; - - rv = do_alloc(allocator, desc->sizeof_message); - if (!rv) return (NULL); - scanned_member_slabs[0] = first_member_slab; - - required_fields_bitmap_len = (desc->n_fields + 7) / 8; - if (required_fields_bitmap_len > sizeof(required_fields_bitmap_stack)) { - required_fields_bitmap = do_alloc(allocator, required_fields_bitmap_len); - if (!required_fields_bitmap) { - do_free(allocator, rv); - return (NULL); - } - required_fields_bitmap_alloced = TRUE; - } - memset(required_fields_bitmap, 0, required_fields_bitmap_len); - - /* - * Generated code always defines "message_init". However, we provide a - * fallback for (1) users of old protobuf-c generated-code that do not - * provide the function, and (2) descriptors constructed from some other - * source (most likely, direct construction from the .proto file). 
- */ - if (desc->message_init != NULL) - protobuf_c_message_init(desc, rv); - else - message_init_generic(desc, rv); - - while (rem > 0) { - uint32_t tag; - ProtobufCWireType wire_type; - size_t used = parse_tag_and_wiretype(rem, at, &tag, &wire_type); - const ProtobufCFieldDescriptor *field; - ScannedMember tmp; - - if (used == 0) { - PROTOBUF_C_UNPACK_ERROR("error parsing tag/wiretype at offset %u", - (unsigned)(at - data)); - goto error_cleanup_during_scan; - } - /* - * \todo Consider optimizing for field[1].id == tag, if field[1] - * exists! - */ - if (last_field == NULL || last_field->id != tag) { - /* lookup field */ - int field_index = - int_range_lookup(desc->n_field_ranges, desc->field_ranges, tag); - if (field_index < 0) { - field = NULL; - n_unknown++; - } else { - field = desc->fields + field_index; - last_field = field; - last_field_index = field_index; - } - } else { - field = last_field; - } - - if (field != NULL && field->label == PROTOBUF_C_LABEL_REQUIRED) - REQUIRED_FIELD_BITMAP_SET(last_field_index); - - at += used; - rem -= used; - tmp.tag = tag; - tmp.wire_type = wire_type; - tmp.field = field; - tmp.data = at; - tmp.length_prefix_len = 0; - - switch (wire_type) { - case PROTOBUF_C_WIRE_TYPE_VARINT: { - unsigned max_len = rem < 10 ? 
rem : 10; - unsigned i; - - for (i = 0; i < max_len; i++) - if ((at[i] & 0x80) == 0) break; - if (i == max_len) { - PROTOBUF_C_UNPACK_ERROR("unterminated varint at offset %u", - (unsigned)(at - data)); - goto error_cleanup_during_scan; - } - tmp.len = i + 1; - break; - } - case PROTOBUF_C_WIRE_TYPE_64BIT: - if (rem < 8) { - PROTOBUF_C_UNPACK_ERROR("too short after 64bit wiretype at offset %u", - (unsigned)(at - data)); - goto error_cleanup_during_scan; - } - tmp.len = 8; - break; - case PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED: { - size_t pref_len; - - tmp.len = scan_length_prefixed_data(rem, at, &pref_len); - if (tmp.len == 0) { - /* NOTE: scan_length_prefixed_data calls UNPACK_ERROR */ - goto error_cleanup_during_scan; - } - tmp.length_prefix_len = pref_len; - break; - } - case PROTOBUF_C_WIRE_TYPE_32BIT: - if (rem < 4) { - PROTOBUF_C_UNPACK_ERROR("too short after 32bit wiretype at offset %u", - (unsigned)(at - data)); - goto error_cleanup_during_scan; - } - tmp.len = 4; - break; - default: - PROTOBUF_C_UNPACK_ERROR("unsupported tag %u at offset %u", wire_type, - (unsigned)(at - data)); - goto error_cleanup_during_scan; - } - - if (in_slab_index == - (1UL << (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2))) { - size_t size; - - in_slab_index = 0; - if (which_slab == MAX_SCANNED_MEMBER_SLAB) { - PROTOBUF_C_UNPACK_ERROR("too many fields"); - goto error_cleanup_during_scan; - } - which_slab++; - size = sizeof(ScannedMember) - << (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2); - scanned_member_slabs[which_slab] = do_alloc(allocator, size); - if (scanned_member_slabs[which_slab] == NULL) - goto error_cleanup_during_scan; - } - scanned_member_slabs[which_slab][in_slab_index++] = tmp; - - if (field != NULL && field->label == PROTOBUF_C_LABEL_REPEATED) { - size_t *n = STRUCT_MEMBER_PTR(size_t, rv, field->quantifier_offset); - if (wire_type == PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED && - (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED) || - 
is_packable_type(field->type))) { - size_t count; - if (!count_packed_elements(field->type, tmp.len - tmp.length_prefix_len, - tmp.data + tmp.length_prefix_len, &count)) { - PROTOBUF_C_UNPACK_ERROR("counting packed elements"); - goto error_cleanup_during_scan; - } - *n += count; - } else { - *n += 1; - } - } - - at += tmp.len; - rem -= tmp.len; - } - - /* allocate space for repeated fields, also check that all required fields - * have been set */ - for (f = 0; f < desc->n_fields; f++) { - const ProtobufCFieldDescriptor *field = desc->fields + f; - if (field->label == PROTOBUF_C_LABEL_REPEATED) { - size_t siz = sizeof_elt_in_repeated_array(field->type); - size_t *n_ptr = STRUCT_MEMBER_PTR(size_t, rv, field->quantifier_offset); - if (*n_ptr != 0) { - unsigned n = *n_ptr; - void *a; - *n_ptr = 0; - assert(rv->descriptor != NULL); -#define CLEAR_REMAINING_N_PTRS() \ - for (f++; f < desc->n_fields; f++) { \ - field = desc->fields + f; \ - if (field->label == PROTOBUF_C_LABEL_REPEATED) \ - STRUCT_MEMBER(size_t, rv, field->quantifier_offset) = 0; \ - } - a = do_alloc(allocator, siz * n); - if (!a) { - CLEAR_REMAINING_N_PTRS(); - goto error_cleanup; - } - STRUCT_MEMBER(void *, rv, field->offset) = a; - } - } else if (field->label == PROTOBUF_C_LABEL_REQUIRED) { - if (field->default_value == NULL && !REQUIRED_FIELD_BITMAP_IS_SET(f)) { - CLEAR_REMAINING_N_PTRS(); - PROTOBUF_C_UNPACK_ERROR("message '%s': missing required field '%s'", - desc->name, field->name); - goto error_cleanup; - } - } - } -#undef CLEAR_REMAINING_N_PTRS - - /* allocate space for unknown fields */ - if (n_unknown) { - rv->unknown_fields = - do_alloc(allocator, n_unknown * sizeof(ProtobufCMessageUnknownField)); - if (rv->unknown_fields == NULL) goto error_cleanup; - } - - /* do real parsing */ - for (i_slab = 0; i_slab <= which_slab; i_slab++) { - unsigned max = - (i_slab == which_slab) ? 
in_slab_index : (1UL << (i_slab + 4)); - ScannedMember *slab = scanned_member_slabs[i_slab]; - - for (j = 0; j < max; j++) { - if (!parse_member(slab + j, rv, allocator)) { - PROTOBUF_C_UNPACK_ERROR( - "error parsing member %s of %s", - slab->field ? slab->field->name : "*unknown-field*", desc->name); - goto error_cleanup; - } - } - } - - /* cleanup */ - for (j = 1; j <= which_slab; j++) do_free(allocator, scanned_member_slabs[j]); - if (required_fields_bitmap_alloced) - do_free(allocator, required_fields_bitmap); - return rv; - -error_cleanup: - protobuf_c_message_free_unpacked(rv, allocator); - for (j = 1; j <= which_slab; j++) do_free(allocator, scanned_member_slabs[j]); - if (required_fields_bitmap_alloced) - do_free(allocator, required_fields_bitmap); - return NULL; - -error_cleanup_during_scan: - do_free(allocator, rv); - for (j = 1; j <= which_slab; j++) do_free(allocator, scanned_member_slabs[j]); - if (required_fields_bitmap_alloced) - do_free(allocator, required_fields_bitmap); - return NULL; -} - -void protobuf_c_message_free_unpacked(ProtobufCMessage *message, - ProtobufCAllocator *allocator) { - const ProtobufCMessageDescriptor *desc; - unsigned f; - - if (message == NULL) return; - - desc = message->descriptor; - - ASSERT_IS_MESSAGE(message); - - if (allocator == NULL) allocator = &protobuf_c__allocator; - message->descriptor = NULL; - for (f = 0; f < desc->n_fields; f++) { - if (0 != (desc->fields[f].flags & PROTOBUF_C_FIELD_FLAG_ONEOF) && - desc->fields[f].id != - STRUCT_MEMBER(uint32_t, message, - desc->fields[f].quantifier_offset)) { - /* This is not the selected oneof, skip it */ - continue; - } - - if (desc->fields[f].label == PROTOBUF_C_LABEL_REPEATED) { - size_t n = - STRUCT_MEMBER(size_t, message, desc->fields[f].quantifier_offset); - void *arr = STRUCT_MEMBER(void *, message, desc->fields[f].offset); - - if (arr != NULL) { - if (desc->fields[f].type == PROTOBUF_C_TYPE_STRING) { - unsigned i; - for (i = 0; i < n; i++) do_free(allocator, 
((char **)arr)[i]); - } else if (desc->fields[f].type == PROTOBUF_C_TYPE_BYTES) { - unsigned i; - for (i = 0; i < n; i++) - do_free(allocator, ((ProtobufCBinaryData *)arr)[i].data); - } else if (desc->fields[f].type == PROTOBUF_C_TYPE_MESSAGE) { - unsigned i; - for (i = 0; i < n; i++) - protobuf_c_message_free_unpacked(((ProtobufCMessage **)arr)[i], - allocator); - } - do_free(allocator, arr); - } - } else if (desc->fields[f].type == PROTOBUF_C_TYPE_STRING) { - char *str = STRUCT_MEMBER(char *, message, desc->fields[f].offset); - - if (str && str != desc->fields[f].default_value) do_free(allocator, str); - } else if (desc->fields[f].type == PROTOBUF_C_TYPE_BYTES) { - void *data = - STRUCT_MEMBER(ProtobufCBinaryData, message, desc->fields[f].offset) - .data; - const ProtobufCBinaryData *default_bd; - - default_bd = desc->fields[f].default_value; - if (data != NULL && (default_bd == NULL || default_bd->data != data)) { - do_free(allocator, data); - } - } else if (desc->fields[f].type == PROTOBUF_C_TYPE_MESSAGE) { - ProtobufCMessage *sm; - - sm = STRUCT_MEMBER(ProtobufCMessage *, message, desc->fields[f].offset); - if (sm && sm != desc->fields[f].default_value) - protobuf_c_message_free_unpacked(sm, allocator); - } - } - - for (f = 0; f < message->n_unknown_fields; f++) - do_free(allocator, message->unknown_fields[f].data); - if (message->unknown_fields != NULL) - do_free(allocator, message->unknown_fields); - - do_free(allocator, message); -} - -void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor, - void *message) { - descriptor->message_init((ProtobufCMessage *)(message)); -} - -protobuf_c_boolean protobuf_c_message_check(const ProtobufCMessage *message) { - unsigned i; - - if (!message || !message->descriptor || - message->descriptor->magic != PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) { - return FALSE; - } - - for (i = 0; i < message->descriptor->n_fields; i++) { - const ProtobufCFieldDescriptor *f = message->descriptor->fields + i; - 
ProtobufCType type = f->type; - ProtobufCLabel label = f->label; - void *field = STRUCT_MEMBER_P(message, f->offset); - - if (label == PROTOBUF_C_LABEL_REPEATED) { - size_t *quantity = STRUCT_MEMBER_P(message, f->quantifier_offset); - - if (*quantity > 0 && *(void **)field == NULL) { - return FALSE; - } - - if (type == PROTOBUF_C_TYPE_MESSAGE) { - ProtobufCMessage **submessage = *(ProtobufCMessage ***)field; - unsigned j; - for (j = 0; j < *quantity; j++) { - if (!protobuf_c_message_check(submessage[j])) return FALSE; - } - } else if (type == PROTOBUF_C_TYPE_STRING) { - char **string = *(char ***)field; - unsigned j; - for (j = 0; j < *quantity; j++) { - if (!string[j]) return FALSE; - } - } else if (type == PROTOBUF_C_TYPE_BYTES) { - ProtobufCBinaryData *bd = *(ProtobufCBinaryData **)field; - unsigned j; - for (j = 0; j < *quantity; j++) { - if (bd[j].len > 0 && bd[j].data == NULL) return FALSE; - } - } - - } else { /* PROTOBUF_C_LABEL_REQUIRED or PROTOBUF_C_LABEL_OPTIONAL */ - - if (type == PROTOBUF_C_TYPE_MESSAGE) { - ProtobufCMessage *submessage = *(ProtobufCMessage **)field; - if (label == PROTOBUF_C_LABEL_REQUIRED || submessage != NULL) { - if (!protobuf_c_message_check(submessage)) return FALSE; - } - } else if (type == PROTOBUF_C_TYPE_STRING) { - char *string = *(char **)field; - if (label == PROTOBUF_C_LABEL_REQUIRED && string == NULL) return FALSE; - } else if (type == PROTOBUF_C_TYPE_BYTES) { - protobuf_c_boolean *has = - STRUCT_MEMBER_P(message, f->quantifier_offset); - ProtobufCBinaryData *bd = field; - if (label == PROTOBUF_C_LABEL_REQUIRED || *has == TRUE) { - if (bd->len > 0 && bd->data == NULL) return FALSE; - } - } - } - } - - return TRUE; -} - -/* === services === */ - -typedef void (*GenericHandler)(void *service, const ProtobufCMessage *input, - ProtobufCClosure closure, void *closure_data); diff --git a/src/common/types.cpp b/src/common/types.cpp index a0a3b6954ebd3cf32519fa3d91012d4e3be170fa..25b6d41e3a161e37a6395e8ae51dea643cbd5a30 100644 
--- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -15,6 +15,7 @@ limitations under the License. */ #include "common/types.h" #include + namespace paddle_mobile { const char *G_OP_TYPE_CONV = "conv2d"; diff --git a/src/common/types.h b/src/common/types.h index 6d38e4178907aa30968a6760a6ae5d69f4b61167..36e7b1921e8d956fc69f151fe2ae5da990f04679 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -39,7 +39,7 @@ struct PrecisionTrait { }; //! device type -enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 }; +enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2, kX86 = 3 }; template struct DeviceType {}; @@ -47,6 +47,7 @@ struct DeviceType {}; typedef DeviceType CPU; typedef DeviceType FPGA; typedef DeviceType GPU_MALI; +typedef DeviceType X86; //! data type enum DataType { @@ -79,6 +80,12 @@ enum PMStatus { PMWrongDevice = 0x08 /*!< un-correct device. */ }; +enum RoundType { + ROUND_UNK = 0, + ROUND_NEAREST_TOWARDS_ZERO = 1, + ROUND_NEAREST_TO_EVEN = 2 +}; + extern const char *G_OP_TYPE_CONV; extern const char *G_OP_TYPE_BATCHNORM; extern const char *G_OP_TYPE_BOX_CODER; diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index 10787b915594a12a826a087e5453b2c2e8c03f9a..d1014ff87a86efeeefec731ebac05a8a30abe3b1 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -14,28 +14,26 @@ limitations under the License. 
*/ #include "api.h" #include +#include +#include #include -#include #include -#include +#include #include "bias_scale.h" #include "filter.h" #include "image.h" + #define FPGA_TEST_MODE -#define PADDLE_MOBILE_OS_LINUX namespace paddle_mobile { namespace fpga { static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; -static std::map memory_map; static inline int do_ioctl(int req, const void *arg) { #ifdef PADDLE_MOBILE_OS_LINUX - int result = ioctl(fd, req, (uint64_t)arg); - PADDLE_MOBILE_ENFORCE(result == 0, "ioctl didn't return correctly"); - return result; + return ioctl(req, (unsigned int64_t)arg); #else return -1; #endif @@ -50,94 +48,50 @@ int open_device() { // memory management; void *fpga_malloc(size_t size) { - static uint64_t counter = 0; - #ifdef PADDLE_MOBILE_OS_LINUX - auto ptr = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + return reinterpret_cast( + mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); #else - auto ptr = malloc(size); + return malloc(size); #endif - counter += size; - memory_map.insert(std::make_pair(ptr, size)); - DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total " - << counter << " bytes"; - return ptr; } void fpga_free(void *ptr) { - static uint64_t counter = 0; - size_t size = 0; - - auto iter = memory_map.find(ptr); // std::map::iterator - if (iter != memory_map.end()) { - size = iter->second; - memory_map.erase(iter); #ifdef PADDLE_MOBILE_OS_LINUX - munmap(ptr, size); + munmap(ptr, 0); #else - free(ptr); + free(ptr); #endif - counter += size; - DLOG << "Address: " << ptr << ", " << size << " bytes freed. 
Total " - << counter << " bytes"; - } else { - DLOG << "Invalid pointer"; - } } void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int fpga_flush(void *address, size_t size) { - struct MemoryCacheArgs args = {nullptr}; - args.address = address; - args.size = size; - return do_ioctl(IOCTL_MEMCACHE_FLUSH, &args); -} - -int fpga_invalidate(void *address, size_t size) { - struct MemoryCacheArgs args = {nullptr}; - args.address = address; - args.size = size; - return do_ioctl(IOCTL_MEMCACHE_INVAL, &args); -} - -int ComputeBasicConv(const struct ConvArgs &args) { - DLOG << "======Compute Basic Conv======"; - DLOG << " relu_enabled:" << args.relu_enabled - << " sb_address:" << args.sb_address - << " filter_address:" << args.filter_address - << " filter_num:" << args.filter_num - << " group_num:" << args.group_num; - DLOG << " image_address:" << args.image.address - << " image_scale_address:" << args.image.scale_address - << " image_channels:" << args.image.channels - << " image_height:" << args.image.height - << " image_width:" << args.image.width - << " pad_height:" << args.image.pad_height - << " pad_width:" << args.image.pad_width; - DLOG << " kernel_height:" << args.kernel.height - << " kernel_width:" << args.kernel.width - << " stride_h:" << args.kernel.stride_h - << " stride_w:" << args.kernel.stride_w; - DLOG << " out_address:" << args.output.address - << " out_scale_address:" << args.output.scale_address; - - return do_ioctl(IOCTL_CONFIG_CONV, &args); -} - int ComputeFpgaConv(const struct WrapperConvArgs &args) { #ifdef FPGA_TEST_MODE - DLOG << "=============ComputeFPGAConv==========="; - DLOG << " filter_num:" << args.filter_num - << " group_num:" << args.group_num - << " split_num:" << args.split_num; +/*DLOG << " relu_enabled:" << args.relu_enabled + << " sb_address:" << args.sb_address + << " filter_address:" << args.filter_address + << " filter_num:" << args.filter_num + << " group_num:" << args.group_num; +DLOG << " 
image_address:" << args.image.address + << " image_scale_address:" << args.image.scale_address + << " image_channels:" << args.image.channels + << " image_height:" << args.image.height + << " image_width:" << args.image.width + << " pad_height:" << args.image.pad_height + << " pad_width:" << args.image.pad_width; +DLOG << " kernel_height:" << args.kernel.height + << " kernel_width:" << args.kernel.width + << " stride_h:" << args.kernel.stride_h + << " stride_w:" << args.kernel.stride_w; +DLOG << " out_address:" << args.output.address + << " out_scale_address:" << args.output.scale_address;*/ #endif - int split_num = args.split_num; for (int i = 0; i < split_num; i++) { - ComputeBasicConv(args.conv_args[i]); + do_ioctl(IOCTL_CONFIG_CONV, &args.conv_args[i]); } if (split_num > 1) { @@ -147,7 +101,6 @@ int ComputeFpgaConv(const struct WrapperConvArgs &args) { int ComputeFpgaPool(const struct PoolingArgs &args) { #ifdef FPGA_TEST_MODE - DLOG << "=============ComputeFpgaPool==========="; DLOG << " image_address:" << args.image.address << " image_scale_address:" << args.image.scale_address << " image_channels:" << args.image.channels @@ -168,7 +121,6 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #ifdef FPGA_TEST_MODE - DLOG << "=============ComputeFpgaEWAdd==========="; DLOG << " relu_enabled:" << args.relu_enabled << " const0:" << args.const0 << " const1:" << args.const1; DLOG << " image0_address:" << args.image0.address @@ -193,11 +145,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { } int PerformBypass(const struct BypassArgs &args) { #ifdef FPGA_TEST_MODE - DLOG << "=============ComputeFpgaBypass==========="; - DLOG << " input_type:" << args.input_data_type - << " output_type:" << args.output_data_type - << " input_layout_type:" << args.input_layout_type - << " output_layout_type:" << args.output_layout_type; + DLOG << " layout_type:" << args.layout_type + << " convert_type:" << 
args.convert_type; DLOG << " image_address:" << args.image.address << " image_scale_address:" << args.image.scale_address << " image_channels:" << args.image.channels @@ -213,71 +162,29 @@ int PerformBypass(const struct BypassArgs &args) { } int ComputeFPGAConcat(const struct ConcatArgs &args) { -#ifdef FPGA_TEST_MODE - DLOG << "=============ComputeFpgaConcat==========="; - DLOG << " Image_num: " << args.image_num - << " out_address:" << args.image_out - << " out_scale_address:" << args.scale_out; - DLOG << " image_height:" << args.height << " image_width:" << args.width; - for (int i = 0; i < args.image_num; i++) { - DLOG << " " << i << "th: "; - DLOG << " channel_num:" << args.channel_num[i] - << " image_address:" << args.images_in[i] - << " image_scale_address:" << args.scales_in[i]; - } -#endif - image::concat_images(args.images_in, args.scales_in, args.image_out, args.scale_out, args.image_num, args.channel_num, args.height, args.width); return 0; } -int get_align_image_cw(int cw) { return align_to_x(cw, IMAGE_ALIGNMENT); } - void format_image(framework::Tensor *image_tensor) { auto dims = image_tensor->dims(); auto channel = dims[1], height = dims[2], width = dims[3]; - auto data_ptr = image_tensor->data(); + auto data_ptr = image_tensor->mutable_data(); size_t memory_size = channel * height * width * sizeof(float); - auto new_data = (float *)fpga_malloc(memory_size); + float *new_data = (float *)fpga_malloc(memory_size); fpga_copy(new_data, data_ptr, memory_size); image::format_image(&new_data, channel, height, width); image_tensor->reset_data_ptr(new_data); } -void format_fp16_ofm(framework::Tensor *ofm_tensor) { +void format_ofm(framework::Tensor *ofm_tensor) { auto dims = ofm_tensor->dims(); - size_t memory_size = 0; - if (dims.size() == 4) { - auto channel = dims[1], height = dims[2], width = dims[3]; - memory_size = - height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half); - } else if (dims.size() == 2) { - memory_size = 
align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(half); - } else { - DLOG << "Wrong ofm dimension"; - } - auto p = fpga_malloc(memory_size); - memset(p, 0, memory_size); - ofm_tensor->reset_data_ptr(p); -} - -void format_fp32_ofm(framework::Tensor *ofm_tensor) { - auto dims = ofm_tensor->dims(); - size_t memory_size = 0; - if (dims.size() == 4) { - auto channel = dims[1], height = dims[2], width = dims[3]; - memory_size = - height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(float); - } else if (dims.size() == 2) { - memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(float); - } else { - DLOG << "Wrong ofm dimension"; - } - auto p = fpga_malloc(memory_size); - memset(p, 0, memory_size); - ofm_tensor->reset_data_ptr(p); + auto channel = dims[1], height = dims[2], width = dims[3]; + size_t memory_size = + height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half); + ofm_tensor->reset_data_ptr(fpga_malloc(memory_size)); } float filter_find_max(framework::Tensor *filter_tensor) { @@ -293,7 +200,7 @@ int get_plit_num(framework::Tensor *filter_tensor) { return filter::calc_split_num(num, div_capacity); } -int get_filter_num_per_div(framework::Tensor *filter_tensor, int group_num) { +int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num) { auto dims = filter_tensor->dims(); auto chw = dims[1] * dims[2] * dims[3]; auto num = dims[0]; @@ -313,7 +220,7 @@ void format_filter(framework::Tensor *filter_tensor, float max_value, int group_num) { auto dims = filter_tensor->dims(); auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3]; - auto data_ptr = filter_tensor->data(); + auto data_ptr = filter_tensor->mutable_data(); size_t memory_size = num * channel * height * width * sizeof(float); auto new_data = (float *)fpga_malloc(memory_size); fpga_copy(new_data, data_ptr, memory_size); @@ -337,7 +244,7 @@ void format_concat_output(framework::Tensor *out, int height, int width, sum_cw = align_to_x(width * 
sum_channel, IMAGE_ALIGNMENT); auto data_ptr = fpga_malloc(height * sum_cw * sizeof(half)); - auto ddim = framework::make_ddim({1, sum_channel, height, width}); + auto ddim = framework::make_ddim({-1, sum_channel, height, width}); out->Resize(ddim); out->reset_data_ptr(data_ptr); } @@ -348,15 +255,15 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, int padding_h, int padding_w, float *bs_ptr) { auto input_ptr = input->data(); auto filter_ptr = filter->data(); - auto out_ptr = out->data(); + auto out_ptr = out->mutable_data(); arg->group_num = (uint32_t)group_num; - // Either group_num or split_num = 1; - arg->split_num = group_num == 1 ? (uint32_t)get_plit_num(filter) : 1; + arg->split_num = (uint32_t)fpga::get_plit_num(filter); arg->filter_num = (uint32_t)filter->dims()[0]; arg->output.address = out_ptr; arg->output.scale_address = out->scale; - arg->conv_args = (ConvArgs *)fpga_malloc(arg->split_num * sizeof(ConvArgs)); + arg->conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(arg->split_num * + sizeof(fpga::ConvArgs)); arg->concat_arg.image_num = arg->split_num; arg->concat_arg.image_out = out_ptr; @@ -365,14 +272,15 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, arg->concat_arg.width = (uint32_t)filter->dims()[3]; int n = arg->split_num; - arg->concat_arg.images_in = (half **)fpga_malloc(n * sizeof(int *)); - arg->concat_arg.scales_in = (float **)fpga_malloc(n * sizeof(float *)); - arg->concat_arg.channel_num = (uint32_t *)fpga_malloc(n * sizeof(uint32_t)); + arg->concat_arg.images_in = (half **)fpga::fpga_malloc(n * sizeof(int *)); + arg->concat_arg.scales_in = (float **)fpga::fpga_malloc(n * sizeof(float *)); + arg->concat_arg.channel_num = + (uint32_t *)fpga::fpga_malloc(n * sizeof(uint32_t)); arg->concat_arg.image_out = out_ptr; - auto channel = (int)out->dims()[1]; - int filter_num_per_div = get_filter_num_per_div(filter, group_num); - int element_num = get_aligned_filter_element_num( + const int 
channel = (int)out->dims()[1]; + int element_num_per_div = fpga::get_element_num_per_div(filter, group_num); + int element_num = fpga::get_aligned_filter_element_num( filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); for (int i = 0; i < n; i++) { @@ -389,22 +297,19 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, arg->conv_args[i].image.scale_address = input->scale; arg->conv_args[i].image.pad_height = (uint32_t)padding_h; arg->conv_args[i].image.pad_width = (uint32_t)padding_w; - arg->conv_args[i].filter_scale_address = filter->scale; - arg->conv_args[i].filter_address = - &((int8_t *)filter_ptr)[i * element_num * filter_num_per_div]; - arg->conv_args[i].sb_address = &bs_ptr[i * filter_num_per_div * 2]; + arg->conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num]; + arg->conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num]; arg->conv_args[i].filter_num = - (uint32_t)(i == n - 1 ? channel - (n - 1) * filter_num_per_div - : filter_num_per_div); + (uint32_t)(i == n - 1 ? 
fpga::get_aligned_filter_num( + channel - (n - 1) * element_num_per_div) + : element_num_per_div); if (n > 1) { arg->conv_args[i].output.scale_address = - (float *)fpga_malloc(2 * sizeof(float)); - arg->conv_args[i].output.address = fpga_malloc( - input->dims()[2] * - align_to_x(input->dims()[3] * arg->conv_args[i].filter_num, - IMAGE_ALIGNMENT) * - sizeof(half)); + (float *)fpga::fpga_malloc(2 * sizeof(float)); + arg->conv_args[i].output.address = + fpga::fpga_malloc(input->dims()[2] * input->dims()[3] * + arg->conv_args[i].filter_num * sizeof(half)); } else { @@ -413,7 +318,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, } arg->concat_arg.images_in[i] = (half *)arg->conv_args[i].output.address; - arg->concat_arg.scales_in[i] = arg->conv_args[i].output.scale_address; + arg->concat_arg.scales_in[i] = (float *)arg->conv_args[i].sb_address; arg->concat_arg.channel_num[i] = arg->conv_args[i].filter_num; } } diff --git a/src/fpga/api.h b/src/fpga/api.h index f5fa05b6750996ee391a30d2651a69d90e357547..096f847170501784f0ee74b5a98ca91349587cfc 100644 --- a/src/fpga/api.h +++ b/src/fpga/api.h @@ -20,17 +20,28 @@ limitations under the License. 
*/ #include #include "framework/tensor.h" +// memory management; + namespace paddle_mobile { namespace fpga { -enum DataType { - DATA_TYPE_FP32 = 1, - DATA_TYPE_FP16 = 0, +int open_device(); +int close_device(); + +void* fpga_malloc(size_t size); +void fpga_free(void* ptr); +void fpga_copy(void* dst, const void* src, size_t num); + +enum DataConvertType { + DATA_NO_CONVERT = 0, + DATA_FP32_TO_FP16 = 1, + DATA_FP16_TO_FP32 = 2, }; -enum LayoutType { - LAYOUT_CHW = 1, - LAYOUT_HWC = 0, +enum LayoutConvertType { + LAYOUT_NO_CONVERT = 0, + LAYOUT_CHW_TO_HWC = 1, + LAYOUT_HWC_TO_CHW = 2, }; struct VersionArgs { @@ -43,6 +54,9 @@ struct MemoryCopyArgs { size_t size; }; +/** +Conv and Pooling kernel +*/ struct KernelArgs { uint32_t width; uint32_t height; @@ -104,21 +118,20 @@ struct PoolingArgs { struct ImageOutputArgs output; }; +// elementwise add arguments struct EWAddArgs { bool relu_enabled; - uint32_t const0; // output0 = const0 x input0 + const1 x input1; - uint32_t const1; + float const0; // output0 = const0 x input0 + const1 x input1; + float const1; struct ImageInputArgs image0; struct ImageInputArgs image1; struct ImageOutputArgs output; }; struct BypassArgs { - enum DataType input_data_type; - enum DataType output_data_type; - enum LayoutType input_layout_type; - enum LayoutType output_layout_type; + enum DataConvertType convert_type; + enum LayoutConvertType layout_type; struct ImageInputArgs image; struct ImageOutputArgs output; }; @@ -128,16 +141,6 @@ struct FpgaRegWriteArgs { uint64_t value; }; -struct FpgaRegReadArgs { - uint64_t address; - uint64_t value; -}; - -struct MemoryCacheArgs { - void* address; - size_t size; -}; - #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) @@ -145,8 +148,6 @@ struct MemoryCacheArgs { #define IOCTL_SEPARATOR_0 10 #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs) -#define IOCTL_MEMCACHE_INVAL _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryCacheArgs) 
-#define IOCTL_MEMCACHE_FLUSH _IOW(IOCTL_FPGA_MAGIC, 13, struct MemoryCacheArgs) #define IOCTL_SEPARATOR_1 20 @@ -183,15 +184,6 @@ enum FPGA_ERR_TYPE { //============================== API ============================= -int open_device(); -int close_device(); - -void* fpga_malloc(size_t size); -void fpga_free(void* ptr); -void fpga_copy(void* dst, const void* src, size_t num); -int fpga_flush(void* address, size_t size); -int fpga_invalidate(void* address, size_t size); - int PerformBypass(const struct BypassArgs& args); int ComputeFpgaConv(const struct WrapperConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); @@ -200,13 +192,11 @@ int ComputeFPGAConcat(const struct ConcatArgs& args); static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; } -int get_align_image_cw(int cw); void format_image(framework::Tensor* image_tensor); -void format_fp16_ofm(framework::Tensor* ofm_tensor); // only allocate memory -void format_fp32_ofm(framework::Tensor* ofm_tensor); +void format_ofm(framework::Tensor* ofm_tensor); // only allocate memory float filter_find_max(framework::Tensor* filter_tensor); -int get_filter_num_per_div(framework::Tensor* filter_tensor, int group_num); +int get_element_num_per_div(framework::Tensor* filter_tensor, int group_num); int get_plit_num(framework::Tensor* filter_tensor); int get_aligned_filter_element_num(int chw); int get_aligned_filter_num(int num); diff --git a/src/fpga/bias_scale.cpp b/src/fpga/bias_scale.cpp index 3e5c3419a0c35b5c7c81b0ee1fd89a58838b5a26..a1b0c8577b9100f69f823a39e9e136c46b7e09ff 100644 --- a/src/fpga/bias_scale.cpp +++ b/src/fpga/bias_scale.cpp @@ -79,7 +79,6 @@ void format_bias_scale_array(float **bias_scale_array, int element_num_after_division = align_to_x(element_num_per_division, BS_NUM_ALIGNMENT); interleave(bias_scale_array, div_num * element_num_after_division); - fpga_flush(*bias_scale_array, 2 * element_num_after_division * sizeof(float)); } } // namespace bias_scale diff --git 
a/src/fpga/filter.cpp b/src/fpga/filter.cpp index 3b09ede10d10f605e69d06df2e148dd463e94d5b..5f1a16d2339f3859f4cd85408c965d8d2634a55f 100644 --- a/src/fpga/filter.cpp +++ b/src/fpga/filter.cpp @@ -101,6 +101,7 @@ void align_element(char **data_in, int num, int chw) { int j = 0; int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT); if (align_chw != chw) { + printf("align %d \n", align_chw); char *tmp = *data_in; char *data_tmp = (char *)fpga_malloc(num * align_chw * sizeof(char)); @@ -206,8 +207,6 @@ void format_filter(float **data_in, int num, int channel, int height, int width, align_num(quantize_data, num_per_div_before_alignment, num, chw); reorder(quantize_data, num_after_alignment, chw); interleave(quantize_data, num_after_alignment, chw); - fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) * - num_after_alignment * sizeof(char)); } } // namespace filter diff --git a/src/fpga/image.cpp b/src/fpga/image.cpp index ad5053f9780895d94cc3095dc694e86dbbb1abac..872abcd7c2dd6b16ab8ec8077e9afa6ec60c10d4 100644 --- a/src/fpga/image.cpp +++ b/src/fpga/image.cpp @@ -38,6 +38,7 @@ void convert_to_hwc(float **data_in, int channel, int height, int width) { } void align_element_conv(float **data_in, int height, int cw) { + int i = 0; int h = 0; int align_cw = align_to_x(cw, IMAGE_ALIGNMENT); if (align_cw != cw) { @@ -59,8 +60,6 @@ void align_element_conv(float **data_in, int height, int cw) { void format_image(float **data_in, int channel, int height, int width) { convert_to_hwc(data_in, channel, height, width); align_element_conv(data_in, height, channel * width); - fpga_flush(*data_in, align_to_x(channel * width, IMAGE_ALIGNMENT) * height * - sizeof(float)); } void concat_images(int16_t **images_in, float **scales_in, void *image_out, @@ -74,17 +73,11 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, int align_each_in_area_cw = 0; int align_each_out_area_cw_differ = 0; int tmp_channel = 0; - scale_out[0] = 0.0; - 
scale_out[1] = 0.0; + *scale_out = 0; for (i = 0; i < image_num; i++) { each_out_line_channel += channel_num[i]; - scale_out[0] = std::max(*scale_out, scales_in[i][0]); - fpga_invalidate(images_in[i], - height * - align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) * - sizeof(int16_t)); + *scale_out = std::max(*scale_out, scales_in[i][0]); } - scale_out[1] = 1 / scale_out[0]; align_each_out_area_cw = align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT); align_each_out_area_cw_differ = @@ -104,8 +97,6 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, } } } - - fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t)); } } // namespace image diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/framework/framework.pb-c.c b/src/framework/framework.pb-c.c index aed0a6c9c0614da74a82cea8c7aa705978dddafc..80d655e422c7fbcbb50642115bdb1c9902dfc31f 100644 --- a/src/framework/framework.pb-c.c +++ b/src/framework/framework.pb-c.c @@ -7,1397 +7,1747 @@ #endif #include "framework.pb-c.h" -void paddle_mobile__framework__proto__op_desc__attr__init( - PaddleMobile__Framework__Proto__OpDesc__Attr *message) { - static const PaddleMobile__Framework__Proto__OpDesc__Attr init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT; +void paddle_mobile__framework__proto__version__init + (PaddleMobile__Framework__Proto__Version *message) +{ + static const PaddleMobile__Framework__Proto__Version init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VERSION__INIT; *message = init_value; } -void paddle_mobile__framework__proto__op_desc__var__init( - PaddleMobile__Framework__Proto__OpDesc__Var *message) { - static const PaddleMobile__Framework__Proto__OpDesc__Var init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__VAR__INIT; +size_t paddle_mobile__framework__proto__version__get_packed_size + 
(const PaddleMobile__Framework__Proto__Version *message) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__version__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__version__pack + (const PaddleMobile__Framework__Proto__Version *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__version__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__version__pack_to_buffer + (const PaddleMobile__Framework__Proto__Version *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__version__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +PaddleMobile__Framework__Proto__Version * + paddle_mobile__framework__proto__version__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (PaddleMobile__Framework__Proto__Version *) + protobuf_c_message_unpack (&paddle_mobile__framework__proto__version__descriptor, + allocator, len, data); +} +void paddle_mobile__framework__proto__version__free_unpacked + (PaddleMobile__Framework__Proto__Version *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &paddle_mobile__framework__proto__version__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void paddle_mobile__framework__proto__op_desc__attr__init + (PaddleMobile__Framework__Proto__OpDesc__Attr *message) +{ + static const PaddleMobile__Framework__Proto__OpDesc__Attr init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT; *message = init_value; } -void paddle_mobile__framework__proto__op_desc__init( - PaddleMobile__Framework__Proto__OpDesc *message) { - static const PaddleMobile__Framework__Proto__OpDesc 
init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT; +void paddle_mobile__framework__proto__op_desc__var__init + (PaddleMobile__Framework__Proto__OpDesc__Var *message) +{ + static const PaddleMobile__Framework__Proto__OpDesc__Var init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__VAR__INIT; *message = init_value; } -size_t paddle_mobile__framework__proto__op_desc__get_packed_size( - const PaddleMobile__Framework__Proto__OpDesc *message) { - assert(message->base.descriptor == - &paddle_mobile__framework__proto__op_desc__descriptor); - return protobuf_c_message_get_packed_size( - (const ProtobufCMessage *)(message)); +void paddle_mobile__framework__proto__op_desc__init + (PaddleMobile__Framework__Proto__OpDesc *message) +{ + static const PaddleMobile__Framework__Proto__OpDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__op_desc__get_packed_size + (const PaddleMobile__Framework__Proto__OpDesc *message) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_desc__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__op_desc__pack + (const PaddleMobile__Framework__Proto__OpDesc *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_desc__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__op_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__OpDesc *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_desc__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } - PaddleMobile__Framework__Proto__OpDesc * -paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data) { - 
return (PaddleMobile__Framework__Proto__OpDesc *)protobuf_c_message_unpack( - &paddle_mobile__framework__proto__op_desc__descriptor, allocator, len, - data); + paddle_mobile__framework__proto__op_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (PaddleMobile__Framework__Proto__OpDesc *) + protobuf_c_message_unpack (&paddle_mobile__framework__proto__op_desc__descriptor, + allocator, len, data); } -void paddle_mobile__framework__proto__op_desc__free_unpacked( - PaddleMobile__Framework__Proto__OpDesc *message, - ProtobufCAllocator *allocator) { - if (!message) return; - assert(message->base.descriptor == - &paddle_mobile__framework__proto__op_desc__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +void paddle_mobile__framework__proto__op_desc__free_unpacked + (PaddleMobile__Framework__Proto__OpDesc *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_desc__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void paddle_mobile__framework__proto__op_proto__var__init( - PaddleMobile__Framework__Proto__OpProto__Var *message) { - static const PaddleMobile__Framework__Proto__OpProto__Var init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__VAR__INIT; +void paddle_mobile__framework__proto__op_proto__var__init + (PaddleMobile__Framework__Proto__OpProto__Var *message) +{ + static const PaddleMobile__Framework__Proto__OpProto__Var init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__VAR__INIT; *message = init_value; } -void paddle_mobile__framework__proto__op_proto__attr__init( - PaddleMobile__Framework__Proto__OpProto__Attr *message) { - static const PaddleMobile__Framework__Proto__OpProto__Attr init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__ATTR__INIT; +void paddle_mobile__framework__proto__op_proto__attr__init + 
(PaddleMobile__Framework__Proto__OpProto__Attr *message) +{ + static const PaddleMobile__Framework__Proto__OpProto__Attr init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__ATTR__INIT; *message = init_value; } -void paddle_mobile__framework__proto__op_proto__init( - PaddleMobile__Framework__Proto__OpProto *message) { - static const PaddleMobile__Framework__Proto__OpProto init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT; +void paddle_mobile__framework__proto__op_proto__init + (PaddleMobile__Framework__Proto__OpProto *message) +{ + static const PaddleMobile__Framework__Proto__OpProto init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT; *message = init_value; } -size_t paddle_mobile__framework__proto__op_proto__get_packed_size( - const PaddleMobile__Framework__Proto__OpProto *message) { - assert(message->base.descriptor == - &paddle_mobile__framework__proto__op_proto__descriptor); - return protobuf_c_message_get_packed_size( - (const ProtobufCMessage *)(message)); +size_t paddle_mobile__framework__proto__op_proto__get_packed_size + (const PaddleMobile__Framework__Proto__OpProto *message) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_proto__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__op_proto__pack + (const PaddleMobile__Framework__Proto__OpProto *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_proto__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__op_proto__pack_to_buffer + (const PaddleMobile__Framework__Proto__OpProto *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_proto__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } - PaddleMobile__Framework__Proto__OpProto * 
-paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data) { - return (PaddleMobile__Framework__Proto__OpProto *)protobuf_c_message_unpack( - &paddle_mobile__framework__proto__op_proto__descriptor, allocator, len, - data); + paddle_mobile__framework__proto__op_proto__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (PaddleMobile__Framework__Proto__OpProto *) + protobuf_c_message_unpack (&paddle_mobile__framework__proto__op_proto__descriptor, + allocator, len, data); } -void paddle_mobile__framework__proto__op_proto__free_unpacked( - PaddleMobile__Framework__Proto__OpProto *message, - ProtobufCAllocator *allocator) { - if (!message) return; - assert(message->base.descriptor == - &paddle_mobile__framework__proto__op_proto__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +void paddle_mobile__framework__proto__op_proto__free_unpacked + (PaddleMobile__Framework__Proto__OpProto *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &paddle_mobile__framework__proto__op_proto__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void paddle_mobile__framework__proto__var_type__tensor_desc__init( - PaddleMobile__Framework__Proto__VarType__TensorDesc *message) { - static const PaddleMobile__Framework__Proto__VarType__TensorDesc init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TENSOR_DESC__INIT; +void paddle_mobile__framework__proto__var_type__tensor_desc__init + (PaddleMobile__Framework__Proto__VarType__TensorDesc *message) +{ + static const PaddleMobile__Framework__Proto__VarType__TensorDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TENSOR_DESC__INIT; *message = init_value; } -void paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init( - PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message) { - static 
const PaddleMobile__Framework__Proto__VarType__LoDTensorDesc - init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_DESC__INIT; +void paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init + (PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message) +{ + static const PaddleMobile__Framework__Proto__VarType__LoDTensorDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_DESC__INIT; *message = init_value; } -void paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init( - PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message) { - static const PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc - init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_ARRAY_DESC__INIT; +void paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init + (PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message) +{ + static const PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_ARRAY_DESC__INIT; *message = init_value; } -void paddle_mobile__framework__proto__var_type__reader_desc__init( - PaddleMobile__Framework__Proto__VarType__ReaderDesc *message) { - static const PaddleMobile__Framework__Proto__VarType__ReaderDesc init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__READER_DESC__INIT; +void paddle_mobile__framework__proto__var_type__reader_desc__init + (PaddleMobile__Framework__Proto__VarType__ReaderDesc *message) +{ + static const PaddleMobile__Framework__Proto__VarType__ReaderDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__READER_DESC__INIT; *message = init_value; } -void paddle_mobile__framework__proto__var_type__channel_desc__init( - PaddleMobile__Framework__Proto__VarType__ChannelDesc *message) { - static const PaddleMobile__Framework__Proto__VarType__ChannelDesc init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__CHANNEL_DESC__INIT; +void 
paddle_mobile__framework__proto__var_type__channel_desc__init + (PaddleMobile__Framework__Proto__VarType__ChannelDesc *message) +{ + static const PaddleMobile__Framework__Proto__VarType__ChannelDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__CHANNEL_DESC__INIT; *message = init_value; } -void paddle_mobile__framework__proto__var_type__tuple__init( - PaddleMobile__Framework__Proto__VarType__Tuple *message) { - static const PaddleMobile__Framework__Proto__VarType__Tuple init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TUPLE__INIT; +void paddle_mobile__framework__proto__var_type__tuple__init + (PaddleMobile__Framework__Proto__VarType__Tuple *message) +{ + static const PaddleMobile__Framework__Proto__VarType__Tuple init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TUPLE__INIT; *message = init_value; } -void paddle_mobile__framework__proto__var_type__init( - PaddleMobile__Framework__Proto__VarType *message) { - static const PaddleMobile__Framework__Proto__VarType init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT; +void paddle_mobile__framework__proto__var_type__init + (PaddleMobile__Framework__Proto__VarType *message) +{ + static const PaddleMobile__Framework__Proto__VarType init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT; *message = init_value; } -size_t paddle_mobile__framework__proto__var_type__get_packed_size( - const PaddleMobile__Framework__Proto__VarType *message) { - assert(message->base.descriptor == - &paddle_mobile__framework__proto__var_type__descriptor); - return protobuf_c_message_get_packed_size( - (const ProtobufCMessage *)(message)); +size_t paddle_mobile__framework__proto__var_type__get_packed_size + (const PaddleMobile__Framework__Proto__VarType *message) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_type__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__var_type__pack + (const 
PaddleMobile__Framework__Proto__VarType *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_type__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__var_type__pack_to_buffer + (const PaddleMobile__Framework__Proto__VarType *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_type__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } PaddleMobile__Framework__Proto__VarType * -paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data) { - return (PaddleMobile__Framework__Proto__VarType *)protobuf_c_message_unpack( - &paddle_mobile__framework__proto__var_type__descriptor, allocator, len, - data); + paddle_mobile__framework__proto__var_type__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (PaddleMobile__Framework__Proto__VarType *) + protobuf_c_message_unpack (&paddle_mobile__framework__proto__var_type__descriptor, + allocator, len, data); } -void paddle_mobile__framework__proto__var_type__free_unpacked( - PaddleMobile__Framework__Proto__VarType *message, - ProtobufCAllocator *allocator) { - if (!message) return; - assert(message->base.descriptor == - &paddle_mobile__framework__proto__var_type__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +void paddle_mobile__framework__proto__var_type__free_unpacked + (PaddleMobile__Framework__Proto__VarType *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_type__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void paddle_mobile__framework__proto__var_desc__init( - PaddleMobile__Framework__Proto__VarDesc *message) { 
- static const PaddleMobile__Framework__Proto__VarDesc init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT; +void paddle_mobile__framework__proto__var_desc__init + (PaddleMobile__Framework__Proto__VarDesc *message) +{ + static const PaddleMobile__Framework__Proto__VarDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT; *message = init_value; } -size_t paddle_mobile__framework__proto__var_desc__get_packed_size( - const PaddleMobile__Framework__Proto__VarDesc *message) { - assert(message->base.descriptor == - &paddle_mobile__framework__proto__var_desc__descriptor); - return protobuf_c_message_get_packed_size( - (const ProtobufCMessage *)(message)); +size_t paddle_mobile__framework__proto__var_desc__get_packed_size + (const PaddleMobile__Framework__Proto__VarDesc *message) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_desc__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__var_desc__pack + (const PaddleMobile__Framework__Proto__VarDesc *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_desc__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__var_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__VarDesc *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_desc__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } - PaddleMobile__Framework__Proto__VarDesc * -paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data) { - return (PaddleMobile__Framework__Proto__VarDesc *)protobuf_c_message_unpack( - &paddle_mobile__framework__proto__var_desc__descriptor, allocator, len, - data); + 
paddle_mobile__framework__proto__var_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (PaddleMobile__Framework__Proto__VarDesc *) + protobuf_c_message_unpack (&paddle_mobile__framework__proto__var_desc__descriptor, + allocator, len, data); } -void paddle_mobile__framework__proto__var_desc__free_unpacked( - PaddleMobile__Framework__Proto__VarDesc *message, - ProtobufCAllocator *allocator) { - if (!message) return; - assert(message->base.descriptor == - &paddle_mobile__framework__proto__var_desc__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +void paddle_mobile__framework__proto__var_desc__free_unpacked + (PaddleMobile__Framework__Proto__VarDesc *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &paddle_mobile__framework__proto__var_desc__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void paddle_mobile__framework__proto__block_desc__init( - PaddleMobile__Framework__Proto__BlockDesc *message) { - static const PaddleMobile__Framework__Proto__BlockDesc init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT; +void paddle_mobile__framework__proto__block_desc__init + (PaddleMobile__Framework__Proto__BlockDesc *message) +{ + static const PaddleMobile__Framework__Proto__BlockDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT; *message = init_value; } -size_t paddle_mobile__framework__proto__block_desc__get_packed_size( - const PaddleMobile__Framework__Proto__BlockDesc *message) { - assert(message->base.descriptor == - &paddle_mobile__framework__proto__block_desc__descriptor); - return protobuf_c_message_get_packed_size( - (const ProtobufCMessage *)(message)); +size_t paddle_mobile__framework__proto__block_desc__get_packed_size + (const PaddleMobile__Framework__Proto__BlockDesc *message) +{ + assert(message->base.descriptor == 
&paddle_mobile__framework__proto__block_desc__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__block_desc__pack + (const PaddleMobile__Framework__Proto__BlockDesc *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__block_desc__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__block_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__BlockDesc *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__block_desc__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } - PaddleMobile__Framework__Proto__BlockDesc * -paddle_mobile__framework__proto__block_desc__unpack( - ProtobufCAllocator *allocator, size_t len, const uint8_t *data) { - return (PaddleMobile__Framework__Proto__BlockDesc *)protobuf_c_message_unpack( - &paddle_mobile__framework__proto__block_desc__descriptor, allocator, len, - data); + paddle_mobile__framework__proto__block_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (PaddleMobile__Framework__Proto__BlockDesc *) + protobuf_c_message_unpack (&paddle_mobile__framework__proto__block_desc__descriptor, + allocator, len, data); } -void paddle_mobile__framework__proto__block_desc__free_unpacked( - PaddleMobile__Framework__Proto__BlockDesc *message, - ProtobufCAllocator *allocator) { - if (!message) return; - assert(message->base.descriptor == - &paddle_mobile__framework__proto__block_desc__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +void paddle_mobile__framework__proto__block_desc__free_unpacked + (PaddleMobile__Framework__Proto__BlockDesc *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor 
== &paddle_mobile__framework__proto__block_desc__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void paddle_mobile__framework__proto__program_desc__init( - PaddleMobile__Framework__Proto__ProgramDesc *message) { - static const PaddleMobile__Framework__Proto__ProgramDesc init_value = - PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT; +void paddle_mobile__framework__proto__program_desc__init + (PaddleMobile__Framework__Proto__ProgramDesc *message) +{ + static const PaddleMobile__Framework__Proto__ProgramDesc init_value = PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT; *message = init_value; } -size_t paddle_mobile__framework__proto__program_desc__get_packed_size( - const PaddleMobile__Framework__Proto__ProgramDesc *message) { - assert(message->base.descriptor == - &paddle_mobile__framework__proto__program_desc__descriptor); - return protobuf_c_message_get_packed_size( - (const ProtobufCMessage *)(message)); +size_t paddle_mobile__framework__proto__program_desc__get_packed_size + (const PaddleMobile__Framework__Proto__ProgramDesc *message) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__program_desc__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t paddle_mobile__framework__proto__program_desc__pack + (const PaddleMobile__Framework__Proto__ProgramDesc *message, + uint8_t *out) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__program_desc__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t paddle_mobile__framework__proto__program_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__ProgramDesc *message, + ProtobufCBuffer *buffer) +{ + assert(message->base.descriptor == &paddle_mobile__framework__proto__program_desc__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } - 
PaddleMobile__Framework__Proto__ProgramDesc * -paddle_mobile__framework__proto__program_desc__unpack( - ProtobufCAllocator *allocator, size_t len, const uint8_t *data) { + paddle_mobile__framework__proto__program_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ return (PaddleMobile__Framework__Proto__ProgramDesc *) - protobuf_c_message_unpack( - &paddle_mobile__framework__proto__program_desc__descriptor, allocator, - len, data); + protobuf_c_message_unpack (&paddle_mobile__framework__proto__program_desc__descriptor, + allocator, len, data); } -void paddle_mobile__framework__proto__program_desc__free_unpacked( - PaddleMobile__Framework__Proto__ProgramDesc *message, - ProtobufCAllocator *allocator) { - if (!message) return; - assert(message->base.descriptor == - &paddle_mobile__framework__proto__program_desc__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +void paddle_mobile__framework__proto__program_desc__free_unpacked + (PaddleMobile__Framework__Proto__ProgramDesc *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + assert(message->base.descriptor == &paddle_mobile__framework__proto__program_desc__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__op_desc__attr__field_descriptors[12] = { - { - "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, name), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "type", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, type), - &paddle_mobile__framework__proto__attr_type__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "i", 3, PROTOBUF_C_LABEL_OPTIONAL, 
PROTOBUF_C_TYPE_INT32, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_i), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, i), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "f", 4, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_FLOAT, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_f), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, f), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "s", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, s), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "ints", 6, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_INT32, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_ints), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, ints), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "floats", 7, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_FLOAT, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_floats), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, floats), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "strings", 8, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_STRING, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_strings), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, strings), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "b", 10, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_b), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, b), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "bools", 11, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_bools), - 
offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, bools), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "block_idx", 12, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_INT32, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, - has_block_idx), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, block_idx), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "l", 13, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_INT64, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_l), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, l), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name[] = { - 8, /* field[8] = b */ - 10, /* field[10] = block_idx */ - 9, /* field[9] = bools */ - 3, /* field[3] = f */ - 6, /* field[6] = floats */ - 2, /* field[2] = i */ - 5, /* field[5] = ints */ - 11, /* field[11] = l */ - 0, /* field[0] = name */ - 4, /* field[4] = s */ - 7, /* field[7] = strings */ - 1, /* field[1] = type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__op_desc__attr__number_ranges[2 + 1] = { - {1, 0}, {10, 8}, {0, 12}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_desc__attr__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.OpDesc.Attr", - "Attr", - "PaddleMobile__Framework__Proto__OpDesc__Attr", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__OpDesc__Attr), - 12, - paddle_mobile__framework__proto__op_desc__attr__field_descriptors, - paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name, - 2, - paddle_mobile__framework__proto__op_desc__attr__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__op_desc__attr__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const 
ProtobufCFieldDescriptor - paddle_mobile__framework__proto__op_desc__var__field_descriptors[2] = { - { - "parameter", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, parameter), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "arguments", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_STRING, - offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, n_arguments), - offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, arguments), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__op_desc__var__field_indices_by_name[] = { - 1, /* field[1] = arguments */ - 0, /* field[0] = parameter */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__op_desc__var__number_ranges[1 + 1] = { - {1, 0}, {0, 2}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_desc__var__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.OpDesc.Var", - "Var", - "PaddleMobile__Framework__Proto__OpDesc__Var", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__OpDesc__Var), - 2, - paddle_mobile__framework__proto__op_desc__var__field_descriptors, - paddle_mobile__framework__proto__op_desc__var__field_indices_by_name, - 1, - paddle_mobile__framework__proto__op_desc__var__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__op_desc__var__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const protobuf_c_boolean - paddle_mobile__framework__proto__op_desc__is_target__default_value = 0; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__op_desc__field_descriptors[5] = { - { - "inputs", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__OpDesc, n_inputs), - 
offsetof(PaddleMobile__Framework__Proto__OpDesc, inputs), - &paddle_mobile__framework__proto__op_desc__var__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "outputs", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__OpDesc, n_outputs), - offsetof(PaddleMobile__Framework__Proto__OpDesc, outputs), - &paddle_mobile__framework__proto__op_desc__var__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "type", 3, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpDesc, type), NULL, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "attrs", 4, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__OpDesc, n_attrs), - offsetof(PaddleMobile__Framework__Proto__OpDesc, attrs), - &paddle_mobile__framework__proto__op_desc__attr__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "is_target", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpDesc, has_is_target), - offsetof(PaddleMobile__Framework__Proto__OpDesc, is_target), NULL, - &paddle_mobile__framework__proto__op_desc__is_target__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__op_desc__field_indices_by_name[] = { - 3, /* field[3] = attrs */ - 0, /* field[0] = inputs */ - 4, /* field[4] = is_target */ - 1, /* field[1] = outputs */ - 2, /* field[2] = type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__op_desc__number_ranges[1 + 1] = {{1, 0}, - {0, 5}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.OpDesc", - "OpDesc", - 
"PaddleMobile__Framework__Proto__OpDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__OpDesc), - 5, - paddle_mobile__framework__proto__op_desc__field_descriptors, - paddle_mobile__framework__proto__op_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__op_desc__number_ranges, - (ProtobufCMessageInit)paddle_mobile__framework__proto__op_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const protobuf_c_boolean - paddle_mobile__framework__proto__op_proto__var__duplicable__default_value = - 0; -static const protobuf_c_boolean - paddle_mobile__framework__proto__op_proto__var__intermediate__default_value = - 0; -static const protobuf_c_boolean - paddle_mobile__framework__proto__op_proto__var__dispensable__default_value = - 0; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__op_proto__var__field_descriptors[5] = { - { - "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, name), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "comment", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, comment), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "duplicable", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, - has_duplicable), - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, duplicable), - NULL, - &paddle_mobile__framework__proto__op_proto__var__duplicable__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "intermediate", 4, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, - has_intermediate), - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, - intermediate), 
- NULL, - &paddle_mobile__framework__proto__op_proto__var__intermediate__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "dispensable", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, - has_dispensable), - offsetof(PaddleMobile__Framework__Proto__OpProto__Var, dispensable), - NULL, - &paddle_mobile__framework__proto__op_proto__var__dispensable__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__op_proto__var__field_indices_by_name[] = { - 1, /* field[1] = comment */ - 4, /* field[4] = dispensable */ - 2, /* field[2] = duplicable */ - 3, /* field[3] = intermediate */ - 0, /* field[0] = name */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__op_proto__var__number_ranges[1 + 1] = { - {1, 0}, {0, 5}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_proto__var__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.OpProto.Var", - "Var", - "PaddleMobile__Framework__Proto__OpProto__Var", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__OpProto__Var), - 5, - paddle_mobile__framework__proto__op_proto__var__field_descriptors, - paddle_mobile__framework__proto__op_proto__var__field_indices_by_name, - 1, - paddle_mobile__framework__proto__op_proto__var__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__op_proto__var__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const protobuf_c_boolean - paddle_mobile__framework__proto__op_proto__attr__generated__default_value = - 0; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__op_proto__attr__field_descriptors[4] = { - { - "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, 
name), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "type", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, type), - &paddle_mobile__framework__proto__attr_type__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "comment", 3, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, comment), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "generated", 4, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, - has_generated), - offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, generated), - NULL, - &paddle_mobile__framework__proto__op_proto__attr__generated__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__op_proto__attr__field_indices_by_name[] = { - 2, /* field[2] = comment */ - 3, /* field[3] = generated */ - 0, /* field[0] = name */ - 1, /* field[1] = type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__op_proto__attr__number_ranges[1 + 1] = { - {1, 0}, {0, 4}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_proto__attr__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.OpProto.Attr", - "Attr", - "PaddleMobile__Framework__Proto__OpProto__Attr", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__OpProto__Attr), - 4, - paddle_mobile__framework__proto__op_proto__attr__field_descriptors, - paddle_mobile__framework__proto__op_proto__attr__field_indices_by_name, - 1, - paddle_mobile__framework__proto__op_proto__attr__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__op_proto__attr__init, 
- NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__op_proto__field_descriptors[5] = { - { - "type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto, type), NULL, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "inputs", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__OpProto, n_inputs), - offsetof(PaddleMobile__Framework__Proto__OpProto, inputs), - &paddle_mobile__framework__proto__op_proto__var__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "outputs", 3, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__OpProto, n_outputs), - offsetof(PaddleMobile__Framework__Proto__OpProto, outputs), - &paddle_mobile__framework__proto__op_proto__var__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "attrs", 4, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__OpProto, n_attrs), - offsetof(PaddleMobile__Framework__Proto__OpProto, attrs), - &paddle_mobile__framework__proto__op_proto__attr__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "comment", 5, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__OpProto, comment), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__op_proto__field_indices_by_name[] = { - 3, /* field[3] = attrs */ - 4, /* field[4] = comment */ - 1, /* field[1] = inputs */ - 2, /* field[2] = outputs */ - 0, /* field[0] = type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__op_proto__number_ranges[1 + 1] = {{1, 0}, - {0, 
5}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_proto__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.OpProto", - "OpProto", - "PaddleMobile__Framework__Proto__OpProto", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__OpProto), - 5, - paddle_mobile__framework__proto__op_proto__field_descriptors, - paddle_mobile__framework__proto__op_proto__field_indices_by_name, - 1, - paddle_mobile__framework__proto__op_proto__number_ranges, - (ProtobufCMessageInit)paddle_mobile__framework__proto__op_proto__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__tensor_desc__field_descriptors - [2] = { - { - "data_type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, - data_type), - &paddle_mobile__framework__proto__var_type__type__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "dims", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_INT64, - offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, - n_dims), - offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, - dims), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__tensor_desc__field_indices_by_name - [] = { - 0, /* field[0] = data_type */ - 1, /* field[1] = dims */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__tensor_desc__number_ranges[1 + - 1] = { - {1, 0}, {0, 2}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__tensor_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.TensorDesc", - "TensorDesc", - "PaddleMobile__Framework__Proto__VarType__TensorDesc", - 
"paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType__TensorDesc), - 2, - paddle_mobile__framework__proto__var_type__tensor_desc__field_descriptors, - paddle_mobile__framework__proto__var_type__tensor_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_type__tensor_desc__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__var_type__tensor_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const int32_t - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__lod_level__default_value = - 0; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_descriptors - [2] = { - { - "tensor", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, - tensor), - &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "lod_level", 2, PROTOBUF_C_LABEL_OPTIONAL, - PROTOBUF_C_TYPE_INT32, - offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, - has_lod_level), - offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, - lod_level), - NULL, - &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__lod_level__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_indices_by_name - [] = { - 1, /* field[1] = lod_level */ - 0, /* field[0] = tensor */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__number_ranges - [1 + 1] = {{1, 0}, {0, 2}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.LoDTensorDesc", - "LoDTensorDesc", - 
"PaddleMobile__Framework__Proto__VarType__LoDTensorDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc), - 2, - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_descriptors, - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const int32_t - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__lod_level__default_value = - 0; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_descriptors - [2] = { - { - "tensor", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof( - PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, - tensor), - &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "lod_level", 2, PROTOBUF_C_LABEL_OPTIONAL, - PROTOBUF_C_TYPE_INT32, - offsetof( - PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, - has_lod_level), - offsetof( - PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, - lod_level), - NULL, - &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__lod_level__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_indices_by_name - [] = { - 1, /* field[1] = lod_level */ - 0, /* field[0] = tensor */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__number_ranges - [1 + 1] = {{1, 0}, {0, 2}}; -const ProtobufCMessageDescriptor - 
paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc", - "LoDTensorArrayDesc", - "PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc), - 2, - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_descriptors, - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__reader_desc__field_descriptors[1] = { - { - "lod_tensor", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__VarType__ReaderDesc, - n_lod_tensor), - offsetof(PaddleMobile__Framework__Proto__VarType__ReaderDesc, - lod_tensor), - &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__reader_desc__field_indices_by_name - [] = { - 0, /* field[0] = lod_tensor */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__reader_desc__number_ranges[1 + - 1] = { - {1, 0}, {0, 1}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__reader_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.ReaderDesc", - "ReaderDesc", - "PaddleMobile__Framework__Proto__VarType__ReaderDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType__ReaderDesc), - 1, - 
paddle_mobile__framework__proto__var_type__reader_desc__field_descriptors, - paddle_mobile__framework__proto__var_type__reader_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_type__reader_desc__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__var_type__reader_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__channel_desc__field_descriptors - [2] = { - { - "data_type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType__ChannelDesc, - data_type), - &paddle_mobile__framework__proto__var_type__type__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "capacity", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_INT64, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType__ChannelDesc, - capacity), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__channel_desc__field_indices_by_name - [] = { - 1, /* field[1] = capacity */ - 0, /* field[0] = data_type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__channel_desc__number_ranges[1 + - 1] = - {{1, 0}, {0, 2}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__channel_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.ChannelDesc", - "ChannelDesc", - "PaddleMobile__Framework__Proto__VarType__ChannelDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType__ChannelDesc), - 2, - paddle_mobile__framework__proto__var_type__channel_desc__field_descriptors, - paddle_mobile__framework__proto__var_type__channel_desc__field_indices_by_name, - 1, - 
paddle_mobile__framework__proto__var_type__channel_desc__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__var_type__channel_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__tuple__field_descriptors[1] = { - { - "element_type", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_ENUM, - offsetof(PaddleMobile__Framework__Proto__VarType__Tuple, - n_element_type), - offsetof(PaddleMobile__Framework__Proto__VarType__Tuple, - element_type), - &paddle_mobile__framework__proto__var_type__type__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__tuple__field_indices_by_name[] = - { - 0, /* field[0] = element_type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__tuple__number_ranges[1 + 1] = { - {1, 0}, {0, 1}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__tuple__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.Tuple", - "Tuple", - "PaddleMobile__Framework__Proto__VarType__Tuple", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType__Tuple), - 1, - paddle_mobile__framework__proto__var_type__tuple__field_descriptors, - paddle_mobile__framework__proto__var_type__tuple__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_type__tuple__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__var_type__tuple__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCEnumValue - paddle_mobile__framework__proto__var_type__type__enum_values_by_number[19] = - { - {"BOOL", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL", - 0}, - {"INT16", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16", - 1}, - {"INT32", 
"PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32", - 2}, - {"INT64", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64", - 3}, - {"FP16", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16", - 4}, - {"FP32", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32", - 5}, - {"FP64", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64", - 6}, - {"LOD_TENSOR", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR", 7}, - {"SELECTED_ROWS", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS", - 8}, - {"FEED_MINIBATCH", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH", - 9}, - {"FETCH_LIST", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST", 10}, - {"STEP_SCOPES", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES", - 11}, - {"LOD_RANK_TABLE", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE", - 12}, - {"LOD_TENSOR_ARRAY", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_" - "ARRAY", - 13}, - {"PLACE_LIST", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST", 14}, - {"READER", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER", 15}, - {"CHANNEL", - "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL", 16}, - {"RAW", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW", 17}, - {"TUPLE", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE", - 18}, -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__type__value_ranges[] = {{0, 0}, - {0, 19}}; -static const ProtobufCEnumValueIndex - paddle_mobile__framework__proto__var_type__type__enum_values_by_name[19] = { - {"BOOL", 0}, - {"CHANNEL", 16}, - {"FEED_MINIBATCH", 9}, - {"FETCH_LIST", 10}, - {"FP16", 4}, - {"FP32", 5}, - {"FP64", 6}, - {"INT16", 1}, - {"INT32", 2}, - {"INT64", 3}, - {"LOD_RANK_TABLE", 12}, - {"LOD_TENSOR", 7}, - {"LOD_TENSOR_ARRAY", 13}, - {"PLACE_LIST", 14}, - {"RAW", 17}, - {"READER", 15}, - {"SELECTED_ROWS", 8}, - 
{"STEP_SCOPES", 11}, - {"TUPLE", 18}, -}; -const ProtobufCEnumDescriptor - paddle_mobile__framework__proto__var_type__type__descriptor = { - PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType.Type", - "Type", - "PaddleMobile__Framework__Proto__VarType__Type", - "paddle_mobile.framework.proto", - 19, - paddle_mobile__framework__proto__var_type__type__enum_values_by_number, - 19, - paddle_mobile__framework__proto__var_type__type__enum_values_by_name, - 1, - paddle_mobile__framework__proto__var_type__type__value_ranges, - NULL, - NULL, - NULL, - NULL /* reserved[1234] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_type__field_descriptors[7] = { - { - "type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, type), - &paddle_mobile__framework__proto__var_type__type__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "selected_rows", 2, PROTOBUF_C_LABEL_OPTIONAL, - PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, selected_rows), - &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "lod_tensor", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, lod_tensor), - &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "tensor_array", 4, PROTOBUF_C_LABEL_OPTIONAL, - PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, tensor_array), - &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "reader", 5, 
PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, reader), - &paddle_mobile__framework__proto__var_type__reader_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "channel", 6, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, channel), - &paddle_mobile__framework__proto__var_type__channel_desc__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "tuple", 7, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarType, tuple), - &paddle_mobile__framework__proto__var_type__tuple__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_type__field_indices_by_name[] = { - 5, /* field[5] = channel */ - 2, /* field[2] = lod_tensor */ - 4, /* field[4] = reader */ - 1, /* field[1] = selected_rows */ - 3, /* field[3] = tensor_array */ - 6, /* field[6] = tuple */ - 0, /* field[0] = type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_type__number_ranges[1 + 1] = {{1, 0}, - {0, 7}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarType", - "VarType", - "PaddleMobile__Framework__Proto__VarType", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarType), - 7, - paddle_mobile__framework__proto__var_type__field_descriptors, - paddle_mobile__framework__proto__var_type__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_type__number_ranges, - (ProtobufCMessageInit)paddle_mobile__framework__proto__var_type__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const 
protobuf_c_boolean - paddle_mobile__framework__proto__var_desc__persistable__default_value = 0; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__var_desc__field_descriptors[3] = { - { - "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarDesc, name), NULL, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "type", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__VarDesc, type), - &paddle_mobile__framework__proto__var_type__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "persistable", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, - offsetof(PaddleMobile__Framework__Proto__VarDesc, has_persistable), - offsetof(PaddleMobile__Framework__Proto__VarDesc, persistable), - NULL, - &paddle_mobile__framework__proto__var_desc__persistable__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__var_desc__field_indices_by_name[] = { - 0, /* field[0] = name */ - 2, /* field[2] = persistable */ - 1, /* field[1] = type */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__var_desc__number_ranges[1 + 1] = {{1, 0}, - {0, 3}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.VarDesc", - "VarDesc", - "PaddleMobile__Framework__Proto__VarDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__VarDesc), - 3, - paddle_mobile__framework__proto__var_desc__field_descriptors, - paddle_mobile__framework__proto__var_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__var_desc__number_ranges, - (ProtobufCMessageInit)paddle_mobile__framework__proto__var_desc__init, 
- NULL, - NULL, - NULL /* reserved[123] */ -}; -static const int32_t - paddle_mobile__framework__proto__block_desc__forward_block_idx__default_value = - -1; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__block_desc__field_descriptors[5] = { - { - "idx", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_INT32, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__BlockDesc, idx), NULL, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "parent_idx", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_INT32, - 0, /* quantifier_offset */ - offsetof(PaddleMobile__Framework__Proto__BlockDesc, parent_idx), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "vars", 3, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__BlockDesc, n_vars), - offsetof(PaddleMobile__Framework__Proto__BlockDesc, vars), - &paddle_mobile__framework__proto__var_desc__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "ops", 4, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__BlockDesc, n_ops), - offsetof(PaddleMobile__Framework__Proto__BlockDesc, ops), - &paddle_mobile__framework__proto__op_desc__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "forward_block_idx", 5, PROTOBUF_C_LABEL_OPTIONAL, - PROTOBUF_C_TYPE_INT32, - offsetof(PaddleMobile__Framework__Proto__BlockDesc, - has_forward_block_idx), - offsetof(PaddleMobile__Framework__Proto__BlockDesc, - forward_block_idx), - NULL, - &paddle_mobile__framework__proto__block_desc__forward_block_idx__default_value, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__block_desc__field_indices_by_name[] = { - 4, /* field[4] = forward_block_idx */ - 0, /* field[0] = idx */ - 3, /* field[3] = ops */ - 1, 
/* field[1] = parent_idx */ - 2, /* field[2] = vars */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__block_desc__number_ranges[1 + 1] = { - {1, 0}, {0, 5}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__block_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.BlockDesc", - "BlockDesc", - "PaddleMobile__Framework__Proto__BlockDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__BlockDesc), - 5, - paddle_mobile__framework__proto__block_desc__field_descriptors, - paddle_mobile__framework__proto__block_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__block_desc__number_ranges, - (ProtobufCMessageInit)paddle_mobile__framework__proto__block_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCFieldDescriptor - paddle_mobile__framework__proto__program_desc__field_descriptors[1] = { - { - "blocks", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(PaddleMobile__Framework__Proto__ProgramDesc, n_blocks), - offsetof(PaddleMobile__Framework__Proto__ProgramDesc, blocks), - &paddle_mobile__framework__proto__block_desc__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned - paddle_mobile__framework__proto__program_desc__field_indices_by_name[] = { - 0, /* field[0] = blocks */ -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__program_desc__number_ranges[1 + 1] = { - {1, 0}, {0, 1}}; -const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__program_desc__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.ProgramDesc", - "ProgramDesc", - "PaddleMobile__Framework__Proto__ProgramDesc", - "paddle_mobile.framework.proto", - sizeof(PaddleMobile__Framework__Proto__ProgramDesc), - 1, - paddle_mobile__framework__proto__program_desc__field_descriptors, - 
paddle_mobile__framework__proto__program_desc__field_indices_by_name, - 1, - paddle_mobile__framework__proto__program_desc__number_ranges, - (ProtobufCMessageInit) - paddle_mobile__framework__proto__program_desc__init, - NULL, - NULL, - NULL /* reserved[123] */ -}; -static const ProtobufCEnumValue - paddle_mobile__framework__proto__attr_type__enum_values_by_number[10] = { - {"INT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT", 0}, - {"FLOAT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT", 1}, - {"STRING", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING", 2}, - {"INTS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS", 3}, - {"FLOATS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS", 4}, - {"STRINGS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS", 5}, - {"BOOLEAN", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN", 6}, - {"BOOLEANS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS", 7}, - {"BLOCK", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK", 8}, - {"LONG", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG", 9}, -}; -static const ProtobufCIntRange - paddle_mobile__framework__proto__attr_type__value_ranges[] = {{0, 0}, - {0, 10}}; -static const ProtobufCEnumValueIndex - paddle_mobile__framework__proto__attr_type__enum_values_by_name[10] = { - {"BLOCK", 8}, {"BOOLEAN", 6}, {"BOOLEANS", 7}, {"FLOAT", 1}, - {"FLOATS", 4}, {"INT", 0}, {"INTS", 3}, {"LONG", 9}, - {"STRING", 2}, {"STRINGS", 5}, -}; -const ProtobufCEnumDescriptor - paddle_mobile__framework__proto__attr_type__descriptor = { - PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, - "paddle_mobile.framework.proto.AttrType", - "AttrType", - "PaddleMobile__Framework__Proto__AttrType", - "paddle_mobile.framework.proto", - 10, - paddle_mobile__framework__proto__attr_type__enum_values_by_number, - 10, - paddle_mobile__framework__proto__attr_type__enum_values_by_name, - 1, - paddle_mobile__framework__proto__attr_type__value_ranges, - NULL, - NULL, - NULL, - NULL /* reserved[1234] */ 
+static const int64_t paddle_mobile__framework__proto__version__version__default_value = 0ll; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__version__field_descriptors[1] = +{ + { + "version", + 1, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT64, + offsetof(PaddleMobile__Framework__Proto__Version, has_version), + offsetof(PaddleMobile__Framework__Proto__Version, version), + NULL, + &paddle_mobile__framework__proto__version__version__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__version__field_indices_by_name[] = { + 0, /* field[0] = version */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__version__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__version__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.Version", + "Version", + "PaddleMobile__Framework__Proto__Version", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__Version), + 1, + paddle_mobile__framework__proto__version__field_descriptors, + paddle_mobile__framework__proto__version__field_indices_by_name, + 1, paddle_mobile__framework__proto__version__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__version__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__op_desc__attr__field_descriptors[13] = +{ + { + "name", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, name), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "type", + 2, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, type), + 
&paddle_mobile__framework__proto__attr_type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "i", + 3, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_i), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, i), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "f", + 4, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_FLOAT, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_f), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, f), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "s", + 5, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, s), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ints", + 6, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_ints), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, ints), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "floats", + 7, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_FLOAT, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_floats), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, floats), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "strings", + 8, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_strings), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, strings), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "b", + 10, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_b), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, 
b), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "bools", + 11, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_bools), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, bools), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "block_idx", + 12, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_block_idx), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, block_idx), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "l", + 13, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT64, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_l), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, l), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "blocks_idx", + 14, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_blocks_idx), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, blocks_idx), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name[] = { + 8, /* field[8] = b */ + 10, /* field[10] = block_idx */ + 12, /* field[12] = blocks_idx */ + 9, /* field[9] = bools */ + 3, /* field[3] = f */ + 6, /* field[6] = floats */ + 2, /* field[2] = i */ + 5, /* field[5] = ints */ + 11, /* field[11] = l */ + 0, /* field[0] = name */ + 4, /* field[4] = s */ + 7, /* field[7] = strings */ + 1, /* field[1] = type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__op_desc__attr__number_ranges[2 + 1] = +{ + { 1, 0 }, + { 10, 8 }, + { 0, 13 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_desc__attr__descriptor = +{ + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpDesc.Attr", + "Attr", + "PaddleMobile__Framework__Proto__OpDesc__Attr", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpDesc__Attr), + 13, + paddle_mobile__framework__proto__op_desc__attr__field_descriptors, + paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name, + 2, paddle_mobile__framework__proto__op_desc__attr__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__op_desc__attr__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__op_desc__var__field_descriptors[2] = +{ + { + "parameter", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, parameter), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "arguments", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, n_arguments), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, arguments), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__op_desc__var__field_indices_by_name[] = { + 1, /* field[1] = arguments */ + 0, /* field[0] = parameter */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__op_desc__var__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_desc__var__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpDesc.Var", + "Var", + "PaddleMobile__Framework__Proto__OpDesc__Var", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpDesc__Var), + 2, + paddle_mobile__framework__proto__op_desc__var__field_descriptors, + 
paddle_mobile__framework__proto__op_desc__var__field_indices_by_name, + 1, paddle_mobile__framework__proto__op_desc__var__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__op_desc__var__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const protobuf_c_boolean paddle_mobile__framework__proto__op_desc__is_target__default_value = 0; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__op_desc__field_descriptors[5] = +{ + { + "inputs", + 1, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpDesc, n_inputs), + offsetof(PaddleMobile__Framework__Proto__OpDesc, inputs), + &paddle_mobile__framework__proto__op_desc__var__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "outputs", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpDesc, n_outputs), + offsetof(PaddleMobile__Framework__Proto__OpDesc, outputs), + &paddle_mobile__framework__proto__op_desc__var__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "type", + 3, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc, type), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "attrs", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpDesc, n_attrs), + offsetof(PaddleMobile__Framework__Proto__OpDesc, attrs), + &paddle_mobile__framework__proto__op_desc__attr__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "is_target", + 5, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpDesc, has_is_target), + offsetof(PaddleMobile__Framework__Proto__OpDesc, is_target), + NULL, + 
&paddle_mobile__framework__proto__op_desc__is_target__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__op_desc__field_indices_by_name[] = { + 3, /* field[3] = attrs */ + 0, /* field[0] = inputs */ + 4, /* field[4] = is_target */ + 1, /* field[1] = outputs */ + 2, /* field[2] = type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__op_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 5 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpDesc", + "OpDesc", + "PaddleMobile__Framework__Proto__OpDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpDesc), + 5, + paddle_mobile__framework__proto__op_desc__field_descriptors, + paddle_mobile__framework__proto__op_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__op_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__op_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const protobuf_c_boolean paddle_mobile__framework__proto__op_proto__var__duplicable__default_value = 0; +static const protobuf_c_boolean paddle_mobile__framework__proto__op_proto__var__intermediate__default_value = 0; +static const protobuf_c_boolean paddle_mobile__framework__proto__op_proto__var__dispensable__default_value = 0; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__op_proto__var__field_descriptors[6] = +{ + { + "name", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, name), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "comment", + 2, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + 
offsetof(PaddleMobile__Framework__Proto__OpProto__Var, comment), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "duplicable", + 3, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, has_duplicable), + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, duplicable), + NULL, + &paddle_mobile__framework__proto__op_proto__var__duplicable__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "intermediate", + 4, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, has_intermediate), + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, intermediate), + NULL, + &paddle_mobile__framework__proto__op_proto__var__intermediate__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "dispensable", + 5, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, has_dispensable), + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, dispensable), + NULL, + &paddle_mobile__framework__proto__op_proto__var__dispensable__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "reuse", + 6, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, reuse), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__op_proto__var__field_indices_by_name[] = { + 1, /* field[1] = comment */ + 4, /* field[4] = dispensable */ + 2, /* field[2] = duplicable */ + 3, /* field[3] = intermediate */ + 0, /* field[0] = name */ + 5, /* field[5] = reuse */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__op_proto__var__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 6 } +}; +const ProtobufCMessageDescriptor 
paddle_mobile__framework__proto__op_proto__var__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpProto.Var", + "Var", + "PaddleMobile__Framework__Proto__OpProto__Var", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpProto__Var), + 6, + paddle_mobile__framework__proto__op_proto__var__field_descriptors, + paddle_mobile__framework__proto__op_proto__var__field_indices_by_name, + 1, paddle_mobile__framework__proto__op_proto__var__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__op_proto__var__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const protobuf_c_boolean paddle_mobile__framework__proto__op_proto__attr__generated__default_value = 0; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__op_proto__attr__field_descriptors[4] = +{ + { + "name", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, name), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "type", + 2, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, type), + &paddle_mobile__framework__proto__attr_type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "comment", + 3, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, comment), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "generated", + 4, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, has_generated), + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, generated), + NULL, + &paddle_mobile__framework__proto__op_proto__attr__generated__default_value, + 0, /* flags */ + 
0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__op_proto__attr__field_indices_by_name[] = { + 2, /* field[2] = comment */ + 3, /* field[3] = generated */ + 0, /* field[0] = name */ + 1, /* field[1] = type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__op_proto__attr__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_proto__attr__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpProto.Attr", + "Attr", + "PaddleMobile__Framework__Proto__OpProto__Attr", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpProto__Attr), + 4, + paddle_mobile__framework__proto__op_proto__attr__field_descriptors, + paddle_mobile__framework__proto__op_proto__attr__field_indices_by_name, + 1, paddle_mobile__framework__proto__op_proto__attr__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__op_proto__attr__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__op_proto__field_descriptors[5] = +{ + { + "type", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto, type), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "inputs", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpProto, n_inputs), + offsetof(PaddleMobile__Framework__Proto__OpProto, inputs), + &paddle_mobile__framework__proto__op_proto__var__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "outputs", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpProto, n_outputs), + offsetof(PaddleMobile__Framework__Proto__OpProto, outputs), + 
&paddle_mobile__framework__proto__op_proto__var__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "attrs", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpProto, n_attrs), + offsetof(PaddleMobile__Framework__Proto__OpProto, attrs), + &paddle_mobile__framework__proto__op_proto__attr__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "comment", + 5, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto, comment), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__op_proto__field_indices_by_name[] = { + 3, /* field[3] = attrs */ + 4, /* field[4] = comment */ + 1, /* field[1] = inputs */ + 2, /* field[2] = outputs */ + 0, /* field[0] = type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__op_proto__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 5 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_proto__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpProto", + "OpProto", + "PaddleMobile__Framework__Proto__OpProto", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpProto), + 5, + paddle_mobile__framework__proto__op_proto__field_descriptors, + paddle_mobile__framework__proto__op_proto__field_indices_by_name, + 1, paddle_mobile__framework__proto__op_proto__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__op_proto__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__tensor_desc__field_descriptors[2] = +{ + { + "data_type", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + 
offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, data_type), + &paddle_mobile__framework__proto__var_type__type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "dims", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_INT64, + offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, n_dims), + offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, dims), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__tensor_desc__field_indices_by_name[] = { + 0, /* field[0] = data_type */ + 1, /* field[1] = dims */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__tensor_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__tensor_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.TensorDesc", + "TensorDesc", + "PaddleMobile__Framework__Proto__VarType__TensorDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__TensorDesc), + 2, + paddle_mobile__framework__proto__var_type__tensor_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__tensor_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__tensor_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__tensor_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const int32_t paddle_mobile__framework__proto__var_type__lo_dtensor_desc__lod_level__default_value = 0; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_descriptors[2] = +{ + { + "tensor", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, tensor), + 
&paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "lod_level", + 2, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, has_lod_level), + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, lod_level), + NULL, + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__lod_level__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_indices_by_name[] = { + 1, /* field[1] = lod_level */ + 0, /* field[0] = tensor */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__lo_dtensor_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.LoDTensorDesc", + "LoDTensorDesc", + "PaddleMobile__Framework__Proto__VarType__LoDTensorDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc), + 2, + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__lo_dtensor_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const int32_t paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__lod_level__default_value = 0; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_descriptors[2] = +{ + { + "tensor", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + 
offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, tensor), + &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "lod_level", + 2, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, has_lod_level), + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, lod_level), + NULL, + &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__lod_level__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_indices_by_name[] = { + 1, /* field[1] = lod_level */ + 0, /* field[0] = tensor */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc", + "LoDTensorArrayDesc", + "PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc), + 2, + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__reader_desc__field_descriptors[1] = +{ + { + "lod_tensor", + 1, + PROTOBUF_C_LABEL_REPEATED, + 
PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__VarType__ReaderDesc, n_lod_tensor), + offsetof(PaddleMobile__Framework__Proto__VarType__ReaderDesc, lod_tensor), + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__reader_desc__field_indices_by_name[] = { + 0, /* field[0] = lod_tensor */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__reader_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__reader_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.ReaderDesc", + "ReaderDesc", + "PaddleMobile__Framework__Proto__VarType__ReaderDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__ReaderDesc), + 1, + paddle_mobile__framework__proto__var_type__reader_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__reader_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__reader_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__reader_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__channel_desc__field_descriptors[2] = +{ + { + "data_type", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__ChannelDesc, data_type), + &paddle_mobile__framework__proto__var_type__type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "capacity", + 2, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_INT64, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__ChannelDesc, capacity), + NULL, + NULL, 
+ 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__channel_desc__field_indices_by_name[] = { + 1, /* field[1] = capacity */ + 0, /* field[0] = data_type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__channel_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__channel_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.ChannelDesc", + "ChannelDesc", + "PaddleMobile__Framework__Proto__VarType__ChannelDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__ChannelDesc), + 2, + paddle_mobile__framework__proto__var_type__channel_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__channel_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__channel_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__channel_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__tuple__field_descriptors[1] = +{ + { + "element_type", + 1, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_ENUM, + offsetof(PaddleMobile__Framework__Proto__VarType__Tuple, n_element_type), + offsetof(PaddleMobile__Framework__Proto__VarType__Tuple, element_type), + &paddle_mobile__framework__proto__var_type__type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__tuple__field_indices_by_name[] = { + 0, /* field[0] = element_type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__tuple__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__tuple__descriptor = +{ + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.Tuple", + "Tuple", + "PaddleMobile__Framework__Proto__VarType__Tuple", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__Tuple), + 1, + paddle_mobile__framework__proto__var_type__tuple__field_descriptors, + paddle_mobile__framework__proto__var_type__tuple__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__tuple__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__tuple__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCEnumValue paddle_mobile__framework__proto__var_type__type__enum_values_by_number[22] = +{ + { "BOOL", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL", 0 }, + { "INT16", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16", 1 }, + { "INT32", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32", 2 }, + { "INT64", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64", 3 }, + { "FP16", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16", 4 }, + { "FP32", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32", 5 }, + { "FP64", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64", 6 }, + { "LOD_TENSOR", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR", 7 }, + { "SELECTED_ROWS", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS", 8 }, + { "FEED_MINIBATCH", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH", 9 }, + { "FETCH_LIST", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST", 10 }, + { "STEP_SCOPES", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES", 11 }, + { "LOD_RANK_TABLE", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE", 12 }, + { "LOD_TENSOR_ARRAY", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_ARRAY", 13 }, + { "PLACE_LIST", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST", 14 }, + { "READER", 
"PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER", 15 }, + { "CHANNEL", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL", 16 }, + { "RAW", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW", 17 }, + { "TUPLE", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE", 18 }, + { "SIZE_T", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SIZE_T", 19 }, + { "UINT8", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__UINT8", 20 }, + { "INT8", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT8", 21 }, +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__type__value_ranges[] = { +{0, 0},{0, 22} +}; +static const ProtobufCEnumValueIndex paddle_mobile__framework__proto__var_type__type__enum_values_by_name[22] = +{ + { "BOOL", 0 }, + { "CHANNEL", 16 }, + { "FEED_MINIBATCH", 9 }, + { "FETCH_LIST", 10 }, + { "FP16", 4 }, + { "FP32", 5 }, + { "FP64", 6 }, + { "INT16", 1 }, + { "INT32", 2 }, + { "INT64", 3 }, + { "INT8", 21 }, + { "LOD_RANK_TABLE", 12 }, + { "LOD_TENSOR", 7 }, + { "LOD_TENSOR_ARRAY", 13 }, + { "PLACE_LIST", 14 }, + { "RAW", 17 }, + { "READER", 15 }, + { "SELECTED_ROWS", 8 }, + { "SIZE_T", 19 }, + { "STEP_SCOPES", 11 }, + { "TUPLE", 18 }, + { "UINT8", 20 }, +}; +const ProtobufCEnumDescriptor paddle_mobile__framework__proto__var_type__type__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.Type", + "Type", + "PaddleMobile__Framework__Proto__VarType__Type", + "paddle_mobile.framework.proto", + 22, + paddle_mobile__framework__proto__var_type__type__enum_values_by_number, + 22, + paddle_mobile__framework__proto__var_type__type__enum_values_by_name, + 1, + paddle_mobile__framework__proto__var_type__type__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_type__field_descriptors[7] = +{ + { + "type", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + 
offsetof(PaddleMobile__Framework__Proto__VarType, type), + &paddle_mobile__framework__proto__var_type__type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "selected_rows", + 2, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, selected_rows), + &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "lod_tensor", + 3, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, lod_tensor), + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "tensor_array", + 4, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, tensor_array), + &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "reader", + 5, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, reader), + &paddle_mobile__framework__proto__var_type__reader_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "channel", + 6, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, channel), + &paddle_mobile__framework__proto__var_type__channel_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "tuple", + 7, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, tuple), + 
&paddle_mobile__framework__proto__var_type__tuple__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_type__field_indices_by_name[] = { + 5, /* field[5] = channel */ + 2, /* field[2] = lod_tensor */ + 4, /* field[4] = reader */ + 1, /* field[1] = selected_rows */ + 3, /* field[3] = tensor_array */ + 6, /* field[6] = tuple */ + 0, /* field[0] = type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_type__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 7 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType", + "VarType", + "PaddleMobile__Framework__Proto__VarType", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType), + 7, + paddle_mobile__framework__proto__var_type__field_descriptors, + paddle_mobile__framework__proto__var_type__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_type__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_type__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const protobuf_c_boolean paddle_mobile__framework__proto__var_desc__persistable__default_value = 0; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__var_desc__field_descriptors[3] = +{ + { + "name", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarDesc, name), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "type", + 2, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarDesc, type), + &paddle_mobile__framework__proto__var_type__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "persistable", 
+ 3, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__VarDesc, has_persistable), + offsetof(PaddleMobile__Framework__Proto__VarDesc, persistable), + NULL, + &paddle_mobile__framework__proto__var_desc__persistable__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__var_desc__field_indices_by_name[] = { + 0, /* field[0] = name */ + 2, /* field[2] = persistable */ + 1, /* field[1] = type */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__var_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 3 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarDesc", + "VarDesc", + "PaddleMobile__Framework__Proto__VarDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarDesc), + 3, + paddle_mobile__framework__proto__var_desc__field_descriptors, + paddle_mobile__framework__proto__var_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__var_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__var_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const int32_t paddle_mobile__framework__proto__block_desc__forward_block_idx__default_value = -1; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__block_desc__field_descriptors[5] = +{ + { + "idx", + 1, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__BlockDesc, idx), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "parent_idx", + 2, + PROTOBUF_C_LABEL_REQUIRED, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__BlockDesc, parent_idx), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* 
reserved1,reserved2, etc */ + }, + { + "vars", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__BlockDesc, n_vars), + offsetof(PaddleMobile__Framework__Proto__BlockDesc, vars), + &paddle_mobile__framework__proto__var_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ops", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__BlockDesc, n_ops), + offsetof(PaddleMobile__Framework__Proto__BlockDesc, ops), + &paddle_mobile__framework__proto__op_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "forward_block_idx", + 5, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__BlockDesc, has_forward_block_idx), + offsetof(PaddleMobile__Framework__Proto__BlockDesc, forward_block_idx), + NULL, + &paddle_mobile__framework__proto__block_desc__forward_block_idx__default_value, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__block_desc__field_indices_by_name[] = { + 4, /* field[4] = forward_block_idx */ + 0, /* field[0] = idx */ + 3, /* field[3] = ops */ + 1, /* field[1] = parent_idx */ + 2, /* field[2] = vars */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__block_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 5 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__block_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.BlockDesc", + "BlockDesc", + "PaddleMobile__Framework__Proto__BlockDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__BlockDesc), + 5, + paddle_mobile__framework__proto__block_desc__field_descriptors, + paddle_mobile__framework__proto__block_desc__field_indices_by_name, + 1, 
paddle_mobile__framework__proto__block_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__block_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor paddle_mobile__framework__proto__program_desc__field_descriptors[2] = +{ + { + "blocks", + 1, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__ProgramDesc, n_blocks), + offsetof(PaddleMobile__Framework__Proto__ProgramDesc, blocks), + &paddle_mobile__framework__proto__block_desc__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "version", + 2, + PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__ProgramDesc, version), + &paddle_mobile__framework__proto__version__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned paddle_mobile__framework__proto__program_desc__field_indices_by_name[] = { + 0, /* field[0] = blocks */ + 1, /* field[1] = version */ +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__program_desc__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor paddle_mobile__framework__proto__program_desc__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.ProgramDesc", + "ProgramDesc", + "PaddleMobile__Framework__Proto__ProgramDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__ProgramDesc), + 2, + paddle_mobile__framework__proto__program_desc__field_descriptors, + paddle_mobile__framework__proto__program_desc__field_indices_by_name, + 1, paddle_mobile__framework__proto__program_desc__number_ranges, + (ProtobufCMessageInit) paddle_mobile__framework__proto__program_desc__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCEnumValue 
paddle_mobile__framework__proto__attr_type__enum_values_by_number[11] = +{ + { "INT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT", 0 }, + { "FLOAT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT", 1 }, + { "STRING", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING", 2 }, + { "INTS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS", 3 }, + { "FLOATS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS", 4 }, + { "STRINGS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS", 5 }, + { "BOOLEAN", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN", 6 }, + { "BOOLEANS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS", 7 }, + { "BLOCK", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK", 8 }, + { "LONG", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG", 9 }, + { "BLOCKS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS", 10 }, +}; +static const ProtobufCIntRange paddle_mobile__framework__proto__attr_type__value_ranges[] = { +{0, 0},{0, 11} +}; +static const ProtobufCEnumValueIndex paddle_mobile__framework__proto__attr_type__enum_values_by_name[11] = +{ + { "BLOCK", 8 }, + { "BLOCKS", 10 }, + { "BOOLEAN", 6 }, + { "BOOLEANS", 7 }, + { "FLOAT", 1 }, + { "FLOATS", 4 }, + { "INT", 0 }, + { "INTS", 3 }, + { "LONG", 9 }, + { "STRING", 2 }, + { "STRINGS", 5 }, +}; +const ProtobufCEnumDescriptor paddle_mobile__framework__proto__attr_type__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.AttrType", + "AttrType", + "PaddleMobile__Framework__Proto__AttrType", + "paddle_mobile.framework.proto", + 11, + paddle_mobile__framework__proto__attr_type__enum_values_by_number, + 11, + paddle_mobile__framework__proto__attr_type__enum_values_by_name, + 1, + paddle_mobile__framework__proto__attr_type__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ }; diff --git a/src/framework/framework.pb-c.h b/src/framework/framework.pb-c.h index 2e8c405dffdb3ab65b4cda63d4b09336ff676d5d..4044ecbc8897161ecdce488a468783b6a3d34e2f 
100644 --- a/src/framework/framework.pb-c.h +++ b/src/framework/framework.pb-c.h @@ -4,48 +4,35 @@ #ifndef PROTOBUF_C_framework_2eproto__INCLUDED #define PROTOBUF_C_framework_2eproto__INCLUDED -#include "common/protobuf-c.h" +#include PROTOBUF_C__BEGIN_DECLS #if PROTOBUF_C_VERSION_NUMBER < 1000000 # error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. -#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION +#elif 1003001 < PROTOBUF_C_MIN_COMPILER_VERSION # error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. #endif -typedef struct _PaddleMobile__Framework__Proto__OpDesc - PaddleMobile__Framework__Proto__OpDesc; -typedef struct _PaddleMobile__Framework__Proto__OpDesc__Attr - PaddleMobile__Framework__Proto__OpDesc__Attr; -typedef struct _PaddleMobile__Framework__Proto__OpDesc__Var - PaddleMobile__Framework__Proto__OpDesc__Var; -typedef struct _PaddleMobile__Framework__Proto__OpProto - PaddleMobile__Framework__Proto__OpProto; -typedef struct _PaddleMobile__Framework__Proto__OpProto__Var - PaddleMobile__Framework__Proto__OpProto__Var; -typedef struct _PaddleMobile__Framework__Proto__OpProto__Attr - PaddleMobile__Framework__Proto__OpProto__Attr; -typedef struct _PaddleMobile__Framework__Proto__VarType - PaddleMobile__Framework__Proto__VarType; -typedef struct _PaddleMobile__Framework__Proto__VarType__TensorDesc - PaddleMobile__Framework__Proto__VarType__TensorDesc; -typedef struct _PaddleMobile__Framework__Proto__VarType__LoDTensorDesc - PaddleMobile__Framework__Proto__VarType__LoDTensorDesc; -typedef struct _PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc - PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc; -typedef struct _PaddleMobile__Framework__Proto__VarType__ReaderDesc - PaddleMobile__Framework__Proto__VarType__ReaderDesc; -typedef 
struct _PaddleMobile__Framework__Proto__VarType__ChannelDesc - PaddleMobile__Framework__Proto__VarType__ChannelDesc; -typedef struct _PaddleMobile__Framework__Proto__VarType__Tuple - PaddleMobile__Framework__Proto__VarType__Tuple; -typedef struct _PaddleMobile__Framework__Proto__VarDesc - PaddleMobile__Framework__Proto__VarDesc; -typedef struct _PaddleMobile__Framework__Proto__BlockDesc - PaddleMobile__Framework__Proto__BlockDesc; -typedef struct _PaddleMobile__Framework__Proto__ProgramDesc - PaddleMobile__Framework__Proto__ProgramDesc; + +typedef struct _PaddleMobile__Framework__Proto__Version PaddleMobile__Framework__Proto__Version; +typedef struct _PaddleMobile__Framework__Proto__OpDesc PaddleMobile__Framework__Proto__OpDesc; +typedef struct _PaddleMobile__Framework__Proto__OpDesc__Attr PaddleMobile__Framework__Proto__OpDesc__Attr; +typedef struct _PaddleMobile__Framework__Proto__OpDesc__Var PaddleMobile__Framework__Proto__OpDesc__Var; +typedef struct _PaddleMobile__Framework__Proto__OpProto PaddleMobile__Framework__Proto__OpProto; +typedef struct _PaddleMobile__Framework__Proto__OpProto__Var PaddleMobile__Framework__Proto__OpProto__Var; +typedef struct _PaddleMobile__Framework__Proto__OpProto__Attr PaddleMobile__Framework__Proto__OpProto__Attr; +typedef struct _PaddleMobile__Framework__Proto__VarType PaddleMobile__Framework__Proto__VarType; +typedef struct _PaddleMobile__Framework__Proto__VarType__TensorDesc PaddleMobile__Framework__Proto__VarType__TensorDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__LoDTensorDesc PaddleMobile__Framework__Proto__VarType__LoDTensorDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__ReaderDesc PaddleMobile__Framework__Proto__VarType__ReaderDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__ChannelDesc 
PaddleMobile__Framework__Proto__VarType__ChannelDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__Tuple PaddleMobile__Framework__Proto__VarType__Tuple; +typedef struct _PaddleMobile__Framework__Proto__VarDesc PaddleMobile__Framework__Proto__VarDesc; +typedef struct _PaddleMobile__Framework__Proto__BlockDesc PaddleMobile__Framework__Proto__BlockDesc; +typedef struct _PaddleMobile__Framework__Proto__ProgramDesc PaddleMobile__Framework__Proto__ProgramDesc; + /* --- enums --- */ @@ -60,6 +47,12 @@ typedef enum _PaddleMobile__Framework__Proto__VarType__Type { PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16 = 4, PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32 = 5, PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64 = 6, + /* + * Tensor is used in C++. + */ + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SIZE_T = 19, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__UINT8 = 20, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT8 = 21, /* * Other types that may need additional descriptions */ @@ -79,9 +72,8 @@ typedef enum _PaddleMobile__Framework__Proto__VarType__Type { * in operators like nccl_op */ PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW = 17, - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE = - 18 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE( - PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE) + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE = 18 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE) } PaddleMobile__Framework__Proto__VarType__Type; typedef enum _PaddleMobile__Framework__Proto__AttrType { PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT = 0, @@ -93,14 +85,32 @@ typedef enum _PaddleMobile__Framework__Proto__AttrType { PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN = 6, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8, - PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = - 9 
PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE( - PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE) + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS = 10 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE) } PaddleMobile__Framework__Proto__AttrType; /* --- messages --- */ -struct _PaddleMobile__Framework__Proto__OpDesc__Attr { +/* + * Any incompatible changes to ProgramDesc and its dependencies should + * raise the version defined version.h. + * Serailization and Deserialization codes should be modified in a way + * that supports old versions following the version and compatibility policy. + */ +struct _PaddleMobile__Framework__Proto__Version +{ + ProtobufCMessage base; + protobuf_c_boolean has_version; + int64_t version; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VERSION__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__version__descriptor) \ + , 0, 0ll } + + +struct _PaddleMobile__Framework__Proto__OpDesc__Attr +{ ProtobufCMessage base; char *name; PaddleMobile__Framework__Proto__AttrType type; @@ -123,33 +133,32 @@ struct _PaddleMobile__Framework__Proto__OpDesc__Attr { int32_t block_idx; protobuf_c_boolean has_l; int64_t l; + size_t n_blocks_idx; + int32_t *blocks_idx; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__op_desc__attr__descriptor) \ - , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, 0, 0, 0, 0, NULL, \ - 0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, 0, 0, 0 \ - } - -struct _PaddleMobile__Framework__Proto__OpDesc__Var { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__op_desc__attr__descriptor) \ + , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, 0, 0, 0, 0, NULL, 0,NULL, 0,NULL, 0,NULL, 0, 0, 0,NULL, 0, 0, 0, 0, 0,NULL } + + +struct 
_PaddleMobile__Framework__Proto__OpDesc__Var +{ ProtobufCMessage base; char *parameter; size_t n_arguments; char **arguments; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__VAR__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__op_desc__var__descriptor) \ - , NULL, 0, NULL \ - } +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__VAR__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__op_desc__var__descriptor) \ + , NULL, 0,NULL } + /* * OpDesc describes an instance of a C++ framework::OperatorBase * derived class type. */ -struct _PaddleMobile__Framework__Proto__OpDesc { +struct _PaddleMobile__Framework__Proto__OpDesc +{ ProtobufCMessage base; char *type; size_t n_inputs; @@ -161,17 +170,16 @@ struct _PaddleMobile__Framework__Proto__OpDesc { protobuf_c_boolean has_is_target; protobuf_c_boolean is_target; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__op_desc__descriptor) \ - , NULL, 0, NULL, 0, NULL, 0, NULL, 0, 0 \ - } +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__op_desc__descriptor) \ + , NULL, 0,NULL, 0,NULL, 0,NULL, 0, 0 } + /* * VarProto describes the C++ type framework::Variable. 
*/ -struct _PaddleMobile__Framework__Proto__OpProto__Var { +struct _PaddleMobile__Framework__Proto__OpProto__Var +{ ProtobufCMessage base; char *name; char *comment; @@ -181,18 +189,18 @@ struct _PaddleMobile__Framework__Proto__OpProto__Var { protobuf_c_boolean intermediate; protobuf_c_boolean has_dispensable; protobuf_c_boolean dispensable; + char *reuse; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__VAR__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__op_proto__var__descriptor) \ - , NULL, NULL, 0, 0, 0, 0, 0, 0 \ - } +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__VAR__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__op_proto__var__descriptor) \ + , NULL, NULL, 0, 0, 0, 0, 0, 0, NULL } + /* * AttrProto describes the C++ type Attribute. */ -struct _PaddleMobile__Framework__Proto__OpProto__Attr { +struct _PaddleMobile__Framework__Proto__OpProto__Attr +{ ProtobufCMessage base; char *name; PaddleMobile__Framework__Proto__AttrType type; @@ -205,17 +213,16 @@ struct _PaddleMobile__Framework__Proto__OpProto__Attr { protobuf_c_boolean has_generated; protobuf_c_boolean generated; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__ATTR__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__op_proto__attr__descriptor) \ - , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, NULL, 0, 0 \ - } +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__ATTR__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__op_proto__attr__descriptor) \ + , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, NULL, 0, 0 } + /* * OpProto describes a C++ framework::OperatorBase derived class. 
*/ -struct _PaddleMobile__Framework__Proto__OpProto { +struct _PaddleMobile__Framework__Proto__OpProto +{ ProtobufCMessage base; char *type; size_t n_inputs; @@ -226,14 +233,13 @@ struct _PaddleMobile__Framework__Proto__OpProto { PaddleMobile__Framework__Proto__OpProto__Attr **attrs; char *comment; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__op_proto__descriptor) \ - , NULL, 0, NULL, 0, NULL, 0, NULL, NULL \ - } - -struct _PaddleMobile__Framework__Proto__VarType__TensorDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__op_proto__descriptor) \ + , NULL, 0,NULL, 0,NULL, 0,NULL, NULL } + + +struct _PaddleMobile__Framework__Proto__VarType__TensorDesc +{ ProtobufCMessage base; /* * Should only be PODType. Is enforced in C++ @@ -245,76 +251,70 @@ struct _PaddleMobile__Framework__Proto__VarType__TensorDesc { size_t n_dims; int64_t *dims; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TENSOR_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor) \ - , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, 0, NULL \ - } - -struct _PaddleMobile__Framework__Proto__VarType__LoDTensorDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TENSOR_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__tensor_desc__descriptor) \ + , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, 0,NULL } + + +struct _PaddleMobile__Framework__Proto__VarType__LoDTensorDesc +{ ProtobufCMessage base; PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor; protobuf_c_boolean has_lod_level; int32_t lod_level; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor) \ - , NULL, 0, 0 \ - } - 
-struct _PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor) \ + , NULL, 0, 0 } + + +struct _PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc +{ ProtobufCMessage base; PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor; protobuf_c_boolean has_lod_level; int32_t lod_level; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_ARRAY_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor) \ - , NULL, 0, 0 \ - } - -struct _PaddleMobile__Framework__Proto__VarType__ReaderDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_ARRAY_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor) \ + , NULL, 0, 0 } + + +struct _PaddleMobile__Framework__Proto__VarType__ReaderDesc +{ ProtobufCMessage base; size_t n_lod_tensor; PaddleMobile__Framework__Proto__VarType__LoDTensorDesc **lod_tensor; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__READER_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__reader_desc__descriptor) \ - , 0, NULL \ - } - -struct _PaddleMobile__Framework__Proto__VarType__ChannelDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__READER_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__reader_desc__descriptor) \ + , 0,NULL } + + +struct _PaddleMobile__Framework__Proto__VarType__ChannelDesc +{ ProtobufCMessage base; PaddleMobile__Framework__Proto__VarType__Type data_type; int64_t capacity; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__CHANNEL_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__channel_desc__descriptor) \ - , 
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, 0 \ - } - -struct _PaddleMobile__Framework__Proto__VarType__Tuple { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__CHANNEL_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__channel_desc__descriptor) \ + , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, 0 } + + +struct _PaddleMobile__Framework__Proto__VarType__Tuple +{ ProtobufCMessage base; size_t n_element_type; PaddleMobile__Framework__Proto__VarType__Type *element_type; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TUPLE__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__tuple__descriptor) \ - , 0, NULL \ - } - -struct _PaddleMobile__Framework__Proto__VarType { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TUPLE__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__tuple__descriptor) \ + , 0,NULL } + + +struct _PaddleMobile__Framework__Proto__VarType +{ ProtobufCMessage base; PaddleMobile__Framework__Proto__VarType__Type type; PaddleMobile__Framework__Proto__VarType__TensorDesc *selected_rows; @@ -324,29 +324,26 @@ struct _PaddleMobile__Framework__Proto__VarType { PaddleMobile__Framework__Proto__VarType__ChannelDesc *channel; PaddleMobile__Framework__Proto__VarType__Tuple *tuple; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_type__descriptor) \ - , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, NULL, NULL, NULL, \ - NULL, NULL, NULL \ - } - -struct _PaddleMobile__Framework__Proto__VarDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_type__descriptor) \ + , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, NULL, NULL, NULL, NULL, NULL, NULL } + + +struct _PaddleMobile__Framework__Proto__VarDesc +{ ProtobufCMessage base; char *name; 
PaddleMobile__Framework__Proto__VarType *type; protobuf_c_boolean has_persistable; protobuf_c_boolean persistable; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__var_desc__descriptor) \ - , NULL, NULL, 0, 0 \ - } - -struct _PaddleMobile__Framework__Proto__BlockDesc { +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__var_desc__descriptor) \ + , NULL, NULL, 0, 0 } + + +struct _PaddleMobile__Framework__Proto__BlockDesc +{ ProtobufCMessage base; int32_t idx; int32_t parent_idx; @@ -357,12 +354,10 @@ struct _PaddleMobile__Framework__Proto__BlockDesc { protobuf_c_boolean has_forward_block_idx; int32_t forward_block_idx; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__block_desc__descriptor) \ - , 0, 0, 0, NULL, 0, NULL, 0, -1 \ - } +#define PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__block_desc__descriptor) \ + , 0, 0, 0,NULL, 0,NULL, 0, -1 } + /* * Please refer to @@ -371,209 +366,261 @@ struct _PaddleMobile__Framework__Proto__BlockDesc { * TODO(panyx0718): A model can have multiple programs. Need a * way to distinguish them. Maybe ID or name? 
*/ -struct _PaddleMobile__Framework__Proto__ProgramDesc { +struct _PaddleMobile__Framework__Proto__ProgramDesc +{ ProtobufCMessage base; size_t n_blocks; PaddleMobile__Framework__Proto__BlockDesc **blocks; + PaddleMobile__Framework__Proto__Version *version; }; -#define PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT( \ - &paddle_mobile__framework__proto__program_desc__descriptor) \ - , 0, NULL \ - } - +#define PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&paddle_mobile__framework__proto__program_desc__descriptor) \ + , 0,NULL, NULL } + + +/* PaddleMobile__Framework__Proto__Version methods */ +void paddle_mobile__framework__proto__version__init + (PaddleMobile__Framework__Proto__Version *message); +size_t paddle_mobile__framework__proto__version__get_packed_size + (const PaddleMobile__Framework__Proto__Version *message); +size_t paddle_mobile__framework__proto__version__pack + (const PaddleMobile__Framework__Proto__Version *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__version__pack_to_buffer + (const PaddleMobile__Framework__Proto__Version *message, + ProtobufCBuffer *buffer); +PaddleMobile__Framework__Proto__Version * + paddle_mobile__framework__proto__version__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__version__free_unpacked + (PaddleMobile__Framework__Proto__Version *message, + ProtobufCAllocator *allocator); /* PaddleMobile__Framework__Proto__OpDesc__Attr methods */ -void paddle_mobile__framework__proto__op_desc__attr__init( - PaddleMobile__Framework__Proto__OpDesc__Attr *message); +void paddle_mobile__framework__proto__op_desc__attr__init + (PaddleMobile__Framework__Proto__OpDesc__Attr *message); /* PaddleMobile__Framework__Proto__OpDesc__Var methods */ -void paddle_mobile__framework__proto__op_desc__var__init( - PaddleMobile__Framework__Proto__OpDesc__Var *message); +void 
paddle_mobile__framework__proto__op_desc__var__init + (PaddleMobile__Framework__Proto__OpDesc__Var *message); /* PaddleMobile__Framework__Proto__OpDesc methods */ -void paddle_mobile__framework__proto__op_desc__init( - PaddleMobile__Framework__Proto__OpDesc *message); - -size_t paddle_mobile__framework__proto__op_desc__get_packed_size( - const PaddleMobile__Framework__Proto__OpDesc *message); - +void paddle_mobile__framework__proto__op_desc__init + (PaddleMobile__Framework__Proto__OpDesc *message); +size_t paddle_mobile__framework__proto__op_desc__get_packed_size + (const PaddleMobile__Framework__Proto__OpDesc *message); +size_t paddle_mobile__framework__proto__op_desc__pack + (const PaddleMobile__Framework__Proto__OpDesc *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__op_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__OpDesc *message, + ProtobufCBuffer *buffer); PaddleMobile__Framework__Proto__OpDesc * -paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data); -void paddle_mobile__framework__proto__op_desc__free_unpacked( - PaddleMobile__Framework__Proto__OpDesc *message, - ProtobufCAllocator *allocator); + paddle_mobile__framework__proto__op_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__op_desc__free_unpacked + (PaddleMobile__Framework__Proto__OpDesc *message, + ProtobufCAllocator *allocator); /* PaddleMobile__Framework__Proto__OpProto__Var methods */ -void paddle_mobile__framework__proto__op_proto__var__init( - PaddleMobile__Framework__Proto__OpProto__Var *message); +void paddle_mobile__framework__proto__op_proto__var__init + (PaddleMobile__Framework__Proto__OpProto__Var *message); /* PaddleMobile__Framework__Proto__OpProto__Attr methods */ -void paddle_mobile__framework__proto__op_proto__attr__init( - PaddleMobile__Framework__Proto__OpProto__Attr *message); +void 
paddle_mobile__framework__proto__op_proto__attr__init + (PaddleMobile__Framework__Proto__OpProto__Attr *message); /* PaddleMobile__Framework__Proto__OpProto methods */ -void paddle_mobile__framework__proto__op_proto__init( - PaddleMobile__Framework__Proto__OpProto *message); -size_t paddle_mobile__framework__proto__op_proto__get_packed_size( - const PaddleMobile__Framework__Proto__OpProto *message); +void paddle_mobile__framework__proto__op_proto__init + (PaddleMobile__Framework__Proto__OpProto *message); +size_t paddle_mobile__framework__proto__op_proto__get_packed_size + (const PaddleMobile__Framework__Proto__OpProto *message); +size_t paddle_mobile__framework__proto__op_proto__pack + (const PaddleMobile__Framework__Proto__OpProto *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__op_proto__pack_to_buffer + (const PaddleMobile__Framework__Proto__OpProto *message, + ProtobufCBuffer *buffer); PaddleMobile__Framework__Proto__OpProto * -paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data); -void paddle_mobile__framework__proto__op_proto__free_unpacked( - PaddleMobile__Framework__Proto__OpProto *message, - ProtobufCAllocator *allocator); + paddle_mobile__framework__proto__op_proto__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__op_proto__free_unpacked + (PaddleMobile__Framework__Proto__OpProto *message, + ProtobufCAllocator *allocator); /* PaddleMobile__Framework__Proto__VarType__TensorDesc methods */ -void paddle_mobile__framework__proto__var_type__tensor_desc__init( - PaddleMobile__Framework__Proto__VarType__TensorDesc *message); +void paddle_mobile__framework__proto__var_type__tensor_desc__init + (PaddleMobile__Framework__Proto__VarType__TensorDesc *message); /* PaddleMobile__Framework__Proto__VarType__LoDTensorDesc methods */ -void paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init( - 
PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message); +void paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init + (PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message); /* PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc methods */ -void paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init( - PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message); +void paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init + (PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message); /* PaddleMobile__Framework__Proto__VarType__ReaderDesc methods */ -void paddle_mobile__framework__proto__var_type__reader_desc__init( - PaddleMobile__Framework__Proto__VarType__ReaderDesc *message); +void paddle_mobile__framework__proto__var_type__reader_desc__init + (PaddleMobile__Framework__Proto__VarType__ReaderDesc *message); /* PaddleMobile__Framework__Proto__VarType__ChannelDesc methods */ -void paddle_mobile__framework__proto__var_type__channel_desc__init( - PaddleMobile__Framework__Proto__VarType__ChannelDesc *message); +void paddle_mobile__framework__proto__var_type__channel_desc__init + (PaddleMobile__Framework__Proto__VarType__ChannelDesc *message); /* PaddleMobile__Framework__Proto__VarType__Tuple methods */ -void paddle_mobile__framework__proto__var_type__tuple__init( - PaddleMobile__Framework__Proto__VarType__Tuple *message); +void paddle_mobile__framework__proto__var_type__tuple__init + (PaddleMobile__Framework__Proto__VarType__Tuple *message); /* PaddleMobile__Framework__Proto__VarType methods */ -void paddle_mobile__framework__proto__var_type__init( - PaddleMobile__Framework__Proto__VarType *message); -size_t paddle_mobile__framework__proto__var_type__get_packed_size( - const PaddleMobile__Framework__Proto__VarType *message); +void paddle_mobile__framework__proto__var_type__init + (PaddleMobile__Framework__Proto__VarType *message); +size_t 
paddle_mobile__framework__proto__var_type__get_packed_size + (const PaddleMobile__Framework__Proto__VarType *message); +size_t paddle_mobile__framework__proto__var_type__pack + (const PaddleMobile__Framework__Proto__VarType *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__var_type__pack_to_buffer + (const PaddleMobile__Framework__Proto__VarType *message, + ProtobufCBuffer *buffer); PaddleMobile__Framework__Proto__VarType * -paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data); -void paddle_mobile__framework__proto__var_type__free_unpacked( - PaddleMobile__Framework__Proto__VarType *message, - ProtobufCAllocator *allocator); + paddle_mobile__framework__proto__var_type__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__var_type__free_unpacked + (PaddleMobile__Framework__Proto__VarType *message, + ProtobufCAllocator *allocator); /* PaddleMobile__Framework__Proto__VarDesc methods */ -void paddle_mobile__framework__proto__var_desc__init( - PaddleMobile__Framework__Proto__VarDesc *message); -size_t paddle_mobile__framework__proto__var_desc__get_packed_size( - const PaddleMobile__Framework__Proto__VarDesc *message); +void paddle_mobile__framework__proto__var_desc__init + (PaddleMobile__Framework__Proto__VarDesc *message); +size_t paddle_mobile__framework__proto__var_desc__get_packed_size + (const PaddleMobile__Framework__Proto__VarDesc *message); +size_t paddle_mobile__framework__proto__var_desc__pack + (const PaddleMobile__Framework__Proto__VarDesc *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__var_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__VarDesc *message, + ProtobufCBuffer *buffer); PaddleMobile__Framework__Proto__VarDesc * -paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator, - size_t len, - const uint8_t *data); -void 
paddle_mobile__framework__proto__var_desc__free_unpacked( - PaddleMobile__Framework__Proto__VarDesc *message, - ProtobufCAllocator *allocator); + paddle_mobile__framework__proto__var_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__var_desc__free_unpacked + (PaddleMobile__Framework__Proto__VarDesc *message, + ProtobufCAllocator *allocator); /* PaddleMobile__Framework__Proto__BlockDesc methods */ -void paddle_mobile__framework__proto__block_desc__init( - PaddleMobile__Framework__Proto__BlockDesc *message); -size_t paddle_mobile__framework__proto__block_desc__get_packed_size( - const PaddleMobile__Framework__Proto__BlockDesc *message); +void paddle_mobile__framework__proto__block_desc__init + (PaddleMobile__Framework__Proto__BlockDesc *message); +size_t paddle_mobile__framework__proto__block_desc__get_packed_size + (const PaddleMobile__Framework__Proto__BlockDesc *message); +size_t paddle_mobile__framework__proto__block_desc__pack + (const PaddleMobile__Framework__Proto__BlockDesc *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__block_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__BlockDesc *message, + ProtobufCBuffer *buffer); PaddleMobile__Framework__Proto__BlockDesc * -paddle_mobile__framework__proto__block_desc__unpack( - ProtobufCAllocator *allocator, size_t len, const uint8_t *data); -void paddle_mobile__framework__proto__block_desc__free_unpacked( - PaddleMobile__Framework__Proto__BlockDesc *message, - ProtobufCAllocator *allocator); + paddle_mobile__framework__proto__block_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__block_desc__free_unpacked + (PaddleMobile__Framework__Proto__BlockDesc *message, + ProtobufCAllocator *allocator); /* PaddleMobile__Framework__Proto__ProgramDesc methods */ -void paddle_mobile__framework__proto__program_desc__init( - 
PaddleMobile__Framework__Proto__ProgramDesc *message); -size_t paddle_mobile__framework__proto__program_desc__get_packed_size( - const PaddleMobile__Framework__Proto__ProgramDesc *message); +void paddle_mobile__framework__proto__program_desc__init + (PaddleMobile__Framework__Proto__ProgramDesc *message); +size_t paddle_mobile__framework__proto__program_desc__get_packed_size + (const PaddleMobile__Framework__Proto__ProgramDesc *message); +size_t paddle_mobile__framework__proto__program_desc__pack + (const PaddleMobile__Framework__Proto__ProgramDesc *message, + uint8_t *out); +size_t paddle_mobile__framework__proto__program_desc__pack_to_buffer + (const PaddleMobile__Framework__Proto__ProgramDesc *message, + ProtobufCBuffer *buffer); PaddleMobile__Framework__Proto__ProgramDesc * -paddle_mobile__framework__proto__program_desc__unpack( - ProtobufCAllocator *allocator, size_t len, const uint8_t *data); -void paddle_mobile__framework__proto__program_desc__free_unpacked( - PaddleMobile__Framework__Proto__ProgramDesc *message, - ProtobufCAllocator *allocator); + paddle_mobile__framework__proto__program_desc__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__program_desc__free_unpacked + (PaddleMobile__Framework__Proto__ProgramDesc *message, + ProtobufCAllocator *allocator); /* --- per-message closures --- */ -typedef void (*PaddleMobile__Framework__Proto__OpDesc__Attr_Closure)( - const PaddleMobile__Framework__Proto__OpDesc__Attr *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__OpDesc__Var_Closure)( - const PaddleMobile__Framework__Proto__OpDesc__Var *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__OpDesc_Closure)( - const PaddleMobile__Framework__Proto__OpDesc *message, void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__OpProto__Var_Closure)( - const PaddleMobile__Framework__Proto__OpProto__Var *message, - void 
*closure_data); -typedef void (*PaddleMobile__Framework__Proto__OpProto__Attr_Closure)( - const PaddleMobile__Framework__Proto__OpProto__Attr *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__OpProto_Closure)( - const PaddleMobile__Framework__Proto__OpProto *message, void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure)( - const PaddleMobile__Framework__Proto__VarType__TensorDesc *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarType__LoDTensorDesc_Closure)( - const PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message, - void *closure_data); -typedef void ( - *PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc_Closure)( - const PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarType__ReaderDesc_Closure)( - const PaddleMobile__Framework__Proto__VarType__ReaderDesc *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarType__ChannelDesc_Closure)( - const PaddleMobile__Framework__Proto__VarType__ChannelDesc *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarType__Tuple_Closure)( - const PaddleMobile__Framework__Proto__VarType__Tuple *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarType_Closure)( - const PaddleMobile__Framework__Proto__VarType *message, void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__VarDesc_Closure)( - const PaddleMobile__Framework__Proto__VarDesc *message, void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__BlockDesc_Closure)( - const PaddleMobile__Framework__Proto__BlockDesc *message, - void *closure_data); -typedef void (*PaddleMobile__Framework__Proto__ProgramDesc_Closure)( - const PaddleMobile__Framework__Proto__ProgramDesc *message, - void *closure_data); +typedef void 
(*PaddleMobile__Framework__Proto__Version_Closure) + (const PaddleMobile__Framework__Proto__Version *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpDesc__Attr_Closure) + (const PaddleMobile__Framework__Proto__OpDesc__Attr *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpDesc__Var_Closure) + (const PaddleMobile__Framework__Proto__OpDesc__Var *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpDesc_Closure) + (const PaddleMobile__Framework__Proto__OpDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpProto__Var_Closure) + (const PaddleMobile__Framework__Proto__OpProto__Var *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpProto__Attr_Closure) + (const PaddleMobile__Framework__Proto__OpProto__Attr *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpProto_Closure) + (const PaddleMobile__Framework__Proto__OpProto *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure) + (const PaddleMobile__Framework__Proto__VarType__TensorDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__LoDTensorDesc_Closure) + (const PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc_Closure) + (const PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__ReaderDesc_Closure) + (const PaddleMobile__Framework__Proto__VarType__ReaderDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__ChannelDesc_Closure) + (const PaddleMobile__Framework__Proto__VarType__ChannelDesc *message, + void *closure_data); +typedef void 
(*PaddleMobile__Framework__Proto__VarType__Tuple_Closure) + (const PaddleMobile__Framework__Proto__VarType__Tuple *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType_Closure) + (const PaddleMobile__Framework__Proto__VarType *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarDesc_Closure) + (const PaddleMobile__Framework__Proto__VarDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__BlockDesc_Closure) + (const PaddleMobile__Framework__Proto__BlockDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__ProgramDesc_Closure) + (const PaddleMobile__Framework__Proto__ProgramDesc *message, + void *closure_data); /* --- services --- */ + /* --- descriptors --- */ -extern const ProtobufCEnumDescriptor - paddle_mobile__framework__proto__attr_type__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_desc__attr__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_desc__var__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_proto__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_proto__var__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__op_proto__attr__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__tensor_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor; -extern const ProtobufCMessageDescriptor - 
paddle_mobile__framework__proto__var_type__reader_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__channel_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_type__tuple__descriptor; -extern const ProtobufCEnumDescriptor - paddle_mobile__framework__proto__var_type__type__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__var_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__block_desc__descriptor; -extern const ProtobufCMessageDescriptor - paddle_mobile__framework__proto__program_desc__descriptor; +extern const ProtobufCEnumDescriptor paddle_mobile__framework__proto__attr_type__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__version__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_desc__attr__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_desc__var__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_proto__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_proto__var__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__op_proto__attr__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__tensor_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor; +extern const ProtobufCMessageDescriptor 
paddle_mobile__framework__proto__var_type__reader_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__channel_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_type__tuple__descriptor; +extern const ProtobufCEnumDescriptor paddle_mobile__framework__proto__var_type__type__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__var_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__block_desc__descriptor; +extern const ProtobufCMessageDescriptor paddle_mobile__framework__proto__program_desc__descriptor; PROTOBUF_C__END_DECLS -#endif /* PROTOBUF_C_framework_2eproto__INCLUDED */ + +#endif /* PROTOBUF_C_framework_2eproto__INCLUDED */ diff --git a/src/framework/framework.proto b/src/framework/framework.proto index 07bfef1c2a69c236ac86732b2dbc00d8abb6334b..4f41e26dc2df8550a6ce318d6e39ef4f3e875e73 100644 --- a/src/framework/framework.proto +++ b/src/framework/framework.proto @@ -16,6 +16,13 @@ syntax = "proto2"; option optimize_for = LITE_RUNTIME; package paddle_mobile.framework.proto; +// Any incompatible changes to ProgramDesc and its dependencies should +// raise the version defined version.h. +// +// Serailization and Deserialization codes should be modified in a way +// that supports old versions following the version and compatibility policy. 
+message Version { optional int64 version = 1 [ default = 0 ]; } + enum AttrType { INT = 0; FLOAT = 1; @@ -27,6 +34,7 @@ enum AttrType { BOOLEANS = 7; BLOCK = 8; LONG = 9; + BLOCKS = 10; } // OpDesc describes an instance of a C++ framework::OperatorBase @@ -46,6 +54,7 @@ message OpDesc { repeated bool bools = 11; optional int32 block_idx = 12; optional int64 l = 13; + repeated int32 blocks_idx = 14; }; message Var { @@ -71,6 +80,7 @@ message OpProto { optional bool duplicable = 3 [ default = false ]; optional bool intermediate = 4 [ default = false ]; optional bool dispensable = 5 [ default = false ]; + optional string reuse = 6; } // AttrProto describes the C++ type Attribute. @@ -101,6 +111,10 @@ message VarType { FP16 = 4; FP32 = 5; FP64 = 6; + // Tensor is used in C++. + SIZE_T = 19; + UINT8 = 20; + INT8 = 21; // Other types that may need additional descriptions LOD_TENSOR = 7; @@ -173,4 +187,8 @@ message BlockDesc { // for more details. // TODO(panyx0718): A model can have multiple programs. Need a // way to distinguish them. Maybe ID or name? 
-message ProgramDesc { repeated BlockDesc blocks = 1; } +message ProgramDesc { + repeated BlockDesc blocks = 1; + + optional Version version = 2; +} diff --git a/src/framework/op_registry.h b/src/framework/op_registry.h index 8a7beae993be1a9f2a52fb48d4930754aba784e1..1d625e00f61e8eb7f02b72920c6e42672b83d739 100644 --- a/src/framework/op_registry.h +++ b/src/framework/op_registry.h @@ -97,6 +97,7 @@ class OpRegistry { }; #define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \ + template class op_class; \ template \ class _OpClass_##op_type##_##device_name : public op_class { \ public: \ @@ -104,11 +105,7 @@ class OpRegistry { }; \ static paddle_mobile::framework::OperatorRegistrar< \ device_type, _OpClass_##op_type##_##device_name> \ - __op_registrar_##op_type##_##device_name(#op_type); \ - int TouchOpRegistrar_##op_type##_##device_name() { \ - __op_registrar_##op_type##_##device_name.Touch(); \ - return 0; \ - } + __op_registrar_##op_type##_##device_name(#op_type); #define REGISTER_OPERATOR_CPU(op_type, op_class) \ REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU); @@ -119,16 +116,8 @@ class OpRegistry { #define REGISTER_OPERATOR_FPGA(op_type, op_class) \ REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA); -#define USE_OP(op_type, device_name) \ - extern int TouchOpRegistrar_##op_type##_##device_name(); \ - static int use_op_itself_##op_type##_##device_name __attribute__((unused)) = \ - TouchOpRegistrar_##op_type##_##device_name() - -#define USE_OP_CPU(op_type) USE_OP(op_type, cpu); - -#define USE_OP_MALI_GPU(op_type) USE_OP(op_type, mali_gpu); - -#define USE_OP_FPGA(op_type) USE_OP(op_type, fpga); +#define REGISTER_OPERATOR_X86(op_type, op_class) \ + REGISTER_OPERATOR(op_type, op_class, x86, paddle_mobile::X86); } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp index dd865fb27d4345f16ddca8005463986787d681be..d95becc51cc4ccdf07b26d3b185b7ded12492a14 
100644 --- a/src/framework/operator.cpp +++ b/src/framework/operator.cpp @@ -62,24 +62,13 @@ void OperatorBase::Run() const { DLOG << "-------------" << type_ << "----------------------------"; vector input_keys = GetInputKeys(); for (const auto key : input_keys) { - auto var_vec_in = inputs_.at(key); - for (int i = 0; i < var_vec_in.size(); ++i) { - auto vari = scope_->FindVar(var_vec_in[i]); - if (vari->IsInitialized()) { - Tensor *tensor = vari->template GetMutable(); - if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor; - } - } + Tensor *input = GetVarValue(key, inputs_, *scope_); + if (input) DLOG << type_ << " input- " << key << "=" << *input; } - for (const auto key : GetOutKeys()) { - auto var_vec_out = outputs_.at(key); - for (int i = 0; i < var_vec_out.size(); ++i) { - auto vari = scope_->FindVar(var_vec_out[i]); - if (vari->IsInitialized()) { - Tensor *tensor = vari->template GetMutable(); - if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor; - } - } + vector output_keys = GetOutKeys(); + for (const auto key : output_keys) { + Tensor *out_ = GetVarValue(key, outputs_, *scope_); + DLOG << type_ << " output- " << key << "=" << *out_; } #endif } @@ -87,6 +76,7 @@ void OperatorBase::Run() const { template class OperatorBase; template class OperatorBase; template class OperatorBase; +template class OperatorBase; } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/program/tensor_desc.h b/src/framework/program/tensor_desc.h index b5fdf9ee45a441a45ed9dc91f09499bf22ce7fe0..f1634c6503516551fb1986d5b64ba1a2638148e6 100644 --- a/src/framework/program/tensor_desc.h +++ b/src/framework/program/tensor_desc.h @@ -40,7 +40,10 @@ enum VarType_Type { VARTYPE_TYPE_READER = 15, VARTYPE_TYPE_CHANNEL = 16, VARTYPE_TYPE_RAW = 17, - VARTYPE_TYPE_TUPLE = 18 + VARTYPE_TYPE_TUPLE = 18, + VARTYPE_TYPE_SIZE_T = 19, + VARTYPE_TYPE_UINT8 = 20, + VARTYPE_TYPE_INT8 = 21, }; class TensorDesc { @@ -58,8 +61,9 @@ class TensorDesc { } 
data_type_ = (VarType_Type)desc->data_type; } - + // return tensor dim as a vector std::vector Dims() const { return dims_; }; + // return tensor data type VarType_Type DataType() const { return data_type_; } private: diff --git a/src/framework/program/var_desc.h b/src/framework/program/var_desc.h index f6f04f2c7026166e1024dcc1a4b2a233deac649b..ede7263a7250747b7a777e894735c6818903dfd0 100644 --- a/src/framework/program/var_desc.h +++ b/src/framework/program/var_desc.h @@ -31,6 +31,7 @@ class VarDesc { this->tensor_desc_ = var_desc.tensor_desc_; this->type_ = var_desc.type_; } + VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) { type_ = (VarType_Type)desc->type->type; name_ = std::string(desc->name); @@ -44,9 +45,7 @@ class VarDesc { tensor_desc_ = TensorDesc(desc->type->lod_tensor->tensor); break; case VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY: - desc->type->tensor_array->tensor->data_type; tensor_desc_ = TensorDesc(desc->type->tensor_array->tensor); - break; default: break; @@ -60,6 +59,7 @@ class VarDesc { break; } } + std::string Name() const { return name_; } VarType_Type Type() const { return type_; } diff --git a/src/framework/tensor.h b/src/framework/tensor.h index ba8e3d3402f16966f08c370bff8cd6b0d1f2637b..c5572dcbfdbd665994be7ebe005b6c9c98b5bca9 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -289,8 +289,12 @@ class Tensor { virtual std::type_index type() const { return type_; } virtual void set_type(std::type_index type) { type_ = type; } - +#ifndef PADDLE_MOBILE_FPGA + /*! the pointer of memory block. */ std::unique_ptr> ptr_; +#else + std::shared_ptr ptr_; +#endif /*! the size of memory block. */ size_t size_; diff --git a/src/io/executor.cpp b/src/io/executor.cpp index c8d8f52a427bb1ee2b9fa04c9ef09f8e626f11b0..72b9112e623b5bf1ceada5053d97fc87a52de0c0 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -26,13 +26,10 @@ limitations under the License. 
*/ #include "framework/program/var_desc.h" #include "framework/scope.h" #include "framework/tensor.h" -#ifdef PADDLE_EXECUTOR_MULTITHREAD -#include -#include -#include "common/threadpool.h" -#endif + namespace paddle_mobile { + using framework::Variable; char *Get_binary_data(std::string filename) { @@ -51,35 +48,30 @@ char *Get_binary_data(std::string filename) { return data; } -#pragma mark - executor template -Executor::Executor(const framework::Program p, int batch_size, - bool use_optimize, bool loddable) - : program_(p), - batch_size_(batch_size), - use_optimize_(use_optimize), - loddable_(loddable) { +Executor::Executor(const framework::Program p, + const bool use_optimize, + const bool loddable) + : program_(p), use_optimize_(use_optimize), loddable_(loddable) { if (use_optimize_) { to_predict_program_ = program_.optimizeProgram; } else { to_predict_program_ = program_.originProgram; } Variable *variable_ptr = program_.scope->Var("batch_size"); - variable_ptr[0].SetValue(batch_size); + variable_ptr->SetValue(1); PADDLE_MOBILE_ENFORCE(to_predict_program_ != nullptr, "to_predict_program_ == NULL!"); - const std::vector> blocks = + const std::vector> &blocks = to_predict_program_->Blocks(); -#ifdef PADDLE_EXECUTOR_MULTITHREAD - depManager.resize(blocks.size()); -#endif - DLOG << "executer in loaddable mode: " << loddable_; + + DLOG << "executor in loaddable mode: " << loddable_; for (int i = 0; i < blocks.size(); ++i) { std::shared_ptr block_desc = blocks[i]; std::vector> ops = block_desc->Ops(); for (int j = 0; j < ops.size(); ++j) { std::shared_ptr op = ops[j]; - DLOG << "create op: " << j << " " << op->Type(); + DLOG << "create op: " << op->Type(); auto op_base = framework::OpRegistry::CreateOp( op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), program_.scope); @@ -89,11 +81,7 @@ Executor::Executor(const framework::Program p, int batch_size, op_base->InferShape(); } ops_of_block_[*block_desc.get()].push_back(op_base); -#ifdef 
PADDLE_EXECUTOR_MULTITHREAD - depManager[i].analysisDep(ops_of_block_[*block_desc.get()]); -#endif } - DLOG << "Total " << ops.size() << " ops have been created "; } if (program_.combined) { InitCombineMemory(); @@ -103,118 +91,83 @@ Executor::Executor(const framework::Program p, int batch_size, std::shared_ptr to_predict_block = to_predict_program_->Block(0); auto &ops = ops_of_block_[*to_predict_block.get()]; - int i = 0; for (const auto &op : ops) { - DLOG << "Init op: " << i++ << " " << op->Type(); op->Init(); } } -template -void Executor::LoadMemory(const framework::VarDesc var_desc, - framework::LoDTensor *tensor, char **data) { - // 1. version - uint32_t version = *reinterpret_cast(*data); - - (*data) += sizeof(uint32_t); - - // 2 Lod information - uint64_t *lod_level_ptr = new uint64_t(); - memcpy(lod_level_ptr, (*data), sizeof(uint64_t)); - uint64_t lod_level = *lod_level_ptr; - delete lod_level_ptr; - (*data) += sizeof(uint64_t); - - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - for (uint64_t i = 0; i < lod_level; ++i) { - uint64_t size = *reinterpret_cast(*data); - (*data) += sizeof(uint64_t); - std::vector tmp(size / sizeof(size_t)); - - for (int k = 0; k < tmp.size(); ++k) { - tmp[k] = *reinterpret_cast(*data); - (*data) += sizeof(size_t); - } - - for (auto j : tmp) { - LOG(kLOG_DEBUG1) << " lod - " << j; +// should use istream to keep offset for data +template +void LoadMemInternal(const void *data, framework::LoDTensor *tensor) { + const char *data_buf = static_cast(data); + int64_t size = tensor->numel(); + Dtype* tensor_data = tensor->mutable_data(); + // stored as low precision, but compute with float + // TODO(hjchen2) must consider signed and unsigned + if (0) { + float min_value; + float max_value; + memcpy(&min_value, data_buf, sizeof(float)); + memcpy(&max_value, data_buf + sizeof(float), sizeof(float)); + data_buf += 2 * sizeof(float); + const float factor = (max_value - min_value) / 255.0; + const uint8_t *uint8_data = 
reinterpret_cast(data_buf); + for (int k = 0; k < size; ++k) { + tensor_data[k] = uint8_data[k] * factor + min_value; } - lod[i] = tmp; - } - - // 3. tensor version - uint32_t tensor_version = *reinterpret_cast(*data); - (*data) += sizeof(uint32_t); - - // 4. tensor desc - int32_t size = *reinterpret_cast(*data); - (*data) += sizeof(int32_t); - - std::unique_ptr buf(new char[size]); - for (int m = 0; m < size; ++m) { - buf.get()[m] = (*data)[m]; + data_buf += size * sizeof(uint8_t); + } else { + memcpy(tensor_data, data_buf, size * sizeof(Dtype)); + data_buf += size * sizeof(Dtype); } - (*data) += (sizeof(char) * size); +} - const framework::TensorDesc &desc = var_desc.Tensor_desc(); - int memory_size = 1; - for (auto l : desc.Dims()) { - memory_size *= l; +template +void Executor::LoadMemory(const void *data, + const framework::VarDesc var_desc, + framework::LoDTensor *tensor) { + const char *data_buf = static_cast(data); + // version + uint32_t version = *(reinterpret_cast(data_buf)); + data_buf += sizeof(uint32_t); + // lod information + uint64_t lod_level = *(reinterpret_cast(data_buf)); + data_buf += sizeof(uint64_t); + + auto *lod = tensor->mutable_lod(); + lod->resize(lod_level); + for (uint64_t i = 0; i < lod_level; ++i) { + uint64_t size = *(reinterpret_cast(data_buf)); + data_buf += sizeof(uint64_t); + std::vector tmp_dim(size / sizeof(size_t)); + memcpy(tmp_dim.data(), data_buf, size); + (*lod)[i] = std::move(tmp_dim); + data_buf += size; } - - tensor->Resize(framework::make_ddim(desc.Dims())); - - void *memory = nullptr; - int type_size = 0; - switch (desc.DataType()) { - case framework::VARTYPE_TYPE_FP16: - type_size = 2; - break; + // tensor version + uint32_t tensor_version = *(reinterpret_cast(data_buf)); + data_buf += sizeof(uint32_t); + // tensor desc size + int32_t tensor_desc_size = *(reinterpret_cast(data_buf)); + data_buf += sizeof(int32_t); + // skip tensor desc + data_buf += tensor_desc_size; + + const framework::TensorDesc &tensor_desc = 
var_desc.Tensor_desc(); + tensor->Resize(framework::make_ddim(tensor_desc.Dims())); + // parse tensor from stream + switch (tensor_desc.DataType()) { case framework::VARTYPE_TYPE_FP32: - type_size = 4; - memory = tensor->mutable_data(); + LoadMemInternal(data_buf, tensor); break; - case framework::VARTYPE_TYPE_FP64: - type_size = 8; + case framework::VARTYPE_TYPE_INT8: + LoadMemInternal(data_buf, tensor); break; case framework::VARTYPE_TYPE_INT32: - memory = tensor->mutable_data(); - type_size = 4; - break; - case framework::VARTYPE_TYPE_INT64: - type_size = 8; - break; - case framework::VARTYPE_TYPE_BOOL: - type_size = 1; + LoadMemInternal(data_buf, tensor); break; default: - break; - } - if (program_.quantification) { - float min_value; - float max_value; - - memcpy(&min_value, *data, sizeof(float)); - memcpy(&max_value, *data + sizeof(float), sizeof(float)); - *data += 2 * sizeof(float); - const float factor = (max_value - min_value) / 255.0; - uint8_t *uint8_data = reinterpret_cast(*data); - for (int k = 0; k < memory_size; ++k) { - static_cast(memory)[k] = uint8_data[k] * factor + min_value; - } - *data += (memory_size * sizeof(uint8_t)); - } else { - for (int n = 0; n < memory_size; n++) { - float value; - memcpy(&value, *data + n * type_size, type_size); - if (value < 1e-30 && value > -1e-30) { - static_cast(memory)[n] = 0.0; - } else { - static_cast(memory)[n] = value; - } - } - (*data) += (sizeof(char) * memory_size * type_size); + LOG(kLOG_ERROR) << "data type is not supported"; } } @@ -223,35 +176,19 @@ void Executor::InitMemory() { for (const auto &block : to_predict_program_->Blocks()) { for (const auto &var_desc : block->Vars()) { auto var = program_.scope->Var(var_desc->Name()); + auto tensor = var->template GetMutable(); if (var_desc->Persistable()) { - auto tensor = var->template GetMutable(); if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { continue; } - char *origin_data = Get_binary_data(program_.model_path + "/" + 
var_desc->Name()); char *data = origin_data; - LoadMemory(*var_desc, tensor, &data); - - // DLOG << "----- " << var_desc->Name(); - // DLOG << "----- " << tensor->dims(); - // float *pDouble = tensor->template data(); - // for (int i = 0; i < tensor->numel() && i < 30; ++i) { - // std::cout << pDouble[i] << std::endl; - // } - delete origin_data; + LoadMemory(data, *var_desc, tensor); + delete[] origin_data; } else { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { - bool is_mute_match; - framework::LoDTensor *tensor = nullptr; - - is_mute_match = varInputMemory(var_desc, var, tensor); - - PADDLE_MOBILE_ENFORCE( - is_mute_match, - "got unhandled var_desc->Tensor_desc().DataType(): %d", - var_desc->Tensor_desc().DataType()); + varInputMemory(var_desc, var, tensor); } } } @@ -273,71 +210,56 @@ void Executor::InitCombineMemory() { for (const auto &block : to_predict_program_->Blocks()) { for (const auto &var_desc : block->Vars()) { auto var = program_.scope->Var(var_desc->Name()); + auto tensor = var->template GetMutable(); if (var_desc->Persistable()) { - auto tensor = var->template GetMutable(); if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { continue; } - LoadMemory(*var_desc, tensor, &data); + LoadMemory(data, *var_desc, tensor); } else { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { - bool is_mute_match = false; - framework::LoDTensor *tensor; - - is_mute_match = varInputMemory(var_desc, var, tensor); - - PADDLE_MOBILE_ENFORCE( - is_mute_match, - "got unhandled var_desc->Tensor_desc().DataType(): %d", - var_desc->Tensor_desc().DataType()); + varInputMemory(var_desc, var, tensor); } } } } - delete origin_data; + + delete[] origin_data; LOG(kLOG_INFO) << " end init combine memory "; } + template bool Executor::varInputMemory( const std::shared_ptr &var_desc, Variable *var, framework::LoDTensor *tensor) const { - bool is_mute_match = false; - switch (var_desc->Tensor_desc().DataType()) { - case 
framework::VARTYPE_TYPE_FP16: { - break; - } - + auto type = var_desc->Tensor_desc().DataType(); + bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) || + (type == framework::VARTYPE_TYPE_INT8) || + (type == framework::VARTYPE_TYPE_INT32) || + (type == framework::VARTYPE_TYPE_INT64); + PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type); + + switch (type) { case framework::VARTYPE_TYPE_FP32: { - tensor = var->template GetMutable(); - tensor->template mutable_data(); - is_mute_match = true; + tensor->mutable_data(); break; } - - case framework::VARTYPE_TYPE_FP64: { - break; + case framework::VARTYPE_TYPE_INT8: { + tensor->mutable_data(); + break; } - case framework::VARTYPE_TYPE_INT32: { - tensor = var->template GetMutable(); - tensor->template mutable_data(); - is_mute_match = true; + tensor->mutable_data(); break; } - case framework::VARTYPE_TYPE_INT64: { - tensor = var->template GetMutable(); - tensor->template mutable_data(); - is_mute_match = true; + tensor->mutable_data(); break; } - case framework::VARTYPE_TYPE_BOOL: { + default: { break; } - - default: { break; } } - return is_mute_match; } @@ -356,61 +278,6 @@ std::shared_ptr Executor::Predict( #ifdef PADDLE_MOBILE_PROFILE std::vector profile(ops.size()); #endif -#ifdef PADDLE_EXECUTOR_MULTITHREAD - std::mutex m; - std::condition_variable cv; - std::queue next; - next.push(0); - int rsize = ops.size(); - std::vector status(rsize, 0); - auto &threadPool = ThreadPool::getThreadPool(); - auto &dep = depManager[0]; - auto finishF = [&ops, &m, &cv, &next, &status, &rsize, &dep](int opi) { - std::lock_guard lk(m); - rsize--; - status[opi] = 2; - for (int i : dep.getNext(opi)) { - bool ok = true; - for (int j : dep.getDeps(i)) { - if (status[j] != 2) { - ok = false; - break; - } - } - if (ok && (status[i] == 0)) { - next.push(i); - } - } - cv.notify_one(); - }; - for (;;) { - std::unique_lock lk(m); - cv.wait(lk, [&next, &rsize] { return rsize == 0 || !next.empty(); }); - if (rsize 
== 0) { - break; - } - while (next.size() > 0) { - int opi = next.front(); - next.pop(); - status[opi] = 1; - threadPool.enqueue([opi, &ops, &finishF, &profile] { - auto &op = ops[opi]; -#ifdef PADDLE_MOBILE_PROFILE - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - profile[opi].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; - profile[opi].tid = ThreadPool::getThreadPoolThreadId(); -#endif - ops[opi]->Run(); -#ifdef PADDLE_MOBILE_PROFILE - clock_gettime(CLOCK_MONOTONIC, &ts); - profile[opi].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; -#endif - finishF(opi); - }); - } - } -#else for (int i = 0; i < ops.size(); i++) { #ifdef PADDLE_MOBILE_PROFILE struct timespec ts; @@ -424,7 +291,6 @@ std::shared_ptr Executor::Predict( profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; #endif } -#endif auto last_op = ops.rbegin(); auto output_map = (*last_op)->Outputs(); std::vector out_keys = (*last_op)->GetOutKeys(); @@ -433,23 +299,6 @@ std::shared_ptr Executor::Predict( framework::GetVarValue(out_keys[0], output_map, *(program_.scope)); #ifdef PADDLE_MOBILE_PROFILE -#ifdef PADDLE_EXECUTOR_MULTITHREAD - // TODO(haipeng): expose profile info as an interface, user can get them to - // analysis - // the performance of their deepnet. 
- FILE *df = fopen("net.dot", "w"); - fprintf(df, "digraph {\n"); - for (int i = 0; i < ops.size(); i++) { - for (int j : dep.getNext(i)) { - fprintf(df, "op_%d -> op_%d\n", i, j); - } - } - for (int i = 0; i < ops.size(); i++) { - fprintf(df, "op_%d[label=\"%s (%d)\"]\n", i, ops[i]->Type().c_str(), i); - } - fprintf(df, "}\n"); - fclose(df); -#endif // FILE *pf = fopen("profile.out", "w"); std::unordered_map _tp; for (int i = 0; i < profile.size(); i++) { @@ -501,61 +350,6 @@ std::shared_ptr Executor::PredictLod( #ifdef PADDLE_MOBILE_PROFILE std::vector profile(ops.size()); #endif -#ifdef PADDLE_EXECUTOR_MULTITHREAD - std::mutex m; - std::condition_variable cv; - std::queue next; - next.push(0); - int rsize = ops.size(); - std::vector status(rsize, 0); - auto &threadPool = ThreadPool::getThreadPool(); - auto &dep = depManager[0]; - auto finishF = [&ops, &m, &cv, &next, &status, &rsize, &dep](int opi) { - std::lock_guard lk(m); - rsize--; - status[opi] = 2; - for (int i : dep.getNext(opi)) { - bool ok = true; - for (int j : dep.getDeps(i)) { - if (status[j] != 2) { - ok = false; - break; - } - } - if (ok && (status[i] == 0)) { - next.push(i); - } - } - cv.notify_one(); - }; - for (;;) { - std::unique_lock lk(m); - cv.wait(lk, [&next, &rsize] { return rsize == 0 || !next.empty(); }); - if (rsize == 0) { - break; - } - while (next.size() > 0) { - int opi = next.front(); - next.pop(); - status[opi] = 1; - threadPool.enqueue([opi, &ops, &finishF, &profile] { - auto &op = ops[opi]; -#ifdef PADDLE_MOBILE_PROFILE - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - profile[opi].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; - profile[opi].tid = ThreadPool::getThreadPoolThreadId(); -#endif - ops[opi]->Run(); -#ifdef PADDLE_MOBILE_PROFILE - clock_gettime(CLOCK_MONOTONIC, &ts); - profile[opi].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; -#endif - finishF(opi); - }); - } - } -#else for (int i = 0; i < ops.size(); i++) { #ifdef PADDLE_MOBILE_PROFILE struct 
timespec ts; @@ -572,7 +366,6 @@ std::shared_ptr Executor::PredictLod( profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; #endif } -#endif auto last_op = ops.rbegin(); auto output_map = (*last_op)->Outputs(); @@ -582,23 +375,6 @@ std::shared_ptr Executor::PredictLod( framework::GetVarValue(out_keys[0], output_map, *(program_.scope)); #ifdef PADDLE_MOBILE_PROFILE -#ifdef PADDLE_EXECUTOR_MULTITHREAD - // TODO(haipeng): expose profile info as an interface, user can get them to - // analysis - // the performance of their deepnet. - FILE *df = fopen("net.dot", "w"); - fprintf(df, "digraph {\n"); - for (int i = 0; i < ops.size(); i++) { - for (int j : dep.getNext(i)) { - fprintf(df, "op_%d -> op_%d\n", i, j); - } - } - for (int i = 0; i < ops.size(); i++) { - fprintf(df, "op_%d[label=\"%s (%d)\"]\n", i, ops[i]->Type().c_str(), i); - } - fprintf(df, "}\n"); - fclose(df); -#endif // FILE *pf = fopen("profile.out", "w"); std::unordered_map _tp; for (int i = 0; i < profile.size(); i++) { @@ -653,80 +429,9 @@ std::vector::Ptype> Executor::Predict( return result_vector; } -#ifdef PADDLE_MOBILE_FPGA - -template -void Executor::InjectVariable(const framework::Tensor &t, - string var_name) { - framework::Variable *g_feed_value = program_.scope->Var(var_name); - framework::Tensor *feed_tensor = - g_feed_value->GetMutable(); - feed_tensor->Resize(t.dims()); - feed_tensor->ShareDataWith(t); -}; - -template -void Executor::FeedData(const framework::Tensor &t) { - InjectVariable(t, "feed"); -}; - -template -std::shared_ptr Executor::FetchResult(int id) { - std::shared_ptr to_predict_block = - to_predict_program_->Block(0); - auto &ops = ops_of_block_[*to_predict_block.get()]; - - PADDLE_MOBILE_ENFORCE(id < ops.size(), "Index out of range"); - auto last_op = id < 0 ? 
ops[ops.size() - 1] : ops[id]; - auto output_map = last_op->Outputs(); - std::vector out_keys = last_op->GetOutKeys(); - PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output"); - auto *output_tensor = framework::GetVarValue( - out_keys[0], output_map, *(program_.scope)); - return std::make_shared(framework::Tensor(*output_tensor)); -}; - -template -void Executor::Predict_From_To(int start, int end) { - std::shared_ptr to_predict_block = - to_predict_program_->Block(0); - auto &ops = ops_of_block_[*to_predict_block.get()]; - end = end < 0 ? (int)ops.size() : end; - PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(), - "start or end parameter is wrong"); - -#ifdef PADDLE_MOBILE_PROFILE - std::vector profile(ops.size()); -#endif - for (int i = start; i < end; i++) { -#ifdef PADDLE_MOBILE_PROFILE - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; -#endif - DLOG << "Running op: " << i << " " << ops[i]->Type(); - ops[i]->Run(); - -#ifdef PADDLE_MOBILE_PROFILE - clock_gettime(CLOCK_MONOTONIC, &ts); - profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; -#endif - } -}; - -template -void Executor::Predict_From(int start) { - Predict_From_To(start); -}; - -template -void Executor::Predict_To(int end) { - Predict_From_To(0, end); -}; -#endif - template class Executor; template class Executor; template class Executor; +template class Executor; } // namespace paddle_mobile diff --git a/src/io/executor.h b/src/io/executor.h index 67d3f02ac37c4203950a2679d30d7aa9072c70ba..c75979db578c99bd9c5b366588a2cb950bcfe54f 100644 --- a/src/io/executor.h +++ b/src/io/executor.h @@ -18,19 +18,11 @@ limitations under the License. 
*/ #include #include #include - #include "common/types.h" #include "framework/lod_tensor.h" #include "framework/operator.h" #include "framework/program/program.h" #include "framework/tensor.h" -#ifdef PADDLE_EXECUTOR_MULTITHREAD -#include -#include -#include -#include "common/dep_core.h" -#endif -using std::string; namespace paddle_mobile { @@ -38,36 +30,37 @@ template class Executor { public: typedef typename PrecisionTrait

::ptype Ptype; - - /* - * @b init executor with program load by Loader class - * @b 用 loader load 的 program 实例化 executor - * */ - Executor(const framework::Program p, int batch_size = 1, - bool use_optimize = true, bool loddable = false); - - /* - * @b to predict - * */ + // exector constructor + // @param program program converted from proto program in PaddlePaddle + // @param use_optimize bool whether use operator fusion to speed up or not + // @param loddable bool + Executor(const framework::Program program, + const bool use_optimize = true, + const bool loddable = false); + // predict with tensor + // @param input input tensor to do prediction + // @return predicted tensor std::shared_ptr Predict(const framework::Tensor &t); - /* - * @b to predict - * */ + // predict with lod tensor + // @param input input lod tensor to do prediction + // @return predicted lod tensor std::shared_ptr PredictLod( const framework::LoDTensor &t); - /* - * @b to predict with vector and dim - * - * @b 使用 输入 和 输入的维度信息 进行预测 - * */ + // predict with vector input and dims + // @param input vector whose elements will be formed + // @param input lod tensor to do prediction + // @param dims vector whose elements will be formed + // @param input tensor shape + // @return vector which is flatted from predicted tensor std::vector Predict(const std::vector &input, const std::vector &dims); protected: Executor() = default; void InitMemory(); - void LoadMemory(const framework::VarDesc var_desc, - framework::LoDTensor *tensor, char **data); + void LoadMemory(const void* data, + const framework::VarDesc var_desc, + framework::LoDTensor *tensor); void InitCombineMemory(); framework::Program program_; int batch_size_ = 1; @@ -79,9 +72,6 @@ class Executor { ops_of_block_; bool use_optimize_ = false; bool loddable_ = false; -#ifdef PADDLE_EXECUTOR_MULTITHREAD - std::vector depManager; -#endif #ifdef PADDLE_MOBILE_PROFILE struct ProfInfo { int tid = 0; @@ -93,17 +83,6 @@ class Executor { bool 
varInputMemory(const std::shared_ptr &var_desc, framework::Variable *var, framework::LoDTensor *tensor) const; - -#ifdef PADDLE_MOBILE_FPGA - - public: - void InjectVariable(const framework::Tensor &t, string var_name); - void FeedData(const framework::Tensor &t); - std::shared_ptr FetchResult(int id = -1); - void Predict_From_To(int start = 0, int end = -1); - void Predict_From(int start); - void Predict_To(int end); -#endif }; } // namespace paddle_mobile diff --git a/src/io/loader.cpp b/src/io/loader.cpp index 48a2b5cfdaa5f53cd9611dd0be1ce3df05988311..f736372c460160c6433be1a1140d814f5fbf76ab 100644 --- a/src/io/loader.cpp +++ b/src/io/loader.cpp @@ -45,6 +45,7 @@ void InitMemoryFromProgram( tensor->Resize(framework::make_ddim(dim)); } } else { + // var_desc type is always lod tensor in any time?? (houjiang) // TODO(codeWorm): some. } } @@ -67,6 +68,9 @@ void FusionAndPrintInfos( framework::ProgramOptimize program_optimize; program.optimizeProgram = program_optimize.FusionOptimize(originProgramDesc, can_add_split); + if (!program.optimizeProgram) { + program.optimizeProgram = originProgramDesc; + } } if (optimize) { program.optimizeProgram->Description("optimize: "); @@ -193,5 +197,6 @@ const framework::Program Loader::LoadCombinedMemory( template class Loader; template class Loader; template class Loader; +template class Loader; } // namespace paddle_mobile diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp index 0b84f1ff45e519dbbc244863db481f2364907a89..f436c00adb22e826cdce4f5af61f0d85acc25450 100644 --- a/src/io/paddle_mobile.cpp +++ b/src/io/paddle_mobile.cpp @@ -19,14 +19,14 @@ namespace paddle_mobile { template void PaddleMobile::SetThreadNum(int num) { #ifdef _OPENMP - // omp_set_dynamic(0); omp_set_num_threads(num); #endif }; template -bool PaddleMobile::Load(const std::string &dirname, bool optimize, - bool quantification, int batch_size, +bool PaddleMobile::Load(const std::string &dirname, + bool optimize, + bool quantification, bool 
loddable) { if (loader_.get() == nullptr) { loader_ = std::make_shared>(); @@ -36,8 +36,7 @@ bool PaddleMobile::Load(const std::string &dirname, bool optimize, if (executor_.get() == nullptr) { executor_ = std::make_shared>( - loader_->Load(dirname, optimize, quantification), batch_size, optimize, - loddable); + loader_->Load(dirname, optimize, quantification), optimize, loddable); } else { LOG(kLOG_INFO) << "executor inited"; } @@ -47,8 +46,9 @@ bool PaddleMobile::Load(const std::string &dirname, bool optimize, template bool PaddleMobile::Load(const std::string &model_path, - const std::string ¶_path, bool optimize, - bool quantification, int batch_size, + const std::string ¶_path, + bool optimize, + bool quantification, bool loddable) { if (loader_.get() == nullptr) { loader_ = std::make_shared>(); @@ -59,7 +59,7 @@ bool PaddleMobile::Load(const std::string &model_path, if (executor_.get() == nullptr) { executor_ = std::make_shared>( loader_->Load(model_path, para_path, optimize, quantification), - batch_size, optimize, loddable); + optimize, loddable); } else { LOG(kLOG_INFO) << "executor inited"; } @@ -71,7 +71,6 @@ template bool PaddleMobile::LoadCombinedMemory( size_t model_len, const uint8_t *model_buf, size_t combined_params_len, const uint8_t *combined_params_buf) { - int batch_size = 1; bool optimise = true; bool quantification = false; @@ -85,8 +84,7 @@ bool PaddleMobile::LoadCombinedMemory( executor_ = std::make_shared>( loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len, combined_params_buf, optimise, - quantification), - batch_size, optimise); + quantification), optimise); } else { LOG(kLOG_INFO) << "executor inited"; } @@ -124,44 +122,9 @@ PaddleMobile::~PaddleMobile() { loader_ = nullptr; } -#ifdef PADDLE_MOBILE_FPGA - -template -void PaddleMobile::InjectVariable(const framework::Tensor &t, - string var_name) { - executor_->InjectVariable(t, var_name); -} - -template -void PaddleMobile::FeedData(const framework::Tensor &t) { - 
executor_->FeedData(t); -}; - -template -std::shared_ptr PaddleMobile::FetchResult(int id) { - return executor_->FetchResult(id); -}; - -template -void PaddleMobile::Predict_From_To(int start, int end) { - executor_->Predict_From_To(start, end); -}; - -template -void PaddleMobile::Predict_From(int start) { - executor_->Predict_From(start); -}; - -template -void PaddleMobile::Predict_To(int end) { - executor_->Predict_To(end); -}; -#endif - template class PaddleMobile; - template class PaddleMobile; - template class PaddleMobile; +template class PaddleMobile; } // namespace paddle_mobile diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h index 73c5553d91c1b4781718265aba8b7fa8dd5e2777..8abd186752170326552454a4bd3478e34b0256f0 100644 --- a/src/io/paddle_mobile.h +++ b/src/io/paddle_mobile.h @@ -34,57 +34,29 @@ class PaddleMobile { public: PaddleMobile() {} - /* - * @b load separate format fluid model - * @b 加载分开形式的 fluid 模型 - * */ - bool Load(const std::string &dirname, bool optimize = false, - bool quantification = false, int batch_size = 1, + bool Load(const std::string &dirname, + bool optimize = false, + bool quantification = false, bool loddable = false); - /* - * @b load combine format fluid mode - * @b 加载结合在一起格式的模型 - * */ - bool Load(const std::string &model_path, const std::string ¶_path, - bool optimize = false, bool quantification = false, - int batch_size = 1, bool loddable = false); - /* - * @b 设置线程数, 当 cmake 中开启 openmp 时生效 - * */ - void SetThreadNum(int num); + bool Load(const std::string &model_path, + const std::string ¶_path, + bool optimize = false, + bool quantification = false, + bool loddable = false); - /* - * @b to predict - * */ std::shared_ptr Predict(const framework::Tensor &t); - /* - * @b to predict - * */ std::shared_ptr PredictLod(const framework::LoDTensor &t); - /* - * @b to predict with vector and dim - * - * @b 使用 输入 和 输入的维度信息 进行预测 - * */ std::vector Predict(const std::vector &input, const std::vector &dims); - /** - * 
从内存加载model 以及 combinedparams的接口 - * - * @param model_len model 文件的内存大小 - * @param model_buf model文件的内存 - * @param combined_params_len params文件的内存大小 - * @param combined_params_buf params文件的内存 - * @return - */ bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf, size_t combined_params_len, const uint8_t *combined_params_buf); + void SetThreadNum(int num); void Clear(); ~PaddleMobile(); @@ -92,16 +64,6 @@ class PaddleMobile { private: std::shared_ptr> loader_; std::shared_ptr> executor_; - -#ifdef PADDLE_MOBILE_FPGA - public: - void InjectVariable(const framework::Tensor &t, string var_name); - void FeedData(const framework::Tensor &t); - std::shared_ptr FetchResult(int id = -1); - void Predict_From_To(int start = 0, int end = -1); - void Predict_From(int start); - void Predict_To(int end); -#endif }; } // namespace paddle_mobile diff --git a/metal/paddle-mobile/paddle-mobile/CPU/PaddleMobileCPU.h b/src/ios_io/PaddleMobile.h similarity index 55% rename from metal/paddle-mobile/paddle-mobile/CPU/PaddleMobileCPU.h rename to src/ios_io/PaddleMobile.h index c68d81f328f4ce9a9bf16624f677b2996644c35c..5854c5c3a4d4c899feb88822b2f7993860d1ed76 100644 --- a/metal/paddle-mobile/paddle-mobile/CPU/PaddleMobileCPU.h +++ b/src/ios_io/PaddleMobile.h @@ -17,17 +17,7 @@ #import #import -@interface PaddleMobileCPUResult: NSObject - -@property (assign, nonatomic, readonly) float *output; - -@property (assign, nonatomic, readonly) int outputSize; - --(void)releaseOutput; - -@end - -@interface PaddleMobileCPU : NSObject +@interface PaddleMobile : NSObject /* 创建对象 @@ -44,36 +34,13 @@ */ - (BOOL)load:(NSString *)modelAndWeightPath; -/* - * 从内存中加载模型 - * */ -- (BOOL)LoadCombinedMemory:(size_t)modelLen - andModelBuf:(const uint8_t *)modelBuf - andModelParamsLen:(size_t)combinedParamsLen - andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf; - -/* - * 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存 - * */ --(void)preprocess:(CGImageRef)image - output:(float *)output - 
means:(NSArray *)means - scale:(float)scale - dim:(NSArray *)dim; - -/* - * 预测预处理后的数据, 返回结果使用结束需要调用其 realseOutput 函数进行释放 - * */ -- (PaddleMobileCPUResult *)predictInput:(float *)input - dim:(NSArray *)dim; - /* 进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict */ - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale; /* - 进行预测, 默认 means 为 0, scale 为 1.0 + 进行预测 */ - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim; diff --git a/src/ios_io/PaddleMobileCPU.mm b/src/ios_io/PaddleMobile.mm similarity index 55% rename from src/ios_io/PaddleMobileCPU.mm rename to src/ios_io/PaddleMobile.mm index 5a21418ef5fa9cbf7b24436cb778fc8c6c164e16..5c7b801be0ea7967ea0c94813325d41071bb890b 100644 --- a/src/ios_io/PaddleMobileCPU.mm +++ b/src/ios_io/PaddleMobile.mm @@ -12,51 +12,24 @@ See the License for the specific language governing permissions and limitations under the License. */ -#import "PaddleMobileCPU.h" +#import "PaddleMobile.h" #import "op_symbols.h" -#include "framework/tensor.h" #import "io/paddle_mobile.h" #import #import - -@interface PaddleMobileCPUResult() - --(void)toSetOutput:(float *)output; - --(void)toSetOutputSize:(int)outputSize; - -@end - -@implementation PaddleMobileCPUResult - --(void)releaseOutput { - delete [] _output; - _output = nil; - _outputSize = 0; -} - --(void)toSetOutput:(float *)output { - _output = output; -} - --(void)toSetOutputSize:(int)outputSize { - _outputSize = outputSize; -} - -@end - - -@interface PaddleMobileCPU() +@interface PaddleMobile() { paddle_mobile::PaddleMobile *pam_; BOOL loaded_; + std::vector *predict_input_; + } @end -@implementation PaddleMobileCPU +@implementation PaddleMobile static std::mutex shared_mutex; @@ -93,14 +66,6 @@ static std::mutex shared_mutex; } } -- (BOOL)LoadCombinedMemory:(size_t)modelLen - andModelBuf:(const uint8_t *)modelBuf - andModelParamsLen:(size_t)combinedParamsLen - andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf { - 
pam_->SetThreadNum(2); - return loaded_ = pam_->LoadCombinedMemory(modelLen, modelBuf, combinedParamsLen, combinedParamsBuf); -} - - (BOOL)load:(NSString *)modelAndWeightPath{ std::string model_path_str = std::string([modelAndWeightPath UTF8String]); if (loaded_ = pam_->Load(model_path_str)) { @@ -110,57 +75,6 @@ static std::mutex shared_mutex; } } - --(void)preprocess:(CGImageRef)image - output:(float *)output - means:(NSArray *)means - scale:(float)scale - dim:(NSArray *)dim { - std::lock_guard lock(shared_mutex); - - // dim to c++ vector, get numel - std::vector dim_vec; - int numel = 1; - for (int k = 0; k < dim.count; ++k) { - int d = dim[k].intValue; - numel *= d; - dim_vec.push_back(d); - } - - const int sourceRowBytes = CGImageGetBytesPerRow(image); - const int imageWidth = CGImageGetWidth(image); - const int imageHeight = CGImageGetHeight(image); - const int imageChannels = 4; - CGDataProviderRef provider = CGImageGetDataProvider(image); - CFDataRef cfData = CGDataProviderCopyData(provider); - const UInt8 *input = CFDataGetBytePtr(cfData); - - int wanted_input_width = dim_vec[3]; - int wanted_input_height = dim_vec[2]; - int wanted_input_channels = dim_vec[1]; - - for (int c = 0; c < wanted_input_channels; ++c) { - float *out_channel = output + c * wanted_input_height * wanted_input_width; - for (int y = 0; y < wanted_input_height; ++y) { - float *out_row = out_channel + y * wanted_input_width; - for (int x = 0; x < wanted_input_width; ++x) { - int in_row = (y * imageHeight) / wanted_input_height; - int in_col = (x * imageWidth) / wanted_input_width; - const UInt8 *in_pixel = input + (in_row * imageWidth * imageChannels) + (in_col * imageChannels); - float *out_pos = out_row + x; - if (c == 0) { - *out_pos = (in_pixel[c] - means[c].floatValue) * scale; - }else if (c == 1){ - *out_pos = (in_pixel[c] - means[c].floatValue) * scale; - }else if (c == 2){ - *out_pos = (in_pixel[c] - means[c].floatValue) * scale; - } - } - } - } - -} - -(void)preprocess:(const 
UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray *)means scale:(float)scale dim:(std::vector)dim{ if (means == nil) { means = @[@0, @0, @0]; @@ -191,54 +105,27 @@ static std::mutex shared_mutex; } } -- (PaddleMobileCPUResult *)predictInput:(float *)input - dim:(NSArray *)dim { - std::lock_guard lock(shared_mutex); - if (!loaded_) { - printf("PaddleMobile doesn't be loaded yet"); - return nil; - } - - if (dim.count != 4) { - printf("dim must have 4 elements"); +- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{ +// printf(" hi i am here"); + if (predict_input_) { +// printf(" fukc -- "); +// printf(" %d \n", predict_input_->size()); + // dim to c++ vector, get numel + std::vector dim_vec = {1, 3, 300, 300}; +// int numel = 1; +// for (int k = 0; k < dim.count; ++k) { +// int d = dim[k].intValue; +// numel *= d; +// dim_vec.push_back(d); +// } + + + std::vector cpp_result = pam_->Predict(*predict_input_, dim_vec); return nil; } - - // dim to c++ vector, get numel - std::vector dim_vec; - int numel = 1; - for (int k = 0; k < dim.count; ++k) { - int d = dim[k].intValue; - numel *= d; - dim_vec.push_back(d); - } - - paddle_mobile::framework::Tensor input_tensor; - - paddle_mobile::framework::DDim dims = paddle_mobile::framework::make_ddim(dim_vec); - - float *input_ptr = input_tensor.mutable_data(dims); - - memcpy(input_ptr, input, - numel * sizeof(float)); - - std::shared_ptr output = pam_->Predict(input_tensor); - - float *output_pointer = new float[output->numel()]; - - memcpy(output_pointer, output->data(), - output->numel() * sizeof(float)); - - PaddleMobileCPUResult *cpuResult = [[PaddleMobileCPUResult alloc] init]; - [cpuResult toSetOutput: output_pointer]; - [cpuResult toSetOutputSize: output->numel()]; - - return cpuResult; -} - -- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means 
scale:(float)scale{ // printf(" predict one "); - std::lock_guard lock(shared_mutex); + +// std::lock_guard lock(shared_mutex); if (!loaded_) { printf("PaddleMobile doesn't be loaded yet"); return nil; @@ -277,13 +164,15 @@ static std::mutex shared_mutex; } // input - std::vector predict_input; + std::vector *predict_input = new std::vector(); for (int j = 0; j < numel; ++j) { - predict_input.push_back(dataPointer[j]); + predict_input->push_back(dataPointer[j]); } + predict_input_ = predict_input; + // predict - std::vector cpp_result = pam_->Predict(predict_input, dim_vec); + std::vector cpp_result = pam_->Predict(*predict_input, dim_vec); // result long count = 0; diff --git a/src/ios_io/PaddleMobileCPU.h b/src/ios_io/PaddleMobileCPU.h deleted file mode 100644 index c68d81f328f4ce9a9bf16624f677b2996644c35c..0000000000000000000000000000000000000000 --- a/src/ios_io/PaddleMobileCPU.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#pragma once - -#import -#import - -@interface PaddleMobileCPUResult: NSObject - -@property (assign, nonatomic, readonly) float *output; - -@property (assign, nonatomic, readonly) int outputSize; - --(void)releaseOutput; - -@end - -@interface PaddleMobileCPU : NSObject - -/* - 创建对象 -*/ -- (instancetype)init; - -/* - load 模型, 开辟内存 -*/ -- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath; - -/* - 加载散开形式的模型, 需传入模型的目录 -*/ -- (BOOL)load:(NSString *)modelAndWeightPath; - -/* - * 从内存中加载模型 - * */ -- (BOOL)LoadCombinedMemory:(size_t)modelLen - andModelBuf:(const uint8_t *)modelBuf - andModelParamsLen:(size_t)combinedParamsLen - andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf; - -/* - * 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存 - * */ --(void)preprocess:(CGImageRef)image - output:(float *)output - means:(NSArray *)means - scale:(float)scale - dim:(NSArray *)dim; - -/* - * 预测预处理后的数据, 返回结果使用结束需要调用其 realseOutput 函数进行释放 - * */ -- (PaddleMobileCPUResult *)predictInput:(float *)input - dim:(NSArray *)dim; - -/* - 进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict -*/ -- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale; - -/* - 进行预测, 默认 means 为 0, scale 为 1.0 -*/ -- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim; - -/* - 清理内存 -*/ -- (void)clear; - -@end diff --git a/src/ios_io/op_symbols.h b/src/ios_io/op_symbols.h index af0401c15ab28b0baa0cdbffb16a46215a26953e..0fe1137278d19ab4c9c9aaecf2db108e4a184993 100644 --- a/src/ios_io/op_symbols.h +++ b/src/ios_io/op_symbols.h @@ -15,46 +15,27 @@ #pragma once #include "operators/batchnorm_op.h" -#include "operators/bilinear_interp_op.h" #include "operators/box_coder_op.h" #include "operators/concat_op.h" #include "operators/conv_op.h" -#include "operators/conv_transpose_op.h" -#include "operators/crf_op.h" #include "operators/depthwise_conv_op.h" #include "operators/dropout_op.h" #include "operators/elementwise_add_op.h" #include 
"operators/feed_op.h" #include "operators/fetch_op.h" -#include "operators/flatten_op.h" #include "operators/fusion_conv_add.h" -#include "operators/fusion_conv_add_add_prelu_op.h" -#include "operators/fusion_conv_add_bn_op.h" #include "operators/fusion_conv_add_bn_relu_op.h" -#include "operators/fusion_conv_add_prelu_op.h" -#include "operators/fusion_conv_add_relu_op.h" -#include "operators/fusion_conv_bn_add_relu_op.h" #include "operators/fusion_conv_bn_relu_op.h" #include "operators/fusion_dwconv_bn_relu_op.h" -#include "operators/fusion_elementwise_add_relu_op.h" #include "operators/fusion_fc_op.h" -#include "operators/fusion_fc_relu_op.h" -#include "operators/gru_op.h" #include "operators/im2sequence_op.h" -#include "operators/lookup_op.h" #include "operators/lrn_op.h" #include "operators/mul_op.h" #include "operators/multiclass_nms_op.h" #include "operators/pool_op.h" -#include "operators/prelu_op.h" #include "operators/prior_box_op.h" #include "operators/relu_op.h" #include "operators/reshape_op.h" -#include "operators/resize_op.h" -#include "operators/scale_op.h" -#include "operators/shape_op.h" #include "operators/sigmoid_op.h" -#include "operators/slice_op.h" #include "operators/softmax_op.h" -#include "operators/split_op.h" #include "operators/transpose_op.h" diff --git a/src/operators/batchnorm_op.cpp b/src/operators/batchnorm_op.cpp index f820908404ea637d9680c32d5c4b5568e191dd7e..a36f6dd39c0a9d75250e64cd80443d946a28a755 100644 --- a/src/operators/batchnorm_op.cpp +++ b/src/operators/batchnorm_op.cpp @@ -40,4 +40,5 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp); #ifdef PADDLE_MOBILE_FPGA #endif +REGISTER_OPERATOR_X86(batch_norm, ops::BatchNormOp); #endif diff --git a/src/operators/batchnorm_op.h b/src/operators/batchnorm_op.h index 52c423f1bb90428e867ea6fb992036ab83c683d7..a6df70c9356c9bdb8b1fe3ef4520f26ce911490a 100644 --- a/src/operators/batchnorm_op.h +++ b/src/operators/batchnorm_op.h @@ -46,13 +46,4 @@ class BatchNormOp } // namespace 
operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(batch_norm); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(batch_norm); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/bilinear_interp_op.cpp b/src/operators/bilinear_interp_op.cpp index b3388c38ec6050faff1cb7bbe49e8dd042291fc9..608e2ab3a6ae8db428d4dd3a0294cafd81ed682d 100644 --- a/src/operators/bilinear_interp_op.cpp +++ b/src/operators/bilinear_interp_op.cpp @@ -53,4 +53,6 @@ REGISTER_OPERATOR_CPU(bilinear_interp, ops::BilinearOp); #ifdef PADDLE_MOBILE_FPGA #endif +REGISTER_OPERATOR_X86(bilinear_interp, ops::BilinearOp); + #endif diff --git a/src/operators/bilinear_interp_op.h b/src/operators/bilinear_interp_op.h index dbbf24eeac7a900d49f49242fddb8e568968dddc..1b17406c546d336fd42b0a818d16627c87aedb09 100644 --- a/src/operators/bilinear_interp_op.h +++ b/src/operators/bilinear_interp_op.h @@ -50,12 +50,4 @@ class BilinearOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(bilinear_interp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/box_coder_op.cpp b/src/operators/box_coder_op.cpp index 9e57c9021dac1b6857752989727c1c86051e33f7..9c2f53a3576d48b2ab233fc385dd07549eee949c 100644 --- a/src/operators/box_coder_op.cpp +++ b/src/operators/box_coder_op.cpp @@ -60,4 +60,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp); #ifdef PADDLE_MOBILE_FPGA #endif +REGISTER_OPERATOR_X86(box_coder, ops::BoxCoderOp); + #endif diff --git a/src/operators/box_coder_op.h b/src/operators/box_coder_op.h index 5d475c98b6859a33b39e6b36419fa055cde7a1d3..c06ca8265dd495acb79e4e2ec6c497941b822b21 100644 --- a/src/operators/box_coder_op.h +++ b/src/operators/box_coder_op.h @@ -51,12 +51,4 @@ class BoxCoderOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef 
PADDLE_MOBILE_CPU -USE_OP_CPU(box_coder); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/concat_op.cpp b/src/operators/concat_op.cpp index f767f3481c999a16da46e75e314e8ebcb54193fa..2e26d2764b0e1b0a98a8429b97b4901910b8e955 100644 --- a/src/operators/concat_op.cpp +++ b/src/operators/concat_op.cpp @@ -73,4 +73,6 @@ REGISTER_OPERATOR_MALI_GPU(concat, ops::ConcatOp); REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp); #endif +REGISTER_OPERATOR_X86(concat, ops::ConcatOp); + #endif diff --git a/src/operators/concat_op.h b/src/operators/concat_op.h index a169c17dc468dd06ed344a0c7a6ef3cb2c977a27..eb257d47228ab854c00574a001f6454e239cfbbd 100644 --- a/src/operators/concat_op.h +++ b/src/operators/concat_op.h @@ -46,14 +46,4 @@ class ConcatOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(concat); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(concat); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(concat); -#endif - #endif diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp index c4601995219b32db75f22c7c2ed959e18af85f36..15702cb8ff02370546251b40c4ced9ba25b6c8f3 100644 --- a/src/operators/conv_op.cpp +++ b/src/operators/conv_op.cpp @@ -62,4 +62,6 @@ REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp); REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp); #endif +REGISTER_OPERATOR_X86(conv2d, ops::ConvOp); + #endif diff --git a/src/operators/conv_op.h b/src/operators/conv_op.h index 267abfeb614dc8e19a2cf0cf43e7c5f232a62072..23c022e584f9be6cb0b4c2c416ca96e61b3c131f 100644 --- a/src/operators/conv_op.h +++ b/src/operators/conv_op.h @@ -46,14 +46,4 @@ class ConvOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(conv2d); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(conv2d); -#endif -#ifdef PADDLE_MOBILE_FPGA 
-USE_OP_FPGA(conv2d); -#endif - #endif diff --git a/src/operators/conv_transpose_op.cpp b/src/operators/conv_transpose_op.cpp index 34de4cbb10d3689f0be95f1277cfdd76b4c2c141..870b82f75a04f8d65b1b238fa1b985b133e20099 100644 --- a/src/operators/conv_transpose_op.cpp +++ b/src/operators/conv_transpose_op.cpp @@ -29,4 +29,6 @@ REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose); #ifdef PADDLE_MOBILE_FPGA #endif +REGISTER_OPERATOR_X86(conv2d_transpose, ops::ConvOpTranspose); + #endif diff --git a/src/operators/conv_transpose_op.h b/src/operators/conv_transpose_op.h index c9b5e86bef0674b176ba901212a9add2ee2def83..e28cee2d74d6ef4b98ea49ee49c2257b6491e832 100644 --- a/src/operators/conv_transpose_op.h +++ b/src/operators/conv_transpose_op.h @@ -88,14 +88,4 @@ class ConvOpTranspose : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(conv2d_transpose); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(conv2d_transpose); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(conv2d_transpose); -#endif - #endif diff --git a/src/operators/crf_op.cpp b/src/operators/crf_op.cpp index 61f9a54352e236a7fcb7b2765ab11055fbec95ab..3411811f3a80cb014431979f6104879db1389a89 100644 --- a/src/operators/crf_op.cpp +++ b/src/operators/crf_op.cpp @@ -52,5 +52,6 @@ REGISTER_OPERATOR_CPU(crf_decoding, ops::CrfOp); #endif #ifdef PADDLE_MOBILE_FPGA #endif +REGISTER_OPERATOR_X86(crf_decoding, ops::CrfOp); #endif diff --git a/src/operators/crf_op.h b/src/operators/crf_op.h index 9c966c9077273282bbcb4f25674e8df401956967..9b7487ee958467dac451c3bcb743e6122842c7f1 100644 --- a/src/operators/crf_op.h +++ b/src/operators/crf_op.h @@ -47,12 +47,4 @@ class CrfOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(crf_decoding); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git 
a/src/operators/depthwise_conv_op.cpp b/src/operators/depthwise_conv_op.cpp index 8d6b6a143c37537be6de1e60cc095f1052136e26..0fc8f29b81f8fcdcc683fe780efc0fdea10df418 100644 --- a/src/operators/depthwise_conv_op.cpp +++ b/src/operators/depthwise_conv_op.cpp @@ -56,9 +56,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(depthwise_conv2d, ops::DepthwiseConvOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(depthwise_conv2d, ops::DepthwiseConvOp); #endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/depthwise_conv_op.h b/src/operators/depthwise_conv_op.h index 40e87a9b1bf9d2b5102a56ff59821b9d122563c5..845c59a19e613bfcf299b445b778eff4d99c7295 100644 --- a/src/operators/depthwise_conv_op.h +++ b/src/operators/depthwise_conv_op.h @@ -48,12 +48,4 @@ class DepthwiseConvOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(depthwise_conv2d); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/dequantize_op.cpp b/src/operators/dequantize_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6936660a393d6c17a90bd59a67c632d21eba9a8a --- /dev/null +++ b/src/operators/dequantize_op.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "operators/dequantize_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void DequantizeOp::InferShape() const { + const auto& input_dims = this->param_.input_->dims(); + this->param_.out_->Resize(input_dims); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(dequantize, ops::DequantizeOp); +#endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(dequantize, ops::DequantizeOp); +#endif + diff --git a/src/operators/dequantize_op.h b/src/operators/dequantize_op.h new file mode 100644 index 0000000000000000000000000000000000000000..9f388d156140ed6b33e5ad1df8c92bfe69f99d45 --- /dev/null +++ b/src/operators/dequantize_op.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" +#include "operators/kernel/dequantize_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template +class DequantizeOp : public framework::OperatorWithKernel< + DeviceType, + DequantizeParam, + DequantizeKernel> { + public: + DequantizeOp(const std::string &type, + const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel, + DequantizeKernel>( + type, inputs, outputs, attrs, scope) {} + // inference output shape + void InferShape() const override; +}; + +} // namespace paddle_mobile +} // namespace operators diff --git a/src/operators/dropout_op.cpp b/src/operators/dropout_op.cpp index a913ff017bfe776a2c2dfea5696e4c0f23683c46..f1cf92b4cc2315ca232d218f9f63667fc705938f 100644 --- a/src/operators/dropout_op.cpp +++ b/src/operators/dropout_op.cpp @@ -30,7 +30,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(dropout, ops::DropoutOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(dropout, ops::DropoutOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(dropout, ops::DropoutOp); diff --git a/src/operators/dropout_op.h b/src/operators/dropout_op.h index 7523fd5fa8f21dfce20bce963be4b3bc323948e9..65f3587c2336b3e581a30328c41ad397b2848b34 100644 --- a/src/operators/dropout_op.h +++ b/src/operators/dropout_op.h @@ -50,13 +50,4 @@ class DropoutOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(dropout); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(dropout); -#endif - #endif diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp index 49885f783417d61c6348fc4563e7306036994f17..0835f3f74928e44e5233889a87a8059564d490be 100644 --- 
a/src/operators/elementwise_add_op.cpp +++ b/src/operators/elementwise_add_op.cpp @@ -35,7 +35,8 @@ REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp); #ifdef PADDLE_MOBILE_MALI_GPU REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(elementwise_add, ops::ElementwiseAddOp); #endif #endif diff --git a/src/operators/elementwise_add_op.h b/src/operators/elementwise_add_op.h index 14bcd5264d136007e2eb2ffe917697570b32e40b..a1360eba5480a46395cedb445a4df4e4ca0ab279 100644 --- a/src/operators/elementwise_add_op.h +++ b/src/operators/elementwise_add_op.h @@ -48,13 +48,4 @@ class ElementwiseAddOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(elementwise_add); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(elementwise_add); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/feed_op.cpp b/src/operators/feed_op.cpp index 77acb5db31e66d78bccd8dbef51832bda1a1bb60..373239203620ef51858b51e9a93a79fbbb957886 100644 --- a/src/operators/feed_op.cpp +++ b/src/operators/feed_op.cpp @@ -13,11 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "feed_op.h" -namespace paddle_mobile { -namespace operators {} -} // namespace paddle_mobile + namespace ops = paddle_mobile::operators; + #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(feed, ops::FeedOp); #endif @@ -27,3 +26,6 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(feed, ops::FeedOp); #endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(feed, ops::FeedOp); +#endif diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index cccd4f52ebdc368e4f68eaf9dc3f25ee3693fdd2..918c0d96ccafa67806fb3f2edc68aeee1e258ca3 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { -using std::string; + template class FeedOp : public framework::OperatorBase { public: @@ -35,17 +35,13 @@ class FeedOp : public framework::OperatorBase { auto out_dims = param_.Out()->dims(); out_dims[0] = param_.BatchSize(); param_.Out()->Resize(out_dims); - - // note : mobile infershape iscalled when executer is created. so do not - // pass lod here . 
- // it is empty } #ifdef PADDLE_MOBILE_FPGA void Init() { Tensor *output = param_.Out(); - fpga::format_fp16_ofm(output); + fpga::format_ofm(output); } void RunImpl() const { @@ -53,18 +49,15 @@ class FeedOp : public framework::OperatorBase { auto input_ptr = input->data(); fpga::format_image(input); Tensor *output = param_.Out(); - auto output_ptr = output->data(); + auto output_ptr = output->mutable_data(); - fpga::BypassArgs args = {fpga::DATA_TYPE_FP32}; - - args.input_data_type = fpga::DATA_TYPE_FP32; - args.output_data_type = fpga::DATA_TYPE_FP16; - args.input_layout_type = fpga::LAYOUT_CHW; - args.output_layout_type = fpga::LAYOUT_HWC; + fpga::BypassArgs args; + args.convert_type = fpga::DATA_FP32_TO_FP16; + args.layout_type = fpga::LAYOUT_NO_CONVERT; args.image.address = (void *)input_ptr; - args.image.channels = (uint32_t)input->dims()[1]; - args.image.height = (uint32_t)input->dims()[2]; - args.image.width = (uint32_t)input->dims()[3]; + args.image.channels = input->dims()[1]; + args.image.height = input->dims()[2]; + args.image.width = input->dims()[3]; args.image.pad_height = 0; args.image.pad_width = 0; args.output.address = output_ptr; @@ -87,12 +80,3 @@ class FeedOp : public framework::OperatorBase { } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(feed); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(feed); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(feed); -#endif diff --git a/src/operators/fetch_op.cpp b/src/operators/fetch_op.cpp index 30cddceaa45da91be5ea91d70f78503c404552c3..0a4872089414f68f6a801536053744c1becf9eb8 100644 --- a/src/operators/fetch_op.cpp +++ b/src/operators/fetch_op.cpp @@ -27,3 +27,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp); #endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fetch, ops::FetchOp); +#endif diff --git a/src/operators/fetch_op.h b/src/operators/fetch_op.h index 
1efe0832b1fc4b2ce240ed838e2f4554c29dccd9..edabd39260781bf0eb5db8882cfddf07aa23093c 100644 --- a/src/operators/fetch_op.h +++ b/src/operators/fetch_op.h @@ -47,12 +47,3 @@ class FetchOp : public framework::OperatorBase { } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fetch); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(fetch); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fetch); -#endif diff --git a/src/operators/flatten_op.cpp b/src/operators/flatten_op.cpp index 0282414ca6ed0be743849e9d295a354144fccdb9..7f941509e24fdf60545914f33235047c601848e0 100644 --- a/src/operators/flatten_op.cpp +++ b/src/operators/flatten_op.cpp @@ -53,7 +53,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(flatten, ops::FlattenOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(flatten, ops::FlattenOp); #endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/flatten_op.h b/src/operators/flatten_op.h index 4c1f6ff8a0f2b3212750f3be4d1a6aa2bad790ee..e935ae308cf5c28b9c435086b2b5e4d4407c319a 100644 --- a/src/operators/flatten_op.h +++ b/src/operators/flatten_op.h @@ -63,12 +63,4 @@ class FlattenOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(flatten); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/fusion_conv_add_add_prelu.cpp b/src/operators/fusion_conv_add_add_prelu_op.cpp similarity index 85% rename from src/operators/fusion_conv_add_add_prelu.cpp rename to src/operators/fusion_conv_add_add_prelu_op.cpp index 5104c989415eee46e66bdbf419fc6ecf7a2baa34..23049265e85add40ed850affe46a492f6b3044e2 100644 --- a/src/operators/fusion_conv_add_add_prelu.cpp +++ b/src/operators/fusion_conv_add_add_prelu_op.cpp @@ -44,17 +44,20 @@ void FusionConvAddAddPReluOp::InferShape() const { 
this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( + new FusionConvAddAddPReluOpMatcher()); + } // namespace operators } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_MALI_GPU) +#elif defined(PADDLE_MOBILE_FPGA) REGISTER_OPERATOR_FPGA(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp); +#else +REGISTER_OPERATOR_X86(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp); #endif -#endif +#endif // FUSION_CONVADDADDPRELU_OP diff --git a/src/operators/fusion_conv_add_add_prelu_op.h b/src/operators/fusion_conv_add_add_prelu_op.h index d91b4d28d728efb4ecf817294f37e67ac19cfe72..7893ff95a671447adbeebeeaf4096235e7a37964 100644 --- a/src/operators/fusion_conv_add_add_prelu_op.h +++ b/src/operators/fusion_conv_add_add_prelu_op.h @@ -76,37 +76,7 @@ class FusionConvAddAddPReluOp protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef CONV_ADD_ADD_PRELU_REGISTER -#define CONV_ADD_ADD_PRELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( - new FusionConvAddAddPReluOpMatcher()); -#endif - -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA - -#ifndef CONV_ADD_ADD_PRELU_REGISTER -#define CONV_ADD_ADD_PRELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( - new FusionConvAddAddPReluOpMatcher()); -#endif - -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_add_add_prelu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_add_add_prelu); -#endif - #endif diff --git a/src/operators/fusion_conv_add_bn_op.cpp b/src/operators/fusion_conv_add_bn_op.cpp index 
5b61bf5d390cc2904a3f40f5400a5a3eec9a2dd5..99f942b42d34031796da3cc8cc8e6a08c8cc0208 100644 --- a/src/operators/fusion_conv_add_bn_op.cpp +++ b/src/operators/fusion_conv_add_bn_op.cpp @@ -45,6 +45,9 @@ void FusionConvAddBNOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( + new FusionConvAddBNMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -52,7 +55,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_add_bn, ops::FusionConvAddBNOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_add_bn, ops::FusionConvAddBNOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_add_bn, ops::FusionConvAddBNOp); diff --git a/src/operators/fusion_conv_add_bn_op.h b/src/operators/fusion_conv_add_bn_op.h index ec10787697deb006fe03a35192efb0d80bd00a3c..c4260aef42f9d74cc1f7069c3ae26ccf58f75280 100644 --- a/src/operators/fusion_conv_add_bn_op.h +++ b/src/operators/fusion_conv_add_bn_op.h @@ -70,46 +70,7 @@ class FusionConvAddBNOp : public framework::OperatorWithKernel< protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef FUSION_CONV_ADD_BN_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( - new FusionConvAddBNMatcher()); -#define FUSION_CONV_ADD_BN_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#ifndef FUSION_CONV_ADD_BN_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( - new FusionConvAddBNMatcher()); -#define FUSION_CONV_ADD_BN_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_FPGA - -#ifndef FUSION_CONV_ADD_BN_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_bn_registrar( - new FusionConvAddBNMatcher()); -#define FUSION_CONV_ADD_BN_REGISTER -#endif - -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_add_bn); -#endif -#ifdef 
PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_add_bn); -#endif - #endif diff --git a/src/operators/fusion_conv_add_bn_relu_op.cpp b/src/operators/fusion_conv_add_bn_relu_op.cpp index 793634eec392fabe6c7399127ec9cb3e187697bc..c4cd211d1cd7acfa4bfa6b9806fae6304d08769e 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.cpp +++ b/src/operators/fusion_conv_add_bn_relu_op.cpp @@ -45,6 +45,9 @@ void FusionConvAddBNReluOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( + new FusionConvAddBNReluMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -52,7 +55,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); diff --git a/src/operators/fusion_conv_add_bn_relu_op.h b/src/operators/fusion_conv_add_bn_relu_op.h index 4dee4416622e8dee8ca495026843c7506d084617..07bb0146b3f481e09d0a944c4791237e7eea08e4 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.h +++ b/src/operators/fusion_conv_add_bn_relu_op.h @@ -75,46 +75,7 @@ class FusionConvAddBNReluOp protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( - new FusionConvAddBNReluMatcher()); -#define FUSION_CONV_ADD_BN_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( - new FusionConvAddBNReluMatcher()); -#define FUSION_CONV_ADD_BN_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_FPGA - -#ifndef 
FUSION_CONV_ADD_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( - new FusionConvAddBNReluMatcher()); -#define FUSION_CONV_ADD_BN_RELU_REGISTER -#endif - -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_add_bn_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_add_bn_relu); -#endif - #endif diff --git a/src/operators/fusion_conv_add.cpp b/src/operators/fusion_conv_add_op.cpp similarity index 90% rename from src/operators/fusion_conv_add.cpp rename to src/operators/fusion_conv_add_op.cpp index cdd6a6db2bb11ebf8dce2aca85630aa8805adf3e..8cb9cdf22c1b94f1b9c6992ecf2f4b7b3c42105b 100644 --- a/src/operators/fusion_conv_add.cpp +++ b/src/operators/fusion_conv_add_op.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #ifdef FUSION_CONVADD_OP -#include "operators/fusion_conv_add.h" +#include "operators/fusion_conv_add_op.h" #include "operators/math/conv_func.h" namespace paddle_mobile { @@ -45,6 +45,9 @@ void FusionConvAddOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar convadd_registrar( + new FusionConvAddMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -55,7 +58,8 @@ REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp); #ifdef PADDLE_MOBILE_MALI_GPU REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_add, ops::FusionConvAddOp); #endif #endif diff --git a/src/operators/fusion_conv_add.h b/src/operators/fusion_conv_add_op.h similarity index 81% rename from src/operators/fusion_conv_add.h rename to src/operators/fusion_conv_add_op.h index ba1ca997662ce67fdcd8f39d2a12e2f535c5b1a7..365e3afa97c2c2fd82c629302f8a5fddf8abb406 100644 --- a/src/operators/fusion_conv_add.h +++ b/src/operators/fusion_conv_add_op.h @@ -65,40 +65,7 @@ 
class FusionConvAddOp : public framework::OperatorWithKernel< protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef CONV_ADD_REGISTER -static framework::FusionOpRegistrar convadd_registrar( - new FusionConvAddMatcher()); -#define CONV_ADD_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#ifndef CONV_ADD_REGISTER -static framework::FusionOpRegistrar convadd_registrar( - new FusionConvAddMatcher()); -#define CONV_ADD_REGISTER - -#endif - -#endif - -#ifdef PADDLE_MOBILE_FPGA -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_add); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(fusion_conv_add); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/fusion_conv_add_prelu_op.cpp b/src/operators/fusion_conv_add_prelu_op.cpp index 0cd30ae6888cd2372b0968717de14f9ca3c72e18..7d17292c2e8c047aa913c89eea11611d28fe1084 100644 --- a/src/operators/fusion_conv_add_prelu_op.cpp +++ b/src/operators/fusion_conv_add_prelu_op.cpp @@ -44,6 +44,9 @@ void FusionConvAddPReluOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( + new FusionConvAddPReluOpMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -51,7 +54,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_add_prelu, ops::FusionConvAddPReluOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_add_prelu, ops::FusionConvAddPReluOp); diff --git a/src/operators/fusion_conv_add_prelu_op.h b/src/operators/fusion_conv_add_prelu_op.h index 4c968be68230fe6252e72655f47b2a347f720526..0b0763e781daf3d882d0463205b07fdef53b90f5 100644 --- a/src/operators/fusion_conv_add_prelu_op.h +++ b/src/operators/fusion_conv_add_prelu_op.h @@ -71,37 
+71,7 @@ class FusionConvAddPReluOp protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef CONV_ADD_PRELU_REGISTER -#define CONV_ADD_PRELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( - new FusionConvAddPReluOpMatcher()); -#endif - -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA - -#ifndef CONV_ADD_PRELU_REGISTER -#define CONV_ADD_PRELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( - new FusionConvAddPReluOpMatcher()); -#endif - -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_add_prelu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_add_prelu); -#endif - #endif diff --git a/src/operators/fusion_conv_add_relu_op.cpp b/src/operators/fusion_conv_add_relu_op.cpp index 99b770a6c5e3bc89024e467631e129b914f0bcec..7cee23c6b77a45dd1ae4bdfaa6e2e57ccdf10d89 100644 --- a/src/operators/fusion_conv_add_relu_op.cpp +++ b/src/operators/fusion_conv_add_relu_op.cpp @@ -44,6 +44,9 @@ void FusionConvAddReluOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_add_relu_registrar( + new FusionConvAddReluOpMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -51,7 +54,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_add_relu, ops::FusionConvAddReluOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp); diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index 926f309403d37fa8ec1f15f7cb955c1c13842405..1335ce7b6ca5151e3d396856055f38825710f4b1 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ 
b/src/operators/fusion_conv_add_relu_op.h @@ -65,37 +65,7 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel< protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef CONV_ADD_RELU_REGISTER -#define CONV_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_relu_registrar( - new FusionConvAddReluOpMatcher()); -#endif - -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA - -#ifndef CONV_ADD_RELU_REGISTER -#define CONV_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_add_relu_registrar( - new FusionConvAddReluOpMatcher()); -#endif - -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_add_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_add_relu); -#endif - #endif diff --git a/src/operators/fusion_conv_bn_add_relu_op.cpp b/src/operators/fusion_conv_bn_add_relu_op.cpp index 9823a3111e54f5aec90d5518073ca52255706c1a..693eac81fb3617cddbdab9574135a13b30aa0a32 100644 --- a/src/operators/fusion_conv_bn_add_relu_op.cpp +++ b/src/operators/fusion_conv_bn_add_relu_op.cpp @@ -45,6 +45,9 @@ void FusionConvBNAddReluOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_bn_add_relu_registrar( + new FusionConvBNAddReluMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -52,7 +55,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_bn_add_relu, ops::FusionConvBNAddReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_bn_add_relu, ops::FusionConvBNAddReluOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_bn_add_relu, ops::FusionConvBNAddReluOp); diff --git a/src/operators/fusion_conv_bn_add_relu_op.h b/src/operators/fusion_conv_bn_add_relu_op.h index 
62f3ccf37dfbff9720f39fb96b099f6d7eb5ddcc..b2f911363acc4f9d5b3c4407317107efadf3996d 100644 --- a/src/operators/fusion_conv_bn_add_relu_op.h +++ b/src/operators/fusion_conv_bn_add_relu_op.h @@ -80,46 +80,7 @@ class FusionConvBNAddReluOp protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef FUSION_CONV_BN_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_add_relu_registrar( - new FusionConvBNAddReluMatcher()); -#define FUSION_CONV_BN_ADD_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#ifndef FUSION_CONV_BN_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_add_relu_registrar( - new FusionConvBNAddReluMatcher()); -#define FUSION_CONV_BN_ADD_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_FPGA - -#ifndef FUSION_CONV_BN_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_add_relu_registrar( - new FusionConvBNAddReluMatcher()); -#define FUSION_CONV_BN_ADD_RELU_REGISTER -#endif - -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_bn_add_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_bn_add_relu); -#endif - #endif diff --git a/src/operators/fusion_conv_bn_op.cpp b/src/operators/fusion_conv_bn_op.cpp index 470678bfe57a41e66d6f11f3bfd469d97369d939..7e736092721cc08252c78d3848fe9962d8933a24 100644 --- a/src/operators/fusion_conv_bn_op.cpp +++ b/src/operators/fusion_conv_bn_op.cpp @@ -44,6 +44,9 @@ void FusionConvBNOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_bn_registrar( + new FusionConvBNMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -51,7 +54,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_bn, ops::FusionConvBNOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_bn, 
ops::FusionConvBNOp); #endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_bn, ops::FusionConvBNOp); diff --git a/src/operators/fusion_conv_bn_op.h b/src/operators/fusion_conv_bn_op.h index f43e62c9fa5c4b40c07fcb9cbdab4d06ab2c482f..f393928665301da0dd0076b33e81ca79791794f7 100644 --- a/src/operators/fusion_conv_bn_op.h +++ b/src/operators/fusion_conv_bn_op.h @@ -67,39 +67,7 @@ class FusionConvBNOp : public framework::OperatorWithKernel< protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef FUSION_CONV_BN_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_registrar( - new FusionConvBNMatcher()); -#define FUSION_CONV_BN_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#endif - -#ifdef PADDLE_MOBILE_FPGA - -#ifndef FUSION_CONV_BN_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_registrar( - new FusionConvBNMatcher()); -#define FUSION_CONV_BN_REGISTER -#endif -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_bn); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_bn); -#endif - #endif diff --git a/src/operators/fusion_conv_bn_relu_op.cpp b/src/operators/fusion_conv_bn_relu_op.cpp index bfc9b99ea796bfdcc1a4ae1a23b2e39e8a513393..c5c403c1942e03dd9ef1cb04477a671374577859 100644 --- a/src/operators/fusion_conv_bn_relu_op.cpp +++ b/src/operators/fusion_conv_bn_relu_op.cpp @@ -45,6 +45,9 @@ void FusionConvBNReluOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar( + new FusionConvBNReluMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -52,7 +55,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_conv_bn_relu, ops::FusionConvBNReluOp); #endif 
#ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_conv_bn_relu, ops::FusionConvBNReluOp); diff --git a/src/operators/fusion_conv_bn_relu_op.h b/src/operators/fusion_conv_bn_relu_op.h index 2b5ff4ea9d3e77ad9449b3968667ecc4558c2147..a6bbe72500ccfe2b43e21496c5abc18b9a562d47 100644 --- a/src/operators/fusion_conv_bn_relu_op.h +++ b/src/operators/fusion_conv_bn_relu_op.h @@ -72,39 +72,7 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel< protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef FUSION_CONV_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar( - new FusionConvBNReluMatcher()); -#define FUSION_CONV_BN_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#endif - -#ifdef PADDLE_MOBILE_FPGA - -#ifndef FUSION_CONV_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar( - new FusionConvBNReluMatcher()); -#define FUSION_CONV_BN_RELU_REGISTER -#endif -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_conv_bn_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_conv_bn_relu); -#endif - #endif diff --git a/src/operators/fusion_dwconv_bn_relu_op.cpp b/src/operators/fusion_dwconv_bn_relu_op.cpp index e55295830e19b5b39a5ae2501e30170ffb1a7854..4a53e183df19313569dcdfdaea9b9650a58b9633 100644 --- a/src/operators/fusion_dwconv_bn_relu_op.cpp +++ b/src/operators/fusion_dwconv_bn_relu_op.cpp @@ -45,6 +45,9 @@ void FusionDWConvBNReluOp::InferShape() const { this->param_.Output()->Resize(ddim); } +static framework::FusionOpRegistrar fusion_dwconv_bn_relu_registrar( + new FusionDWConvBNReluMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -52,7 +55,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_dwconv_bn_relu, ops::FusionDWConvBNReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 
+REGISTER_OPERATOR_X86(fusion_dwconv_bn_relu, ops::FusionDWConvBNReluOp); #endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/fusion_dwconv_bn_relu_op.h b/src/operators/fusion_dwconv_bn_relu_op.h index dd1f85688f576106a46cd3070ab2034ec8f55881..44a1f845bc9b2dc0251fb729de9f9c00071fd492 100644 --- a/src/operators/fusion_dwconv_bn_relu_op.h +++ b/src/operators/fusion_dwconv_bn_relu_op.h @@ -73,38 +73,7 @@ class FusionDWConvBNReluOp protected: }; -#ifdef PADDLE_MOBILE_CPU - -#ifndef FUSION_DWCONV_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_dwconv_bn_relu_registrar( - new FusionDWConvBNReluMatcher()); -#define FUSION_DWCONV_BN_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU - -#ifndef FUSION_DWCONV_BN_RELU_REGISTER -static framework::FusionOpRegistrar fusion_dwconv_bn_relu_registrar( - new FusionDWConvBNReluMatcher()); -#define FUSION_DWCONV_BN_RELU_REGISTER -#endif - -#endif - -#ifdef PADDLE_MOBILE_FPGA -#endif - } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_dwconv_bn_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/fusion_elementwise_add_relu_op.cpp b/src/operators/fusion_elementwise_add_relu_op.cpp index fa2739ab4283c1fbb35e541ed2d40ea7a1904580..82c2957ec8f2bfc84a45b2ceec85823ecaab5254 100644 --- a/src/operators/fusion_elementwise_add_relu_op.cpp +++ b/src/operators/fusion_elementwise_add_relu_op.cpp @@ -25,6 +25,9 @@ void FusionElementwiseAddReluOp::InferShape() const { this->param_.Out()->Resize(x_dim); } +static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( + new FusioneElementwiseAddReluMatcher()); + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fusion_elementwise_add_relu_op.h b/src/operators/fusion_elementwise_add_relu_op.h index 2a92f1e2471cb9e14d84ef03e4bfb872fc738d68..6434e726ccd8df8cf97736bfa65904674c73ad03 100644 --- 
a/src/operators/fusion_elementwise_add_relu_op.h +++ b/src/operators/fusion_elementwise_add_relu_op.h @@ -61,39 +61,7 @@ class FusionElementwiseAddReluOp protected: }; -#ifdef PADDLE_MOBILE_CPU -#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( - new FusioneElementwiseAddReluMatcher()); -#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER -#endif -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU -#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( - new FusioneElementwiseAddReluMatcher()); -#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER -#endif -#endif - -#ifdef PADDLE_MOBILE_FPGA -#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER -static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( - new FusioneElementwiseAddReluMatcher()); -#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER -#endif -#endif } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_elementwise_add_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_elementwise_add_relu); -#endif - #endif diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp index 9fa80fbf12d0fe300921418705b6900108c68faf..7ec8150b600e0ee21a7c40fcf266a1a8c79db164 100644 --- a/src/operators/fusion_fc_op.cpp +++ b/src/operators/fusion_fc_op.cpp @@ -15,9 +15,12 @@ limitations under the License. 
*/ #ifdef FUSION_FC_OP #include "operators/fusion_fc_op.h" + namespace paddle_mobile { namespace operators { +static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); + template void FusionFcOp::InferShape() const { auto x_dims = this->param_.InputX()->dims(); @@ -54,14 +57,15 @@ void FusionFcOp::InferShape() const { } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU + +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU +#elif defined(PADDLE_MOBILE_MALI_GPU) REGISTER_OPERATOR_MALI_GPU(fusion_fc, ops::FusionFcOp); -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_FPGA) REGISTER_OPERATOR_FPGA(fusion_fc, ops::FusionFcOp); +#else +REGISTER_OPERATOR_X86(fusion_fc, ops::FusionFcOp); #endif -#endif +#endif // FUSION_FC_OP diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index 076a95d745e8d44a417dd95fb75844a67b11e653..722c5225bc035df2761154a08a521a09b34a1e82 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -25,8 +25,7 @@ limitations under the License. 
*/ namespace paddle_mobile { namespace operators { -using std::string; -using std::vector; + class FusionFcMatcher : public framework::FusionOpMatcher { public: FusionFcMatcher() { @@ -49,7 +48,7 @@ class FusionFcOp : public framework::OperatorWithKernel< DeviceType, FusionFcParam, operators::FusionFcKernel> { public: - FusionFcOp(const string &type, const VariableNameMap &inputs, + FusionFcOp(const std::string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap &attrs, std::shared_ptr scope) @@ -60,42 +59,11 @@ class FusionFcOp : public framework::OperatorWithKernel< using framework::OperatorWithKernel< DeviceType, FusionFcParam, operators::FusionFcKernel>::OperatorWithKernel; - void InferShape() const override; - protected: + void InferShape() const override; }; -#ifdef PADDLE_MOBILE_CPU -#ifndef FUSION_FC_REGISTER -static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); -#define FUSION_FC_REGISTER -#endif -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU -#ifndef FUSION_FC_REGISTER -static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); -#define FUSION_FC_REGISTER -#endif -#endif - -#ifdef PADDLE_MOBILE_FPGA -#ifndef FUSION_FC_REGISTER -static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); -#define FUSION_FC_REGISTER -#endif -#endif } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_fc); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(fusion_fc); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_fc); -#endif - -#endif +#endif // FUSION_FC_OP diff --git a/src/operators/fusion_fc_relu_op.cpp b/src/operators/fusion_fc_relu_op.cpp index 97568323a3c204da06546ffc6b4d9a2483e95848..520372c6fb8e2c621aa6857ccaed2a9094f00dca 100644 --- a/src/operators/fusion_fc_relu_op.cpp +++ b/src/operators/fusion_fc_relu_op.cpp @@ -50,6 +50,9 @@ void FusionFcReluOp::InferShape() const { this->param_.Out()->Resize(ddim); } +static 
framework::FusionOpRegistrar fc_relu_registrar( + new FusionFcReluMatcher()); + } // namespace operators } // namespace paddle_mobile @@ -63,5 +66,8 @@ REGISTER_OPERATOR_MALI_GPU(fusion_fc_relu, ops::FusionFcReluOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_fc_relu, ops::FusionFcReluOp); #endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(fusion_fc_relu, ops::FusionFcReluOp); +#endif #endif diff --git a/src/operators/fusion_fc_relu_op.h b/src/operators/fusion_fc_relu_op.h index fa7d4045fc10d6e240d93e129aa736be793f7bbf..5cd884f04e819ac881c3b2a4ad666591ea610117 100644 --- a/src/operators/fusion_fc_relu_op.h +++ b/src/operators/fusion_fc_relu_op.h @@ -64,39 +64,7 @@ class FusionFcReluOp : public framework::OperatorWithKernel< protected: }; -#ifdef PADDLE_MOBILE_CPU -#ifndef FUSION_FC_RELU_REGISTER -static framework::FusionOpRegistrar fc_relu_registrar( - new FusionFcReluMatcher()); -#define FUSION_FC_RELU_REGISTER -#endif -#endif - -#ifdef PADDLE_MOBILE_MALI_GPU -#ifndef FUSION_FC_RELU_REGISTER -static framework::FusionOpRegistrar fc_relu_registrar( - new FusionFcReluMatcher()); -#define FUSION_FC_RELU_REGISTER -#endif -#endif - -#ifdef PADDLE_MOBILE_FPGA -#ifndef FUSION_FC_RELU_REGISTER -static framework::FusionOpRegistrar fc_relu_registrar( - new FusionFcReluMatcher()); -#define FUSION_FC_RELU_REGISTER -#endif -#endif } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(fusion_fc_relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(fusion_fc_relu); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(fusion_fc_relu); -#endif #endif // FUSION_FC_RELU_OP diff --git a/src/operators/gru_op.cpp b/src/operators/gru_op.cpp index c141cbc06531fabcf5e29546e832480cff850b8c..ac64b5f541c436a160fd1f6713931237e7c0239b 100644 --- a/src/operators/gru_op.cpp +++ b/src/operators/gru_op.cpp @@ -64,7 +64,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(gru, ops::GruOp); #endif 
-#ifdef PADDLE_MOBILE_MALI_GPU +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(gru, ops::GruOp); #endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/gru_op.h b/src/operators/gru_op.h index d348b6c52431f93673f1b772f8c8a9462878cfd5..a45d3efe5b4c59f8582c534f85de7cc1ac82df85 100644 --- a/src/operators/gru_op.h +++ b/src/operators/gru_op.h @@ -47,12 +47,4 @@ class GruOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(gru); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/im2sequence_op.h b/src/operators/im2sequence_op.h index edb87d0012e5514cb5541f94a965965f3dc02825..50d5664c1a3ce999a0c163225d20126961804a22 100644 --- a/src/operators/im2sequence_op.h +++ b/src/operators/im2sequence_op.h @@ -50,12 +50,4 @@ class Im2SequenceOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(im2sequence); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/kernel/arm/bilinear_interp_kernel.cpp b/src/operators/kernel/arm/bilinear_interp_kernel.cpp index ea5ff627d7ea2e0fa5434f9f7fc9f5ec44ce60a7..4888f7a37a47fe80ffcbaee7e3f80b1d5c1f20f4 100644 --- a/src/operators/kernel/arm/bilinear_interp_kernel.cpp +++ b/src/operators/kernel/arm/bilinear_interp_kernel.cpp @@ -11,7 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#pragma once #ifdef BILINEAR_INTERP_OP diff --git a/test/operators/test_gru_op.cpp b/src/operators/kernel/arm/dequantize_kernel.cpp similarity index 58% rename from test/operators/test_gru_op.cpp rename to src/operators/kernel/arm/dequantize_kernel.cpp index 52ab8b54d709391ea263b74a395a635ce50a18af..d773e24fc634bf8a6f5b016ae1d7d69747688ebe 100644 --- a/test/operators/test_gru_op.cpp +++ b/src/operators/kernel/arm/dequantize_kernel.cpp @@ -12,18 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "../test_include.h" -#include "operators/gru_op.h" +#include "operators/kernel/dequantize_kernel.h" -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_nlp); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); +namespace paddle_mobile { +namespace operators { - Executor4Test> - executor(program, "gru"); +template<> +bool DequantizeKernel::Init(DequantizeParam *param) { + return true; +} - return 0; +template<> +void DequantizeKernel::Compute( + const DequantizeParam ¶m) const { + // TODO } + +} // namespace paddle_mobile +} // namespace operators diff --git a/src/operators/kernel/arm/flatten_kernel.cpp b/src/operators/kernel/arm/flatten_kernel.cpp index 6866b740aa945852050e7fca4991489f48435150..ef4fe913c4800526f46daa75760afe82fdbee591 100644 --- a/src/operators/kernel/arm/flatten_kernel.cpp +++ b/src/operators/kernel/arm/flatten_kernel.cpp @@ -11,7 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#pragma once #ifdef FLATTEN_OP diff --git a/src/operators/kernel/arm/quantize_kernel.cpp b/src/operators/kernel/arm/quantize_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..919c6128de7fc9366213dd10fb4107056e9dc0c2 --- /dev/null +++ b/src/operators/kernel/arm/quantize_kernel.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "operators/kernel/quantize_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template<> +bool QuantizeKernel::Init(QuantizeParam *param) { + return true; +} + +template<> +void QuantizeKernel::Compute( + const QuantizeParam ¶m) const { + // TODO +} + +} // namespace paddle_mobile +} // namespace operators diff --git a/src/operators/kernel/arm/shape_kernel.cpp b/src/operators/kernel/arm/shape_kernel.cpp index 69fd4021fe3110a7cea02a67443939048c1dddab..1687cfb4cdaf12eb2be9d465a83b82034b59f7cc 100644 --- a/src/operators/kernel/arm/shape_kernel.cpp +++ b/src/operators/kernel/arm/shape_kernel.cpp @@ -11,7 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#pragma once #ifdef SHAPE_OP diff --git a/src/operators/kernel/arm/split_kernel.cpp b/src/operators/kernel/arm/split_kernel.cpp index 292b5bda99a524615df4a8552e5617fd4470d8a0..d2ca34f764adc50154fb58e3a6248f9311bbface 100644 --- a/src/operators/kernel/arm/split_kernel.cpp +++ b/src/operators/kernel/arm/split_kernel.cpp @@ -11,7 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#pragma once #ifdef SPLIT_OP diff --git a/src/operators/kernel/central-arm-func/conv_add_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_arm_func.h index d71bc235977236fbd0dd332df556ea4bd41eacf4..643ee84529e01aebc33a144b4c7a8181ff39a1c9 100644 --- a/src/operators/kernel/central-arm-func/conv_add_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_add_arm_func.h @@ -129,13 +129,10 @@ void ConvAddCompute(const FusionConvAddParam ¶m) { // param.Paddings(), // param.Filter(), param.Bias(), // param.Output(), false); - if (param.Paddings()[0] == 0) { - math::DepthwiseConv3x3s2p0(param.Input(), param.Filter(), param.Output(), + + math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(), param.Output(), *param.Bias(), true); - } else { - math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(), - param.Output(), *param.Bias(), true); - } + } else { ConvAddBasic(param); } diff --git a/src/operators/kernel/central-arm-func/flatten_arm_func.h b/src/operators/kernel/central-arm-func/flatten_arm_func.h index 396658013310a84c763f90f7cec515fba4fd7e4e..8c803a20df10431dc54c00fc31fc17fcc8659d63 100644 --- a/src/operators/kernel/central-arm-func/flatten_arm_func.h +++ b/src/operators/kernel/central-arm-func/flatten_arm_func.h @@ -13,11 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #ifdef FLATTEN_OP - -#ifndef RESHAPE_OP -#define RESHAPE_OP -#endif - #pragma once #include diff --git a/src/operators/kernel/central-arm-func/prior_box_arm_func.h b/src/operators/kernel/central-arm-func/prior_box_arm_func.h index e783c52f8184d6e09b04cd5c8210f5b89276541e..7129996319aac7c71836d8706eb5c02300e576e6 100644 --- a/src/operators/kernel/central-arm-func/prior_box_arm_func.h +++ b/src/operators/kernel/central-arm-func/prior_box_arm_func.h @@ -16,7 +16,6 @@ limitations under the License. */ #pragma once #include -#include #include namespace paddle_mobile { @@ -90,8 +89,26 @@ void PriorBoxCompute(const PriorBoxParam ¶m) { int idx = 0; for (size_t s = 0; s < min_sizes.size(); ++s) { auto min_size = min_sizes[s]; - if (param.MinMaxAspectRatiosOrder()) { - box_width = box_height = min_size / 2.; + // priors with different aspect ratios + for (float ar : aspect_ratios) { + box_width = min_size * sqrt(ar) / 2.; + box_height = min_size / sqrt(ar) / 2.; + /// box_width/2 , / img_width 为了得到feature map 相对于 + /// 原图的归一化位置的比例。 + output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 0] = + (center_x - box_width) / img_width; + output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 1] = + (center_y - box_height) / img_height; + output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 2] = + (center_x + box_width) / img_width; + output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 3] = + (center_y + box_height) / img_height; + idx++; + } + if (!max_sizes.empty()) { + auto max_size = max_sizes[s]; + // square prior with size sqrt(minSize * maxSize) + box_width = box_height = sqrt(min_size * max_size) / 2.; output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 0] = (center_x - box_width) / img_width; output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 1] = @@ -101,73 +118,6 @@ void PriorBoxCompute(const PriorBoxParam ¶m) { output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + 3] = (center_y + 
box_height) / img_height; idx++; - - if (max_sizes.size() > 0) { - auto max_size = max_sizes[s]; - // square prior with size sqrt(minSize * maxSize) - box_width = box_height = sqrt(min_size * max_size) / 2.; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 0] = (center_x - box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 1] = (center_y - box_height) / img_height; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 2] = (center_x + box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 3] = (center_y + box_height) / img_height; - idx++; - } - - // priors with different aspect ratios - for (float ar : aspect_ratios) { - if (fabs(ar - 1.) < 1e-6) { - continue; - } - box_width = min_size * sqrt(ar) / 2.; - box_height = min_size / sqrt(ar) / 2.; - /// box_width/2 , / img_width 为了得到feature map 相对于 - /// 原图的归一化位置的比例。 - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 0] = (center_x - box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 1] = (center_y - box_height) / img_height; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 2] = (center_x + box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 3] = (center_y + box_height) / img_height; - idx++; - } - - } else { - // priors with different aspect ratios - for (float ar : aspect_ratios) { - box_width = min_size * sqrt(ar) / 2.; - box_height = min_size / sqrt(ar) / 2.; - /// box_width/2 , / img_width 为了得到feature map 相对于 - /// 原图的归一化位置的比例。 - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 0] = (center_x - box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 1] = (center_y - box_height) / img_height; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 2] = (center_x + box_width) / img_width; - output_boxes_dataptr[h * 
stride0 + w * stride1 + idx * stride2 + - 3] = (center_y + box_height) / img_height; - idx++; - } - if (!max_sizes.empty()) { - auto max_size = max_sizes[s]; - // square prior with size sqrt(minSize * maxSize) - box_width = box_height = sqrt(min_size * max_size) / 2.; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 0] = (center_x - box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 1] = (center_y - box_height) / img_height; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 2] = (center_x + box_width) / img_width; - output_boxes_dataptr[h * stride0 + w * stride1 + idx * stride2 + - 3] = (center_y + box_height) / img_height; - idx++; - } } } } diff --git a/src/operators/kernel/central-arm-func/sigmoid_arm_func.h b/src/operators/kernel/central-arm-func/sigmoid_arm_func.h index 1076fa49d555d14da76ff08a67c0943fb9ab115a..c782171e59ca7077ebb5622ad550dd0906d9f441 100644 --- a/src/operators/kernel/central-arm-func/sigmoid_arm_func.h +++ b/src/operators/kernel/central-arm-func/sigmoid_arm_func.h @@ -24,7 +24,9 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { + using framework::DDim; + void sigmoid(const Tensor *X, Tensor *Y) { #ifdef __ARM_NEON const float *input = X->data(); diff --git a/src/operators/kernel/dequantize_kernel.h b/src/operators/kernel/dequantize_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..fb575fc812cef7ce2cda333630ee985914d4cb94 --- /dev/null +++ b/src/operators/kernel/dequantize_kernel.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +class DequantizeKernel : public + framework::OpKernelBase> { + public: + void Compute(const DequantizeParam ¶m) const; + bool Init(DequantizeParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp index 86da2833ed6e1443707054896127e87c0ca297b9..9de1511746f70c225e2d978a43b43cb34ad9143f 100644 --- a/src/operators/kernel/fpga/concat_kernel.cpp +++ b/src/operators/kernel/fpga/concat_kernel.cpp @@ -43,7 +43,7 @@ bool ConcatKernel::Init(ConcatParam *param) { fpga::format_concat_output(out, (int)height, (int)width, (int)image_num, channel_num); - fpga::ConcatArgs concatArgs = {0}; + fpga::ConcatArgs concatArgs; concatArgs.image_num = (uint32_t)image_num; concatArgs.images_in = images_in; concatArgs.scales_in = scales_in; diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp index 671df76967b4537d111695cdbe091b9c7de2c5a2..84b9d6b0ddd9a1577ee37d095cabed2a8a2fe5a2 100644 --- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp @@ -23,7 +23,7 @@ template <> bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { bool relu_enabled = false; auto input = const_cast(param->Input()); - + auto input_ptr = input->data(); auto bias = param->Bias(); auto bias_ptr = bias->data(); auto filter = 
const_cast(param->Filter()); @@ -62,11 +62,11 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { fpga::format_filter(filter, max_value, param->Groups()); int element_num_per_div = - fpga::get_filter_num_per_div(filter, param->Groups()); + fpga::get_element_num_per_div(filter, param->Groups()); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); + fpga::format_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, param->Groups(), param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); @@ -80,6 +80,7 @@ void ConvAddBNKernel::Compute( const FusionConvAddBNParam ¶m) const { fpga::ComputeFpgaConv(param.FpgaArgs()); } +template class ConvAddBNKernel; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp index d435692db6b40568afc599733c2adb6b05b00ffa..e38ae9240534b17e97d7ee1c68bffb25a8aedf71 100644 --- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp @@ -24,6 +24,7 @@ bool ConvAddBNReluKernel::Init( FusionConvAddBNReluParam *param) { bool relu_enabled = true; auto input = const_cast(param->Input()); + auto input_ptr = input->data(); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); @@ -57,14 +58,16 @@ bool ConvAddBNReluKernel::Init( float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, param->Groups()); + auto filter_ptr = filter->data(); int element_num_per_div = - fpga::get_filter_num_per_div(filter, param->Groups()); + fpga::get_element_num_per_div(filter, param->Groups()); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); + fpga::format_ofm(out); + auto 
out_ptr = out->mutable_data(); - fpga::WrapperConvArgs conv_arg = {0}; + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, param->Groups(), param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); @@ -77,6 +80,7 @@ void ConvAddBNReluKernel::Compute( const FusionConvAddBNReluParam ¶m) const { fpga::ComputeFpgaConv(param.FpgaArgs()); } +template class ConvAddBNReluKernel; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp index 32d90b36e4c14a60219a3779da03100651aa2f13..31f28df5103942750758040ab983e2c0298a8cfd 100644 --- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp @@ -23,6 +23,7 @@ template <> bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { bool relu_enabled = true; auto input = const_cast(param->Input()); + auto input_ptr = input->data(); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); @@ -39,14 +40,16 @@ bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, param->Groups()); + auto filter_ptr = filter->data(); int element_num_per_div = - fpga::get_filter_num_per_div(filter, param->Groups()); + fpga::get_element_num_per_div(filter, param->Groups()); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); + fpga::format_ofm(out); + auto out_ptr = out->mutable_data(); - fpga::WrapperConvArgs conv_arg = {0}; + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, param->Groups(), param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); @@ -59,6 +62,7 @@ void ConvAddReluKernel::Compute( const FusionConvAddReluParam 
¶m) const { fpga::ComputeFpgaConv(param.FpgaArgs()); } +template class ConvAddReluKernel; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp index 4263c9c40491366813d3c9a5bf7dbc8ae976d39e..8818e98c376ab4e33d399bdf429e5b01928672e2 100644 --- a/src/operators/kernel/fpga/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp @@ -15,6 +15,7 @@ limitations under the License. */ #ifdef FUSION_CONVBN_OP #include "operators/kernel/conv_bn_kernel.h" +#include "fpga/api.h" namespace paddle_mobile { namespace operators { @@ -23,6 +24,7 @@ template <> bool ConvBNKernel::Init(FusionConvBNParam *param) { bool relu_enabled = false; auto input = const_cast(param->Input()); + auto input_ptr = input->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); auto bn_mean_ptr = param->InputMean()->data(); @@ -32,8 +34,10 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { const float epsilon = param->Epsilon(); PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0], "Output channel should be equal to bias number"); + const int channel = out->dims()[1]; - auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + auto bs_ptr = + reinterpret_cast(fpga::fpga_malloc(2 * channel * sizeof(float))); auto new_scale = new Tensor(); auto new_bias = new Tensor(); auto new_scale_ptr = new_scale->mutable_data({channel}); @@ -51,14 +55,16 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, param->Groups()); + auto filter_ptr = filter->data(); int element_num_per_div = - fpga::get_filter_num_per_div(filter, param->Groups()); + fpga::get_element_num_per_div(filter, param->Groups()); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); + fpga::format_ofm(out); + auto out_ptr = 
out->mutable_data(); - fpga::WrapperConvArgs conv_arg = {0}; + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, param->Groups(), param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); @@ -71,6 +77,7 @@ void ConvBNKernel::Compute( const FusionConvBNParam ¶m) const { fpga::ComputeFpgaConv(param.FpgaArgs()); } +template class ConvBNKernel; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp index 3d6e0faa5fe3d4ef3514bbe1679298b11d96727c..8fe4425a23de2b4b16b241bf65d893d10132cc2e 100644 --- a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp @@ -23,6 +23,7 @@ template <> bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { bool relu_enabled = true; auto input = const_cast(param->Input()); + auto input_ptr = input->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); auto bn_mean_ptr = param->InputMean()->data(); @@ -51,14 +52,29 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, param->Groups()); + auto filter_ptr = filter->data(); int element_num_per_div = - fpga::get_filter_num_per_div(filter, param->Groups()); + fpga::get_element_num_per_div(filter, param->Groups()); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); + fpga::format_ofm(out); + auto out_ptr = out->mutable_data(); - fpga::WrapperConvArgs conv_arg = {0}; + fpga::WrapperConvArgs convArgs; + convArgs.group_num = (uint32_t)param->Groups(); + convArgs.split_num = (uint32_t)fpga::get_plit_num(filter); + convArgs.filter_num = (uint32_t)filter->dims()[0]; + convArgs.output.address = out_ptr; + convArgs.output.scale_address = out->scale; + convArgs.conv_args = (fpga::ConvArgs 
*)fpga::fpga_malloc( + convArgs.split_num * sizeof(fpga::ConvArgs)); + param->SetFpgaArgs(convArgs); + + int element_num = fpga::get_aligned_filter_element_num( + filter->dims()[1] * filter->dims()[2] * filter->dims()[3]); + + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, param->Groups(), param->Strides()[0], param->Strides()[1], param->Paddings()[0], param->Paddings()[1], bs_ptr); @@ -71,6 +87,7 @@ void ConvBNReluKernel::Compute( const FusionConvBNReluParam ¶m) const { fpga::ComputeFpgaConv(param.FpgaArgs()); } +template class ConvBNReluKernel; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/dropout_kernel.cpp b/src/operators/kernel/fpga/dropout_kernel.cpp index b0981c4254060996a16f4ae5beabb7c22edd6d34..3a4dd216d481322a9228cfd247bf6f0d0098177e 100644 --- a/src/operators/kernel/fpga/dropout_kernel.cpp +++ b/src/operators/kernel/fpga/dropout_kernel.cpp @@ -27,7 +27,13 @@ bool DropoutKernel::Init(DropoutParam *param) { template <> void DropoutKernel::Compute( - const DropoutParam ¶m) const {} + const DropoutParam ¶m) const { + // auto *input_x = param.InputX(); + // auto *out = param.Out(); + // auto input_x_ptr = input_x->data(); + // auto out_ptr = out->mutable_data(); + // out_ptr = const_cast(input_x_ptr); +} } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp index f0d8533641941fe43a6d06b49266ac06646a7b4d..9840f495e89a3e63990bf5f10c65cf4afe8d0854 100644 --- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp @@ -27,10 +27,10 @@ bool ElementwiseAddReluKernel::Init( auto *out = param->Out(); auto input_x_ptr = input_x->data(); auto input_y_ptr = input_y->data(); - fpga::format_fp16_ofm(out); + fpga::format_ofm(out); auto out_ptr = out->mutable_data(); - fpga::EWAddArgs 
ewaddArgs = {0}; + fpga::EWAddArgs ewaddArgs; ewaddArgs.relu_enabled = relu_enabled; ewaddArgs.const0 = 1; ewaddArgs.const1 = 1; diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp index 38b39f982ce41c7d5a88b82f21e446b05c859a2c..48d7425fcb7a3c630165fe4a7d26875a4f4a0a9d 100644 --- a/src/operators/kernel/fpga/fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp @@ -21,6 +21,7 @@ template <> bool FusionFcReluKernel::Init(FusionFcReluParam *param) { bool relu_enabled = true; auto input_x = const_cast(param->InputX()); + auto input_x_ptr = input_x->data(); auto filter = const_cast(param->InputY()); auto input_z = param->InputZ(); auto input_z_ptr = input_z->data(); @@ -46,12 +47,14 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) { filter->Resize(framework::make_ddim({num, filter_channel, height, width})); float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, 1); + auto filter_ptr = filter->data(); - int element_num_per_div = fpga::get_filter_num_per_div(filter, 1); + int element_num_per_div = fpga::get_element_num_per_div(filter, 1); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; + auto out_ptr = out->mutable_data(); + + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, 0, bs_ptr); param->SetFpgaArgs(conv_arg); diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp index 6dee8ea6a7e1b26bec4ffd3ed324db4a4ac3be2d..ccc6009700c98f1f94835a7e21a83de1faade1f0 100644 --- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -22,6 +22,7 @@ template <> bool FusionFcKernel::Init(FusionFcParam *param) { bool relu_enabled = false; auto input_x = const_cast(param->InputX()); + auto input_x_ptr = input_x->data(); auto 
filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); auto input_z_ptr = input_z->data(); @@ -47,12 +48,14 @@ bool FusionFcKernel::Init(FusionFcParam *param) { filter->Resize(framework::make_ddim({num, filter_channel, height, width})); float max_value = fpga::filter_find_max(filter); fpga::format_filter(filter, max_value, 1); + auto filter_ptr = filter->data(); - int element_num_per_div = fpga::get_filter_num_per_div(filter, 1); + int element_num_per_div = fpga::get_element_num_per_div(filter, 1); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; + auto out_ptr = out->mutable_data(); + + fpga::WrapperConvArgs conv_arg; fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, 0, bs_ptr); param->SetFpgaArgs(conv_arg); diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp index 4dad2f789baeb6e381c66ed861b8a8360fa2996e..d3df951dbc340814d766f76e8720c3aaef2f3539 100644 --- a/src/operators/kernel/fpga/pool_kernel.cpp +++ b/src/operators/kernel/fpga/pool_kernel.cpp @@ -24,13 +24,13 @@ bool PoolKernel::Init(PoolParam *param) { auto *input = const_cast(param->Input()); auto input_ptr = input->data(); Tensor *output = param->Output(); - fpga::format_fp16_ofm(output); + fpga::format_ofm(output); auto output_ptr = output->mutable_data(); vector ksize = param->Ksize(); vector strides = param->Strides(); vector paddings = param->Paddings(); - fpga::PoolingArgs poolArgs = {0}; + fpga::PoolingArgs poolArgs; poolArgs.image.address = input_ptr; poolArgs.image.channels = (uint32_t)input->dims()[1]; poolArgs.image.height = (uint32_t)input->dims()[2]; @@ -39,7 +39,7 @@ bool PoolKernel::Init(PoolParam *param) { poolArgs.image.pad_width = (uint32_t)paddings[1]; poolArgs.image.scale_address = input->scale; poolArgs.output.address = output_ptr; - poolArgs.output.scale_address = output->scale; + 
poolArgs.output.scale_address = input->scale; poolArgs.kernel.height = (uint32_t)ksize[0]; poolArgs.kernel.width = (uint32_t)ksize[1]; poolArgs.kernel.stride_h = (uint32_t)strides[0]; @@ -50,7 +50,9 @@ bool PoolKernel::Init(PoolParam *param) { template <> void PoolKernel::Compute(const PoolParam ¶m) const { +#ifdef PADDLE_MOBILE_FPGA fpga::ComputeFpgaPool(param.FpgaArgs()); +#endif } } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp index 7cfd0c7d76c1a8e73955dbec1971d86ceebde259..20c86a5c73bc9c35b8f8fd430013bb97d269fb4a 100644 --- a/src/operators/kernel/fpga/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/softmax_kernel.cpp @@ -24,43 +24,31 @@ namespace operators { template <> bool SoftmaxKernel::Init(SoftmaxParam *param) { - auto input = const_cast(param->InputX()); - auto input_ptr = input->data(); - auto float_input = new Tensor; - float_input->mutable_data(input->dims()); - fpga::format_fp32_ofm(float_input); - - fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; - args.input_layout_type = fpga::LAYOUT_HWC; - args.output_layout_type = fpga::LAYOUT_CHW; - args.input_data_type = fpga::DATA_TYPE_FP16; - args.output_data_type = fpga::DATA_TYPE_FP32; - args.image.address = input_ptr; - args.image.height = 1; - args.image.width = 1; - args.image.channels = (uint32_t)input->dims()[1]; - args.output.address = float_input->mutable_data(); + const Tensor *input = param->InputX(); - param->SetFloatInput(float_input); + auto input_ptr = input->data(); + auto output = param->Out(); + auto output_ptr = output->mutable_data(); + fpga::BypassArgs args; + args.convert_type = fpga::DATA_FP16_TO_FP32; + args.layout_type = fpga::LAYOUT_NO_CONVERT; + args.image.address = (void *)(input_ptr); + args.image.height = (uint32_t)input->dims()[0]; + args.image.width = (uint32_t)input->dims()[1]; + args.image.channels = 1; + args.output.address = output_ptr; 
param->SetFpgaArgs(args); + return true; } template <> void SoftmaxKernel::Compute( const SoftmaxParam ¶m) const { - Tensor *in_x = param.FloatInput(); - Tensor *out = param.Out(); - - fpga::PerformBypass(param.FpgaArgs()); - fpga::fpga_invalidate( - (void *)in_x->data(), - (size_t)fpga::get_align_image_cw((int)in_x->dims()[1]) * sizeof(float)); - - math::SoftmaxFuntor()(in_x, out); - fpga::fpga_flush(out->data(), out->memory_size()); + // SoftmaxCompute(param); } +template class SoftmaxKernel; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fusion_fc_kernel.h b/src/operators/kernel/fusion_fc_kernel.h index 39cfd898a203e742168a775ec892e562bd19f5db..06d3981bd23708aee982e38d82ba592d69733a89 100644 --- a/src/operators/kernel/fusion_fc_kernel.h +++ b/src/operators/kernel/fusion_fc_kernel.h @@ -30,6 +30,7 @@ class FusionFcKernel void Compute(const FusionFcParam& param) const; bool Init(FusionFcParam* param); }; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/quantize_kernel.h b/src/operators/kernel/quantize_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..ee0a9e64896ec7f74fa413332a2045ad30ccd338 --- /dev/null +++ b/src/operators/kernel/quantize_kernel.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +class QuantizeKernel : public + framework::OpKernelBase> { + public: + void Compute(const QuantizeParam ¶m) const; + bool Init(QuantizeParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile diff --git a/src/operators/kernel/resize_kernel.h b/src/operators/kernel/resize_kernel.h index 4c06429858b9575ffc061c000e4a9343fa7eee26..7102d2f4bc9bc64d53fa40697cf2b7a68d8be566 100644 --- a/src/operators/kernel/resize_kernel.h +++ b/src/operators/kernel/resize_kernel.h @@ -23,6 +23,7 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { + template inline framework::DDim CalOutputShape(const ResizeParam ¶m) { const auto *input_x = param.InputX(); diff --git a/src/operators/kernel/sigmoid_kernel.h b/src/operators/kernel/sigmoid_kernel.h index e8cfe6cad9ce2f25b9f38e1784ded9ea0741ff9a..e68f215b00aa2f9faba850853efe4896752a8f7b 100644 --- a/src/operators/kernel/sigmoid_kernel.h +++ b/src/operators/kernel/sigmoid_kernel.h @@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifdef SIGMOID_OP - #pragma once +#ifdef SIGMOID_OP + #include "framework/operator.h" #include "operators/op_param.h" + namespace paddle_mobile { namespace operators { + using framework::OpKernelBase; -void sigmoid(const Tensor* X, Tensor* Y); + template class SigmoidKernel : public OpKernelBase> { @@ -29,6 +31,7 @@ class SigmoidKernel void Compute(const SigmoidParam& param) const override; bool Init(SigmoidParam* param); }; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/x86/batchnorm_kernel.cpp b/src/operators/kernel/x86/batchnorm_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f896e51cf846104b60b3b919671265cf3741ce6e --- /dev/null +++ b/src/operators/kernel/x86/batchnorm_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef BATCHNORM_OP + +#include "operators/kernel/batchnorm_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool BatchNormKernel::Init(BatchNormParam *param) { + return true; +} + +template <> +void BatchNormKernel::Compute( + const BatchNormParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/bilinear_interp_kernel.cpp b/src/operators/kernel/x86/bilinear_interp_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..63dd5781aca4659755bd7844977c2af714526178 --- /dev/null +++ b/src/operators/kernel/x86/bilinear_interp_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef BILINEAR_INTERP_OP + +#include "operators/kernel/bilinear_interp_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool BilinearInterpKernel::Init(BilinearInterpParam *param) { + return true; +} + +template <> +void BilinearInterpKernel::Compute( + const BilinearInterpParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/box_coder_kernel.cpp b/src/operators/kernel/x86/box_coder_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a63a23646d39949c4248714cc6d4f4954ca82fc2 --- /dev/null +++ b/src/operators/kernel/x86/box_coder_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef BOXCODER_OP + +#include "operators/kernel/box_coder_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool BoxCoderKernel::Init(BoxCoderParam *param) { + return true; +} + +template <> +void BoxCoderKernel::Compute( + const BoxCoderParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/concat_kernel.cpp b/src/operators/kernel/x86/concat_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88e0cc99f46cccb578582f686bd5f854241ac73f --- /dev/null +++ b/src/operators/kernel/x86/concat_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef CONCAT_OP + +#include "operators/kernel/concat_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConcatKernel::Init(ConcatParam *param) { + return true; +} + +template <> +void ConcatKernel::Compute(const ConcatParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_add_add_prelu_kernel.cpp b/src/operators/kernel/x86/conv_add_add_prelu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ad327b09016d2e494b847f4efe849f13d1bffc86 --- /dev/null +++ b/src/operators/kernel/x86/conv_add_add_prelu_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADDADDPRELU_OP + +#include "operators/kernel/conv_add_add_prelu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddAddPReluKernel::Init( + FusionConvAddAddPReluParam *param) { + return true; +} + +template <> +void ConvAddAddPReluKernel::Compute( + const FusionConvAddAddPReluParam ¶m) const { + // TODO +} +template class ConvAddAddPReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/x86/conv_add_bn_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3139b8a30fb9fc55eb1ca2ff43ce35c49e8e258d --- /dev/null +++ b/src/operators/kernel/x86/conv_add_bn_relu_kernel.cpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_CONVADDBNRELU_OP + +#include "operators/kernel/conv_add_bn_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddBNReluKernel::Init( + FusionConvAddBNReluParam *param) { + const Tensor *mean = param->InputMean(); + const Tensor *variance = param->InputVariance(); + const Tensor *scale = param->InputScale(); + const Tensor *bias = param->InputBias(); + const float epsilon = param->Epsilon(); + + auto mean_ptr = mean->data(); + auto variance_ptr = variance->data(); + auto scale_ptr = scale->data(); + auto bias_ptr = bias->data(); + + const int C = mean->numel(); + float inv_std_ptr[C]; + for (int i = 0; i < C; i++) { + inv_std_ptr[i] = + 1 / static_cast(pow((variance_ptr[i] + epsilon), 0.5)); + } + Tensor *new_scale = new Tensor(); + Tensor *new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({C}); + auto new_bias_ptr = new_bias->mutable_data({C}); + for (int i = 0; i < C; i++) { + new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; + new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; + } + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); + return true; +} + +template <> +void ConvAddBNReluKernel::Compute( + const FusionConvAddBNReluParam ¶m) const { + // TODO +} +template class ConvAddBNReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_add_kernel.cpp b/src/operators/kernel/x86/conv_add_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f051daf7961767c32f601b89d532c77ba61ca9bb --- /dev/null +++ b/src/operators/kernel/x86/conv_add_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADD_OP + +#include "operators/kernel/conv_add_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddKernel::Init(FusionConvAddParam *param) { + return true; +} + +template <> +void ConvAddKernel::Compute( + const FusionConvAddParam ¶m) const { + // TODO +} + +template class ConvAddKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_add_prelu_kernel.cpp b/src/operators/kernel/x86/conv_add_prelu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..126b5e2079ea1f376a786c1b409afc0d43765f15 --- /dev/null +++ b/src/operators/kernel/x86/conv_add_prelu_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_CONVADDPRELU_OP + +#include "operators/kernel/conv_add_prelu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddPReluKernel::Init(FusionConvAddPReluParam *param) { + return true; +} + +template <> +void ConvAddPReluKernel::Compute( + const FusionConvAddPReluParam ¶m) const { + // TODO +} + +template class ConvAddPReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_add_relu_kernel.cpp b/src/operators/kernel/x86/conv_add_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f01f3a0feff2f2eff141363d3901f5e5749fda1b --- /dev/null +++ b/src/operators/kernel/x86/conv_add_relu_kernel.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_CONVADDRELU_OP + +#include "operators/kernel/conv_add_relu_kernel.h" + + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { + return true; +} + +template <> +void ConvAddReluKernel::Compute( + const FusionConvAddReluParam ¶m) const { + // TODO +} + +template class ConvAddReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_bn_add_relu_kernel.cpp b/src/operators/kernel/x86/conv_bn_add_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1fed6538b4ce511997b6e37a780ba3d32f6b818b --- /dev/null +++ b/src/operators/kernel/x86/conv_bn_add_relu_kernel.cpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_CONVBNADDRELU_OP + +#include "operators/kernel/conv_bn_add_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvBNAddReluKernel::Init( + FusionConvBNAddReluParam *param) { + const Tensor *mean = param->InputMean(); + const Tensor *variance = param->InputVariance(); + const Tensor *scale = param->InputScale(); + const Tensor *bias = param->InputBias(); + const float epsilon = param->Epsilon(); + + auto mean_ptr = mean->data(); + auto variance_ptr = variance->data(); + auto scale_ptr = scale->data(); + auto bias_ptr = bias->data(); + + const int C = mean->numel(); + float inv_std_ptr[C]; + for (int i = 0; i < C; i++) { + inv_std_ptr[i] = + 1 / static_cast(pow((variance_ptr[i] + epsilon), 0.5)); + } + Tensor *new_scale = new Tensor(); + Tensor *new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({C}); + auto new_bias_ptr = new_bias->mutable_data({C}); + for (int i = 0; i < C; i++) { + new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; + new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; + } + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); + return true; +} + +template <> +void ConvBNAddReluKernel::Compute( + const FusionConvBNAddReluParam ¶m) const { + // TODO +} +template class ConvBNAddReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_bn_relu_kernel.cpp b/src/operators/kernel/x86/conv_bn_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8eeb2109c58b96e078e483947ff15a6dcfc61298 --- /dev/null +++ b/src/operators/kernel/x86/conv_bn_relu_kernel.cpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVBNRELU_OP + +#include "operators/kernel/conv_bn_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { + const Tensor *mean = param->InputMean(); + const Tensor *variance = param->InputVariance(); + const Tensor *scale = param->InputScale(); + const Tensor *bias = param->InputBias(); + const float epsilon = param->Epsilon(); + + // DLOG << "variance: " << *variance; + + auto mean_ptr = mean->data(); + auto variance_ptr = variance->data(); + auto scale_ptr = scale->data(); + auto bias_ptr = bias->data(); + + const int C = mean->numel(); + float inv_std_ptr[C]; + for (int i = 0; i < C; i++) { + inv_std_ptr[i] = + 1 / static_cast(pow((variance_ptr[i] + epsilon), 0.5)); + } + Tensor *new_scale = new Tensor(); + Tensor *new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({C}); + auto new_bias_ptr = new_bias->mutable_data({C}); + for (int i = 0; i < C; i++) { + new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; + new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; + } + + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); + return true; +} + +template <> +void ConvBNReluKernel::Compute( + const FusionConvBNReluParam ¶m) const { + // TODO +} + +template class ConvBNReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_kernel.cpp b/src/operators/kernel/x86/conv_kernel.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..75674ef979398124c3572f9e51ef9fb269c3d74b --- /dev/null +++ b/src/operators/kernel/x86/conv_kernel.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef CONV_OP + +#include "operators/kernel/conv_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvKernel::Init(ConvParam *param) { + return true; +} + +template <> +void ConvKernel::Compute(const ConvParam ¶m) const { + // TODO +} + +template class ConvKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/conv_transpose_kernel.cpp b/src/operators/kernel/x86/conv_transpose_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ce0f84736c377f459e9898692b41d47e808cb5d1 --- /dev/null +++ b/src/operators/kernel/x86/conv_transpose_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef CONV_TRANSPOSE + +#include "operators/kernel/conv_transpose_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ConvTransposeKernel::Init(ConvTransposeParam *param) { + return true; +} + +template <> +void ConvTransposeKernel::Compute( + const ConvTransposeParam ¶m) const { + // TODO +} + +template class ConvTransposeKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/crf_kernel.cpp b/src/operators/kernel/x86/crf_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9ba0f13cf964e6b6023b261d89519ccd0f662612 --- /dev/null +++ b/src/operators/kernel/x86/crf_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef CRF_OP + +#include "operators/kernel/crf_kernel.h" +#include "common/types.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool CrfKernel::Init(CrfParam *param) { + return true; +} + +template <> +void CrfKernel::Compute(const CrfParam ¶m) const { + // TODO +} + +template class CrfKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/depthwise_conv_kernel.cpp b/src/operators/kernel/x86/depthwise_conv_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..42ccceee8ec4d7ad2ec9e464894c7ef33852d660 --- /dev/null +++ b/src/operators/kernel/x86/depthwise_conv_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef DEPTHWISECONV_OP + +#include "operators/kernel/depthwise_conv_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DepthwiseConvKernel::Init(ConvParam *param) { + return true; +} + +template <> +void DepthwiseConvKernel::Compute( + const ConvParam ¶m) const { + // TODO +} + +template class DepthwiseConvKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/dequantize_kernel.cpp b/src/operators/kernel/x86/dequantize_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f0cf543830cc79292ed3a62126e86dcdf65e0461 --- /dev/null +++ b/src/operators/kernel/x86/dequantize_kernel.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "operators/kernel/dequantize_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template<> +bool DequantizeKernel::Init(DequantizeParam *param) { + return true; +} + +template<> +void DequantizeKernel::Compute( + const DequantizeParam ¶m) const { + // TODO +} + +} // namespace paddle_mobile +} // namespace operators diff --git a/src/operators/kernel/x86/dropout_kernel.cpp b/src/operators/kernel/x86/dropout_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..487ecdc3c8f66633045abb6e1ca7fbfb71a9f13d --- /dev/null +++ b/src/operators/kernel/x86/dropout_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef DROPOUT_OP + +#include "operators/kernel/dropout_kernel.h" +#include + +namespace paddle_mobile { +namespace operators { + +template <> +bool DropoutKernel::Init(DropoutParam *para) { + return true; +} + +template <> +void DropoutKernel::Compute(const DropoutParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/dwconv_bn_relu_kernel.cpp b/src/operators/kernel/x86/dwconv_bn_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..87839eca76429aa943e03d760fc831ec043163cf --- /dev/null +++ b/src/operators/kernel/x86/dwconv_bn_relu_kernel.cpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_DWCONVBNRELU_OP + +#include "operators/kernel/dwconv_bn_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DWConvBNReluKernel::Init(FusionDWConvBNReluParam *param) { + const Tensor *mean = param->InputMean(); + const Tensor *variance = param->InputVariance(); + const Tensor *scale = param->InputScale(); + const Tensor *bias = param->InputBias(); + const float epsilon = param->Epsilon(); + + auto mean_ptr = mean->data(); + auto variance_ptr = variance->data(); + auto scale_ptr = scale->data(); + auto bias_ptr = bias->data(); + + const int C = mean->numel(); + float inv_std_ptr[C]; + for (int i = 0; i < C; i++) { + inv_std_ptr[i] = + 1 / static_cast(pow((variance_ptr[i] + epsilon), 0.5)); + } + Tensor *new_scale = new Tensor(); + Tensor *new_bias = new Tensor(); + auto new_scale_ptr = new_scale->mutable_data({C}); + auto new_bias_ptr = new_bias->mutable_data({C}); + for (int i = 0; i < C; i++) { + new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; + new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; + } + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); + return true; +} + +template <> +void DWConvBNReluKernel::Compute( + const FusionDWConvBNReluParam ¶m) const { + // TODO +} + +template class DWConvBNReluKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/elementwise_add_kernel.cpp 
b/src/operators/kernel/x86/elementwise_add_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..142fddbb46b43ead9559c789cc63b30206b9582f --- /dev/null +++ b/src/operators/kernel/x86/elementwise_add_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ELEMENTWISEADD_OP + +#include "operators/kernel/elementwise_add_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { + return true; +} + +template <> +void ElementwiseAddKernel::Compute( + const ElementwiseAddParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/flatten_kernel.cpp b/src/operators/kernel/x86/flatten_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3d488e580951f3f74235e2903afd4b833d97d70a --- /dev/null +++ b/src/operators/kernel/x86/flatten_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FLATTEN_OP + +#include "operators/kernel/flatten_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool FlattenKernel::Init(FlattenParam *param) { + return true; +} + +template <> +void FlattenKernel::Compute(const FlattenParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/fusion_fc_kernel.cpp b/src/operators/kernel/x86/fusion_fc_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..29f6e3896f355632567799b27f772564bb2c6bad --- /dev/null +++ b/src/operators/kernel/x86/fusion_fc_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef FUSION_FC_OP + +#include "operators/kernel/fusion_fc_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template<> +bool FusionFcKernel::Init(FusionFcParam *param) { + return true; +} + +template<> +void FusionFcKernel::Compute( + const FusionFcParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/gru_kernel.cpp b/src/operators/kernel/x86/gru_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f7e1d0aecbae1846cc7196a780219d54fe9a05d --- /dev/null +++ b/src/operators/kernel/x86/gru_kernel.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef GRU_OP + +#include "operators/kernel/gru_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool GruKernel::Init(GruParam *param) { + return true; +} + +template <> +void GruKernel::Compute(const GruParam ¶m) const { + // TODO +} + +template class GruKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/lrn_kernel.cpp b/src/operators/kernel/x86/lrn_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e48ea4644210abdd27fbf473c217d2d6b8a44ec0 --- /dev/null +++ b/src/operators/kernel/x86/lrn_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef LRN_OP + +#include "operators/kernel/lrn_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool LrnKernel::Init(LrnParam *param) { + return true; +} + +template <> +void LrnKernel::Compute(const LrnParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/mul_kernel.cpp b/src/operators/kernel/x86/mul_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6abaddb419e0fc36dcf953338d43ad3d6649f069 --- /dev/null +++ b/src/operators/kernel/x86/mul_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef MUL_OP + +#include "operators/kernel/mul_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool MulKernel::Init(MulParam *param) { + return true; +} + +template <> +void MulKernel::Compute(const MulParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/multiclass_nms_kernel.cpp b/src/operators/kernel/x86/multiclass_nms_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f3d5139a7c62a2e0cbff0cf7b275bb2c770ee38 --- /dev/null +++ b/src/operators/kernel/x86/multiclass_nms_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef MULTICLASSNMS_OP + +#include "operators/kernel/multiclass_nms_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool MultiClassNMSKernel::Init(MultiClassNMSParam *param) { + return true; +} + +template <> +void MultiClassNMSKernel::Compute( + const MultiClassNMSParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/pool_kernel.cpp b/src/operators/kernel/x86/pool_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..07c444d1596b887daec6436774f05f4cfaaff70e --- /dev/null +++ b/src/operators/kernel/x86/pool_kernel.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef POOL_OP + +#include "operators/kernel/pool_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool PoolKernel::Init(PoolParam *param) { + return true; +} + +template <> +void PoolKernel::Compute(const PoolParam ¶m) const { + // TODO +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/test/operators/test_scale_op.cpp b/src/operators/kernel/x86/prelu_kernel.cpp similarity index 68% rename from test/operators/test_scale_op.cpp rename to src/operators/kernel/x86/prelu_kernel.cpp index 574779d71e5ebc5f06fe5cd8fb33422726f39464..d885a35335e9678fc5dbae50d2fb4f52056e7a37 100644 --- a/test/operators/test_scale_op.cpp +++ b/src/operators/kernel/x86/prelu_kernel.cpp @@ -12,7 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "../test_include.h" -#include "operators/scale_op.h" +#ifdef PRELU_OP -int main() {} +#include "operators/kernel/prelu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +void PReluKernel::Compute(const PReluParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/prior_box_kernel.cpp b/src/operators/kernel/x86/prior_box_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f4ca30821af3d44a61b323fd92de2825f9b3644a --- /dev/null +++ b/src/operators/kernel/x86/prior_box_kernel.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PRIORBOX_OP + +#include "operators/kernel/prior_box_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool PriorBoxKernel::Init(PriorBoxParam *param) { + return true; +} + +template <> +void PriorBoxKernel::Compute( + const PriorBoxParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/quantize_kernel.cpp b/src/operators/kernel/x86/quantize_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..15ebdf8ae40bef08bcfe0537792b2e88f380ab6b --- /dev/null +++ b/src/operators/kernel/x86/quantize_kernel.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "operators/kernel/quantize_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template<> +bool QuantizeKernel::Init(QuantizeParam *param) { + return true; +} + +template<> +void QuantizeKernel::Compute( + const QuantizeParam ¶m) const { + // TODO +} + +} // namespace paddle_mobile +} // namespace operators diff --git a/src/operators/kernel/x86/relu_kernel.cpp b/src/operators/kernel/x86/relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ae353aa18678414bc7a2865491b46bb86b03aa23 --- /dev/null +++ b/src/operators/kernel/x86/relu_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef RELU_OP + +#include "operators/kernel/relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ReluKernel::Init(ReluParam *param) { + return true; +} + +template <> +void ReluKernel::Compute(const ReluParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/reshape_kernel.cpp b/src/operators/kernel/x86/reshape_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d938a9dd23f04e42f99960d882d7445925a9a83d --- /dev/null +++ b/src/operators/kernel/x86/reshape_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef RESHAPE_OP + +#include "operators/kernel/reshape_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ReshapeKernel::Init(ReshapeParam *param) { + return true; +} + +template <> +void ReshapeKernel::Compute(const ReshapeParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/test/operators/test_slice_op.cpp b/src/operators/kernel/x86/resize_kernel.cpp similarity index 66% rename from test/operators/test_slice_op.cpp rename to src/operators/kernel/x86/resize_kernel.cpp index 9306bc53c6ae23b10c27a71071c11c9ddf1c0d25..553d28f233bec2873583edb7eaccdd61b683d954 100644 --- a/test/operators/test_slice_op.cpp +++ b/src/operators/kernel/x86/resize_kernel.cpp @@ -12,7 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "../test_include.h" -#include "operators/slice_op.h" +#ifdef RESIZE_OP -int main() {} +#include "operators/kernel/resize_kernel.h" +#include + +namespace paddle_mobile { +namespace operators { + +template <> +void ResizeKernel::Compute(const ResizeParam& param) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/scale_kernel.cpp b/src/operators/kernel/x86/scale_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c1ab15dafa42fc850f5613469309222a06cd27dc --- /dev/null +++ b/src/operators/kernel/x86/scale_kernel.cpp @@ -0,0 +1,143 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef SCALE_OP + +#include "operators/kernel/scale_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +void ScaleKernel::Compute(const ScaleParam ¶m) const { + const auto *input_x = param.InputX(); + auto *input_x_ptr = input_x->data(); + auto *out = param.Out(); + auto *out_ptr = out->mutable_data(); + + const vector scales = param.Scales(); + bool has_bias = param.HasBias(); + + const int dim_size = input_x->dims().size(); + switch (dim_size) { + case 1: { + const int input_width = input_x->dims()[0]; + if (has_bias) { + const vector biases = param.Biases(); + #pragma omp parallel for + for (int w = 0; w < input_width; w++) { + out_ptr[w] = input_x_ptr[w] * scales[w] + biases[w]; + } + } else { + #pragma omp parallel for + for (int w = 0; w < input_width; w++) { + out_ptr[w] = input_x_ptr[w] * scales[w]; + } + } + } break; + case 2: { + const int input_height = input_x->dims()[0]; + const int input_width = input_x->dims()[1]; + + if (has_bias) { + const vector biases = param.Biases(); + #pragma omp parallel for + for (int h = 0; h < input_height; ++h) { + const float *iptr = input_x_ptr + h * input_width; + float *optr = out_ptr + h * input_width; + for (int w = 0; w < input_width; ++w) { + optr[w] = iptr[w] * scales[w] + biases[w]; + } + } + } else { + #pragma omp parallel for + for (int h = 0; h < input_height; ++h) { + const float *iptr = input_x_ptr + h * input_width; + float *optr = out_ptr + h * input_width; + for (int w = 0; w < input_width; ++w) { + optr[w] = iptr[w] * scales[w]; + } + } + } + } break; + case 3: { + const int 
chan_size = input_x->dims()[0]; + const int input_height = input_x->dims()[1]; + const int input_width = input_x->dims()[2]; + int size = input_width * input_height; + + if (has_bias) { + const vector biases = param.Biases(); + + #pragma omp parallel for + for (int c = 0; c < chan_size; ++c) { + const float *iptr = input_x_ptr + c * size; + float *optr = out_ptr + c * size; + for (int i = 0; i < size; ++i) { + optr[i] = iptr[i] * scales[c] + biases[c]; + } + } + } else { + #pragma omp parallel for + for (int c = 0; c < chan_size; ++c) { + const float *iptr = input_x_ptr + c * size; + float *optr = out_ptr + c * size; + for (int i = 0; i < size; ++i) { + optr[i] = iptr[i] * scales[c]; + } + } + } + } break; + + case 4: { + const int batch_size = input_x->dims()[0]; + const int chan_size = input_x->dims()[0]; + const int input_height = input_x->dims()[1]; + const int input_width = input_x->dims()[2]; + int size = input_width * input_height; + + if (has_bias) { + const vector biases = param.Biases(); + + #pragma omp parallel for + for (int b = 0; b < batch_size; ++b) { + for (int c = 0; c < chan_size; ++c) { + const float *iptr = input_x_ptr + b * c * size; + float *optr = out_ptr + b * c * size; + for (int i = 0; i < size; ++i) { + optr[i] = iptr[i] * scales[c] + biases[c]; + } + } + } + } else { + #pragma omp parallel for + for (int b = 0; b < batch_size; ++b) { + for (int c = 0; c < chan_size; ++c) { + const float *iptr = input_x_ptr + b * c * size; + float *optr = out_ptr + b * c * size; + for (int i = 0; i < size; ++i) { + optr[i] = iptr[i] * scales[c]; + } + } + } + } + } break; + default: + break; + } +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/shape_kernel.cpp b/src/operators/kernel/x86/shape_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ede2c0d9afaaf3de37e7520fd6a9a37ac876d27 --- /dev/null +++ b/src/operators/kernel/x86/shape_kernel.cpp @@ -0,0 +1,35 @@ +/* 
Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef SHAPE_OP + +#include "operators/kernel/shape_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ShapeKernel::Init(ShapeParam *param) { + return true; +} + +template <> +void ShapeKernel::Compute(const ShapeParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/sigmoid_kernel.cpp b/src/operators/kernel/x86/sigmoid_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c97fd94100b6e73f0045bec9504e1985f8d79507 --- /dev/null +++ b/src/operators/kernel/x86/sigmoid_kernel.cpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef SIGMOID_OP + +#include "operators/kernel/sigmoid_kernel.h" +#include + +namespace paddle_mobile { +namespace operators { + +using framework::DDim; +using framework::Tensor; + +template <> +bool SigmoidKernel::Init(SigmoidParam *param) { + return true; +} + +template <> +void SigmoidKernel::Compute(const SigmoidParam ¶m) const { + // TODO +} + +template class SigmoidKernel; +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/softmax_kernel.cpp b/src/operators/kernel/x86/softmax_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7bef5c1605a56a50a6bede52ff32931e331eb9dd --- /dev/null +++ b/src/operators/kernel/x86/softmax_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef SOFTMAX_OP + +#include "../softmax_kernel.h" +#include "operators/math/softmax.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool SoftmaxKernel::Init(SoftmaxParam *param) { + return true; +} + +template <> +void SoftmaxKernel::Compute(const SoftmaxParam ¶m) const { + // TODO +} + +template class SoftmaxKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/split_kernel.cpp b/src/operators/kernel/x86/split_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..63fb597aeb0d626c9630c1a90d58fff3d4516cb7 --- /dev/null +++ b/src/operators/kernel/x86/split_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifdef SPLIT_OP + +#include "operators/kernel/split_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool SplitKernel::Init(SplitParam *param) { + return true; +} + +template <> +void SplitKernel::Compute(const SplitParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/x86/transpose_kernel.cpp b/src/operators/kernel/x86/transpose_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..11dd599c56d9fe22f2f9e31a96b61b5200abc18c --- /dev/null +++ b/src/operators/kernel/x86/transpose_kernel.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#ifdef TRANSPOSE_OP + +#include "operators/kernel/transpose_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool TransposeKernel::Init(TransposeParam *param) { + return true; +} + +template <> +void TransposeKernel::Compute( + const TransposeParam ¶m) const { + // TODO +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/lookup_op.h b/src/operators/lookup_op.h index 9c9d03c8d10e9b01ad958c12d31a49908075eb27..073e884e9157644670259b5acdb47443d2333e03 100644 --- a/src/operators/lookup_op.h +++ b/src/operators/lookup_op.h @@ -47,12 +47,4 @@ class LookupOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(lookup_table); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/lrn_op.cpp b/src/operators/lrn_op.cpp index dde9123edf3568020f933bb7375be99e40f2367b..e19813b842651664bd3a06da5afbbe1bca9c1813 100644 --- a/src/operators/lrn_op.cpp +++ b/src/operators/lrn_op.cpp @@ -35,7 +35,8 @@ REGISTER_OPERATOR_CPU(lrn, ops::LrnOp); #ifdef PADDLE_MOBILE_MALI_GPU REGISTER_OPERATOR_MALI_GPU(lrn, ops::LrnOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(lrn, ops::LrnOp); #endif #endif diff --git a/src/operators/lrn_op.h b/src/operators/lrn_op.h index 6c609c7654cca022f473dba0aad1f4214a4e43e3..26415a84aa96abdab91da7508080ce6a095aca62 100644 --- a/src/operators/lrn_op.h +++ b/src/operators/lrn_op.h @@ -47,13 +47,4 @@ class LrnOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(lrn); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(lrn); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp index 
716256a376a50f2ec1c4c62fa25703cabf3a0c66..402b187f8f5e9d2fbb70fa6bcfb72c88aa53e3d3 100644 --- a/src/operators/math/depthwise_conv_3x3.cpp +++ b/src/operators/math/depthwise_conv_3x3.cpp @@ -1881,103 +1881,6 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, #endif } -void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, - Tensor *output, Tensor bias, bool if_bias) { -#if __ARM_NEON - - const int batch_size = static_cast(input->dims()[0]); - const int input_channel = static_cast(input->dims()[1]); - - const int input_height = static_cast(input->dims()[2]); - const int input_width = static_cast(input->dims()[3]); - const int output_height = static_cast(output->dims()[2]); - const int output_width = static_cast(output->dims()[3]); - const int inhxw = input_height * input_width; - const int outhxw = output_height * output_width; - - float32x4_t zero = vdupq_n_f32(0.0); - for (int b = 0; b < batch_size; b++) { -#pragma omp parallel for - for (int c = 0; c < input_channel; c++) { - const float *filter_data = filter->data() + c * 9; - const float *input_data = input->data() + c * inhxw; - const float *bias_data = bias.data() + c; - float *output_data = output->data() + c * outhxw; - float w00 = filter_data[0]; - float w01 = filter_data[1]; - float w02 = filter_data[2]; - float w10 = filter_data[3]; - float w11 = filter_data[4]; - float w12 = filter_data[5]; - float w20 = filter_data[6]; - float w21 = filter_data[7]; - float w22 = filter_data[8]; - - float32x4_t biasv = vld1q_dup_f32(bias_data); - - for (int i = 0; i < output_height; i += 1) { - for (int m = 0; m < output_width - 2; m += 3) { - float *output_ptr = output_data + i * output_width + m; - float32x4x2_t input_buff_top{}, input_buff_mid{}, input_buff_bottom{}; - float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3, - tmp4, tmp5, out0; - input_buff_top = - vld2q_f32(input_data + (2 * i) * input_width + (2 * m)); - input_buff_mid = - vld2q_f32(input_data + 
(2 * i + 1) * input_width + (2 * m)); - input_buff_bottom = - vld2q_f32(input_data + (2 * i + 2) * input_width + (2 * m)); - - in0 = input_buff_top.val[0]; - tmp0 = input_buff_top.val[1]; - tmp1 = vextq_f32(in0, zero, 1); - - in2 = input_buff_mid.val[0]; - tmp2 = input_buff_mid.val[1]; - tmp3 = vextq_f32(in2, zero, 1); - - in4 = input_buff_bottom.val[0]; - tmp4 = input_buff_bottom.val[1]; - tmp5 = vextq_f32(in4, zero, 1); - - out0 = vmulq_n_f32(in0, w00); - out0 = vmlaq_n_f32(out0, tmp0, w01); - out0 = vmlaq_n_f32(out0, tmp1, w02); - out0 = vmlaq_n_f32(out0, in2, w10); - out0 = vmlaq_n_f32(out0, tmp2, w11); - out0 = vmlaq_n_f32(out0, tmp3, w12); - out0 = vmlaq_n_f32(out0, in4, w20); - out0 = vmlaq_n_f32(out0, tmp4, w21); - out0 = vmlaq_n_f32(out0, tmp5, w22); - out0 = vaddq_f32(out0, biasv); - - vst1q_lane_f32(output_ptr, out0, 0); - vst1q_lane_f32(output_ptr + 1, out0, 1); - vst1q_lane_f32(output_ptr + 2, out0, 2); - } - int m; - for (m = 0; m < output_width - 2; m += 3) { - } - for (int j = m; j < output_width; j++) { - output_data[i * output_width + j] = - input_data[(2 * i - 1) * input_width + 2 * j - 1] * w00 + - input_data[(2 * i - 1) * input_width + 2 * j] * w01 + - input_data[(2 * i - 1) * input_width + 2 * j + 1] * w02 + - input_data[(2 * i) * input_width + 2 * j - 1] * w10 + - input_data[(2 * i) * input_width + 2 * j] * w11 + - input_data[(2 * i) * input_width + 2 * j + 1] * w12 + - input_data[(2 * i + 1) * input_width + 2 * j - 1] * w20 + - input_data[(2 * i + 1) * input_width + 2 * j] * w21 + - input_data[(2 * i + 1) * input_width + 2 * j + 1] * w22; - output_data[i * output_width + j] += *bias_data; - } - } - } - } - -#endif -} - } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/depthwise_conv_3x3.h b/src/operators/math/depthwise_conv_3x3.h index b146b88e737a07ea08250315fc94653f63d2ad05..60e979648f871e640924a3373c625c311c3dd067 100644 --- a/src/operators/math/depthwise_conv_3x3.h +++ 
b/src/operators/math/depthwise_conv_3x3.h @@ -43,9 +43,6 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, Tensor *output, const Tensor *new_scale, const Tensor *new_bias, bool if_relu); - -void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, - Tensor *output, Tensor bias, bool if_bias); } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/im2col.cpp b/src/operators/math/im2col.cpp index 090ccdf24e214fc86b8a4032df228d50caa65ef9..4065f7d9c4934bce8285ea99fe4f14c4e2cc990c 100644 --- a/src/operators/math/im2col.cpp +++ b/src/operators/math/im2col.cpp @@ -74,7 +74,7 @@ class Im2ColFunctor { const int isize = im_height; bool pad1 = padding[0] > 0; bool pad2 = - (pad1 && padding[1] && + (pad1 && (((isize - 2 * padding[0] + filter_height) % stride[0] == 0) ? 1 : 0)); int fill = isize % 2; if (stride[0] == 1 && filter_height == 3 && pad1 && pad2 && diff --git a/src/operators/math/math_function.cpp b/src/operators/math/math_function.cpp index 14269817ededd097c4c9ade20be5ee773c02d692..6ef9fb2a8252e82014ebebc22f82066eeb324c0d 100644 --- a/src/operators/math/math_function.cpp +++ b/src/operators/math/math_function.cpp @@ -36,35 +36,13 @@ void matmul(const framework::Tensor &matrix_a, bool trans_a, int N = dim_out[1]; int K = (!trans_a) ? 
dim_a[1] : dim_a[0]; - if (trans_a) { - int numel = matrix_a.numel(); - int m = matrix_a.dims()[0]; - int n = matrix_a.dims()[1]; - float *tmp = (float *)(matrix_a.data()); - float *a = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * numel)); - int index = 0; - for (int j = 0; j < n; j++) { - for (int i = 0; i < m; i++) { - a[index++] = tmp[i * n + j]; - } - } -#ifdef _OPENMP - Sgemm_omp(M, N, K, alpha, a, K, matrix_b.data(), N, beta, - matrix_out->data(), N, relu, bias); -#else - Sgemm(M, N, K, alpha, a, K, matrix_b.data(), N, beta, - matrix_out->data(), N, relu, bias); -#endif - } else { #ifdef _OPENMP - Sgemm_omp(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), - N, beta, matrix_out->data(), N, relu, bias); + Sgemm_omp(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), + N, beta, matrix_out->data(), N, relu, bias); #else - Sgemm(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), N, - beta, matrix_out->data(), N, relu, bias); + Sgemm(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), N, + beta, matrix_out->data(), N, relu, bias); #endif - } } template <> diff --git a/src/operators/math/pool_3x3.cpp b/src/operators/math/pool_3x3.cpp index f8b52c59f5689461ef9b4171b9e33c0d49529eed..05d3017f635a040a52d2cc377c8f384dbbd8086c 100644 --- a/src/operators/math/pool_3x3.cpp +++ b/src/operators/math/pool_3x3.cpp @@ -31,428 +31,251 @@ using std::min; using std::vector; void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) { #if __ARM_NEON - const int batch_size = static_cast(input->dims()[0]); - const int input_channel = static_cast(input->dims()[1]); - - const int input_height = static_cast(input->dims()[2]); - const int input_width = static_cast(input->dims()[3]); - const int output_height = static_cast(output->dims()[2]); - const int output_width = static_cast(output->dims()[3]); + const int batch_size = input->dims()[0]; - const int hxw = input_height * input_width; + const int h_in = input->dims()[2]; - const int l = input_height; + const int 
w_in = input->dims()[3]; - const float coef = 1.0 / 9.0; - const float coef1 = 1.0 / 6.0; - const float coef2 = 1.0 / 4.0; + const int output_channels = output->dims()[1]; - float32x4_t v_coef = vdupq_n_f32(coef); - float32x4_t v_coef1 = vdupq_n_f32(coef1); + const int h_out = output->dims()[2]; + const int w_out = output->dims()[3]; + const int outputdata_channel_stride = h_out * w_out; + const int inputdata_channel_stride = h_in * w_in; + const int input_batch_stride = output_channels * inputdata_channel_stride; + const int output_batch_stride = output_channels * outputdata_channel_stride; + float *out_data = output->data(); + const float *input_data = input->data(); - for (int b = 0; b < batch_size; b++) { + const float coef = 1.0 / 9.0; + for (int k = 0; k < batch_size; ++k) { #pragma omp parallel for - for (int c = 0; c < input_channel; c++) { - const float *input_data = input->data() + c * hxw; - float *output_data = output->data() + c * hxw; - - for (int i = 1; i < output_height - 1; i++) { - float *output_ptr; - float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3, tmp4, - tmp5, out0; - for (int m = 1; m < output_width - 4; m += 4) { - output_ptr = output_data + i * output_width + m; - in0 = vld1q_f32(input_data + (i - 1) * input_width + m - 1); - in1 = vld1q_f32(input_data + (i - 1) * input_width + m + 3); - in2 = vld1q_f32(input_data + i * input_width + m - 1); - in3 = vld1q_f32(input_data + i * input_width + m + 3); - in4 = vld1q_f32(input_data + (i + 1) * input_width + m - 1); - in5 = vld1q_f32(input_data + (i + 1) * input_width + m + 3); - - tmp0 = vextq_f32(in0, in1, 1); - tmp1 = vextq_f32(in0, in1, 2); - tmp2 = vextq_f32(in2, in3, 1); - tmp3 = vextq_f32(in2, in3, 2); - tmp4 = vextq_f32(in4, in5, 1); - tmp5 = vextq_f32(in4, in5, 2); - - out0 = in0; - out0 = vaddq_f32(out0, tmp0); - out0 = vaddq_f32(out0, tmp1); - out0 = vaddq_f32(out0, in2); - out0 = vaddq_f32(out0, tmp2); - out0 = vaddq_f32(out0, tmp3); - out0 = vaddq_f32(out0, in4); - 
out0 = vaddq_f32(out0, tmp4); - out0 = vaddq_f32(out0, tmp5); - - vst1q_f32(output_ptr, vmulq_f32(out0, v_coef)); - } - int m; - for (m = 1; (m + 3) < output_width - 1; m = m + 4) { - } - - for (int j = m; j < output_width - 1; j++) { - output_data[i * output_width + j] = - input_data[(i - 1) * input_width + j - 1] + - input_data[(i - 1) * input_width + j] + - input_data[(i - 1) * input_width + j + 1] + - input_data[(i)*input_width + j - 1] + - input_data[(i)*input_width + j] + - input_data[(i)*input_width + j + 1] + - input_data[(i + 1) * input_width + j - 1] + - input_data[(i + 1) * input_width + j] + - input_data[(i + 1) * input_width + j + 1]; - output_data[i * output_width + j] = - output_data[i * output_width + j] * coef; - } + for (int c = 0; c < output_channels; ++c) { + const float *input_seg = input_data + c * inputdata_channel_stride; + float *output_seg = out_data + c * outputdata_channel_stride; + // four corner point + output_seg[0] = (input_seg[0] + input_seg[1] + input_seg[w_in] + + input_seg[w_in + 1]) * + coef; + output_seg[w_out - 1] = + (input_seg[w_in - 2] + input_seg[w_in - 1] + input_seg[w_in * 2 - 2] + + input_seg[2 * w_in - 1]) * + coef; + output_seg[(h_out - 1) * w_out] = + (input_seg[(h_in - 2) * w_in] + input_seg[(h_in - 2) * w_in + 1] + + input_seg[(h_in - 1) * w_in] + input_seg[(h_in - 1) * w_in + 1]) * + coef; + output_seg[h_out * w_out - 1] = + (input_seg[h_in * w_in - 1] + input_seg[h_in * w_in - 2] + + input_seg[(h_in - 1) * w_in - 1] + + input_seg[(h_in - 1) * w_in - 2]) * + coef; + // left side & right side + for (int i = 1; i < h_in - 1; ++i) { + output_seg[i * w_out] = + (input_seg[i * w_in - w_in] + input_seg[i * w_in - w_in + 1] + + input_seg[i * w_in] + input_seg[i * w_in + 1] + + input_seg[i * w_in + w_in] + input_seg[i * w_in + w_in + 1]) * + coef; + output_seg[i * w_out + w_out - 1] = + (input_seg[i * w_in - w_in + w_in - 2] + + input_seg[i * w_in - w_in + 1 + w_in - 2] + + input_seg[i * w_in + w_in - 2] + + input_seg[i * 
w_in + 1 + w_in - 2] + + input_seg[i * w_in + w_in + w_in - 2] + + input_seg[i * w_in + w_in + 1 + w_in - 2]) * + coef; } + // top 1 row & bottom 1 row + const float *input_tmp = input_seg; - output_data[0] = - input_data[0] + input_data[1] + input_data[l] + input_data[l + 1]; - output_data[l - 1] = input_data[l - 2] + input_data[l - 1] + - input_data[2 * l - 2] + input_data[2 * l - 1]; - output_data[(l - 1) * l] = - input_data[(l - 2) * l] + input_data[(l - 2) * l + 1] + - input_data[(l - 1) * l] + input_data[(l - 1) * l + 1]; - output_data[l * l - 1] = input_data[(l - 2) * (l + 1)] + - input_data[(l - 2) * (l + 1) + 1] + - input_data[l * l - 2] + input_data[l * l - 1]; - output_data[0] = output_data[0] * coef2; - output_data[l - 1] = output_data[l - 1] * coef2; - output_data[(l - 1) * l] = output_data[(l - 1) * l] * coef2; - output_data[l * l - 1] = output_data[l * l - 1] * coef2; - - for (int i = 1; i < l - 1; ++i) { - output_data[i * l] = input_data[i * l - l] + input_data[i * l - l + 1] + - input_data[i * l] + input_data[i * l + 1] + - input_data[i * l + l] + input_data[i * l + l + 1]; - - output_data[i * l + l - 1] = - input_data[i * l + l - 1 - l - 1] + input_data[i * l + l - 1 - l] + - input_data[i * l + l - 1 - 1] + input_data[i * l + l - 1] + - input_data[i * l + l - 1 + l - 1] + input_data[i * l + l - 1 + l]; - output_data[i * l] = output_data[i * l] * coef1; - output_data[i * l + l - 1] = output_data[i * l + l - 1] * coef1; - } + float32x4_t in0, in1, in2, in3, in4, in5, in6, in7, tmp0, tmp1, tmp2, + tmp3, tmp4, tmp5, sum, out0; + float32x4_t v_coef = vdupq_n_f32(coef); + in0 = vld1q_f32(input_tmp); + in2 = vld1q_f32(input_tmp + w_in); + const float *input_tmp_end = input_tmp + (h_in - 2) * w_in; + in4 = vld1q_f32(input_tmp_end); + in6 = vld1q_f32(input_tmp_end + w_in); + int c_mid = w_out - 2; + auto output_ptr = output_seg + 1; + for (; c_mid > 3; c_mid -= 4) { + in1 = vld1q_f32(input_tmp + 4); + in3 = vld1q_f32(input_tmp + w_in + 4); - int m; - for 
(m = 1; m < output_width - 4; m += 4) { - float *output_ptr = output_data + m; - float32x4_t in0, in1, in2, in3, tmp0, tmp1, tmp2, tmp3, out0; - in0 = vld1q_f32(input_data + m - 1); - in1 = vld1q_f32(input_data + m + 3); - in2 = vld1q_f32(input_data + input_width + m - 1); - in3 = vld1q_f32(input_data + input_width + m + 3); tmp0 = vextq_f32(in0, in1, 1); tmp1 = vextq_f32(in0, in1, 2); + tmp2 = vextq_f32(in2, in3, 1); tmp3 = vextq_f32(in2, in3, 2); - out0 = in0; - out0 = vaddq_f32(out0, tmp0); - out0 = vaddq_f32(out0, tmp1); - out0 = vaddq_f32(out0, in2); - out0 = vaddq_f32(out0, tmp2); - out0 = vaddq_f32(out0, tmp3); - - vst1q_f32(output_ptr, vmulq_f32(out0, v_coef1)); - } - for (m = 1; (m + 3) < output_width - 1; m += 4) { - } - for (int j = m; j < output_width - 1; j++) { - output_data[j] = input_data[j - 1] + input_data[j] + input_data[j + 1] + - input_data[input_width + j - 1] + - input_data[input_width + j] + - input_data[input_width + j + 1]; - output_data[j] = output_data[j] * coef1; + sum = vaddq_f32(in0, tmp0); + sum = vaddq_f32(sum, tmp1); + sum = vaddq_f32(sum, in2); + sum = vaddq_f32(sum, tmp2); + sum = vaddq_f32(sum, tmp3); + + vst1q_f32(output_ptr, vmulq_f32(sum, v_coef)); + + in5 = vld1q_f32(input_tmp_end + 4); + in7 = vld1q_f32(input_tmp_end + w_in + 4); + + tmp0 = vextq_f32(in4, in5, 1); + tmp1 = vextq_f32(in4, in5, 2); + tmp2 = vextq_f32(in6, in7, 1); + tmp3 = vextq_f32(in6, in7, 2); + + sum = vaddq_f32(in0, tmp0); + sum = vaddq_f32(sum, tmp1); + sum = vaddq_f32(sum, in2); + sum = vaddq_f32(sum, tmp2); + sum = vaddq_f32(sum, tmp3); + + vst1q_f32(output_ptr + (h_out - 1) * w_out, vmulq_f32(sum, v_coef)); + + // can optimize to each 8 stride. 
+ input_tmp += 4; + input_tmp_end += 4; + output_ptr += 4; + in0 = in1; + in2 = in3; + in4 = in5; + in6 = in7; } + // top right remain + float32x4_t pad0 = vdupq_n_f32(input_seg[w_in - 1]); + float32x4_t pad1 = vdupq_n_f32(input_seg[2 * w_in - 1]); - for (m = 1; m < output_width - 4; m += 4) { - float *output_ptr = - output_data + (output_height - 1) * output_width + m; + tmp0 = vextq_f32(in0, pad0, 1); + tmp1 = vextq_f32(in0, pad0, 2); + tmp2 = vextq_f32(in2, pad1, 2); + tmp3 = vextq_f32(in2, pad1, 2); - float32x4_t in0, in1, in2, in3, tmp0, tmp1, tmp2, tmp3, out0; - in0 = vld1q_f32(input_data + (output_height - 2) * input_width + m - 1); - in1 = vld1q_f32(input_data + (output_height - 2) * input_width + m + 3); - in2 = vld1q_f32(input_data + (output_height - 1) * input_width + m - 1); - in3 = vld1q_f32(input_data + (output_height - 1) * input_width + m + 3); - tmp0 = vextq_f32(in0, in1, 1); - tmp1 = vextq_f32(in0, in1, 2); - tmp2 = vextq_f32(in2, in3, 1); - tmp3 = vextq_f32(in2, in3, 2); - out0 = in0; - out0 = vaddq_f32(out0, tmp0); - out0 = vaddq_f32(out0, tmp1); - out0 = vaddq_f32(out0, in2); - out0 = vaddq_f32(out0, tmp2); - out0 = vaddq_f32(out0, tmp3); - - vst1q_f32(output_ptr, vmulq_f32(out0, v_coef1)); + sum = vaddq_f32(in0, tmp0); + sum = vaddq_f32(sum, tmp1); + sum = vaddq_f32(sum, in2); + sum = vaddq_f32(sum, tmp2); + sum = vaddq_f32(sum, tmp3); + out0 = vmulq_f32(sum, v_coef); + + for (int i = 0; i < c_mid; ++i) { + if (i == 0) { + vst1q_lane_f32(output_ptr + i, out0, 0); + } + if (i == 1) { + vst1q_lane_f32(output_ptr + i, out0, 1); + } + if (i == 2) { + vst1q_lane_f32(output_ptr + i, out0, 2); + } } - for (m = 1; (m + 3) < output_width - 1; m = m + 4) { + + // bottom_right remain + float32x4_t pad2 = vdupq_n_f32(input_seg[(h_in - 1) * w_in - 1]); + float32x4_t pad3 = vdupq_n_f32(input_seg[h_in * w_in - 1]); + + tmp0 = vextq_f32(in4, pad2, 1); + tmp1 = vextq_f32(in4, pad2, 2); + tmp2 = vextq_f32(in6, pad3, 2); + tmp3 = vextq_f32(in6, pad3, 2); + + sum 
= vaddq_f32(in4, tmp0); + sum = vaddq_f32(sum, tmp1); + sum = vaddq_f32(sum, in6); + sum = vaddq_f32(sum, tmp2); + sum = vaddq_f32(sum, tmp3); + out0 = vmulq_f32(sum, v_coef); + + for (int i = 0; i < c_mid; ++i) { + if (i == 0) { + vst1q_lane_f32(output_ptr + (h_out - 1) * w_out + i, out0, 0); + } + if (i == 1) { + vst1q_lane_f32(output_ptr + (h_out - 1) * w_out + i, out0, 1); + } + if (i == 2) { + vst1q_lane_f32(output_ptr + (h_out - 1) * w_out + i, out0, 2); + } } - for (int j = m; j < output_width - 1; j++) { - output_data[(output_height - 1) * input_width + j] = - input_data[(output_height - 2) * input_width + j - 1] + - input_data[(output_height - 2) * input_width + j] + - input_data[(output_height - 2) * input_width + j + 1] + - input_data[(output_height - 1) * input_width + j - 1] + - input_data[(output_height - 1) * input_width + j] + - input_data[(output_height - 1) * input_width + j + 1]; - output_data[(output_height - 1) * output_width + j] = - output_data[(output_height - 1) * output_width + j] * coef1; + // mid + for (int j = 0; j < h_out - 2; ++j) { + output_ptr = output_seg + w_out * (j + 1) + 1; + input_tmp = input_seg + j * w_in; + + in0 = vld1q_f32(input_tmp); + in2 = vld1q_f32(input_tmp + w_in); + in4 = vld1q_f32(input_tmp + 2 * w_in); + c_mid = w_out - 2; + for (; c_mid > 3; c_mid -= 4) { + in1 = vld1q_f32(input_tmp + 4); + in3 = vld1q_f32(input_tmp + w_in + 4); + in5 = vld1q_f32(input_tmp + 2 * w_in + 4); + + tmp0 = vextq_f32(in0, in1, 1); + tmp1 = vextq_f32(in0, in1, 2); + tmp2 = vextq_f32(in2, in3, 1); + tmp3 = vextq_f32(in2, in3, 2); + tmp4 = vextq_f32(in4, in5, 1); + tmp5 = vextq_f32(in4, in5, 2); + + sum = vaddq_f32(in0, tmp0); + sum = vaddq_f32(sum, tmp1); + sum = vaddq_f32(sum, in2); + sum = vaddq_f32(sum, tmp2); + sum = vaddq_f32(sum, tmp3); + sum = vaddq_f32(sum, in4); + sum = vaddq_f32(sum, tmp4); + sum = vaddq_f32(sum, tmp5); + + out0 = vmulq_f32(sum, v_coef); + vst1q_f32(output_ptr, out0); + output_ptr += 4; + input_tmp += 4; + in0 
= in1; + in2 = in3; + in4 = in5; + } + // mid remain + float32x4_t pad0 = vdupq_n_f32(input_seg[(j + 1) * w_in - 1]); + float32x4_t pad1 = vdupq_n_f32(input_seg[(j + 2) * w_in - 1]); + float32x4_t pad2 = vdupq_n_f32(input_seg[(j + 2) * w_in - 1]); + + tmp0 = vextq_f32(in0, pad0, 1); + tmp1 = vextq_f32(in0, pad0, 2); + tmp2 = vextq_f32(in2, pad1, 1); + tmp3 = vextq_f32(in2, pad1, 2); + tmp4 = vextq_f32(in4, pad2, 1); + tmp5 = vextq_f32(in4, pad2, 2); + + sum = vaddq_f32(in0, tmp0); + sum = vaddq_f32(sum, tmp1); + sum = vaddq_f32(sum, in2); + sum = vaddq_f32(sum, tmp2); + sum = vaddq_f32(sum, tmp3); + sum = vaddq_f32(sum, in4); + sum = vaddq_f32(sum, tmp4); + sum = vaddq_f32(sum, tmp5); + out0 = vmulq_f32(sum, v_coef); + + for (int i = 0; i < c_mid; ++i) { + if (i == 0) { + vst1q_lane_f32(output_ptr + i, out0, 0); + } + if (i == 1) { + vst1q_lane_f32(output_ptr + i, out0, 1); + } + if (i == 2) { + vst1q_lane_f32(output_ptr + i, out0, 2); + } + } } + // input_data += inputdata_channel_stride; + // out_data += outputdata_channel_stride; } + input_data += input_batch_stride; + out_data += output_batch_stride; } - -// const int batch_size = input->dims()[0]; -// -// const int h_in = input->dims()[2]; -// -// const int w_in = input->dims()[3]; -// -// const int output_channels = output->dims()[1]; -// -// const int h_out = output->dims()[2]; -// const int w_out = output->dims()[3]; -// const int outputdata_channel_stride = h_out * w_out; -// const int inputdata_channel_stride = h_in * w_in; -// const int input_batch_stride = output_channels * inputdata_channel_stride; -// const int output_batch_stride = output_channels * -// outputdata_channel_stride; float *out_data = output->data(); const -// float *input_data = input->data(); -// -// const float coef = 1.0 / 9.0; -// for (int k = 0; k < batch_size; ++k) { -//#pragma omp parallel for -// for (int c = 0; c < output_channels; ++c) { -// const float *input_seg = input_data + c * inputdata_channel_stride; -// float 
*output_seg = out_data + c * outputdata_channel_stride; -// // four corner point -// output_seg[0] = (input_seg[0] + input_seg[1] + input_seg[w_in] + -// input_seg[w_in + 1]) * -// coef; -// output_seg[w_out - 1] = -// (input_seg[w_in - 2] + input_seg[w_in - 1] + input_seg[w_in * 2 - -// 2] + -// input_seg[2 * w_in - 1]) * -// coef; -// output_seg[(h_out - 1) * w_out] = -// (input_seg[(h_in - 2) * w_in] + input_seg[(h_in - 2) * w_in + 1] + -// input_seg[(h_in - 1) * w_in] + input_seg[(h_in - 1) * w_in + 1]) -// * -// coef; -// output_seg[h_out * w_out - 1] = -// (input_seg[h_in * w_in - 1] + input_seg[h_in * w_in - 2] + -// input_seg[(h_in - 1) * w_in - 1] + -// input_seg[(h_in - 1) * w_in - 2]) * -// coef; -// // left side & right side -// for (int i = 1; i < h_in - 1; ++i) { -// output_seg[i * w_out] = -// (input_seg[i * w_in - w_in] + input_seg[i * w_in - w_in + 1] + -// input_seg[i * w_in] + input_seg[i * w_in + 1] + -// input_seg[i * w_in + w_in] + input_seg[i * w_in + w_in + 1]) * -// coef; -// output_seg[i * w_out + w_out - 1] = -// (input_seg[i * w_in - w_in + w_in - 2] + -// input_seg[i * w_in - w_in + 1 + w_in - 2] + -// input_seg[i * w_in + w_in - 2] + -// input_seg[i * w_in + 1 + w_in - 2] + -// input_seg[i * w_in + w_in + w_in - 2] + -// input_seg[i * w_in + w_in + 1 + w_in - 2]) * -// coef; -// } -// // top 1 row & bottom 1 row -// const float *input_tmp = input_seg; -// -// float32x4_t in0, in1, in2, in3, in4, in5, in6, in7, tmp0, tmp1, tmp2, -// tmp3, tmp4, tmp5, sum, out0; -// float32x4_t v_coef = vdupq_n_f32(coef); -// in0 = vld1q_f32(input_tmp); -// in2 = vld1q_f32(input_tmp + w_in); -// const float *input_tmp_end = input_tmp + (h_in - 2) * w_in; -// in4 = vld1q_f32(input_tmp_end); -// in6 = vld1q_f32(input_tmp_end + w_in); -// int c_mid = w_out - 2; -// auto output_ptr = output_seg + 1; -// for (; c_mid > 3; c_mid -= 4) { -// in1 = vld1q_f32(input_tmp + 4); -// in3 = vld1q_f32(input_tmp + w_in + 4); -// -// tmp0 = vextq_f32(in0, in1, 1); -// 
tmp1 = vextq_f32(in0, in1, 2); -// -// tmp2 = vextq_f32(in2, in3, 1); -// tmp3 = vextq_f32(in2, in3, 2); -// -// sum = vaddq_f32(in0, tmp0); -// sum = vaddq_f32(sum, tmp1); -// sum = vaddq_f32(sum, in2); -// sum = vaddq_f32(sum, tmp2); -// sum = vaddq_f32(sum, tmp3); -// -// vst1q_f32(output_ptr, vmulq_f32(sum, v_coef)); -// -// in5 = vld1q_f32(input_tmp_end + 4); -// in7 = vld1q_f32(input_tmp_end + w_in + 4); -// -// tmp0 = vextq_f32(in4, in5, 1); -// tmp1 = vextq_f32(in4, in5, 2); -// tmp2 = vextq_f32(in6, in7, 1); -// tmp3 = vextq_f32(in6, in7, 2); -// -// sum = vaddq_f32(in0, tmp0); -// sum = vaddq_f32(sum, tmp1); -// sum = vaddq_f32(sum, in2); -// sum = vaddq_f32(sum, tmp2); -// sum = vaddq_f32(sum, tmp3); -// -// vst1q_f32(output_ptr + (h_out - 1) * w_out, vmulq_f32(sum, v_coef)); -// -// // can optimize to each 8 stride. -// input_tmp += 4; -// input_tmp_end += 4; -// output_ptr += 4; -// in0 = in1; -// in2 = in3; -// in4 = in5; -// in6 = in7; -// } -// // top right remain -// float32x4_t pad0 = vdupq_n_f32(input_seg[w_in - 1]); -// float32x4_t pad1 = vdupq_n_f32(input_seg[2 * w_in - 1]); -// -// tmp0 = vextq_f32(in0, pad0, 1); -// tmp1 = vextq_f32(in0, pad0, 2); -// tmp2 = vextq_f32(in2, pad1, 2); -// tmp3 = vextq_f32(in2, pad1, 2); -// -// sum = vaddq_f32(in0, tmp0); -// sum = vaddq_f32(sum, tmp1); -// sum = vaddq_f32(sum, in2); -// sum = vaddq_f32(sum, tmp2); -// sum = vaddq_f32(sum, tmp3); -// out0 = vmulq_f32(sum, v_coef); -// -// for (int i = 0; i < c_mid; ++i) { -// if (i == 0) { -// vst1q_lane_f32(output_ptr + i, out0, 0); -// } -// if (i == 1) { -// vst1q_lane_f32(output_ptr + i, out0, 1); -// } -// if (i == 2) { -// vst1q_lane_f32(output_ptr + i, out0, 2); -// } -// } -// -// // bottom_right remain -// float32x4_t pad2 = vdupq_n_f32(input_seg[(h_in - 1) * w_in - 1]); -// float32x4_t pad3 = vdupq_n_f32(input_seg[h_in * w_in - 1]); -// -// tmp0 = vextq_f32(in4, pad2, 1); -// tmp1 = vextq_f32(in4, pad2, 2); -// tmp2 = vextq_f32(in6, pad3, 2); -// tmp3 
= vextq_f32(in6, pad3, 2); -// -// sum = vaddq_f32(in4, tmp0); -// sum = vaddq_f32(sum, tmp1); -// sum = vaddq_f32(sum, in6); -// sum = vaddq_f32(sum, tmp2); -// sum = vaddq_f32(sum, tmp3); -// out0 = vmulq_f32(sum, v_coef); -// -// for (int i = 0; i < c_mid; ++i) { -// if (i == 0) { -// vst1q_lane_f32(output_ptr + (h_out - 1) * w_out + i, out0, 0); -// } -// if (i == 1) { -// vst1q_lane_f32(output_ptr + (h_out - 1) * w_out + i, out0, 1); -// } -// if (i == 2) { -// vst1q_lane_f32(output_ptr + (h_out - 1) * w_out + i, out0, 2); -// } -// } -// // mid -// for (int j = 0; j < h_out - 2; ++j) { -// output_ptr = output_seg + w_out * (j + 1) + 1; -// input_tmp = input_seg + j * w_in; -// -// in0 = vld1q_f32(input_tmp); -// in2 = vld1q_f32(input_tmp + w_in); -// in4 = vld1q_f32(input_tmp + 2 * w_in); -// c_mid = w_out - 2; -// for (; c_mid > 3; c_mid -= 4) { -// in1 = vld1q_f32(input_tmp + 4); -// in3 = vld1q_f32(input_tmp + w_in + 4); -// in5 = vld1q_f32(input_tmp + 2 * w_in + 4); -// -// tmp0 = vextq_f32(in0, in1, 1); -// tmp1 = vextq_f32(in0, in1, 2); -// tmp2 = vextq_f32(in2, in3, 1); -// tmp3 = vextq_f32(in2, in3, 2); -// tmp4 = vextq_f32(in4, in5, 1); -// tmp5 = vextq_f32(in4, in5, 2); -// -// sum = vaddq_f32(in0, tmp0); -// sum = vaddq_f32(sum, tmp1); -// sum = vaddq_f32(sum, in2); -// sum = vaddq_f32(sum, tmp2); -// sum = vaddq_f32(sum, tmp3); -// sum = vaddq_f32(sum, in4); -// sum = vaddq_f32(sum, tmp4); -// sum = vaddq_f32(sum, tmp5); -// -// out0 = vmulq_f32(sum, v_coef); -// vst1q_f32(output_ptr, out0); -// output_ptr += 4; -// input_tmp += 4; -// in0 = in1; -// in2 = in3; -// in4 = in5; -// } -// // mid remain -// float32x4_t pad0 = vdupq_n_f32(input_seg[(j + 1) * w_in - 1]); -// float32x4_t pad1 = vdupq_n_f32(input_seg[(j + 2) * w_in - 1]); -// float32x4_t pad2 = vdupq_n_f32(input_seg[(j + 2) * w_in - 1]); -// -// tmp0 = vextq_f32(in0, pad0, 1); -// tmp1 = vextq_f32(in0, pad0, 2); -// tmp2 = vextq_f32(in2, pad1, 1); -// tmp3 = vextq_f32(in2, pad1, 2); -// 
tmp4 = vextq_f32(in4, pad2, 1); -// tmp5 = vextq_f32(in4, pad2, 2); -// -// sum = vaddq_f32(in0, tmp0); -// sum = vaddq_f32(sum, tmp1); -// sum = vaddq_f32(sum, in2); -// sum = vaddq_f32(sum, tmp2); -// sum = vaddq_f32(sum, tmp3); -// sum = vaddq_f32(sum, in4); -// sum = vaddq_f32(sum, tmp4); -// sum = vaddq_f32(sum, tmp5); -// out0 = vmulq_f32(sum, v_coef); -// -// for (int i = 0; i < c_mid; ++i) { -// if (i == 0) { -// vst1q_lane_f32(output_ptr + i, out0, 0); -// } -// if (i == 1) { -// vst1q_lane_f32(output_ptr + i, out0, 1); -// } -// if (i == 2) { -// vst1q_lane_f32(output_ptr + i, out0, 2); -// } -// } -// } -// // input_data += inputdata_channel_stride; -// // out_data += outputdata_channel_stride; -// } -// input_data += input_batch_stride; -// out_data += output_batch_stride; -// } #endif } @@ -839,7 +662,6 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input, wstart = max(wstart, 0); hend = min(hend, input_height); wend = min(wend, input_width); - const float *pos1 = input_seg + hstart * input_width + wstart; const float *pos2 = input_seg + (hstart + 1) * input_width + wstart; const float *pos3 = input_seg + (hstart + 2) * input_width + wstart; @@ -852,8 +674,7 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input, sum += input_seg[h * input_width + w]; } } - output_seg[ph * output_width + pw] = - sum / ((hend - hstart) * (wend - wstart) * 1.0); + output_seg[ph * output_width + pw] = sum / 9.0; } else { #if __aarch64__ #else diff --git a/src/operators/mul_op.cpp b/src/operators/mul_op.cpp index 044da7012eccde57a87d417f4f3c00b82e01da42..e386a803f03670b2df0d1b8527f8e3b70425da2a 100644 --- a/src/operators/mul_op.cpp +++ b/src/operators/mul_op.cpp @@ -55,13 +55,13 @@ void MulOp::InferShape() const { } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(mul, ops::MulOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU +#elif 
defined(PADDLE_MOBILE_MALI_GPU) REGISTER_OPERATOR_MALI_GPU(mul, ops::MulOp); -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_FPGA) +#else +REGISTER_OPERATOR_X86(mul, ops::MulOp); #endif #endif diff --git a/src/operators/mul_op.h b/src/operators/mul_op.h index 127048efbacf2da87de9371cd8e54875f8554d61..5cd174db07973461fe699242a2013d9c4ea78732 100644 --- a/src/operators/mul_op.h +++ b/src/operators/mul_op.h @@ -47,13 +47,4 @@ class MulOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(mul); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(mul); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/multiclass_nms_op.cpp b/src/operators/multiclass_nms_op.cpp index 4324cab35298a45ece7e375299909994648a27a4..f97170e27cbbcb62e734b580ab9ae39128665cba 100644 --- a/src/operators/multiclass_nms_op.cpp +++ b/src/operators/multiclass_nms_op.cpp @@ -39,12 +39,12 @@ void MultiClassNMSOp::InferShape() const { } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(multiclass_nms, ops::MultiClassNMSOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_MALI_GPU) +#elif defined(PADDLE_MOBILE_FPGA) +#else +REGISTER_OPERATOR_X86(multiclass_nms, ops::MultiClassNMSOp); #endif #endif diff --git a/src/operators/multiclass_nms_op.h b/src/operators/multiclass_nms_op.h index b40ef5ee009f6c16c685479ffcf58186958bb4cc..4919ec69b6b5b1a702760f46ddbfc77b16c7875e 100644 --- a/src/operators/multiclass_nms_op.h +++ b/src/operators/multiclass_nms_op.h @@ -52,12 +52,4 @@ class MultiClassNMSOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(multiclass_nms); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif 
diff --git a/src/operators/op_param.cpp b/src/operators/op_param.cpp index 4d1689911686198612eb4df4dfe8f99450ba503d..54d76a3654403cf473a5db15f5cb38adb17495a0 100644 --- a/src/operators/op_param.cpp +++ b/src/operators/op_param.cpp @@ -42,28 +42,33 @@ Print &operator<<(Print &printer, const ConvParam &conv_param) { template class ConvParam; template class ConvParam; template class ConvParam; +template class ConvParam; #endif template class ElementwiseAddParam; template class ElementwiseAddParam; template class ElementwiseAddParam; +template class ElementwiseAddParam; #ifdef MUL_OP template class MulParam; template class MulParam; template class MulParam; +template class MulParam; #endif #ifdef CONCAT_OP template class ConcatParam; template class ConcatParam; template class ConcatParam; +template class ConcatParam; #endif #ifdef LRN_OP template class LrnParam; template class LrnParam; template class LrnParam; +template class LrnParam; #endif #ifdef FUSION_CONVADD_OP diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 5b53743b75bfe65a9e029e44114b339603388c08..371c4982f8c87013552b7b1d163a64212867b6ae 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -40,30 +40,6 @@ using std::vector; template struct DtypeTensorTrait { - typedef void ptype; - typedef void rtype; -}; - -template <> -struct DtypeTensorTrait { - // This is the type we obtained in variable. - typedef framework::LoDTensor gtype; - // This type will be the parent class type - // or the same type. - typedef framework::Tensor rtype; -}; - -template <> -struct DtypeTensorTrait { - // This is the type we obtained in variable. - typedef framework::LoDTensor gtype; - // This type will be the parent class type - // or the same type. - typedef framework::Tensor rtype; -}; - -template <> -struct DtypeTensorTrait { // This is the type we obtained in variable. 
typedef framework::LoDTensor gtype; // This type will be the parent class type @@ -341,23 +317,22 @@ class OpParam { } }; +#ifdef CONV_OP template -class ConvParam : public OpParam { +class ConvParam : OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, const Scope &scope) { - filter_ = OpParam::FilterFrom(inputs, scope); - input_ = OpParam::InputFrom(inputs, scope); - if (outputs.count("Output")) { - output_ = OpParam::OutputFrom(outputs, scope); - } - strides_ = OpParam::GetAttr>("strides", attrs); - paddings_ = OpParam::GetAttr>("paddings", attrs); - dilations_ = OpParam::GetAttr>("dilations", attrs); - groups = OpParam::GetAttr("groups", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutputFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); } const RType *Input() const { return input_; } @@ -385,6 +360,7 @@ class ConvParam : public OpParam { }; template Print &operator<<(Print &printer, const ConvParam &conv_param); +#endif template class ElementwiseAddParam : OpParam { @@ -676,11 +652,6 @@ class PriorBoxParam : public OpParam { max_sizes_ = GetAttr>("max_sizes", attrs); aspect_ratios_ = GetAttr>("aspect_ratios", attrs); variances_ = GetAttr>("variances", attrs); - - if (HasAttr("min_max_aspect_ratios_order", attrs)) { - min_max_aspect_ratios_order_ = - GetAttr("min_max_aspect_ratios_order", attrs); - } flip_ = GetAttr("flip", attrs); clip_ = GetAttr("clip", attrs); step_w_ = GetAttr("step_w", attrs); @@ -713,10 +684,6 @@ class PriorBoxParam : public OpParam { const float &Offset() const { return offset_; } - const bool &MinMaxAspectRatiosOrder() const { - return min_max_aspect_ratios_order_; - } - private: RType 
*input_; RType *input_image_; @@ -731,7 +698,6 @@ class PriorBoxParam : public OpParam { float step_w_; float step_h_; float offset_; - bool min_max_aspect_ratios_order_; }; #endif @@ -795,7 +761,7 @@ class SoftmaxParam : public OpParam { fpga::BypassArgs fpga_bypass_args; public: - RType *FloatInput() const { + RType *FloatInput() { return float_input_x_ == nullptr ? input_x_ : float_input_x_.get(); } void SetFloatInput(Tensor *input) { float_input_x_.reset(input); } @@ -1294,29 +1260,52 @@ using FusionFcReluParam = FusionFcParam; #endif template -class FusionConvAddParam : public ConvParam { +class FusionConvAddParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, - const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - bias_ = OpParam::InputYFrom(inputs, scope); - axis_ = OpParam::GetAttr("axis", attrs); - output_ = OpParam::OutFrom(outputs, scope); + const Scope &scope) { + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + protected: RType *bias_; int axis_; + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector 
paddings_; + vector dilations_; + int groups; #ifdef PADDLE_MOBILE_FPGA private: @@ -1343,33 +1332,58 @@ class FusionConvAddReluParam : public FusionConvAddParam { #endif #ifdef FUSION_CONVADDPRELU_OP -template -class FusionConvAddPReluParam : public ConvParam { - typedef typename DtypeTensorTrait::gtype GType; - typedef typename DtypeTensorTrait::rtype RType; +template +class FusionConvAddPReluParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddPReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - alpha_ = OpParam::InputAlphaFrom(inputs, scope); - mode_ = OpParam::GetAttr("mode", attrs); + const AttributeMap &attrs, const Scope &scope) { + alpha_ = InputAlphaFrom(inputs, scope); + mode_ = GetAttr("mode", attrs); framework::DDim dims = alpha_->dims(); - bias_ = OpParam::InputYFrom(inputs, scope); - axis_ = OpParam::GetAttr("axis", attrs); - output_ = OpParam::OutFrom(outputs, scope); + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); } const RType *InputAlpha() const { return alpha_; } const std::string &Mode() const { return mode_; } RType *Bias() const { return bias_; } + const int &Axis() const { return axis_; } + + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return 
groups; } + protected: RType *bias_; int axis_; + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *alpha_; std::string mode_; #ifdef PADDLE_MOBILE_FPGA @@ -1385,30 +1399,35 @@ class FusionConvAddPReluParam : public ConvParam { #endif #ifdef FUSION_CONVADDADDPRELU_OP -template -class FusionConvAddAddPReluParam : public ConvParam { - typedef typename DtypeTensorTrait::gtype GType; - typedef typename DtypeTensorTrait::rtype RType; +template +class FusionConvAddAddPReluParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddAddPReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - bias1_ = OpParam::InputYFrom1(inputs, scope); - alpha_ = OpParam::InputAlphaFrom(inputs, scope); - mode_ = OpParam::GetAttr("mode", attrs); + const AttributeMap &attrs, const Scope &scope) { + bias1_ = InputYFrom1(inputs, scope); + alpha_ = InputAlphaFrom(inputs, scope); + mode_ = GetAttr("mode", attrs); framework::DDim dims = alpha_->dims(); - bias_ = OpParam::InputYFrom(inputs, scope); - output_ = OpParam::OutFrom(outputs, scope); - axis_ = OpParam::GetAttr("axis", attrs); - keyOutput_ = OpParam::getkey("addOut", inputs, 0); - keyX1_ = OpParam::getkey("addX", inputs, 1); - keyY1_ = OpParam::getkey("Y", inputs, 1); + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + keyOutput_ = getkey("addOut", inputs, 0); + keyX1_ = getkey("addX", inputs, 1); + keyY1_ = getkey("Y", inputs, 1); if (keyX1_ == keyOutput_) { - bias1_ 
= OpParam::InputYFrom1(inputs, scope); + bias1_ = InputYFrom1(inputs, scope); } else if (keyY1_ == keyOutput_) { - bias1_ = OpParam::InputXFrom1(inputs, scope); + bias1_ = InputXFrom1(inputs, scope); } } const RType *InputAlpha() const { return alpha_; } @@ -1418,12 +1437,31 @@ class FusionConvAddAddPReluParam : public ConvParam { RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } + + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + protected: RType *bias_; int axis_; + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *alpha_; std::string mode_; RType *bias1_; @@ -1444,32 +1482,49 @@ class FusionConvAddAddPReluParam : public ConvParam { #ifdef FUSION_CONVADDBNRELU_OP template -class FusionConvAddBNReluParam : public ConvParam { +class FusionConvAddBNReluParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddBNReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - bias_ = OpParam::InputYFrom(inputs, scope); - axis_ = OpParam::GetAttr("axis", attrs); - output_ = OpParam::OutFrom(outputs, scope); - input_bias_ = OpParam::InputBiasFrom(inputs, scope); - input_mean_ = OpParam::InputMeanFrom(inputs, scope); - input_scale_ = OpParam::InputScaleFrom(inputs, scope); - input_variance_ = OpParam::InputVarianceFrom(inputs, scope); - epsilon_ = OpParam::GetAttr("epsilon", attrs); - momentum_ = OpParam::GetAttr("momentum", attrs); - // is_test_ = 
OpParam::GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) { + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1495,7 +1550,13 @@ class FusionConvAddBNReluParam : public ConvParam { protected: RType *bias_; int axis_; + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1519,40 +1580,57 @@ class FusionConvAddBNReluParam : public ConvParam { #ifdef FUSION_CONVBNADDRELU_OP template -class FusionConvBNAddReluParam : public ConvParam { +class FusionConvBNAddReluParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvBNAddReluParam(const VariableNameMap &inputs, const 
VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - bias_ = OpParam::InputYFrom(inputs, scope); - axis_ = OpParam::GetAttr("axis", attrs); - output_ = OpParam::OutFrom(outputs, scope); - input_bias_ = OpParam::InputBiasFrom(inputs, scope); - input_mean_ = OpParam::InputMeanFrom(inputs, scope); - input_scale_ = OpParam::InputScaleFrom(inputs, scope); - input_variance_ = OpParam::InputVarianceFrom(inputs, scope); - epsilon_ = OpParam::GetAttr("epsilon", attrs); - momentum_ = OpParam::GetAttr("momentum", attrs); - keyBNY_ = OpParam::getkey("BNY", inputs, 0); - keyX_ = OpParam::getkey("X", inputs, 0); - keyY_ = OpParam::getkey("Y", inputs, 0); + const AttributeMap &attrs, const Scope &scope) { + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + keyBNY_ = getkey("BNY", inputs, 0); + keyX_ = getkey("X", inputs, 0); + keyY_ = getkey("Y", inputs, 0); if (keyX_ == keyBNY_) { - bias_ = OpParam::InputYFrom(inputs, scope); + bias_ = InputYFrom(inputs, scope); } else if (keyY_ == keyBNY_) { - bias_ = OpParam::InputXFrom(inputs, scope); + bias_ = InputXFrom(inputs, scope); } - // is_test_ = OpParam::GetAttr("is_test", attrs); + // is_test_ = GetAttr("is_test", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } + const RType *Input() const { return input_; } + + const RType *Filter() const { return 
filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1578,7 +1656,13 @@ class FusionConvBNAddReluParam : public ConvParam { protected: RType *bias_; int axis_; + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1605,26 +1689,44 @@ class FusionConvBNAddReluParam : public ConvParam { #ifdef FUSION_CONVBN_OP template -class FusionConvBNParam : public ConvParam { +class FusionConvBNParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvBNParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, - const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - output_y_ = OpParam::OutputYFrom(outputs, scope); - input_bias_ = OpParam::InputBiasFrom(inputs, scope); - input_mean_ = OpParam::InputMeanFrom(inputs, scope); - input_scale_ = OpParam::InputScaleFrom(inputs, scope); - input_variance_ = OpParam::InputVarianceFrom(inputs, scope); - epsilon_ = OpParam::GetAttr("epsilon", attrs); - momentum_ = OpParam::GetAttr("momentum", attrs); - // is_test_ = OpParam::GetAttr("is_test", attrs); + const Scope &scope) { + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_y_ = OutputYFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + 
input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); } + + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_y_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1648,7 +1750,13 @@ class FusionConvBNParam : public ConvParam { const RType *NewBias() const { return new_bias_; } protected: + RType *input_; RType *output_y_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1672,32 +1780,49 @@ class FusionConvBNParam : public ConvParam { #ifdef FUSION_CONVADDBN_OP template -class FusionConvAddBNParam : public ConvParam { +class FusionConvAddBNParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvAddBNParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - bias_ = OpParam::InputYFrom(inputs, scope); - axis_ = OpParam::GetAttr("axis", attrs); - output_y_ = OpParam::OutputYFrom(outputs, scope); - input_bias_ = OpParam::InputBiasFrom(inputs, scope); - input_mean_ = OpParam::InputMeanFrom(inputs, scope); - input_scale_ = OpParam::InputScaleFrom(inputs, scope); - input_variance_ = OpParam::InputVarianceFrom(inputs, scope); - epsilon_ = OpParam::GetAttr("epsilon", attrs); - momentum_ = OpParam::GetAttr("momentum", attrs); - // is_test_ = 
OpParam::GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) { + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_y_ = OutputYFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); } RType *Bias() const { return bias_; } const int &Axis() const { return axis_; } + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_y_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1723,7 +1848,13 @@ class FusionConvAddBNParam : public ConvParam { protected: RType *bias_; int axis_; + RType *input_; RType *output_y_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -1747,26 +1878,44 @@ class FusionConvAddBNParam : public ConvParam { #ifdef FUSION_DWCONVBNRELU_OP template -class FusionDWConvBNReluParam : public ConvParam { +class FusionDWConvBNReluParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionDWConvBNReluParam(const VariableNameMap &inputs, const 
VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - output_ = OpParam::OutFrom(outputs, scope); - input_bias_ = OpParam::InputBiasFrom(inputs, scope); - input_mean_ = OpParam::InputMeanFrom(inputs, scope); - input_scale_ = OpParam::InputScaleFrom(inputs, scope); - input_variance_ = OpParam::InputVarianceFrom(inputs, scope); - epsilon_ = OpParam::GetAttr("epsilon", attrs); - momentum_ = OpParam::GetAttr("momentum", attrs); - // is_test_ = OpParam::GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) { + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); } + + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + const RType *InputBias() const { return input_bias_; } const RType *InputMean() const { return input_mean_; } @@ -1790,7 +1939,13 @@ class FusionDWConvBNReluParam : public ConvParam { const RType *NewBias() const { return new_bias_; } protected: + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; 
@@ -1806,26 +1961,45 @@ class FusionDWConvBNReluParam : public ConvParam { #ifdef FUSION_CONVBNRELU_OP template -class FusionConvBNReluParam : public ConvParam { +class FusionConvBNReluParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; typedef typename DtypeTensorTrait::rtype RType; public: FusionConvBNReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) - : ConvParam(inputs, outputs, attrs, scope) { - output_ = OpParam::OutFrom(outputs, scope); - input_bias_ = OpParam::InputBiasFrom(inputs, scope); - input_mean_ = OpParam::InputMeanFrom(inputs, scope); - input_scale_ = OpParam::InputScaleFrom(inputs, scope); - input_variance_ = OpParam::InputVarianceFrom(inputs, scope); - epsilon_ = OpParam::GetAttr("epsilon", attrs); - momentum_ = OpParam::GetAttr("momentum", attrs); - // is_test_ = OpParam::GetAttr("is_test", attrs); + const AttributeMap &attrs, const Scope &scope) { + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); } + + const RType *Input() const { return input_; } + + const RType *Filter() const { return filter_; } + RType *Output() const { return output_; } + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + const RType *InputBias() const { return input_bias_; } 
const RType *InputMean() const { return input_mean_; } @@ -1849,7 +2023,13 @@ class FusionConvBNReluParam : public ConvParam { const RType *NewBias() const { return new_bias_; } protected: + RType *input_; RType *output_; + RType *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; RType *input_bias_; RType *input_mean_; RType *input_scale_; @@ -2151,5 +2331,90 @@ class ShapeParam : public OpParam { }; #endif +template +class QuantizeParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + QuantizeParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, + const AttributeMap &attrs, + const Scope &scope) { + input_ = InputXFrom(inputs, scope); + out_ = OutFrom(outputs, scope); + if (HasAttr("is_static", attrs)) { + is_static_ = GetAttr("is_static", attrs); + } + // online + // scale = max(abs(x)) + online_scale_ = GetVarValue("OutScale", outputs, scope); + if (HasAttr("is_signed", attrs)) { + is_signed_ = GetAttr("signed", attrs); + } + if (HasAttr("mantissa", attrs)) { + mantissa_bits_ = GetAttr("mantissa", attrs); + } + // offline + if (HasAttr("static_scale", attrs)) { + static_scale_ = GetAttr("static_scale", attrs); + } + // x = round(scale * x) + if (HasAttr("round_type", attrs)) { + round_type_ = GetAttr("round_type", attrs); + } + } + + public: + // op input + RType *input_; + // op output + RType *out_; + // + RType *online_scale_; + // signed quantize or unsigned quantize + bool is_signed_ = true; + // mantissa bit width + // for int8, mantissa bits is 7 + int mantissa_bits_ = 7; + // if static scale or not + bool is_static_ = false; + // quantize scale + float static_scale_ = 1.0f; + // round method type + // nearest_zero and nearest_even is valid currently + RoundType round_type_ = ROUND_NEAREST_TO_EVEN; +}; + +template +class DequantizeParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename 
DtypeTensorTrait::rtype RType; + + public: + DequantizeParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, + const AttributeMap &attrs, + const Scope &scope) { + input_ = InputXFrom(inputs, scope); + out_ = OutFrom(outputs, scope); + activation_scale_ = GetVarValue("Scale", inputs, scope); + // dequantization is performed as x = x / static_scale / online_scale + if (HasAttr("weight_scale", attrs)) { + weight_scale_ = GetAttr("weight_scale", attrs); + } else { + weight_scale_ = GetAttr("max_range", attrs); + } + } + + public: + // op input + RType *input_; + // op output + RType *out_; + RType *activation_scale_; + float weight_scale_; +}; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp index dd23059ea01a332aff45137b7f7ed4c9f6c2e1bb..e439cfb97b8d7d5b4d3876a29ccd951f9a6c12f1 100644 --- a/src/operators/pool_op.cpp +++ b/src/operators/pool_op.cpp @@ -68,5 +68,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp); #endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(pool2d, ops::PoolOp); +#endif #endif diff --git a/src/operators/pool_op.h b/src/operators/pool_op.h index 4f76fb8f800dea43432b48562cca563505a1af76..9880599ce5fc71048d6a555b3fa4848c5d7a8220 100644 --- a/src/operators/pool_op.h +++ b/src/operators/pool_op.h @@ -48,14 +48,4 @@ class PoolOp : public OperatorWithKernel, } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(pool2d); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(pool2d); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(pool2d); -#endif - #endif diff --git a/src/operators/prelu_op.cpp b/src/operators/prelu_op.cpp index 332b5cc9bbbabf9498858b96e0028a9e3992f3ea..d0bc85b709620fa542f526b18bf3c1c05324e7ce 100644 --- a/src/operators/prelu_op.cpp +++ b/src/operators/prelu_op.cpp @@ -39,7 +39,8 @@ REGISTER_OPERATOR_CPU(prelu, ops::PReluOp); #ifdef 
PADDLE_MOBILE_MALI_GPU REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(prelu, ops::PReluOp); #endif #endif diff --git a/src/operators/prelu_op.h b/src/operators/prelu_op.h index 7b6b778fa6e8f0951faffda6803b25b6b23ea17c..af33476b7298a5728a6ef944506d55f422a2fa8c 100644 --- a/src/operators/prelu_op.h +++ b/src/operators/prelu_op.h @@ -50,14 +50,4 @@ class PReluOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(prelu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(prelu); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(prelu); -#endif - #endif diff --git a/src/operators/prior_box_op.cpp b/src/operators/prior_box_op.cpp index a05a0ddcec5ba9d442b58846468a121e9b655a6a..59da95ef8bf0428c2d872c89be1b78d5f7bf60c4 100644 --- a/src/operators/prior_box_op.cpp +++ b/src/operators/prior_box_op.cpp @@ -54,7 +54,8 @@ REGISTER_OPERATOR_CPU(prior_box, ops::PriorBoxOp); #endif #ifdef PADDLE_MOBILE_MALI_GPU #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(prior_box, ops::PriorBoxOp); #endif #endif diff --git a/src/operators/prior_box_op.h b/src/operators/prior_box_op.h index 00fc8e039c9958e4b43653d6360c0f54c78648a1..f7e02802ae82368319d5e9095c73afcac295b4fc 100644 --- a/src/operators/prior_box_op.h +++ b/src/operators/prior_box_op.h @@ -51,12 +51,4 @@ class PriorBoxOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(prior_box); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/quantize_op.cpp b/src/operators/quantize_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f264c211ec4d00a40da2496caa5c616f559a2b6a --- /dev/null +++ b/src/operators/quantize_op.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 
PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "operators/quantize_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void QuantizeOp::InferShape() const { + const auto& input_dims = this->param_.input_->dims(); + this->param_.out_->Resize(input_dims); + auto scale_dims = framework::make_ddim(std::vector{1}); + this->param_.online_scale_->Resize(scale_dims); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(quantize, ops::QuantizeOp); +#endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(quantize, ops::QuantizeOp); +#endif + diff --git a/src/operators/quantize_op.h b/src/operators/quantize_op.h new file mode 100644 index 0000000000000000000000000000000000000000..4943c6f4668f3ca1c4ee5c6569b0b91df880424f --- /dev/null +++ b/src/operators/quantize_op.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" +#include "operators/kernel/quantize_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template +class QuantizeOp : public framework::OperatorWithKernel< + DeviceType, QuantizeParam, + operators::QuantizeKernel> { + public: + QuantizeOp(const std::string &type, + const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel, + operators::QuantizeKernel>( + type, inputs, outputs, attrs, scope) {} + // inference output shape + void InferShape() const override; +}; + +} // namespace paddle_mobile +} // namespace operators diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp index 2a771e81e7a5a0e869984990b52b98d15036543a..3275fa499ddc13a31ffb7cfac6121e72c7fc9f6c 100644 --- a/src/operators/relu_op.cpp +++ b/src/operators/relu_op.cpp @@ -39,7 +39,8 @@ REGISTER_OPERATOR_CPU(relu, ops::ReluOp); #ifdef PADDLE_MOBILE_MALI_GPU REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(relu, ops::ReluOp); #endif #endif diff --git a/src/operators/relu_op.h b/src/operators/relu_op.h index 0364dd7f8ec4b3861200380597e18ede0819e8b6..584c9da3c80c4e3e9e69fdb70a602cdd486e26b8 100644 --- a/src/operators/relu_op.h +++ b/src/operators/relu_op.h @@ -53,13 +53,4 @@ class ReluOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(relu); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(relu); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/reshape_op.cpp b/src/operators/reshape_op.cpp index dcc15009af2b23129552d58b3fa22c3c67684dce..4426149be11ead0da63d85351cc143c89b11cbc9 100644 --- 
a/src/operators/reshape_op.cpp +++ b/src/operators/reshape_op.cpp @@ -38,7 +38,8 @@ REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp); #ifdef PADDLE_MOBILE_MALI_GPU REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(reshape, ops::ReshapeOp); #endif #endif diff --git a/src/operators/reshape_op.h b/src/operators/reshape_op.h index 9284e94f346ed0f225d6dabe16077b1fb2034c64..a7347ddd8c6511224d4422f66eac71e61bf48549 100644 --- a/src/operators/reshape_op.h +++ b/src/operators/reshape_op.h @@ -51,14 +51,4 @@ class ReshapeOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(reshape); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(reshape); -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/resize_op.cpp b/src/operators/resize_op.cpp index 02c50b662665fc9bd2f662922cb88dbce9fc5d53..1dc52cb7b1399fd247e1651271f79d330c8d9542 100644 --- a/src/operators/resize_op.cpp +++ b/src/operators/resize_op.cpp @@ -30,14 +30,13 @@ void ResizeOp::InferShape() const { namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(resize); REGISTER_OPERATOR_CPU(resize, ops::ResizeOp); #endif #ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(resize); REGISTER_OPERATOR_MALI_GPU(resize, ops::ResizeOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(resize, ops::ResizeOp); #endif #endif diff --git a/src/operators/scale_op.cpp b/src/operators/scale_op.cpp index 968fcd4098e92a47899c9a733c0261d91c314c29..e55f696f730dd4e9f38a530ef857cf93ce1af436 100644 --- a/src/operators/scale_op.cpp +++ b/src/operators/scale_op.cpp @@ -30,14 +30,13 @@ void ScaleOp::InferShape() const { namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(scale); REGISTER_OPERATOR_CPU(scale, ops::ScaleOp); #endif #ifdef 
PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(scale); REGISTER_OPERATOR_MALI_GPU(scale, ops::ScaleOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(scale, ops::ScaleOp); #endif #endif diff --git a/src/operators/shape_op.cpp b/src/operators/shape_op.cpp index b50a9c4507bff31ee753980c93917b93a4e1f42f..ac654356f1e2fbc3d7d450e46df588055e26c514 100644 --- a/src/operators/shape_op.cpp +++ b/src/operators/shape_op.cpp @@ -36,7 +36,8 @@ REGISTER_OPERATOR_CPU(shape, ops::ShapeOp); #endif #ifdef PADDLE_MOBILE_MALI_GPU #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(shape, ops::ShapeOp); #endif #endif diff --git a/src/operators/shape_op.h b/src/operators/shape_op.h index 2f88c807d3c331f83cf87e6c77a65fa5d90a9f4e..37b4fef1f4667051e51adbd96d6ada36bf36b647 100644 --- a/src/operators/shape_op.h +++ b/src/operators/shape_op.h @@ -48,12 +48,4 @@ class ShapeOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(shape); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/sigmoid_op.cpp b/src/operators/sigmoid_op.cpp index 8ea4c98942e0630f5b69133991583ee1192c8153..2219f302d36d5c723034daaaca7084858ab6c87a 100644 --- a/src/operators/sigmoid_op.cpp +++ b/src/operators/sigmoid_op.cpp @@ -27,12 +27,12 @@ void SigmoidOp::InferShape() const { } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(sigmoid, ops::SigmoidOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_MALI_GPU) +#elif defined(PADDLE_MOBILE_FPGA) +#else +REGISTER_OPERATOR_X86(sigmoid, ops::SigmoidOp); #endif #endif diff --git a/src/operators/sigmoid_op.h b/src/operators/sigmoid_op.h index 
533ea587958e8766b1469c73b909cfa2fcb60696..406db6db114775460b4af616b372cfb7285d7ac1 100644 --- a/src/operators/sigmoid_op.h +++ b/src/operators/sigmoid_op.h @@ -46,12 +46,4 @@ class SigmoidOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(sigmoid); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/slice_op.cpp b/src/operators/slice_op.cpp index b77a675e10ed030443e1d4074239a715ddedf772..975dd4dbd6a6d6ea05b32e370858beabc3142670 100644 --- a/src/operators/slice_op.cpp +++ b/src/operators/slice_op.cpp @@ -29,14 +29,13 @@ void SliceOp::InferShape() const { namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(slice); REGISTER_OPERATOR_CPU(slice, ops::SliceOp); #endif #ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(slice); REGISTER_OPERATOR_MALI_GPU(slice, ops::SliceOp); #endif -#ifdef PADDLE_MOBILE_FPGA +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(slice, ops::SliceOp); #endif #endif diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp index e85edc69c3291c794f2eeb8119b91b2926c4d870..14e0ffeec0c7a5c9c134b753db74107358fe062a 100644 --- a/src/operators/softmax_op.cpp +++ b/src/operators/softmax_op.cpp @@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp); #endif +#ifdef PADDLE_MOBILE_X86 +REGISTER_OPERATOR_X86(softmax, ops::SoftmaxOp); +#endif #endif diff --git a/src/operators/softmax_op.h b/src/operators/softmax_op.h index 579a2ed605cb3f3c8c4a3d0c2f1ccc7bd9595fc2..cee5993174a02f610c1de0ad47ca6b73477fd946 100644 --- a/src/operators/softmax_op.h +++ b/src/operators/softmax_op.h @@ -48,14 +48,4 @@ class SoftmaxOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(softmax); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU 
-USE_OP_MALI_GPU(softmax); -#endif -#ifdef PADDLE_MOBILE_FPGA -USE_OP_FPGA(softmax); -#endif - #endif diff --git a/src/operators/split_op.cpp b/src/operators/split_op.cpp index 8b7fadc1a64d1a6f7549e5875b543c871b385e6d..c807df05d1f70be0250ae2dd80f853d339031d39 100644 --- a/src/operators/split_op.cpp +++ b/src/operators/split_op.cpp @@ -80,12 +80,12 @@ void SplitOp::InferShape() const { } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(split, ops::SplitOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_MALI_GPU) +#elif defined(PADDLE_MOBILE_FPGA) +#else +REGISTER_OPERATOR_X86(split, ops::SplitOp); #endif -#endif +#endif // SPLIT_OP diff --git a/src/operators/split_op.h b/src/operators/split_op.h index f7d60b37441e77c5d47ac6040404535a841bcf8e..d37bf7a0f93005a4c95e7e82c7c90313fda409cb 100644 --- a/src/operators/split_op.h +++ b/src/operators/split_op.h @@ -47,12 +47,4 @@ class SplitOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(split); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/operators/transpose_op.cpp b/src/operators/transpose_op.cpp index 5f193f96396c8d4d7cb58143573015384e7a7c28..1a2fdc802c90a873c3927f9e8b82926a6afa0705 100644 --- a/src/operators/transpose_op.cpp +++ b/src/operators/transpose_op.cpp @@ -52,12 +52,12 @@ void TransposeOp::InferShape() const { } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -#ifdef PADDLE_MOBILE_CPU +#if defined(PADDLE_MOBILE_CPU) REGISTER_OPERATOR_CPU(transpose, ops::TransposeOp); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA +#elif defined(PADDLE_MOBILE_MALI_GPU) +#elif defined(PADDLE_MOBILE_FPGA) +#else +REGISTER_OPERATOR_X86(transpose, ops::TransposeOp); #endif -#endif 
+#endif // TRANSPOSE_OP diff --git a/src/operators/transpose_op.h b/src/operators/transpose_op.h index b96ce4e17ca4b0d0e321cefb3175b973cd7df307..7e5f72058d4e06f5b5b1fef81ade0350ea78f21c 100644 --- a/src/operators/transpose_op.h +++ b/src/operators/transpose_op.h @@ -50,12 +50,4 @@ class TransposeOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile -#ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(transpose); -#endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif -#ifdef PADDLE_MOBILE_FPGA -#endif - #endif diff --git a/src/protobuf-c/protobuf-c.c b/src/protobuf-c/protobuf-c.c new file mode 100644 index 0000000000000000000000000000000000000000..5d857e1eeb264d32089ef1f8924bb5376064888b --- /dev/null +++ b/src/protobuf-c/protobuf-c.c @@ -0,0 +1,3646 @@ +/* + * Copyright (c) 2008-2015, Dave Benson and the protobuf-c authors. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*! \file + * Support library for `protoc-c` generated code. + * + * This file implements the public API used by the code generated + * by `protoc-c`. + * + * \authors Dave Benson and the protobuf-c authors + * + * \copyright 2008-2014. Licensed under the terms of the [BSD-2-Clause] license. + */ + +/** + * \todo 64-BIT OPTIMIZATION: certain implementations use 32-bit math + * even on 64-bit platforms (uint64_size, uint64_pack, parse_uint64). + * + * \todo Use size_t consistently. + */ + +#include /* for malloc, free */ +#include /* for strcmp, strlen, memcpy, memmove, memset */ + +#include "protobuf-c.h" + +#define TRUE 1 +#define FALSE 0 + +#define PROTOBUF_C__ASSERT_NOT_REACHED() assert(0) + +/* Workaround for Microsoft compilers. */ +#ifdef _MSC_VER +# define inline __inline +#endif + +/** + * \defgroup internal Internal functions and macros + * + * These are not exported by the library but are useful to developers working + * on `libprotobuf-c` itself. + */ + +/** + * \defgroup macros Utility macros for manipulating structures + * + * Macros and constants used to manipulate the base "classes" generated by + * `protobuf-c`. They also define limits and check correctness. + * + * \ingroup internal + * @{ + */ + +/** The maximum length of a 64-bit integer in varint encoding. */ +#define MAX_UINT64_ENCODED_SIZE 10 + +#ifndef PROTOBUF_C_UNPACK_ERROR +# define PROTOBUF_C_UNPACK_ERROR(...) 
+#endif + +const char protobuf_c_empty_string[] = ""; + +/** + * Internal `ProtobufCMessage` manipulation macro. + * + * Base macro for manipulating a `ProtobufCMessage`. Used by STRUCT_MEMBER() and + * STRUCT_MEMBER_PTR(). + */ +#define STRUCT_MEMBER_P(struct_p, struct_offset) \ + ((void *) ((uint8_t *) (struct_p) + (struct_offset))) + +/** + * Return field in a `ProtobufCMessage` based on offset. + * + * Take a pointer to a `ProtobufCMessage` and find the field at the offset. + * Cast it to the passed type. + */ +#define STRUCT_MEMBER(member_type, struct_p, struct_offset) \ + (*(member_type *) STRUCT_MEMBER_P((struct_p), (struct_offset))) + +/** + * Return field in a `ProtobufCMessage` based on offset. + * + * Take a pointer to a `ProtobufCMessage` and find the field at the offset. Cast + * it to a pointer to the passed type. + */ +#define STRUCT_MEMBER_PTR(member_type, struct_p, struct_offset) \ + ((member_type *) STRUCT_MEMBER_P((struct_p), (struct_offset))) + +/* Assertions for magic numbers. 
*/ + +#define ASSERT_IS_ENUM_DESCRIPTOR(desc) \ + assert((desc)->magic == PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC) + +#define ASSERT_IS_MESSAGE_DESCRIPTOR(desc) \ + assert((desc)->magic == PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) + +#define ASSERT_IS_MESSAGE(message) \ + ASSERT_IS_MESSAGE_DESCRIPTOR((message)->descriptor) + +#define ASSERT_IS_SERVICE_DESCRIPTOR(desc) \ + assert((desc)->magic == PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC) + +/**@}*/ + +/* --- version --- */ + +const char * +protobuf_c_version(void) +{ + return PROTOBUF_C_VERSION; +} + +uint32_t +protobuf_c_version_number(void) +{ + return PROTOBUF_C_VERSION_NUMBER; +} + +/* --- allocator --- */ + +static void * +system_alloc(void *allocator_data, size_t size) +{ + return malloc(size); +} + +static void +system_free(void *allocator_data, void *data) +{ + free(data); +} + +static inline void * +do_alloc(ProtobufCAllocator *allocator, size_t size) +{ + return allocator->alloc(allocator->allocator_data, size); +} + +static inline void +do_free(ProtobufCAllocator *allocator, void *data) +{ + if (data != NULL) + allocator->free(allocator->allocator_data, data); +} + +/* + * This allocator uses the system's malloc() and free(). It is the default + * allocator used if NULL is passed as the ProtobufCAllocator to an exported + * function. 
+ */ +static ProtobufCAllocator protobuf_c__allocator = { + .alloc = &system_alloc, + .free = &system_free, + .allocator_data = NULL, +}; + +/* === buffer-simple === */ + +void +protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, + size_t len, const uint8_t *data) +{ + ProtobufCBufferSimple *simp = (ProtobufCBufferSimple *) buffer; + size_t new_len = simp->len + len; + + if (new_len > simp->alloced) { + ProtobufCAllocator *allocator = simp->allocator; + size_t new_alloced = simp->alloced * 2; + uint8_t *new_data; + + if (allocator == NULL) + allocator = &protobuf_c__allocator; + while (new_alloced < new_len) + new_alloced += new_alloced; + new_data = do_alloc(allocator, new_alloced); + if (!new_data) + return; + memcpy(new_data, simp->data, simp->len); + if (simp->must_free_data) + do_free(allocator, simp->data); + else + simp->must_free_data = TRUE; + simp->data = new_data; + simp->alloced = new_alloced; + } + memcpy(simp->data + simp->len, data, len); + simp->len = new_len; +} + +/** + * \defgroup packedsz protobuf_c_message_get_packed_size() implementation + * + * Routines mainly used by protobuf_c_message_get_packed_size(). + * + * \ingroup internal + * @{ + */ + +/** + * Return the number of bytes required to store the tag for the field. Includes + * 3 bits for the wire-type, and a single bit that denotes the end-of-tag. + * + * \param number + * Field tag to encode. + * \return + * Number of bytes required. + */ +static inline size_t +get_tag_size(uint32_t number) +{ + if (number < (1UL << 4)) { + return 1; + } else if (number < (1UL << 11)) { + return 2; + } else if (number < (1UL << 18)) { + return 3; + } else if (number < (1UL << 25)) { + return 4; + } else { + return 5; + } +} + +/** + * Return the number of bytes required to store a variable-length unsigned + * 32-bit integer in base-128 varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. 
+ */ +static inline size_t +uint32_size(uint32_t v) +{ + if (v < (1UL << 7)) { + return 1; + } else if (v < (1UL << 14)) { + return 2; + } else if (v < (1UL << 21)) { + return 3; + } else if (v < (1UL << 28)) { + return 4; + } else { + return 5; + } +} + +/** + * Return the number of bytes required to store a variable-length signed 32-bit + * integer in base-128 varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t +int32_size(int32_t v) +{ + if (v < 0) { + return 10; + } else if (v < (1L << 7)) { + return 1; + } else if (v < (1L << 14)) { + return 2; + } else if (v < (1L << 21)) { + return 3; + } else if (v < (1L << 28)) { + return 4; + } else { + return 5; + } +} + +/** + * Return the ZigZag-encoded 32-bit unsigned integer form of a 32-bit signed + * integer. + * + * \param v + * Value to encode. + * \return + * ZigZag encoded integer. + */ +static inline uint32_t +zigzag32(int32_t v) +{ + if (v < 0) + return (-(uint32_t)v) * 2 - 1; + else + return (uint32_t)(v) * 2; +} + +/** + * Return the number of bytes required to store a signed 32-bit integer, + * converted to an unsigned 32-bit integer with ZigZag encoding, using base-128 + * varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t +sint32_size(int32_t v) +{ + return uint32_size(zigzag32(v)); +} + +/** + * Return the number of bytes required to store a 64-bit unsigned integer in + * base-128 varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. 
+ */ +static inline size_t +uint64_size(uint64_t v) +{ + uint32_t upper_v = (uint32_t) (v >> 32); + + if (upper_v == 0) { + return uint32_size((uint32_t) v); + } else if (upper_v < (1UL << 3)) { + return 5; + } else if (upper_v < (1UL << 10)) { + return 6; + } else if (upper_v < (1UL << 17)) { + return 7; + } else if (upper_v < (1UL << 24)) { + return 8; + } else if (upper_v < (1UL << 31)) { + return 9; + } else { + return 10; + } +} + +/** + * Return the ZigZag-encoded 64-bit unsigned integer form of a 64-bit signed + * integer. + * + * \param v + * Value to encode. + * \return + * ZigZag encoded integer. + */ +static inline uint64_t +zigzag64(int64_t v) +{ + if (v < 0) + return (-(uint64_t)v) * 2 - 1; + else + return (uint64_t)(v) * 2; +} + +/** + * Return the number of bytes required to store a signed 64-bit integer, + * converted to an unsigned 64-bit integer with ZigZag encoding, using base-128 + * varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t +sint64_size(int64_t v) +{ + return uint64_size(zigzag64(v)); +} + +/** + * Calculate the serialized size of a single required message field, including + * the space needed by the preceding tag. + * + * \param field + * Field descriptor for member. + * \param member + * Field to encode. + * \return + * Number of bytes required. 
+ */ +static size_t +required_field_get_packed_size(const ProtobufCFieldDescriptor *field, + const void *member) +{ + size_t rv = get_tag_size(field->id); + + switch (field->type) { + case PROTOBUF_C_TYPE_SINT32: + return rv + sint32_size(*(const int32_t *) member); + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + return rv + int32_size(*(const int32_t *) member); + case PROTOBUF_C_TYPE_UINT32: + return rv + uint32_size(*(const uint32_t *) member); + case PROTOBUF_C_TYPE_SINT64: + return rv + sint64_size(*(const int64_t *) member); + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + return rv + uint64_size(*(const uint64_t *) member); + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + return rv + 4; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + return rv + 8; + case PROTOBUF_C_TYPE_BOOL: + return rv + 1; + case PROTOBUF_C_TYPE_FLOAT: + return rv + 4; + case PROTOBUF_C_TYPE_DOUBLE: + return rv + 8; + case PROTOBUF_C_TYPE_STRING: { + const char *str = *(char * const *) member; + size_t len = str ? strlen(str) : 0; + return rv + uint32_size(len) + len; + } + case PROTOBUF_C_TYPE_BYTES: { + size_t len = ((const ProtobufCBinaryData *) member)->len; + return rv + uint32_size(len) + len; + } + case PROTOBUF_C_TYPE_MESSAGE: { + const ProtobufCMessage *msg = *(ProtobufCMessage * const *) member; + size_t subrv = msg ? protobuf_c_message_get_packed_size(msg) : 0; + return rv + uint32_size(subrv) + subrv; + } + } + PROTOBUF_C__ASSERT_NOT_REACHED(); + return 0; +} + +/** + * Calculate the serialized size of a single oneof message field, including + * the space needed by the preceding tag. Returns 0 if the oneof field isn't + * selected or is not set. + * + * \param field + * Field descriptor for member. + * \param oneof_case + * Enum value that selects the field in the oneof. + * \param member + * Field to encode. + * \return + * Number of bytes required. 
+ */ +static size_t +oneof_field_get_packed_size(const ProtobufCFieldDescriptor *field, + uint32_t oneof_case, + const void *member) +{ + if (oneof_case != field->id) { + return 0; + } + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) + { + const void *ptr = *(const void * const *) member; + if (ptr == NULL || ptr == field->default_value) + return 0; + } + return required_field_get_packed_size(field, member); +} + +/** + * Calculate the serialized size of a single optional message field, including + * the space needed by the preceding tag. Returns 0 if the optional field isn't + * set. + * + * \param field + * Field descriptor for member. + * \param has + * True if the field exists, false if not. + * \param member + * Field to encode. + * \return + * Number of bytes required. + */ +static size_t +optional_field_get_packed_size(const ProtobufCFieldDescriptor *field, + const protobuf_c_boolean has, + const void *member) +{ + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) + { + const void *ptr = *(const void * const *) member; + if (ptr == NULL || ptr == field->default_value) + return 0; + } else { + if (!has) + return 0; + } + return required_field_get_packed_size(field, member); +} + +static protobuf_c_boolean +field_is_zeroish(const ProtobufCFieldDescriptor *field, + const void *member) +{ + protobuf_c_boolean ret = FALSE; + + switch (field->type) { + case PROTOBUF_C_TYPE_BOOL: + ret = (0 == *(const protobuf_c_boolean *) member); + break; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_SINT32: + case PROTOBUF_C_TYPE_INT32: + case PROTOBUF_C_TYPE_UINT32: + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + ret = (0 == *(const uint32_t *) member); + break; + case PROTOBUF_C_TYPE_SINT64: + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + ret = (0 == *(const uint64_t *) member); + break; + case 
PROTOBUF_C_TYPE_FLOAT: + ret = (0 == *(const float *) member); + break; + case PROTOBUF_C_TYPE_DOUBLE: + ret = (0 == *(const double *) member); + break; + case PROTOBUF_C_TYPE_STRING: + ret = (NULL == *(const char * const *) member) || + ('\0' == **(const char * const *) member); + break; + case PROTOBUF_C_TYPE_BYTES: + case PROTOBUF_C_TYPE_MESSAGE: + ret = (NULL == *(const void * const *) member); + break; + default: + ret = TRUE; + break; + } + + return ret; +} + +/** + * Calculate the serialized size of a single unlabeled message field, including + * the space needed by the preceding tag. Returns 0 if the field isn't set or + * if it is set to a "zeroish" value (null pointer or 0 for numerical values). + * Unlabeled fields are supported only in proto3. + * + * \param field + * Field descriptor for member. + * \param member + * Field to encode. + * \return + * Number of bytes required. + */ +static size_t +unlabeled_field_get_packed_size(const ProtobufCFieldDescriptor *field, + const void *member) +{ + if (field_is_zeroish(field, member)) + return 0; + return required_field_get_packed_size(field, member); +} + +/** + * Calculate the serialized size of repeated message fields, which may consist + * of any number of values (including 0). Includes the space needed by the + * preceding tags (as needed). + * + * \param field + * Field descriptor for member. + * \param count + * Number of repeated field members. + * \param member + * Field to encode. + * \return + * Number of bytes required. 
+ */ +static size_t +repeated_field_get_packed_size(const ProtobufCFieldDescriptor *field, + size_t count, const void *member) +{ + size_t header_size; + size_t rv = 0; + unsigned i; + void *array = *(void * const *) member; + + if (count == 0) + return 0; + header_size = get_tag_size(field->id); + if (0 == (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) + header_size *= count; + + switch (field->type) { + case PROTOBUF_C_TYPE_SINT32: + for (i = 0; i < count; i++) + rv += sint32_size(((int32_t *) array)[i]); + break; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + for (i = 0; i < count; i++) + rv += int32_size(((int32_t *) array)[i]); + break; + case PROTOBUF_C_TYPE_UINT32: + for (i = 0; i < count; i++) + rv += uint32_size(((uint32_t *) array)[i]); + break; + case PROTOBUF_C_TYPE_SINT64: + for (i = 0; i < count; i++) + rv += sint64_size(((int64_t *) array)[i]); + break; + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + for (i = 0; i < count; i++) + rv += uint64_size(((uint64_t *) array)[i]); + break; + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + rv += 4 * count; + break; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + rv += 8 * count; + break; + case PROTOBUF_C_TYPE_BOOL: + rv += count; + break; + case PROTOBUF_C_TYPE_STRING: + for (i = 0; i < count; i++) { + size_t len = strlen(((char **) array)[i]); + rv += uint32_size(len) + len; + } + break; + case PROTOBUF_C_TYPE_BYTES: + for (i = 0; i < count; i++) { + size_t len = ((ProtobufCBinaryData *) array)[i].len; + rv += uint32_size(len) + len; + } + break; + case PROTOBUF_C_TYPE_MESSAGE: + for (i = 0; i < count; i++) { + size_t len = protobuf_c_message_get_packed_size( + ((ProtobufCMessage **) array)[i]); + rv += uint32_size(len) + len; + } + break; + } + + if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) + header_size += uint32_size(rv); + return header_size + rv; +} + +/** + * 
Calculate the serialized size of an unknown field, i.e. one that is passed + * through mostly uninterpreted. This is required for forward compatibility if + * new fields are added to the message descriptor. + * + * \param field + * Unknown field type. + * \return + * Number of bytes required. + */ +static inline size_t +unknown_field_get_packed_size(const ProtobufCMessageUnknownField *field) +{ + return get_tag_size(field->tag) + field->len; +} + +/**@}*/ + +/* + * Calculate the serialized size of the message. + */ +size_t protobuf_c_message_get_packed_size(const ProtobufCMessage *message) +{ + unsigned i; + size_t rv = 0; + + ASSERT_IS_MESSAGE(message); + for (i = 0; i < message->descriptor->n_fields; i++) { + const ProtobufCFieldDescriptor *field = + message->descriptor->fields + i; + const void *member = + ((const char *) message) + field->offset; + const void *qmember = + ((const char *) message) + field->quantifier_offset; + + if (field->label == PROTOBUF_C_LABEL_REQUIRED) { + rv += required_field_get_packed_size(field, member); + } else if ((field->label == PROTOBUF_C_LABEL_OPTIONAL || + field->label == PROTOBUF_C_LABEL_NONE) && + (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF))) { + rv += oneof_field_get_packed_size( + field, + *(const uint32_t *) qmember, + member + ); + } else if (field->label == PROTOBUF_C_LABEL_OPTIONAL) { + rv += optional_field_get_packed_size( + field, + *(protobuf_c_boolean *) qmember, + member + ); + } else if (field->label == PROTOBUF_C_LABEL_NONE) { + rv += unlabeled_field_get_packed_size( + field, + member + ); + } else { + rv += repeated_field_get_packed_size( + field, + *(const size_t *) qmember, + member + ); + } + } + for (i = 0; i < message->n_unknown_fields; i++) + rv += unknown_field_get_packed_size(&message->unknown_fields[i]); + return rv; +} + +/** + * \defgroup pack protobuf_c_message_pack() implementation + * + * Routines mainly used by protobuf_c_message_pack(). 
+ * + * \ingroup internal + * @{ + */ + +/** + * Pack an unsigned 32-bit integer in base-128 varint encoding and return the + * number of bytes written, which must be 5 or less. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +uint32_pack(uint32_t value, uint8_t *out) +{ + unsigned rv = 0; + + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + } + } + } + } + /* assert: value<128 */ + out[rv++] = value; + return rv; +} + +/** + * Pack a signed 32-bit integer and return the number of bytes written. + * Negative numbers are encoded as two's complement 64-bit integers. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +int32_pack(int32_t value, uint8_t *out) +{ + if (value < 0) { + out[0] = value | 0x80; + out[1] = (value >> 7) | 0x80; + out[2] = (value >> 14) | 0x80; + out[3] = (value >> 21) | 0x80; + out[4] = (value >> 28) | 0x80; + out[5] = out[6] = out[7] = out[8] = 0xff; + out[9] = 0x01; + return 10; + } else { + return uint32_pack(value, out); + } +} + +/** + * Pack a signed 32-bit integer using ZigZag encoding and return the number of + * bytes written. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +sint32_pack(int32_t value, uint8_t *out) +{ + return uint32_pack(zigzag32(value), out); +} + +/** + * Pack a 64-bit unsigned integer using base-128 varint encoding and return the + * number of bytes written. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. 
+ */ +static size_t +uint64_pack(uint64_t value, uint8_t *out) +{ + uint32_t hi = (uint32_t) (value >> 32); + uint32_t lo = (uint32_t) value; + unsigned rv; + + if (hi == 0) + return uint32_pack((uint32_t) lo, out); + out[0] = (lo) | 0x80; + out[1] = (lo >> 7) | 0x80; + out[2] = (lo >> 14) | 0x80; + out[3] = (lo >> 21) | 0x80; + if (hi < 8) { + out[4] = (hi << 4) | (lo >> 28); + return 5; + } else { + out[4] = ((hi & 7) << 4) | (lo >> 28) | 0x80; + hi >>= 3; + } + rv = 5; + while (hi >= 128) { + out[rv++] = hi | 0x80; + hi >>= 7; + } + out[rv++] = hi; + return rv; +} + +/** + * Pack a 64-bit signed integer in ZigZag encoding and return the number of + * bytes written. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +sint64_pack(int64_t value, uint8_t *out) +{ + return uint64_pack(zigzag64(value), out); +} + +/** + * Pack a 32-bit quantity in little-endian byte order. Used for protobuf wire + * types fixed32, sfixed32, float. Similar to "htole32". + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +fixed32_pack(uint32_t value, void *out) +{ +#if !defined(WORDS_BIGENDIAN) + memcpy(out, &value, 4); +#else + uint8_t *buf = out; + + buf[0] = value; + buf[1] = value >> 8; + buf[2] = value >> 16; + buf[3] = value >> 24; +#endif + return 4; +} + +/** + * Pack a 64-bit quantity in little-endian byte order. Used for protobuf wire + * types fixed64, sfixed64, double. Similar to "htole64". + * + * \todo The big-endian impl is really only good for 32-bit machines, a 64-bit + * version would be appreciated, plus a way to decide to use 64-bit math where + * convenient. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. 
+ */ +static inline size_t +fixed64_pack(uint64_t value, void *out) +{ +#if !defined(WORDS_BIGENDIAN) + memcpy(out, &value, 8); +#else + fixed32_pack(value, out); + fixed32_pack(value >> 32, ((char *) out) + 4); +#endif + return 8; +} + +/** + * Pack a boolean value as an integer and return the number of bytes written. + * + * \todo Perhaps on some platforms *out = !!value would be a better impl, b/c + * that is idiomatic C++ in some STL implementations. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +boolean_pack(protobuf_c_boolean value, uint8_t *out) +{ + *out = value ? TRUE : FALSE; + return 1; +} + +/** + * Pack a NUL-terminated C string and return the number of bytes written. The + * output includes a length delimiter. + * + * The NULL pointer is treated as an empty string. This isn't really necessary, + * but it allows people to leave required strings blank. (See Issue #13 in the + * bug tracker for a little more explanation). + * + * \param str + * String to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +string_pack(const char *str, uint8_t *out) +{ + if (str == NULL) { + out[0] = 0; + return 1; + } else { + size_t len = strlen(str); + size_t rv = uint32_pack(len, out); + memcpy(out + rv, str, len); + return rv + len; + } +} + +/** + * Pack a ProtobufCBinaryData and return the number of bytes written. The output + * includes a length delimiter. + * + * \param bd + * ProtobufCBinaryData to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +binary_data_pack(const ProtobufCBinaryData *bd, uint8_t *out) +{ + size_t len = bd->len; + size_t rv = uint32_pack(len, out); + memcpy(out + rv, bd->data, len); + return rv + len; +} + +/** + * Pack a ProtobufCMessage and return the number of bytes written. 
The output + * includes a length delimiter. + * + * \param message + * ProtobufCMessage object to pack. + * \param[out] out + * Packed message. + * \return + * Number of bytes written to `out`. + */ +static inline size_t +prefixed_message_pack(const ProtobufCMessage *message, uint8_t *out) +{ + if (message == NULL) { + out[0] = 0; + return 1; + } else { + size_t rv = protobuf_c_message_pack(message, out + 1); + uint32_t rv_packed_size = uint32_size(rv); + if (rv_packed_size != 1) + memmove(out + rv_packed_size, out + 1, rv); + return uint32_pack(rv, out) + rv; + } +} + +/** + * Pack a field tag. + * + * Wire-type will be added in required_field_pack(). + * + * \todo Just call uint64_pack on 64-bit platforms. + * + * \param id + * Tag value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static size_t +tag_pack(uint32_t id, uint8_t *out) +{ + if (id < (1UL << (32 - 3))) + return uint32_pack(id << 3, out); + else + return uint64_pack(((uint64_t) id) << 3, out); +} + +/** + * Pack a required field and return the number of bytes written. + * + * \param field + * Field descriptor. + * \param member + * The field member. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. 
+ */ +static size_t +required_field_pack(const ProtobufCFieldDescriptor *field, + const void *member, uint8_t *out) +{ + size_t rv = tag_pack(field->id, out); + + switch (field->type) { + case PROTOBUF_C_TYPE_SINT32: + out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + return rv + sint32_pack(*(const int32_t *) member, out + rv); + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + return rv + int32_pack(*(const int32_t *) member, out + rv); + case PROTOBUF_C_TYPE_UINT32: + out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + return rv + uint32_pack(*(const uint32_t *) member, out + rv); + case PROTOBUF_C_TYPE_SINT64: + out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + return rv + sint64_pack(*(const int64_t *) member, out + rv); + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + return rv + uint64_pack(*(const uint64_t *) member, out + rv); + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + out[0] |= PROTOBUF_C_WIRE_TYPE_32BIT; + return rv + fixed32_pack(*(const uint32_t *) member, out + rv); + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + out[0] |= PROTOBUF_C_WIRE_TYPE_64BIT; + return rv + fixed64_pack(*(const uint64_t *) member, out + rv); + case PROTOBUF_C_TYPE_BOOL: + out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + return rv + boolean_pack(*(const protobuf_c_boolean *) member, out + rv); + case PROTOBUF_C_TYPE_STRING: + out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + return rv + string_pack(*(char *const *) member, out + rv); + case PROTOBUF_C_TYPE_BYTES: + out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + return rv + binary_data_pack((const ProtobufCBinaryData *) member, out + rv); + case PROTOBUF_C_TYPE_MESSAGE: + out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + return rv + prefixed_message_pack(*(ProtobufCMessage * const *) member, out + rv); + } + PROTOBUF_C__ASSERT_NOT_REACHED(); + return 0; +} 
+ +/** + * Pack a oneof field and return the number of bytes written. Only packs the + * field that is selected by the case enum. + * + * \param field + * Field descriptor. + * \param oneof_case + * Enum value that selects the field in the oneof. + * \param member + * The field member. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static size_t +oneof_field_pack(const ProtobufCFieldDescriptor *field, + uint32_t oneof_case, + const void *member, uint8_t *out) +{ + if (oneof_case != field->id) { + return 0; + } + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) + { + const void *ptr = *(const void * const *) member; + if (ptr == NULL || ptr == field->default_value) + return 0; + } + return required_field_pack(field, member, out); +} + +/** + * Pack an optional field and return the number of bytes written. + * + * \param field + * Field descriptor. + * \param has + * Whether the field is set. + * \param member + * The field member. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static size_t +optional_field_pack(const ProtobufCFieldDescriptor *field, + const protobuf_c_boolean has, + const void *member, uint8_t *out) +{ + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) + { + const void *ptr = *(const void * const *) member; + if (ptr == NULL || ptr == field->default_value) + return 0; + } else { + if (!has) + return 0; + } + return required_field_pack(field, member, out); +} + +/** + * Pack an unlabeled field and return the number of bytes written. + * + * \param field + * Field descriptor. + * \param member + * The field member. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. 
+ */ +static size_t +unlabeled_field_pack(const ProtobufCFieldDescriptor *field, + const void *member, uint8_t *out) +{ + if (field_is_zeroish(field, member)) + return 0; + return required_field_pack(field, member, out); +} + +/** + * Given a field type, return the in-memory size. + * + * \todo Implement as a table lookup. + * + * \param type + * Field type. + * \return + * Size of the field. + */ +static inline size_t +sizeof_elt_in_repeated_array(ProtobufCType type) +{ + switch (type) { + case PROTOBUF_C_TYPE_SINT32: + case PROTOBUF_C_TYPE_INT32: + case PROTOBUF_C_TYPE_UINT32: + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + case PROTOBUF_C_TYPE_ENUM: + return 4; + case PROTOBUF_C_TYPE_SINT64: + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + return 8; + case PROTOBUF_C_TYPE_BOOL: + return sizeof(protobuf_c_boolean); + case PROTOBUF_C_TYPE_STRING: + case PROTOBUF_C_TYPE_MESSAGE: + return sizeof(void *); + case PROTOBUF_C_TYPE_BYTES: + return sizeof(ProtobufCBinaryData); + } + PROTOBUF_C__ASSERT_NOT_REACHED(); + return 0; +} + +/** + * Pack an array of 32-bit quantities. + * + * \param[out] out + * Destination. + * \param[in] in + * Source. + * \param[in] n + * Number of elements in the source array. + */ +static void +copy_to_little_endian_32(void *out, const void *in, const unsigned n) +{ +#if !defined(WORDS_BIGENDIAN) + memcpy(out, in, n * 4); +#else + unsigned i; + const uint32_t *ini = in; + for (i = 0; i < n; i++) + fixed32_pack(ini[i], (uint32_t *) out + i); +#endif +} + +/** + * Pack an array of 64-bit quantities. + * + * \param[out] out + * Destination. + * \param[in] in + * Source. + * \param[in] n + * Number of elements in the source array. 
+ */ +static void +copy_to_little_endian_64(void *out, const void *in, const unsigned n) +{ +#if !defined(WORDS_BIGENDIAN) + memcpy(out, in, n * 8); +#else + unsigned i; + const uint64_t *ini = in; + for (i = 0; i < n; i++) + fixed64_pack(ini[i], (uint64_t *) out + i); +#endif +} + +/** + * Get the minimum number of bytes required to pack a field value of a + * particular type. + * + * \param type + * Field type. + * \return + * Number of bytes. + */ +static unsigned +get_type_min_size(ProtobufCType type) +{ + if (type == PROTOBUF_C_TYPE_SFIXED32 || + type == PROTOBUF_C_TYPE_FIXED32 || + type == PROTOBUF_C_TYPE_FLOAT) + { + return 4; + } + if (type == PROTOBUF_C_TYPE_SFIXED64 || + type == PROTOBUF_C_TYPE_FIXED64 || + type == PROTOBUF_C_TYPE_DOUBLE) + { + return 8; + } + return 1; +} + +/** + * Packs the elements of a repeated field and returns the serialised field and + * its length. + * + * \param field + * Field descriptor. + * \param count + * Number of elements in the repeated field array. + * \param member + * Pointer to the elements for this repeated field. + * \param[out] out + * Serialised representation of the repeated field. + * \return + * Number of bytes serialised to `out`. 
+ */ +static size_t +repeated_field_pack(const ProtobufCFieldDescriptor *field, + size_t count, const void *member, uint8_t *out) +{ + void *array = *(void * const *) member; + unsigned i; + + if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) { + unsigned header_len; + unsigned len_start; + unsigned min_length; + unsigned payload_len; + unsigned length_size_min; + unsigned actual_length_size; + uint8_t *payload_at; + + if (count == 0) + return 0; + header_len = tag_pack(field->id, out); + out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + len_start = header_len; + min_length = get_type_min_size(field->type) * count; + length_size_min = uint32_size(min_length); + header_len += length_size_min; + payload_at = out + header_len; + + switch (field->type) { + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + copy_to_little_endian_32(payload_at, array, count); + payload_at += count * 4; + break; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + copy_to_little_endian_64(payload_at, array, count); + payload_at += count * 8; + break; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: { + const int32_t *arr = (const int32_t *) array; + for (i = 0; i < count; i++) + payload_at += int32_pack(arr[i], payload_at); + break; + } + case PROTOBUF_C_TYPE_SINT32: { + const int32_t *arr = (const int32_t *) array; + for (i = 0; i < count; i++) + payload_at += sint32_pack(arr[i], payload_at); + break; + } + case PROTOBUF_C_TYPE_SINT64: { + const int64_t *arr = (const int64_t *) array; + for (i = 0; i < count; i++) + payload_at += sint64_pack(arr[i], payload_at); + break; + } + case PROTOBUF_C_TYPE_UINT32: { + const uint32_t *arr = (const uint32_t *) array; + for (i = 0; i < count; i++) + payload_at += uint32_pack(arr[i], payload_at); + break; + } + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: { + const uint64_t *arr = (const uint64_t *) array; + for (i = 0; i < count; 
i++) + payload_at += uint64_pack(arr[i], payload_at); + break; + } + case PROTOBUF_C_TYPE_BOOL: { + const protobuf_c_boolean *arr = (const protobuf_c_boolean *) array; + for (i = 0; i < count; i++) + payload_at += boolean_pack(arr[i], payload_at); + break; + } + default: + PROTOBUF_C__ASSERT_NOT_REACHED(); + } + + payload_len = payload_at - (out + header_len); + actual_length_size = uint32_size(payload_len); + if (length_size_min != actual_length_size) { + assert(actual_length_size == length_size_min + 1); + memmove(out + header_len + 1, out + header_len, + payload_len); + header_len++; + } + uint32_pack(payload_len, out + len_start); + return header_len + payload_len; + } else { + /* not "packed" cased */ + /* CONSIDER: optimize this case a bit (by putting the loop inside the switch) */ + size_t rv = 0; + unsigned siz = sizeof_elt_in_repeated_array(field->type); + + for (i = 0; i < count; i++) { + rv += required_field_pack(field, array, out + rv); + array = (char *)array + siz; + } + return rv; + } +} + +static size_t +unknown_field_pack(const ProtobufCMessageUnknownField *field, uint8_t *out) +{ + size_t rv = tag_pack(field->tag, out); + out[0] |= field->wire_type; + memcpy(out + rv, field->data, field->len); + return rv + field->len; +} + +/**@}*/ + +size_t +protobuf_c_message_pack(const ProtobufCMessage *message, uint8_t *out) +{ + unsigned i; + size_t rv = 0; + + ASSERT_IS_MESSAGE(message); + for (i = 0; i < message->descriptor->n_fields; i++) { + const ProtobufCFieldDescriptor *field = + message->descriptor->fields + i; + const void *member = ((const char *) message) + field->offset; + + /* + * It doesn't hurt to compute qmember (a pointer to the + * quantifier field of the structure), but the pointer is only + * valid if the field is: + * - a repeated field, or + * - a field that is part of a oneof + * - an optional field that isn't a pointer type + * (Meaning: not a message or a string). 
+ */ + const void *qmember = + ((const char *) message) + field->quantifier_offset; + + if (field->label == PROTOBUF_C_LABEL_REQUIRED) { + rv += required_field_pack(field, member, out + rv); + } else if ((field->label == PROTOBUF_C_LABEL_OPTIONAL || + field->label == PROTOBUF_C_LABEL_NONE) && + (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF))) { + rv += oneof_field_pack( + field, + *(const uint32_t *) qmember, + member, + out + rv + ); + } else if (field->label == PROTOBUF_C_LABEL_OPTIONAL) { + rv += optional_field_pack( + field, + *(const protobuf_c_boolean *) qmember, + member, + out + rv + ); + } else if (field->label == PROTOBUF_C_LABEL_NONE) { + rv += unlabeled_field_pack(field, member, out + rv); + } else { + rv += repeated_field_pack(field, *(const size_t *) qmember, + member, out + rv); + } + } + for (i = 0; i < message->n_unknown_fields; i++) + rv += unknown_field_pack(&message->unknown_fields[i], out + rv); + return rv; +} + +/** + * \defgroup packbuf protobuf_c_message_pack_to_buffer() implementation + * + * Routines mainly used by protobuf_c_message_pack_to_buffer(). + * + * \ingroup internal + * @{ + */ + +/** + * Pack a required field to a virtual buffer. + * + * \param field + * Field descriptor. + * \param member + * The element to be packed. + * \param[out] buffer + * Virtual buffer to append data to. + * \return + * Number of bytes packed. 
+ */ +static size_t +required_field_pack_to_buffer(const ProtobufCFieldDescriptor *field, + const void *member, ProtobufCBuffer *buffer) +{ + size_t rv; + uint8_t scratch[MAX_UINT64_ENCODED_SIZE * 2]; + + rv = tag_pack(field->id, scratch); + switch (field->type) { + case PROTOBUF_C_TYPE_SINT32: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + rv += sint32_pack(*(const int32_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + rv += int32_pack(*(const int32_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_UINT32: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + rv += uint32_pack(*(const uint32_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_SINT64: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + rv += sint64_pack(*(const int64_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + rv += uint64_pack(*(const uint64_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_32BIT; + rv += fixed32_pack(*(const uint32_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_64BIT; + rv += fixed64_pack(*(const uint64_t *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_BOOL: + scratch[0] |= PROTOBUF_C_WIRE_TYPE_VARINT; + rv += boolean_pack(*(const protobuf_c_boolean *) member, scratch + rv); + buffer->append(buffer, rv, scratch); + break; + case PROTOBUF_C_TYPE_STRING: { + const char 
*str = *(char *const *) member; + size_t sublen = str ? strlen(str) : 0; + + scratch[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + rv += uint32_pack(sublen, scratch + rv); + buffer->append(buffer, rv, scratch); + buffer->append(buffer, sublen, (const uint8_t *) str); + rv += sublen; + break; + } + case PROTOBUF_C_TYPE_BYTES: { + const ProtobufCBinaryData *bd = ((const ProtobufCBinaryData *) member); + size_t sublen = bd->len; + + scratch[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + rv += uint32_pack(sublen, scratch + rv); + buffer->append(buffer, rv, scratch); + buffer->append(buffer, sublen, bd->data); + rv += sublen; + break; + } + case PROTOBUF_C_TYPE_MESSAGE: { + uint8_t simple_buffer_scratch[256]; + size_t sublen; + const ProtobufCMessage *msg = *(ProtobufCMessage * const *) member; + ProtobufCBufferSimple simple_buffer = + PROTOBUF_C_BUFFER_SIMPLE_INIT(simple_buffer_scratch); + + scratch[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; + if (msg == NULL) + sublen = 0; + else + sublen = protobuf_c_message_pack_to_buffer(msg, &simple_buffer.base); + rv += uint32_pack(sublen, scratch + rv); + buffer->append(buffer, rv, scratch); + buffer->append(buffer, sublen, simple_buffer.data); + rv += sublen; + PROTOBUF_C_BUFFER_SIMPLE_CLEAR(&simple_buffer); + break; + } + default: + PROTOBUF_C__ASSERT_NOT_REACHED(); + } + return rv; +} + +/** + * Pack a oneof field to a buffer. Only packs the field that is selected by the case enum. + * + * \param field + * Field descriptor. + * \param oneof_case + * Enum value that selects the field in the oneof. + * \param member + * The element to be packed. + * \param[out] buffer + * Virtual buffer to append data to. + * \return + * Number of bytes serialised to `buffer`. 
+ */ +static size_t +oneof_field_pack_to_buffer(const ProtobufCFieldDescriptor *field, + uint32_t oneof_case, + const void *member, ProtobufCBuffer *buffer) +{ + if (oneof_case != field->id) { + return 0; + } + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) + { + const void *ptr = *(const void *const *) member; + if (ptr == NULL || ptr == field->default_value) + return 0; + } + return required_field_pack_to_buffer(field, member, buffer); +} + +/** + * Pack an optional field to a buffer. + * + * \param field + * Field descriptor. + * \param has + * Whether the field is set. + * \param member + * The element to be packed. + * \param[out] buffer + * Virtual buffer to append data to. + * \return + * Number of bytes serialised to `buffer`. + */ +static size_t +optional_field_pack_to_buffer(const ProtobufCFieldDescriptor *field, + const protobuf_c_boolean has, + const void *member, ProtobufCBuffer *buffer) +{ + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) + { + const void *ptr = *(const void *const *) member; + if (ptr == NULL || ptr == field->default_value) + return 0; + } else { + if (!has) + return 0; + } + return required_field_pack_to_buffer(field, member, buffer); +} + +/** + * Pack an unlabeled field to a buffer. + * + * \param field + * Field descriptor. + * \param member + * The element to be packed. + * \param[out] buffer + * Virtual buffer to append data to. + * \return + * Number of bytes serialised to `buffer`. + */ +static size_t +unlabeled_field_pack_to_buffer(const ProtobufCFieldDescriptor *field, + const void *member, ProtobufCBuffer *buffer) +{ + if (field_is_zeroish(field, member)) + return 0; + return required_field_pack_to_buffer(field, member, buffer); +} + +/** + * Get the packed size of an array of same field type. + * + * \param field + * Field descriptor. + * \param count + * Number of elements of this type. + * \param array + * The elements to get the size of. 
+ * \return + * Number of bytes required. + */ +static size_t +get_packed_payload_length(const ProtobufCFieldDescriptor *field, + unsigned count, const void *array) +{ + unsigned rv = 0; + unsigned i; + + switch (field->type) { + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + return count * 4; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + return count * 8; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: { + const int32_t *arr = (const int32_t *) array; + for (i = 0; i < count; i++) + rv += int32_size(arr[i]); + break; + } + case PROTOBUF_C_TYPE_SINT32: { + const int32_t *arr = (const int32_t *) array; + for (i = 0; i < count; i++) + rv += sint32_size(arr[i]); + break; + } + case PROTOBUF_C_TYPE_UINT32: { + const uint32_t *arr = (const uint32_t *) array; + for (i = 0; i < count; i++) + rv += uint32_size(arr[i]); + break; + } + case PROTOBUF_C_TYPE_SINT64: { + const int64_t *arr = (const int64_t *) array; + for (i = 0; i < count; i++) + rv += sint64_size(arr[i]); + break; + } + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: { + const uint64_t *arr = (const uint64_t *) array; + for (i = 0; i < count; i++) + rv += uint64_size(arr[i]); + break; + } + case PROTOBUF_C_TYPE_BOOL: + return count; + default: + PROTOBUF_C__ASSERT_NOT_REACHED(); + } + return rv; +} + +/** + * Pack an array of same field type to a virtual buffer. + * + * \param field + * Field descriptor. + * \param count + * Number of elements of this type. + * \param array + * The elements to get the size of. + * \param[out] buffer + * Virtual buffer to append data to. + * \return + * Number of bytes packed. 
 */
static size_t
pack_buffer_packed_payload(const ProtobufCFieldDescriptor *field,
			   unsigned count, const void *array,
			   ProtobufCBuffer *buffer)
{
	uint8_t scratch[16];
	size_t rv = 0;
	unsigned i;

	switch (field->type) {
	case PROTOBUF_C_TYPE_SFIXED32:
	case PROTOBUF_C_TYPE_FIXED32:
	case PROTOBUF_C_TYPE_FLOAT:
#if !defined(WORDS_BIGENDIAN)
		/* On little-endian hosts the in-memory layout already matches
		 * the wire format, so the raw array can be appended as-is. */
		rv = count * 4;
		goto no_packing_needed;
#else
		for (i = 0; i < count; i++) {
			unsigned len = fixed32_pack(((uint32_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
#endif
	case PROTOBUF_C_TYPE_SFIXED64:
	case PROTOBUF_C_TYPE_FIXED64:
	case PROTOBUF_C_TYPE_DOUBLE:
#if !defined(WORDS_BIGENDIAN)
		rv = count * 8;
		goto no_packing_needed;
#else
		for (i = 0; i < count; i++) {
			unsigned len = fixed64_pack(((uint64_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
#endif
	case PROTOBUF_C_TYPE_ENUM:
	case PROTOBUF_C_TYPE_INT32:
		for (i = 0; i < count; i++) {
			unsigned len = int32_pack(((int32_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
	case PROTOBUF_C_TYPE_SINT32:
		for (i = 0; i < count; i++) {
			unsigned len = sint32_pack(((int32_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
	case PROTOBUF_C_TYPE_UINT32:
		for (i = 0; i < count; i++) {
			unsigned len = uint32_pack(((uint32_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
	case PROTOBUF_C_TYPE_SINT64:
		for (i = 0; i < count; i++) {
			unsigned len = sint64_pack(((int64_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
	case PROTOBUF_C_TYPE_INT64:
	case PROTOBUF_C_TYPE_UINT64:
		for (i = 0; i < count; i++) {
			unsigned len = uint64_pack(((uint64_t *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		break;
	case PROTOBUF_C_TYPE_BOOL:
		for (i = 0; i < count; i++) {
			unsigned len = boolean_pack(((protobuf_c_boolean *) array)[i], scratch);
			buffer->append(buffer, len, scratch);
			rv += len;
		}
		/* Each boolean packs to one byte, so count equals the number
		 * of bytes appended. */
		return count;
	default:
		PROTOBUF_C__ASSERT_NOT_REACHED();
	}
	return rv;

#if !defined(WORDS_BIGENDIAN)
no_packing_needed:
	buffer->append(buffer, rv, array);
	return rv;
#endif
}

/*
 * Pack a repeated field to `buffer`: either as a single length-prefixed
 * packed run (PROTOBUF_C_FIELD_FLAG_PACKED), or as one tagged record per
 * element.
 */
static size_t
repeated_field_pack_to_buffer(const ProtobufCFieldDescriptor *field,
			      unsigned count, const void *member,
			      ProtobufCBuffer *buffer)
{
	char *array = *(char * const *) member;

	if (count == 0)
		return 0;
	if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) {
		uint8_t scratch[MAX_UINT64_ENCODED_SIZE * 2];
		size_t rv = tag_pack(field->id, scratch);
		size_t payload_len = get_packed_payload_length(field, count, array);
		size_t tmp;

		/* Tag byte carries the length-prefixed wire type. */
		scratch[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED;
		rv += uint32_pack(payload_len, scratch + rv);
		buffer->append(buffer, rv, scratch);
		tmp = pack_buffer_packed_payload(field, count, array, buffer);
		/* The size computation and the packing must agree. */
		assert(tmp == payload_len);
		return rv + payload_len;
	} else {
		size_t siz;
		unsigned i;
		/* CONSIDER: optimize this case a bit (by putting the loop inside the switch) */
		unsigned rv = 0;

		siz = sizeof_elt_in_repeated_array(field->type);
		for (i = 0; i < count; i++) {
			rv += required_field_pack_to_buffer(field, array, buffer);
			array += siz;
		}
		return rv;
	}
}

/*
 * Re-emit an unknown field exactly as it was captured: its original tag,
 * wire type, and raw bytes.
 */
static size_t
unknown_field_pack_to_buffer(const ProtobufCMessageUnknownField *field,
			     ProtobufCBuffer *buffer)
{
	uint8_t header[MAX_UINT64_ENCODED_SIZE];
	size_t rv = tag_pack(field->tag, header);

	header[0] |= field->wire_type;
	buffer->append(buffer, rv, header);
	buffer->append(buffer, field->len, field->data);
	return rv + field->len;
}

/**@}*/

size_t
protobuf_c_message_pack_to_buffer(const ProtobufCMessage *message,
				  ProtobufCBuffer *buffer)
{
	unsigned i;
	size_t rv = 0;

	ASSERT_IS_MESSAGE(message);
	for (i = 0; i < message->descriptor->n_fields; i++) {
		const ProtobufCFieldDescriptor *field =
			message->descriptor->fields + i;
		const void *member =
			((const char *) message) + field->offset;
		/* The quantifier is the `has_` flag, oneof case enum, or
		 * repeated count, depending on the field's label/flags. */
		const void *qmember =
			((const char *) message) + field->quantifier_offset;

		if (field->label == PROTOBUF_C_LABEL_REQUIRED) {
			rv += required_field_pack_to_buffer(field, member, buffer);
		} else if ((field->label == PROTOBUF_C_LABEL_OPTIONAL ||
			    field->label == PROTOBUF_C_LABEL_NONE) &&
			   (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF))) {
			rv += oneof_field_pack_to_buffer(
				field,
				*(const uint32_t *) qmember,
				member,
				buffer
			);
		} else if (field->label == PROTOBUF_C_LABEL_OPTIONAL) {
			rv += optional_field_pack_to_buffer(
				field,
				*(const protobuf_c_boolean *) qmember,
				member,
				buffer
			);
		} else if (field->label == PROTOBUF_C_LABEL_NONE) {
			rv += unlabeled_field_pack_to_buffer(
				field,
				member,
				buffer
			);
		} else {
			rv += repeated_field_pack_to_buffer(
				field,
				*(const size_t *) qmember,
				member,
				buffer
			);
		}
	}
	/* Unknown fields captured during unpack are re-emitted verbatim. */
	for (i = 0; i < message->n_unknown_fields; i++)
		rv += unknown_field_pack_to_buffer(&message->unknown_fields[i], buffer);

	return rv;
}

/**
 * \defgroup unpack unpacking implementation
 *
 * Routines mainly used by the unpacking functions.
 *
 * \ingroup internal
 * @{
 */

/*
 * Binary search over a sorted array of ProtobufCIntRange. Returns the
 * index ((value - start_value) + orig_index) of `value` within the range
 * that contains it, or -1 if no range contains it.
 */
static inline int
int_range_lookup(unsigned n_ranges, const ProtobufCIntRange *ranges, int value)
{
	unsigned n;
	unsigned start;

	if (n_ranges == 0)
		return -1;
	start = 0;
	n = n_ranges;
	while (n > 1) {
		unsigned mid = start + n / 2;

		if (value < ranges[mid].start_value) {
			n = mid - start;
		} else if (value >= ranges[mid].start_value +
			   (int) (ranges[mid + 1].orig_index -
				  ranges[mid].orig_index))
		{
			unsigned new_start = mid + 1;
			n = start + n - new_start;
			start = new_start;
		} else
			return (value - ranges[mid].start_value) +
			       ranges[mid].orig_index;
	}
	if (n > 0) {
		unsigned start_orig_index = ranges[start].orig_index;
		/* Relies on a sentinel entry at ranges[start + 1]. */
		unsigned range_size =
			ranges[start + 1].orig_index - start_orig_index;

		if (ranges[start].start_value <= value &&
		    value < (int) (ranges[start].start_value + range_size))
		{
			return (value - ranges[start].start_value) +
			       start_orig_index;
		}
	}
	return -1;
}

/*
 * Decode a field key: the low 3 bits of the varint are the wire type,
 * the remaining bits are the tag number. Returns the number of bytes
 * consumed, or 0 on a malformed header.
 */
static size_t
parse_tag_and_wiretype(size_t len,
		       const uint8_t *data,
		       uint32_t *tag_out,
		       ProtobufCWireType *wiretype_out)
{
	unsigned max_rv = len > 5 ? 5 : len;
	uint32_t tag = (data[0] & 0x7f) >> 3;
	unsigned shift = 4;
	unsigned rv;

	*wiretype_out = data[0] & 7;
	if ((data[0] & 0x80) == 0) {
		*tag_out = tag;
		return 1;
	}
	for (rv = 1; rv < max_rv; rv++) {
		if (data[rv] & 0x80) {
			tag |= (data[rv] & 0x7f) << shift;
			shift += 7;
		} else {
			tag |= data[rv] << shift;
			*tag_out = tag;
			return rv + 1;
		}
	}
	return 0; /* error: bad header */
}

/*
 * NOTE(review): the region below appears corrupted/truncated in this hunk.
 * The text jumps from a comment about sizeof(ScannedMember) directly into
 * the tail of what looks like scan_length_prefixed_data() — the ScannedMember
 * definitions and the head of that function seem to be missing. Reproduced
 * as found; confirm against the full patch.
 */
/* sizeof(ScannedMember) must be <= (1UL< len) {
		PROTOBUF_C_UNPACK_ERROR("data too short after length-prefix of %u", val);
		return 0;
	}
	return hdr_len + val;
}

/*
 * Upper bound on the number of base-128 varints in `data`: counts the
 * bytes whose continuation bit (0x80) is clear.
 */
static size_t
max_b128_numbers(size_t len, const uint8_t *data)
{
	size_t rv = 0;
	while (len--)
		if ((*data++ & 0x80) == 0)
			++rv;
	return rv;
}

/**@}*/

/**
 * Merge earlier message into a latter message.
 *
 * For numeric types and strings, if the same value appears multiple
 * times, the parser accepts the last value it sees. For embedded
 * message fields, the parser merges multiple instances of the same
 * field. That is, all singular scalar fields in the latter instance
 * replace those in the former, singular embedded messages are merged,
 * and repeated fields are concatenated.
 *
 * The earlier message should be freed after calling this function, as
 * some of its fields may have been reused and changed to their default
 * values during the merge.
 */
static protobuf_c_boolean
merge_messages(ProtobufCMessage *earlier_msg,
	       ProtobufCMessage *latter_msg,
	       ProtobufCAllocator *allocator)
{
	unsigned i;
	const ProtobufCFieldDescriptor *fields =
		latter_msg->descriptor->fields;
	for (i = 0; i < latter_msg->descriptor->n_fields; i++) {
		if (fields[i].label == PROTOBUF_C_LABEL_REPEATED) {
			size_t *n_earlier =
				STRUCT_MEMBER_PTR(size_t, earlier_msg,
						  fields[i].quantifier_offset);
			uint8_t **p_earlier =
				STRUCT_MEMBER_PTR(uint8_t *, earlier_msg,
						  fields[i].offset);
			size_t *n_latter =
				STRUCT_MEMBER_PTR(size_t, latter_msg,
						  fields[i].quantifier_offset);
			uint8_t **p_latter =
				STRUCT_MEMBER_PTR(uint8_t *, latter_msg,
						  fields[i].offset);

			if (*n_earlier > 0) {
				if (*n_latter > 0) {
					/* Concatenate the repeated field */
					size_t el_size =
						sizeof_elt_in_repeated_array(fields[i].type);
					uint8_t *new_field;

					new_field = do_alloc(allocator,
						(*n_earlier + *n_latter) * el_size);
					if (!new_field)
						return FALSE;

					/* Earlier elements precede latter ones. */
					memcpy(new_field, *p_earlier,
					       *n_earlier * el_size);
					memcpy(new_field +
					       *n_earlier * el_size,
					       *p_latter,
					       *n_latter * el_size);

					do_free(allocator, *p_latter);
					do_free(allocator, *p_earlier);
					*p_latter = new_field;
					*n_latter = *n_earlier + *n_latter;
				} else {
					/* Zero copy the repeated field from the earlier message */
					*n_latter = *n_earlier;
					*p_latter = *p_earlier;
				}
				/* Make sure the field does not get double freed */
				*n_earlier = 0;
				*p_earlier = 0;
			}
		} else if (fields[i].label == PROTOBUF_C_LABEL_OPTIONAL ||
			   fields[i].label == PROTOBUF_C_LABEL_NONE) {
			const ProtobufCFieldDescriptor *field;
			uint32_t *earlier_case_p = STRUCT_MEMBER_PTR(uint32_t,
								     earlier_msg,
								     fields[i].
								     quantifier_offset);
			uint32_t *latter_case_p = STRUCT_MEMBER_PTR(uint32_t,
								    latter_msg,
								    fields[i].
								    quantifier_offset);
			protobuf_c_boolean need_to_merge = FALSE;
			void *earlier_elem;
			void *latter_elem;
			const void *def_val;

			if (fields[i].flags & PROTOBUF_C_FIELD_FLAG_ONEOF) {
				if (*latter_case_p == 0) {
					/* lookup correct oneof field */
					int field_index =
						int_range_lookup(
							latter_msg->descriptor
							->n_field_ranges,
							latter_msg->descriptor
							->field_ranges,
							*earlier_case_p);
					if (field_index < 0)
						return FALSE;
					field = latter_msg->descriptor->fields +
						field_index;
				} else {
					/* Oneof is present in the latter message, move on */
					continue;
				}
			} else {
				field = &fields[i];
			}

			earlier_elem = STRUCT_MEMBER_P(earlier_msg, field->offset);
			latter_elem = STRUCT_MEMBER_P(latter_msg, field->offset);
			def_val = field->default_value;

			switch (field->type) {
			case PROTOBUF_C_TYPE_MESSAGE: {
				ProtobufCMessage *em = *(ProtobufCMessage **) earlier_elem;
				ProtobufCMessage *lm = *(ProtobufCMessage **) latter_elem;
				if (em != NULL) {
					if (lm != NULL) {
						/* Recursive merge of submessages. */
						if (!merge_messages(em, lm, allocator))
							return FALSE;
						/* Already merged */
						need_to_merge = FALSE;
					} else {
						/* Zero copy the message */
						need_to_merge = TRUE;
					}
				}
				break;
			}
			case PROTOBUF_C_TYPE_BYTES: {
				uint8_t *e_data =
					((ProtobufCBinaryData *) earlier_elem)->data;
				uint8_t *l_data =
					((ProtobufCBinaryData *) latter_elem)->data;
				const ProtobufCBinaryData *d_bd =
					(ProtobufCBinaryData *) def_val;

				/* Merge only when earlier is set (non-default)
				 * and latter is unset (NULL or default). */
				need_to_merge =
					(e_data != NULL &&
					 (d_bd == NULL ||
					  e_data != d_bd->data)) &&
					(l_data == NULL ||
					 (d_bd != NULL &&
					  l_data == d_bd->data));
				break;
			}
			case PROTOBUF_C_TYPE_STRING: {
				char *e_str = *(char **) earlier_elem;
				char *l_str = *(char **) latter_elem;
				const char *d_str = def_val;

				need_to_merge = e_str != d_str && l_str == d_str;
				break;
			}
			default: {
				/* Could be has field or case enum, the logic is
				 * equivalent, since 0 (FALSE) means not set for
				 * oneof */
				need_to_merge = (*earlier_case_p != 0) &&
						(*latter_case_p == 0);
				break;
			}
			}

			if (need_to_merge) {
				size_t el_size =
					sizeof_elt_in_repeated_array(field->type);
				memcpy(latter_elem, earlier_elem, el_size);
				/*
				 * Reset the element from the old message to 0
				 * to make sure earlier message deallocation
				 * doesn't corrupt zero-copied data in the new
				 * message, earlier message will be freed after
				 * this function is called anyway
				 */
				memset(earlier_elem, 0, el_size);

				if (field->quantifier_offset != 0) {
					/* Set the has field or the case enum,
					 * if applicable */
					*latter_case_p = *earlier_case_p;
					*earlier_case_p = 0;
				}
			}
		}
	}
	return TRUE;
}

/**
 * Count packed elements.
 *
 * Given a raw slab of packed-repeated values, determine the number of
 * elements. This function detects certain kinds of errors but not
 * others; the remaining error checking is done by
 * parse_packed_repeated_member().
 */
static protobuf_c_boolean
count_packed_elements(ProtobufCType type,
		      size_t len, const uint8_t *data, size_t *count_out)
{
	switch (type) {
	case PROTOBUF_C_TYPE_SFIXED32:
	case PROTOBUF_C_TYPE_FIXED32:
	case PROTOBUF_C_TYPE_FLOAT:
		if (len % 4 != 0) {
			PROTOBUF_C_UNPACK_ERROR("length must be a multiple of 4 for fixed-length 32-bit types");
			return FALSE;
		}
		*count_out = len / 4;
		return TRUE;
	case PROTOBUF_C_TYPE_SFIXED64:
	case PROTOBUF_C_TYPE_FIXED64:
	case PROTOBUF_C_TYPE_DOUBLE:
		if (len % 8 != 0) {
			PROTOBUF_C_UNPACK_ERROR("length must be a multiple of 8 for fixed-length 64-bit types");
			return FALSE;
		}
		*count_out = len / 8;
		return TRUE;
	case PROTOBUF_C_TYPE_ENUM:
	case PROTOBUF_C_TYPE_INT32:
	case PROTOBUF_C_TYPE_SINT32:
	case PROTOBUF_C_TYPE_UINT32:
	case PROTOBUF_C_TYPE_INT64:
	case PROTOBUF_C_TYPE_SINT64:
	case PROTOBUF_C_TYPE_UINT64:
		/* Varints: count terminator bytes (upper bound on elements). */
		*count_out = max_b128_numbers(len, data);
		return TRUE;
	case PROTOBUF_C_TYPE_BOOL:
		*count_out = len;
		return TRUE;
	case PROTOBUF_C_TYPE_STRING:
	case PROTOBUF_C_TYPE_BYTES:
	case PROTOBUF_C_TYPE_MESSAGE:
	default:
		/* These types are never valid in a packed run. */
		PROTOBUF_C_UNPACK_ERROR("bad protobuf-c type %u for packed-repeated", type);
		return FALSE;
	}
}

/* Decode up to 5 base-128 bytes into a 32-bit value (low 7 bits each). */
static inline uint32_t
parse_uint32(unsigned len, const uint8_t *data)
{
	uint32_t rv = data[0] & 0x7f;
	if (len > 1) {
		rv |= ((uint32_t) (data[1] & 0x7f) << 7);
		if (len > 2) {
			rv |= ((uint32_t) (data[2] & 0x7f) << 14);
			if (len > 3) {
				rv |= ((uint32_t) (data[3] & 0x7f) << 21);
				if (len > 4)
					rv |= ((uint32_t) (data[4]) << 28);
			}
		}
	}
	return rv;
}

/* int32 shares the uint32 varint decoding; the bits are reinterpreted. */
static inline uint32_t
parse_int32(unsigned len, const uint8_t *data)
{
	return parse_uint32(len, data);
}

/* Undo zigzag encoding: 0,1,2,3,... -> 0,-1,1,-2,... */
static inline int32_t
unzigzag32(uint32_t v)
{
	if (v & 1)
		return -(v >> 1) - 1;
	else
		return v >> 1;
}

/* Read a 4-byte little-endian value from the wire. */
static inline uint32_t
parse_fixed_uint32(const uint8_t *data)
{
#if !defined(WORDS_BIGENDIAN)
	uint32_t t;
	memcpy(&t, data, 4);
	return t;
#else
	return data[0] |
	       ((uint32_t) (data[1]) << 8) |
	       ((uint32_t) (data[2]) << 16) |
	       ((uint32_t) (data[3]) << 24);
#endif
}

/* Decode up to 10 base-128 bytes into a 64-bit value. */
static uint64_t
parse_uint64(unsigned len, const uint8_t *data)
{
	unsigned shift, i;
	uint64_t rv;

	/* Short varints fit entirely in the 32-bit fast path. */
	if (len < 5)
		return parse_uint32(len, data);
	rv = ((uint64_t) (data[0] & 0x7f)) |
	     ((uint64_t) (data[1] & 0x7f) << 7) |
	     ((uint64_t) (data[2] & 0x7f) << 14) |
	     ((uint64_t) (data[3] & 0x7f) << 21);
	shift = 28;
	for (i = 4; i < len; i++) {
		rv |= (((uint64_t) (data[i] & 0x7f)) << shift);
		shift += 7;
	}
	return rv;
}

/* Undo 64-bit zigzag encoding. */
static inline int64_t
unzigzag64(uint64_t v)
{
	if (v & 1)
		return -(v >> 1) - 1;
	else
		return v >> 1;
}

/* Read an 8-byte little-endian value from the wire. */
static inline uint64_t
parse_fixed_uint64(const uint8_t *data)
{
#if !defined(WORDS_BIGENDIAN)
	uint64_t t;
	memcpy(&t, data, 8);
	return t;
#else
	return (uint64_t) parse_fixed_uint32(data) |
	       (((uint64_t) parse_fixed_uint32(data + 4)) << 32);
#endif
}

/* A boolean varint is true if any payload bit is set in any byte. */
static protobuf_c_boolean
parse_boolean(unsigned len, const uint8_t *data)
{
	unsigned i;
	for (i = 0; i < len; i++)
		if (data[i] & 0x7f)
			return TRUE;
	return FALSE;
}

/*
 * Decode one scanned record into `member` according to the field's type.
 * `maybe_clear` frees any previously-stored pointer value first (strings,
 * bytes, messages). Returns FALSE on a wire-type mismatch or allocation
 * failure.
 */
static protobuf_c_boolean
parse_required_member(ScannedMember *scanned_member,
		      void *member,
		      ProtobufCAllocator *allocator,
		      protobuf_c_boolean maybe_clear)
{
	unsigned len = scanned_member->len;
	const uint8_t *data = scanned_member->data;
	ProtobufCWireType wire_type = scanned_member->wire_type;

	switch (scanned_member->field->type) {
	case PROTOBUF_C_TYPE_ENUM:
	case PROTOBUF_C_TYPE_INT32:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT)
			return FALSE;
		*(int32_t *) member = parse_int32(len, data);
		return TRUE;
	case PROTOBUF_C_TYPE_UINT32:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT)
			return FALSE;
		*(uint32_t *) member = parse_uint32(len, data);
		return TRUE;
	case PROTOBUF_C_TYPE_SINT32:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT)
			return FALSE;
		*(int32_t *) member = unzigzag32(parse_uint32(len, data));
		return TRUE;
	case PROTOBUF_C_TYPE_SFIXED32:
	case PROTOBUF_C_TYPE_FIXED32:
	case PROTOBUF_C_TYPE_FLOAT:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_32BIT)
			return FALSE;
		*(uint32_t *) member = parse_fixed_uint32(data);
		return TRUE;
	case PROTOBUF_C_TYPE_INT64:
	case PROTOBUF_C_TYPE_UINT64:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT)
			return FALSE;
		*(uint64_t *) member = parse_uint64(len, data);
		return TRUE;
	case PROTOBUF_C_TYPE_SINT64:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT)
			return FALSE;
		*(int64_t *) member = unzigzag64(parse_uint64(len, data));
		return TRUE;
	case PROTOBUF_C_TYPE_SFIXED64:
	case PROTOBUF_C_TYPE_FIXED64:
	case PROTOBUF_C_TYPE_DOUBLE:
		if (wire_type != PROTOBUF_C_WIRE_TYPE_64BIT)
			return FALSE;
		*(uint64_t *) member = parse_fixed_uint64(data);
		return TRUE;
	case PROTOBUF_C_TYPE_BOOL:
		/* NOTE: no wire-type check here, unlike the other scalars. */
		*(protobuf_c_boolean *) member = parse_boolean(len, data);
		return TRUE;
	case PROTOBUF_C_TYPE_STRING: {
		char **pstr = member;
		unsigned pref_len = scanned_member->length_prefix_len;

		if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED)
			return FALSE;

		if (maybe_clear && *pstr != NULL) {
			const char *def = scanned_member->field->default_value;
			/* Never free the descriptor-owned default value. */
			if (*pstr != NULL && *pstr != def)
				do_free(allocator, *pstr);
		}
		*pstr = do_alloc(allocator, len - pref_len + 1);
		if (*pstr == NULL)
			return FALSE;
		memcpy(*pstr, data + pref_len, len - pref_len);
		(*pstr)[len - pref_len] = 0;
		return TRUE;
	}
	case PROTOBUF_C_TYPE_BYTES: {
		ProtobufCBinaryData *bd = member;
		const ProtobufCBinaryData *def_bd;
		unsigned pref_len = scanned_member->length_prefix_len;

		if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED)
			return FALSE;

		def_bd = scanned_member->field->default_value;
		if (maybe_clear &&
		    bd->data != NULL &&
		    (def_bd == NULL || bd->data != def_bd->data))
		{
			do_free(allocator, bd->data);
		}
		if (len - pref_len > 0) {
			bd->data = do_alloc(allocator, len - pref_len);
			if (bd->data == NULL)
				return FALSE;
			memcpy(bd->data, data + pref_len, len - pref_len);
		} else {
			bd->data = NULL;
		}
		bd->len = len - pref_len;
		return TRUE;
	}
	case PROTOBUF_C_TYPE_MESSAGE: {
		ProtobufCMessage **pmessage = member;
		ProtobufCMessage *subm;
		const ProtobufCMessage *def_mess;
		protobuf_c_boolean merge_successful = TRUE;
		unsigned pref_len = scanned_member->length_prefix_len;

		if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED)
			return FALSE;

		def_mess = scanned_member->field->default_value;
		subm = protobuf_c_message_unpack(scanned_member->field->descriptor,
						 allocator,
						 len - pref_len,
						 data + pref_len);

		if (maybe_clear &&
		    *pmessage != NULL &&
		    *pmessage != def_mess)
		{
			/* A repeated occurrence of a singular submessage is
			 * merged into the new one rather than replaced. */
			if (subm != NULL)
				merge_successful = merge_messages(*pmessage, subm, allocator);
			/* Delete the previous message */
			protobuf_c_message_free_unpacked(*pmessage, allocator);
		}
		*pmessage = subm;
		if (subm == NULL || !merge_successful)
			return FALSE;
		return TRUE;
	}
	}
	return FALSE;
}

/*
 * Parse a member belonging to a oneof: free whichever member was
 * previously selected, parse the new one, and record its tag in the
 * oneof case enum.
 */
static protobuf_c_boolean
parse_oneof_member (ScannedMember *scanned_member,
		    void *member,
		    ProtobufCMessage *message,
		    ProtobufCAllocator *allocator)
{
	uint32_t *oneof_case = STRUCT_MEMBER_PTR(uint32_t, message,
					scanned_member->field->quantifier_offset);

	/* If we have already parsed a member of this oneof, free it. */
	if (*oneof_case != 0) {
		/* lookup field */
		int field_index =
			int_range_lookup(message->descriptor->n_field_ranges,
					 message->descriptor->field_ranges,
					 *oneof_case);
		if (field_index < 0)
			return FALSE;
		const ProtobufCFieldDescriptor *old_field =
			message->descriptor->fields + field_index;
		size_t el_size = sizeof_elt_in_repeated_array(old_field->type);

		switch (old_field->type) {
		case PROTOBUF_C_TYPE_STRING: {
			char **pstr = member;
			const char *def = old_field->default_value;
			if (*pstr != NULL && *pstr != def)
				do_free(allocator, *pstr);
			break;
		}
		case PROTOBUF_C_TYPE_BYTES: {
			ProtobufCBinaryData *bd = member;
			const ProtobufCBinaryData *def_bd = old_field->default_value;
			if (bd->data != NULL &&
			    (def_bd == NULL || bd->data != def_bd->data))
			{
				do_free(allocator, bd->data);
			}
			break;
		}
		case PROTOBUF_C_TYPE_MESSAGE: {
			ProtobufCMessage **pmessage = member;
			const ProtobufCMessage *def_mess = old_field->default_value;
			if (*pmessage != NULL && *pmessage != def_mess)
				protobuf_c_message_free_unpacked(*pmessage, allocator);
			break;
		}
		default:
			break;
		}

		memset (member, 0, el_size);
	}
	if (!parse_required_member (scanned_member, member, allocator, TRUE))
		return FALSE;

	*oneof_case = scanned_member->tag;
	return TRUE;
}


/* Parse an optional member and set its `has_` flag when one exists. */
static protobuf_c_boolean
parse_optional_member(ScannedMember *scanned_member,
		      void *member,
		      ProtobufCMessage *message,
		      ProtobufCAllocator *allocator)
{
	if (!parse_required_member(scanned_member, member, allocator, TRUE))
		return FALSE;
	if (scanned_member->field->quantifier_offset != 0)
		STRUCT_MEMBER(protobuf_c_boolean,
			      message,
			      scanned_member->field->quantifier_offset) = TRUE;
	return TRUE;
}

/*
 * Append one element to a repeated member's array (the array is assumed
 * to be pre-sized by the caller) and bump the element count.
 */
static protobuf_c_boolean
parse_repeated_member(ScannedMember *scanned_member,
		      void *member,
		      ProtobufCMessage *message,
		      ProtobufCAllocator *allocator)
{
	const ProtobufCFieldDescriptor *field = scanned_member->field;
	size_t *p_n = STRUCT_MEMBER_PTR(size_t, message, field->quantifier_offset);
	size_t siz = sizeof_elt_in_repeated_array(field->type);
	char *array = *(char **) member;

	if (!parse_required_member(scanned_member, array + siz * (*p_n),
				   allocator, FALSE))
	{
		return FALSE;
	}
	*p_n += 1;
	return TRUE;
}

/*
 * Length of the varint starting at `data` (capped at 10 bytes);
 * 0 if no terminator byte is found within the cap.
 */
static unsigned
scan_varint(unsigned len, const uint8_t *data)
{
	unsigned i;
	if (len > 10)
		len = 10;
	for (i = 0; i < len; i++)
		if ((data[i] & 0x80) == 0)
			break;
	if (i == len)
		return 0;
	return i + 1;
}

/*
 * Decode a packed-repeated run into the member's pre-sized array,
 * advancing the element count by the number of values decoded.
 */
static protobuf_c_boolean
parse_packed_repeated_member(ScannedMember *scanned_member,
			     void *member,
			     ProtobufCMessage *message)
{
	const ProtobufCFieldDescriptor *field = scanned_member->field;
	size_t *p_n = STRUCT_MEMBER_PTR(size_t, message, field->quantifier_offset);
	size_t siz = sizeof_elt_in_repeated_array(field->type);
	void *array = *(char **) member + siz * (*p_n);
	const uint8_t *at = scanned_member->data + scanned_member->length_prefix_len;
	size_t rem = scanned_member->len - scanned_member->length_prefix_len;
	size_t count = 0;
	unsigned i;

	switch (field->type) {
	case PROTOBUF_C_TYPE_SFIXED32:
	case PROTOBUF_C_TYPE_FIXED32:
	case PROTOBUF_C_TYPE_FLOAT:
		count = (scanned_member->len - scanned_member->length_prefix_len) / 4;
#if !defined(WORDS_BIGENDIAN)
		/* Little-endian host layout matches the wire: memcpy below. */
		goto no_unpacking_needed;
#else
		for (i = 0; i < count; i++) {
			((uint32_t *) array)[i] = parse_fixed_uint32(at);
			at += 4;
		}
		break;
#endif
	case PROTOBUF_C_TYPE_SFIXED64:
	case PROTOBUF_C_TYPE_FIXED64:
	case PROTOBUF_C_TYPE_DOUBLE:
		count = (scanned_member->len - scanned_member->length_prefix_len) / 8;
#if !defined(WORDS_BIGENDIAN)
		goto no_unpacking_needed;
#else
		for (i = 0; i < count; i++) {
			((uint64_t *) array)[i] = parse_fixed_uint64(at);
			at += 8;
		}
		break;
#endif
	case PROTOBUF_C_TYPE_ENUM:
	case PROTOBUF_C_TYPE_INT32:
		while (rem > 0) {
			unsigned s = scan_varint(rem, at);
			if (s == 0) {
				PROTOBUF_C_UNPACK_ERROR("bad packed-repeated int32 value");
				return FALSE;
			}
			((int32_t *) array)[count++] = parse_int32(s, at);
			at += s;
			rem -= s;
		}
		break;
	case PROTOBUF_C_TYPE_SINT32:
		while (rem > 0) {
			unsigned s = scan_varint(rem, at);
			if (s == 0) {
				PROTOBUF_C_UNPACK_ERROR("bad packed-repeated sint32 value");
				return FALSE;
			}
			((int32_t *) array)[count++] = unzigzag32(parse_uint32(s, at));
			at += s;
			rem -= s;
		}
		break;
	case PROTOBUF_C_TYPE_UINT32:
		while (rem > 0) {
			unsigned s = scan_varint(rem, at);
			if (s == 0) {
				PROTOBUF_C_UNPACK_ERROR("bad packed-repeated enum or uint32 value");
				return FALSE;
			}
			((uint32_t *) array)[count++] = parse_uint32(s, at);
			at += s;
			rem -= s;
		}
		break;

	case PROTOBUF_C_TYPE_SINT64:
		while (rem > 0) {
			unsigned s = scan_varint(rem, at);
			if (s == 0) {
				PROTOBUF_C_UNPACK_ERROR("bad packed-repeated sint64 value");
				return FALSE;
			}
			((int64_t *) array)[count++] = unzigzag64(parse_uint64(s, at));
			at += s;
			rem -= s;
		}
		break;
	case PROTOBUF_C_TYPE_INT64:
	case PROTOBUF_C_TYPE_UINT64:
		while (rem > 0) {
			unsigned s = scan_varint(rem, at);
			if (s == 0) {
				PROTOBUF_C_UNPACK_ERROR("bad packed-repeated int64/uint64 value");
				return FALSE;
			}
			((int64_t *) array)[count++] = parse_uint64(s, at);
			at += s;
			rem -= s;
		}
		break;
	case PROTOBUF_C_TYPE_BOOL:
		count = rem;
		for (i = 0; i < count; i++) {
			if (at[i] > 1) {
				PROTOBUF_C_UNPACK_ERROR("bad packed-repeated boolean value");
				return FALSE;
			}
			((protobuf_c_boolean *) array)[i] = at[i];
		}
		break;
	default:
		PROTOBUF_C__ASSERT_NOT_REACHED();
	}
	*p_n += count;
	return TRUE;

#if !defined(WORDS_BIGENDIAN)
no_unpacking_needed:
	memcpy(array, at, count * siz);
	*p_n += count;
	return TRUE;
#endif
}

/* True for types whose wire encoding permits packed-repeated runs. */
static protobuf_c_boolean
is_packable_type(ProtobufCType type)
{
	return
		type != PROTOBUF_C_TYPE_STRING &&
		type != PROTOBUF_C_TYPE_BYTES &&
		type != PROTOBUF_C_TYPE_MESSAGE;
}

/*
 * Dispatch one scanned record to the appropriate member parser; fields
 * with no descriptor (field == NULL) are stored as unknown fields.
 */
static protobuf_c_boolean
parse_member(ScannedMember *scanned_member,
	     ProtobufCMessage *message,
	     ProtobufCAllocator *allocator)
{
	const ProtobufCFieldDescriptor *field = scanned_member->field;
	void *member;

	if (field == NULL) {
		ProtobufCMessageUnknownField *ufield =
			message->unknown_fields +
			(message->n_unknown_fields++);
		ufield->tag = scanned_member->tag;
		ufield->wire_type = scanned_member->wire_type;
		ufield->len = scanned_member->len;
		ufield->data = do_alloc(allocator, scanned_member->len);
		if (ufield->data == NULL)
			return FALSE;
		memcpy(ufield->data, scanned_member->data, ufield->len);
		return TRUE;
	}
	member = (char *) message + field->offset;
	switch (field->label) {
	case PROTOBUF_C_LABEL_REQUIRED:
		return parse_required_member(scanned_member, member,
					     allocator, TRUE);
	case PROTOBUF_C_LABEL_OPTIONAL:
	case PROTOBUF_C_LABEL_NONE:
		if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF)) {
			return parse_oneof_member(scanned_member, member,
						  message, allocator);
		} else {
			return parse_optional_member(scanned_member, member,
						     message, allocator);
		}
	case PROTOBUF_C_LABEL_REPEATED:
		if (scanned_member->wire_type ==
		    PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED &&
		    (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED) ||
		     is_packable_type(field->type)))
		{
			return parse_packed_repeated_member(scanned_member,
							    member, message);
		} else {
			return parse_repeated_member(scanned_member,
						     member, message,
						     allocator);
		}
	}
	PROTOBUF_C__ASSERT_NOT_REACHED();
	return 0;
}

/**
 * Initialise messages generated by old code.
 *
 * This function is used if desc->message_init == NULL (which occurs
 * for old code, and which would be useful to support allocating
 * descriptors dynamically).
 */
static void
message_init_generic(const ProtobufCMessageDescriptor *desc,
		     ProtobufCMessage *message)
{
	unsigned i;

	/* Zero everything first, then copy in per-field default values. */
	memset(message, 0, desc->sizeof_message);
	message->descriptor = desc;
	for (i = 0; i < desc->n_fields; i++) {
		if (desc->fields[i].default_value != NULL &&
		    desc->fields[i].label != PROTOBUF_C_LABEL_REPEATED)
		{
			void *field =
				STRUCT_MEMBER_P(message, desc->fields[i].offset);
			const void *dv = desc->fields[i].default_value;

			switch (desc->fields[i].type) {
			case PROTOBUF_C_TYPE_INT32:
			case PROTOBUF_C_TYPE_SINT32:
			case PROTOBUF_C_TYPE_SFIXED32:
			case PROTOBUF_C_TYPE_UINT32:
			case PROTOBUF_C_TYPE_FIXED32:
			case PROTOBUF_C_TYPE_FLOAT:
			case PROTOBUF_C_TYPE_ENUM:
				memcpy(field, dv, 4);
				break;
			case PROTOBUF_C_TYPE_INT64:
			case PROTOBUF_C_TYPE_SINT64:
			case PROTOBUF_C_TYPE_SFIXED64:
			case PROTOBUF_C_TYPE_UINT64:
			case PROTOBUF_C_TYPE_FIXED64:
			case PROTOBUF_C_TYPE_DOUBLE:
				memcpy(field, dv, 8);
				break;
			case PROTOBUF_C_TYPE_BOOL:
				memcpy(field, dv, sizeof(protobuf_c_boolean));
				break;
			case PROTOBUF_C_TYPE_BYTES:
				memcpy(field, dv, sizeof(ProtobufCBinaryData));
				break;

			case PROTOBUF_C_TYPE_STRING:
			case PROTOBUF_C_TYPE_MESSAGE:
				/*
				 * The next line essentially implements a cast
				 * from const, which is totally unavoidable.
				 */
				*(const void **) field = dv;
				break;
			}
		}
	}
}

/**@}*/

/*
 * ScannedMember slabs (an unpacking implementation detail). Before doing real
 * unpacking, we first scan through the elements to see how many there are (for
 * repeated fields), and which field to use (for non-repeated fields given
 * twice).
 *
 * In order to avoid allocations for small messages, we keep a stack-allocated
 * slab of ScannedMembers of size FIRST_SCANNED_MEMBER_SLAB_SIZE (16). After we
 * fill that up, we allocate each slab twice as large as the previous one.
+ */ +#define FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2 4 + +/* + * The number of slabs, including the stack-allocated ones; choose the number so + * that we would overflow if we needed a slab larger than provided. + */ +#define MAX_SCANNED_MEMBER_SLAB \ + (sizeof(unsigned int)*8 - 1 \ + - BOUND_SIZEOF_SCANNED_MEMBER_LOG2 \ + - FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2) + +#define REQUIRED_FIELD_BITMAP_SET(index) \ + (required_fields_bitmap[(index)/8] |= (1UL<<((index)%8))) + +#define REQUIRED_FIELD_BITMAP_IS_SET(index) \ + (required_fields_bitmap[(index)/8] & (1UL<<((index)%8))) + +ProtobufCMessage * +protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc, + ProtobufCAllocator *allocator, + size_t len, const uint8_t *data) +{ + ProtobufCMessage *rv; + size_t rem = len; + const uint8_t *at = data; + const ProtobufCFieldDescriptor *last_field = desc->fields + 0; + ScannedMember first_member_slab[1UL << + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2]; + + /* + * scanned_member_slabs[i] is an array of arrays of ScannedMember. + * The first slab (scanned_member_slabs[0] is just a pointer to + * first_member_slab), above. All subsequent slabs will be allocated + * using the allocator. 
+ */ + ScannedMember *scanned_member_slabs[MAX_SCANNED_MEMBER_SLAB + 1]; + unsigned which_slab = 0; /* the slab we are currently populating */ + unsigned in_slab_index = 0; /* number of members in the slab */ + size_t n_unknown = 0; + unsigned f; + unsigned j; + unsigned i_slab; + unsigned last_field_index = 0; + unsigned required_fields_bitmap_len; + unsigned char required_fields_bitmap_stack[16]; + unsigned char *required_fields_bitmap = required_fields_bitmap_stack; + protobuf_c_boolean required_fields_bitmap_alloced = FALSE; + + ASSERT_IS_MESSAGE_DESCRIPTOR(desc); + + if (allocator == NULL) + allocator = &protobuf_c__allocator; + + rv = do_alloc(allocator, desc->sizeof_message); + if (!rv) + return (NULL); + scanned_member_slabs[0] = first_member_slab; + + required_fields_bitmap_len = (desc->n_fields + 7) / 8; + if (required_fields_bitmap_len > sizeof(required_fields_bitmap_stack)) { + required_fields_bitmap = do_alloc(allocator, required_fields_bitmap_len); + if (!required_fields_bitmap) { + do_free(allocator, rv); + return (NULL); + } + required_fields_bitmap_alloced = TRUE; + } + memset(required_fields_bitmap, 0, required_fields_bitmap_len); + + /* + * Generated code always defines "message_init". However, we provide a + * fallback for (1) users of old protobuf-c generated-code that do not + * provide the function, and (2) descriptors constructed from some other + * source (most likely, direct construction from the .proto file). 
+ */ + if (desc->message_init != NULL) + protobuf_c_message_init(desc, rv); + else + message_init_generic(desc, rv); + + while (rem > 0) { + uint32_t tag; + ProtobufCWireType wire_type; + size_t used = parse_tag_and_wiretype(rem, at, &tag, &wire_type); + const ProtobufCFieldDescriptor *field; + ScannedMember tmp; + + if (used == 0) { + PROTOBUF_C_UNPACK_ERROR("error parsing tag/wiretype at offset %u", + (unsigned) (at - data)); + goto error_cleanup_during_scan; + } + /* + * \todo Consider optimizing for field[1].id == tag, if field[1] + * exists! + */ + if (last_field == NULL || last_field->id != tag) { + /* lookup field */ + int field_index = + int_range_lookup(desc->n_field_ranges, + desc->field_ranges, + tag); + if (field_index < 0) { + field = NULL; + n_unknown++; + } else { + field = desc->fields + field_index; + last_field = field; + last_field_index = field_index; + } + } else { + field = last_field; + } + + if (field != NULL && field->label == PROTOBUF_C_LABEL_REQUIRED) + REQUIRED_FIELD_BITMAP_SET(last_field_index); + + at += used; + rem -= used; + tmp.tag = tag; + tmp.wire_type = wire_type; + tmp.field = field; + tmp.data = at; + tmp.length_prefix_len = 0; + + switch (wire_type) { + case PROTOBUF_C_WIRE_TYPE_VARINT: { + unsigned max_len = rem < 10 ? 
rem : 10; + unsigned i; + + for (i = 0; i < max_len; i++) + if ((at[i] & 0x80) == 0) + break; + if (i == max_len) { + PROTOBUF_C_UNPACK_ERROR("unterminated varint at offset %u", + (unsigned) (at - data)); + goto error_cleanup_during_scan; + } + tmp.len = i + 1; + break; + } + case PROTOBUF_C_WIRE_TYPE_64BIT: + if (rem < 8) { + PROTOBUF_C_UNPACK_ERROR("too short after 64bit wiretype at offset %u", + (unsigned) (at - data)); + goto error_cleanup_during_scan; + } + tmp.len = 8; + break; + case PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED: { + size_t pref_len; + + tmp.len = scan_length_prefixed_data(rem, at, &pref_len); + if (tmp.len == 0) { + /* NOTE: scan_length_prefixed_data calls UNPACK_ERROR */ + goto error_cleanup_during_scan; + } + tmp.length_prefix_len = pref_len; + break; + } + case PROTOBUF_C_WIRE_TYPE_32BIT: + if (rem < 4) { + PROTOBUF_C_UNPACK_ERROR("too short after 32bit wiretype at offset %u", + (unsigned) (at - data)); + goto error_cleanup_during_scan; + } + tmp.len = 4; + break; + default: + PROTOBUF_C_UNPACK_ERROR("unsupported tag %u at offset %u", + wire_type, (unsigned) (at - data)); + goto error_cleanup_during_scan; + } + + if (in_slab_index == (1UL << + (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2))) + { + size_t size; + + in_slab_index = 0; + if (which_slab == MAX_SCANNED_MEMBER_SLAB) { + PROTOBUF_C_UNPACK_ERROR("too many fields"); + goto error_cleanup_during_scan; + } + which_slab++; + size = sizeof(ScannedMember) + << (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2); + scanned_member_slabs[which_slab] = do_alloc(allocator, size); + if (scanned_member_slabs[which_slab] == NULL) + goto error_cleanup_during_scan; + } + scanned_member_slabs[which_slab][in_slab_index++] = tmp; + + if (field != NULL && field->label == PROTOBUF_C_LABEL_REPEATED) { + size_t *n = STRUCT_MEMBER_PTR(size_t, rv, + field->quantifier_offset); + if (wire_type == PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED && + (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED) || + 
is_packable_type(field->type))) + { + size_t count; + if (!count_packed_elements(field->type, + tmp.len - + tmp.length_prefix_len, + tmp.data + + tmp.length_prefix_len, + &count)) + { + PROTOBUF_C_UNPACK_ERROR("counting packed elements"); + goto error_cleanup_during_scan; + } + *n += count; + } else { + *n += 1; + } + } + + at += tmp.len; + rem -= tmp.len; + } + + /* allocate space for repeated fields, also check that all required fields have been set */ + for (f = 0; f < desc->n_fields; f++) { + const ProtobufCFieldDescriptor *field = desc->fields + f; + if (field->label == PROTOBUF_C_LABEL_REPEATED) { + size_t siz = + sizeof_elt_in_repeated_array(field->type); + size_t *n_ptr = + STRUCT_MEMBER_PTR(size_t, rv, + field->quantifier_offset); + if (*n_ptr != 0) { + unsigned n = *n_ptr; + void *a; + *n_ptr = 0; + assert(rv->descriptor != NULL); +#define CLEAR_REMAINING_N_PTRS() \ + for(f++;f < desc->n_fields; f++) \ + { \ + field = desc->fields + f; \ + if (field->label == PROTOBUF_C_LABEL_REPEATED) \ + STRUCT_MEMBER (size_t, rv, field->quantifier_offset) = 0; \ + } + a = do_alloc(allocator, siz * n); + if (!a) { + CLEAR_REMAINING_N_PTRS(); + goto error_cleanup; + } + STRUCT_MEMBER(void *, rv, field->offset) = a; + } + } else if (field->label == PROTOBUF_C_LABEL_REQUIRED) { + if (field->default_value == NULL && + !REQUIRED_FIELD_BITMAP_IS_SET(f)) + { + CLEAR_REMAINING_N_PTRS(); + PROTOBUF_C_UNPACK_ERROR("message '%s': missing required field '%s'", + desc->name, field->name); + goto error_cleanup; + } + } + } +#undef CLEAR_REMAINING_N_PTRS + + /* allocate space for unknown fields */ + if (n_unknown) { + rv->unknown_fields = do_alloc(allocator, + n_unknown * sizeof(ProtobufCMessageUnknownField)); + if (rv->unknown_fields == NULL) + goto error_cleanup; + } + + /* do real parsing */ + for (i_slab = 0; i_slab <= which_slab; i_slab++) { + unsigned max = (i_slab == which_slab) ? 
+ in_slab_index : (1UL << (i_slab + 4)); + ScannedMember *slab = scanned_member_slabs[i_slab]; + + for (j = 0; j < max; j++) { + if (!parse_member(slab + j, rv, allocator)) { + PROTOBUF_C_UNPACK_ERROR("error parsing member %s of %s", + slab->field ? slab->field->name : "*unknown-field*", + desc->name); + goto error_cleanup; + } + } + } + + /* cleanup */ + for (j = 1; j <= which_slab; j++) + do_free(allocator, scanned_member_slabs[j]); + if (required_fields_bitmap_alloced) + do_free(allocator, required_fields_bitmap); + return rv; + +error_cleanup: + protobuf_c_message_free_unpacked(rv, allocator); + for (j = 1; j <= which_slab; j++) + do_free(allocator, scanned_member_slabs[j]); + if (required_fields_bitmap_alloced) + do_free(allocator, required_fields_bitmap); + return NULL; + +error_cleanup_during_scan: + do_free(allocator, rv); + for (j = 1; j <= which_slab; j++) + do_free(allocator, scanned_member_slabs[j]); + if (required_fields_bitmap_alloced) + do_free(allocator, required_fields_bitmap); + return NULL; +} + +void +protobuf_c_message_free_unpacked(ProtobufCMessage *message, + ProtobufCAllocator *allocator) +{ + const ProtobufCMessageDescriptor *desc; + unsigned f; + + if (message == NULL) + return; + + desc = message->descriptor; + + ASSERT_IS_MESSAGE(message); + + if (allocator == NULL) + allocator = &protobuf_c__allocator; + message->descriptor = NULL; + for (f = 0; f < desc->n_fields; f++) { + if (0 != (desc->fields[f].flags & PROTOBUF_C_FIELD_FLAG_ONEOF) && + desc->fields[f].id != + STRUCT_MEMBER(uint32_t, message, desc->fields[f].quantifier_offset)) + { + /* This is not the selected oneof, skip it */ + continue; + } + + if (desc->fields[f].label == PROTOBUF_C_LABEL_REPEATED) { + size_t n = STRUCT_MEMBER(size_t, + message, + desc->fields[f].quantifier_offset); + void *arr = STRUCT_MEMBER(void *, + message, + desc->fields[f].offset); + + if (arr != NULL) { + if (desc->fields[f].type == PROTOBUF_C_TYPE_STRING) { + unsigned i; + for (i = 0; i < n; i++) + 
do_free(allocator, ((char **) arr)[i]); + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_BYTES) { + unsigned i; + for (i = 0; i < n; i++) + do_free(allocator, ((ProtobufCBinaryData *) arr)[i].data); + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_MESSAGE) { + unsigned i; + for (i = 0; i < n; i++) + protobuf_c_message_free_unpacked( + ((ProtobufCMessage **) arr)[i], + allocator + ); + } + do_free(allocator, arr); + } + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_STRING) { + char *str = STRUCT_MEMBER(char *, message, + desc->fields[f].offset); + + if (str && str != desc->fields[f].default_value) + do_free(allocator, str); + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_BYTES) { + void *data = STRUCT_MEMBER(ProtobufCBinaryData, message, + desc->fields[f].offset).data; + const ProtobufCBinaryData *default_bd; + + default_bd = desc->fields[f].default_value; + if (data != NULL && + (default_bd == NULL || + default_bd->data != data)) + { + do_free(allocator, data); + } + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_MESSAGE) { + ProtobufCMessage *sm; + + sm = STRUCT_MEMBER(ProtobufCMessage *, message, + desc->fields[f].offset); + if (sm && sm != desc->fields[f].default_value) + protobuf_c_message_free_unpacked(sm, allocator); + } + } + + for (f = 0; f < message->n_unknown_fields; f++) + do_free(allocator, message->unknown_fields[f].data); + if (message->unknown_fields != NULL) + do_free(allocator, message->unknown_fields); + + do_free(allocator, message); +} + +void +protobuf_c_message_init(const ProtobufCMessageDescriptor * descriptor, + void *message) +{ + descriptor->message_init((ProtobufCMessage *) (message)); +} + +protobuf_c_boolean +protobuf_c_message_check(const ProtobufCMessage *message) +{ + unsigned i; + + if (!message || + !message->descriptor || + message->descriptor->magic != PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) + { + return FALSE; + } + + for (i = 0; i < message->descriptor->n_fields; i++) { + const ProtobufCFieldDescriptor *f = 
message->descriptor->fields + i; + ProtobufCType type = f->type; + ProtobufCLabel label = f->label; + void *field = STRUCT_MEMBER_P (message, f->offset); + + if (label == PROTOBUF_C_LABEL_REPEATED) { + size_t *quantity = STRUCT_MEMBER_P (message, f->quantifier_offset); + + if (*quantity > 0 && *(void **) field == NULL) { + return FALSE; + } + + if (type == PROTOBUF_C_TYPE_MESSAGE) { + ProtobufCMessage **submessage = *(ProtobufCMessage ***) field; + unsigned j; + for (j = 0; j < *quantity; j++) { + if (!protobuf_c_message_check(submessage[j])) + return FALSE; + } + } else if (type == PROTOBUF_C_TYPE_STRING) { + char **string = *(char ***) field; + unsigned j; + for (j = 0; j < *quantity; j++) { + if (!string[j]) + return FALSE; + } + } else if (type == PROTOBUF_C_TYPE_BYTES) { + ProtobufCBinaryData *bd = *(ProtobufCBinaryData **) field; + unsigned j; + for (j = 0; j < *quantity; j++) { + if (bd[j].len > 0 && bd[j].data == NULL) + return FALSE; + } + } + + } else { /* PROTOBUF_C_LABEL_REQUIRED or PROTOBUF_C_LABEL_OPTIONAL */ + + if (type == PROTOBUF_C_TYPE_MESSAGE) { + ProtobufCMessage *submessage = *(ProtobufCMessage **) field; + if (label == PROTOBUF_C_LABEL_REQUIRED || submessage != NULL) { + if (!protobuf_c_message_check(submessage)) + return FALSE; + } + } else if (type == PROTOBUF_C_TYPE_STRING) { + char *string = *(char **) field; + if (label == PROTOBUF_C_LABEL_REQUIRED && string == NULL) + return FALSE; + } else if (type == PROTOBUF_C_TYPE_BYTES) { + protobuf_c_boolean *has = STRUCT_MEMBER_P (message, f->quantifier_offset); + ProtobufCBinaryData *bd = field; + if (label == PROTOBUF_C_LABEL_REQUIRED || *has == TRUE) { + if (bd->len > 0 && bd->data == NULL) + return FALSE; + } + } + } + } + + return TRUE; +} + +/* === services === */ + +typedef void (*GenericHandler) (void *service, + const ProtobufCMessage *input, + ProtobufCClosure closure, + void *closure_data); +void +protobuf_c_service_invoke_internal(ProtobufCService *service, + unsigned method_index, + 
const ProtobufCMessage *input, + ProtobufCClosure closure, + void *closure_data) +{ + GenericHandler *handlers; + GenericHandler handler; + + /* + * Verify that method_index is within range. If this fails, you are + * likely invoking a newly added method on an old service. (Although + * other memory corruption bugs can cause this assertion too.) + */ + assert(method_index < service->descriptor->n_methods); + + /* + * Get the array of virtual methods (which are enumerated by the + * generated code). + */ + handlers = (GenericHandler *) (service + 1); + + /* + * Get our method and invoke it. + * \todo Seems like handler == NULL is a situation that needs handling. + */ + handler = handlers[method_index]; + (*handler)(service, input, closure, closure_data); +} + +void +protobuf_c_service_generated_init(ProtobufCService *service, + const ProtobufCServiceDescriptor *descriptor, + ProtobufCServiceDestroy destroy) +{ + ASSERT_IS_SERVICE_DESCRIPTOR(descriptor); + service->descriptor = descriptor; + service->destroy = destroy; + service->invoke = protobuf_c_service_invoke_internal; + memset(service + 1, 0, descriptor->n_methods * sizeof(GenericHandler)); +} + +void protobuf_c_service_destroy(ProtobufCService *service) +{ + service->destroy(service); +} + +/* --- querying the descriptors --- */ + +const ProtobufCEnumValue * +protobuf_c_enum_descriptor_get_value_by_name(const ProtobufCEnumDescriptor *desc, + const char *name) +{ + unsigned start = 0; + unsigned count; + + if (desc == NULL || desc->values_by_name == NULL) + return NULL; + + count = desc->n_value_names; + + while (count > 1) { + unsigned mid = start + count / 2; + int rv = strcmp(desc->values_by_name[mid].name, name); + if (rv == 0) + return desc->values + desc->values_by_name[mid].index; + else if (rv < 0) { + count = start + count - (mid + 1); + start = mid + 1; + } else + count = mid - start; + } + if (count == 0) + return NULL; + if (strcmp(desc->values_by_name[start].name, name) == 0) + return desc->values 
+ desc->values_by_name[start].index; + return NULL; +} + +const ProtobufCEnumValue * +protobuf_c_enum_descriptor_get_value(const ProtobufCEnumDescriptor *desc, + int value) +{ + int rv = int_range_lookup(desc->n_value_ranges, desc->value_ranges, value); + if (rv < 0) + return NULL; + return desc->values + rv; +} + +const ProtobufCFieldDescriptor * +protobuf_c_message_descriptor_get_field_by_name(const ProtobufCMessageDescriptor *desc, + const char *name) +{ + unsigned start = 0; + unsigned count; + const ProtobufCFieldDescriptor *field; + + if (desc == NULL || desc->fields_sorted_by_name == NULL) + return NULL; + + count = desc->n_fields; + + while (count > 1) { + unsigned mid = start + count / 2; + int rv; + field = desc->fields + desc->fields_sorted_by_name[mid]; + rv = strcmp(field->name, name); + if (rv == 0) + return field; + else if (rv < 0) { + count = start + count - (mid + 1); + start = mid + 1; + } else + count = mid - start; + } + if (count == 0) + return NULL; + field = desc->fields + desc->fields_sorted_by_name[start]; + if (strcmp(field->name, name) == 0) + return field; + return NULL; +} + +const ProtobufCFieldDescriptor * +protobuf_c_message_descriptor_get_field(const ProtobufCMessageDescriptor *desc, + unsigned value) +{ + int rv = int_range_lookup(desc->n_field_ranges,desc->field_ranges, value); + if (rv < 0) + return NULL; + return desc->fields + rv; +} + +const ProtobufCMethodDescriptor * +protobuf_c_service_descriptor_get_method_by_name(const ProtobufCServiceDescriptor *desc, + const char *name) +{ + unsigned start = 0; + unsigned count; + + if (desc == NULL || desc->method_indices_by_name == NULL) + return NULL; + + count = desc->n_methods; + + while (count > 1) { + unsigned mid = start + count / 2; + unsigned mid_index = desc->method_indices_by_name[mid]; + const char *mid_name = desc->methods[mid_index].name; + int rv = strcmp(mid_name, name); + + if (rv == 0) + return desc->methods + desc->method_indices_by_name[mid]; + if (rv < 0) { + 
count = start + count - (mid + 1); + start = mid + 1; + } else { + count = mid - start; + } + } + if (count == 0) + return NULL; + if (strcmp(desc->methods[desc->method_indices_by_name[start]].name, name) == 0) + return desc->methods + desc->method_indices_by_name[start]; + return NULL; +} diff --git a/src/common/protobuf-c.h b/src/protobuf-c/protobuf-c.h old mode 100644 new mode 100755 similarity index 53% rename from src/common/protobuf-c.h rename to src/protobuf-c/protobuf-c.h index bd85695b868af6c7b91590196339bc4f7826a256..390bf4238eb7204432bc8598af1e7cd0ce8bcf81 --- a/src/common/protobuf-c.h +++ b/src/protobuf-c/protobuf-c.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2017, Dave Benson and the protobuf-c authors. + * Copyright (c) 2008-2018, Dave Benson and the protobuf-c authors. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -202,40 +202,40 @@ size_t foo__bar__baz_bah__pack_to_buffer #include #ifdef __cplusplus -#define PROTOBUF_C__BEGIN_DECLS extern "C" { -#define PROTOBUF_C__END_DECLS } +# define PROTOBUF_C__BEGIN_DECLS extern "C" { +# define PROTOBUF_C__END_DECLS } #else -#define PROTOBUF_C__BEGIN_DECLS -#define PROTOBUF_C__END_DECLS +# define PROTOBUF_C__BEGIN_DECLS +# define PROTOBUF_C__END_DECLS #endif PROTOBUF_C__BEGIN_DECLS #if defined(_WIN32) && defined(PROTOBUF_C_USE_SHARED_LIB) -#ifdef PROTOBUF_C_EXPORT -#define PROTOBUF_C__API __declspec(dllexport) +# ifdef PROTOBUF_C_EXPORT +# define PROTOBUF_C__API __declspec(dllexport) +# else +# define PROTOBUF_C__API __declspec(dllimport) +# endif #else -#define PROTOBUF_C__API __declspec(dllimport) -#endif -#else -#define PROTOBUF_C__API +# define PROTOBUF_C__API #endif #if !defined(PROTOBUF_C__NO_DEPRECATED) && \ - ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) -#define PROTOBUF_C__DEPRECATED __attribute__((__deprecated__)) + ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +# define PROTOBUF_C__DEPRECATED __attribute__((__deprecated__)) 
#else -#define PROTOBUF_C__DEPRECATED +# define PROTOBUF_C__DEPRECATED #endif #ifndef PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE -#define PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(enum_name) \ + #define PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(enum_name) \ , _##enum_name##_IS_INT_SIZE = INT_MAX #endif -#define PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC 0x14159bc3 -#define PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC 0x28aaeef9 -#define PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC 0x114315af +#define PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC 0x14159bc3 +#define PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC 0x28aaeef9 +#define PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC 0x114315af /* Empty string used for initializers */ extern const char protobuf_c_empty_string[]; @@ -253,14 +253,14 @@ extern const char protobuf_c_empty_string[]; * Values for the `flags` word in `ProtobufCFieldDescriptor`. */ typedef enum { - /** Set if the field is repeated and marked with the `packed` option. */ - PROTOBUF_C_FIELD_FLAG_PACKED = (1 << 0), + /** Set if the field is repeated and marked with the `packed` option. */ + PROTOBUF_C_FIELD_FLAG_PACKED = (1 << 0), - /** Set if the field is marked with the `deprecated` option. */ - PROTOBUF_C_FIELD_FLAG_DEPRECATED = (1 << 1), + /** Set if the field is marked with the `deprecated` option. */ + PROTOBUF_C_FIELD_FLAG_DEPRECATED = (1 << 1), - /** Set if the field is a member of a oneof (union). */ - PROTOBUF_C_FIELD_FLAG_ONEOF = (1 << 2), + /** Set if the field is a member of a oneof (union). */ + PROTOBUF_C_FIELD_FLAG_ONEOF = (1 << 2), } ProtobufCFieldFlag; /** @@ -272,27 +272,27 @@ typedef enum { * https://developers.google.com/protocol-buffers/docs/proto#simple */ typedef enum { - /** A well-formed message must have exactly one of this field. */ - PROTOBUF_C_LABEL_REQUIRED, - - /** - * A well-formed message can have zero or one of this field (but not - * more than one). 
- */ - PROTOBUF_C_LABEL_OPTIONAL, - - /** - * This field can be repeated any number of times (including zero) in a - * well-formed message. The order of the repeated values will be - * preserved. - */ - PROTOBUF_C_LABEL_REPEATED, - - /** - * This field has no label. This is valid only in proto3 and is - * equivalent to OPTIONAL but no "has" quantifier will be consulted. - */ - PROTOBUF_C_LABEL_NONE, + /** A well-formed message must have exactly one of this field. */ + PROTOBUF_C_LABEL_REQUIRED, + + /** + * A well-formed message can have zero or one of this field (but not + * more than one). + */ + PROTOBUF_C_LABEL_OPTIONAL, + + /** + * This field can be repeated any number of times (including zero) in a + * well-formed message. The order of the repeated values will be + * preserved. + */ + PROTOBUF_C_LABEL_REPEATED, + + /** + * This field has no label. This is valid only in proto3 and is + * equivalent to OPTIONAL but no "has" quantifier will be consulted. + */ + PROTOBUF_C_LABEL_NONE, } ProtobufCLabel; /** @@ -304,23 +304,23 @@ typedef enum { * https://developers.google.com/protocol-buffers/docs/proto#scalar */ typedef enum { - PROTOBUF_C_TYPE_INT32, /**< int32 */ - PROTOBUF_C_TYPE_SINT32, /**< signed int32 */ - PROTOBUF_C_TYPE_SFIXED32, /**< signed int32 (4 bytes) */ - PROTOBUF_C_TYPE_INT64, /**< int64 */ - PROTOBUF_C_TYPE_SINT64, /**< signed int64 */ - PROTOBUF_C_TYPE_SFIXED64, /**< signed int64 (8 bytes) */ - PROTOBUF_C_TYPE_UINT32, /**< unsigned int32 */ - PROTOBUF_C_TYPE_FIXED32, /**< unsigned int32 (4 bytes) */ - PROTOBUF_C_TYPE_UINT64, /**< unsigned int64 */ - PROTOBUF_C_TYPE_FIXED64, /**< unsigned int64 (8 bytes) */ - PROTOBUF_C_TYPE_FLOAT, /**< float */ - PROTOBUF_C_TYPE_DOUBLE, /**< double */ - PROTOBUF_C_TYPE_BOOL, /**< boolean */ - PROTOBUF_C_TYPE_ENUM, /**< enumerated type */ - PROTOBUF_C_TYPE_STRING, /**< UTF-8 or ASCII string */ - PROTOBUF_C_TYPE_BYTES, /**< arbitrary byte sequence */ - PROTOBUF_C_TYPE_MESSAGE, /**< nested message */ + 
PROTOBUF_C_TYPE_INT32, /**< int32 */ + PROTOBUF_C_TYPE_SINT32, /**< signed int32 */ + PROTOBUF_C_TYPE_SFIXED32, /**< signed int32 (4 bytes) */ + PROTOBUF_C_TYPE_INT64, /**< int64 */ + PROTOBUF_C_TYPE_SINT64, /**< signed int64 */ + PROTOBUF_C_TYPE_SFIXED64, /**< signed int64 (8 bytes) */ + PROTOBUF_C_TYPE_UINT32, /**< unsigned int32 */ + PROTOBUF_C_TYPE_FIXED32, /**< unsigned int32 (4 bytes) */ + PROTOBUF_C_TYPE_UINT64, /**< unsigned int64 */ + PROTOBUF_C_TYPE_FIXED64, /**< unsigned int64 (8 bytes) */ + PROTOBUF_C_TYPE_FLOAT, /**< float */ + PROTOBUF_C_TYPE_DOUBLE, /**< double */ + PROTOBUF_C_TYPE_BOOL, /**< boolean */ + PROTOBUF_C_TYPE_ENUM, /**< enumerated type */ + PROTOBUF_C_TYPE_STRING, /**< UTF-8 or ASCII string */ + PROTOBUF_C_TYPE_BYTES, /**< arbitrary byte sequence */ + PROTOBUF_C_TYPE_MESSAGE, /**< nested message */ } ProtobufCType; /** @@ -332,11 +332,11 @@ typedef enum { * https://developers.google.com/protocol-buffers/docs/encoding#structure */ typedef enum { - PROTOBUF_C_WIRE_TYPE_VARINT = 0, - PROTOBUF_C_WIRE_TYPE_64BIT = 1, - PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED = 2, - /* "Start group" and "end group" wire types are unsupported. */ - PROTOBUF_C_WIRE_TYPE_32BIT = 5, + PROTOBUF_C_WIRE_TYPE_VARINT = 0, + PROTOBUF_C_WIRE_TYPE_64BIT = 1, + PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED = 2, + /* "Start group" and "end group" wire types are unsupported. */ + PROTOBUF_C_WIRE_TYPE_32BIT = 5, } ProtobufCWireType; struct ProtobufCAllocator; @@ -382,14 +382,14 @@ typedef void (*ProtobufCServiceDestroy)(ProtobufCService *); * Structure for defining a custom memory allocator. */ struct ProtobufCAllocator { - /** Function to allocate memory. */ - void *(*alloc)(void *allocator_data, size_t size); + /** Function to allocate memory. */ + void *(*alloc)(void *allocator_data, size_t size); - /** Function to free memory. */ - void (*free)(void *allocator_data, void *pointer); + /** Function to free memory. 
*/ + void (*free)(void *allocator_data, void *pointer); - /** Opaque pointer passed to `alloc` and `free` functions. */ - void *allocator_data; + /** Opaque pointer passed to `alloc` and `free` functions. */ + void *allocator_data; }; /** @@ -400,8 +400,8 @@ struct ProtobufCAllocator { * `NUL`-terminated. */ struct ProtobufCBinaryData { - size_t len; /**< Number of bytes in the `data` field. */ - uint8_t *data; /**< Data bytes. */ + size_t len; /**< Number of bytes in the `data` field. */ + uint8_t *data; /**< Data bytes. */ }; /** @@ -440,8 +440,10 @@ protobuf_c_message_pack_to_buffer(&message, &tmp); ~~~ */ struct ProtobufCBuffer { - /** Append function. Consumes the `len` bytes stored at `data`. */ - void (*append)(ProtobufCBuffer *buffer, size_t len, const uint8_t *data); + /** Append function. Consumes the `len` bytes stored at `data`. */ + void (*append)(ProtobufCBuffer *buffer, + size_t len, + const uint8_t *data); }; /** @@ -473,142 +475,142 @@ PROTOBUF_C_BUFFER_SIMPLE_CLEAR(&simple); * \see PROTOBUF_C_BUFFER_SIMPLE_CLEAR */ struct ProtobufCBufferSimple { - /** "Base class". */ - ProtobufCBuffer base; - /** Number of bytes allocated in `data`. */ - size_t alloced; - /** Number of bytes currently stored in `data`. */ - size_t len; - /** Data bytes. */ - uint8_t *data; - /** Whether `data` must be freed. */ - protobuf_c_boolean must_free_data; - /** Allocator to use. May be NULL to indicate the system allocator. */ - ProtobufCAllocator *allocator; + /** "Base class". */ + ProtobufCBuffer base; + /** Number of bytes allocated in `data`. */ + size_t alloced; + /** Number of bytes currently stored in `data`. */ + size_t len; + /** Data bytes. */ + uint8_t *data; + /** Whether `data` must be freed. */ + protobuf_c_boolean must_free_data; + /** Allocator to use. May be NULL to indicate the system allocator. */ + ProtobufCAllocator *allocator; }; /** * Describes an enumeration as a whole, with all of its values. 
*/ struct ProtobufCEnumDescriptor { - /** Magic value checked to ensure that the API is used correctly. */ - uint32_t magic; - - /** The qualified name (e.g., "namespace.Type"). */ - const char *name; - /** The unqualified name as given in the .proto file (e.g., "Type"). */ - const char *short_name; - /** Identifier used in generated C code. */ - const char *c_name; - /** The dot-separated namespace. */ - const char *package_name; - - /** Number elements in `values`. */ - unsigned n_values; - /** Array of distinct values, sorted by numeric value. */ - const ProtobufCEnumValue *values; - - /** Number of elements in `values_by_name`. */ - unsigned n_value_names; - /** Array of named values, including aliases, sorted by name. */ - const ProtobufCEnumValueIndex *values_by_name; - - /** Number of elements in `value_ranges`. */ - unsigned n_value_ranges; - /** Value ranges, for faster lookups by numeric value. */ - const ProtobufCIntRange *value_ranges; - - /** Reserved for future use. */ - void *reserved1; - /** Reserved for future use. */ - void *reserved2; - /** Reserved for future use. */ - void *reserved3; - /** Reserved for future use. */ - void *reserved4; + /** Magic value checked to ensure that the API is used correctly. */ + uint32_t magic; + + /** The qualified name (e.g., "namespace.Type"). */ + const char *name; + /** The unqualified name as given in the .proto file (e.g., "Type"). */ + const char *short_name; + /** Identifier used in generated C code. */ + const char *c_name; + /** The dot-separated namespace. */ + const char *package_name; + + /** Number elements in `values`. */ + unsigned n_values; + /** Array of distinct values, sorted by numeric value. */ + const ProtobufCEnumValue *values; + + /** Number of elements in `values_by_name`. */ + unsigned n_value_names; + /** Array of named values, including aliases, sorted by name. */ + const ProtobufCEnumValueIndex *values_by_name; + + /** Number of elements in `value_ranges`. 
*/ + unsigned n_value_ranges; + /** Value ranges, for faster lookups by numeric value. */ + const ProtobufCIntRange *value_ranges; + + /** Reserved for future use. */ + void *reserved1; + /** Reserved for future use. */ + void *reserved2; + /** Reserved for future use. */ + void *reserved3; + /** Reserved for future use. */ + void *reserved4; }; /** * Represents a single value of an enumeration. */ struct ProtobufCEnumValue { - /** The string identifying this value in the .proto file. */ - const char *name; + /** The string identifying this value in the .proto file. */ + const char *name; - /** The string identifying this value in generated C code. */ - const char *c_name; + /** The string identifying this value in generated C code. */ + const char *c_name; - /** The numeric value assigned in the .proto file. */ - int value; + /** The numeric value assigned in the .proto file. */ + int value; }; /** * Used by `ProtobufCEnumDescriptor` to look up enum values. */ struct ProtobufCEnumValueIndex { - /** Name of the enum value. */ - const char *name; - /** Index into values[] array. */ - unsigned index; + /** Name of the enum value. */ + const char *name; + /** Index into values[] array. */ + unsigned index; }; /** * Describes a single field in a message. */ struct ProtobufCFieldDescriptor { - /** Name of the field as given in the .proto file. */ - const char *name; - - /** Tag value of the field as given in the .proto file. */ - uint32_t id; - - /** Whether the field is `REQUIRED`, `OPTIONAL`, or `REPEATED`. */ - ProtobufCLabel label; - - /** The type of the field. */ - ProtobufCType type; - - /** - * The offset in bytes of the message's C structure's quantifier field - * (the `has_MEMBER` field for optional members or the `n_MEMBER` field - * for repeated members or the case enum for oneofs). - */ - unsigned quantifier_offset; - - /** - * The offset in bytes into the message's C structure for the member - * itself. 
- */ - unsigned offset; - - /** - * A type-specific descriptor. - * - * If `type` is `PROTOBUF_C_TYPE_ENUM`, then `descriptor` points to the - * corresponding `ProtobufCEnumDescriptor`. - * - * If `type` is `PROTOBUF_C_TYPE_MESSAGE`, then `descriptor` points to - * the corresponding `ProtobufCMessageDescriptor`. - * - * Otherwise this field is NULL. - */ - const void *descriptor; /* for MESSAGE and ENUM types */ - - /** The default value for this field, if defined. May be NULL. */ - const void *default_value; - - /** - * A flag word. Zero or more of the bits defined in the - * `ProtobufCFieldFlag` enum may be set. - */ - uint32_t flags; - - /** Reserved for future use. */ - unsigned reserved_flags; - /** Reserved for future use. */ - void *reserved2; - /** Reserved for future use. */ - void *reserved3; + /** Name of the field as given in the .proto file. */ + const char *name; + + /** Tag value of the field as given in the .proto file. */ + uint32_t id; + + /** Whether the field is `REQUIRED`, `OPTIONAL`, or `REPEATED`. */ + ProtobufCLabel label; + + /** The type of the field. */ + ProtobufCType type; + + /** + * The offset in bytes of the message's C structure's quantifier field + * (the `has_MEMBER` field for optional members or the `n_MEMBER` field + * for repeated members or the case enum for oneofs). + */ + unsigned quantifier_offset; + + /** + * The offset in bytes into the message's C structure for the member + * itself. + */ + unsigned offset; + + /** + * A type-specific descriptor. + * + * If `type` is `PROTOBUF_C_TYPE_ENUM`, then `descriptor` points to the + * corresponding `ProtobufCEnumDescriptor`. + * + * If `type` is `PROTOBUF_C_TYPE_MESSAGE`, then `descriptor` points to + * the corresponding `ProtobufCMessageDescriptor`. + * + * Otherwise this field is NULL. + */ + const void *descriptor; /* for MESSAGE and ENUM types */ + + /** The default value for this field, if defined. May be NULL. */ + const void *default_value; + + /** + * A flag word. 
Zero or more of the bits defined in the + * `ProtobufCFieldFlag` enum may be set. + */ + uint32_t flags; + + /** Reserved for future use. */ + unsigned reserved_flags; + /** Reserved for future use. */ + void *reserved2; + /** Reserved for future use. */ + void *reserved3; }; /** @@ -620,13 +622,13 @@ struct ProtobufCFieldDescriptor { * sorted. */ struct ProtobufCIntRange { - int start_value; - unsigned orig_index; - /* - * NOTE: the number of values in the range can be inferred by looking - * at the next element's orig_index. A dummy element is added to make - * this simple. - */ + int start_value; + unsigned orig_index; + /* + * NOTE: the number of values in the range can be inferred by looking + * at the next element's orig_index. A dummy element is added to make + * this simple. + */ }; /** @@ -645,120 +647,122 @@ struct ProtobufCIntRange { * like protobuf_c_message_free_unpacked(). */ struct ProtobufCMessage { - /** The descriptor for this message type. */ - const ProtobufCMessageDescriptor *descriptor; - /** The number of elements in `unknown_fields`. */ - unsigned n_unknown_fields; - /** The fields that weren't recognized by the parser. */ - ProtobufCMessageUnknownField *unknown_fields; + /** The descriptor for this message type. */ + const ProtobufCMessageDescriptor *descriptor; + /** The number of elements in `unknown_fields`. */ + unsigned n_unknown_fields; + /** The fields that weren't recognized by the parser. */ + ProtobufCMessageUnknownField *unknown_fields; }; /** * Describes a message. */ struct ProtobufCMessageDescriptor { - /** Magic value checked to ensure that the API is used correctly. */ - uint32_t magic; - - /** The qualified name (e.g., "namespace.Type"). */ - const char *name; - /** The unqualified name as given in the .proto file (e.g., "Type"). */ - const char *short_name; - /** Identifier used in generated C code. */ - const char *c_name; - /** The dot-separated namespace. 
*/ - const char *package_name; - - /** - * Size in bytes of the C structure representing an instance of this - * type of message. - */ - size_t sizeof_message; - - /** Number of elements in `fields`. */ - unsigned n_fields; - /** Field descriptors, sorted by tag number. */ - const ProtobufCFieldDescriptor *fields; - /** Used for looking up fields by name. */ - const unsigned *fields_sorted_by_name; - - /** Number of elements in `field_ranges`. */ - unsigned n_field_ranges; - /** Used for looking up fields by id. */ - const ProtobufCIntRange *field_ranges; - - /** Message initialisation function. */ - ProtobufCMessageInit message_init; - - /** Reserved for future use. */ - void *reserved1; - /** Reserved for future use. */ - void *reserved2; - /** Reserved for future use. */ - void *reserved3; + /** Magic value checked to ensure that the API is used correctly. */ + uint32_t magic; + + /** The qualified name (e.g., "namespace.Type"). */ + const char *name; + /** The unqualified name as given in the .proto file (e.g., "Type"). */ + const char *short_name; + /** Identifier used in generated C code. */ + const char *c_name; + /** The dot-separated namespace. */ + const char *package_name; + + /** + * Size in bytes of the C structure representing an instance of this + * type of message. + */ + size_t sizeof_message; + + /** Number of elements in `fields`. */ + unsigned n_fields; + /** Field descriptors, sorted by tag number. */ + const ProtobufCFieldDescriptor *fields; + /** Used for looking up fields by name. */ + const unsigned *fields_sorted_by_name; + + /** Number of elements in `field_ranges`. */ + unsigned n_field_ranges; + /** Used for looking up fields by id. */ + const ProtobufCIntRange *field_ranges; + + /** Message initialisation function. */ + ProtobufCMessageInit message_init; + + /** Reserved for future use. */ + void *reserved1; + /** Reserved for future use. */ + void *reserved2; + /** Reserved for future use. 
*/ + void *reserved3; }; /** * An unknown message field. */ struct ProtobufCMessageUnknownField { - /** The tag number. */ - uint32_t tag; - /** The wire type of the field. */ - ProtobufCWireType wire_type; - /** Number of bytes in `data`. */ - size_t len; - /** Field data. */ - uint8_t *data; + /** The tag number. */ + uint32_t tag; + /** The wire type of the field. */ + ProtobufCWireType wire_type; + /** Number of bytes in `data`. */ + size_t len; + /** Field data. */ + uint8_t *data; }; /** * Method descriptor. */ struct ProtobufCMethodDescriptor { - /** Method name. */ - const char *name; - /** Input message descriptor. */ - const ProtobufCMessageDescriptor *input; - /** Output message descriptor. */ - const ProtobufCMessageDescriptor *output; + /** Method name. */ + const char *name; + /** Input message descriptor. */ + const ProtobufCMessageDescriptor *input; + /** Output message descriptor. */ + const ProtobufCMessageDescriptor *output; }; /** * Service. */ struct ProtobufCService { - /** Service descriptor. */ - const ProtobufCServiceDescriptor *descriptor; - /** Function to invoke the service. */ - void (*invoke)(ProtobufCService *service, unsigned method_index, - const ProtobufCMessage *input, ProtobufCClosure closure, - void *closure_data); - /** Function to destroy the service. */ - void (*destroy)(ProtobufCService *service); + /** Service descriptor. */ + const ProtobufCServiceDescriptor *descriptor; + /** Function to invoke the service. */ + void (*invoke)(ProtobufCService *service, + unsigned method_index, + const ProtobufCMessage *input, + ProtobufCClosure closure, + void *closure_data); + /** Function to destroy the service. */ + void (*destroy)(ProtobufCService *service); }; /** * Service descriptor. */ struct ProtobufCServiceDescriptor { - /** Magic value checked to ensure that the API is used correctly. */ - uint32_t magic; - - /** Service name. */ - const char *name; - /** Short version of service name. 
*/ - const char *short_name; - /** C identifier for the service name. */ - const char *c_name; - /** Package name. */ - const char *package; - /** Number of elements in `methods`. */ - unsigned n_methods; - /** Method descriptors, in the order defined in the .proto file. */ - const ProtobufCMethodDescriptor *methods; - /** Sort index of methods. */ - const unsigned *method_indices_by_name; + /** Magic value checked to ensure that the API is used correctly. */ + uint32_t magic; + + /** Service name. */ + const char *name; + /** Short version of service name. */ + const char *short_name; + /** C identifier for the service name. */ + const char *c_name; + /** Package name. */ + const char *package; + /** Number of elements in `methods`. */ + unsigned n_methods; + /** Method descriptors, in the order defined in the .proto file. */ + const ProtobufCMethodDescriptor *methods; + /** Sort index of methods. */ + const unsigned *method_indices_by_name; }; /** @@ -768,7 +772,8 @@ struct ProtobufCServiceDescriptor { * \return A string containing the version number of protobuf-c. */ PROTOBUF_C__API -const char *protobuf_c_version(void); +const char * +protobuf_c_version(void); /** * Get the version of the protobuf-c library. Note that this is the version of @@ -778,25 +783,104 @@ const char *protobuf_c_version(void); * protobuf-c, represented in base-10 as (MAJOR*1E6) + (MINOR*1E3) + PATCH. */ PROTOBUF_C__API -uint32_t protobuf_c_version_number(void); +uint32_t +protobuf_c_version_number(void); /** * The version of the protobuf-c headers, represented as a string using the same * format as protobuf_c_version(). */ -#define PROTOBUF_C_VERSION "1.3.0" +#define PROTOBUF_C_VERSION "1.3.1" /** * The version of the protobuf-c headers, represented as an integer using the * same format as protobuf_c_version_number(). 
*/ -#define PROTOBUF_C_VERSION_NUMBER 1003000 +#define PROTOBUF_C_VERSION_NUMBER 1003001 /** * The minimum protoc-c version which works with the current version of the * protobuf-c headers. */ -#define PROTOBUF_C_MIN_COMPILER_VERSION 1000000 +#define PROTOBUF_C_MIN_COMPILER_VERSION 1000000 + +/** + * Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by name. + * + * \param desc + * The `ProtobufCEnumDescriptor` object. + * \param name + * The `name` field from the corresponding `ProtobufCEnumValue` object to + * match. + * \return + * A `ProtobufCEnumValue` object. + * \retval NULL + * If not found or if the optimize_for = CODE_SIZE option was set. + */ +PROTOBUF_C__API +const ProtobufCEnumValue * +protobuf_c_enum_descriptor_get_value_by_name( + const ProtobufCEnumDescriptor *desc, + const char *name); + +/** + * Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by numeric + * value. + * + * \param desc + * The `ProtobufCEnumDescriptor` object. + * \param value + * The `value` field from the corresponding `ProtobufCEnumValue` object to + * match. + * + * \return + * A `ProtobufCEnumValue` object. + * \retval NULL + * If not found. + */ +PROTOBUF_C__API +const ProtobufCEnumValue * +protobuf_c_enum_descriptor_get_value( + const ProtobufCEnumDescriptor *desc, + int value); + +/** + * Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by + * the name of the field. + * + * \param desc + * The `ProtobufCMessageDescriptor` object. + * \param name + * The name of the field. + * \return + * A `ProtobufCFieldDescriptor` object. + * \retval NULL + * If not found or if the optimize_for = CODE_SIZE option was set. + */ +PROTOBUF_C__API +const ProtobufCFieldDescriptor * +protobuf_c_message_descriptor_get_field_by_name( + const ProtobufCMessageDescriptor *desc, + const char *name); + +/** + * Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by + * the tag value of the field. 
+ * + * \param desc + * The `ProtobufCMessageDescriptor` object. + * \param value + * The tag value of the field. + * \return + * A `ProtobufCFieldDescriptor` object. + * \retval NULL + * If not found. + */ +PROTOBUF_C__API +const ProtobufCFieldDescriptor * +protobuf_c_message_descriptor_get_field( + const ProtobufCMessageDescriptor *desc, + unsigned value); /** * Determine the number of bytes required to store the serialised message. @@ -807,7 +891,47 @@ uint32_t protobuf_c_version_number(void); * Number of bytes. */ PROTOBUF_C__API -size_t protobuf_c_message_get_packed_size(const ProtobufCMessage *message); +size_t +protobuf_c_message_get_packed_size(const ProtobufCMessage *message); + +/** + * Serialise a message from its in-memory representation. + * + * This function stores the serialised bytes of the message in a pre-allocated + * buffer. + * + * \param message + * The message object to serialise. + * \param[out] out + * Buffer to store the bytes of the serialised message. This buffer must + * have enough space to store the packed message. Use + * protobuf_c_message_get_packed_size() to determine the number of bytes + * required. + * \return + * Number of bytes stored in `out`. + */ +PROTOBUF_C__API +size_t +protobuf_c_message_pack(const ProtobufCMessage *message, uint8_t *out); + +/** + * Serialise a message from its in-memory representation to a virtual buffer. + * + * This function calls the `append` method of a `ProtobufCBuffer` object to + * consume the bytes generated by the serialiser. + * + * \param message + * The message object to serialise. + * \param buffer + * The virtual buffer object. + * \return + * Number of bytes passed to the virtual buffer. + */ +PROTOBUF_C__API +size_t +protobuf_c_message_pack_to_buffer( + const ProtobufCMessage *message, + ProtobufCBuffer *buffer); /** * Unpack a serialised message into an in-memory representation. 
@@ -827,9 +951,12 @@ size_t protobuf_c_message_get_packed_size(const ProtobufCMessage *message); * If an error occurred during unpacking. */ PROTOBUF_C__API -ProtobufCMessage *protobuf_c_message_unpack( - const ProtobufCMessageDescriptor *descriptor, ProtobufCAllocator *allocator, - size_t len, const uint8_t *data); +ProtobufCMessage * +protobuf_c_message_unpack( + const ProtobufCMessageDescriptor *descriptor, + ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); /** * Free an unpacked message object. @@ -844,8 +971,10 @@ ProtobufCMessage *protobuf_c_message_unpack( * specify the default allocator. */ PROTOBUF_C__API -void protobuf_c_message_free_unpacked(ProtobufCMessage *message, - ProtobufCAllocator *allocator); +void +protobuf_c_message_free_unpacked( + ProtobufCMessage *message, + ProtobufCAllocator *allocator); /** * Check the validity of a message object. @@ -859,11 +988,11 @@ void protobuf_c_message_free_unpacked(ProtobufCMessage *message, * Message is invalid. */ PROTOBUF_C__API -protobuf_c_boolean protobuf_c_message_check(const ProtobufCMessage *); +protobuf_c_boolean +protobuf_c_message_check(const ProtobufCMessage *); /** Message initialiser. */ -#define PROTOBUF_C_MESSAGE_INIT(descriptor) \ - { descriptor, 0, NULL } +#define PROTOBUF_C_MESSAGE_INIT(descriptor) { descriptor, 0, NULL } /** * Initialise a message object from a message descriptor. @@ -874,30 +1003,67 @@ protobuf_c_boolean protobuf_c_message_check(const ProtobufCMessage *); * Allocated block of memory of size `descriptor->sizeof_message`. */ PROTOBUF_C__API -void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor, - void *message); +void +protobuf_c_message_init( + const ProtobufCMessageDescriptor *descriptor, + void *message); + +/** + * Free a service. + * + * \param service + * The service object to free. + */ +PROTOBUF_C__API +void +protobuf_c_service_destroy(ProtobufCService *service); + +/** + * Look up a `ProtobufCMethodDescriptor` by name. 
+ * + * \param desc + * Service descriptor. + * \param name + * Name of the method. + * + * \return + * A `ProtobufCMethodDescriptor` object. + * \retval NULL + * If not found or if the optimize_for = CODE_SIZE option was set. + */ +PROTOBUF_C__API +const ProtobufCMethodDescriptor * +protobuf_c_service_descriptor_get_method_by_name( + const ProtobufCServiceDescriptor *desc, + const char *name); /** * Initialise a `ProtobufCBufferSimple` object. */ -#define PROTOBUF_C_BUFFER_SIMPLE_INIT(array_of_bytes) \ - { \ - {protobuf_c_buffer_simple_append}, sizeof(array_of_bytes), 0, \ - (array_of_bytes), 0, NULL \ - } +#define PROTOBUF_C_BUFFER_SIMPLE_INIT(array_of_bytes) \ +{ \ + { protobuf_c_buffer_simple_append }, \ + sizeof(array_of_bytes), \ + 0, \ + (array_of_bytes), \ + 0, \ + NULL \ +} /** * Clear a `ProtobufCBufferSimple` object, freeing any allocated memory. */ -#define PROTOBUF_C_BUFFER_SIMPLE_CLEAR(simp_buf) \ - do { \ - if ((simp_buf)->must_free_data) { \ - if ((simp_buf)->allocator != NULL) \ - (simp_buf)->allocator->free((simp_buf)->allocator, (simp_buf)->data); \ - else \ - free((simp_buf)->data); \ - } \ - } while (0) +#define PROTOBUF_C_BUFFER_SIMPLE_CLEAR(simp_buf) \ +do { \ + if ((simp_buf)->must_free_data) { \ + if ((simp_buf)->allocator != NULL) \ + (simp_buf)->allocator->free( \ + (simp_buf)->allocator, \ + (simp_buf)->data); \ + else \ + free((simp_buf)->data); \ + } \ +} while (0) /** * The `append` method for `ProtobufCBufferSimple`. @@ -911,8 +1077,27 @@ void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor, * Data to append. 
*/ PROTOBUF_C__API -void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len, - const unsigned char *data); +void +protobuf_c_buffer_simple_append( + ProtobufCBuffer *buffer, + size_t len, + const unsigned char *data); + +PROTOBUF_C__API +void +protobuf_c_service_generated_init( + ProtobufCService *service, + const ProtobufCServiceDescriptor *descriptor, + ProtobufCServiceDestroy destroy); + +PROTOBUF_C__API +void +protobuf_c_service_invoke_internal( + ProtobufCService *service, + unsigned method_index, + const ProtobufCMessage *input, + ProtobufCClosure closure, + void *closure_data); /**@}*/ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f4dc1421a4cd0f1062e8ad1240caa237e58c9371..03bf4c50bb3726b109bf38bfa34e1c7a000a23f8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,9 +18,6 @@ elseif ("yolo" IN_LIST NET) # gen test ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-yolo paddle-mobile) - # gen test - ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test_yolo_combined paddle-mobile) elseif ("squeezenet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) @@ -33,27 +30,6 @@ elseif("FPGAnets" IN_LIST NET) ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-resnet paddle-mobile) - ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-resnet50 paddle-mobile) - - ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-EW paddle-mobile) - - ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h) - 
target_link_libraries(test-fpga-conv paddle-mobile) - - ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-pooling paddle-mobile) - - ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-bypass paddle-mobile) - - ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-softmax paddle-mobile) - - ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-concat paddle-mobile) - ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-tensor-quant paddle-mobile) @@ -75,192 +51,8 @@ elseif("nlp" IN_LIST NET) # gen test ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h) target_link_libraries(test-gru-op paddle-mobile) -elseif("mobilenetfssd" IN_LIST NET) - # gen test - ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h) - target_link_libraries(test-fssd paddle-mobile) - -elseif("genet" IN_LIST NET) - # gen test - ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-genet paddle-mobile) else () - - # gen test - ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-resnet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-squeezenet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-yolo paddle-mobile) - - # gen test - 
ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test_yolo_combined paddle-mobile) - # gen test ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-googlenet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-conv-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h) - target_link_libraries(test-mul-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h) - target_link_libraries(test-elementwiseadd-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h) - target_link_libraries(test-concat-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-lrn-op operators/test_lrn_op.cpp test_helper.h test_include.h) - target_link_libraries(test-lrn-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-batchnorm-op operators/test_batchnorm_op.cpp test_helper.h test_include.h) - target_link_libraries(test-batchnorm-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-priorbox-op operators/test_prior_box_op.cpp test_helper.h test_include.h) - target_link_libraries(test-priorbox-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-boxcoder-op operators/test_box_coder_op.cpp test_helper.h test_include.h) - target_link_libraries(test-boxcoder-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h) - target_link_libraries(test-transpose-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h) - target_link_libraries(test-multiclassnms-op 
paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h) - target_link_libraries(test-reshape-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h) - target_link_libraries(test-relu-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h) - target_link_libraries(test-fc-op paddle-mobile) - - # gen test log - ADD_EXECUTABLE(test-log common/test_log.cpp) - target_link_libraries(test-log paddle-mobile) - - # gen test log - ADD_EXECUTABLE(test-load framework/test_load.cpp) - target_link_libraries(test-load paddle-mobile) - - # gen test log - ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp) - target_link_libraries(test-loadmemory paddle-mobile) - - ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp) - target_link_libraries(test-inference-api paddle-mobile) - - - # gen test log - # gen test - ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp) - target_link_libraries(test-optimize paddle-mobile) - - - #gen test - ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-pool paddle-mobile) - - #gen test - ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-softmax paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp) - target_link_libraries(test-gemm-accuracy paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp) - target_link_libraries(test-gemm-perf paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-enforce common/test_enforce.cpp) - target_link_libraries(test-enforce paddle-mobile) - - # gen test - test if openmp works - ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h 
executor_for_test.h) - target_link_libraries(test-openmp paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-mobilenetssd paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-mobilenet-combine paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-genet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) - target_link_libraries(test-sigmoid paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-depthwise-conv-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-mobilenet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-conv-add-relu-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-conv-add-bn-relu-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-nlp paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h) - target_link_libraries(test-gru-op paddle-mobile) - - # gen test - - ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h) - 
target_link_libraries(test-inceptionv4 paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-alexnet paddle-mobile) - - ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h) - target_link_libraries(test-googlenetv1 paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h) - target_link_libraries(test-fssd paddle-mobile) - - - #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) - endif() diff --git a/test/fpga/test_format_data.cpp b/test/fpga/test_format_data.cpp index 1d67c3110ff86dc6fba2d49412edb70ab1c9c16d..0fa3c23d2af6220959d434a6805adc9a7ae984a5 100644 --- a/test/fpga/test_format_data.cpp +++ b/test/fpga/test_format_data.cpp @@ -22,7 +22,7 @@ namespace fpga = paddle_mobile::fpga; using std::cout; using std::endl; -void test_format_image() { +int main() { std::vector dims{1, 1, 3, 3}; std::vector elements{1, 2, 3, 4, 5, 6, 7, 8, 9}; frame::DDim ddim = frame::make_ddim(dims); @@ -44,50 +44,6 @@ void test_format_image() { cout << endl; auto dd = image.dims(); cout << dims[0] << dims[1] << dims[2] << dims[3] << endl; -} - -void test_fill_conv_arg() { - Tensor input, out, filter; - DLOG << "Setup input"; - SetupTensor(&input, {1, 250, 32, 30}, static_cast(0), - static_cast(1)); - - DLOG << "Setup filter"; - SetupTensor(&filter, {1001, 250, 3, 3}, static_cast(0), - static_cast(1)); - - DLOG << "Setup output"; - SetupTensor(&out, {1, 1001, 32, 30}, static_cast(0), - static_cast(1)); - auto bs_ptr = (float *)fpga::fpga_malloc(2 * 1001 * sizeof(float)); - - DLOG << "find max"; - float max_value = fpga::filter_find_max(&filter); - DLOG << "format filter"; - fpga::format_filter(&filter, max_value, 1); - - DLOG << "format bs_ptr"; - int element_num_per_div = fpga::get_filter_num_per_div(&filter, 1); - 
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, 1001); - DLOG << "format ofm"; - fpga::format_fp16_ofm(&out); - DLOG << "Build arg"; - - fpga::WrapperConvArgs arg; - fpga::fill_conv_arg(&arg, &input, &out, &filter, true, 1, 1, 1, 1, 1, bs_ptr); - DLOG << "splitNum: " << arg.split_num << " group_num:" << arg.group_num - << " filter_num:" << arg.filter_num; - - for (int i = 0; i < arg.split_num; i++) { - DLOG << arg.conv_args[i].filter_num << " " << arg.conv_args[i].sb_address - << " " << arg.conv_args[i].filter_address << " " - << arg.conv_args[i].filter_scale_address; - } -} - -int main() { - test_format_image(); - test_fill_conv_arg(); return 0; } diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp deleted file mode 100644 index cca6793f10da5a0784cf8a3ba2d0104f3508028d..0000000000000000000000000000000000000000 --- a/test/fpga/test_resnet50.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -static const char *g_resnet_combine = "../models/resnet50"; - -int main() { - DLOG << paddle_mobile::fpga::open_device(); - paddle_mobile::PaddleMobile paddle_mobile; - if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model", - std::string(g_resnet_combine) + "/params", true)) { - std::vector dims{1, 3, 224, 224}; - Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 224, 224}, static_cast(0), - static_cast(1)); - - std::vector input(input_tensor.data(), - input_tensor.data() + input_tensor.numel()); - - paddle_mobile.FeedData(input_tensor); - paddle_mobile.Predict_To(-1); - // paddle_mobile.Predict_From(73); - // paddle_mobile.Predict_From_To(72, 73); - - DLOG << "Computation done"; - return 0; - } -} diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index d3d5705e63e3dffad0d4bad5422c27d57dddc350..25cad4feaa706899122902dee2a8f0c915e78975 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -21,15 +21,8 @@ int main() { paddle_mobile::Loader loader; // ../../../test/models/googlenet // ../../../test/models/mobilenet -<<<<<<< HEAD - auto program = loader.Load(g_mobilenet_ssd, false, false); - // auto program = loader.Load(g_googlenet_combine + "/model", - // g_googlenet_combine + - // "/params", true); -======= // auto program = loader.Load(g_googlenet, true); // auto program = loader.Load(g_mobilenet_ssd, true); ->>>>>>> e60ab7ae5a43b9cc788813877fbfffc67c87b5f3 auto program = loader.Load(std::string(g_ocr) + "/model", std::string(g_ocr) + "/params", false); diff --git a/test/net/test_alexnet.cpp b/test/net/test_alexnet.cpp deleted file mode 100644 index 50053fe82f95177fd786c1c8f8f5c9b7a521b888..0000000000000000000000000000000000000000 --- a/test/net/test_alexnet.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", - // std::string(g_mobilenet_detect) + "/params", true); - - auto isok = paddle_mobile.Load(g_alexnet, true); - if (isok) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 224, 224}; - GetInput(g_test_image_1x3x224x224_banana, &input, dims); - - auto vec_result = paddle_mobile.Predict(input, dims); - std::vector::iterator biggest = - std::max_element(std::begin(vec_result), std::end(vec_result)); - std::cout << " Max element is " << *biggest << " at position " - << std::distance(std::begin(vec_result), biggest) << std::endl; - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - DLOG << vec_result; - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - - std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " - "是否存在?" 
- << std::endl; - return 0; -} diff --git a/test/net/test_genet_combine.cpp b/test/net/test_genet_combine.cpp deleted file mode 100644 index e6b0505a670f1a58ed7d09cc4854ef52b05b0649..0000000000000000000000000000000000000000 --- a/test/net/test_genet_combine.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - if (paddle_mobile.Load(std::string(g_genet_combine) + "/model", - std::string(g_genet_combine) + "/params", true)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 128, 128}; - GetInput(g_test_image_1x3x224x224_banana, &input, dims); - - // 预热一次 - auto vec_result = paddle_mobile.Predict(input, dims); - std::vector::iterator biggest = - std::max_element(std::begin(vec_result), std::end(vec_result)); - std::cout << " Max element is " << *biggest << " at position " - << std::distance(std::begin(vec_result), biggest) << std::endl; - - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - std::cout - << "如果结果Nan请查看: 
test/images/test_image_1x3x224x224_float 是否存在?" - << std::endl; - return 0; -} diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index a2f030eeac5c2584b33fad2b082b9d5513707260..2439de96252c219cf5726975755a9f6730c3b8e0 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -17,13 +17,7 @@ limitations under the License. */ #include "../test_include.h" int main() { -#ifdef PADDLE_MOBILE_FPGA - paddle_mobile::PaddleMobile paddle_mobile; -#endif - -#ifdef PADDLE_MOBILE_CPU - paddle_mobile::PaddleMobile paddle_mobile; -#endif + paddle_mobile::PaddleMobile paddle_mobile; paddle_mobile.SetThreadNum(4); bool optimize = true; diff --git a/test/net/test_googlenetv1_combine.cpp b/test/net/test_googlenetv1_combine.cpp deleted file mode 100644 index 9aab25afd2aa6ece4e6b99bbd368b8a5be2e3106..0000000000000000000000000000000000000000 --- a/test/net/test_googlenetv1_combine.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - if (paddle_mobile.Load(std::string(g_googlenetv1_combined) + "/model", - std::string(g_googlenetv1_combined) + "/params", - false)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 160, 160}; - GetInput(g_img, &input, dims); - - for (int i = 0; i < input.size(); i += 1000) { - std::cout << input[i] << std::endl; - } - // auto vec_result = paddle_mobile.Predict(input, dims); - // std::vector::iterator biggest = - // std::max_element(std::begin(vec_result), std::end(vec_result)); - // std::cout << " Max element is " << *biggest << " at position " - // << std::distance(std::begin(vec_result), biggest) << - // std::endl; - - // // 预热十次 - // for (int i = 0; i < 1; ++i) { - // auto vec_result = paddle_mobile.Predict(input, dims); - // } - auto time3 = time(); - - auto vec_result = paddle_mobile.Predict(input, dims); - - for (int j = 0; j < vec_result.size(); ++j) { - std::cout << j << " : " << vec_result[j] << std::endl; - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms" - << std::endl; - } - - return 0; -} diff --git a/test/net/test_inceptionv4.cpp b/test/net/test_inceptionv4.cpp deleted file mode 100644 index fbbc9dd39e64f7a8ea745cf7489e46f00ffe1413..0000000000000000000000000000000000000000 --- a/test/net/test_inceptionv4.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", - // std::string(g_mobilenet_detect) + "/params", true); - - auto isok = paddle_mobile.Load(g_inceptionv4, true); - if (isok) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 224, 224}; - GetInput(g_test_image_1x3x224x224_banana, &input, dims); - - auto vec_result = paddle_mobile.Predict(input, dims); - std::vector::iterator biggest = - std::max_element(std::begin(vec_result), std::end(vec_result)); - std::cout << " Max element is " << *biggest << " at position " - << std::distance(std::begin(vec_result), biggest) << std::endl; - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - // DLOG << vec_result; - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - - std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " - "是否存在?" 
- << std::endl; - return 0; -} diff --git a/test/net/test_mobilenet+ssd.cpp b/test/net/test_mobilenet+ssd.cpp deleted file mode 100644 index 85083ca441ad242ffb5b63dd612a0e35e3589f99..0000000000000000000000000000000000000000 --- a/test/net/test_mobilenet+ssd.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - auto isok = paddle_mobile.Load( - std::string(g_mobilenet_ssd_gesture) + "/model", - std::string(g_mobilenet_ssd_gesture) + "/params", true); - // auto isok = paddle_mobile.Load(g_mobilenet_ssd, false); - if (isok) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 300, 300}; - GetInput(g_hand, &input, dims); - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto output = paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto output = paddle_mobile.Predict(input, dims); - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - return 0; -} diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp deleted file mode 100644 index 
4ed7d3b756cfef9554028e1d33f4dd86bf58e4b8..0000000000000000000000000000000000000000 --- a/test/net/test_mobilenet.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", - // std::string(g_mobilenet_detect) + "/params", true); - - auto isok = paddle_mobile.Load(g_mobilenet, true); - if (isok) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 224, 224}; - GetInput(g_test_image_1x3x224x224_banana, &input, dims); - - auto vec_result = paddle_mobile.Predict(input, dims); - std::vector::iterator biggest = - std::max_element(std::begin(vec_result), std::end(vec_result)); - std::cout << " Max element is " << *biggest << " at position " - << std::distance(std::begin(vec_result), biggest) << std::endl; - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - DLOG << vec_result; - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << 
std::endl; - } - - std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " - "是否存在?" - << std::endl; - return 0; -} diff --git a/test/net/test_mobilenet_025_fssd.cpp b/test/net/test_mobilenet_025_fssd.cpp deleted file mode 100644 index c0d037ceb05f57361f1385cb9959beed66186e4f..0000000000000000000000000000000000000000 --- a/test/net/test_mobilenet_025_fssd.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main(int argc, char **argv) { - int times = 10; - if (argc <= 1) { - times = 10; - std::cout << "没有输入 , 使用默认10次 " << times << std::endl; - } else { - std::string arstr = argv[1]; - times = std::stoi(arstr); - std::cout << "input times: " << times << std::endl; - } - - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(1); - auto isok = - paddle_mobile.Load(std::string(g_fluid_fssd_new) + "/model", - std::string(g_fluid_fssd_new) + "/params", true); - if (isok) { - std::vector input; - std::vector dims{1, 3, 160, 160}; - GetInput(g_imgfssd_ar1, &input, dims); - std::cout << "预热10次....." << std::endl; - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto output = paddle_mobile.Predict(input, dims); - } - std::cout << "开始....." 
<< std::endl; - - double time_sum = 0; - - for (int i = 0; i < times; ++i) { - auto time3 = time(); - auto output = paddle_mobile.Predict(input, dims); - auto time4 = time(); - double timeDiff = time_diff(time3, time4); - time_sum += timeDiff; - std::cout << "第" << i << "次" - << "predict cost :" << timeDiff << "ms" << std::endl; - } - std::cout << "平均时间:" << time_sum / times << "ms" << std::endl; - } - return 0; -} diff --git a/test/net/test_mobilenet_combine.cpp b/test/net/test_mobilenet_combine.cpp deleted file mode 100644 index 073607795967af09c81bc0a0c492d065bce7ed72..0000000000000000000000000000000000000000 --- a/test/net/test_mobilenet_combine.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model", - std::string(g_mobilenet_combined) + "/params", true)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 224, 224}; - GetInput(g_test_image_1x3x224x224_banana, &input, dims); - - auto vec_result = paddle_mobile.Predict(input, dims); - std::vector::iterator biggest = - std::max_element(std::begin(vec_result), std::end(vec_result)); - std::cout << " Max element is " << *biggest << " at position " - << std::distance(std::begin(vec_result), biggest) << std::endl; - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - std::cout - << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?" - << std::endl; - return 0; -} diff --git a/test/net/test_nlp.cpp b/test/net/test_nlp.cpp deleted file mode 100644 index ca5f6571c8786a23017bd846890d6f78345121c3..0000000000000000000000000000000000000000 --- a/test/net/test_nlp.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", - // std::string(g_mobilenet_detect) + "/params", true); - - auto isok = paddle_mobile.Load(g_nlp, true, false, 1, true); - - // auto isok = paddle_mobile.Load(std::string(g_nlp) + "/model", - // std::string(g_nlp) + "/params", false); - if (isok) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - // 1064 1603 644 699 2878 1219 867 1352 8 1 13 312 479 - - std::vector ids{1064, 1603, 644, 699, 2878, 1219, 867, - 1352, 8, 1, 13, 312, 479}; - - paddle_mobile::framework::LoDTensor words; - auto size = static_cast(ids.size()); - paddle_mobile::framework::LoD lod{{0, ids.size()}}; - DDim dims{size, 1}; - words.Resize(dims); - words.set_lod(lod); - DLOG << "words lod : " << words.lod(); - auto *pdata = words.mutable_data(); - size_t n = words.numel() * sizeof(int64_t); - DLOG << "n :" << n; - memcpy(pdata, ids.data(), n); - DLOG << "words lod 22: " << words.lod(); - auto time3 = time(); - for (int i = 0; i < 1; ++i) { - auto vec_result = paddle_mobile.PredictLod(words); - DLOG << *vec_result; - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms" - << std::endl; - } - return 0; -} diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp deleted file mode 100644 index 
d2a4abbbfd2c023f1e8220e74f815eda44acb6db..0000000000000000000000000000000000000000 --- a/test/net/test_resnet.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { -#ifdef PADDLE_MOBILE_FPGA - paddle_mobile::PaddleMobile paddle_mobile; -#endif - -#ifdef PADDLE_MOBILE_CPU - paddle_mobile::PaddleMobile paddle_mobile; -#endif - paddle_mobile.SetThreadNum(4); - auto time1 = time(); - if (paddle_mobile.Load(g_resnet, true)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - std::vector dims{1, 3, 32, 32}; - Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 32, 32}, static_cast(0), - static_cast(1)); - - std::vector input(input_tensor.data(), - input_tensor.data() + input_tensor.numel()); -#ifndef PADDLE_MOBILE_FPGA - // 预热十次 - for (int i = 0; i < 10; ++i) { - paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - paddle_mobile.Predict(input, dims); - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) << "ms" - << std::endl; - -#else - auto time3 = time(); - paddle_mobile.FeedData(input_tensor); - paddle_mobile.Predict_To(10); - paddle_mobile.Predict_From(10); - auto tensor_ptr = paddle_mobile.FetchResult(9); - std::cout << "Tensor element number for op[9]: " << 
tensor_ptr->numel() - << std::endl; - auto result_ptr = paddle_mobile.FetchResult(); - std::cout << "Result tensor element number: " << result_ptr->numel() - << std::endl; - - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) << "ms" - << std::endl; -#endif - } - return 0; -} diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp deleted file mode 100644 index 02ec8691febbad5ec0e811f7d7bebde1bef54a79..0000000000000000000000000000000000000000 --- a/test/net/test_squeezenet.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - // ../../../test/models/googlenet - // ../../../test/models/mobilenet - auto time1 = time(); - if (paddle_mobile.Load(g_squeezenet, true)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - std::vector dims{1, 3, 227, 227}; - Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 227, 227}, static_cast(0), - static_cast(1)); - - std::vector input(input_tensor.data(), - input_tensor.data() + input_tensor.numel()); - // 预热十次 - for (int i = 0; i < 10; ++i) { - paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - paddle_mobile.Predict(input, dims); - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - - return 0; -} diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp deleted file mode 100644 index 700eb10cac6f0b80595d8c53866c7f675d2b56fb..0000000000000000000000000000000000000000 --- a/test/net/test_yolo.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - // ../../../test/models/googlenet - // ../../../test/models/mobilenet - auto time1 = time(); - if (paddle_mobile.Load(g_yolo, true)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector dims{1, 3, 227, 227}; - Tensor input_tensor; - SetupTensor(&input_tensor, {1, 3, 227, 227}, static_cast(0), - static_cast(1)); - - std::vector input(input_tensor.data(), - input_tensor.data() + input_tensor.numel()); - // 预热十次 - for (int i = 0; i < 10; ++i) { - paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - paddle_mobile.Predict(input, dims); - } - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - return 0; -} diff --git a/test/net/test_yolo_combined.cpp b/test/net/test_yolo_combined.cpp deleted file mode 100644 index 88b889daa946cfaef1d86ff36f416b4643532c89..0000000000000000000000000000000000000000 --- a/test/net/test_yolo_combined.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "../test_helper.h" -#include "../test_include.h" - -int main() { - paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(4); - // ../../../test/models/googlenet - // ../../../test/models/mobilenet - auto time1 = time(); - - if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model", - std::string(g_yolo_combined) + "/params", true)) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector dims{1, 3, 416, 416}; - std::vector input; - - GetInput(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims); - std::cout << "input.size(): " << input.size() << std::endl; - for (int j = 0; j < 100; ++j) { - std::cout << j << " : " << input[j] << std::endl; - } - // // 预热十次 - // for (int i = 0; i < 10; ++i) { - // paddle_mobile.Predict(input, dims); - // } - auto time3 = time(); - const vector vector_out = paddle_mobile.Predict(input, dims); - std::cout << "--------------------------------------------" << std::endl; - - for (float i : vector_out) { - std::cout << i << std::endl; - } - - std::cout << "--------------------------------------------" << std::endl; - - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; - } - return 0; -} diff --git a/test/operators/test_batchnorm_op.cpp b/test/operators/test_batchnorm_op.cpp deleted file mode 100644 index 4ccad8c1512036c2400a09575b3775e75b26acce..0000000000000000000000000000000000000000 --- a/test/operators/test_batchnorm_op.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "../test_helper.h" -#include "../test_include.h" -#include "operators/batchnorm_op.h" - -namespace paddle_mobile { -namespace framework { - -template -class TestBatchNormOp { - public: - explicit TestBatchNormOp(const Program p) : program_(p) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - // DLOG << " **block size " << blocks.size(); - for (int i = 0; i < blocks.size(); ++i) { - std::shared_ptr block_desc = blocks[i]; - std::vector> ops = block_desc->Ops(); - // DLOG << " ops " << ops.size(); - for (int j = 0; j < ops.size(); ++j) { - std::shared_ptr op = ops[j]; - if (op->Type() == "batch_norm" && - op->Input("X")[0] == "conv2d_5.tmp_0") { - DLOG << " mul attr size: " << op->GetAttrMap().size(); - DLOG << " inputs size: " << op->GetInputs().size(); - DLOG << " outputs size: " << op->GetOutputs().size(); - DLOG << " Input X is : " << op->Input("X")[0]; - DLOG << " Input Mean is : " << op->Input("Mean")[0]; - DLOG << " Input Variance is : " << op->Input("Variance")[0]; - DLOG << " Input Scale is : " << op->Input("Scale")[0]; - DLOG << " Input Bias is : " << op->Input("Bias")[0]; - DLOG << " Output Y is : " << op->Output("Y")[0]; - DLOG << " epsilon : " << op->GetAttrMap().at("epsilon").Get(); - std::shared_ptr> lrn = - std::make_shared>( - op->Type(), op->GetInputs(), op->GetOutputs(), - op->GetAttrMap(), program_.scope); - ops_of_block_[*block_desc.get()].push_back(lrn); - } - } - } - } 
- - std::shared_ptr predict_bn(const Tensor &t1, const Tensor &t2, - const Tensor &t3, const Tensor &t4, - const Tensor &t5) { - // feed - auto scope = program_.scope; - Variable *x1_feed_value = scope->Var("conv2d_5.tmp_0"); - auto tensor_x1 = x1_feed_value->GetMutable(); - tensor_x1->ShareDataWith(t1); - - Variable *mean_feed_value = scope->Var("batch_norm_10.w_1"); - auto tensor_mean = mean_feed_value->GetMutable(); - tensor_mean->ShareDataWith(t2); - - Variable *scale_feed_value = scope->Var("batch_norm_10.w_0"); - auto tensor_scale = scale_feed_value->GetMutable(); - tensor_scale->ShareDataWith(t3); - - Variable *variance_feed_value = scope->Var("batch_norm_10.w_2"); - auto tensor_variance = variance_feed_value->GetMutable(); - tensor_variance->ShareDataWith(t4); - - Variable *bias_feed_value = scope->Var("batch_norm_10.b_0"); - auto tensor_bias = bias_feed_value->GetMutable(); - tensor_bias->ShareDataWith(t5); - - Variable *output = scope->Var("batch_norm_10.tmp_2"); - auto *output_tensor = output->GetMutable(); - output_tensor->mutable_data({1, 256, 38, 38}); - // DLOG << typeid(output_tensor).name(); - // DLOG << "output_tensor dims: " << output_tensor->dims(); - - std::shared_ptr out_tensor = std::make_shared(); - out_tensor.reset(output_tensor); - - predict_bn(t1, t2, t3, t4, t5, 0); - return out_tensor; - } - - private: - const framework::Program program_; - std::shared_ptr to_predict_program_; - std::map>>> - ops_of_block_; - bool use_optimize_ = false; - - void predict_bn(const Tensor &t1, const Tensor &t2, const Tensor &t3, - const Tensor &t4, const Tensor &t5, int block_id) { - std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); - for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { - auto op = ops_of_block_[*to_predict_block.get()][j]; - DLOG << "op -> run()"; - op->Run(); - } - } -}; - -template class TestBatchNormOp; -} // namespace framework -} // namespace paddle_mobile - -int main() { - DLOG << 
"----------**********----------"; - DLOG << "begin to run BatchNormOp Test"; - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet_ssd)); - - /// input x (4,10,2,2) - paddle_mobile::framework::Tensor inputx1; - SetupTensor(&inputx1, {1, 256, 38, 38}, static_cast(0), - static_cast(1)); - auto *inputx1_ptr = inputx1.data(); - - paddle_mobile::framework::Tensor mean; - SetupTensor(&mean, {256}, static_cast(0), - static_cast(1)); - auto *mean_ptr = mean.data(); - - paddle_mobile::framework::Tensor scale; - SetupTensor(&scale, {256}, static_cast(0), - static_cast(1)); - auto *scale_ptr = scale.data(); - - paddle_mobile::framework::Tensor variance; - SetupTensor(&variance, {256}, static_cast(0), - static_cast(1)); - auto *variance_ptr = variance.data(); - - paddle_mobile::framework::Tensor bias; - SetupTensor(&bias, {256}, static_cast(0), - static_cast(1)); - auto *bias_ptr = bias.data(); - - paddle_mobile::framework::TestBatchNormOp testBatchNormOp( - program); - - auto output_bn = - testBatchNormOp.predict_bn(inputx1, mean, scale, variance, bias); - auto *output_bn_ptr = output_bn->data(); - - DLOG << " (" << inputx1_ptr[0] << " - " << mean_ptr[0] << ")/((" - << variance_ptr[0] << " + 0.00001" - << ")^0.5)* " << scale_ptr[0] << " + " << bias_ptr[0] << " = "; - DLOG << output_bn_ptr[0]; - - DLOG << "input_ptr 0 : " << inputx1_ptr[0]; - DLOG << "output_ptr 0 : " << output_bn_ptr[0]; - - return 0; -} diff --git a/test/operators/test_box_coder_op.cpp b/test/operators/test_box_coder_op.cpp deleted file mode 100644 index dac0d0b8051ec1790d6982a13ea31ef3f4a64242..0000000000000000000000000000000000000000 --- a/test/operators/test_box_coder_op.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "../test_include.h" -#include "operators/box_coder_op.h" - -namespace paddle_mobile { -namespace framework { - -template -class TestBoxCoderOp { - public: - explicit TestBoxCoderOp(const Program p) : program_(p) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - // DLOG << " **block size " << blocks.size(); - for (auto block_desc : blocks) { - std::vector> ops = block_desc->Ops(); - // DLOG << " ops " << ops.size(); - for (auto op : ops) { - if (op->Type() == "box_coder" && - op->Input("PriorBox")[0] == "concat_0.tmp_0") { - DLOG << " mul attr size: " << op->GetAttrMap().size(); - DLOG << " inputs size: " << op->GetInputs().size(); - DLOG << " outputs size: " << op->GetOutputs().size(); - DLOG << " Input PriorBox is : " << op->Input("PriorBox")[0]; - DLOG << " Input PriorBoxVar is : " << op->Input("PriorBoxVar")[0]; - DLOG << " Input TargetBox is : " << op->Input("TargetBox")[0]; - DLOG << " OutputBox is : " << op->Output("OutputBox")[0]; - DLOG << " code_type : " - << op->GetAttrMap().at("code_type").Get(); - std::shared_ptr> boxcoder = - std::make_shared>( - op->Type(), op->GetInputs(), op->GetOutputs(), - op->GetAttrMap(), program_.scope); - ops_of_block_[*block_desc.get()].push_back(boxcoder); - } - } - } - } - - std::shared_ptr predict_boxcoder(const Tensor &t1, const Tensor &t2, - const Tensor &t3) { - // feed - auto scope = program_.scope; - Variable *prior_box = 
scope->Var("concat_0.tmp_0"); - auto tensor_x1 = prior_box->GetMutable(); - tensor_x1->ShareDataWith(t1); - - Variable *prior_box_var = scope->Var("concat_1.tmp_0"); - auto tensor_x2 = prior_box_var->GetMutable(); - tensor_x2->ShareDataWith(t2); - - Variable *target_box = scope->Var("concat_2.tmp_0"); - auto tensor_x3 = target_box->GetMutable(); - tensor_x3->ShareDataWith(t3); - - Variable *boxes_output = scope->Var("box_coder_0.tmp_0"); - auto *boxes_output_tensor = boxes_output->GetMutable(); - boxes_output_tensor->mutable_data({1, 1917, 4}); - - // DLOG << typeid(output_tensor).name(); - // DLOG << "output_tensor dims: " << output_tensor->dims(); - - std::shared_ptr outbox_tensor = std::make_shared(); - outbox_tensor.reset(boxes_output_tensor); - - predict_boxcoder(t1, t2, t3, 0); - - return outbox_tensor; - } - - private: - const framework::Program program_; - std::shared_ptr to_predict_program_; - std::map>>> - ops_of_block_; - bool use_optimize_ = false; - - void predict_boxcoder(const Tensor &t1, const Tensor &t2, const Tensor &t3, - int block_id) { - std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); - for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { - auto op = ops_of_block_[*to_predict_block.get()][j]; - DLOG << "op -> run()"; - op->Run(); - } - } -}; - -template class TestBoxCoderOp; -} // namespace framework -} // namespace paddle_mobile - -int main() { - DLOG << "----------**********----------"; - DLOG << "begin to run BoxCoderOp Test"; - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet_ssd)); - - paddle_mobile::framework::Tensor priorbox; - SetupTensor(&priorbox, {1917, 4}, static_cast(0), - static_cast(1)); - auto *priorbox_ptr = priorbox.data(); - - paddle_mobile::framework::Tensor priorboxvar; - SetupTensor(&priorboxvar, {1917, 4}, static_cast(0.1), - static_cast(0.2)); - auto *priorboxvar_ptr = priorboxvar.data(); - - paddle_mobile::framework::Tensor targetbox; - 
SetupTensor(&targetbox, {1, 1917, 4}, static_cast(0), - static_cast(1)); - auto *targetbox_ptr = targetbox.data(); - - paddle_mobile::framework::TestBoxCoderOp testBoxCoderOp( - program); - - auto output_boxcoder = - testBoxCoderOp.predict_boxcoder(priorbox, priorboxvar, targetbox); - auto output_boxcoder_ptr = output_boxcoder->data(); - - for (int i = 0; i < output_boxcoder->numel(); i++) { - DLOG << output_boxcoder_ptr[i]; - } - DLOGF("\n"); - /// testing 25th bbox. - DLOG << "PriorBox**************"; - DLOG << priorbox_ptr[100]; - DLOG << priorbox_ptr[101]; - DLOG << priorbox_ptr[102]; - DLOG << priorbox_ptr[103]; - DLOG << "PriorBoxVar**************"; - DLOG << priorboxvar_ptr[100]; - DLOG << priorboxvar_ptr[101]; - DLOG << priorboxvar_ptr[102]; - DLOG << priorboxvar_ptr[103]; - DLOG << "TargetBox***************"; - DLOG << targetbox_ptr[100]; - DLOG << targetbox_ptr[101]; - DLOG << targetbox_ptr[102]; - DLOG << targetbox_ptr[103]; - DLOG << "OutputBox**************"; - DLOG << output_boxcoder_ptr[100]; - DLOG << output_boxcoder_ptr[101]; - DLOG << output_boxcoder_ptr[102]; - DLOG << output_boxcoder_ptr[103]; - - DLOG << "***********----------------------**************"; - auto priorbox_w = priorbox_ptr[102] - priorbox_ptr[100]; - auto priorbox_h = priorbox_ptr[103] - priorbox_ptr[101]; - auto priorbox_center_x = (priorbox_ptr[100] + priorbox_ptr[102]) / 2; - auto priorbox_center_y = (priorbox_ptr[101] + priorbox_ptr[103]) / 2; - DLOG << "prior box width : " << priorbox_w; - DLOG << "prior box height : " << priorbox_h; - DLOG << "prior box center x : " << priorbox_center_x; - DLOG << "prior box center y : " << priorbox_center_y; - auto target_box_center_x = - priorboxvar_ptr[100] * targetbox_ptr[100] * priorbox_w + - priorbox_center_x; - DLOG << "target_box_center_x : " << target_box_center_x; - auto target_box_center_y = - priorboxvar_ptr[101] * targetbox_ptr[101] * priorbox_h + - priorbox_center_y; - DLOG << "target_box_center_y : " << target_box_center_y; - 
auto target_box_width = - std::exp(priorboxvar_ptr[102] * targetbox_ptr[102]) * priorbox_w; - DLOG << "target_box_width : " << target_box_width; - auto target_box_height = - std::exp(priorboxvar_ptr[103] * targetbox_ptr[103]) * priorbox_h; - DLOG << "target_box_height : " << target_box_height; - DLOG << "pre x min : " << target_box_center_x - target_box_width / 2; - DLOG << "pre y min : " << target_box_center_y - target_box_height / 2; - DLOG << "pre x max : " << target_box_center_x + target_box_width / 2; - DLOG << "pre y max : " << target_box_center_y + target_box_height / 2; - return 0; -} diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp deleted file mode 100644 index edaa4ce1ddba251886c90262895333b0a56c3a07..0000000000000000000000000000000000000000 --- a/test/operators/test_concat_op.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "../test_include.h" -#include "operators/concat_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_googlenet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "concat"); - - // 1. 
input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {4, 10, 2, 2}, 0, 1); - input_tensors.push_back(input1); - Tensor input2; - auto input2_data = CreateInput(&input2, {4, 20, 2, 2}, 0, 1); - input_tensors.push_back(input2); - Tensor input3; - auto input3_data = CreateInput(&input3, {4, 30, 2, 2}, 0, 1); - input_tensors.push_back(input3); - Tensor input4; - auto input4_data = CreateInput(&input4, {4, 40, 2, 2}, 0, 1); - input_tensors.push_back(input4); - // 2. input_names - vector input_names({ - "conv2d_3.tmp_1", - "conv2d_5.tmp_1", - "conv2d_7.tmp_1", - "conv2d_8.tmp_1", - }); - - // 3. output_names - vector output_names({"concat_0.tmp_0"}); - - // 4. out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - // 5. test one example. 
- int input_n = 1; - int input_c = 2; - int input_h = 0; - int input_w = 1; - int stride0 = input3.numel() / input3.dims()[0]; - int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1]; - int stride2 = input3.dims()[3]; - /// inputx1 (4,10,2,2), - /// inputx2 (4,20,2,2), - /// inputx3 (4,30,2,2), - /// inputx4 (4,40,2,2), - /// axis = 1 - /// output (4,100,2,2) - int input_index = - input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w; - int output_index = input_n * 100 * 2 * 2 + - (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 + - input_h * 2 + input_w; - - DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index]; - DLOG << " output [1,32,0,1] = " << output0_data[output_index]; - return 0; -} diff --git a/test/operators/test_conv_add_relu_op.cpp b/test/operators/test_conv_add_relu_op.cpp deleted file mode 100644 index 987f52cd62f91b3bc00cc1ef49bd21913e288d75..0000000000000000000000000000000000000000 --- a/test/operators/test_conv_add_relu_op.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -#include "operators/fusion_conv_add_relu_op.h" - -int main() { - paddle_mobile::Loader loader; - // ../models/image_classification_resnet.inference.model - auto program = loader.Load(g_googlenet, true); - - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test< - paddle_mobile::CPU, - paddle_mobile::operators::FusionConvAddReluOp> - executor(program, "fusion_conv_add_relu", true); - - paddle_mobile::framework::Tensor input; - GetInput(g_test_image_1x3x224x224, &input, {1, 3, 224, 224}); - // // use SetupTensor if not has local input image . - // SetupTensor(&input, {1, 3, 224, 224}, static_cast(0), - // static_cast(1)); - - auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112}); - auto output = executor.Predict(input, "data", "conv2d_0.tmp_2", out_ddim); - - auto output_ptr = output->data(); - for (int j = 0; j < 25; ++j) { - DLOG << " value of output: " << output_ptr[j]; - } - return 0; -} diff --git a/test/operators/test_cov_op.cpp b/test/operators/test_cov_op.cpp deleted file mode 100644 index a85ad9edba5d3e2256b8d7ee7d7d3c5b7200888d..0000000000000000000000000000000000000000 --- a/test/operators/test_cov_op.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -#include "operators/conv_op.h" - -int main() { - paddle_mobile::Loader loader; - // ../models/image_classification_resnet.inference.model - auto program = loader.Load(g_googlenet); - - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "conv2d"); - - paddle_mobile::framework::Tensor input; - GetInput(g_test_image_1x3x224x224, &input, {1, 3, 224, 224}); - // // use SetupTensor if not has local input image . - // SetupTensor(&input, {1, 3, 224, 224}, static_cast(0), - // static_cast(1)); - - auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112}); - auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim); - - auto output_ptr = output->data(); - for (int j = 0; j < 20; ++j) { - DLOG << " value of output: " << output_ptr[j]; - } - return 0; -} diff --git a/test/operators/test_depthwise_conv_op.cpp b/test/operators/test_depthwise_conv_op.cpp deleted file mode 100644 index bd2aad19eda896bad3da8a47f5b70b1a923dc1a7..0000000000000000000000000000000000000000 --- a/test/operators/test_depthwise_conv_op.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -#include "operators/depthwise_conv_op.h" - -int main() { - paddle_mobile::Loader loader; - // ../models/image_classification_resnet.inference.model - auto program = loader.Load(g_mobilenet_ssd); - - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "depthwise_conv2d"); - - paddle_mobile::framework::LoDTensor input; - // GetInput(g_test_image_1x3x224x224, &input, {1, 3, 224, 224}); - // use SetupTensor if not has local input image . - SetupTensor(&input, {1, 32, 150, 150}, static_cast(0), - static_cast(1)); - auto input_ptr = input.data(); - auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 150, 150}); - auto output = executor.Predict(input, "batch_norm_0.tmp_3", - "depthwise_conv2d_0.tmp_0", out_ddim); - - auto output_ptr = output->data(); - for (int j = 0; j < output->numel(); ++j) { - DLOG << " value of output: " << output_ptr[j]; - } - return 0; -} diff --git a/test/operators/test_elementwise_add_op.cpp b/test/operators/test_elementwise_add_op.cpp deleted file mode 100644 index 0a5e9f7e92701e748df51078b21eb46eec90599d..0000000000000000000000000000000000000000 --- a/test/operators/test_elementwise_add_op.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_resnet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "elementwise_add"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {1, 3, 224, 224}, 0, 1); - input_tensors.push_back(input1); - - Tensor input2; - auto input2_data = CreateInput(&input2, {224}, 0, 1); - input_tensors.push_back(input2); - - // 2. input_names - vector input_names({ - "batch_norm_2.tmp_2", - "batch_norm_0.tmp_3", - }); - - // 3. output_names - vector output_names({"elementwise_add_0.tmp_0"}); - - // 4. out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 224, 224}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - /// output (1,3,224,224) - DLOG << "output memory size : " << output[0]->memory_size(); - DLOG << "output numel : " << output[0]->numel(); - - DLOG << input1_data[226] << " + " << input2_data[2] << " = " - << output0_data[226]; -} diff --git a/test/operators/test_fusion_conv_add_bn_relu_op.cpp b/test/operators/test_fusion_conv_add_bn_relu_op.cpp deleted file mode 100644 index 7764d95ed72da613459233bd55ddcffdc444318f..0000000000000000000000000000000000000000 --- a/test/operators/test_fusion_conv_add_bn_relu_op.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "../test_include.h" -#include "operators/fusion_conv_add_bn_relu_op.h" - -int main() { - paddle_mobile::Loader loader; - // ../models/image_classification_resnet.inference.model - auto program = loader.Load(g_mobilenet, true); - - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "fusion_conv_add_bn_relu", true); - - std::cout << "executor 4 test: " << std::endl; - - paddle_mobile::framework::Tensor input; - GetInput(g_test_image_1x3x224x224_banana, &input, {1, 3, 224, 224}); - // // use SetupTensor if not has local input image . 
- // SetupTensor(&input, {1, 3, 224, 224}, static_cast(0), - // static_cast(1)); - - DLOG << " fuck: " << input; - - auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 112, 112}); - std::cout << "before predict: " << std::endl; - auto output = - executor.Predict(input, "data", "conv2_1_dw_bn.tmp_2", out_ddim); - std::cout << "after predict " << std::endl; - auto output_ptr = output->data(); - - int stride = output->numel() / 100; - for (int i = 0; i < 100; i++) { - DLOG << " index:" << i * stride << " value: " << output_ptr[i * stride]; - } - - // for (int i = 0; i < 100; i++) { - // DLOG << " index:" << i << " value: "<< output_ptr[i]; - // } - - // for (int j = 0; j < output->numel(); ++j) { - // std::cout << " (index: " << j << " value: " << output_ptr[j] << ") "; - // } - std::cout << std::endl; - return 0; -} diff --git a/test/operators/test_fusion_fc_op.cpp b/test/operators/test_fusion_fc_op.cpp deleted file mode 100644 index a23bde45cb74f0f75e655821b15e66b1cef4c081..0000000000000000000000000000000000000000 --- a/test/operators/test_fusion_fc_op.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "../test_include.h" -#include "operators/fusion_fc_op.h" - -namespace paddle_mobile { -namespace framework { - -template -class TestFcOp { - public: - explicit TestFcOp(const Program p) : program_(p) { - use_optimize_ = true; - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - // DLOG << " **block size " << blocks.size(); - for (int i = 0; i < blocks.size(); ++i) { - std::shared_ptr block_desc = blocks[i]; - std::vector> ops = block_desc->Ops(); - // DLOG << " ops " << ops.size(); - for (int j = 0; j < ops.size(); ++j) { - std::shared_ptr op = ops[j]; - if (op->Type() == "fc" && op->Input("X")[0] == "pool2d_13.tmp_0") { - DLOG << " fc attr size: " << op->GetAttrMap().size(); - DLOG << " inputs size: " << op->GetInputs().size(); - DLOG << " outputs size: " << op->GetOutputs().size(); - DLOG << " Input X is : " << op->Input("X")[0]; - DLOG << " Input Y is : " << op->Input("Y")[0]; - DLOG << " Input Y is : " << op->Input("Z")[0]; - DLOG << " Output Out is : " << op->Output("Out")[0]; - std::shared_ptr> testOp = - std::make_shared>( - op->Type(), op->GetInputs(), op->GetOutputs(), - op->GetAttrMap(), program_.scope); - ops_of_block_[*block_desc.get()].push_back(testOp); - } - } - } - } - - std::shared_ptr predict(const Tensor &t1, const Tensor &t2, - const Tensor &t3) { - // feed - auto scope = program_.scope; - Variable *x_feed_value = scope->Var("pool2d_13.tmp_0"); - auto tensor_x = x_feed_value->GetMutable(); - tensor_x->ShareDataWith(t1); - - Variable *y_feed_value = scope->Var("loss3_classifier-loc_weights"); - auto tensor_y = y_feed_value->GetMutable(); - tensor_y->ShareDataWith(t2); - - Variable *z_feed_value = scope->Var("loss3_classifier-loc_biases"); - auto tensor_z = z_feed_value->GetMutable(); - tensor_z->ShareDataWith(t3); - - Variable *con_output = 
scope->Var("loss3_classifier-loc.tmp_1"); - auto *output_tensor = con_output->GetMutable(); - output_tensor->mutable_data({3, 10}); - // DLOG << typeid(output_tensor).name(); - // DLOG << "output_tensor dims: " << output_tensor->dims(); - - std::shared_ptr out_tensor = std::make_shared(); - out_tensor.reset(output_tensor); - - predict(t1, t2, t3, 0); - return out_tensor; - } - - private: - const framework::Program program_; - std::shared_ptr to_predict_program_; - std::map>>> - ops_of_block_; - bool use_optimize_ = false; - - void predict(const Tensor &t1, const Tensor &t2, const Tensor &t3, - int block_id) { - std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); - for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { - auto op = ops_of_block_[*to_predict_block.get()][j]; - DLOG << "op -> run()"; - op->Run(); - } - } -}; - -template class TestFcOp; -} // namespace framework -} // namespace paddle_mobile -int main() { - DLOG << "----------**********----------"; - DLOG << "begin to run Fc Test"; - paddle_mobile::Loader loader; - // "../../../test/models/googlenet" - auto program = loader.Load(g_googlenet); - paddle_mobile::framework::ProgramOptimize optimize; - // program.originProgram->Description("origin"); - auto optimize_program = optimize.FusionOptimize(program.originProgram); - - program.optimizeProgram = optimize_program; - - if (optimize_program != nullptr) { - optimize_program->Description("optimize"); - } else { - LOG(paddle_mobile::kLOG_ERROR) << "optimize_program is null"; - } - - /// input x (1,3,224,224) - paddle_mobile::framework::LoDTensor inputx; - SetupTensor(&inputx, {3, 64, 1, 1}, static_cast(1), - static_cast(1)); - auto *inputx_ptr = inputx.data(); - /// input y (224,) - paddle_mobile::framework::LoDTensor inputy; - SetupTensor(&inputy, {64, 10}, static_cast(1.5), - static_cast(1.5)); - auto *inputy_ptr = inputy.data(); - - paddle_mobile::framework::LoDTensor inputz; - SetupTensor(&inputz, {10}, 
static_cast(0), - static_cast(1)); - auto *inputz_ptr = inputz.data(); - - paddle_mobile::framework::TestFcOp testFcOp(program); - - auto output = testFcOp.predict(inputx, inputy, inputz); - auto *output_ptr = output->data(); - for (int j = 0; j < output->numel(); ++j) { - DLOG << "value of output: " << output_ptr[j]; - } - - DLOG << "1 (3,64) * 2 (64,10) = 96(3,10)"; - DLOG << "output : 96(3,10) + bias(10)"; - - return 0; -} diff --git a/test/operators/test_im2sequence_op.cpp b/test/operators/test_im2sequence_op.cpp deleted file mode 100644 index a7512d3bf3cffcb100fe292e50fc7b7b23fa0aa0..0000000000000000000000000000000000000000 --- a/test/operators/test_im2sequence_op.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "../executor_for_test.h" -#include "../test_include.h" -#include "operators/im2sequence_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_ocr_recg); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "im2sequence"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {2, 2, 3, 3}, -1, 1); - input_tensors.push_back(input1); - - // 2. input_names - vector input_names({ - "conv2d_19.tmp_1", - }); - - // 3. output_names - vector output_names({"im2sequence_0.tmp_0"}); - - // 4. 
out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({8, 9}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - for (int j = 0; j < input_tensors[0].numel(); ++j) { - DLOG << " value of input: " << input1_data[j]; - } - - for (int j = 0; j < output[0]->numel(); ++j) { - DLOG << " value of output: " << output0_data[j]; - } - return 0; -} diff --git a/test/operators/test_lrn_op.cpp b/test/operators/test_lrn_op.cpp deleted file mode 100644 index d4d9f8da802fc0f5f885a3b2e81cba695776c29e..0000000000000000000000000000000000000000 --- a/test/operators/test_lrn_op.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "../test_include.h" -#include "operators/lrn_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_googlenet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "lrn"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {3, 4, 2, 2}, 0, 1); - input_tensors.push_back(input1); - - // 2. input_names - vector input_names({ - "pool2d_0.tmp_0", - }); - - // 3. output_names - vector output_names({"pool1_norm1.tmp_1"}); - - // 4. 
out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({3, 4, 2, 2}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - DLOG << " LrnOp input: "; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 4; j++) { - for (int c = 0; c < 2; c++) { - for (int d = 0; d < 2; d++) { - DLOGF("%f ", input1_data[i * 16 + j * 4 + c * 2 + d]); - } - DLOGF("\n"); - } - DLOGF("\n"); - } - DLOGF("\n"); - } - DLOG << " LrnOp output: "; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 4; j++) { - for (int c = 0; c < 2; c++) { - for (int d = 0; d < 2; d++) { - DLOGF("%f ", output0_data[i * 16 + j * 4 + c * 2 + d]); - } - DLOGF("\n"); - } - DLOGF("\n"); - } - DLOGF("\n"); - } - DLOG << input1_data[0] << " / ((1 + 0.00002 * ( " << input1_data[0] << "^2 + " - << input1_data[4] << "^2 + " << input1_data[8] << "^2 ))^0.75) = "; - DLOG << output0_data[0]; - return 0; -} diff --git a/test/operators/test_mul_op.cpp b/test/operators/test_mul_op.cpp deleted file mode 100644 index 8ebf0926890497c0ed622b69f163a9f6f5c8612b..0000000000000000000000000000000000000000 --- a/test/operators/test_mul_op.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -#include "operators/mul_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_resnet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "mul"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {3, 2, 1, 1}, 0, 1); - input_tensors.push_back(input1); - Tensor input2; - auto input2_data = CreateInput(&input2, {2, 3}, 0, 1); - input_tensors.push_back(input2); - - // 2. input_names - vector input_names({ - "pool2d_0.tmp_0", - "fc_0.w_0", - }); - - // 3. output_names - vector output_names({"fc_0.tmp_0"}); - - // 4. out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({3, 3}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - auto dim_1 = input1.numel() / input1.dims()[0]; - DLOG << " input1 : "; - for (int i = 0; i < input1.dims()[0]; ++i) { - for (int j = 0; j < dim_1; ++j) { - DLOGF("%f ", input1_data[i * dim_1 + j]); - } - DLOGF("\n"); - } - - auto dim_2 = input2.numel() / input2.dims()[0]; - DLOG << " input2 : "; - for (int i = 0; i < input2.dims()[0]; ++i) { - for (int j = 0; j < dim_2; ++j) { - DLOGF("%f ", input2_data[i * dim_2 + j]); - } - DLOGF("\n"); - } - - auto dim_output0 = output[0]->numel() / output[0]->dims()[0]; - DLOG << " output : "; - for (int i = 0; i < output[0]->dims()[0]; ++i) { - for (int j = 0; j < dim_output0; ++j) { - DLOGF("%f ", output0_data[i * dim_2 + j]); - } - DLOGF("\n"); - } - - /// output (3,3) - DLOG << "output memory size : " << output[0]->memory_size(); - DLOG << "output numel : " << output[0]->numel(); - - DLOG << input1_data[0] << " x " << input2_data[0] << " + " << input1_data[1] - << " x " << input2_data[0 + 3] << " = " << output0_data[0]; - return 0; -} diff --git 
a/test/operators/test_multiclass_nms_op.cpp b/test/operators/test_multiclass_nms_op.cpp deleted file mode 100644 index e6c41bd4b3bb241964a23accf4633e65818465be..0000000000000000000000000000000000000000 --- a/test/operators/test_multiclass_nms_op.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "../test_include.h" -#include "operators/multiclass_nms_op.h" - -namespace paddle_mobile { -namespace framework { - -template -class TestMultiClassNMSOp { - public: - explicit TestMultiClassNMSOp(const Program p) : program_(p) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - // DLOG << " **block size " << blocks.size(); - for (auto block_desc : blocks) { - std::vector> ops = block_desc->Ops(); - // DLOG << " ops " << ops.size(); - for (auto op : ops) { - if (op->Type() == "multiclass_nms" && - op->Input("BBoxes")[0] == "box_coder_0.tmp_0") { - DLOG << " mul attr size: " << op->GetAttrMap().size(); - DLOG << " inputs size: " << op->GetInputs().size(); - DLOG << " outputs size: " << op->GetOutputs().size(); - DLOG << " BBoxes is : " << op->Input("BBoxes")[0]; - DLOG << " Scores is : " << op->Input("Scores")[0]; - DLOG << " Out is : " << op->Output("Out")[0]; - DLOG << " keep_top_k : " - << 
op->GetAttrMap().at("keep_top_k").Get(); - DLOG << " background_label : " - << op->GetAttrMap().at("background_label").Get(); - DLOG << " nms_eta : " << op->GetAttrMap().at("nms_eta").Get(); - DLOG << " nms_threshold : " - << op->GetAttrMap().at("nms_threshold").Get(); - DLOG << " nms_top_k : " - << op->GetAttrMap().at("nms_top_k").Get(); - DLOG << " score_threshold : " - << op->GetAttrMap().at("score_threshold").Get(); - // DLOG << " variances : " << - // op->GetAttrMap().at("variances").Get>(); - // DLOG << " aspect_ratios : " << - // op->GetAttrMap().at("aspect_ratios").Get>(); - // DLOG << " min_sizes : " << - // op->GetAttrMap().at("min_sizes").Get>(); - // DLOG << " max_sizes : " << - // op->GetAttrMap().at("max_sizes").Get>(); - std::shared_ptr> priorbox = - std::make_shared>( - op->Type(), op->GetInputs(), op->GetOutputs(), - op->GetAttrMap(), program_.scope); - ops_of_block_[*block_desc.get()].push_back(priorbox); - } - } - } - } - - std::shared_ptr predict(const Tensor &t1, const Tensor &t2) { - // feed - auto scope = program_.scope; - Variable *x1_feed_value = scope->Var("box_coder_0.tmp_0"); - auto tensor_x1 = x1_feed_value->GetMutable(); - tensor_x1->ShareDataWith(t1); - - Variable *x2_feed_value = scope->Var("transpose_12.tmp_0"); - auto tensor_x2 = x2_feed_value->GetMutable(); - tensor_x2->ShareDataWith(t2); - - Variable *output = scope->Var("detection_output_0.tmp_0"); - auto *output_tensor = output->GetMutable(); - output_tensor->mutable_data({1917, 6}); - - // DLOG << typeid(output_tensor).name(); - // DLOG << "output_tensor dims: " << output_tensor->dims(); - - std::shared_ptr out_tensor = std::make_shared(); - out_tensor.reset(output_tensor); - - predict(t1, t2, 0); - - return out_tensor; - // return outvars_tensor; - } - - private: - const framework::Program program_; - std::shared_ptr to_predict_program_; - std::map>>> - ops_of_block_; - bool use_optimize_ = false; - - void predict(const Tensor &t1, const Tensor &t2, int block_id) { - 
std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); - for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { - auto op = ops_of_block_[*to_predict_block.get()][j]; - DLOG << "op -> run()"; - op->Run(); - } - } -}; - -template class TestMultiClassNMSOp; -} // namespace framework -} // namespace paddle_mobile - -int main() { - DLOG << "----------**********----------"; - DLOG << "begin to run MulticlassNMS Test"; - paddle_mobile::Loader loader; - auto program = loader.Load(std::string("../../test/models/mobilenet+ssd")); - - /// input x (1,3,300,300) - paddle_mobile::framework::Tensor inputx1; - SetupTensor(&inputx1, {10, 1917, 4}, static_cast(0), - static_cast(1)); - auto *inputx1_ptr = inputx1.data(); - - paddle_mobile::framework::Tensor inputx2; - SetupTensor(&inputx2, {10, 21, 1917}, static_cast(0), - static_cast(1)); - auto *inputx2_ptr = inputx2.data(); - - paddle_mobile::framework::TestMultiClassNMSOp - testMultiClassNMSOp(program); - - auto output = testMultiClassNMSOp.predict(inputx1, inputx2); - auto *output_ptr = output->data(); - - for (int i = 0; i < output->numel(); i++) { - DLOG << output_ptr[i]; - } - return 0; -} diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp deleted file mode 100644 index 2daecd7b4c1a50c612bc784c801208d2e6f31482..0000000000000000000000000000000000000000 --- a/test/operators/test_pool_op.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "../test_include.h" -#include "operators/pool_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_googlenet)); - if (program.originProgram == nullptr) { - DLOG << "program read file"; - } - - Executor4Test> - executor(program, "pool2d"); - - paddle_mobile::framework::Tensor input; - SetupTensor(&input, {1, 64, 112, 112}, static_cast(0), - static_cast(1)); - auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 56, 56}); - auto output = - executor.Predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim); - - float *output_ptr = output->data(); - for (int j = 0; j < output->numel(); ++j) { - DLOG << " value of output: " << output_ptr[j]; - } - return 0; -} diff --git a/test/operators/test_prelu_op.cpp b/test/operators/test_prelu_op.cpp deleted file mode 100644 index e93d8732d18496721b24cfba1df296250169f8b2..0000000000000000000000000000000000000000 --- a/test/operators/test_prelu_op.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../executor_for_test.h" -#include "../test_include.h" -#include "operators/prelu_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_resnet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "prelu"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {1, 2, 3, 4}, -1, 1); - input_tensors.push_back(input1); - - // 2. input_names - vector input_names({ - "batch_norm_0.tmp_2", - }); - - // 3. output_names - vector output_names({"batch_norm_0.tmp_3"}); - - // 4. out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - for (int j = 0; j < output[0]->numel(); ++j) { - DLOG << " value of output: " << output0_data[j]; - } - return 0; -} diff --git a/test/operators/test_prior_box_op.cpp b/test/operators/test_prior_box_op.cpp deleted file mode 100644 index 8c697a9a7982f05b71caa5bb5f4d12e50dc9d418..0000000000000000000000000000000000000000 --- a/test/operators/test_prior_box_op.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "../test_include.h" -#include "operators/prior_box_op.h" - -namespace paddle_mobile { -namespace framework { - -template -class TestPriorBoxOp { - public: - explicit TestPriorBoxOp(const Program p) : program_(p) { - if (use_optimize_) { - to_predict_program_ = program_.optimizeProgram; - } else { - to_predict_program_ = program_.originProgram; - } - - const std::vector> blocks = - to_predict_program_->Blocks(); - // DLOG << " **block size " << blocks.size(); - for (auto block_desc : blocks) { - std::vector> ops = block_desc->Ops(); - // DLOG << " ops " << ops.size(); - for (auto op : ops) { - if (op->Type() == "prior_box" && - op->Input("Input")[0] == "batch_norm_26.tmp_3") { - DLOG << " mul attr size: " << op->GetAttrMap().size(); - DLOG << " inputs size: " << op->GetInputs().size(); - DLOG << " outputs size: " << op->GetOutputs().size(); - DLOG << " Input is : " << op->Input("Input")[0]; - DLOG << " Image is : " << op->Input("Image")[0]; - DLOG << " Output Boxes is : " << op->Output("Boxes")[0]; - DLOG << " Output Variances is : " << op->Output("Variances")[0]; - DLOG << " offset : " << op->GetAttrMap().at("offset").Get(); - DLOG << " step_h : " << op->GetAttrMap().at("step_h").Get(); - DLOG << " step_w : " << op->GetAttrMap().at("step_w").Get(); - DLOG << " flip : " << op->GetAttrMap().at("flip").Get(); - DLOG << " clip : " << op->GetAttrMap().at("clip").Get(); - // DLOG << " variances : " << - // op->GetAttrMap().at("variances").Get>(); - // DLOG << " aspect_ratios : " << - // op->GetAttrMap().at("aspect_ratios").Get>(); - // DLOG << " min_sizes : " << - // op->GetAttrMap().at("min_sizes").Get>(); - // DLOG << " max_sizes : " << - // op->GetAttrMap().at("max_sizes").Get>(); - std::shared_ptr> priorbox = - std::make_shared>( - op->Type(), op->GetInputs(), op->GetOutputs(), - op->GetAttrMap(), program_.scope); - ops_of_block_[*block_desc.get()].push_back(priorbox); - } - } - } - } - - std::shared_ptr predict_priorbox(const Tensor &t1, 
const Tensor &t2) { - // feed - auto scope = program_.scope; - Variable *x1_feed_value = scope->Var("image"); - auto tensor_x1 = x1_feed_value->GetMutable(); - tensor_x1->ShareDataWith(t1); - - Variable *x2_feed_value = scope->Var("batch_norm_26.tmp_3"); - auto tensor_x2 = x2_feed_value->GetMutable(); - tensor_x2->ShareDataWith(t2); - - Variable *boxes_output = scope->Var("prior_box_1.tmp_0"); - auto *boxes_output_tensor = boxes_output->GetMutable(); - boxes_output_tensor->mutable_data({10, 10, 6, 4}); - - Variable *variances_output = scope->Var("prior_box_1.tmp_1"); - auto *variances_output_tesnor = variances_output->GetMutable(); - variances_output_tesnor->mutable_data({10, 10, 6, 4}); - // DLOG << typeid(output_tensor).name(); - // DLOG << "output_tensor dims: " << output_tensor->dims(); - - std::shared_ptr outboxes_tensor = std::make_shared(); - outboxes_tensor.reset(boxes_output_tensor); - - std::shared_ptr outvars_tensor = std::make_shared(); - outvars_tensor.reset(variances_output_tesnor); - predict_priorbox(t1, t2, 0); - - return outboxes_tensor; - // return outvars_tensor; - } - - private: - const framework::Program program_; - std::shared_ptr to_predict_program_; - std::map>>> - ops_of_block_; - bool use_optimize_ = false; - - void predict_priorbox(const Tensor &t1, const Tensor &t2, int block_id) { - std::shared_ptr to_predict_block = - to_predict_program_->Block(block_id); - for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { - auto op = ops_of_block_[*to_predict_block.get()][j]; - DLOG << "op -> run()"; - op->Run(); - } - } -}; - -template class TestPriorBoxOp; -} // namespace framework -} // namespace paddle_mobile - -int main() { - DLOG << "----------**********----------"; - DLOG << "begin to run PriorBoxOp Test"; - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet_ssd)); - - /// input x (1,3,300,300) - paddle_mobile::framework::Tensor input_image; - SetupTensor(&input_image, {1, 3, 300, 300}, 
static_cast(0), - static_cast(1)); - auto *input_image_ptr = input_image.data(); - - paddle_mobile::framework::Tensor inputx1; - SetupTensor(&inputx1, {1, 1024, 10, 10}, static_cast(0), - static_cast(1)); - auto *inputx1_ptr = inputx1.data(); - - paddle_mobile::framework::TestPriorBoxOp testPriorBoxOp( - program); - - auto output_priorbox = testPriorBoxOp.predict_priorbox(input_image, inputx1); - auto *output_priorbox_ptr = output_priorbox->data(); - - for (int i = 0; i < output_priorbox->numel(); i++) { - DLOG << output_priorbox_ptr[i]; - } - return 0; -} diff --git a/test/operators/test_relu_op.cpp b/test/operators/test_relu_op.cpp deleted file mode 100644 index fad0d0c30a126cc2730e4aa8b87364eee9fc8209..0000000000000000000000000000000000000000 --- a/test/operators/test_relu_op.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "../test_include.h" -#include "operators/relu_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(g_resnet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "relu"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {1, 2, 3, 4}, -1, 1); - input_tensors.push_back(input1); - - // 2. input_names - vector input_names({ - "batch_norm_0.tmp_2", - }); - - // 3. 
output_names - vector output_names({"batch_norm_0.tmp_3"}); - - // 4. out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - for (int j = 0; j < output[0]->numel(); ++j) { - DLOG << " value of output: " << output0_data[j]; - } - return 0; -} diff --git a/test/operators/test_reshape_op.cpp b/test/operators/test_reshape_op.cpp deleted file mode 100644 index 3541151d8a1a286527e715f402df381d2efc094c..0000000000000000000000000000000000000000 --- a/test/operators/test_reshape_op.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -#include "operators/reshape_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet_ssd)); - if (program.originProgram == nullptr) { - DLOG << "program read file"; - } - Executor4Test> - executor(program, "reshape"); - paddle_mobile::framework::Tensor input; - SetupTensor(&input, {2, 3, 3, 2}, static_cast(0), - static_cast(1)); - auto input_ptr = input.data(); - auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2}); - auto output = - executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim); - auto *output_ptr = output->data(); - - DLOG << "input : "; - for (int j = 0; j < input.numel(); ++j) { - DLOG << " index " << j << " : " << input_ptr[j]; - } - - DLOG << "output : "; - for (int j = 0; j < output->numel(); ++j) { - DLOG << " index " << j << " : " << output_ptr[j]; - } - - return 0; -} diff --git a/test/operators/test_resize_op.cpp b/test/operators/test_resize_op.cpp deleted file mode 100644 index f4dcaa6885d92a727e8c97d5106c3b6913a4ab33..0000000000000000000000000000000000000000 --- a/test/operators/test_resize_op.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" -#include "operators/resize_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet_ssd)); - if (program.originProgram == nullptr) { - DLOG << "program read file"; - } - Executor4Test> - executor(program, "resize"); - paddle_mobile::framework::Tensor input; - SetupTensor(&input, {2, 3, 3, 2}, static_cast(0), - static_cast(1)); - auto input_ptr = input.data(); - auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2}); - auto output = - executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim); - auto *output_ptr = output->data(); - - DLOG << "input : "; - for (int j = 0; j < input.numel(); ++j) { - DLOG << " index " << j << " : " << input_ptr[j]; - } - - DLOG << "output : "; - for (int j = 0; j < output->numel(); ++j) { - DLOG << " index " << j << " : " << output_ptr[j]; - } - - return 0; -} diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp deleted file mode 100644 index c8fac6b9eee5c5777ddb0147bc81d361d4dd09f5..0000000000000000000000000000000000000000 --- a/test/operators/test_sigmoid_op.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../../src/operators/kernel/sigmoid_kernel.h" -#include "../test_helper.h" -#include "io/executor.h" - -int main() { - paddle_mobile::framework::Tensor input; - paddle_mobile::framework::Tensor output; - SetupTensor(&input, {1, 4, 60, 60}, static_cast(0), - static_cast(1)); - - auto out_ddim = paddle_mobile::framework::make_ddim({1, 4, 60, 60}); - output.Resize(out_ddim); - paddle_mobile::operators::sigmoid(&input, &output); - auto *output_ptr = output.data(); - for (int j = 0; j < output.numel(); ++j) { - DLOG << " value of output: " << output_ptr[j]; - } - DLOG << 5; - return 0; -} diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp deleted file mode 100644 index a0184729a8bc5e6b0ba952923eecd5242cfe36d4..0000000000000000000000000000000000000000 --- a/test/operators/test_softmax_op.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_include.h" - -#include "operators/softmax_op.h" - -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet)); - if (program.originProgram == nullptr) { - DLOG << "program read file"; - } - Executor4Test> - executor(program, "softmax"); - paddle_mobile::framework::Tensor input; - SetupTensor(&input, {1, 1000}, static_cast(0), - static_cast(1)); - auto out_ddim = paddle_mobile::framework::make_ddim({1, 1000}); - auto output = - executor.Predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim); - auto *output_ptr = output->data(); - for (int j = 0; j < output->numel(); ++j) { - DLOG << " value of output: " << output_ptr[j]; - } - - return 0; -} diff --git a/test/operators/test_transpose_op.cpp b/test/operators/test_transpose_op.cpp deleted file mode 100644 index f83ee23c25d8f2588e0fe40d5fabc6114129b995..0000000000000000000000000000000000000000 --- a/test/operators/test_transpose_op.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "../test_helper.h" -#include "../test_include.h" -#include "operators/transpose_op.h" -int main() { - paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_mobilenet_ssd)); - if (program.originProgram == nullptr) { - DLOG << "program read file"; - } - Executor4Test> - executor(program, "transpose"); - paddle_mobile::framework::Tensor input; - SetupTensor(&input, {1, 2, 3, 4}, static_cast(0), - static_cast(1)); - auto input_ptr = input.data(); - auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 4, 2}); - auto output = - executor.Predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim); - auto *output_ptr = output->data(); - - DLOG << "input : "; - for (int j = 0; j < input.numel(); ++j) { - DLOG << " index " << j << " : " << input_ptr[j]; - } - - DLOG << "output : "; - for (int j = 0; j < output->numel(); ++j) { - DLOG << " index " << j << " : " << output_ptr[j]; - } - DLOG << " for example : "; - DLOG << " you can check if input[16] == output[9] "; - DLOG << " you can check if input[12] == output[1] "; - return 0; -} diff --git a/test/test_helper.h b/test/test_helper.h index ecbc251a815e343f75b1247ffc430e9c52d6abfd..7581405c3d9f14e7e997e73be91cb624ad6d9798 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -41,18 +41,13 @@ static const char *g_resnet_50 = "../models/resnet_50"; static const char *g_resnet = "../models/resnet"; static const char *g_googlenet_combine = "../models/googlenet_combine"; static const char *g_yolo = "../models/yolo"; -static const char *g_yolo_combined = "../models/yolo_combined"; static const char *g_fluid_fssd_new = "../models/fluid_fssd_new"; static const char *g_test_image_1x3x224x224 = "../images/test_image_1x3x224x224_float"; static const char *g_test_image_1x3x224x224_banana = "../images/input_3x224x224_banana"; -static const char *g_test_image_desktop_1_3_416_416_nchw_float = - "../images/in_put_1_3_416_416_2"; static const char *g_hand = "../images/hand_image"; -static 
const char *g_imgfssd_ar = "../images/test_image_ssd_ar"; -static const char *g_imgfssd_ar1 = "../images/003_0001.txt"; static const char *g_img = "../images/img.bin"; using paddle_mobile::framework::DDim; diff --git a/tools/build.sh b/tools/build.sh index baa9fe1097b774418899cb20f2f1e63520fa7792..5cf3b0454658c764ff99989de9dca1530c3b55d9 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash NETS="" -declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet" "mobilenetssd" "nlp" "mobilenetfssd" "genet") +declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet" "mobilenetssd" "nlp") build_for_mac() { if [ ! `which brew` ]; then diff --git a/tools/op.cmake b/tools/op.cmake index 8488dd86bd497a1f8c7425b0525986750ff5ac86..3f27f7fc4ae0d00394b9df63c214b30f98cdd31b 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -127,42 +127,6 @@ if ("nlp" IN_LIST NET) set(FOUND_MATCH ON) endif() -if ("mobilenetfssd" IN_LIST NET) - message("mobilenetfssd enabled") - set(FUSION_CONVADDRELU_OP ON) - set(FUSION_CONVADDBNRELU_OP ON) - set(FUSION_CONVADD_OP ON) - set(SOFTMAX_OP ON) - set(RESHAPE_OP ON) - set(BILINEAR_INTERP_OP ON) - set(TRANSPOSE_OP ON) - set(CONCAT_OP ON) - set(PRIORBOX_OP ON) - set(BATCHNORM_OP ON) - set(BOXCODER_OP ON) - set(MULTICLASSNMS_OP ON) - set(FLATTEN_OP ON) - set(SPLIT_OP ON) - set(SHAPE_OP ON) - - set(FOUND_MATCH ON) -endif() - -if ("genet" IN_LIST NET) - message("genet enabled") - set(FUSION_CONVADDPRELU_OP ON) - set(FUSION_CONVADDADDPRELU_OP ON) - set(FUSION_CONVADD_OP ON) - set(CONV_TRANSPOSE_OP ON) - set(FUSION_CONVADDRELU_OP ON) - set(ELEMENTWISEADD_OP ON) - set(PRELU_OP ON) - set(POOL_OP ON) - set(CONCAT_OP ON) - - set(FOUND_MATCH ON) -endif() - if(NOT FOUND_MATCH) message("--default--")