fix conflict and merge develop

37151f30 · nhzlx · 0fd9a2ee · 689aee19 · 37151f30 · 37151f30
139 changed file
--- a/.gitignore
+++ b/.gitignore
@@ -70,10 +70,17 @@ build
 cmake-build-debug
 cmake-build-release

-
 #ios demo
 demo/ios/PaddleMobileDemo/PaddleMobileDemo/googlenet_combine/
 demo/ios/PaddleMobileDemo/PaddleMobileDemo/*.jpg
 demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/*.a
 *.xcuserstate
 /tools/quantification/quantify
+
+# metal
+Podfile.lock
+metal/Pods/
+SwiftProtobuf.framework
+paddle-mobile.xcworkspace
+metal/models/
+metal/images/
--- a/README.md
+++ b/README.md
@@ -26,16 +26,6 @@ Paddle-Mobile是PaddlePaddle组织下的项目，是一个致力于嵌入式平

 - **ARM CPU**

-|mobilenet arm v7|1线程|2线程|4线程|
-|------------|----|-----|-----|
-|麒麟960(ms)|110.586|70.897|47.474|
-|||||
-|mobilenetssd arm v7|1线程|2线程|4线程|
-|麒麟960(ms)|222.124|138.952|90.856|
-|||||
-|googlenet(v1) arm v7|1线程|2线程|4线程|
-|麒麟960(ms)|348.018|240.304|169.998|
-
    arm cpu是paddle-mobile的主要支持方向，cpu的通用性一直是其优势。嵌入式深度学习，需要大量的cpu汇编实现。我们正在紧锣密鼓的编码，为的是能充分利用硬件的每一点加速能力。
    arm cpu的优化工作还在进行中，现在使用了常规的cpu优化。在arm a73上paddle-mobile arm-v7现在单核运行一次mobilenet1.0是110+ms，显然这不是我们的最终目标，我们正在用大量的汇编改写，后续性能仍会有巨大提升空间, 目前只支持armv7, 未来我们也会支持armv8。
    

--- a/metal/Podfile
+++ b/metal/Podfile
+platform :ios, ‘9.0’
+use_frameworks!
+
+workspace 'paddle-mobile.xcworkspace'
+
+target 'paddle-mobile-demo' do
+	project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj'
+    pod 'SwiftProtobuf', '~> 1.0'
+end
+
+target 'paddle-mobile' do
+	project 'paddle-mobile/paddle-mobile.xcodeproj'
+	pod 'SwiftProtobuf', '~> 1.0'
+end
+
+target 'paddle-mobile-unit-test' do
+    project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
+    pod 'SwiftProtobuf', '~> 1.0'
+end
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; };
+		FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; };
+		FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; };
+		FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; };
+		FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; };
+		FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; };
+		FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
+		FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602C72108580600FACB58 /* MetalHelper.swift */; };
+		FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; };
+		FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; };
+		FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; };
+		FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; };
+		FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */; };
+		FCDFD41B211D91C7005AB38B /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD41A211D91C7005AB38B /* synset.txt */; };
+		FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
+		FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
+		FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCEEE7D3210627A000444BEC /* banana.jpeg */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		FCEBEC2E20E1392000C0B14D /* Embed Frameworks */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = "";
+			dstSubfolderSpec = 10;
+			files = (
+				FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */,
+			);
+			name = "Embed Frameworks";
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.release.xcconfig"; sourceTree = "<group>"; };
+		18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_demo.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; };
+		FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
+		FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+		FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
+		FC039B8320E11C550081E9F8 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
+		FC039B8620E11C550081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		FC039B8820E11C560081E9F8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		FC3602C72108580600FACB58 /* MetalHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-unit-test/paddle-mobile-unit-test/MetalHelper.swift"; sourceTree = "<group>"; };
+		FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
+		FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
+		FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
+		FCD04E6220F3146A0007374F /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
+		FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelHelper.swift; sourceTree = "<group>"; };
+		FCDFD41A211D91C7005AB38B /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
+		FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		FC039B7B20E11C550081E9F8 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */,
+				30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		5722B50FEC38F55CA9B6A57B /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */,
+				081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		7B7DED984E9EE7BFB45E24E8 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		FC039B7520E11C550081E9F8 = {
+			isa = PBXGroup;
+			children = (
+				FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */,
+				FC039B8020E11C550081E9F8 /* paddle-mobile-demo */,
+				FC039B7F20E11C550081E9F8 /* Products */,
+				5722B50FEC38F55CA9B6A57B /* Pods */,
+				7B7DED984E9EE7BFB45E24E8 /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		FC039B7F20E11C550081E9F8 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = {
+			isa = PBXGroup;
+			children = (
+				FC0E2C2020EDC03B009C1FAC /* models */,
+				FC0E2C1D20EDC030009C1FAC /* images */,
+				FC039B8120E11C550081E9F8 /* AppDelegate.swift */,
+				FC013927210204A3008100E3 /* PreProcessKernel.metal */,
+				FC039B8320E11C550081E9F8 /* ViewController.swift */,
+				FC039B8520E11C550081E9F8 /* Main.storyboard */,
+				FC039B8820E11C560081E9F8 /* Assets.xcassets */,
+				FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */,
+				FC039B8D20E11C560081E9F8 /* Info.plist */,
+				FC3602C72108580600FACB58 /* MetalHelper.swift */,
+				FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */,
+			);
+			path = "paddle-mobile-demo";
+			sourceTree = "<group>";
+		};
+		FC0E2C1D20EDC030009C1FAC /* images */ = {
+			isa = PBXGroup;
+			children = (
+				FC918192211DC70500B6F354 /* iphone.JPG */,
+				FC918190211DBC3500B6F354 /* paddle-mobile.png */,
+				FCDFD41A211D91C7005AB38B /* synset.txt */,
+				FCEEE7D3210627A000444BEC /* banana.jpeg */,
+			);
+			name = images;
+			path = ../../images;
+			sourceTree = "<group>";
+		};
+		FC0E2C2020EDC03B009C1FAC /* models */ = {
+			isa = PBXGroup;
+			children = (
+				FCD04E6020F3146A0007374F /* mobilenet */,
+			);
+			name = models;
+			path = ../../models;
+			sourceTree = "<group>";
+		};
+		FCD04E6020F3146A0007374F /* mobilenet */ = {
+			isa = PBXGroup;
+			children = (
+				FCD04E6120F3146A0007374F /* params */,
+				FCD04E6220F3146A0007374F /* model */,
+			);
+			path = mobilenet;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		FC039B7D20E11C550081E9F8 /* paddle-mobile-demo */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = FC039B9020E11C560081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile-demo" */;
+			buildPhases = (
+				9E041A9C487A2E44C709327E /* [CP] Check Pods Manifest.lock */,
+				FC039B7A20E11C550081E9F8 /* Sources */,
+				FC039B7B20E11C550081E9F8 /* Frameworks */,
+				FC039B7C20E11C550081E9F8 /* Resources */,
+				84ED590C0E51ABA9C34F51B5 /* [CP] Embed Pods Frameworks */,
+				FCEBEC2E20E1392000C0B14D /* Embed Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "paddle-mobile-demo";
+			productName = "paddle-mobile-demo";
+			productReference = FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		FC039B7620E11C550081E9F8 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0930;
+				LastUpgradeCheck = 0930;
+				ORGANIZATIONNAME = orange;
+				TargetAttributes = {
+					FC039B7D20E11C550081E9F8 = {
+						CreatedOnToolsVersion = 9.3.1;
+					};
+				};
+			};
+			buildConfigurationList = FC039B7920E11C550081E9F8 /* Build configuration list for PBXProject "paddle-mobile-demo" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = FC039B7520E11C550081E9F8;
+			productRefGroup = FC039B7F20E11C550081E9F8 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				FC039B7D20E11C550081E9F8 /* paddle-mobile-demo */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		FC039B7C20E11C550081E9F8 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FCD04E6320F3146B0007374F /* params in Resources */,
+				FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
+				FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */,
+				FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
+				FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */,
+				FC918193211DC70500B6F354 /* iphone.JPG in Resources */,
+				FCDFD41B211D91C7005AB38B /* synset.txt in Resources */,
+				FCD04E6420F3146B0007374F /* model in Resources */,
+				FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		84ED590C0E51ABA9C34F51B5 /* [CP] Embed Pods Frameworks */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh",
+				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+			);
+			name = "[CP] Embed Pods Frameworks";
+			outputPaths = (
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
+		9E041A9C487A2E44C709327E /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-paddle-mobile-demo-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		FC039B7A20E11C550081E9F8 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
+				FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */,
+				FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
+				FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
+				FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		FC039B8520E11C550081E9F8 /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FC039B8620E11C550081E9F8 /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
+		FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FC039B8B20E11C560081E9F8 /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		FC039B8E20E11C560081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+			};
+			name = Debug;
+		};
+		FC039B8F20E11C560081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		FC039B9120E11C560081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE = "";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		FC039B9220E11C560081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE = "";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		FC039B7920E11C550081E9F8 /* Build configuration list for PBXProject "paddle-mobile-demo" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B8E20E11C560081E9F8 /* Debug */,
+				FC039B8F20E11C560081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		FC039B9020E11C560081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile-demo" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B9120E11C560081E9F8 /* Debug */,
+				FC039B9220E11C560081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = FC039B7620E11C550081E9F8 /* Project object */;
+}
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:paddle-mobile-demo.xcodeproj">
+   </FileRef>
+</Workspace>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "0940"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+               BuildableName = "paddle-mobile-demo.app"
+               BlueprintName = "paddle-mobile-demo"
+               ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+      </Testables>
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+            BuildableName = "paddle-mobile-demo.app"
+            BlueprintName = "paddle-mobile-demo"
+            ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+            BuildableName = "paddle-mobile-demo.app"
+            BlueprintName = "paddle-mobile-demo"
+            ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+            BuildableName = "paddle-mobile-demo.app"
+            BlueprintName = "paddle-mobile-demo"
+            ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>paddle-mobile-demo.xcscheme</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>2</integer>
+		</dict>
+	</dict>
+	<key>SuppressBuildableAutocreation</key>
+	<dict>
+		<key>FC039B7D20E11C550081E9F8</key>
+		<dict>
+			<key>primary</key>
+			<true/>
+		</dict>
+	</dict>
+</dict>
+</plist>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import UIKit
+
+/// Application delegate of the demo app.
+///
+/// Every lifecycle hook is a stub: the launch hook reports success and the
+/// remaining callbacks do nothing. They are kept as explicit extension points.
+@UIApplicationMain
+class AppDelegate: UIResponder, UIApplicationDelegate {
+
+    /// Window backing the app's UI. Never assigned in this class —
+    /// presumably populated by UIKit from the main storyboard (TODO confirm).
+    var window: UIWindow?
+
+    // MARK: - Launch
+
+    /// Customisation point after launch; performs no setup and returns `true`.
+    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
+        return true
+    }
+
+    // MARK: - Active / inactive transitions
+
+    /// The app is about to become inactive (e.g. an incoming call or the user
+    /// leaving the app). Place to pause tasks, timers and rendering. Stub.
+    func applicationWillResignActive(_ application: UIApplication) {}
+
+    /// The app became active again. Place to restart whatever was paused while
+    /// inactive and optionally refresh the UI. Stub.
+    func applicationDidBecomeActive(_ application: UIApplication) {}
+
+    // MARK: - Background / foreground transitions
+
+    /// The app entered the background. Place to release shared resources and
+    /// persist enough state to restore the app if it is terminated later. Stub.
+    func applicationDidEnterBackground(_ application: UIApplication) {}
+
+    /// The app is returning from the background. Place to undo changes made
+    /// when it entered the background. Stub.
+    func applicationWillEnterForeground(_ application: UIApplication) {}
+
+    // MARK: - Termination
+
+    /// The app is about to terminate. Place to save data if appropriate. Stub.
+    func applicationWillTerminate(_ application: UIApplication) {}
+}
+
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/AppIcon.appiconset/Contents.json
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/AppIcon.appiconset/Contents.json
+{
+  "images" : [
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "83.5x83.5",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ios-marketing",
+      "size" : "1024x1024",
+      "scale" : "1x"
+    }
+  ],
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/Contents.json
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/Contents.json
+{
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/LaunchScreen.storyboard
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/LaunchScreen.storyboard
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" systemVersion="17A277" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="EHf-IW-A2E">
+            <objects>
+                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="53" y="375"/>
+        </scene>
+    </scenes>
+</document>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
+    <device id="retina4_7" orientation="portrait">
+        <adaptation id="fullscreen"/>
+    </device>
+    <dependencies>
+        <deployment identifier="iOS"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14088"/>
+        <capability name="Aspect ratio constraints" minToolsVersion="5.1"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="tne-QT-ifu">
+            <objects>
+                <viewController id="BYZ-38-t0r" customClass="ViewController" customModule="paddle_mobile_demo" customModuleProvider="target" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <subviews>
+                            <imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK">
+                                <rect key="frame" x="0.0" y="20" width="375" height="247"/>
+                            </imageView>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Thread:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L">
+                                <rect key="frame" x="10" y="538" width="68" height="24"/>
+                                <constraints>
+                                    <constraint firstAttribute="width" constant="68" id="Q5J-tq-JSX"/>
+                                    <constraint firstAttribute="height" constant="24" id="SYv-As-Si8"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="20"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
+                                <rect key="frame" x="88" y="510.5" width="287" height="80"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/>
+                                </constraints>
+                            </pickerView>
+                            <pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="6MG-gv-hD5">
+                                <rect key="frame" x="85" y="401" width="290" height="80"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="80" id="yAL-JY-G6b"/>
+                                </constraints>
+                            </pickerView>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Models" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="avL-VK-Kha">
+                                <rect key="frame" x="10" y="429" width="65" height="24"/>
+                                <constraints>
+                                    <constraint firstAttribute="width" constant="65" id="6oA-g2-Xq4"/>
+                                    <constraint firstAttribute="height" constant="24" id="EwE-B3-z2R"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="20"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V">
+                                <rect key="frame" x="16" y="597" width="63.5" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Image">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="selectImageAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="5uR-SM-fKO"/>
+                                </connections>
+                            </button>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="XpL-9M-UOp">
+                                <rect key="frame" x="109.5" y="597" width="63" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Load">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="loadAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="fZ5-CQ-jCY"/>
+                                </connections>
+                            </button>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="R90-Yf-S6g">
+                                <rect key="frame" x="202.5" y="597" width="63.5" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Predict">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="predictAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="Iyy-sY-gt4"/>
+                                </connections>
+                            </button>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="a3K-ri-NVs">
+                                <rect key="frame" x="296" y="597" width="63" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Clear">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="clearAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="JYf-UX-rCR"/>
+                                </connections>
+                            </button>
+                            <view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="w7H-Sk-Rai">
+                                <rect key="frame" x="79.5" y="597" width="30" height="30"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="30" id="tje-ct-ded"/>
+                                    <constraint firstAttribute="width" constant="30" id="vYd-Fc-KAj"/>
+                                </constraints>
+                            </view>
+                            <view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="T4O-nx-ciH">
+                                <rect key="frame" x="266" y="597" width="30" height="30"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="30" id="CZQ-vS-4di"/>
+                                    <constraint firstAttribute="width" constant="30" id="fXE-S7-ZXL"/>
+                                </constraints>
+                            </view>
+                            <view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="976-fk-Kx2">
+                                <rect key="frame" x="172.5" y="597" width="30" height="30"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="30" id="JBc-yg-8YH"/>
+                                    <constraint firstAttribute="width" constant="30" id="L4p-hP-s5C"/>
+                                </constraints>
+                            </view>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="耗时:" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="m5L-O7-P31">
+                                <rect key="frame" x="15" y="277" width="350" height="38"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="38" id="6SS-sb-7I2"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="15"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="paddle-mobile.png" translatesAutoresizingMaskIntoConstraints="NO" id="4ey-Xr-U4e">
+                                <rect key="frame" x="90" y="637" width="195" height="30"/>
+                                <constraints>
+                                    <constraint firstAttribute="width" secondItem="4ey-Xr-U4e" secondAttribute="height" multiplier="6.5:1" id="8c5-FF-lB9"/>
+                                </constraints>
+                            </imageView>
+                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" text="结果:" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="VQn-bS-fWp">
+                                <rect key="frame" x="10" y="323" width="355" height="70"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="70" id="07M-Gx-Elk"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="15"/>
+                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
+                            </textView>
+                        </subviews>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <constraints>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="VQn-bS-fWp" secondAttribute="trailing" constant="10" id="1Xg-0h-9SE"/>
+                            <constraint firstItem="avL-VK-Kha" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="2t9-hS-VXa"/>
+                            <constraint firstItem="R90-Yf-S6g" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="76b-Ny-1Og"/>
+                            <constraint firstItem="DlO-dk-RMr" firstAttribute="centerY" secondItem="2EB-m2-a3L" secondAttribute="centerY" id="7R7-7x-IRs"/>
+                            <constraint firstItem="a3K-ri-NVs" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="8Gv-HO-dKf"/>
+                            <constraint firstItem="w7H-Sk-Rai" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="94H-ZN-G7S"/>
+                            <constraint firstItem="2EB-m2-a3L" firstAttribute="top" secondItem="avL-VK-Kha" secondAttribute="bottom" constant="85" id="A5J-Qv-Ux5"/>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="a3K-ri-NVs" secondAttribute="trailing" constant="16" id="Avk-9e-Pvg"/>
+                            <constraint firstItem="DlO-dk-RMr" firstAttribute="leading" secondItem="2EB-m2-a3L" secondAttribute="trailing" constant="10" id="CYY-XV-JFd"/>
+                            <constraint firstItem="T4O-nx-ciH" firstAttribute="leading" secondItem="R90-Yf-S6g" secondAttribute="trailing" id="ImW-FE-Mua"/>
+                            <constraint firstItem="T4O-nx-ciH" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KIi-87-AGM"/>
+                            <constraint firstItem="XpL-9M-UOp" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KWW-qT-Rzf"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="centerY" secondItem="avL-VK-Kha" secondAttribute="centerY" id="KZa-YZ-DEs"/>
+                            <constraint firstItem="2EB-m2-a3L" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="Le3-TN-zOL"/>
+                            <constraint firstItem="ZZh-fw-LwK" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="MeS-HQ-voE"/>
+                            <constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="ZZh-fw-LwK" secondAttribute="bottom" constant="10" id="NUL-Ta-VI8"/>
+                            <constraint firstItem="m5L-O7-P31" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="15" id="RFA-z1-9aB"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="a3K-ri-NVs" secondAttribute="width" id="Rp6-Bh-BN3"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="S0W-0G-75m"/>
+                            <constraint firstItem="w7H-Sk-Rai" firstAttribute="leading" secondItem="wUL-9N-u1V" secondAttribute="trailing" id="VBM-8b-jP0"/>
+                            <constraint firstItem="VQn-bS-fWp" firstAttribute="top" secondItem="m5L-O7-P31" secondAttribute="bottom" constant="8" id="VpS-4N-mOo"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="top" secondItem="2EB-m2-a3L" secondAttribute="bottom" constant="35" id="VpU-j2-gaE"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="XpL-9M-UOp" secondAttribute="width" id="Xrz-oE-aIz"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="R90-Yf-S6g" secondAttribute="width" id="a4b-Rh-yKG"/>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="m5L-O7-P31" secondAttribute="trailing" constant="10" id="aOn-WU-xP7"/>
+                            <constraint firstItem="R90-Yf-S6g" firstAttribute="leading" secondItem="976-fk-Kx2" secondAttribute="trailing" id="amy-QU-hbW"/>
+                            <constraint firstItem="a3K-ri-NVs" firstAttribute="leading" secondItem="T4O-nx-ciH" secondAttribute="trailing" id="dkX-Iq-hYk"/>
+                            <constraint firstItem="ZZh-fw-LwK" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="eIC-fZ-OEE"/>
+                            <constraint firstItem="976-fk-Kx2" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="fFg-pB-eyU"/>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="bottom" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="40" id="fG6-0p-I0P"/>
+                            <constraint firstItem="XpL-9M-UOp" firstAttribute="leading" secondItem="w7H-Sk-Rai" secondAttribute="trailing" id="guC-Db-cA9"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="leading" secondItem="avL-VK-Kha" secondAttribute="trailing" constant="10" id="jNW-iC-u7V"/>
+                            <constraint firstItem="4ey-Xr-U4e" firstAttribute="bottom" secondItem="6Tk-OE-BBY" secondAttribute="bottom" id="o1X-q5-P7j"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="top" secondItem="VQn-bS-fWp" secondAttribute="bottom" constant="8" id="tAE-ss-jlA"/>
+                            <constraint firstItem="4ey-Xr-U4e" firstAttribute="top" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="10" id="udc-wT-jqd"/>
+                            <constraint firstItem="ZZh-fw-LwK" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" id="vXI-l2-CjL"/>
+                            <constraint firstItem="VQn-bS-fWp" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="wtI-Dl-YPq"/>
+                            <constraint firstItem="976-fk-Kx2" firstAttribute="leading" secondItem="XpL-9M-UOp" secondAttribute="trailing" id="wxP-4D-gDn"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="16" id="xzZ-jO-4fI"/>
+                            <constraint firstItem="DlO-dk-RMr" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="z6f-Nb-ASh"/>
+                            <constraint firstItem="4ey-Xr-U4e" firstAttribute="centerX" secondItem="8bC-Xf-vdC" secondAttribute="centerX" id="zzi-Qz-G9G"/>
+                        </constraints>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                    <connections>
+                        <outlet property="elapsedTimeLabel" destination="m5L-O7-P31" id="vJ7-EQ-Z5f"/>
+                        <outlet property="modelPickerView" destination="6MG-gv-hD5" id="l0g-ue-raK"/>
+                        <outlet property="resultTextView" destination="VQn-bS-fWp" id="306-c7-3vM"/>
+                        <outlet property="selectImageView" destination="ZZh-fw-LwK" id="afR-Bv-6AW"/>
+                        <outlet property="threadPickerView" destination="DlO-dk-RMr" id="Kk4-QV-b5o"/>
+                    </connections>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="-724" y="98.50074962518741"/>
+        </scene>
+    </scenes>
+    <resources>
+        <image name="paddle-mobile.png" width="402" height="62"/>
+    </resources>
+</document>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/Info.plist
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Info.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>NSCameraUsageDescription</key>
+	<string>use camera</string>
+	<key>NSPhotoLibraryUsageDescription</key>
+	<string>use album</string>
+	<key>UILaunchStoryboardName</key>
+	<string>LaunchScreen</string>
+	<key>UIMainStoryboardFile</key>
+	<string>Main</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+</dict>
+</plist>
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift
+//
+//  MetalHelper.swift
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/7/25.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Metal
+import MetalKit
+import Foundation
+import paddle_mobile
+import MetalPerformanceShaders
+
+/// Singleton holding the Metal device, command queue and texture loader used by
+/// the demo, plus a helper for rescaling textures on the GPU.
+class MetalHelper {
+    /// System default Metal device.
+    let device: MTLDevice
+    /// Command queue created from `device`.
+    let queue: MTLCommandQueue
+    /// Texture loader bound to `device`.
+    let textureLoader: MTKTextureLoader
+    /// Shared instance; the initializer is private so this is the only one.
+    static let shared: MetalHelper = MetalHelper.init()
+
+    /// Creates the Metal objects. Terminates with a descriptive message when the
+    /// platform provides no Metal device or a command queue cannot be created.
+    private init(){
+        guard let defaultDevice = MTLCreateSystemDefaultDevice() else {
+            fatalError("MetalHelper: no Metal device available")
+        }
+        guard let commandQueue = defaultDevice.makeCommandQueue() else {
+            fatalError("MetalHelper: unable to create command queue")
+        }
+        device = defaultDevice
+        queue = commandQueue
+        textureLoader = MTKTextureLoader.init(device: defaultDevice)
+    }
+
+    /// Rescales `input` to `size` with a Lanczos filter and hands the result to `complete`.
+    ///
+    /// - Parameters:
+    ///   - queue: Command queue the scaling work is submitted to. The destination
+    ///     texture and the scaling kernel are created on this queue's device so
+    ///     that all resources belong to the same device.
+    ///   - input: Source texture.
+    ///   - size: Width and height, in pixels, of the destination texture.
+    ///   - complete: Invoked with the `.rgba32Float` destination texture from the
+    ///     command buffer's completion handler (i.e. asynchronously, after the
+    ///     GPU work finished).
+    ///
+    /// Terminates with a descriptive message if the destination texture or the
+    /// command buffer cannot be created.
+    static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) {
+        // Use the queue's own device rather than the shared one: Metal resources
+        // encoded on a queue must come from that queue's device.
+        let device = queue.device
+
+        let destDescriptor = MTLTextureDescriptor.init()
+        destDescriptor.width = size.width
+        destDescriptor.height = size.height
+        destDescriptor.depth = 1
+        destDescriptor.usage = [.shaderRead, .shaderWrite]
+        destDescriptor.pixelFormat = .rgba32Float
+        destDescriptor.textureType = .type2D
+        destDescriptor.storageMode = .shared
+        destDescriptor.cpuCacheMode = .defaultCache
+
+        guard let dest = device.makeTexture(descriptor: destDescriptor) else {
+            fatalError("MetalHelper.scaleTexture: unable to create \(size.width)x\(size.height) destination texture")
+        }
+        guard let commandBuffer = queue.makeCommandBuffer() else {
+            fatalError("MetalHelper.scaleTexture: unable to create command buffer")
+        }
+
+        let scale = MPSImageLanczosScale.init(device: device)
+        scale.encode(commandBuffer: commandBuffer, sourceTexture: input, destinationTexture: dest)
+        commandBuffer.addCompletedHandler({ (_) in
+            complete(dest)
+        })
+        commandBuffer.commit()
+    }
+}
+
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift
+//
+//  ModelHelper.swift
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/8/10.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import UIKit
+import MetalKit
+import Foundation
+import paddle_mobile
+import MetalPerformanceShaders
+
+/// Custom kernel wrapping the demo's "preprocess" Metal function.
+///
+/// The output shape is fixed at 224 x 224 with 3 channels, and the function is
+/// requested with `usePaddleMobileLib: false`.
+class PreProccess: CusomKernel {
+    /// - Parameter device: Metal device forwarded to `CusomKernel`.
+    init(device: MTLDevice) {
+        let outputShape = CusomKernel.Shape(inWidth: 224, inHeight: 224, inChannel: 3)
+        super.init(
+            device: device,
+            inFunctionName: "preprocess",
+            outputDim: outputShape,
+            usePaddleMobileLib: false
+        )
+    }
+}
+
+/// Lookup table from each supported model to the helper that serves it.
+let modelHelperMap: [SupportModel : ModelHelper] = [
+    SupportModel.mobilenet : MobileNetHelper()
+]
+
+/// Models the demo knows how to run, keyed by their string name.
+enum SupportModel: String {
+    case mobilenet = "mobilenet"
+
+    /// Returns every model the demo supports.
+    static func supportedModels() -> [SupportModel] {
+        let models: [SupportModel] = [SupportModel.mobilenet]
+        return models
+    }
+}
+
+/// Everything the demo needs to know about one model: where its files live,
+/// how to turn an image into an input texture, and how to present the output.
+protocol ModelHelper {
+    // MARK: Model files
+
+    /// Path of the model file.
+    var modelPath: String { get }
+    /// Path of the parameter file.
+    var paramPath: String { get }
+    /// Model directory. NOTE(review): purpose is not visible here; the only
+    /// conformer in this file sets it to "" - confirm against the loader.
+    var modelDir: String { get }
+
+    // MARK: Input
+
+    /// Input dimensions. NOTE(review): layout is not stated here - confirm.
+    var dim: [Int] { get }
+    /// Kernel used to preprocess the input.
+    var preprocessKernel: CusomKernel { get }
+    /// Produces a texture for `image` and passes it to the `getTexture` callback.
+    func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void)
+
+    // MARK: Output
+
+    /// Formats the raw result values `res` as a display string.
+    func resultStr(res: [Float]) -> String
+}
+
+extension ModelHelper {
+    /// Default implementation: loads `image` into a Metal texture, rescales it
+    /// to 224 x 224 on the shared queue, and passes the result to `getTexture`.
+    ///
+    /// - Parameters:
+    ///   - image: Source image.
+    ///   - getTexture: Callback receiving the scaled texture; it is invoked
+    ///     asynchronously by `MetalHelper.scaleTexture`.
+    ///
+    /// Terminates with " texture loader error" plus the underlying error when the
+    /// texture loader fails.
+    func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
+        // `try?` binds to the whole expression on its right, so the previous
+        // `try? loader.newTexture(...) ?! "msg"` could never report the message
+        // and a failure surfaced as a bare force-unwrap crash. Handle the throw
+        // explicitly instead.
+        let texture: MTLTexture
+        do {
+            texture = try MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:])
+        } catch {
+            fatalError(" texture loader error: \(error)")
+        }
+        MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture, size: (224, 224)) { (resTexture) in
+            getTexture(resTexture)
+        }
+    }
+}
+
+/// `ModelHelper` for the bundled MobileNet model: resolves the model files in the
+/// main bundle and turns prediction output into labelled text.
+struct MobileNetHelper: ModelHelper {
+    /// Label table read from a bundled text file, one label per line.
+    class PreWords {
+        /// Number of leading characters stripped from every line before it is stored.
+        /// NOTE(review): presumably the synset id plus a separator — confirm against synset.txt.
+        private static let prefixLength = 10
+
+        /// Labels in file order; lines no longer than the prefix are skipped.
+        var contents: [String] = []
+
+        /// Loads the labels from the resource `fileName`.`type` in `inBundle`.
+        /// Terminates the process if the resource is missing or cannot be read.
+        init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
+            guard let filePath = inBundle.path(forResource: fileName, ofType: type) else {
+                fatalError("no file call \(fileName)")
+            }
+            let text: String
+            do {
+                text = try String(contentsOfFile: filePath)
+            } catch {
+                // Say which file failed and why instead of trapping on a bare `try!`.
+                fatalError("can not read \(filePath): \(error)")
+            }
+            contents = text
+                .components(separatedBy: CharacterSet.newlines)
+                .filter { $0.count > PreWords.prefixLength }
+                .map { String($0.dropFirst(PreWords.prefixLength)) }
+        }
+
+        /// Returns the label at `index`, or a placeholder when `index` lies outside
+        /// the table, so an unexpected class index cannot crash the UI.
+        subscript(index: Int) -> String {
+            guard contents.indices.contains(index) else {
+                return "unknown label \(index)"
+            }
+            return contents[index]
+        }
+    }
+
+    /// Labels loaded from the bundled "synset.txt".
+    let labels = PreWords.init(fileName: "synset")
+
+    /// Formats the entries returned by `res.top(r: 5)` as "rank: label (percent%)",
+    /// one per line. The first element of each entry indexes `labels`; the second
+    /// is multiplied by 100 and printed as a percentage.
+    func resultStr(res: [Float]) -> String {
+        let lines: [String] = res.top(r: 5).enumerated().map {
+            String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)
+        }
+        return lines.joined(separator: "\n")
+    }
+
+    var preprocessKernel: CusomKernel
+    let dim = [1, 224, 224, 3]
+    let modelPath: String
+    let paramPath: String
+    let modelDir: String
+
+    /// Resolves the "model" and "params" resources in the main bundle and builds
+    /// the pre-processing kernel on the shared Metal device.
+    init() {
+        modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
+        paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
+        modelDir = ""
+        preprocessKernel = PreProccess.init(device: MetalHelper.shared.device)
+    }
+}
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal
+//
+//  PreProcessKernel.metal
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/7/20.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+#include <metal_stdlib>
+using namespace metal;
+
+
+// Per-texel input normalization: multiplies the input texel by 255, subtracts
+// the per-channel means, multiplies by 0.017, then writes the first three
+// channels in reversed order with the fourth channel set to 0.
+kernel void preprocess(
+                       texture2d<float, access::read> inTexture [[texture(0)]],
+                       texture2d<float, access::write> outTexture [[texture(1)]],
+                       uint2 gid [[thread_position_in_grid]])
+{
+    // Threads outside the output texture have nothing to write.
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height();
+    if (!insideOutput) {
+        return;
+    }
+    const float4 channelMeans = float4(123.68f, 116.78f, 103.94f, 0.0f);
+    const float4 normalized = (inTexture.read(gid) * 255.0 - channelMeans) * 0.017;
+    outTexture.write(float4(normalized.zyx, 0.0f), gid);
+}
+
+// Half-precision variant of the input normalization: multiplies the input texel
+// by 255, subtracts the per-channel means, multiplies by 0.017, then writes the
+// first three channels in reversed order with the fourth channel set to 0.
+kernel void preprocess_half(
+                       texture2d<half, access::read> inTexture [[texture(0)]],
+                       texture2d<half, access::write> outTexture [[texture(1)]],
+                       uint2 gid [[thread_position_in_grid]])
+{
+    // Threads outside the output texture have nothing to write.
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height();
+    if (!insideOutput) {
+        return;
+    }
+    const half4 channelMeans = half4(123.68f, 116.78f, 103.94f, 0.0f);
+    const half4 normalized = (inTexture.read(gid) * 255.0 - channelMeans) * 0.017;
+    outTexture.write(half4(normalized.zyx, 0.0h), gid);
+}
+
+
+
+
+
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import UIKit
+import MetalKit
+import paddle_mobile
+import MetalPerformanceShaders
+
+/// Thread counts offered by the thread picker.
+let threadSupport: [Int] = [1]
+
+/// Demo screen: loads the selected model, lets the user pick an image, and runs
+/// repeated predictions to display the result text and an average latency.
+class ViewController: UIViewController {
+    @IBOutlet weak var resultTextView: UITextView!
+    @IBOutlet weak var selectImageView: UIImageView!
+    @IBOutlet weak var elapsedTimeLabel: UILabel!
+    @IBOutlet weak var modelPickerView: UIPickerView!
+    @IBOutlet weak var threadPickerView: UIPickerView!
+    /// Image currently shown in `selectImageView`.
+    var selectImage: UIImage?
+    /// Program produced by the last successful `loadAct`.
+    var program: Program?
+    /// Executor built from `program`; nil until a model has been loaded.
+    var executor: Executor<Float32>?
+    /// Model chosen in `modelPickerView`.
+    var modelType: SupportModel = .mobilenet
+    /// Scaled texture of `selectImage`, consumed by `predictAct`.
+    var toPredictTexture: MTLTexture?
+    /// Helper registered for `modelType`; traps if none is registered.
+    var modelHelper: ModelHelper {
+        return modelHelperMap[modelType] ?! " has no this type "
+    }
+    /// Thread count chosen in `threadPickerView`.
+    var threadNum = 1
+
+    /// Loads the program for the selected model and creates its executor.
+    /// Errors are printed; `program`/`executor` keep whatever was assigned before the failure.
+    @IBAction func loadAct(_ sender: Any) {
+        let inModelHelper = modelHelper
+        let queue = MetalHelper.shared.queue
+        let loader = Loader<Float32>.init()
+        do {
+            let modelPath = inModelHelper.modelPath
+            let paraPath = inModelHelper.paramPath
+
+            program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath)
+            executor = try Executor<Float32>.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!)
+        } catch let error {
+            print(error)
+        }
+    }
+
+    /// Presents an image picker, using the camera when one is available.
+    @IBAction func selectImageAct(_ sender: Any) {
+        let imagePicker = UIImagePickerController()
+        // Selecting an unavailable source type raises an exception (e.g. on the
+        // simulator), so fall back to the photo library when there is no camera.
+        if UIImagePickerController.isSourceTypeAvailable(.camera) {
+            imagePicker.sourceType = .camera
+        } else {
+            imagePicker.sourceType = .photoLibrary
+        }
+        imagePicker.delegate = self
+        self.present(imagePicker, animated: true, completion: nil)
+    }
+
+    /// Clears the executor and drops the loaded program.
+    @IBAction func clearAct(_ sender: Any) {
+        executor?.clear()
+        program = nil
+        executor = nil
+    }
+
+    /// Runs 100 predictions on `toPredictTexture`. The clock is restarted when the
+    /// 50th prediction completes, and when the last one completes the result text
+    /// and the elapsed time divided by 50 are shown.
+    @IBAction func predictAct(_ sender: Any) {
+        guard let inTexture = toPredictTexture else {
+            resultTextView.text = "请选择图片 ! "
+            return
+        }
+
+        guard let inExecutor = executor else {
+            resultTextView.text = "请先 load ! "
+            return
+        }
+
+        // Resolve the helper once so every run and the final formatting use the same model.
+        let helper = modelHelper
+
+        do {
+            let max = 100
+            var startDate = Date.init()
+            for i in 0..<max {
+                try inExecutor.predict(input: inTexture, expect: helper.dim, completionHandle: { [weak self] (result) in
+                    if i == (max / 2 - 1) {
+                        startDate = Date.init()
+                    }
+
+                    if i == max - 1 {
+                        let time = Date.init().timeIntervalSince(startDate)
+                        DispatchQueue.main.async {
+                            // The screen may have been dismissed while predictions were
+                            // still in flight; there is nothing to update in that case.
+                            guard let sSelf = self else {
+                                return
+                            }
+                            sSelf.resultTextView.text = helper.resultStr(res: result.resultArr)
+                            sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max/2) * 1000.0) ms"
+                        }
+                    }
+                }, preProcessKernle: helper.preprocessKernel)
+            }
+        } catch let error {
+            print(error)
+        }
+    }
+
+    /// Wires up both pickers and prepares the bundled sample image for prediction.
+    override func viewDidLoad() {
+        super.viewDidLoad()
+        modelPickerView.delegate = self
+        modelPickerView.dataSource = self
+        threadPickerView.delegate = self
+        threadPickerView.dataSource = self
+
+        selectImage = UIImage.init(named: "banana.jpeg")
+        selectImageView.image = selectImage
+        // A missing or non-bitmap sample image must not crash the launch; without a
+        // texture `predictAct` simply asks the user to select an image.
+        guard let sampleImage = selectImage?.cgImage else {
+            print("banana.jpeg could not be loaded")
+            return
+        }
+        modelHelper.getTexture(image: sampleImage) { [weak self] (texture) in
+            self?.toPredictTexture = texture
+        }
+    }
+}
+
+/// Data source and delegate for the model picker and the thread picker.
+/// Any other picker view is treated as a programming error and traps.
+extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate {
+    /// Both pickers show a single column.
+    func numberOfComponents(in pickerView: UIPickerView) -> Int {
+        guard pickerView == modelPickerView || pickerView == threadPickerView else {
+            fatalError()
+        }
+        return 1
+    }
+
+    /// One row per supported model, or one row per supported thread count.
+    func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
+        if pickerView == modelPickerView {
+            return SupportModel.supportedModels().count
+        }
+        if pickerView == threadPickerView {
+            return threadSupport.count
+        }
+        fatalError()
+    }
+
+    /// Row title: the model's raw value, or the thread count as text.
+    public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
+        if pickerView == modelPickerView {
+            let model = SupportModel.supportedModels()[row]
+            return model.rawValue
+        }
+        if pickerView == threadPickerView {
+            let threads = threadSupport[row]
+            return "\(threads)"
+        }
+        fatalError()
+    }
+
+    /// Stores the selection in `modelType` or `threadNum`.
+    public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
+        if pickerView == modelPickerView {
+            modelType = SupportModel.supportedModels()[row]
+            return
+        }
+        guard pickerView == threadPickerView else {
+            fatalError()
+        }
+        threadNum = threadSupport[row]
+    }
+}
+
+extension ViewController:  UIImagePickerControllerDelegate, UINavigationControllerDelegate {
+    /// Dismisses the picker, then stores the picked image, shows it, and converts
+    /// it to the texture used for prediction. If the controller is gone or no
+    /// bitmap-backed image was delivered, the current selection is left untouched.
+    ///
+    /// NOTE(review): `cgImage` carries no orientation information, so rotated
+    /// camera photos may reach the model unrotated — confirm whether that matters.
+    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
+        picker.dismiss(animated: true){[weak self] in
+            // The controller may already be deallocated when the dismissal finishes.
+            guard let sSelf = self else {
+                return
+            }
+            // Skip the update instead of crashing when the picker delivers no usable image.
+            guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage,
+                let pickedCGImage = image.cgImage else {
+                print("no image")
+                return
+            }
+            sSelf.selectImage = image
+            sSelf.selectImageView.image = image
+            sSelf.modelHelper.getTexture(image: pickedCGImage, getTexture: { [weak sSelf] (texture) in
+                sSelf?.toPredictTexture = texture
+            })
+        }
+    }
+}
+
+
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		168DA950D7D6CF91EBF70A17 /* Pods_paddle_mobile_unit_test.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8BCD4792E483BFEE9F5523DE /* Pods_paddle_mobile_unit_test.framework */; };
+		FC607427211DF3B100B17547 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC60734E211DF3B000B17547 /* synset.txt */; };
+		FC607428211DF3B100B17547 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FC60734F211DF3B000B17547 /* banana.jpeg */; };
+		FC607429211DF3B100B17547 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC607350211DF3B000B17547 /* iphone.JPG */; };
+		FC60742A211DF3B100B17547 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC607351211DF3B000B17547 /* paddle-mobile.png */; };
+		FC60742B211DF3B100B17547 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC607354211DF3B000B17547 /* params */; };
+		FC60742C211DF3B100B17547 /* model in Resources */ = {isa = PBXBuildFile; fileRef = FC607355211DF3B000B17547 /* model */; };
+		FC91818D211DAE9A00B6F354 /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */; };
+		FC91818E211DAE9A00B6F354 /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
+		FCDFD409211D9185005AB38B /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD408211D9185005AB38B /* AppDelegate.swift */; };
+		FCDFD40B211D9185005AB38B /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD40A211D9185005AB38B /* ViewController.swift */; };
+		FCDFD40E211D9185005AB38B /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD40C211D9185005AB38B /* Main.storyboard */; };
+		FCDFD410211D9187005AB38B /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD40F211D9187005AB38B /* Assets.xcassets */; };
+		FCDFD413211D9187005AB38B /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD411211D9187005AB38B /* LaunchScreen.storyboard */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		FC91818F211DAE9B00B6F354 /* Embed Frameworks */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = "";
+			dstSubfolderSpec = 10;
+			files = (
+				FC91818E211DAE9A00B6F354 /* paddle_mobile.framework in Embed Frameworks */,
+			);
+			name = "Embed Frameworks";
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		5CC132C848027BE970FB2637 /* Pods-paddle-mobile-unit-test.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-unit-test.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test.debug.xcconfig"; sourceTree = "<group>"; };
+		72F34AE9677943FC580DE7F4 /* Pods-paddle-mobile-unit-test.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-unit-test.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test.release.xcconfig"; sourceTree = "<group>"; };
+		8BCD4792E483BFEE9F5523DE /* Pods_paddle_mobile_unit_test.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_unit_test.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FC60734E211DF3B000B17547 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
+		FC60734F211DF3B000B17547 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
+		FC607350211DF3B000B17547 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
+		FC607351211DF3B000B17547 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
+		FC607354211DF3B000B17547 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
+		FC607355211DF3B000B17547 /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
+		FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FCDFD405211D9185005AB38B /* paddle-mobile-unit-test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-unit-test.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+		FCDFD408211D9185005AB38B /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
+		FCDFD40A211D9185005AB38B /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
+		FCDFD40D211D9185005AB38B /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		FCDFD40F211D9187005AB38B /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		FCDFD412211D9187005AB38B /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		FCDFD414211D9187005AB38B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		FCDFD402211D9185005AB38B /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC91818D211DAE9A00B6F354 /* paddle_mobile.framework in Frameworks */,
+				168DA950D7D6CF91EBF70A17 /* Pods_paddle_mobile_unit_test.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		CF78F766C11CC8AD67269581 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				8BCD4792E483BFEE9F5523DE /* Pods_paddle_mobile_unit_test.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		F752428B187BC4E0928ACD3D /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				5CC132C848027BE970FB2637 /* Pods-paddle-mobile-unit-test.debug.xcconfig */,
+				72F34AE9677943FC580DE7F4 /* Pods-paddle-mobile-unit-test.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		FC60734D211DF3B000B17547 /* images */ = {
+			isa = PBXGroup;
+			children = (
+				FC60734E211DF3B000B17547 /* synset.txt */,
+				FC60734F211DF3B000B17547 /* banana.jpeg */,
+				FC607350211DF3B000B17547 /* iphone.JPG */,
+				FC607351211DF3B000B17547 /* paddle-mobile.png */,
+			);
+			name = images;
+			path = ../../images;
+			sourceTree = "<group>";
+		};
+		FC607352211DF3B000B17547 /* models */ = {
+			isa = PBXGroup;
+			children = (
+				FC607353211DF3B000B17547 /* mobilenet */,
+			);
+			name = models;
+			path = ../../models;
+			sourceTree = "<group>";
+		};
+		FC607353211DF3B000B17547 /* mobilenet */ = {
+			isa = PBXGroup;
+			children = (
+				FC607354211DF3B000B17547 /* params */,
+				FC607355211DF3B000B17547 /* model */,
+			);
+			path = mobilenet;
+			sourceTree = "<group>";
+		};
+		FCDFD3FC211D9185005AB38B = {
+			isa = PBXGroup;
+			children = (
+				FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */,
+				FCDFD407211D9185005AB38B /* paddle-mobile-unit-test */,
+				FCDFD406211D9185005AB38B /* Products */,
+				F752428B187BC4E0928ACD3D /* Pods */,
+				CF78F766C11CC8AD67269581 /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		FCDFD406211D9185005AB38B /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				FCDFD405211D9185005AB38B /* paddle-mobile-unit-test.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		FCDFD407211D9185005AB38B /* paddle-mobile-unit-test */ = {
+			isa = PBXGroup;
+			children = (
+				FC60734D211DF3B000B17547 /* images */,
+				FC607352211DF3B000B17547 /* models */,
+				FCDFD408211D9185005AB38B /* AppDelegate.swift */,
+				FCDFD40A211D9185005AB38B /* ViewController.swift */,
+				FCDFD40C211D9185005AB38B /* Main.storyboard */,
+				FCDFD40F211D9187005AB38B /* Assets.xcassets */,
+				FCDFD411211D9187005AB38B /* LaunchScreen.storyboard */,
+				FCDFD414211D9187005AB38B /* Info.plist */,
+			);
+			path = "paddle-mobile-unit-test";
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		FCDFD404211D9185005AB38B /* paddle-mobile-unit-test */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = FCDFD417211D9187005AB38B /* Build configuration list for PBXNativeTarget "paddle-mobile-unit-test" */;
+			buildPhases = (
+				5F5A9A9DC0C6307DEA4294C1 /* [CP] Check Pods Manifest.lock */,
+				FCDFD401211D9185005AB38B /* Sources */,
+				FCDFD402211D9185005AB38B /* Frameworks */,
+				FCDFD403211D9185005AB38B /* Resources */,
+				53A2089068F9D64BB96D4322 /* [CP] Embed Pods Frameworks */,
+				FC91818F211DAE9B00B6F354 /* Embed Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "paddle-mobile-unit-test";
+			productName = "paddle-mobile-unit-test";
+			productReference = FCDFD405211D9185005AB38B /* paddle-mobile-unit-test.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		FCDFD3FD211D9185005AB38B /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0940;
+				LastUpgradeCheck = 0940;
+				ORGANIZATIONNAME = orange;
+				TargetAttributes = {
+					FCDFD404211D9185005AB38B = {
+						CreatedOnToolsVersion = 9.4.1;
+					};
+				};
+			};
+			buildConfigurationList = FCDFD400211D9185005AB38B /* Build configuration list for PBXProject "paddle-mobile-unit-test" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = FCDFD3FC211D9185005AB38B;
+			productRefGroup = FCDFD406211D9185005AB38B /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				FCDFD404211D9185005AB38B /* paddle-mobile-unit-test */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		FCDFD403211D9185005AB38B /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC607427211DF3B100B17547 /* synset.txt in Resources */,
+				FC60742B211DF3B100B17547 /* params in Resources */,
+				FC607428211DF3B100B17547 /* banana.jpeg in Resources */,
+				FC60742A211DF3B100B17547 /* paddle-mobile.png in Resources */,
+				FC607429211DF3B100B17547 /* iphone.JPG in Resources */,
+				FC60742C211DF3B100B17547 /* model in Resources */,
+				FCDFD413211D9187005AB38B /* LaunchScreen.storyboard in Resources */,
+				FCDFD410211D9187005AB38B /* Assets.xcassets in Resources */,
+				FCDFD40E211D9185005AB38B /* Main.storyboard in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		53A2089068F9D64BB96D4322 /* [CP] Embed Pods Frameworks */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh",
+				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+			);
+			name = "[CP] Embed Pods Frameworks";
+			outputPaths = (
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
+		5F5A9A9DC0C6307DEA4294C1 /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-paddle-mobile-unit-test-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		FCDFD401211D9185005AB38B /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FCDFD40B211D9185005AB38B /* ViewController.swift in Sources */,
+				FCDFD409211D9185005AB38B /* AppDelegate.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		FCDFD40C211D9185005AB38B /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FCDFD40D211D9185005AB38B /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
+		FCDFD411211D9187005AB38B /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FCDFD412211D9187005AB38B /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		FCDFD415211D9187005AB38B /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.4;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+			};
+			name = Debug;
+		};
+		FCDFD416211D9187005AB38B /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.4;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		FCDFD418211D9187005AB38B /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 5CC132C848027BE970FB2637 /* Pods-paddle-mobile-unit-test.debug.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-unit-test/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile-unit-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		FCDFD419211D9187005AB38B /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 72F34AE9677943FC580DE7F4 /* Pods-paddle-mobile-unit-test.release.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-unit-test/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile-unit-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		FCDFD400211D9185005AB38B /* Build configuration list for PBXProject "paddle-mobile-unit-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FCDFD415211D9187005AB38B /* Debug */,
+				FCDFD416211D9187005AB38B /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		FCDFD417211D9187005AB38B /* Build configuration list for PBXNativeTarget "paddle-mobile-unit-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FCDFD418211D9187005AB38B /* Debug */,
+				FCDFD419211D9187005AB38B /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = FCDFD3FD211D9185005AB38B /* Project object */;
+}
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:paddle-mobile-unit-test.xcodeproj">
+   </FileRef>
+</Workspace>
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>paddle-mobile-unit-test.xcscheme</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>6</integer>
+		</dict>
+	</dict>
+</dict>
+</plist>
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift
+//
+//  AppDelegate.swift
+//  paddle-mobile-unit-test
+//
+//  Created by liuRuiLong on 2018/8/10.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import UIKit
+
+/// Application delegate for the paddle-mobile unit-test host app.
+///
+/// Every lifecycle hook below is deliberately a no-op: launch always
+/// reports success and no state is saved or restored on transitions.
+@UIApplicationMain
+class AppDelegate: UIResponder, UIApplicationDelegate {
+
+    /// Window slot required by storyboard-based apps; populated by UIKit.
+    var window: UIWindow?
+
+    // MARK: - Launch
+
+    /// Launch hook. No custom setup is performed; always returns `true`.
+    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
+        return true
+    }
+
+    // MARK: - Foreground transitions
+
+    /// App became active: the place to resume work paused while inactive
+    /// and, if coming back from the background, refresh the UI. No-op here.
+    func applicationDidBecomeActive(_ application: UIApplication) {
+    }
+
+    /// App is about to go inactive (temporary interruption such as a call
+    /// or SMS, or the user leaving the app): the place to pause tasks,
+    /// timers and rendering callbacks. No-op here.
+    func applicationWillResignActive(_ application: UIApplication) {
+    }
+
+    /// App is returning from the background towards the active state: the
+    /// place to undo changes made on entering the background. No-op here.
+    func applicationWillEnterForeground(_ application: UIApplication) {
+    }
+
+    // MARK: - Background and termination
+
+    /// App entered the background: the place to release shared resources
+    /// and persist enough state to restore the app if it is terminated
+    /// later. With background execution enabled this is called instead of
+    /// `applicationWillTerminate(_:)` when the user quits. No-op here.
+    func applicationDidEnterBackground(_ application: UIApplication) {
+    }
+
+    /// App is about to terminate: the place to save data if appropriate
+    /// (see also `applicationDidEnterBackground(_:)`). No-op here.
+    func applicationWillTerminate(_ application: UIApplication) {
+    }
+
+}
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/AppIcon.appiconset/Contents.json
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/AppIcon.appiconset/Contents.json
+{
+  "images" : [
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "83.5x83.5",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ios-marketing",
+      "size" : "1024x1024",
+      "scale" : "1x"
+    }
+  ],
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/Contents.json
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/Contents.json
+{
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/LaunchScreen.storyboard
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/LaunchScreen.storyboard
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" systemVersion="17A277" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="EHf-IW-A2E">
+            <objects>
+                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="53" y="375"/>
+        </scene>
+    </scenes>
+</document>
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/Main.storyboard
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/Main.storyboard
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" systemVersion="17A277" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="tne-QT-ifu">
+            <objects>
+                <viewController id="BYZ-38-t0r" customClass="ViewController" customModuleProvider="target" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
+            </objects>
+        </scene>
+    </scenes>
+</document>
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Info.plist
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Info.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>UILaunchStoryboardName</key>
+	<string>LaunchScreen</string>
+	<key>UIMainStoryboardFile</key>
+	<string>Main</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+</dict>
+</plist>
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
+//
+//  ViewController.swift
+//  paddle-mobile-unit-test
+//
+//  Created by liuRuiLong on 2018/8/10.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import UIKit
+import paddle_mobile
+
+/// View controller of the unit-test host app; Main.storyboard names this
+/// class as the custom class of its initial view controller.
+class ViewController: UIViewController {
+
+    // MARK: - View lifecycle
+
+    /// Runs the superclass setup, then writes a marker line to the console.
+    override func viewDidLoad() {
+        super.viewDidLoad()
+
+        // Console marker showing that the view finished loading.
+        let marker = " done "
+        print(marker)
+    }
+
+}
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; };
+		FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; };
+		FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9520E11C9A0081E9F8 /* Errors.swift */; };
+		FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9620E11C9A0081E9F8 /* Types.swift */; };
+		FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9A20E11CA00081E9F8 /* Executor.swift */; };
+		FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9D20E11CB20081E9F8 /* Tensor.swift */; };
+		FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9E20E11CB20081E9F8 /* Dim.swift */; };
+		FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA120E11CB70081E9F8 /* Loader.swift */; };
+		FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA420E11CBC0081E9F8 /* ConvOp.swift */; };
+		FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */; };
+		FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; };
+		FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; };
+		FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; };
+		FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BAF20E11CC20081E9F8 /* framework.pb.swift */; };
+		FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; };
+		FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; };
+		FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */; };
+		FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* VarDesc.swift */; };
+		FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; };
+		FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* OpDesc.swift */; };
+		FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; };
+		FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* BlockDesc.swift */; };
+		FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; };
+		FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; };
+		FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
+		FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; };
+		FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; };
+		FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1B186520ECF1C600678B91 /* ResizeKernel.swift */; };
+		FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; };
+		FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
+		FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
+		FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
+		FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; };
+		FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; };
+		FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; };
+		FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; };
+		FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; };
+		FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; };
+		FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; };
+		FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; };
+		FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; };
+		FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */; };
+		FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */; };
+		FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6F20F31B720007374F /* ReshapeKernel.swift */; };
+		FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; };
+		FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; };
+		FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; };
+		FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
+		FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
+		FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = "<group>"; };
+		DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = "<group>"; };
+		FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = paddle_mobile.h; sourceTree = "<group>"; };
+		FC039B6E20E11C3C0081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		FC039B9420E11C9A0081E9F8 /* Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Extensions.swift; sourceTree = "<group>"; };
+		FC039B9520E11C9A0081E9F8 /* Errors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Errors.swift; sourceTree = "<group>"; };
+		FC039B9620E11C9A0081E9F8 /* Types.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Types.swift; sourceTree = "<group>"; };
+		FC039B9A20E11CA00081E9F8 /* Executor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Executor.swift; sourceTree = "<group>"; };
+		FC039B9D20E11CB20081E9F8 /* Tensor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Tensor.swift; sourceTree = "<group>"; };
+		FC039B9E20E11CB20081E9F8 /* Dim.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Dim.swift; sourceTree = "<group>"; };
+		FC039BA120E11CB70081E9F8 /* Loader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Loader.swift; sourceTree = "<group>"; };
+		FC039BA420E11CBC0081E9F8 /* ConvOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ConvOp.swift; sourceTree = "<group>"; };
+		FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ElementwiseAddOp.swift; sourceTree = "<group>"; };
+		FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = "<group>"; };
+		FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = "<group>"; };
+		FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = "<group>"; };
+		FC039BAF20E11CC20081E9F8 /* framework.pb.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = framework.pb.swift; sourceTree = "<group>"; };
+		FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = "<group>"; };
+		FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = "<group>"; };
+		FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ProgramDesc.swift; sourceTree = "<group>"; };
+		FC039BB320E11CC20081E9F8 /* VarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VarDesc.swift; sourceTree = "<group>"; };
+		FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = "<group>"; };
+		FC039BB520E11CC20081E9F8 /* OpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OpDesc.swift; sourceTree = "<group>"; };
+		FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = "<group>"; };
+		FC039BB720E11CC20081E9F8 /* BlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BlockDesc.swift; sourceTree = "<group>"; };
+		FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = "<group>"; };
+		FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = "<group>"; };
+		FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
+		FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; };
+		FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; };
+		FC1B186520ECF1C600678B91 /* ResizeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ResizeKernel.swift; sourceTree = "<group>"; };
+		FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; };
+		FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
+		FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
+		FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
+		FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; };
+		FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; };
+		FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; };
+		FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; };
+		FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; };
+		FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = "<group>"; };
+		FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; };
+		FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; };
+		FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; };
+		FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxKernel.swift; sourceTree = "<group>"; };
+		FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReshapeOp.swift; sourceTree = "<group>"; };
+		FCD04E6F20F31B720007374F /* ReshapeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReshapeKernel.swift; sourceTree = "<group>"; };
+		FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; };
+		FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; };
+		FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; };
+		FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
+		FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
+		FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		FC039B6620E11C3C0081E9F8 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		336CBE234BF5DE48658DE65F /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		8EB858F9B68D372C9F1CA263 /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */,
+				E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		FC039B6020E11C3C0081E9F8 = {
+			isa = PBXGroup;
+			children = (
+				FC039B6C20E11C3C0081E9F8 /* paddle-mobile */,
+				FC039B6B20E11C3C0081E9F8 /* Products */,
+				8EB858F9B68D372C9F1CA263 /* Pods */,
+				336CBE234BF5DE48658DE65F /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		FC039B6B20E11C3C0081E9F8 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = {
+			isa = PBXGroup;
+			children = (
+				FC039BAE20E11CC20081E9F8 /* Program */,
+				FC039BA320E11CBC0081E9F8 /* Operators */,
+				FC039BA120E11CB70081E9F8 /* Loader.swift */,
+				FC039B9A20E11CA00081E9F8 /* Executor.swift */,
+				FC039B9C20E11CB20081E9F8 /* framework */,
+				FC039B9320E11C9A0081E9F8 /* Common */,
+				FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */,
+				FC039B6E20E11C3C0081E9F8 /* Info.plist */,
+			);
+			path = "paddle-mobile";
+			sourceTree = "<group>";
+		};
+		FC039B9320E11C9A0081E9F8 /* Common */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B9420E11C9A0081E9F8 /* Extensions.swift */,
+				FC039B9520E11C9A0081E9F8 /* Errors.swift */,
+				FC039B9620E11C9A0081E9F8 /* Types.swift */,
+				FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */,
+				FC60DB8820E9AAA500FF203F /* MetalExtension.swift */,
+				FCDC0FEA21099A1D00DC9EFB /* Tools.swift */,
+			);
+			path = Common;
+			sourceTree = "<group>";
+		};
+		FC039B9C20E11CB20081E9F8 /* framework */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B9D20E11CB20081E9F8 /* Tensor.swift */,
+				FC039B9E20E11CB20081E9F8 /* Dim.swift */,
+				FC9D038320E23B01000F735A /* Texture.swift */,
+			);
+			path = framework;
+			sourceTree = "<group>";
+		};
+		FC039BA320E11CBC0081E9F8 /* Operators */ = {
+			isa = PBXGroup;
+			children = (
+				FC086BA520E67E8500D85EF7 /* Kernels */,
+				FCD592FA20E248EC00252966 /* Base */,
+				FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */,
+				FC039BA420E11CBC0081E9F8 /* ConvOp.swift */,
+				FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */,
+				FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */,
+				FC039BA820E11CBC0081E9F8 /* ReluOp.swift */,
+				FC9D037F20E22FBB000F735A /* FeedOp.swift */,
+				FC9D038120E2312E000F735A /* FetchOp.swift */,
+				FCD04E6520F314C50007374F /* PoolOp.swift */,
+				FCD04E6920F319EC0007374F /* SoftmaxOp.swift */,
+				FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */,
+				FCD04E7120F343420007374F /* ConvAddOp.swift */,
+			);
+			path = Operators;
+			sourceTree = "<group>";
+		};
+		FC039BAE20E11CC20081E9F8 /* Program */ = {
+			isa = PBXGroup;
+			children = (
+				FC039BAF20E11CC20081E9F8 /* framework.pb.swift */,
+				FC039BB020E11CC20081E9F8 /* Scope.swift */,
+				FC039BB120E11CC20081E9F8 /* TensorDesc.swift */,
+				FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */,
+				FC039BB320E11CC20081E9F8 /* VarDesc.swift */,
+				FC039BB420E11CC20081E9F8 /* Program.swift */,
+				FC039BB520E11CC20081E9F8 /* OpDesc.swift */,
+				FC039BB620E11CC20081E9F8 /* Attribute.swift */,
+				FC039BB720E11CC20081E9F8 /* BlockDesc.swift */,
+				FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */,
+			);
+			path = Program;
+			sourceTree = "<group>";
+		};
+		FC086BA520E67E8500D85EF7 /* Kernels */ = {
+			isa = PBXGroup;
+			children = (
+				FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
+				FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
+				FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
+				FC1B186520ECF1C600678B91 /* ResizeKernel.swift */,
+				FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
+				FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */,
+				FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */,
+				FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */,
+				FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
+				FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */,
+				FCD04E6720F315020007374F /* PoolKernel.swift */,
+				FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */,
+				FCD04E6F20F31B720007374F /* ReshapeKernel.swift */,
+				FCD04E7320F3437E0007374F /* ConvAddKernel.swift */,
+			);
+			path = Kernels;
+			sourceTree = "<group>";
+		};
+		FCD592FA20E248EC00252966 /* Base */ = {
+			isa = PBXGroup;
+			children = (
+				FC9D037820E229E4000F735A /* OpParam.swift */,
+				FC039BA620E11CBC0081E9F8 /* Operator.swift */,
+				FC82735820E3C04200BE430A /* OpCreator.swift */,
+			);
+			path = Base;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		FC039B6720E11C3C0081E9F8 /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		FC039B6920E11C3C0081E9F8 /* paddle-mobile */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = FC039B7220E11C3C0081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile" */;
+			buildPhases = (
+				AF33BB8D0770A77AC22B5EF4 /* [CP] Check Pods Manifest.lock */,
+				FC039B6520E11C3C0081E9F8 /* Sources */,
+				FC039B6620E11C3C0081E9F8 /* Frameworks */,
+				FC039B6720E11C3C0081E9F8 /* Headers */,
+				FC039B6820E11C3C0081E9F8 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "paddle-mobile";
+			productName = "paddle-mobile";
+			productReference = FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */;
+			productType = "com.apple.product-type.framework";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		FC039B6120E11C3C0081E9F8 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0930;
+				ORGANIZATIONNAME = orange;
+				TargetAttributes = {
+					FC039B6920E11C3C0081E9F8 = {
+						CreatedOnToolsVersion = 9.3.1;
+					};
+				};
+			};
+			buildConfigurationList = FC039B6420E11C3C0081E9F8 /* Build configuration list for PBXProject "paddle-mobile" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+			);
+			mainGroup = FC039B6020E11C3C0081E9F8;
+			productRefGroup = FC039B6B20E11C3C0081E9F8 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				FC039B6920E11C3C0081E9F8 /* paddle-mobile */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		FC039B6820E11C3C0081E9F8 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		AF33BB8D0770A77AC22B5EF4 /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-paddle-mobile-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		FC039B6520E11C3C0081E9F8 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
+				FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
+				FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */,
+				FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */,
+				FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
+				FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
+				FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
+				FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
+				FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
+				FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
+				FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */,
+				FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
+				FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
+				FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
+				FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */,
+				FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */,
+				FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
+				FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
+				FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
+				FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
+				FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
+				FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
+				FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
+				FC9D038420E23B01000F735A /* Texture.swift in Sources */,
+				FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
+				FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */,
+				FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */,
+				FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */,
+				FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */,
+				FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
+				FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
+				FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
+				FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */,
+				FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
+				FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
+				FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
+				FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
+				FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */,
+				FC9D038220E2312E000F735A /* FetchOp.swift in Sources */,
+				FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */,
+				FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
+				FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
+				FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
+				FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
+				FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
+				FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
+				FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
+				FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
+				FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		FC039B7020E11C3C0081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+				VERSIONING_SYSTEM = "apple-generic";
+				VERSION_INFO_PREFIX = "";
+			};
+			name = Debug;
+		};
+		FC039B7120E11C3C0081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+				VERSIONING_SYSTEM = "apple-generic";
+				VERSION_INFO_PREFIX = "";
+			};
+			name = Release;
+		};
+		FC039B7320E11C3C0081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */;
+			buildSettings = {
+				CODE_SIGN_IDENTITY = "";
+				CODE_SIGN_STYLE = Automatic;
+				DEFINES_MODULE = YES;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				DYLIB_INSTALL_NAME_BASE = "@rpath";
+				INFOPLIST_FILE = "paddle-mobile/Info.plist";
+				INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				MTL_LANGUAGE_REVISION = UseDeploymentTarget;
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
+				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
+				SKIP_INSTALL = YES;
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		FC039B7420E11C3C0081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */;
+			buildSettings = {
+				CODE_SIGN_IDENTITY = "";
+				CODE_SIGN_STYLE = Automatic;
+				DEFINES_MODULE = YES;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				DYLIB_INSTALL_NAME_BASE = "@rpath";
+				INFOPLIST_FILE = "paddle-mobile/Info.plist";
+				INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				MTL_LANGUAGE_REVISION = UseDeploymentTarget;
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
+				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
+				SKIP_INSTALL = YES;
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		FC039B6420E11C3C0081E9F8 /* Build configuration list for PBXProject "paddle-mobile" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B7020E11C3C0081E9F8 /* Debug */,
+				FC039B7120E11C3C0081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		FC039B7220E11C3C0081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B7320E11C3C0081E9F8 /* Debug */,
+				FC039B7420E11C3C0081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = FC039B6120E11C3C0081E9F8 /* Project object */;
+}
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:paddle-mobile.xcodeproj">
+   </FileRef>
+</Workspace>
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "0940"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
+               BuildableName = "paddle_mobile.framework"
+               BlueprintName = "paddle-mobile"
+               ReferencedContainer = "container:paddle-mobile.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+      </Testables>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
+            BuildableName = "paddle_mobile.framework"
+            BlueprintName = "paddle-mobile"
+            ReferencedContainer = "container:paddle-mobile.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
+            BuildableName = "paddle_mobile.framework"
+            BlueprintName = "paddle-mobile"
+            ReferencedContainer = "container:paddle-mobile.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>paddle-mobile.xcscheme</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>0</integer>
+		</dict>
+	</dict>
+	<key>SuppressBuildableAutocreation</key>
+	<dict>
+		<key>FC039B6920E11C3C0081E9F8</key>
+		<dict>
+			<key>primary</key>
+			<true/>
+		</dict>
+	</dict>
+</dict>
+</plist>
--- a/metal/paddle-mobile/paddle-mobile/Common/Errors.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/Errors.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Errors reported by the paddle-mobile framework.
+///
+/// Every case carries a human-readable `message` describing the failure.
+/// NOTE(review): the case names suggest which subsystem failed (loader, net, memory,
+/// parameters, operators, prediction); the throw sites are outside this file — confirm there.
+public enum PaddleMobileError: Error{
+    case loaderError(message: String)
+    case netError(message: String)
+    case memoryError(message: String)
+    case paramError(message: String)
+    case opError(message: String)
+    case predictError(message: String)
+}
--- a/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+// Custom `?!` operator: unwraps the optional on its left-hand side. If the optional holds a
+// value, that value is returned; if it is nil, the message produced by the right-hand side is
+// printed and then passed to fatalError.
+precedencegroup ExecutedOrFatalError{
+    associativity: left
+    higherThan: AssignmentPrecedence
+}
+infix operator ?!: ExecutedOrFatalError
+
+/// Unwraps `option` or terminates the process.
+///
+/// - Parameters:
+///   - option: the optional to unwrap.
+///   - excuteOrError: message expression; only evaluated when `option` is nil.
+/// - Returns: the wrapped value when `option` is non-nil. Never returns when it is nil.
+public func ?!<T>(option: T?, excuteOrError: @autoclosure () -> String) -> T{
+    guard let unwrapped = option else {
+        // Evaluate the autoclosure exactly once, so a message expression that is expensive
+        // or has side effects is not run a second time for fatalError.
+        let message = excuteOrError()
+        print(message)
+        fatalError(message)
+    }
+    return unwrapped
+}
+
+// Lense: a pair of closures relating a value of type `A` to a value of type `B`.
+// `from` maps an `A` to a `B`; `to` takes a `B` together with an `A` and yields an `A`.
+// NOTE(review): this reads like a functional lens (`from` = getter of a part, `to` = setter
+// returning the updated whole) — confirm against the call sites.
+struct Lense<A, B> {
+    let from: (A) -> B
+    let to: (B, A) -> A
+}
+
+// Precedence of `>>>`: left-associative and binds tighter than assignment.
+precedencegroup CombineLense{
+    associativity: left
+    higherThan: AssignmentPrecedence
+}
+
+infix operator >>>: CombineLense
+
+/// Chains two `Lense` values into one that goes directly from `A` to `C`.
+///
+/// - Parameters:
+///   - left: relates the intermediate type `B` to `C`.
+///   - right: relates `A` to the intermediate type `B`.
+/// - Returns: a `Lense<A, C>` whose `from` applies `right.from` then `left.from`, and whose
+///   `to` feeds the result of `left.to` into `right.to`.
+func >>><A, B, C>(left: Lense<B, C>, right: Lense<A, B>) -> Lense<A, C> {
+    let read: (A) -> C = { whole in
+        let middle = right.from(whole)
+        return left.from(middle)
+    }
+    let write: (C, A) -> A = { (part, whole) in
+        let middle = right.from(whole)
+        let updatedMiddle = left.to(part, middle)
+        return right.to(updatedMiddle, whole)
+    }
+    return Lense<A, C>(from: read, to: write)
+}
+
+/// A container that can be read and written through a C `int` (`CInt`) subscript.
+protocol CIntIndex {
+    associatedtype T
+    subscript(index: CInt) -> T { get set }
+}
+
+/// Lets an `Array` be indexed directly with `CInt` values.
+extension Array: CIntIndex {
+    typealias T = Element
+
+    /// Converts `index` to `Int` and forwards to the regular `Int` subscript.
+    /// Terminates with `fatalError` if the index does not fit in `Int`.
+    subscript(index: CInt) -> T {
+        get {
+            if Int64(index) > Int64(Int.max) {
+                fatalError("cint index out of Int range")
+            }
+            let position = Int(index)
+            return self[position]
+        }
+        set {
+            if Int64(index) > Int64(Int.max) {
+                fatalError("cint index out of Int range")
+            }
+            let position = Int(index)
+            self[position] = newValue
+        }
+    }
+}
+
+extension Array where Element: AnyObject{
+    /// Removes the first entry that is the very same object instance as `element`.
+    ///
+    /// Matching is by reference identity (`===`), not by value equality. The array is left
+    /// unchanged when `element` is not present.
+    ///
+    /// - Parameter element: the object instance to remove.
+    mutating func remove(element: Element) {
+        // `===` expresses the identity comparison directly, without reinterpreting the
+        // references as integers through unsafeBitCast.
+        if let position = index(where: { $0 === element }) {
+            remove(at: position)
+        }
+    }
+    
+}
+
+//MARK: Array extension
+extension Array where Element: Comparable{
+    
+    /// Returns the `r` largest elements in descending order, each paired with the position
+    /// it occupied in the original array.
+    ///
+    /// The relative order of equal elements is whatever `sorted` produces.
+    ///
+    /// - Parameter r: how many elements to return; must satisfy `0 <= r <= count`.
+    /// - Returns: `[(original position, element)]`, largest element first. Empty when `r` is 0.
+    public func top(r: Int) -> [(Int, Element)] {
+        precondition(r >= 0 && r <= self.count)
+        // prefix(r) handles r == 0; the former prefix(through: r - 1) trapped on index -1.
+        return Array<(Int, Element)>(zip(0..<self.count, self).sorted{ $0.1 > $1.1 }.prefix(r))
+    }
+}
+
+extension String{
+    /// Returns the string as a C string pointer by bridging to `NSString` and reading its
+    /// `utf8String` property; nil when `utf8String` is nil.
+    ///
+    /// NOTE(review): the pointer comes from a temporary bridged `NSString`, so it is
+    /// presumably only valid for a short time — confirm that callers use or copy it
+    /// immediately and never store it.
+    func cStr() -> UnsafePointer<Int8>? {
+        return (self as NSString).utf8String
+    }
+}
+
+/// Formats the reference `o` as a pointer string using the `%018p` format.
+///
+/// - Parameter o: the object whose reference bits are formatted.
+/// - Returns: the formatted pointer string.
+func address<T: AnyObject>(o: T) -> String {
+    // Reinterpret the object reference as an integer so it can be handed to the formatter.
+    let referenceBits = unsafeBitCast(o, to: Int.self)
+    return String(format: "%018p", referenceBits)
+}
+
+
+
+
--- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+fileprivate var defaultMetalLibrary: MTLLibrary?
+fileprivate var paddleMobileMetalLibrary: MTLLibrary?
+
+extension MTLDevice {
+    /// Returns the library created by `makeDefaultLibrary()`.
+    ///
+    /// The library is created on first use and cached in the file-private
+    /// `defaultMetalLibrary` variable. Terminates with `fatalError` if none can be created.
+    func defaultLibrary() -> MTLLibrary {
+        if defaultMetalLibrary == nil {
+            defaultMetalLibrary = makeDefaultLibrary()
+        }
+        if let inDefaultLib = defaultMetalLibrary {
+            return inDefaultLib
+        } else {
+            fatalError(" default metal libary is nil")
+        }
+    }
+    
+    /// Returns the library loaded from `default.metallib` in the bundle that contains the
+    /// `Kernel` class.
+    ///
+    /// The library is loaded on first use and cached in the file-private
+    /// `paddleMobileMetalLibrary` variable. Terminates with `fatalError` if the resource is
+    /// missing or cannot be loaded.
+    func paddleMobileLibrary() -> MTLLibrary {
+        if paddleMobileMetalLibrary == nil {
+            guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else {
+                fatalError("Counld't find paddle mobile library")
+            }
+            do {
+                paddleMobileMetalLibrary = try makeLibrary(filepath: path)
+            } catch _ {
+                fatalError("Counld't load paddle mobile library")
+            }
+        }
+        
+        if let inPaddleMobileLib = paddleMobileMetalLibrary {
+            return inPaddleMobileLib
+        } else {
+            fatalError("PaddleMobile metal libary is nil")
+        }
+    }
+    
+    /// Builds a compute pipeline state for the shader function named `funcName`.
+    ///
+    /// - Parameters:
+    ///   - funcName: name of the function to look up in the chosen library.
+    ///   - inPaddleMobileLib: when true (default) the function is looked up in
+    ///     `paddleMobileLibrary()`, otherwise in `defaultLibrary()`.
+    /// - Returns: the pipeline state. Terminates with `fatalError` if the function is not
+    ///   found or the pipeline cannot be created.
+    func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState {
+        let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary()
+        guard let function = useLib.makeFunction(name: funcName) else {
+            fatalError(" function " + funcName + " not found")
+        }
+        do {
+            let pipLine = try makeComputePipelineState(function: function)
+            return pipLine
+        } catch _ {
+            fatalError("make pip line error occured")
+        }
+        
+    }
+    
+    /// Creates a shared-storage buffer holding a copy of `value`.
+    ///
+    /// - Parameter value: elements to copy into the new buffer.
+    /// - Returns: the new buffer. Terminates with `fatalError` when the device returns no
+    ///   buffer or `value` has no storage to copy from (e.g. it is empty).
+    func makeBuffer<P>(value: [P]) -> MTLBuffer {
+        // Array elements sit `stride` bytes apart, so the byte length must be based on
+        // stride; using `size` under-allocates for types with trailing padding.
+        let byteCount = value.count * MemoryLayout<P>.stride
+        let created = value.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> MTLBuffer? in
+            guard let base = raw.baseAddress else {
+                return nil
+            }
+            return self.makeBuffer(bytes: base, length: byteCount, options: MTLResourceOptions.storageModeShared)
+        }
+        guard let buffer = created else {
+            fatalError("Couldn't create metal buffer for \(value.count) element(s)")
+        }
+        return buffer
+    }
+    
+    /// Creates a shared-storage `rgba32Float` 2D-array texture that is shader readable and
+    /// writable, and uploads `value` into it when possible.
+    ///
+    /// The upload only happens when `arrayLength == 1` and `value` has at least 4 elements;
+    /// in every other case the texture is returned without any data written to it.
+    /// NOTE(review): the row pitch is computed as 4 * MemoryLayout<P>.size per pixel, which
+    /// matches `rgba32Float` only for a 4-byte `P` such as `Float` — confirm with callers.
+    ///
+    /// - Parameters:
+    ///   - value: source elements for the upload.
+    ///   - textureWidth: texture width in pixels.
+    ///   - textureHeight: texture height in pixels.
+    ///   - arrayLength: number of slices in the texture array.
+    /// - Returns: the created texture.
+    func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{
+        
+        let textureDesc = MTLTextureDescriptor.init()
+        textureDesc.width = textureWidth
+        textureDesc.height = textureHeight
+        textureDesc.depth = 1
+        textureDesc.usage = [.shaderRead, .shaderWrite]
+        textureDesc.pixelFormat = .rgba32Float
+        textureDesc.textureType = .type2DArray
+        textureDesc.storageMode = .shared
+        textureDesc.cpuCacheMode = .defaultCache
+        textureDesc.arrayLength = arrayLength
+        let texture = makeTexture(descriptor: textureDesc)!
+        
+        if arrayLength == 1 && value.count >= 4{
+            let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size
+            let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth))
+            
+            // Stage the data in a zero-filled scratch area large enough for both the source
+            // elements and every row the upload may read, so a short `value` never causes a
+            // read of uninitialized memory.
+            let sourceBytes = value.count * MemoryLayout<P>.stride
+            let stagingBytes = max(sourceBytes, bytesPerRow * texture.height)
+            let staging = UnsafeMutableRawPointer.allocate(byteCount: stagingBytes, alignment: MemoryLayout<P>.alignment)
+            // The previous staging allocation was never released; free it on every path.
+            defer {
+                staging.deallocate()
+            }
+            staging.initializeMemory(as: UInt8.self, repeating: 0, count: stagingBytes)
+            value.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> Void in
+                if let base = raw.baseAddress {
+                    staging.copyMemory(from: base, byteCount: raw.count)
+                }
+            }
+            texture.replace(region: region, mipmapLevel: 0, withBytes: staging, bytesPerRow: bytesPerRow)
+        }
+        // TODO: uploading into textures with arrayLength > 1 (or from fewer than 4 values)
+        // is not implemented; such textures are returned as created.
+        
+        return texture
+    }
+}
+
+extension MTLComputeCommandEncoder {
+    /// Sets `computePipline` on the encoder and dispatches enough threadgroups to cover
+    /// `outTexture`.
+    ///
+    /// - Parameters:
+    ///   - computePipline: pipeline state to run; also determines the threadgroup shape.
+    ///   - outTexture: texture whose width, height and array length size the grid.
+    func dispatch(computePipline: MTLComputePipelineState, outTexture: MTLTexture) {
+        // Threadgroup shape: one execution width wide, and as tall as the pipeline's
+        // per-threadgroup thread limit allows.
+        let groupW = computePipline.threadExecutionWidth
+        let groupH = computePipline.maxTotalThreadsPerThreadgroup / groupW
+        let threadsPerGroup = MTLSize(width: groupW, height: groupH, depth: 1)
+
+        // Grid: texture extent divided by the threadgroup shape, rounded up, with the
+        // depth derived from the texture's array length.
+        let groups = MTLSize(width: (outTexture.width + groupW - 1) / groupW,
+                             height: (outTexture.height + groupH - 1) / groupH,
+                             depth: (outTexture.arrayLength * 4 + 3) / 4)
+
+        setComputePipelineState(computePipline)
+        dispatchThreadgroups(groups, threadsPerThreadgroup: threadsPerGroup)
+    }
+}
+
+
+public extension MTLTexture {
+
+    /// Reads the texture back to the CPU and pairs every returned value with its
+    /// flat index in the read-back array.
+    ///
+    /// - Parameter stridable: When `true` and more than 100 values were read,
+    ///   only every `count / 100`-th value is returned; otherwise all values are.
+    /// - Returns: `(index, value)` tuples in ascending index order.
+    func stridableFloatArray<P>(stridable: Bool = true) -> [(index: Int, value: P)] {
+        let arr: [P] = floatArray { (p: P) -> P in
+            return p
+        }
+        var result: [(index: Int, value: P)] = []
+        if arr.count > 100 && stridable {
+            // count > 100 guarantees the step is at least 1.
+            for j in stride(from: 0, to: arr.count, by: arr.count / 100) {
+                result.append((j, arr[j]))
+            }
+        } else {
+            result.reserveCapacity(arr.count)
+            for j in 0..<arr.count {
+                result.append((j, arr[j]))
+            }
+        }
+        return result
+    }
+
+    /// Copies mipmap level 0 of the texture into CPU memory, interprets it as a
+    /// sequence of `P` values (four per texel) and maps each value through `res`.
+    ///
+    /// Only `.type2D` and `.type2DArray` textures are handled; any other texture
+    /// type yields an empty array. For array textures the slices are appended in
+    /// slice order.
+    ///
+    /// - Parameter res: Transform applied to every value that was read.
+    /// - Returns: The transformed values.
+    func floatArray<P, T>(res: (P) -> T) -> [T] {
+        var fArr: [T] = []
+        let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size
+        let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
+        if textureType == .type2DArray {
+            let valuesPerSlice = width * height * depth * 4
+            let bytesPerImage = valuesPerSlice * MemoryLayout<P>.size
+            // One scratch buffer, sized to exactly what getBytes is told to write
+            // and what the loop below reads, reused for every slice.
+            let bytes = UnsafeMutableRawPointer.allocate(byteCount: bytesPerImage, alignment: MemoryLayout<P>.alignment)
+            defer { bytes.deallocate() }
+            let p = bytes.assumingMemoryBound(to: P.self)
+            fArr.reserveCapacity(valuesPerSlice * arrayLength)
+            for i in 0..<arrayLength {
+                getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
+                for j in 0..<valuesPerSlice {
+                    fArr.append(res(p[j]))
+                }
+            }
+        } else if textureType == .type2D {
+            let valueCount = width * height * 4
+            let bytes = UnsafeMutableRawPointer.allocate(byteCount: valueCount * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment)
+            defer { bytes.deallocate() }
+            getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
+            let p = bytes.assumingMemoryBound(to: P.self)
+            fArr.reserveCapacity(valueCount)
+            for j in 0..<valueCount {
+                fArr.append(res(p[j]))
+            }
+        }
+        return fArr
+    }
+
+    /// Prints `header`, the texture itself, and its (optionally sampled) contents.
+    ///
+    /// - Parameters:
+    ///   - header: Line printed before the dump.
+    ///   - stridable: Forwarded to `stridableFloatArray(stridable:)`.
+    /// - Returns: Always `nil`. The optional return type only lets the caller pick
+    ///   the element type, e.g. `let _: Float32? = texture.logDesc()`.
+    func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
+        print(header)
+        print("texture: \(self)")
+        let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable)
+        print(res)
+        return nil
+    }
+}
+
+
+public extension MTLBuffer {
+    /// Prints `header`, the buffer itself, and its contents interpreted as `T`.
+    ///
+    /// - Parameters:
+    ///   - header: Line printed before the dump.
+    ///   - stridable: When `true` and the buffer holds more than 1000 elements,
+    ///     only every `count / 100`-th element is printed.
+    /// - Returns: Always `nil`. The optional return type only lets the caller pick
+    ///   the element type.
+    func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
+        print(header)
+        print("MTLBuffer: \(self) ")
+        // Work in elements, not bytes: `length` is a byte count, and indexing a
+        // typed pointer with a byte offset reads past the end of the buffer.
+        let count = length / MemoryLayout<T>.stride
+        let values = contents().assumingMemoryBound(to: T.self)
+        var str = ""
+        if stridable && count > 1000 {
+            // count > 1000 guarantees the step is at least 10.
+            for j in stride(from: 0, to: count, by: count / 100) {
+                str += " \(values[j])"
+            }
+        } else {
+            for i in 0..<count {
+                str += " \(values[i])"
+            }
+        }
+        print(str)
+        return nil
+    }
+
+    /// Creates an `.rgba32Float` texture backed by this buffer's storage,
+    /// starting at offset 0.
+    ///
+    /// - Parameters:
+    ///   - textureWidth: Texture width in texels.
+    ///   - textureHeight: Texture height in texels.
+    ///   - arrayLength: Number of array slices requested in the descriptor.
+    /// - Returns: The new texture; traps if Metal returns `nil`.
+    func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture {
+        let textureDesc = MTLTextureDescriptor.init()
+        textureDesc.width = textureWidth
+        textureDesc.height = textureHeight
+        textureDesc.depth = 1
+        textureDesc.usage = [.shaderRead, .shaderWrite]
+        textureDesc.pixelFormat = .rgba32Float
+        // NOTE(review): buffer-backed textures may be restricted to 2D texture
+        // types; confirm that `.type2DArray` is accepted here.
+        textureDesc.textureType = .type2DArray
+        textureDesc.storageMode = .shared
+        textureDesc.cpuCacheMode = .defaultCache
+        textureDesc.arrayLength = arrayLength
+        // 4 components per texel * 4 bytes per Float32 component.
+        let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)!
+        return texture
+    }
+}
+
+
+
+
+
--- a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
+//
+//  PaddleMobileUnitTest.swift
+//  paddle-mobile
+//
+//  Created by liuRuiLong on 2018/7/25.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Metal
+import Foundation
+
+/// Manual smoke test harness for Metal kernels: runs a kernel on small
+/// hand-written data and prints the resulting textures.
+public class PaddleMobileUnitTest {
+    let device: MTLDevice
+    let queue: MTLCommandQueue
+
+    /// - Parameters:
+    ///   - inDevice: Device used to create the test textures and buffers.
+    ///   - inQueue: Queue the test command buffer is created from.
+    public init(inDevice: MTLDevice, inQueue: MTLCommandQueue) {
+        device = inDevice
+        queue = inQueue
+    }
+
+    /// Runs `ConvAddBatchNormReluKernel` once on a 3x3 input texture and prints
+    /// the input and output textures when the command buffer completes.
+    ///
+    /// Nothing is asserted; the output has to be inspected in the console.
+    public func testConvAddBnRelu() {
+        let commandBuffer = queue.makeCommandBuffer() ?! " buffer is nil "
+
+        // 3 x 3 texels, each texel holding the four values 1, 2, 3, 4.
+        let texel: [Float32] = [1.0, 2.0, 3.0, 4.0]
+        let input = [Float32](Array(repeating: texel, count: 3 * 3).joined())
+
+        // Four groups of 3 x 3 x 4 values, all ones (144 values in total).
+        let filter = [Float32](repeating: 1.0, count: 4 * 3 * 3 * 4)
+
+        let bias: [Float32] = [1.0, 1.0, 1.0, 100.0]
+        let newScale: [Float32] = [1.0, 1.0, 1.0, 1.0]
+        let newBias: [Float32] = [1.0, 1.0, 1.0, 1.0]
+
+        let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 3, arrayLength: 1)
+        let filterBuffer = device.makeBuffer(value: filter)
+        let biasBuffer = device.makeBuffer(value: bias)
+        let newScaleBuffer = device.makeBuffer(value: newScale)
+        let newBiasBuffer = device.makeBuffer(value: newBias)
+
+        // The output starts from an empty value array.
+        let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 2, textureHeight: 2, arrayLength: 1)
+
+        let filterSize: (width: Int, height: Int, channel: Int) = (3, 3, 4)
+        let paddings: (Int, Int) = (1, 1)
+        let stride: (Int, Int) = (2, 2)
+
+        // NOTE(review): `paddedZ` is fed from `paddings.0`; confirm that is intended.
+        let metalParam = MetalConvParam.init(offsetX: Int16(filterSize.width/2 - paddings.0),
+                                             offsetY: Int16(filterSize.height/2 - paddings.1),
+                                             offsetZ: 0,
+                                             strideX: UInt16(stride.0),
+                                             strideY: UInt16(stride.1),
+                                             paddedZ: UInt16(paddings.0))
+
+        let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputTexture,
+                                                       inOutputTexture: outputTexture,
+                                                       inMetalParam: metalParam,
+                                                       inFilterBuffer: filterBuffer,
+                                                       inBiaseBuffer: biasBuffer,
+                                                       inNewScaleBuffer: newScaleBuffer,
+                                                       inNewBiaseBuffer: newBiasBuffer,
+                                                       inFilterSize: filterSize)
+
+        let kernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param)
+        kernel.test(commandBuffer: commandBuffer, param: param)
+
+        commandBuffer.addCompletedHandler { _ in
+            let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
+            let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
+        }
+
+        commandBuffer.commit()
+    }
+}
+
+
+
--- a/metal/paddle-mobile/paddle-mobile/Common/Tools.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift
+//
+//  Tools.swift
+//  paddle-mobile
+//
+//  Created by liuRuiLong on 2018/7/26.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Foundation
+
+/// Writes the raw in-memory bytes of `array` to a file in the user's Library
+/// directory, creating the file or replacing an existing one.
+///
+/// Traps (through `?!`) when the Library path cannot be resolved or the file
+/// cannot be opened for writing.
+///
+/// - Parameters:
+///   - fileName: File name, appended to the Library directory path.
+///   - array: Values whose storage is written verbatim.
+func writeToLibrary<P: PrecisionType>(fileName: String, array: [P]) {
+    let libraryPath = NSSearchPathForDirectoriesInDomains(.libraryDirectory, .userDomainMask, true).last ?! " library path get error "
+    let filePath = libraryPath + "/" + fileName
+    // Copy the bytes while the array's storage pointer is guaranteed valid. A
+    // pointer obtained by passing the array as an argument must not outlive
+    // that single call.
+    let data = array.withUnsafeBufferPointer { Data.init(buffer: $0) }
+    let fileManager = FileManager.init()
+    fileManager.createFile(atPath: filePath, contents: nil, attributes: nil)
+    let fileHandler = FileHandle.init(forWritingAtPath: filePath) ?! " file handler nil "
+    defer { fileHandler.closeFile() }
+    fileHandler.write(data)
+}
+
--- a/metal/paddle-mobile/paddle-mobile/Common/Types.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/Types.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// An `Equatable` value that supports binary `+`, `-` and `*`, each taking two
+/// values of the conforming type and returning one.
+public protocol SummableMultipliable: Equatable {
+    static func + (lhs: Self, rhs: Self) -> Self
+    static func - (lhs: Self, rhs: Self) -> Self
+    static func * (lhs: Self, rhs: Self) -> Self
+}
+/// A numeric element type that can be built from a `Float32`, from a
+/// `Float16`, or from any other `PrecisionType` value.
+public protocol PrecisionType: SummableMultipliable {
+    /// Number reported by the conforming type as its width in bits; the
+    /// conformances in this file use it to tell the concrete types apart.
+    static var bitSize: UInt { get }
+
+    /// Creates a value from a `Float32`.
+    init(inFloat: Float32)
+
+    /// Creates a value from a `Float16`.
+    init(inFloat16: Float16)
+
+    /// Creates a value from another `PrecisionType` value.
+    init<P: PrecisionType>(_ inP: P)
+}
+
+/// Placeholder 16-bit element type. It is an alias for `Int16`, so values are
+/// integers and arithmetic is `Int16` arithmetic, not half-precision floats.
+public typealias Float16 = Int16
+
+// A `*` overload used to be declared here with no return type. Its body
+// (`return prefix * postfix`) could only resolve to itself, so every call
+// recursed without end. `Int16` already supplies the `*` that
+// `SummableMultipliable` requires, so the overload is gone.
+extension Float16: PrecisionType {
+    /// Converts from another `PrecisionType`, choosing the conversion by the
+    /// source type's `bitSize`.
+    ///
+    /// Traps when `P.bitSize` is neither 32 nor 16, or when the forced cast to
+    /// the matching concrete type fails.
+    public init<P>(_ inP: P) where P : PrecisionType {
+        if P.bitSize == Float32.bitSize {
+            self = Float16(inFloat: inP as! Float32)
+        } else if P.bitSize == Float16.bitSize {
+            self = inP as! Float16
+        } else {
+            fatalError()
+        }
+    }
+
+    public static var bitSize: UInt {
+        return 16
+    }
+
+    public init(inFloat16: Float16) {
+        self = inFloat16
+    }
+
+    /// Converts with `Int16(_:)`: the fractional part is dropped, and the
+    /// conversion traps for values that are not representable as `Int16`.
+    public init(inFloat: Float32) {
+        self = Int16(inFloat)
+    }
+}
+
+extension Float32: PrecisionType {
+    public static var bitSize: UInt {
+        return 32
+    }
+
+    /// Identity conversion.
+    public init(inFloat: Float32) {
+        self = inFloat
+    }
+
+    /// Numeric conversion from the integer-backed `Float16` alias.
+    public init(inFloat16: Float16) {
+        self = Float32.init(inFloat16)
+    }
+
+    /// Converts from another `PrecisionType`, choosing the conversion by the
+    /// source type's `bitSize`; traps for any width other than 32 or 16.
+    public init<P>(_ inP: P) where P : PrecisionType {
+        switch P.bitSize {
+        case Float32.bitSize:
+            self = inP as! Float32
+        case Float16.bitSize:
+            self = Float32.init(inP as! Float16)
+        default:
+            fatalError()
+        }
+    }
+}
+
+/// Layout tags for tensor data. The case names presumably spell the axis
+/// order; confirm against the tensor implementation.
+public enum DataLayout {
+    case NCHW, NHWC
+}
+
+/// Common protocol for the value kinds declared below. It adds no requirements
+/// beyond the two description protocols it inherits.
+protocol Variant: CustomStringConvertible, CustomDebugStringConvertible {}
+
+extension Tensor: Variant {}
+
+extension Texture: Variant {}
+
+extension ResultHolder: Variant {}
+
+extension InputTexture: Variant {}
+
+// Intentionally empty constrained extension.
+extension MTLTexture where Self: Variant {}
--- a/metal/paddle-mobile/paddle-mobile/Executor.swift
+++ b/metal/paddle-mobile/paddle-mobile/Executor.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Immutable holder for the outcome of one prediction: the output dimensions,
+/// the flattened output values, and how long the run took.
+public class ResultHolder<P: PrecisionType> {
+    /// Dimensions of the output.
+    public let dim: [Int]
+    /// Output values as a flat array.
+    public let resultArr: [P]
+    /// Elapsed time as supplied by the creator of the holder.
+    public let elapsedTime: Double
+
+    public init(inDim: [Int], inResult: [P], inElapsedTime: Double) {
+        self.dim = inDim
+        self.resultArr = inResult
+        self.elapsedTime = inElapsedTime
+    }
+}
+
+extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
+    /// The dimensions followed by the values. Results with 20 or more values
+    /// are sampled at every `count / 20`-th index instead of printed in full.
+    public var debugDescription: String {
+        let total = resultArr.count
+        let shown: [P]
+        if total < 20 {
+            shown = resultArr
+        } else {
+            // total >= 20 guarantees the step is at least 1.
+            shown = stride(from: 0, to: total, by: total/20).map { resultArr[$0] }
+        }
+        let values = shown.map { " \($0) " }.joined()
+        return "Dim: \(dim) \n value:[ " + values + " ]"
+    }
+
+    /// Same text as `debugDescription`.
+    public var description: String {
+        return debugDescription
+    }
+}
+
+/// Builds the operators described by a `Program` and runs them on Metal.
+public class Executor<P: PrecisionType> {
+    var ops: [Runable & InferShaperable] = []
+    let program: Program
+    let device: MTLDevice
+    let queue: MTLCommandQueue
+
+    /// Creates one operator per op description in every block of the program
+    /// and runs shape inference on each as it is created.
+    ///
+    /// - Throws: Whatever `OpCreator.creat` throws for an op description.
+    public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
+        program = inProgram
+        device = inDevice
+        queue = inQueue
+        for block in inProgram.programDesc.blocks {
+            for opDesc in block.ops {
+                let created = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
+                created.inferShape()
+                ops.append(created)
+            }
+        }
+    }
+
+    /// Encodes every operator into one command buffer, commits it, and delivers
+    /// the result through `completionHandle` once the GPU has finished.
+    ///
+    /// - Parameters:
+    ///   - input: Source texture.
+    ///   - expect: Dimensions wrapped into the `InputTexture` set on the scope.
+    ///   - completionHandle: Called from the command buffer's completed handler
+    ///     with the output and the elapsed time.
+    ///   - preProcessKernle: Optional kernel run on `input` first; when present,
+    ///     its `outputTexture` becomes the network input.
+    /// - Throws: `PaddleMobileError.predictError` when no command buffer can be
+    ///   created, plus anything thrown by the pre-process kernel or an operator.
+    public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws {
+        guard let buffer = queue.makeCommandBuffer() else {
+            throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
+        }
+
+        var resInput = input
+        if let inPre = preProcessKernle {
+            try inPre.compute(inputTexuture: input, commandBuffer: buffer)
+            resInput = inPre.outputTexture
+        }
+
+        // Timing starts after the optional pre-process kernel has been encoded.
+        let beforeDate = Date.init()
+        let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect))
+        program.scope.setInput(input: inputTexture)
+
+        for op in ops {
+            try op.run(device: device, buffer: buffer)
+        }
+
+        buffer.addCompletedHandler { (commandbuffer) in
+            let afterDate = Date.init()
+
+            guard let outputVar = self.program.scope.output() else {
+                fatalError("output nil")
+            }
+
+            guard let output = outputVar as? Texture<P> else {
+                fatalError("output var type error")
+            }
+
+            let values: [P] = output.metalTexture.floatArray(res: { (p:P) -> P in
+                return p
+            })
+            let holder = ResultHolder<P>.init(inDim: output.dim.dims, inResult: values, inElapsedTime: afterDate.timeIntervalSince(beforeDate))
+            completionHandle(holder)
+        }
+        buffer.commit()
+    }
+
+    /// Clears the program's scope.
+    public func clear() {
+        program.scope.clear()
+    }
+
+}
+
+//public let paddle_executor: Executor = Executor.init()
--- a/metal/paddle-mobile/paddle-mobile/Info.plist
+++ b/metal/paddle-mobile/paddle-mobile/Info.plist
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>$(CURRENT_PROJECT_VERSION)</string>
+	<key>NSPrincipalClass</key>
+	<string></string>
+</dict>
+</plist>
--- a/metal/paddle-mobile/paddle-mobile/Loader.swift
+++ b/metal/paddle-mobile/paddle-mobile/Loader.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+import SwiftProtobuf
+
+/// Loads a serialized model description and its parameter file into a `Program`.
+public class Loader<P: PrecisionType> {
+    /// Sequential reader over the parameter file. Each `read(tensor:)` call
+    /// consumes one tensor record and advances the stream.
+    class ParaLoader {
+        let file: UnsafeMutablePointer<FILE>
+        let fileSize: Int
+        var nowIndex: Int
+
+        /// Opens `paramPath` for binary reading and records its size.
+        ///
+        /// - Throws: `PaddleMobileError.loaderError` when the file cannot be
+        ///   opened or its reported size is not positive.
+        init(paramPath: String) throws {
+            guard let tmpFile = fopen(paramPath, "rb") else {
+                throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
+            }
+            fseek(tmpFile, 0, SEEK_END)
+            let size = ftell(tmpFile)
+            guard size > 0 else {
+                // The instance is not initialized yet, so `deinit` will not run
+                // to close the stream; close it here to avoid leaking it.
+                fclose(tmpFile)
+                throw PaddleMobileError.loaderError(message: "param file size is too small")
+            }
+            rewind(tmpFile)
+            file = tmpFile
+            fileSize = size
+            nowIndex = 0
+        }
+
+        /// Reads the next tensor record: skips the record's header fields and
+        /// the tensor description, then fills `tensor.data` with
+        /// `tensor.data.size` bytes from the file.
+        ///
+        /// - Throws: `PaddleMobileError.loaderError` when the read position is
+        ///   already past the end of the file or any read comes up short.
+        func read(tensor: Tensor<P>) throws {
+            guard nowIndex <= fileSize else {
+                throw PaddleMobileError.loaderError(message: "out of the file range")
+            }
+
+            /// Reads one `T` from the file and advances `nowIndex` by its size.
+            func pointerReader<T>(type: T.Type) throws -> T {
+                let byteCount = MemoryLayout<T>.size
+                // A single element is enough; `capacity` counts elements, not bytes.
+                let ptr = UnsafeMutablePointer<T>.allocate(capacity: 1)
+                defer { ptr.deallocate() }
+                // A short read would leave `ptr` uninitialized, so fail instead
+                // of returning garbage.
+                guard fread(ptr, 1, byteCount, file) == byteCount else {
+                    throw PaddleMobileError.loaderError(message: "param read size error")
+                }
+                nowIndex += byteCount
+                return ptr.pointee
+            }
+
+            // Leading UInt32: value unused.
+            let _ = try pointerReader(type: UInt32.self)
+            let lodLevel = try pointerReader(type: UInt64.self)
+            for _ in 0..<lodLevel {
+                // `size` is a byte count; its entries are read and discarded.
+                let size = try pointerReader(type: UInt64.self)
+                for _ in 0..<Int(size/UInt64(MemoryLayout<size_t>.size)){
+                    _ = try pointerReader(type: size_t.self)
+                }
+            }
+
+            // Second UInt32: value unused.
+            let _ = try pointerReader(type: UInt32.self)
+
+            // The serialized tensor description is skipped, not parsed.
+            let tensorDescSize = try pointerReader(type: Int32.self)
+            fseek(file, Int(tensorDescSize), SEEK_CUR)
+            nowIndex += Int(tensorDescSize)
+
+            // The stored data type is not inspected here; the element precision
+            // comes from the generic parameter `P`. Models currently supply
+            // Float parameters; ideally this would follow the model.
+            // TODO: use a script to convert parameters to the target precision.
+            let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
+
+            guard bytesRead == tensor.data.size else {
+                throw PaddleMobileError.loaderError(message: "param read size error")
+            }
+
+            nowIndex += bytesRead
+        }
+
+        deinit {
+            fclose(file)
+        }
+    }
+
+    public init(){}
+
+    /// Decodes the model at `modelPath`, optimizes its program description and
+    /// populates a scope: persistable LoD tensors are read from `paraPath` and
+    /// converted to NHWC, other LoD tensors get a `Texture`, and the fetch
+    /// variable gets an empty `ResultHolder`.
+    ///
+    /// - Parameters:
+    ///   - device: Metal device used to create the textures.
+    ///   - modelPath: Path of the serialized program description.
+    ///   - paraPath: Path of the parameter file.
+    /// - Returns: The program with its description, parameter path and scope.
+    /// - Throws: `PaddleMobileError.loaderError` naming the step that failed.
+    public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{
+        guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else {
+            throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !")
+        }
+
+        // Only the decode step is reported as a protobuf error. The later steps
+        // throw their own messages, which must reach the caller unchanged.
+        let protoProgram: PaddleMobile_Framework_Proto_ProgramDesc
+        do {
+            protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
+                serializedData: modelData)
+        } catch _ {
+            throw PaddleMobileError.loaderError(message: "protobuf decoder error")
+        }
+
+        let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
+        let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
+        print(programDesc)
+
+        guard let paraLoader = try? ParaLoader.init(paramPath: paraPath) else {
+            throw PaddleMobileError.loaderError(message: "load para error")
+        }
+
+        guard programDesc.blocks.count > 0 else {
+            throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0")
+        }
+
+        // The feed and fetch keys come from the first and last op of block 0.
+        let block = programDesc.blocks[0]
+        guard let firstOp = block.ops.first, let lastOp = block.ops.last else {
+            throw PaddleMobileError.loaderError(message: "at least two operator")
+        }
+        guard firstOp.type == gFeedType, lastOp.type == gFetchType else {
+            throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch")
+        }
+
+        guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else {
+            throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found")
+        }
+        guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else {
+            throw PaddleMobileError.loaderError(message: "feed key or fetch key not found")
+        }
+
+        let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey)
+
+        // Populate the scope with one entry per variable.
+        for block in programDesc.blocks {
+            for varDesc in block.vars {
+                if (varDesc.type == .LodTensor) {
+                    guard let tensorDesc = varDesc.tensorDesc else {
+                        throw PaddleMobileError.loaderError(message: "get tensor desc failed")
+                    }
+
+                    if (varDesc.persistable
+                        && varDesc.type != .FeedMiniBatch
+                        && varDesc.type != .FetchList) {
+                        let dimArr = tensorDesc.dims
+
+                        guard dimArr.count > 0 else {
+                            throw PaddleMobileError.loaderError(message: "tensor desc dim size error")
+                        }
+
+                        let dim = Dim.init(inDim: dimArr)
+                        let tensor = Tensor<P>.init(inDim: dim, inLayout: tensorDesc.dataLayout)
+                        try paraLoader.read(tensor: tensor)
+                        tensor.convert(to: .NHWC)
+                        scope[varDesc.name] = tensor
+                    } else {
+                        let dim = Dim.init(inDim: tensorDesc.NHWCDim)
+                        scope[varDesc.name] = Texture<P>.init(device: device, inDim: dim)
+                    }
+                } else {
+                    // Only the fetch variable gets a scope entry here; the feed
+                    // variable and all other variables are left unset.
+                    if varDesc.name == fetchKey {
+                        scope[varDesc.name] = ResultHolder<P>.init(inDim: [], inResult: [], inElapsedTime: 0.0)
+                    }
+                }
+            }
+        }
+
+        let program = Program.init(inProgramDesc: programDesc, inParamPath: paraPath, inScope: scope)
+
+        return program
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// One `OpCreator` instance per precision type, keyed by the type's name.
+fileprivate var singletons : [String : Any] = [:]
+
+/// Factory that turns an `OpDesc` into a runnable operator for precision `P`.
+class OpCreator<P: PrecisionType> {
+    /// The shared creator for `P`, created and cached on first access.
+    static var shared : OpCreator<P> {
+        let key = String(describing: P.self)
+        guard let cached = singletons[key] else {
+            let fresh = OpCreator<P>()
+            singletons[key] = fresh
+            return fresh
+        }
+        return cached as! OpCreator<P>
+    }
+
+    /// Looks up the creator registered for `opDesc.type` and invokes it.
+    ///
+    /// - Throws: `PaddleMobileError.opError` when no creator is registered for
+    ///   the op type, plus anything the creator itself throws.
+    func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope) throws -> Runable & InferShaperable {
+        guard let factory = opCreators[opDesc.type] else {
+            throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
+        }
+        return try factory(device, opDesc, scope)
+    }
+
+    /// Registered op types and the creator for each.
+    let opCreators: [String : (MTLDevice, OpDesc, Scope) throws -> Runable & InferShaperable] =
+        [gConvType                  :     ConvOp<P>.creat,
+         gBatchNormType             :     BatchNormOp<P>.creat,
+         gReluType                  :     ReluOp<P>.creat,
+         gElementwiseAdd            :     ElementwiseAddOp<P>.creat,
+         gFeedType                  :     FeedOp<P>.creat,
+         gFetchType                 :     FetchOp<P>.creat,
+         gConvAddBatchNormReluType  :     ConvAddBatchNormReluOp<P>.creat,
+         gPooType                   :     PoolOp<P>.creat,
+         gSoftmaxType               :     SoftmaxOp<P>.creat,
+         gReshapeType               :     ReshapeOp<P>.creat,
+         gConvAddType               :     ConvAddOp<P>.creat]
+
+    private init(){}
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/*
+ let opInputsOutputsKey  = [gConvType         : (inputs: ["Input"], outputs: ["Output"]),
+ gBatchNormType    : (inputs: ["X"], outputs: ["Y"]),
+ gReluType         : (inputs: ["X"], outputs: ["Out"]),
+ gElementwiseAdd   : (inputs: ["X", "Y"], outputs: ["Out"])]
+ */
+
+/// Contract for an operator's parameter object: it is built from an `OpDesc` and a
+/// `Scope` and exposes the operator's output variable.
+/// The static lookup helpers below all have default implementations in the
+/// `OpParam` extension in this file.
+protocol OpParam {
+    /// Type of the operator's output variable.
+    associatedtype OutputType: Variant
+    var output: OutputType { get set }
+    /// Textual description of `output`; the default implementation returns `output.debugDescription`.
+    func outputDesc() -> String
+    
+    associatedtype ParamPrecisionType: PrecisionType
+    /// Builds the parameter from `opDesc`, resolving variable names through `inScope`.
+    init(opDesc: OpDesc, inScope: Scope) throws
+    /// Looks up `key` in `map` and returns the scope variable named by the first mapped entry.
+    static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType
+    // Each helper below resolves one fixed key through `getFirstTensor` in the default
+    // implementation: inputX "X", inputBiase "Bias", inputMean "Mean", inputScale "Scale",
+    // inputVariance "Variance", inputFilter "Filter", input "Input", output "Output",
+    // outputY "Y", inputY "Y", outputOut "Out".
+    static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType
+    static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
+    static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
+    static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
+    static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
+    /// Returns the attribute stored under `key`, cast to `T`.
+    static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T
+}
+
+/// Default implementations of the `OpParam` lookup helpers.
+extension OpParam {
+    /// Returns the debug description of `output`.
+    func outputDesc() -> String {
+        return output.debugDescription
+    }
+    
+    /// Resolves `key` to a variable stored in the scope.
+    ///
+    /// - Parameters:
+    ///   - key: slot name to look up in `map` (for example "X" or "Filter").
+    ///   - map: slot name -> variable names; only the first variable name is used.
+    ///   - from: scope that holds the variables.
+    /// - Returns: the scope variable cast to `VarType`.
+    /// - Throws: `PaddleMobileError.paramError` when `key` is missing from `map` or maps to
+    ///   an empty list, when the variable is absent from the scope, or when the variable
+    ///   exists but is not a `VarType`.
+    static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType {
+        guard let mapKeys = map[key], mapKeys.count > 0 else {
+            throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty")
+        }
+        let varName = mapKeys[0]
+        guard let variant = from[varName] else {
+            throw PaddleMobileError.paramError(message: varName + " not found in scope")
+        }
+        // A variable of the wrong type is reported separately so that it is not
+        // mistaken for a missing variable.
+        guard let v = variant as? VarType else {
+            throw PaddleMobileError.paramError(message: varName + " in scope is \(type(of: variant)), expected \(VarType.self)")
+        }
+        return v
+    }
+    
+    // The helpers below each resolve one fixed slot name through `getFirstTensor`;
+    // errors are propagated unchanged.
+    
+    /// Variable bound to slot "X" of `inputs`.
+    static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "X", map: inputs, from: from)
+    }
+    
+    /// Variable bound to slot "Input" of `inputs`.
+    static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Input", map: inputs, from: from)
+    }
+    
+    /// Variable bound to slot "Output" of `outputs`.
+    static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Output", map: outputs, from: from)
+    }
+    
+    /// Variable bound to slot "Y" of `outputs`.
+    static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Y", map: outputs, from: from)
+    }
+    
+    /// Variable bound to slot "Y" of `inputs`.
+    static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Y", map: inputs, from: from)
+    }
+    
+    /// Variable bound to slot "Out" of `outputs`.
+    static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Out", map: outputs, from: from)
+    }
+    
+    /// Variable bound to slot "Filter" of `paraInputs`.
+    static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Filter", map: paraInputs, from: from)
+    }
+    
+    /// Variable bound to slot "Bias" of `inputs`.
+    static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Bias", map: inputs, from: from)
+    }
+    
+    /// Variable bound to slot "Mean" of `inputs`.
+    static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Mean", map: inputs, from: from)
+    }
+    
+    /// Variable bound to slot "Scale" of `inputs`.
+    static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Scale", map: inputs, from: from)
+    }
+    
+    /// Variable bound to slot "Variance" of `inputs`.
+    static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Variance", map: inputs, from: from)
+    }
+    
+    /// Returns the attribute stored under `key`, cast to `T`.
+    ///
+    /// - Throws: `PaddleMobileError.paramError` when `key` is absent from `attrs` or the
+    ///   stored attribute cannot be cast to `T`.
+    static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T{
+        guard let attr = attrs[key] else {
+            throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" )
+        }
+        
+        guard let tAttr = attr as? T else {
+            throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" )
+        }
+        return tAttr
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import Foundation
+
+/// Describes a fused operator: a chain of op nodes plus the type name of the fused op.
+protocol Fusion {
+    /// First node of the chain of op types this fusion covers
+    /// (e.g. `ConvAddOp` returns a conv2d node linked to an elementwise_add node).
+    static func fusionNode() -> Node
+    /// Key renames expressed as (from, to) pairs per string key.
+    /// NOTE(review): the conformers visible here all return `[:]`; confirm the exact meaning of the keys.
+    static func change() -> [String : [(from: String, to: String)]]
+    /// Op type name of the fused operator (e.g. `gConvAddType`).
+    static func fusionType() -> String
+}
+
+/// An operator that can be executed against a Metal command buffer.
+protocol Runable {
+    /// Entry point used by callers; the default implementation forwards to `runImpl`.
+    func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
+    /// Operator-specific work; implemented by each concrete op.
+    func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
+    /// Debug hook for printing or dumping the op's output.
+    func delogOutput()
+}
+
+/// Default `Runable` behaviour for types that are also `OperatorProtocol`.
+extension Runable where Self: OperatorProtocol{
+    /// Forwards to `runImpl`; any error it throws reaches the caller unchanged.
+    func run(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try runImpl(device: device, buffer: buffer)
+    }
+    
+    /// Fallback debug hook: prints that this op type provides no output logging.
+    func delogOutput() {
+        print(type + ": has no implementation" )
+    }
+}
+
+/// Static factory requirement for operators; `OpCreator` stores these `creat`
+/// functions in its lookup table.
+protocol Creator where Self: OperatorProtocol{
+    /// Concrete operator type produced by `creat`.
+    associatedtype OpType: OperatorProtocol & Runable & InferShaperable
+    /// Builds an `OpType` from the op description and scope.
+    static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType
+}
+
+/// Default factory: delegates construction to `OpType.provide`.
+extension Creator where Self: OperatorProtocol {
+    /// Builds the operator through `OpType.provide`; construction errors propagate unchanged.
+    static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType {
+        let op = try OpType.provide(device:device, opDesc: opDesc, inScope: inScope)
+        return op
+    }
+}
+
+/// An operator that can derive its output dimensions from its inputs.
+protocol InferShaperable {
+    /// Computes and stores the output dimensions (the ops in this change write `para.output.dim`).
+    func inferShape()
+}
+
+/// Common shape of every operator: the fields copied from its `OpDesc`, its
+/// parameter object and the kernel that consumes that parameter type.
+protocol OperatorProtocol {
+    /// Parameter type of the operator.
+    associatedtype ParamType
+    /// Kernel type; its `ParamType` must equal the operator's `ParamType`.
+    associatedtype KerType:  Computable where Self.KerType.ParamType == ParamType
+    /// Op type name (see the `g...Type` constants).
+    var type: String { get }
+    var scope: Scope { get }
+    var inputs: [String : [String]] { get }
+    var paraInputs: [String : [String]] { get set }
+    // NOTE: spelled `outpus` (sic); it holds the op's outputs map.
+    var outpus: [String : [String]] { get }
+    var attrs: [String : Attr] { get }
+    var para: ParamType { get }
+    var kernel: KerType { get }
+    /// Builds the operator from its description; may throw while resolving parameters.
+    init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws
+}
+
+/// Convenience constructor shared by all operators.
+extension OperatorProtocol {
+    /// Creates an instance through the required initializer; initializer errors propagate unchanged.
+    static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self {
+        let instance = try Self.init(device: device, opDesc: opDesc, inScope: inScope)
+        return instance
+    }
+}
+
+/// Generic base class for operators: stores the fields copied from the `OpDesc`,
+/// the parameter object built from it, and the kernel built from that parameter.
+class Operator <KernelType:  Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
+    typealias ParamType = ParameterType
+    typealias KerType = KernelType
+
+    // Fields copied verbatim from the op description.
+    let type: String
+    let inputs: [String : [String]]
+    var paraInputs: [String : [String]]
+    let outpus: [String : [String]]
+    let attrs: [String : Attr]
+
+    // Objects created during initialization.
+    let para: ParamType
+    let scope: Scope
+    var kernel: KerType
+
+    /// Copies the description fields, then builds the parameter object and the kernel.
+    ///
+    /// - Throws: any error thrown by `ParamType.init(opDesc:inScope:)`.
+    required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
+        scope = inScope
+        type = opDesc.type
+        attrs =  opDesc.attrs
+        inputs = opDesc.inputs
+        paraInputs = opDesc.paraInputs
+        outpus = opDesc.outputs
+        // The kernel is constructed from the parameter, so the parameter comes first.
+        para = try ParamType.init(opDesc:opDesc, inScope: inScope)
+        kernel = KernelType.init(device: device, param: para)
+    }
+}
+
+// Op type names. They are the keys of `OpCreator.opCreators` (looked up with
+// `opDesc.type`) and of the `opInfos` table below.
+// NOTE: `gPooType` (sic) is the name used for "pool2d" throughout the code.
+let gFetchType                  = "fetch"
+let gFeedType                   = "feed"
+let gConvType                   = "conv2d"
+let gBatchNormType              = "batch_norm"
+let gReluType                   = "relu"
+let gElementwiseAdd             = "elementwise_add"
+let gConvAddBatchNormReluType   = "conv_add_batchnorm_relu"
+let gPooType                    = "pool2d"
+let gSoftmaxType                = "softmax"
+let gReshapeType                = "reshape"
+let gConvAddType                = "conv_add"
+
+
+/// For each op type name, the slot keys recorded as its inputs and outputs.
+let opInfos: [String : (inputs: [String], outputs: [String])] = [
+    gConvType: (inputs: ["Input"], outputs: ["Output"]),
+    gBatchNormType: (inputs: ["X"], outputs: ["Y"]),
+    gReluType: (inputs: ["X"], outputs: ["Out"]),
+    gElementwiseAdd: (inputs: ["X"], outputs: ["Out"]),
+    gFeedType: (inputs: ["X"], outputs: ["Out"]),
+    gFetchType: (inputs: ["X"], outputs: ["Out"]),
+    gConvAddBatchNormReluType: (inputs: ["Input"], outputs: ["Out"]),
+    gPooType: (inputs: ["X"], outputs: ["Out"]),
+    gSoftmaxType: (inputs: ["X"], outputs: ["Out"]),
+    gReshapeType: (inputs: ["X"], outputs: ["Out"]),
+    gConvAddType: (inputs: ["Input"], outputs: ["Out"]),
+]
--- a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+import Foundation
+
+/// Parameters of the batch-norm operator, resolved from an `OpDesc` and a `Scope`.
+class BatchNormParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    // Variables looked up in the scope.
+    let input: Texture<P>
+    var output: Texture<P>
+    let inputBias: Tensor<ParamPrecisionType>
+    let inputMean: Tensor<ParamPrecisionType>
+    let inputScale: Tensor<ParamPrecisionType>
+    let inputVariance: Tensor<ParamPrecisionType>
+
+    // Values read from the op attributes.
+    let epsilon: Float
+    let momentum: Float
+    let is_test: Bool
+
+    /// Resolves slot "X" from the inputs, "Y" from the outputs, Bias/Mean/Scale/Variance
+    /// from the parameter inputs, and the `epsilon`, `momentum` and `is_test` attributes.
+    ///
+    /// - Throws: the first lookup or cast error; errors are propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let paraInputs = opDesc.paraInputs
+        let attrs = opDesc.attrs
+        input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope)
+        inputBias = try BatchNormParam.inputBiase(inputs: paraInputs, from: inScope)
+        inputMean = try BatchNormParam.inputMean(inputs: paraInputs, from: inScope)
+        inputScale = try BatchNormParam.inputScale(inputs: paraInputs, from: inScope)
+        inputVariance = try BatchNormParam.inputVariance(inputs: paraInputs, from: inScope)
+        epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: attrs)
+        momentum = try BatchNormParam.getAttr(key: "momentum", attrs: attrs)
+        is_test = try BatchNormParam.getAttr(key: "is_test", attrs: attrs)
+    }
+}
+
+/// Batch-norm operator backed by `BatchNormKernel`.
+class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = BatchNormOp<P>
+
+    /// The output takes the same dimensions as the input.
+    func inferShape() {
+        para.output.dim = para.input.dim
+    }
+
+    /// Calls `kernel.compute` with `buffer` and `para`; kernel errors propagate unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+}
+
+
+
+
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the fused conv + add + batch-norm + relu operator.
+class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    // Variables looked up in the scope.
+    let input: Texture<P>
+    var output: Texture<P>
+    let filter: Tensor<ParamPrecisionType>
+    let variance: Tensor<ParamPrecisionType>
+    let bias: Tensor<ParamPrecisionType>
+    let mean: Tensor<ParamPrecisionType>
+    let scale: Tensor<ParamPrecisionType>
+    let y: Tensor<ParamPrecisionType>
+
+    // Values read from the op attributes.
+    let epsilon: Float32
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+
+    // Not set by this initializer.
+    // NOTE(review): presumably filled in later by the kernel — confirm.
+    var newScale: MTLBuffer?
+    var newBiase: MTLBuffer?
+
+    /// Resolves the filter, input and output variables, then the convolution and
+    /// batch-norm attributes, then the remaining parameter inputs.
+    ///
+    /// - Throws: the first lookup or cast error; errors are propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let paraInputs = opDesc.paraInputs
+        let attrs = opDesc.attrs
+
+        filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: paraInputs, from: inScope)
+        input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
+
+        stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: attrs)
+        paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: attrs)
+        dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: attrs)
+        epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: attrs)
+        groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: attrs)
+
+        variance = try ConvAddBatchNormReluParam.inputVariance(inputs: paraInputs, from: inScope)
+        bias = try ConvAddBatchNormReluParam.inputBiase(inputs: paraInputs, from: inScope)
+        scale = try ConvAddBatchNormReluParam.inputScale(inputs: paraInputs, from: inScope)
+        mean = try ConvAddBatchNormReluParam.inputMean(inputs: paraInputs, from: inScope)
+        y = try ConvAddBatchNormReluParam.inputY(inputs: paraInputs, from: inScope)
+    }
+}
+
+/// Fused operator covering the chain conv2d -> elementwise_add -> batch_norm -> relu.
+class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKernel<P>, ConvAddBatchNormReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
+    typealias OpType = ConvAddBatchNormReluOp<P>
+
+    /// Chain of op types covered by this fusion; the first node is returned.
+    static func fusionNode() -> Node {
+        let beginNode = Node.init(inType: gConvType)
+        _ = beginNode --> Node.init(inType: gElementwiseAdd) --> Node.init(inType: gBatchNormType) --> Node.init(inType: gReluType)
+        return beginNode
+    }
+
+    /// This fusion declares no key changes.
+    static func change() -> [String : [(from: String, to: String)]] {
+        return [:]
+    }
+
+    /// Op type name of the fused operator.
+    static func fusionType() -> String {
+        return gConvAddBatchNormReluType
+    }
+
+    /// Derives the output dimensions from the input and filter dimensions and the
+    /// stride, padding and dilation attributes.
+    func inferShape() {
+        let inDims = para.input.dim
+        let filterDim = para.filter.dim
+        let dilations = para.dilations
+        let paddings = para.paddings
+
+        // The first input dimension is carried over unchanged.
+        var outDim = [inDims[0]]
+        for (axis, rawStride) in para.stride.enumerated() {
+            let inputSize: Int = inDims[axis + 1]
+            let filterSize: Int = filterDim[axis + 1]
+            // Extent of the filter once dilation is applied.
+            let dKernel = Int(dilations[axis]) * (filterSize - 1) + 1
+            let padded = inputSize + 2 * Int(paddings[axis])
+            outDim.append((padded - dKernel) / Int(rawStride) + 1)
+        }
+        // The last output dimension is taken from filterDim[0].
+        outDim.append(filterDim[0])
+        para.output.dim = Dim.init(inDim: outDim)
+    }
+
+    /// Calls `kernel.compute` with `buffer` and `para`; kernel errors propagate unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug hook: reads the output texture as an array and writes it out through
+    /// `writeToLibrary` under a fixed file name, then prints a confirmation.
+    func delogOutput() {
+        let values = para.output.metalTexture.floatArray { (p: P) -> P in
+            return p
+        }
+        writeToLibrary(fileName: "output_112x112x32_2", array: values)
+        print(" write done")
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the fused conv + add operator.
+class ConvAddParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    // Variables looked up in the scope.
+    let input: Texture<P>
+    var output: Texture<P>
+    let filter: Tensor<ParamPrecisionType>
+    let y: Tensor<ParamPrecisionType>
+
+    // Values read from the op attributes.
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+
+    /// Resolves the filter, input and output variables, the convolution attributes,
+    /// and finally slot "Y" of the parameter inputs.
+    ///
+    /// - Throws: the first lookup or cast error; errors are propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let paraInputs = opDesc.paraInputs
+        let attrs = opDesc.attrs
+
+        filter = try ConvAddParam.inputFilter(paraInputs: paraInputs, from: inScope)
+        input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
+
+        stride = try ConvAddParam.getAttr(key: "strides", attrs: attrs)
+        paddings = try ConvAddParam.getAttr(key: "paddings", attrs: attrs)
+        dilations = try ConvAddParam.getAttr(key: "dilations", attrs: attrs)
+        groups = try ConvAddParam.getAttr(key: "groups", attrs: attrs)
+
+        y = try ConvAddParam.inputY(inputs: paraInputs, from: inScope)
+    }
+}
+
+/// Fused operator covering the chain conv2d -> elementwise_add.
+class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{
+    typealias OpType = ConvAddOp<P>
+
+    /// Chain of op types covered by this fusion; the first node is returned.
+    static func fusionNode() -> Node {
+        let beginNode = Node.init(inType: gConvType)
+        _ = beginNode --> Node.init(inType: gElementwiseAdd)
+        return beginNode
+    }
+
+    /// This fusion declares no key changes.
+    static func change() -> [String : [(from: String, to: String)]] {
+        return [:]
+    }
+
+    /// Op type name of the fused operator.
+    static func fusionType() -> String {
+        return gConvAddType
+    }
+
+    /// Derives the output dimensions from the input and filter dimensions and the
+    /// stride, padding and dilation attributes.
+    func inferShape() {
+        let inDims = para.input.dim
+        let filterDim = para.filter.dim
+        let dilations = para.dilations
+        let paddings = para.paddings
+
+        // The first input dimension is carried over unchanged.
+        var outDim = [inDims[0]]
+        for (axis, rawStride) in para.stride.enumerated() {
+            let inputSize: Int = inDims[axis + 1]
+            let filterSize: Int = filterDim[axis + 1]
+            // Extent of the filter once dilation is applied.
+            let dKernel = Int(dilations[axis]) * (filterSize - 1) + 1
+            let padded = inputSize + 2 * Int(paddings[axis])
+            outDim.append((padded - dKernel) / Int(rawStride) + 1)
+        }
+        // The last output dimension is taken from filterDim[0].
+        outDim.append(filterDim[0])
+        para.output.dim = Dim.init(inDim: outDim)
+    }
+
+    /// Calls `kernel.compute` with `buffer` and `para`; kernel errors propagate unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the plain convolution operator.
+class ConvParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    // Variables looked up in the scope.
+    let input: Texture<P>
+    let filter: Tensor<ParamPrecisionType>
+    var output: Texture<P>
+
+    // Values read from the op attributes.
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+
+    /// Resolves the filter ("Filter"), input ("Input") and output ("Output") variables
+    /// and the `strides`, `paddings`, `dilations` and `groups` attributes.
+    ///
+    /// - Throws: the first lookup or cast error; errors are propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let attrs = opDesc.attrs
+
+        filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
+        input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try ConvParam.output(outputs: opDesc.outputs, from: inScope)
+
+        stride = try ConvParam.getAttr(key: "strides", attrs: attrs)
+        paddings = try ConvParam.getAttr(key: "paddings", attrs: attrs)
+        dilations = try ConvParam.getAttr(key: "dilations", attrs: attrs)
+        groups = try ConvParam.getAttr(key: "groups", attrs: attrs)
+    }
+}
+
+/// Convolution operator backed by `ConvKernel`.
+class ConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = ConvOp<P>
+
+    /// Forwards to the `Operator` initializer; its errors propagate unchanged.
+    required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
+        try super.init(device: device, opDesc: opDesc, inScope: inScope)
+    }
+
+    /// Derives the output dimensions from the input and filter dimensions and the
+    /// stride, padding and dilation attributes.
+    func inferShape() {
+        let inDims = para.input.dim
+        let filterDim = para.filter.dim
+        let dilations = para.dilations
+        let paddings = para.paddings
+
+        // The first input dimension is carried over unchanged.
+        var outDim = [inDims[0]]
+        for (axis, rawStride) in para.stride.enumerated() {
+            let inputSize: Int = inDims[axis + 1]
+            let filterSize: Int = filterDim[axis + 1]
+            // Extent of the filter once dilation is applied.
+            let dKernel = Int(dilations[axis]) * (filterSize - 1) + 1
+            let padded = inputSize + 2 * Int(paddings[axis])
+            outDim.append((padded - dKernel) / Int(rawStride) + 1)
+        }
+        // The last output dimension is taken from filterDim[0].
+        outDim.append(filterDim[0])
+        para.output.dim = Dim.init(inDim: outDim)
+    }
+
+    /// Calls `kernel.compute` with `buffer` and `para`; kernel errors propagate unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug hook: prints a header line followed by the output's Metal texture.
+    func delogOutput() {
+        print("conv output : ")
+        print(para.output.metalTexture)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+import Foundation
+
+/// Parameters of the element-wise add operator.
+class ElementwiseAddParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    let input: Texture<P>
+    let inputY: Tensor<P>
+    var output: Texture<P>
+    let axis: Int
+
+    /// Resolves slot "X" from the inputs, slot "Y" from the parameter inputs,
+    /// slot "Out" from the outputs and the `axis` attribute.
+    ///
+    /// - Throws: the first lookup or cast error; errors are propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
+        inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
+        output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs)
+    }
+}
+
+/// Element-wise add operator.
+class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, ElementwiseAddParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = ElementwiseAddOp<P>
+
+    /// The output takes the same dimensions as the input.
+    func inferShape() {
+        para.output.dim = para.input.dim
+    }
+
+    /// Currently performs no work.
+    /// NOTE(review): this does not call `kernel.compute`, unlike the other ops' `runImpl` — confirm this is intentional.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+    }
+}
+
+
+
+
+
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the feed operator: the output texture plus access to the scope's input.
+class FeedParam<P: PrecisionType>: OpParam{
+    typealias ParamPrecisionType = P
+
+    let scope: Scope
+    var output: Texture<P>
+
+    /// The scope's current input, force-cast to `InputTexture` on every access.
+    var input: InputTexture {
+        let current = scope.input()
+        return current as! InputTexture
+    }
+
+    /// Stores the scope and resolves slot "Out" of the outputs.
+    ///
+    /// - Throws: the lookup or cast error from `outputOut`, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        scope = inScope
+        output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Feed operator: encodes `Texture2DTo2DArrayKernel` with its `FeedParam`.
+class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = FeedOp<P>
+
+    /// Logs the output dimensions; no shape is modified here.
+    func inferShape() {
+        print("feed output: \(para.output.dim)")
+    }
+
+    /// Encodes the kernel into `buffer`. Kernel errors reach the caller unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug hook; currently does nothing.
+    func delogOutput() {}
+}
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the fetch operator. `output` starts out as the same texture
+/// value as `input`.
+class FetchParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    let scope: Scope
+    let input: Texture<P>
+    var output: Texture<P>
+
+    /// Looks up the op's X input in `inScope`. A failed lookup propagates its
+    /// error unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let fetched: Texture<P> = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
+        scope = inScope
+        input = fetched
+        output = fetched
+    }
+}
+
+/// Kernel used by `FetchOp`. It is created with the "texture2d_to_2d_array"
+/// function name, but `compute` encodes nothing.
+class FetchKernel<P: PrecisionType>: Kernel, Computable {
+    required init(device: MTLDevice, param: FetchParam<P>) {
+        super.init(device: device, inFunctionName: "texture2d_to_2d_array")
+    }
+
+    /// Currently a no-op.
+    func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {}
+}
+
+/// Fetch operator: publishes its parameter's output texture on the scope.
+class FetchOp<P: PrecisionType>: Operator<FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = FetchOp<P>
+
+    /// Logs the input dimensions; no shape is modified here.
+    func inferShape() {
+        let inputDim = para.input.dim
+        print(inputDim)
+    }
+
+    /// Hands `para.output` to the scope; nothing is encoded into `buffer`.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        let fetched = para.output
+        scope.setOutput(output: fetched)
+    }
+}
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Batch-norm kernel.
+///
+/// At construction time the scale, bias, mean and variance buffers of the
+/// parameter are folded into two device buffers:
+///
+///     newScale[i] = scale[i] / sqrt(variance[i] + epsilon)
+///     newBias[i]  = bias[i] - mean[i] * newScale[i]
+///
+/// `compute` then binds those two buffers together with the input and output
+/// textures and dispatches the "batchnorm" function.
+class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
+    var newScale: MTLBuffer
+    var newBias: MTLBuffer
+
+    required init(device: MTLDevice, param: BatchNormParam<P>) {
+        guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else {
+            fatalError()
+        }
+        guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else {
+            fatalError()
+        }
+        self.newScale = newScale
+        self.newBias = newBias
+
+        super.init(device: device, inFunctionName: "batchnorm")
+
+        // MTLBuffer.length is a byte count; divide by the element stride to get
+        // the number of `P` values actually stored.
+        let elementStride = MemoryLayout<P>.stride
+        let varianceBuffer: MTLBuffer = param.inputVariance.buffer
+        let varianceCount = varianceBuffer.length / elementStride
+        let channelCount = newScale.length / elementStride
+
+        // Size the scratch array in elements (it used to be sized in bytes).
+        // Entries past `varianceCount` stay 0 so a longer scale buffer can still
+        // be indexed safely below.
+        var invStd = [Float32](repeating: 0, count: max(varianceCount, channelCount))
+        let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<varianceCount {
+            invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot()
+        }
+
+        let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self)
+        let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self)
+        let scale: MTLBuffer = param.inputScale.buffer
+        let scaleContents = scale.contents().assumingMemoryBound(to: P.self)
+        let bias: MTLBuffer = param.inputBias.buffer
+        let biasContents = bias.contents().assumingMemoryBound(to: P.self)
+        let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)
+
+        for i in 0..<channelCount {
+            newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i]))
+            newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i]))
+        }
+    }
+
+    /// Encodes one dispatch over the output texture.
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        print("BatchNorm compute")
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setBuffer(newScale, offset: 0, index: 0)
+        encoder.setBuffer(newBias, offset: 0, index: 1)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Bundle of raw Metal resources consumed by
+/// `ConvAddBatchNormReluKernel.test(commandBuffer:param:)`.
+struct ConvAddBatchNormReluTestParam: TestParam {
+    let inputTexture: MTLTexture
+    let outputTexture: MTLTexture
+    var metalParam: MetalConvParam
+    let filterBuffer: MTLBuffer
+    let biaseBuffer: MTLBuffer
+    let newScaleBuffer: MTLBuffer
+    let newBiaseBuffer: MTLBuffer
+    /// Filter extent; the kernel initializer uses it to pick the shader function.
+    let filterSize: (width: Int, height: Int, channel: Int)
+
+    /// Stores every argument in the field of the matching name.
+    init(inInputTexture: MTLTexture,
+         inOutputTexture: MTLTexture,
+         inMetalParam: MetalConvParam,
+         inFilterBuffer: MTLBuffer,
+         inBiaseBuffer: MTLBuffer,
+         inNewScaleBuffer: MTLBuffer,
+         inNewBiaseBuffer: MTLBuffer,
+         inFilterSize: (width: Int, height: Int, channel: Int)) {
+        self.inputTexture = inInputTexture
+        self.outputTexture = inOutputTexture
+        self.metalParam = inMetalParam
+        self.filterBuffer = inFilterBuffer
+        self.biaseBuffer = inBiaseBuffer
+        self.newScaleBuffer = inNewScaleBuffer
+        self.newBiaseBuffer = inNewBiaseBuffer
+        self.filterSize = inFilterSize
+    }
+}
+
+/// Fused convolution + add + batch-norm + ReLU kernel.
+///
+/// The shader function is chosen from the filter extent: 1x1 filters use
+/// "conv_add_batch_norm_relu_1x1", single-channel filters use
+/// "depthwise_conv_add_batch_norm_relu_3x3", everything else uses
+/// "conv_add_batch_norm_relu_3x3".
+class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
+    /// Test initializer: only selects the shader function. `metalParam` stays
+    /// unset because `test` takes its parameters from the test param instead.
+    required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) {
+        if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
+        } else if testParam.filterSize.channel == 1 {
+            super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
+        } else {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
+        }
+    }
+
+    /// Offsets/strides handed to the shader at buffer index 0 by `compute`.
+    var metalParam: MetalConvParam!
+
+    /// Selects the shader, initialises the parameter's device buffers and
+    /// pre-folds the batch-norm statistics into `param.newScale` / `param.newBiase`:
+    ///
+    ///     newScale[i] = scale[i] / sqrt(variance[i] + epsilon)
+    ///     newBiase[i] = bias[i] - mean[i] * newScale[i]
+    required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
+
+        if param.filter.width == 1 && param.filter.height == 1 {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
+        } else if param.filter.channel == 1 {
+            super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
+        } else {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
+        }
+
+        param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+
+        param.variance.initBuffer(device: device)
+        param.mean.initBuffer(device: device)
+        param.scale.initBuffer(device: device)
+        param.bias.initBuffer(device: device)
+
+        let offsetX = param.filter.width/2 - Int(param.paddings[0])
+        let offsetY = param.filter.height/2 - Int(param.paddings[1])
+
+        print("offset x: \(offsetX)")
+        print("offset y: \(offsetY)")
+
+        let offsetZ = 0.0
+        metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
+
+        // MTLBuffer.length is a byte count; element counts are length / stride.
+        let elementStride = MemoryLayout<P>.stride
+
+        var invs: [P] = []
+        let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
+
+        for i in 0..<param.variance.buffer.length / elementStride {
+            let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
+            invs.append(P(inv))
+        }
+
+        let scaleByteLength = param.scale.buffer.length
+        let biasByteLength = param.bias.buffer.length
+        let channelCount = scaleByteLength / elementStride
+        // Scratch capacity is counted in elements (previously the byte length
+        // was passed, over-allocating by a factor of the stride). It is rounded
+        // up so that copying the full byte length of either buffer below never
+        // reads past the allocation.
+        let scratchCapacity = (max(scaleByteLength, biasByteLength) + elementStride - 1) / elementStride
+
+        let newScale: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: scratchCapacity)
+        let newBiase: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: scratchCapacity)
+        defer {
+            // Only the first `channelCount` elements were ever written.
+            newScale.deinitialize(count: channelCount)
+            newScale.deallocate()
+            newBiase.deinitialize(count: channelCount)
+            newBiase.deallocate()
+        }
+
+        let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
+        let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
+        let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<channelCount {
+            newScale[i] = invs[i] * scaleContents[i]
+            newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i]
+        }
+        // makeBuffer(bytes:length:) copies the scratch memory, so it can be
+        // released (by the defer above) as soon as this initializer returns.
+        param.newBiase = device.makeBuffer(bytes: newBiase, length: biasByteLength)
+        param.newScale = device.makeBuffer(bytes: newScale, length: scaleByteLength)
+    }
+
+    /// Encodes one dispatch over the output texture.
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
+        encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
+        encoder.setBuffer(param.newScale!, offset: 0, index: 3)
+        encoder.setBuffer(param.newBiase!, offset: 0, index: 4)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+
+    /// Test entry point: same bindings as `compute`, but every resource comes
+    /// from `param`. Traps if no compute encoder can be created.
+    public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            fatalError()
+        }
+
+        encoder.setTexture(param.inputTexture, index: 0)
+        encoder.setTexture(param.outputTexture, index: 1)
+        var inMetalParam = param.metalParam
+        encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
+        encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2)
+        encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3)
+        encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4)
+        encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
+        encoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Convolution + add kernel. Always dispatches the "conv_add_1x1" function.
+class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
+    /// Offsets/strides handed to the shader at buffer index 0 by `compute`.
+    var metalParam: MetalConvParam!
+
+    /// Initialises the filter and `y` device buffers and builds `metalParam`
+    /// from the filter size, paddings, strides and input texture.
+    required init(device: MTLDevice, param: ConvAddParam<P>) {
+        super.init(device: device, inFunctionName: "conv_add_1x1")
+
+        // Sampling offset: half the filter extent minus the padding.
+        let offsetX = param.filter.width/2 - Int(param.paddings[0])
+        let offsetY = param.filter.height/2 - Int(param.paddings[1])
+
+        param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+
+        print("offset x: \(offsetX)")
+        print("offset y: \(offsetY)")
+
+        let offsetZ: Int16 = 0
+        let paddedZ = UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])
+        metalParam = MetalConvParam.init(offsetX: Int16(offsetX),
+                                         offsetY: Int16(offsetY),
+                                         offsetZ: offsetZ,
+                                         strideX: UInt16(param.stride[0]),
+                                         strideY: UInt16(param.stride[1]),
+                                         paddedZ: paddedZ)
+    }
+
+    /// Encodes one dispatch over the output texture.
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+
+        let outTexture = param.output.metalTexture
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(outTexture, index: 1)
+        encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
+        encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
+        encoder.dispatch(computePipline: pipline, outTexture: outTexture)
+        encoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Convolution parameters bound at buffer(0) by the host-side kernels.
+struct MetalConvParam {
+    // Offset added to the strided thread position to get the sampling centre.
+    short offsetX;
+    short offsetY;
+    short offsetZ;
+    // Step, in input texels, between neighbouring output positions.
+    ushort strideX;
+    ushort strideY;
+    // The host-side initializer also passes a trailing `paddedZ`; it is declared
+    // here so both sides describe the same bytes. No kernel in this file reads it.
+    // NOTE(review): assumes the host struct stores paddedZ last, as a 16-bit
+    // unsigned value - confirm against the Swift declaration.
+    ushort paddedZ;
+};
+
+
+// Half-precision 1x1 convolution followed by bias add, scale/shift and ReLU.
+// One thread produces one texel (four channels) of output slice gid.z.
+// Weight indexing: each output slice owns four consecutive rows of
+// `sliceCount` half4 values, one row per output component (x, y, z, w).
+kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device half4 *weights [[buffer(1)]],
+                                         const device half4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    // Threads outside the output texture have nothing to write.
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height() &&
+                              gid.z < outTexture.get_array_size();
+    if (!insideOutput) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(posInInput.x, posInInput.y);
+
+    const uint sliceCount = inTexture.get_array_size();
+    const uint rowX = gid.z * sliceCount * 4;
+    const uint rowY = rowX + sliceCount;
+    const uint rowZ = rowY + sliceCount;
+    const uint rowW = rowZ + sliceCount;
+
+    half4 acc = half4(0.0);
+    for (uint slice = 0; slice < sliceCount; ++slice) {
+        const half4 texel = inTexture.sample(sample, samplePos, slice);
+        acc.x += dot(texel, weights[rowX + slice]);
+        acc.y += dot(texel, weights[rowY + slice]);
+        acc.z += dot(texel, weights[rowZ + slice]);
+        acc.w += dot(texel, weights[rowW + slice]);
+    }
+
+    // Bias add, scale/shift and ReLU are evaluated in float, then narrowed to half.
+    acc = half4(fmax((float4(acc) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// Half-precision 3x3 convolution followed by bias add, scale/shift and ReLU.
+// One thread produces one texel (four channels) of output slice gid.z.
+// Weight indexing: per output slice, four blocks (one per output component)
+// of 9 * input_arr_size half4 values, ordered tap-major then input slice.
+kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device half4 *weights [[buffer(1)]],
+                                         const device half4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    // Signed arithmetic: the offsets are `short` and may be negative. In
+    // unsigned 16-bit math a centre of -1 wraps to 65535, so its "+ 1"
+    // neighbour would become 65536 (sampled as zero) instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+
+    // Out-of-range coordinates read as zero, which supplies the padding.
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    half4 output = half4(0.0);
+
+    half4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        // 3x3 neighbourhood of input slice i, row-major from top-left.
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+
+            half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+
+            half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+
+            half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    // Bias add, scale/shift and ReLU are evaluated in float, then narrowed to half.
+    output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Half-precision 1x1 convolution followed by a bias add.
+// One thread produces one texel (four channels) of output slice gid.z.
+// Weight indexing: each output slice owns four consecutive rows of
+// `sliceCount` half4 values, one row per output component (x, y, z, w).
+kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                         constant MetalConvParam &param [[buffer(0)]],
+                         const device half4 *weights [[buffer(1)]],
+                         const device half4 *biase [[buffer(2)]],
+                         uint3 gid [[thread_position_in_grid]]) {
+
+    // Threads outside the output texture have nothing to write.
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height() &&
+                              gid.z < outTexture.get_array_size();
+    if (!insideOutput) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(posInInput.x, posInInput.y);
+
+    const uint sliceCount = inTexture.get_array_size();
+    const uint rowX = gid.z * sliceCount * 4;
+    const uint rowY = rowX + sliceCount;
+    const uint rowZ = rowY + sliceCount;
+    const uint rowW = rowZ + sliceCount;
+
+    half4 acc = half4(0.0);
+    for (uint slice = 0; slice < sliceCount; ++slice) {
+        const half4 texel = inTexture.sample(sample, samplePos, slice);
+        acc.x += dot(texel, weights[rowX + slice]);
+        acc.y += dot(texel, weights[rowY + slice]);
+        acc.z += dot(texel, weights[rowZ + slice]);
+        acc.w += dot(texel, weights[rowW + slice]);
+    }
+    acc = acc + biase[gid.z];
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// Half-precision depthwise 3x3 convolution followed by bias add, scale/shift
+// and ReLU. Each output slice gid.z reads only the input slice of the same
+// index. Weights are scalar halfs: per slice, four runs of 9 taps, one run
+// per channel (x, y, z, w).
+kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                                   texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                                   constant MetalConvParam &param [[buffer(0)]],
+                                                   const device half *weights [[buffer(1)]],
+                                                   const device half4 *biase [[buffer(2)]],
+                                                   const device float4 *new_scale [[buffer(3)]],
+                                                   const device float4 *new_biase [[buffer(4)]],
+                                                   uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    // Signed arithmetic: the offsets are `short` and may be negative. In
+    // unsigned 16-bit math a centre of -1 wraps to 65535, so its "+ 1"
+    // neighbour would become 65536 (sampled as zero) instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+    // Out-of-range coordinates read as zero, which supplies the padding.
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    half4 output = half4(0.0);
+    // 3x3 neighbourhood, row-major from top-left.
+    half4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        half4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    // Bias add, scale/shift and ReLU are evaluated in float, then narrowed to half.
+    output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
+/*---------------------------------------------*/
+
+
+
+// Float 1x1 convolution followed by bias add, scale/shift and ReLU.
+// One thread produces one texel (four channels) of output slice gid.z.
+// Weight indexing: each output slice owns four consecutive rows of
+// `sliceCount` float4 values, one row per output component (x, y, z, w).
+kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    // Threads outside the output texture have nothing to write.
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height() &&
+                              gid.z < outTexture.get_array_size();
+    if (!insideOutput) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(posInInput.x, posInInput.y);
+
+    const uint sliceCount = inTexture.get_array_size();
+    const uint rowX = gid.z * sliceCount * 4;
+    const uint rowY = rowX + sliceCount;
+    const uint rowZ = rowY + sliceCount;
+    const uint rowW = rowZ + sliceCount;
+
+    float4 acc = float4(0.0);
+    for (uint slice = 0; slice < sliceCount; ++slice) {
+        const float4 texel = inTexture.sample(sample, samplePos, slice);
+        acc.x += dot(texel, weights[rowX + slice]);
+        acc.y += dot(texel, weights[rowY + slice]);
+        acc.z += dot(texel, weights[rowZ + slice]);
+        acc.w += dot(texel, weights[rowW + slice]);
+    }
+    acc = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// Float 3x3 convolution followed by bias add, scale/shift and ReLU.
+// One thread produces one texel (four channels) of output slice gid.z.
+// Weight indexing: per output slice, four blocks (one per output component)
+// of 9 * input_arr_size float4 values, ordered tap-major then input slice.
+kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    // Signed arithmetic: the offsets are `short` and may be negative. In
+    // unsigned 16-bit math a centre of -1 wraps to 65535, so its "+ 1"
+    // neighbour would become 65536 (sampled as zero) instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+
+    // Out-of-range coordinates read as zero, which supplies the padding.
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        // 3x3 neighbourhood of input slice i, row-major from top-left.
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+
+            float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+
+            float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+
+            float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// 1x1 convolution followed by a per-slice bias add.
+//
+// Each thread produces one RGBA texel of `outTexture` at (gid.x, gid.y) in
+// array slice gid.z; threads outside the output extent return without writing.
+//
+// inTexture  - source texture, sampled with pixel coordinates, nearest
+//              filtering and clamp-to-zero addressing.
+// outTexture - destination array texture.
+// param      - supplies strideX/strideY and offsetX/offsetY, which map an
+//              output position to an input position.
+// weights    - float4 filter taps. For output slice z the four output
+//              components use four consecutive runs of (input slice count)
+//              entries, starting at z * 4 * (input slice count).
+// biase      - one float4 of bias per output slice.
+kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    constexpr sampler nearestZero(coord::pixel, filter::nearest, address::clamp_to_zero);
+
+    // Input position that corresponds to this output texel.
+    const ushort2 step = ushort2(param.strideX, param.strideY);
+    const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
+    const float2 srcCoord = float2(srcPos.x, srcPos.y);
+
+    // Index of the first tap for each of the four output components.
+    const uint sliceCount = inTexture.get_array_size();
+    const uint baseX = gid.z * sliceCount * 4;
+    const uint baseY = baseX + sliceCount;
+    const uint baseZ = baseY + sliceCount;
+    const uint baseW = baseZ + sliceCount;
+
+    float4 acc = float4(0.0);
+    for (uint slice = 0; slice < sliceCount; ++slice) {
+        const float4 texel = inTexture.sample(nearestZero, srcCoord, slice);
+        acc.x += dot(texel, weights[baseX + slice]);
+        acc.y += dot(texel, weights[baseY + slice]);
+        acc.z += dot(texel, weights[baseZ + slice]);
+        acc.w += dot(texel, weights[baseW + slice]);
+    }
+
+    outTexture.write(acc + biase[gid.z], gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution fused with bias add, scale/shift and ReLU.
+//
+// Each thread produces one RGBA texel of `outTexture` at (gid.x, gid.y) in
+// slice gid.z, reading only the matching slice of `inTexture`.
+//
+// inTexture  - source texture, sampled with pixel coordinates, nearest
+//              filtering and clamp-to-zero addressing.
+// param      - supplies strideX/strideY and offsetX/offsetY.
+// weights    - scalar taps; each slice owns 4 * 9 floats laid out as four
+//              consecutive runs of nine taps (one run per texel component).
+// biase, new_scale, new_biase - one float4 per slice, applied as
+//              max((conv + biase) * new_scale + new_biase, 0).
+kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    constexpr sampler nearestZero(coord::pixel, filter::nearest, address::clamp_to_zero);
+
+    const uint slice = gid.z;
+    const ushort2 step = ushort2(param.strideX, param.strideY);
+    const ushort2 centre = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
+
+    // First tap of each component's nine-tap run for this slice.
+    const uint tapsPerComponent = 9;
+    const uint baseX = slice * tapsPerComponent * 4;
+    const uint baseY = baseX + tapsPerComponent;
+    const uint baseZ = baseY + tapsPerComponent;
+    const uint baseW = baseZ + tapsPerComponent;
+
+    // Walk the 3x3 window row by row; `tap` runs 0..8 in that order.
+    float4 acc = float4(0.0);
+    uint tap = 0;
+    for (int dy = -1; dy <= 1; ++dy) {
+        for (int dx = -1; dx <= 1; ++dx, ++tap) {
+            const float4 texel = inTexture.sample(nearestZero, float2(centre.x + dx, centre.y + dy), slice);
+            acc.x += texel.x * weights[baseX + tap];
+            acc.y += texel.y * weights[baseY + tap];
+            acc.z += texel.z * weights[baseZ + tap];
+            acc.w += texel.w * weights[baseW + tap];
+        }
+    }
+
+    const float4 activated = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(activated, gid.xy, gid.z);
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+
+/// Parameter block uploaded to the convolution shaders.
+///
+/// `ConvKernel` fills it in its initializer and passes it with `setBytes` at
+/// buffer index 0, so the field order and widths here define the byte layout
+/// the shader receives.
+public struct MetalConvParam {
+    // ConvKernel sets this to filter.dim[2]/2 - paddings[0].
+    let offsetX: Int16
+    // ConvKernel sets this to filter.dim[1]/2 - paddings[1].
+    let offsetY: Int16
+    // ConvKernel always sets this to 0.
+    let offsetZ: Int16
+    // ConvKernel sets this to stride[0].
+    let strideX: UInt16
+    // ConvKernel sets this to stride[1].
+    let strideY: UInt16
+    // ConvKernel sets this to input.metalTexture.arrayLength * 4 - input.dim[3].
+    let paddedZ: UInt16
+}
+
+/// Compute-kernel wrapper for the convolution op.
+///
+/// The pipeline is built from the `conv_add_1x1` shader function. The
+/// `MetalConvParam` block is computed once in `init` and re-uploaded on every
+/// `compute` call.
+class ConvKernel<P: PrecisionType>: Kernel, Computable {
+    /// Shader parameter block; assigned at the end of `init`.
+    var metalParam: MetalConvParam!
+
+    /// Builds the pipeline, creates the filter's Float32 buffer and derives the
+    /// shader parameters from `param`.
+    required init(device: MTLDevice, param: ConvParam<P>) {
+        super.init(device: device, inFunctionName: "conv_add_1x1")
+
+        // Offset of the window origin: half the filter extent minus the padding.
+        let shiftX = param.filter.dim[2]/2 - Int(param.paddings[0])
+        let shiftY = param.filter.dim[1]/2 - Int(param.paddings[1])
+
+        param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+
+        // Difference between the texture's channel capacity (4 per slice) and dim[3].
+        let channelPadding = param.input.metalTexture.arrayLength * 4 - param.input.dim[3]
+
+        metalParam = MetalConvParam(
+            offsetX: Int16(shiftX),
+            offsetY: Int16(shiftY),
+            offsetZ: Int16(0.0),
+            strideX: UInt16(param.stride[0]),
+            strideY: UInt16(param.stride[1]),
+            paddedZ: UInt16(channelPadding)
+        )
+    }
+
+    /// Encodes one dispatch of the convolution shader into `commandBuffer`.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+
+        // Textures: 0 = input, 1 = output. Buffers: 0 = params, 1 = filter.
+        // NOTE(review): `conv_add_1x1` also declares a bias buffer at index 2,
+        // which is not bound here - confirm this is intended.
+        computeEncoder.setTexture(param.input.metalTexture, index: 0)
+        computeEncoder.setTexture(param.output.metalTexture, index: 1)
+        computeEncoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        computeEncoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
+
+        computeEncoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        computeEncoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+
+/// Kernel wrapper for the elementwise-add op.
+///
+/// The initializer builds a pipeline for the `elementwise_add` shader
+/// function, but `compute` currently encodes nothing.
+class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
+    /// Builds the pipeline for `elementwise_add`; `param` is not inspected.
+    required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
+        super.init(device: device, inFunctionName: "elementwise_add")
+    }
+    
+    /// Intentionally empty at present: no encoder is created and nothing is dispatched.
+    func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam<P>) throws {
+        
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import Foundation
+
+/// Marker protocol with no requirements; used as the constraint on
+/// `Testable.TestParamType`.
+public protocol TestParam {
+}
+
+/// A kernel that can be constructed from, and exercised with, a test parameter.
+public protocol Testable {
+    /// Concrete parameter type accepted by `test` and the initializer.
+    associatedtype TestParamType: TestParam
+    /// Runs the test using the given command buffer and parameter.
+    func test(commandBuffer: MTLCommandBuffer, param: TestParamType)
+    /// Creates the kernel for `device` from a test parameter.
+    init(device: MTLDevice, testParam: TestParamType)
+}
+
+
+/// A kernel that can be constructed from an op parameter and can encode its
+/// work into a command buffer.
+protocol Computable {
+    /// Op parameter type consumed by this kernel.
+    associatedtype ParamType: OpParam
+    /// Encodes the kernel's work for `param` into `commandBuffer`; may throw.
+    func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
+    /// Creates the kernel for `device` from an op parameter.
+    init(device: MTLDevice, param: ParamType)
+}
+
+/// Describes an object exposing a compute pipeline state and the name of the
+/// shader function behind it, both readable and writable.
+// NOTE(review): `Kernel` in this file declares both members as `let` and does
+// not list this protocol among its conformances - confirm whether it is still used.
+protocol KernelProtocol {
+    var pipline: MTLComputePipelineState { get set }
+    var functionName: String { get set }
+   
+}
+
+/// Base class holding the compute pipeline state for one shader function.
+open class Kernel {
+    /// Pipeline state obtained from the device for `functionName`.
+    let pipline: MTLComputePipelineState
+    /// Name of the shader function this kernel was created with.
+    let functionName: String
+
+    /// - Parameters:
+    ///   - device: Device used to create the pipeline.
+    ///   - inFunctionName: Shader function name, forwarded to `device.pipeLine`.
+    ///   - usePaddleMobileLib: Forwarded to `device.pipeLine` as `inPaddleMobileLib`.
+    public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) {
+        self.pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib)
+        self.functionName = inFunctionName
+    }
+}
+
+/// Kernel that allocates and owns its output texture and runs a
+/// caller-named shader function from one input texture into that output.
+open class CusomKernel: Kernel {
+    /// Width, height and channel count of the kernel's output.
+    public struct Shape {
+        public let width: Int
+        public let height: Int
+        public let channel: Int
+        public init(inWidth: Int, inHeight: Int, inChannel: Int){
+            self.width = inWidth
+            self.height = inHeight
+            self.channel = inChannel
+        }
+    }
+
+    /// Texture written by `compute`; created once in `init`.
+    let outputTexture: MTLTexture
+
+    /// Creates the output texture described by `outputDim`, then builds the pipeline.
+    ///
+    /// - Parameters:
+    ///   - device: Device used for the texture and the pipeline.
+    ///   - inFunctionName: Shader function to run.
+    ///   - outputDim: Size of the output; channels are packed four per texel.
+    ///   - usePaddleMobileLib: Forwarded to `Kernel.init`; defaults to `false` here.
+    public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
+        let descriptor = MTLTextureDescriptor()
+        descriptor.usage = [.shaderRead, .shaderWrite]
+        descriptor.storageMode = .shared
+        descriptor.pixelFormat = .rgba32Float
+        descriptor.textureType = .type2D
+        descriptor.width = outputDim.width
+        descriptor.height = outputDim.height
+        // Round the channel count up to whole RGBA texels.
+        descriptor.depth = (outputDim.channel + 3) / 4
+        outputTexture = device.makeTexture(descriptor: descriptor) ?! " make texture error "
+
+        super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib)
+    }
+
+    /// Encodes one dispatch reading `inputTexuture` (texture 0) and writing
+    /// `outputTexture` (texture 1).
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        computeEncoder.setTexture(inputTexuture, index: 0)
+        computeEncoder.setTexture(outputTexture, index: 1)
+        computeEncoder.dispatch(computePipline: pipline, outTexture: outputTexture)
+        computeEncoder.endEncoding()
+    }
+
+}
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Parameter block for the `resize` kernel (bound at buffer index 0).
+// `resize` reads only strideX and strideY; width and height are not read by
+// any kernel in this file.
+struct OutputDim {
+    ushort width;
+    ushort height;
+    ushort strideX;   // multiplier applied to gid.x to obtain the source x
+    ushort strideY;   // multiplier applied to gid.y to obtain the source y
+};
+
+// Strided copy from a 2D texture into an array texture.
+//
+// For every in-range output position (gid.x, gid.y, gid.z) the texel at
+// (gid.x * strideX, gid.y * strideY) of `inTexture` is written unchanged to
+// `outTexture`; the same source texel is used for every slice gid.z.
+//
+// The previously declared sampler was never used (the texture is accessed
+// with read()), so it has been removed, along with the component-wise rebuild
+// of the half4 that produced the same value.
+kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
+                   texture2d_array<half, access::write> outTexture [[texture(1)]],
+                   constant OutputDim &params [[buffer(0)]],
+                   uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+
+    const uint2 pos = gid.xy * uint2(params.strideX, params.strideY);
+    const half4 input = inTexture.read(pos);
+    outTexture.write(input, gid.xy, gid.z);
+}
+
+// ReLU: writes max(input, 0) for every in-range texel of every slice.
+//
+// The maximum is taken in float and converted back to half for the write.
+// The previously declared sampler was never used (the texture is accessed
+// with read()), so it has been removed.
+kernel void relu(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                 texture2d_array<half, access::write> outTexture [[texture(1)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    const half4 input = inTexture.read(gid.xy, gid.z);
+    const float4 relu = fmax((float4)input, 0.0);
+    outTexture.write(half4(relu), gid.xy, gid.z);
+}
+
+// Copies every in-range texel of `inTexture` to the same position in
+// `outTexture`.
+//
+// NOTE(review): `biasTerms` is declared but never read, so despite the kernel
+// name nothing is added to the input - confirm whether the add is still to be
+// implemented. Behaviour is left unchanged here.
+//
+// The previously declared sampler was never used (the texture is accessed
+// with read()), so it has been removed.
+kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                            texture2d_array<half, access::write> outTexture [[texture(1)]],
+                            const device half4 *biasTerms [[buffer(0)]],
+                            uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    const half4 input = inTexture.read(gid.xy, gid.z);
+    outTexture.write(input, gid.xy, gid.z);
+}
+
+// Per-slice scale and shift: out = in * newScale[slice] + newBias[slice].
+//
+// newScale / newBias hold one half4 per array slice; threads outside the
+// output extent return without writing.
+kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                      texture2d_array<half, access::write> outTexture [[texture(1)]],
+                      const device half4 * newScale [[buffer(0)]],
+                      const device half4 * newBias [[buffer(1)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    const half4 texel = inTexture.read(gid.xy, gid.z);
+    const half4 normalized = texel * newScale[gid.z] + newBias[gid.z];
+    outTexture.write(normalized, gid.xy, gid.z);
+}
+
+//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
+//                               texture2d_array<half, access::write> outTexture [[texture(1)]],
+//                               uint3 gid [[thread_position_in_grid]]) {
+//    if (gid.x >= inTexture.get_width() ||
+//        gid.y >= inTexture.get_height()){
+//        return;
+//    }
+//    const half4 input = inTexture.read(gid.xy);
+//    outTexture.write(input, gid.xy, 0);
+//}
+
+// Copies a float 2D texture into slice 0 of an array texture.
+//
+// The bounds check uses the INPUT texture's width and height; gid.z is not
+// consulted and every write targets slice 0.
+kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+    const bool inside = gid.x < inTexture.get_width() &&
+                        gid.y < inTexture.get_height();
+    if (!inside) {
+        return;
+    }
+    const float4 texel = inTexture.read(gid.xy);
+    outTexture.write(texel, gid.xy, 0);
+}
+
+
+// Half-precision variant: copies a half 2D texture into slice 0 of an array
+// texture.
+//
+// The bounds check uses the INPUT texture's width and height; gid.z is not
+// consulted and every write targets slice 0.
+kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+    const bool inside = gid.x < inTexture.get_width() &&
+                        gid.y < inTexture.get_height();
+    if (!inside) {
+        return;
+    }
+    const half4 texel = inTexture.read(gid.xy);
+    outTexture.write(texel, gid.xy, 0);
+}
+
+// Parameter block for the `pool` / `pool_half` kernels (bound at buffer
+// index 0). Seven consecutive 32-bit ints.
+struct PoolParam {
+    int ksizeX;     // pooling window extent
+    int ksizeY;
+    int strideX;    // step between consecutive windows
+    int strideY;
+    int paddingX;   // subtracted from the window origin
+    int paddingY;
+    int poolType;   // 0 selects the max branch, 1 the averaging branch
+};
+
+// 2D pooling over each slice of a float array texture.
+//
+// For output position (gid.x, gid.y, gid.z) the window starts at
+// (gid.x * strideX - paddingX, gid.y * strideY - paddingY), spans
+// ksizeX x ksizeY texels and is clipped to the input extent.
+//
+// pm.poolType == 0: component-wise maximum over the clipped window.
+// pm.poolType == 1: sum over the clipped window divided by ksizeX * ksizeY.
+// Any other value writes zero.
+//
+// Fix: the Y window was previously derived from strideX / paddingX / ksizeX,
+// which gave wrong results whenever the X and Y parameters differ.
+kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                 texture2d_array<float, access::write> outTexture [[texture(1)]],
+                 constant PoolParam &pm [[buffer(0)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    // Window along X, clipped to [0, input width).
+    int xmin = gid.x * pm.strideX - pm.paddingX;
+    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
+    xmin = max(xmin, 0);
+    // Window along Y, clipped to [0, input height); uses the Y parameters.
+    int ymin = gid.y * pm.strideY - pm.paddingY;
+    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
+    ymin = max(ymin, 0);
+
+    float4 r = 0;
+    if (pm.poolType == 0) {
+        // Seed with the first texel so negative inputs are not clamped by the zero initialiser.
+        r = inTexture.read(uint2(xmin, ymin), gid.z);
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
+            }
+        }
+    } else if (pm.poolType == 1) {
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r += inTexture.read(uint2(x, y), gid.z);
+            }
+        }
+        // Divides by the full window size, not by the clipped texel count.
+        r /= pm.ksizeX * pm.ksizeY;
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+
+// Half-precision 2D pooling over each slice of an array texture.
+//
+// For output position (gid.x, gid.y, gid.z) the window starts at
+// (gid.x * strideX - paddingX, gid.y * strideY - paddingY), spans
+// ksizeX x ksizeY texels and is clipped to the input extent.
+//
+// pm.poolType == 0: component-wise maximum over the clipped window.
+// pm.poolType == 1: sum over the clipped window divided by ksizeX * ksizeY.
+// Any other value writes zero.
+//
+// Fix: the Y window was previously derived from strideX / paddingX / ksizeX,
+// which gave wrong results whenever the X and Y parameters differ.
+kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                 texture2d_array<half, access::write> outTexture [[texture(1)]],
+                 constant PoolParam &pm [[buffer(0)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    // Window along X, clipped to [0, input width).
+    int xmin = gid.x * pm.strideX - pm.paddingX;
+    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
+    xmin = max(xmin, 0);
+    // Window along Y, clipped to [0, input height); uses the Y parameters.
+    int ymin = gid.y * pm.strideY - pm.paddingY;
+    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
+    ymin = max(ymin, 0);
+
+    half4 r = 0;
+    if (pm.poolType == 0) {
+        // Seed with the first texel so negative inputs are not clamped by the zero initialiser.
+        r = inTexture.read(uint2(xmin, ymin), gid.z);
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
+            }
+        }
+    } else if (pm.poolType == 1) {
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r += inTexture.read(uint2(x, y), gid.z);
+            }
+        }
+        // Divides by the full window size, not by the clipped texel count.
+        r /= pm.ksizeX * pm.ksizeY;
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+// Writes the texel at (0, 0) of input slice gid.z to every in-range output
+// position (gid.x, gid.y) of the same slice.
+kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                    texture2d_array<float, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    const uint2 origin = uint2(0, 0);
+    const float4 texel = inTexture.read(origin, gid.z);
+    outTexture.write(texel, gid.xy, gid.z);
+}
+
+// Half-precision variant: writes the texel at (0, 0) of input slice gid.z to
+// every in-range output position (gid.x, gid.y) of the same slice.
+kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                    texture2d_array<half, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    const uint2 origin = uint2(0, 0);
+    const half4 texel = inTexture.read(origin, gid.z);
+    outTexture.write(texel, gid.xy, gid.z);
+}
+
+// Softmax across all components of all slices at texel (0, 0).
+//
+// The maximum and the normalising sum are both gathered from position (0, 0)
+// of every input slice; the value normalised and written is the texel at
+// (gid.x, gid.y, gid.z). Each thread recomputes the maximum and the sum.
+kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                    texture2d_array<float, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    const int sliceCount = inTexture.get_array_size();
+
+    // Pass 1: largest component, subtracted below before exponentiating.
+    float peak = inTexture.read(uint2(0, 0), 0)[0];
+    for (int slice = 0; slice < sliceCount; slice++) {
+        const float4 v = inTexture.read(uint2(0, 0), slice);
+        peak = max(peak, max(max(v[0], v[1]), max(v[2], v[3])));
+    }
+
+    // Pass 2: sum of exp(component - peak) over all four components of every slice.
+    float total = 0;
+    for (int slice = 0; slice < sliceCount; slice++) {
+        const float4 v = inTexture.read(uint2(0, 0), slice);
+        total += exp(v[0] - peak) + exp(v[1] - peak) + exp(v[2] - peak) + exp(v[3] - peak);
+    }
+
+    const float4 here = inTexture.read(gid.xy, gid.z);
+    const float4 result = exp(here - peak) / total;
+    outTexture.write(result, gid.xy, gid.z);
+}
+
+
+// Half-precision softmax across all components of all slices at texel (0, 0).
+//
+// The maximum (half) and the normalising sum (accumulated in float) are both
+// gathered from position (0, 0) of every input slice; the value normalised
+// and written is the texel at (gid.x, gid.y, gid.z). Each thread recomputes
+// the maximum and the sum.
+kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                    texture2d_array<half, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() ||
+                         gid.y >= outTexture.get_height() ||
+                         gid.z >= outTexture.get_array_size();
+    if (outside) {
+        return;
+    }
+
+    const int sliceCount = inTexture.get_array_size();
+
+    // Pass 1: largest component, subtracted below before exponentiating.
+    half peak = inTexture.read(uint2(0, 0), 0)[0];
+    for (int slice = 0; slice < sliceCount; slice++) {
+        half4 v = inTexture.read(uint2(0, 0), slice);
+        peak = max(peak, max(max(v[0], v[1]), max(v[2], v[3])));
+    }
+
+    // Pass 2: sum of exp(component - peak) over all four components of every slice.
+    float total = 0;
+    for (int slice = 0; slice < sliceCount; slice++) {
+        half4 v = inTexture.read(uint2(0, 0), slice);
+        total += exp(v[0] - peak) + exp(v[1] - peak) + exp(v[2] - peak) + exp(v[3] - peak);
+    }
+
+    half4 result = inTexture.read(gid.xy, gid.z);
+    result = exp(result - peak) / total;
+    outTexture.write(result, gid.xy, gid.z);
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameter block uploaded to the pooling shader with `setBytes` at buffer
+/// index 0. Seven consecutive `Int32` fields, in the same order as the
+/// `PoolParam` struct declared in Kernels.metal.
+struct PoolMetalParam {
+    // PoolKernel fills the size/stride/padding pairs from element 0 (X) and element 1 (Y).
+    let ksizeX: Int32
+    let ksizeY: Int32
+    let strideX: Int32
+    let strideY: Int32
+    let paddingX: Int32
+    let paddingY: Int32
+    // PoolKernel writes 0 for "max" and 1 for "avg".
+    let poolType: Int32
+}
+
+/// Compute-kernel wrapper for the pooling op; runs the `pool` shader function.
+class PoolKernel<P: PrecisionType>: Kernel, Computable{
+    /// Encodes one pooling dispatch into `commandBuffer`.
+    ///
+    /// Textures: 0 = input, 1 = output. Buffer 0 carries a `PoolMetalParam`.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can
+    ///   be created or when `param.poolType` is neither `"max"` nor `"avg"`.
+    func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        // End encoding on every exit path. Previously the unknown-pool-type
+        // throw left the function with the encoder still open.
+        defer { encoder.endEncoding() }
+
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+
+        // Shader encoding: 0 = max pooling, 1 = average pooling.
+        let poolType: Int32
+        switch param.poolType {
+        case "max":
+            poolType = 0
+        case "avg":
+            poolType = 1
+        default:
+            throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType)
+        }
+        var pmp = PoolMetalParam.init(
+            ksizeX: param.ksize[0],
+            ksizeY: param.ksize[1],
+            strideX: param.stride[0],
+            strideY: param.stride[1],
+            paddingX: param.padding[0],
+            paddingY: param.padding[1],
+            poolType: poolType
+        )
+        encoder.setBytes(&pmp, length: MemoryLayout<PoolMetalParam>.size, index: 0)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+    }
+
+    /// Builds the pipeline for the `pool` shader function; `param` is not inspected.
+    required init(device: MTLDevice, param: PoolParam<P>) {
+        super.init(device: device, inFunctionName: "pool")
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Compute-kernel wrapper for the ReLU op; runs the `relu` shader function.
+class ReluKernel<P: PrecisionType>: Kernel, Computable{
+    /// Encodes one dispatch with the input at texture 0 and the output at texture 1.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ReluParam<P>) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        computeEncoder.setTexture(param.input.metalTexture, index: 0)
+        computeEncoder.setTexture(param.output.metalTexture, index: 1)
+        computeEncoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        computeEncoder.endEncoding()
+    }
+
+    /// Builds the pipeline for the `relu` shader function; `param` is not inspected.
+    required init(device: MTLDevice, param: ReluParam<P>) {
+        super.init(device: device, inFunctionName: "relu")
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Compute-kernel wrapper for the reshape op; runs the `reshape` shader function.
+class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
+    /// Builds the pipeline for the `reshape` shader function; `param` is not inspected.
+    required init(device: MTLDevice, param: ReshapeParam<P>) {
+        super.init(device: device, inFunctionName: "reshape")
+    }
+
+    /// Encodes one dispatch with the input at texture 0 and the output at texture 1.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        computeEncoder.setTexture(param.input.metalTexture, index: 0)
+        computeEncoder.setTexture(param.output.metalTexture, index: 1)
+        computeEncoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        computeEncoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+//
+//import Foundation
+//import MetalPerformanceShaders
+//
+//
+//struct ResizeParam: OpParam{
+//    typealias OutputType = <#type#>
+//    
+//    typealias ParamPrecisionType = <#type#>
+//    
+//    let input: MTLTexture
+//    let output: MTLTexture
+//    let expectDim: Dim
+//}
+//
+//struct OutputDim {
+//    let width: UInt16
+//    let height: UInt16
+//    let strideX: UInt16
+//    let strideY: UInt16
+//}
+//
+//class ResizeKernel<P: PrecisionType>: Kernel, Computable{
+//    var lanczos: MPSImageLanczosScale
+//    required init(device: MTLDevice, param: ResizeParam) {
+//        lanczos = MPSImageLanczosScale.init(device: device)
+//        super.init(device: device, inFunctionName: "resize")
+//    }
+//    func compute(commandBuffer: MTLCommandBuffer, param: ResizeParam) throws {
+////        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+////            throw PaddleMobileError.predictError(message: " encode is nil")
+////        }
+//        lanczos.encode(commandBuffer: commandBuffer, sourceTexture: param.input, destinationTexture: param.output)
+//        
+////        encoder.setTexture(param.input, index: 0)
+////        encoder.setTexture(param.output, index: 1)
+////        let strideX = param.input.width/param.expectDim[2]
+////        let strideY = param.input.height/param.expectDim[1]
+////        var outputDim = OutputDim.init(width: UInt16(param.expectDim[1]), height: UInt16(param.expectDim[2]), strideX: UInt16(strideX), strideY: UInt16(strideY))
+////        encoder.setBytes(&outputDim, length: MemoryLayout<OutputDim>.size, index: 0)
+////        encoder.dispatch(computePipline: pipline, outTexture: param.output)
+////        encoder.endEncoding()
+//    }
+//    
+//
+//    
+//    
+//}
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Compute-kernel wrapper for the softmax op; runs the `softmax` shader function.
+class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
+
+    /// Encodes one dispatch with the input at texture 0 and the output at texture 1.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam<P>) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        computeEncoder.setTexture(param.input.metalTexture, index: 0)
+        computeEncoder.setTexture(param.output.metalTexture, index: 1)
+        computeEncoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        computeEncoder.endEncoding()
+    }
+
+    /// Builds the pipeline for the `softmax` shader function; `param` is not inspected.
+    required init(device: MTLDevice, param: SoftmaxParam<P>) {
+        super.init(device: device, inFunctionName: "softmax")
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Bundle of an input texture, an output texture and an expected dimension.
+/// NOTE(review): `Texture2DTo2DArrayKernel` in this file takes `FeedParam` rather than
+/// this struct — confirm whether this type is still used anywhere.
+struct Texture2DTo2DArrayParam {
+    /// Source texture.
+    let input: MTLTexture
+    /// Destination texture.
+    let output: MTLTexture
+    /// Dimension the result is expected to have.
+    let expectDim: Dim
+}
+
+/// Kernel created under the function name "texture2d_to_2d_array"; it is driven by a `FeedParam`.
+class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable {
+
+    /// Builds the kernel by handing the fixed function name to `Kernel`.
+    /// `param` is required by the initializer signature but is not read here.
+    required init(device: MTLDevice, param: FeedParam<P>) {
+        super.init(device: device, inFunctionName: "texture2d_to_2d_array")
+    }
+
+    /// Encodes a single compute pass into `commandBuffer`.
+    ///
+    /// The feed input's `mtlTexture` is bound at index 0 and the output's
+    /// `metalTexture` at index 1.
+    /// NOTE(review): unlike the other kernels in this file, the dispatch is sized
+    /// by the *input* texture, not the output — presumably intentional because the
+    /// shader walks the source image; confirm against the Metal function.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` if the command buffer cannot
+    ///   provide a compute command encoder.
+    func compute(commandBuffer: MTLCommandBuffer, param: FeedParam<P>) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        computeEncoder.setTexture(param.input.mtlTexture, index: 0)
+        computeEncoder.setTexture(param.output.metalTexture, index: 1)
+        computeEncoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture)
+        computeEncoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the pool operator, resolved from an `OpDesc` and a `Scope`.
+class PoolParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Texture resolved through `inputX`.
+    let input: Texture<P>
+    /// Texture resolved through `outputOut`.
+    var output: Texture<P>
+    /// Value of the "ksize" attribute.
+    var ksize: [Int32]
+    /// Value of the "strides" attribute.
+    var stride: [Int32]
+    /// Value of the "paddings" attribute.
+    var padding: [Int32]
+    /// Value of the "pooling_type" attribute.
+    var poolType: String
+    /// Value of the "ceil_mode" attribute.
+    var ceilMode: Bool
+    /// Value of the "global_pooling" attribute.
+    var globalPooling: Bool
+
+    /// Reads the input/output textures from `inScope` and every pooling attribute
+    /// from `opDesc.attrs`.
+    ///
+    /// - Throws: whatever `inputX`, `outputOut` or `getAttr` throws; the error is
+    ///   propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let attributes = opDesc.attrs
+        input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        poolType = try PoolParam.getAttr(key: "pooling_type", attrs: attributes)
+        ksize = try PoolParam.getAttr(key: "ksize", attrs: attributes)
+        stride = try PoolParam.getAttr(key: "strides", attrs: attributes)
+        padding = try PoolParam.getAttr(key: "paddings", attrs: attributes)
+        ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: attributes)
+        globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: attributes)
+    }
+}
+
+/// Pool operator: pairs a `PoolKernel` with a `PoolParam`.
+class PoolOp<P: PrecisionType>: Operator<PoolKernel<P>, PoolParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = PoolOp<P>
+
+    /// Deliberately empty: this operator does not derive the output dimension here.
+    func inferShape() {
+    }
+
+    /// Runs the kernel against `buffer` with this operator's parameters.
+    /// - Throws: any error raised by the kernel's `compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug helper: prints the input texture, the pooling configuration and the output texture.
+    func delogOutput() {
+        print("pool2d delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
+        for setting in [para.ksize, para.stride, para.padding] {
+            print(setting)
+        }
+        print(para.poolType)
+        let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+import Foundation
+
+/// Parameters of the relu operator: one input texture and one output texture.
+class ReluParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Texture resolved through `inputX`.
+    let input: Texture<P>
+    /// Texture resolved through `outputOut`.
+    var output: Texture<P>
+
+    /// Looks up the input and output textures named by `opDesc` inside `inScope`.
+    /// - Throws: whatever `inputX` or `outputOut` throws, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Relu operator: pairs a `ReluKernel` with a `ReluParam`.
+class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = ReluOp<P>
+
+    /// The output takes the same dimension as the input.
+    func inferShape() {
+        para.output.dim = para.input.dim
+    }
+
+    /// Runs the kernel against `buffer` with this operator's parameters.
+    /// - Throws: any error raised by the kernel's `compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+}
+
+
+
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the reshape operator: one input texture and one output texture.
+class ReshapeParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Texture resolved through `inputX`.
+    let input: Texture<P>
+    /// Texture resolved through `outputOut`.
+    var output: Texture<P>
+
+    /// Looks up the input and output textures named by `opDesc` inside `inScope`.
+    /// - Throws: whatever `inputX` or `outputOut` throws, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Reshape operator: pairs a `ReshapeKernel` with a `ReshapeParam`.
+class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = ReshapeOp<P>
+
+    /// Deliberately empty: this operator does not derive the output dimension here.
+    func inferShape() {
+    }
+
+    /// Runs the kernel against `buffer` with this operator's parameters.
+    /// - Throws: any error raised by the kernel's `compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug helper: prints the input texture followed by the output texture.
+    func delogOutput() {
+        print("reshape delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
+        let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the softmax operator: one input texture and one output texture.
+class SoftmaxParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Texture resolved through `inputX`.
+    let input: Texture<P>
+    /// Texture resolved through `outputOut`.
+    var output: Texture<P>
+
+    /// Looks up the input and output textures named by `opDesc` inside `inScope`.
+    /// - Throws: whatever `inputX` or `outputOut` throws, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Softmax operator: pairs a `SoftmaxKernel` with a `SoftmaxParam`.
+class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = SoftmaxOp<P>
+
+    /// Deliberately empty: this operator does not derive the output dimension here.
+    func inferShape() {
+    }
+
+    /// Runs the kernel against `buffer` with this operator's parameters.
+    /// - Throws: any error raised by the kernel's `compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug helper: prints the input texture followed by the output texture.
+    func delogOutput() {
+        print("softmax delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
+        let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Marker protocol for values that can be stored as an operator attribute.
+/// It has no requirements; the conformances below simply tag the accepted types.
+protocol Attr {}
+
+// Scalar attribute types.
+extension Bool: Attr {}
+extension Int: Attr {}
+extension Float: Attr {}
+extension Int64: Attr {}
+
+// Every Array is tagged, whatever its element type.
+extension Array: Attr {}
+
+// Text attributes.
+extension String: Attr {}
+
+/// Extracts the payload of a protobuf attribute as an `Attr`.
+///
+/// The field that is read is selected by `attrDesc.type`; `.int` values are
+/// widened to `Int`, every other supported type is returned as stored.
+/// Any type not listed below terminates the process via `fatalError`.
+func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr {
+    let converted: Attr
+    switch attrDesc.type {
+    case .boolean:  converted = attrDesc.b
+    case .int:      converted = Int(attrDesc.i)
+    case .string:   converted = attrDesc.s
+    case .long:     converted = attrDesc.l
+    case .float:    converted = attrDesc.f
+    case .booleans: converted = attrDesc.bools
+    case .floats:   converted = attrDesc.floats
+    case .ints:     converted = attrDesc.ints
+    case .strings:  converted = attrDesc.strings
+    default:
+        fatalError(" not support this attr type: \(attrDesc.type)")
+    }
+    return converted
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// One block of a program: its index, its parent's index, its variables and its operators.
+struct BlockDesc {
+    let index: Int
+    let parentIndex: Int
+    let vars: [VarDesc]
+    let ops: [OpDesc]
+
+    /// Converts a protobuf block description.
+    /// Variables are stored sorted by name; operators keep their protobuf order.
+    init(block: PaddleMobile_Framework_Proto_BlockDesc) {
+        index = Int(block.idx)
+        parentIndex = Int(block.parentIdx)
+        vars = block.vars
+            .map { VarDesc.init(protoVarDesc: $0) }
+            .sorted { $0.name < $1.name }
+        ops = block.ops.map { OpDesc.init(protoOpDesc: $0) }
+    }
+
+    /// Builds a block from already-converted variables and operators;
+    /// both `index` and `parentIndex` are set to 0.
+    init(inVars: [VarDesc], inOps: [OpDesc]) {
+        index = 0
+        parentIndex = 0
+        vars = inVars
+        ops = inOps
+    }
+}
+
+extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// All operator descriptions followed by all variable descriptions, concatenated without separators.
+    var description: String {
+        let opText = ops.map { $0.description }.joined()
+        let varText = vars.map { $0.description }.joined()
+        return opText + varText
+    }
+
+    /// Same text as `description`.
+    var debugDescription: String {
+        return description
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/OpDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/OpDesc.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Description of one operator: its type, its inputs/outputs split by whether the
+/// key is declared in `opInfos` for that type, and its attributes.
+struct OpDesc {
+    /// Inputs whose key is declared in `opInfos` for this op type.
+    let inputs: [String : [String]]
+    /// Inputs whose key is not declared in `opInfos` for this op type.
+    var paraInputs: [String : [String]]
+    /// Outputs whose key is declared in `opInfos` for this op type.
+    var outputs: [String : [String]]
+    /// Outputs whose key is not declared in `opInfos` for this op type.
+    let unusedOutputs: [String : [String]]
+    /// Attributes by name; attributes of type `.block` are not stored.
+    var attrs: [String : Attr] = [:]
+    var type: String
+
+    /// Converts a protobuf operator description.
+    /// When the op type has no entry in `opInfos`, no key counts as declared, so
+    /// every input lands in `paraInputs` and every output in `unusedOutputs`.
+    init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) {
+        type = protoOpDesc.type
+
+        // Collects parameter -> arguments for the entries accepted by `keep`;
+        // a later entry with the same parameter replaces an earlier one.
+        func select(_ candidates: [PaddleMobile_Framework_Proto_OpDesc.Var], _ keep: (String) -> Bool) -> [String : [String]] {
+            var picked: [String : [String]] = [:]
+            for candidate in candidates where keep(candidate.parameter) {
+                picked[candidate.parameter] = candidate.arguments
+            }
+            return picked
+        }
+
+        let declared = opInfos[protoOpDesc.type]
+        let isDeclaredInput: (String) -> Bool = { declared?.inputs.contains($0) ?? false }
+        let isDeclaredOutput: (String) -> Bool = { declared?.outputs.contains($0) ?? false }
+
+        inputs = select(protoOpDesc.inputs, isDeclaredInput)
+        paraInputs = select(protoOpDesc.inputs) { !isDeclaredInput($0) }
+        outputs = select(protoOpDesc.outputs, isDeclaredOutput)
+        unusedOutputs = select(protoOpDesc.outputs) { !isDeclaredOutput($0) }
+
+        for attr in protoOpDesc.attrs where attr.type != .block {
+            attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
+        }
+    }
+}
+
+extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// Multi-line dump of the op type, `inputs`, `paraInputs`, `outputs` and `attrs`.
+    /// `unusedOutputs` is not included.
+    var description: String {
+        let sections: [String] = [
+            "op type: \(type): \n",
+            "    op inputs: \n",
+            "        \(inputs) \n",
+            "    op para inputs: \n",
+            "        \(paraInputs) \n",
+            "    op para outputs: \n",
+            "        \(outputs) \n",
+            "    op attrs: \n",
+            "        \(attrs) \n"
+        ]
+        return sections.joined()
+    }
+
+    /// Same text as `description`.
+    var debugDescription: String {
+        return description
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/Program.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/Program.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// A program description together with the path of its parameters and the scope holding its variables.
+public struct Program {
+    /// Path passed in as `inParamPath`.
+    let paramPath: String
+    /// Description of the program's blocks.
+    let programDesc: ProgramDesc
+    /// Scope passed in as `inScope`.
+    let scope: Scope
+
+    /// Stores the three values as given; no validation or loading happens here.
+    init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) {
+        self.paramPath = inParamPath
+        self.programDesc = inProgramDesc
+        self.scope = inScope
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Ordered list of the blocks that make up a program.
+public struct ProgramDesc {
+    var blocks: [BlockDesc] = []
+
+    /// Creates a description with no blocks.
+    init() {
+    }
+
+    /// Converts every block of the protobuf program, preserving their order.
+    init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
+        blocks = protoProgram.blocks.map { BlockDesc.init(block: $0) }
+    }
+}
+
+extension ProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// For each block, a "block - <position>" header line followed by that block's description.
+    public var description: String {
+        return blocks.enumerated().reduce("") { (text, entry) -> String in
+            return text + "block - \(entry.offset): \n" + entry.element.description
+        }
+    }
+
+    /// Same text as `description`.
+    public var debugDescription: String {
+        return description
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+// Precedence for the node-chaining operator declared below: left-associative, so
+// `a --> b --> c` groups as `(a --> b) --> c`, and binding tighter than multiplication.
+precedencegroup ChainNode {
+    associativity: left
+    higherThan: MultiplicationPrecedence
+}
+
+// Infix operator used to link graph nodes; `Node` in this file provides an implementation.
+infix operator --> : ChainNode
+
+/// A vertex of the operator graph used to detect and fold fusable operator chains.
+///
+/// A node either wraps a real operator (`opDesc` set) or is a type-only pattern
+/// node (`opDesc` nil) created through `init(inType:)`.
+class Node {
+    /// Nodes that feed this node.
+    var inputs: [Node] = []
+    /// Nodes fed by this node.
+    var outputs: [Node] = []
+    /// Operator type name; replaced by the fusion type after folding.
+    var type: String
+    /// Backing operator description, if any.
+    var opDesc: OpDesc?
+
+    /// Creates a node for a concrete operator; `type` is copied from the description.
+    init(inOpDesc: OpDesc) {
+        type = inOpDesc.type
+        opDesc = inOpDesc
+    }
+
+    /// Creates a pattern node that only carries a type name.
+    init(inType: String) {
+        type = inType
+    }
+
+    /// Links `lNode` to `rNode` in both directions and returns `rNode`,
+    /// which lets chains be written as `a --> b --> c`.
+    static func -->(lNode: Node, rNode: Node) -> Node {
+        lNode.outputs.append(rNode)
+        rNode.inputs.append(lNode)
+        return rNode
+    }
+
+    /// Number of nodes on the longest output path starting at this node.
+    ///
+    /// - Parameter begin: level assigned to this node (1 for the root).
+    /// - Returns: the deepest level reached below this node; never less than 1.
+    func depth(begin: UInt = 1) -> UInt {
+        // Keep the maximum over *all* branches; the previous code overwrote the
+        // running value each iteration, so only the last branch was counted.
+        var deepest: UInt = max(begin, 1)
+        for output in outputs {
+            deepest = max(deepest, output.depth(begin: begin + 1))
+        }
+        return deepest
+    }
+
+    /// Returns a type-only copy of the graph rooted here, truncated to `depth` levels.
+    /// A `depth` of 0 is treated like 1 (root only) instead of underflowing `UInt`.
+    func to(depth: UInt) -> Node {
+        let beginNode = Node.init(inType: type)
+        let remaining: UInt = depth > 0 ? depth - 1 : 0
+        to(depth: remaining, withNode: beginNode)
+        return beginNode
+    }
+
+    /// Folds the chain below this node into this node according to `fusion`.
+    ///
+    /// This node drops its outputs, absorbs the attributes and para inputs of
+    /// every node matched by the fusion pattern, takes over the outputs of the
+    /// pattern's leaf nodes and finally adopts the fusion type. Every absorbed
+    /// node is appended to `removedNodes`.
+    /// The caller must have verified that the graph below matches the pattern.
+    func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) {
+        let fusionNode = fusion.fusionNode()
+        let change = fusion.change()
+        let inOutputs = outputs
+        outputs.removeAll()
+        opDesc?.outputs.removeAll()
+        for i in 0..<inOutputs.count {
+            inOutputs[i].folderWith(beginNode: self, matchNode: fusionNode.outputs[i], change: change, removedNodes: &removedNodes)
+        }
+        opDesc?.type = fusion.fusionType()
+        type = fusion.fusionType()
+    }
+
+    /// Merges this node into `beginNode` and recurses along the pattern `matchNode`.
+    private func folderWith(beginNode: Node, matchNode: Node, change: [String : [(from: String, to: String)]], removedNodes: inout [Node]) {
+        guard let inOpdesc = opDesc else {
+            fatalError(" can not fold a node without op desc, type: \(type)")
+        }
+
+        for attr in inOpdesc.attrs {
+            beginNode.opDesc?.attrs[attr.key] = attr.value
+        }
+
+        // Copy each para input exactly once: under its renamed key when the
+        // fusion lists a rename for it, otherwise under its original key.
+        let renames = change[type]
+        for paraInput in inOpdesc.paraInputs {
+            let renamedKey = renames?.first(where: { $0.from == paraInput.key })?.to
+            beginNode.opDesc?.paraInputs[renamedKey ?? paraInput.key] = paraInput.value
+        }
+
+        // A pattern leaf hands its outputs over to the fused node.
+        if matchNode.outputs.count == 0 {
+            beginNode.outputs.append(contentsOf: outputs)
+            beginNode.opDesc?.outputs = inOpdesc.outputs
+        }
+        removedNodes.append(self)
+
+        for i in 0..<matchNode.outputs.count {
+            outputs[i].folderWith(beginNode: beginNode, matchNode: matchNode.outputs[i], change: change, removedNodes: &removedNodes)
+        }
+    }
+
+    /// Copies `depth` further levels of outputs (types only) underneath `withNode`.
+    private func to(depth: UInt, withNode: Node) {
+        if depth < 1 {
+            return
+        }
+
+        for output in outputs {
+            let node = Node.init(inType: output.type)
+            withNode.outputs.append(node)
+            output.to(depth: depth - 1, withNode: node)
+        }
+    }
+}
+
+extension Node: Equatable {
+    /// Structural equality: two nodes are equal when they have the same type, the
+    /// same number of outputs, and pairwise-equal outputs (compared recursively).
+    /// `inputs` and `opDesc` are not compared.
+    static func == (lhs: Node, rhs: Node) -> Bool {
+        guard lhs.outputs.count == rhs.outputs.count, lhs.type == rhs.type else {
+            return false
+        }
+        let hasMismatch = zip(lhs.outputs, rhs.outputs).contains { (left, right) -> Bool in
+            return left != right
+        }
+        return !hasMismatch
+    }
+}
+
+/// Rewrites a program description by folding operator chains that match one of `fusionOps`.
+class ProgramOptimize<P: PrecisionType> {
+    /// Fusions that are attempted, in this order.
+    let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self, ConvAddOp<P>.self]
+
+    /// Builds the operator graph of the single block in `originProgramDesc`, folds
+    /// every chain that matches a fusion pattern, and returns a new description
+    /// containing the remaining operators together with the original variables.
+    ///
+    /// Terminates via `fatalError` when the program has more than one block or
+    /// contains an operator type that is not registered in `opInfos`.
+    func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
+
+        guard originProgramDesc.blocks.count == 1 else {
+            fatalError(" not support yet")
+        }
+
+        // Variable name -> node that produced it most recently.
+        var mapForNodeChain: [String : Node] = [:]
+        // Every operator node, in program order.
+        var nodes: [Node] = []
+        // Operator type -> nodes of that type, used to find fusion start points.
+        var typeMapNodes: [String : [Node]] = [:]
+        let block = originProgramDesc.blocks[0]
+
+        for opDesc in block.ops {
+            guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else {
+                fatalError(" op type \(opDesc.type) is not registered in opInfos")
+            }
+
+            let node = Node.init(inOpDesc: opDesc)
+
+            // Connect this node to the producers of each of its inputs.
+            for inputKey in opInputKeys {
+                if let inputs = opDesc.inputs[inputKey] {
+                    for input in inputs {
+                        if let inputNode = mapForNodeChain[input] {
+                            _ = inputNode --> node
+                        }
+                    }
+                }
+            }
+
+            // Register this node as the producer of each of its outputs.
+            for outputKey in outputKeys {
+                if let outputs = opDesc.outputs[outputKey] {
+                    for output in outputs {
+                        mapForNodeChain[output] = node
+                    }
+                }
+            }
+
+            nodes.append(node)
+            typeMapNodes[opDesc.type, default: []].append(node)
+        }
+
+        for fusion in fusionOps {
+            let fusionNode = fusion.fusionNode()
+            let depth = fusionNode.depth()
+            guard let toMatchNodes = typeMapNodes[fusionNode.type] else {
+                continue
+            }
+            for node in toMatchNodes {
+                // Compare a type-only copy of the graph below `node`, truncated
+                // to the pattern's depth, against the pattern itself.
+                let toNode = node.to(depth: depth)
+                if toNode == fusionNode {   // match
+                    var removeNodes: [Node] = []
+                    node.folderWith(fusion: fusion, removedNodes: &removeNodes)
+                    for removeNode in removeNodes {
+                        nodes.remove(element: removeNode)
+                    }
+                }
+            }
+        }
+
+        var ops: [OpDesc] = []
+        for node in nodes {
+            // Every node was created from an OpDesc above, so this never fires
+            // in practice; the message documents the invariant.
+            guard let desc = node.opDesc else {
+                fatalError(" node of type \(node.type) has no op desc")
+            }
+            ops.append(desc)
+        }
+
+        var newProgramDesc = ProgramDesc.init()
+        let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
+        newProgramDesc.blocks.append(newBlock)
+        return newProgramDesc
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/Scope.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/Scope.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Name-indexed storage of `Variant` values, with two distinguished keys for the
+/// feed (input) and fetch (output) entries.
+class Scope {
+    /// Backing storage, keyed by variable name.
+    var vars: [String : Variant] = [:]
+    /// Key under which the input is stored.
+    let feedKey: String
+    /// Key under which the output is stored.
+    let fetchKey: String
+
+    init(inFeedKey: String, inFetchKey: String) {
+        feedKey = inFeedKey
+        fetchKey = inFetchKey
+    }
+
+    /// Reads or writes the entry named `key`; assigning nil removes the entry.
+    subscript(key: String) -> Variant? {
+        get {
+            return vars[key]
+        }
+        set {
+            vars[key] = newValue
+        }
+    }
+
+    /// Stores `input` under `feedKey`.
+    func setInput(input: Variant) {
+        self[feedKey] = input
+    }
+
+    /// Stores `output` under `fetchKey`.
+    func setOutput(output: Variant) {
+        self[fetchKey] = output
+    }
+
+    /// Entry stored under `feedKey`, if any.
+    func input() -> Variant? {
+        return self[feedKey]
+    }
+
+    /// Entry stored under `fetchKey`, if any.
+    func output() -> Variant? {
+        return self[fetchKey]
+    }
+
+    /// Removes every entry, including the feed and fetch entries.
+    func clear() {
+        vars.removeAll()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Dimensions and element type of a tensor as read from its protobuf description.
+struct TensorDesc {
+    /// Dimensions as stored; non-positive protobuf entries are replaced by 1.
+    let dims: [Int]
+    /// Element type, or `.ErrorType` when the protobuf value is unknown.
+    let dataType: VarTypeType
+    /// Layout `dims` is expressed in; always `.NCHW` here.
+    let dataLayout: DataLayout = .NCHW
+
+    init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) {
+        dims = protoTensorDesc.dims.map { (raw) -> Int in
+            let extent = Int(raw)
+            return extent > 0 ? extent : 1
+        }
+        dataType = VarTypeType(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
+    }
+
+    /// `dims` expressed for the NCHW layout; non-4D dims are returned untouched.
+    var NCHWDim: [Int] {
+        return dims(forLayout: .NCHW, convertingFrom: .NHWC)
+    }
+
+    /// `dims` expressed for the NHWC layout; non-4D dims are returned untouched.
+    var NHWCDim: [Int] {
+        return dims(forLayout: .NHWC, convertingFrom: .NCHW)
+    }
+
+    /// Shared implementation of the two layout accessors.
+    ///
+    /// Returns `dims` unchanged when it is not 4-dimensional or is already in
+    /// `target`; when it is in `source`, entries 1 and 3 are exchanged. Any other
+    /// layout terminates via `fatalError`.
+    /// NOTE(review): exchanging entries 1 and 3 maps N,C,H,W to N,W,H,C (and
+    /// N,H,W,C to N,C,W,H), i.e. height and width trade places as well. That is
+    /// only a true NCHW<->NHWC conversion when H == W — confirm against the callers.
+    private func dims(forLayout target: DataLayout, convertingFrom source: DataLayout) -> [Int] {
+        guard dims.count == 4 else {
+            return dims
+        }
+        if dataLayout == target {
+            return dims
+        }
+        if dataLayout == source {
+            var exchanged = dims
+            exchanged.swapAt(1, 3)
+            return exchanged
+        }
+        fatalError(" not support other layout")
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Variable/element type tags; the raw values are the integers used to build this
+/// enum from the protobuf descriptions (`-1` marks an unrecognised value).
+enum VarTypeType: Int {
+    case ErrorType = -1
+    case Bool = 0
+    case Int16 = 1
+    case Int32 = 2
+    case Int64 = 3
+    case FP16 = 4
+    case FP32 = 5
+    case FP64 = 6
+    case LodTensor = 7
+    case SelectedRows = 8
+    case FeedMiniBatch = 9
+    case FetchList = 10
+    case StepScopes = 11
+    case StepLodRankTable = 12
+    case StepLodTensorArray = 13
+    case StepPlaceList = 14
+    case Reader = 15
+    case Channel = 16
+    case Raw = 17
+    case Tuple = 18
+
+    /// Size in bytes of one element of this type.
+    ///
+    /// Only `Bool`, `Int32`, `Int64`, `FP16`, `FP32` and `FP64` have a size.
+    /// - Throws: `PaddleMobileError.memoryError` for every other case.
+    func dataTypeSize() throws -> Int {
+        switch self {
+        case .Bool:
+            return 1
+        case .FP16:
+            return 2
+        case .FP32, .Int32:
+            return 4
+        case .FP64, .Int64:
+            return 8
+        default:
+            throw PaddleMobileError.memoryError(message: "not support \(self) type to get size ")
+        }
+    }
+}
+
+/// Swift-side summary of a `PaddleMobile_Framework_Proto_VarDesc`: its name,
+/// persistable flag, variable type and, for tensor-like types, the tensor shape.
+struct VarDesc {
+    let name: String
+    let persistable: Bool
+    let type: VarTypeType
+    /// Set only for `.SelectedRows`, `.LodTensor` and `.StepLodTensorArray`; nil otherwise.
+    let tensorDesc: TensorDesc?
+
+    /// Copies the fields out of the protobuf message.
+    ///
+    /// A proto type number with no `VarTypeType` case maps to `.ErrorType`,
+    /// which, like every other non-tensor type, leaves `tensorDesc` nil.
+    init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) {
+        let protoType = protoVarDesc.type
+        let resolvedType = VarTypeType(rawValue: protoType.type.rawValue) ?? .ErrorType
+
+        // Pick the proto tensor description that belongs to this variable type, if any.
+        let protoTensor: PaddleMobile_Framework_Proto_VarType.TensorDesc?
+        switch resolvedType {
+        case .LodTensor:
+            protoTensor = protoType.lodTensor.tensor
+        case .SelectedRows:
+            protoTensor = protoType.selectedRows
+        case .StepLodTensorArray:
+            protoTensor = protoType.tensorArray.tensor
+        default:
+            protoTensor = nil
+        }
+
+        name = protoVarDesc.name
+        persistable = protoVarDesc.persistable
+        type = resolvedType
+        tensorDesc = protoTensor.map { TensorDesc(protoTensorDesc: $0) }
+    }
+}
+
+extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// Human-readable summary: the variable name followed by either the
+    /// dimension count and dimension list of `tensorDesc`, or " no dim info"
+    /// when there is no tensor description.
+    var description: String {
+        let header = "var name \(name): \n"
+        guard let shapeInfo = tensorDesc else {
+            return header + " no dim info"
+        }
+        let dimensions = shapeInfo.dims
+        let countLine = " dim size: \(dimensions.count) \n"
+        let dimsLine = "    dim: \(dimensions) \n"
+        return header + countLine + dimsLine
+    }
+
+    /// Same text as `description`.
+    var debugDescription: String {
+        return description
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Program/framework.pb.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/framework.pb.swift
+// DO NOT EDIT.
+//
+// Generated by the Swift generator plugin for the protocol buffer compiler.
+// Source: framework.proto
+//
+// For information on using the generated types, please see the documentation:
+//   https://github.com/apple/swift-protobuf/
+
+// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+//
+//Licensed under the Apache License, Version 2.0 (the "License");
+//you may not use this file except in compliance with the License.
+//You may obtain a copy of the License at
+//
+//http://www.apache.org/licenses/LICENSE-2.0
+//
+//Unless required by applicable law or agreed to in writing, software
+//distributed under the License is distributed on an "AS IS" BASIS,
+//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//See the License for the specific language governing permissions and
+//limitations under the License. 
+
+import Foundation
+import SwiftProtobuf
+
+// If the compiler emits an error on this type, it is because this file
+// was generated by a version of the `protoc` Swift plug-in that is
+// incompatible with the version of SwiftProtobuf to which you are linking.
+// Please ensure that you are building against the same version of the API
+// that was used to generate this file.
+fileprivate struct _GeneratedWithProtocGenSwiftVersion: SwiftProtobuf.ProtobufAPIVersionCheck {
+  struct _2: SwiftProtobuf.ProtobufAPIVersion_2 {}
+  typealias Version = _2
+}
+
+enum PaddleMobile_Framework_Proto_AttrType: SwiftProtobuf.Enum { // generated from framework.proto; used as the `type` of the Attr messages
+  typealias RawValue = Int // the raw value is the proto enum number noted beside each case
+  case int // = 0 (also the value produced by init())
+  case float // = 1
+  case string // = 2
+  case ints // = 3
+  case floats // = 4
+  case strings // = 5
+  case boolean // = 6
+  case booleans // = 7
+  case block // = 8
+  case long // = 9
+
+  init() { // default-constructs to the zero-numbered case
+    self = .int
+  }
+
+  init?(rawValue: Int) { // proto enum number -> case; nil for any number outside 0...9
+    switch rawValue {
+    case 0: self = .int
+    case 1: self = .float
+    case 2: self = .string
+    case 3: self = .ints
+    case 4: self = .floats
+    case 5: self = .strings
+    case 6: self = .boolean
+    case 7: self = .booleans
+    case 8: self = .block
+    case 9: self = .long
+    default: return nil
+    }
+  }
+
+  var rawValue: Int { // inverse of init?(rawValue:): the proto enum number of this case
+    switch self {
+    case .int: return 0
+    case .float: return 1
+    case .string: return 2
+    case .ints: return 3
+    case .floats: return 4
+    case .strings: return 5
+    case .boolean: return 6
+    case .booleans: return 7
+    case .block: return 8
+    case .long: return 9
+    }
+  }
+
+}
+
+/// OpDesc describes an instance of a C++ framework::OperatorBase
+/// derived class type: a `type` string, `inputs`/`outputs`, `attrs` and an `isTarget` flag.
+struct PaddleMobile_Framework_Proto_OpDesc {
+  // Message conformance is added in an extension below; see `Message` and
+  // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+  // Singular fields are backed by fileprivate optionals: nil means "not set" and the getter returns the default.
+
+  var type: String {
+    get {return _type ?? String()}
+    set {_type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return self._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {self._type = nil}
+
+  var inputs: [PaddleMobile_Framework_Proto_OpDesc.Var] = [] // repeated field: no has/clear pair, empty array by default
+
+  var outputs: [PaddleMobile_Framework_Proto_OpDesc.Var] = []
+
+  var attrs: [PaddleMobile_Framework_Proto_OpDesc.Attr] = []
+
+  var isTarget: Bool {
+    get {return _isTarget ?? false}
+    set {_isTarget = newValue}
+  }
+  /// Returns true if `isTarget` has been explicitly set.
+  var hasIsTarget: Bool {return self._isTarget != nil}
+  /// Clears the value of `isTarget`. Subsequent reads from it will return its default value.
+  mutating func clearIsTarget() {self._isTarget = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  struct Attr {
+    // Message conformance is added in an extension below; see `Message` and
+    // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+    // Besides `name` and `type` there are ten value fields; by name they line up with the ten AttrType cases (TODO confirm against framework.proto).
+
+    var name: String {
+      get {return _name ?? String()}
+      set {_name = newValue}
+    }
+    /// Returns true if `name` has been explicitly set.
+    var hasName: Bool {return self._name != nil}
+    /// Clears the value of `name`. Subsequent reads from it will return its default value.
+    mutating func clearName() {self._name = nil}
+
+    var type: PaddleMobile_Framework_Proto_AttrType {
+      get {return _type ?? .int}
+      set {_type = newValue}
+    }
+    /// Returns true if `type` has been explicitly set.
+    var hasType: Bool {return self._type != nil}
+    /// Clears the value of `type`. Subsequent reads from it will return its default value.
+    mutating func clearType() {self._type = nil}
+
+    var i: Int32 {
+      get {return _i ?? 0}
+      set {_i = newValue}
+    }
+    /// Returns true if `i` has been explicitly set.
+    var hasI: Bool {return self._i != nil}
+    /// Clears the value of `i`. Subsequent reads from it will return its default value.
+    mutating func clearI() {self._i = nil}
+
+    var f: Float {
+      get {return _f ?? 0}
+      set {_f = newValue}
+    }
+    /// Returns true if `f` has been explicitly set.
+    var hasF: Bool {return self._f != nil}
+    /// Clears the value of `f`. Subsequent reads from it will return its default value.
+    mutating func clearF() {self._f = nil}
+
+    var s: String {
+      get {return _s ?? String()}
+      set {_s = newValue}
+    }
+    /// Returns true if `s` has been explicitly set.
+    var hasS: Bool {return self._s != nil}
+    /// Clears the value of `s`. Subsequent reads from it will return its default value.
+    mutating func clearS() {self._s = nil}
+
+    var ints: [Int32] = []
+
+    var floats: [Float] = []
+
+    var strings: [String] = []
+
+    var b: Bool {
+      get {return _b ?? false}
+      set {_b = newValue}
+    }
+    /// Returns true if `b` has been explicitly set.
+    var hasB: Bool {return self._b != nil}
+    /// Clears the value of `b`. Subsequent reads from it will return its default value.
+    mutating func clearB() {self._b = nil}
+
+    var bools: [Bool] = []
+
+    var blockIdx: Int32 {
+      get {return _blockIdx ?? 0}
+      set {_blockIdx = newValue}
+    }
+    /// Returns true if `blockIdx` has been explicitly set.
+    var hasBlockIdx: Bool {return self._blockIdx != nil}
+    /// Clears the value of `blockIdx`. Subsequent reads from it will return its default value.
+    mutating func clearBlockIdx() {self._blockIdx = nil}
+
+    var l: Int64 {
+      get {return _l ?? 0}
+      set {_l = newValue}
+    }
+    /// Returns true if `l` has been explicitly set.
+    var hasL: Bool {return self._l != nil}
+    /// Clears the value of `l`. Subsequent reads from it will return its default value.
+    mutating func clearL() {self._l = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _name: String? = nil
+    fileprivate var _type: PaddleMobile_Framework_Proto_AttrType? = nil
+    fileprivate var _i: Int32? = nil
+    fileprivate var _f: Float? = nil
+    fileprivate var _s: String? = nil
+    fileprivate var _b: Bool? = nil
+    fileprivate var _blockIdx: Int32? = nil
+    fileprivate var _l: Int64? = nil
+  }
+
+  struct Var {
+    // Message conformance is added in an extension below; see `Message` and
+    // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+    // Holds one optional `parameter` string and a repeated `arguments` string list.
+
+    var parameter: String {
+      get {return _parameter ?? String()}
+      set {_parameter = newValue}
+    }
+    /// Returns true if `parameter` has been explicitly set.
+    var hasParameter: Bool {return self._parameter != nil}
+    /// Clears the value of `parameter`. Subsequent reads from it will return its default value.
+    mutating func clearParameter() {self._parameter = nil}
+
+    var arguments: [String] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _parameter: String? = nil
+  }
+
+  init() {}
+
+  fileprivate var _type: String? = nil
+  fileprivate var _isTarget: Bool? = nil
+}
+
+/// OpProto describes a C++ framework::OperatorBase derived class: `type`, `inputs`, `outputs`, `attrs` and a `comment`.
+struct PaddleMobile_Framework_Proto_OpProto {
+  // Message conformance is added in an extension below; see `Message` and
+  // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+  // Singular fields are backed by fileprivate optionals: nil means "not set" and the getter returns the default.
+
+  var type: String {
+    get {return _type ?? String()}
+    set {_type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return self._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {self._type = nil}
+
+  var inputs: [PaddleMobile_Framework_Proto_OpProto.Var] = [] // repeated field: no has/clear pair, empty array by default
+
+  var outputs: [PaddleMobile_Framework_Proto_OpProto.Var] = []
+
+  var attrs: [PaddleMobile_Framework_Proto_OpProto.Attr] = []
+
+  var comment: String {
+    get {return _comment ?? String()}
+    set {_comment = newValue}
+  }
+  /// Returns true if `comment` has been explicitly set.
+  var hasComment: Bool {return self._comment != nil}
+  /// Clears the value of `comment`. Subsequent reads from it will return its default value.
+  mutating func clearComment() {self._comment = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  /// VarProto describes the C++ type framework::Variable.
+  struct Var {
+    // Message conformance is added in an extension below; see `Message` and
+    // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+    // Fields: `name`, `comment`, and three flags (`duplicable`, `intermediate`, `dispensable`) that read as false when unset.
+
+    var name: String {
+      get {return _name ?? String()}
+      set {_name = newValue}
+    }
+    /// Returns true if `name` has been explicitly set.
+    var hasName: Bool {return self._name != nil}
+    /// Clears the value of `name`. Subsequent reads from it will return its default value.
+    mutating func clearName() {self._name = nil}
+
+    var comment: String {
+      get {return _comment ?? String()}
+      set {_comment = newValue}
+    }
+    /// Returns true if `comment` has been explicitly set.
+    var hasComment: Bool {return self._comment != nil}
+    /// Clears the value of `comment`. Subsequent reads from it will return its default value.
+    mutating func clearComment() {self._comment = nil}
+
+    var duplicable: Bool {
+      get {return _duplicable ?? false}
+      set {_duplicable = newValue}
+    }
+    /// Returns true if `duplicable` has been explicitly set.
+    var hasDuplicable: Bool {return self._duplicable != nil}
+    /// Clears the value of `duplicable`. Subsequent reads from it will return its default value.
+    mutating func clearDuplicable() {self._duplicable = nil}
+
+    var intermediate: Bool {
+      get {return _intermediate ?? false}
+      set {_intermediate = newValue}
+    }
+    /// Returns true if `intermediate` has been explicitly set.
+    var hasIntermediate: Bool {return self._intermediate != nil}
+    /// Clears the value of `intermediate`. Subsequent reads from it will return its default value.
+    mutating func clearIntermediate() {self._intermediate = nil}
+
+    var dispensable: Bool {
+      get {return _dispensable ?? false}
+      set {_dispensable = newValue}
+    }
+    /// Returns true if `dispensable` has been explicitly set.
+    var hasDispensable: Bool {return self._dispensable != nil}
+    /// Clears the value of `dispensable`. Subsequent reads from it will return its default value.
+    mutating func clearDispensable() {self._dispensable = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _name: String? = nil
+    fileprivate var _comment: String? = nil
+    fileprivate var _duplicable: Bool? = nil
+    fileprivate var _intermediate: Bool? = nil
+    fileprivate var _dispensable: Bool? = nil
+  }
+
+  /// AttrProto describes the C++ type Attribute.
+  struct Attr {
+    // Message conformance is added in an extension below; see `Message` and
+    // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+    // Fields: `name`, `type` (an AttrType, `.int` when unset), `comment` and the `generated` flag.
+
+    var name: String {
+      get {return _name ?? String()}
+      set {_name = newValue}
+    }
+    /// Returns true if `name` has been explicitly set.
+    var hasName: Bool {return self._name != nil}
+    /// Clears the value of `name`. Subsequent reads from it will return its default value.
+    mutating func clearName() {self._name = nil}
+
+    var type: PaddleMobile_Framework_Proto_AttrType {
+      get {return _type ?? .int}
+      set {_type = newValue}
+    }
+    /// Returns true if `type` has been explicitly set.
+    var hasType: Bool {return self._type != nil}
+    /// Clears the value of `type`. Subsequent reads from it will return its default value.
+    mutating func clearType() {self._type = nil}
+
+    var comment: String {
+      get {return _comment ?? String()}
+      set {_comment = newValue}
+    }
+    /// Returns true if `comment` has been explicitly set.
+    var hasComment: Bool {return self._comment != nil}
+    /// Clears the value of `comment`. Subsequent reads from it will return its default value.
+    mutating func clearComment() {self._comment = nil}
+
+    /// If that attribute is generated, it means the Paddle third
+    /// language binding has responsibility to fill that
+    /// attribute. End-User should not set that attribute.
+    var generated: Bool {
+      get {return _generated ?? false}
+      set {_generated = newValue}
+    }
+    /// Returns true if `generated` has been explicitly set.
+    var hasGenerated: Bool {return self._generated != nil}
+    /// Clears the value of `generated`. Subsequent reads from it will return its default value.
+    mutating func clearGenerated() {self._generated = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _name: String? = nil
+    fileprivate var _type: PaddleMobile_Framework_Proto_AttrType? = nil
+    fileprivate var _comment: String? = nil
+    fileprivate var _generated: Bool? = nil
+  }
+
+  init() {}
+
+  fileprivate var _type: String? = nil
+  fileprivate var _comment: String? = nil
+}
+
+/// Generated message (framework.proto) describing the type of a variable.
+///
+/// `type` holds the kind; the other singular fields (`selectedRows`,
+/// `lodTensor`, `tensorArray`, `reader`, `channel`, `tuple`) hold the
+/// additional description that some kinds need.
+///
+/// All singular fields are read from `_storage` and mutated through
+/// `_uniqueStorage()`. `_uniqueStorage()` is declared outside this struct body;
+/// going by its name and its use in every setter it hands back storage that is
+/// safe to mutate for this value. The `clear*` methods go through it as well,
+/// because `_storage` starts out as the shared `_StorageClass.defaultInstance`
+/// and must not be written to directly.
+struct PaddleMobile_Framework_Proto_VarType {
+  // SwiftProtobuf.Message conformance is added in an extension below. See the
+  // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+  // methods supported on all messages.
+
+  var type: PaddleMobile_Framework_Proto_VarType.TypeEnum {
+    get {return _storage._type ?? .bool}
+    set {_uniqueStorage()._type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return _storage._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {_uniqueStorage()._type = nil}
+
+  var selectedRows: PaddleMobile_Framework_Proto_VarType.TensorDesc {
+    get {return _storage._selectedRows ?? PaddleMobile_Framework_Proto_VarType.TensorDesc()}
+    set {_uniqueStorage()._selectedRows = newValue}
+  }
+  /// Returns true if `selectedRows` has been explicitly set.
+  var hasSelectedRows: Bool {return _storage._selectedRows != nil}
+  /// Clears the value of `selectedRows`. Subsequent reads from it will return its default value.
+  mutating func clearSelectedRows() {_uniqueStorage()._selectedRows = nil}
+
+  var lodTensor: PaddleMobile_Framework_Proto_VarType.LoDTensorDesc {
+    get {return _storage._lodTensor ?? PaddleMobile_Framework_Proto_VarType.LoDTensorDesc()}
+    set {_uniqueStorage()._lodTensor = newValue}
+  }
+  /// Returns true if `lodTensor` has been explicitly set.
+  var hasLodTensor: Bool {return _storage._lodTensor != nil}
+  /// Clears the value of `lodTensor`. Subsequent reads from it will return its default value.
+  mutating func clearLodTensor() {_uniqueStorage()._lodTensor = nil}
+
+  var tensorArray: PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc {
+    get {return _storage._tensorArray ?? PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc()}
+    set {_uniqueStorage()._tensorArray = newValue}
+  }
+  /// Returns true if `tensorArray` has been explicitly set.
+  var hasTensorArray: Bool {return _storage._tensorArray != nil}
+  /// Clears the value of `tensorArray`. Subsequent reads from it will return its default value.
+  mutating func clearTensorArray() {_uniqueStorage()._tensorArray = nil}
+
+  var reader: PaddleMobile_Framework_Proto_VarType.ReaderDesc {
+    get {return _storage._reader ?? PaddleMobile_Framework_Proto_VarType.ReaderDesc()}
+    set {_uniqueStorage()._reader = newValue}
+  }
+  /// Returns true if `reader` has been explicitly set.
+  var hasReader: Bool {return _storage._reader != nil}
+  /// Clears the value of `reader`. Subsequent reads from it will return its default value.
+  mutating func clearReader() {_uniqueStorage()._reader = nil}
+
+  var channel: PaddleMobile_Framework_Proto_VarType.ChannelDesc {
+    get {return _storage._channel ?? PaddleMobile_Framework_Proto_VarType.ChannelDesc()}
+    set {_uniqueStorage()._channel = newValue}
+  }
+  /// Returns true if `channel` has been explicitly set.
+  var hasChannel: Bool {return _storage._channel != nil}
+  /// Clears the value of `channel`. Subsequent reads from it will return its default value.
+  mutating func clearChannel() {_uniqueStorage()._channel = nil}
+
+  var tuple: PaddleMobile_Framework_Proto_VarType.Tuple {
+    get {return _storage._tuple ?? PaddleMobile_Framework_Proto_VarType.Tuple()}
+    set {_uniqueStorage()._tuple = newValue}
+  }
+  /// Returns true if `tuple` has been explicitly set.
+  var hasTuple: Bool {return _storage._tuple != nil}
+  /// Clears the value of `tuple`. Subsequent reads from it will return its default value.
+  mutating func clearTuple() {_uniqueStorage()._tuple = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  /// Kind of a variable or tensor element. The raw value is the proto enum
+  /// number noted beside each case; `init()` yields `.bool` and
+  /// `init?(rawValue:)` returns nil for numbers outside 0...18.
+  enum TypeEnum: SwiftProtobuf.Enum {
+    typealias RawValue = Int
+
+    /// Pod Types
+    case bool // = 0
+    case int16 // = 1
+    case int32 // = 2
+    case int64 // = 3
+    case fp16 // = 4
+    case fp32 // = 5
+    case fp64 // = 6
+
+    /// Other types that may need additional descriptions
+    case lodTensor // = 7
+    case selectedRows // = 8
+    case feedMinibatch // = 9
+    case fetchList // = 10
+    case stepScopes // = 11
+    case lodRankTable // = 12
+    case lodTensorArray // = 13
+    case placeList // = 14
+    case reader // = 15
+    case channel // = 16
+
+    /// Any runtime decided variable type is raw
+    /// raw variables should manage their own allocations
+    /// in operators like nccl_op
+    case raw // = 17
+    case tuple // = 18
+
+    init() {
+      self = .bool
+    }
+
+    init?(rawValue: Int) {
+      switch rawValue {
+      case 0: self = .bool
+      case 1: self = .int16
+      case 2: self = .int32
+      case 3: self = .int64
+      case 4: self = .fp16
+      case 5: self = .fp32
+      case 6: self = .fp64
+      case 7: self = .lodTensor
+      case 8: self = .selectedRows
+      case 9: self = .feedMinibatch
+      case 10: self = .fetchList
+      case 11: self = .stepScopes
+      case 12: self = .lodRankTable
+      case 13: self = .lodTensorArray
+      case 14: self = .placeList
+      case 15: self = .reader
+      case 16: self = .channel
+      case 17: self = .raw
+      case 18: self = .tuple
+      default: return nil
+      }
+    }
+
+    var rawValue: Int {
+      switch self {
+      case .bool: return 0
+      case .int16: return 1
+      case .int32: return 2
+      case .int64: return 3
+      case .fp16: return 4
+      case .fp32: return 5
+      case .fp64: return 6
+      case .lodTensor: return 7
+      case .selectedRows: return 8
+      case .feedMinibatch: return 9
+      case .fetchList: return 10
+      case .stepScopes: return 11
+      case .lodRankTable: return 12
+      case .lodTensorArray: return 13
+      case .placeList: return 14
+      case .reader: return 15
+      case .channel: return 16
+      case .raw: return 17
+      case .tuple: return 18
+      }
+    }
+
+  }
+
+  /// Element type plus dimension list of a tensor. `dataType` is backed by a
+  /// plain fileprivate optional, so no storage class is involved here.
+  struct TensorDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    /// Should only be PODType. Is enforced in C++
+    var dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum {
+      get {return _dataType ?? .bool}
+      set {_dataType = newValue}
+    }
+    /// Returns true if `dataType` has been explicitly set.
+    var hasDataType: Bool {return self._dataType != nil}
+    /// Clears the value of `dataType`. Subsequent reads from it will return its default value.
+    mutating func clearDataType() {self._dataType = nil}
+
+    /// [UNK, 640, 480] is saved as [-1, 640, 480]
+    var dims: [Int64] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum? = nil
+  }
+
+  /// A `TensorDesc` together with a `lodLevel` (0 when unset). Storage-backed:
+  /// setters and `clear*` both mutate through `_uniqueStorage()`.
+  struct LoDTensorDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc {
+      get {return _storage._tensor ?? PaddleMobile_Framework_Proto_VarType.TensorDesc()}
+      set {_uniqueStorage()._tensor = newValue}
+    }
+    /// Returns true if `tensor` has been explicitly set.
+    var hasTensor: Bool {return _storage._tensor != nil}
+    /// Clears the value of `tensor`. Subsequent reads from it will return its default value.
+    mutating func clearTensor() {_uniqueStorage()._tensor = nil}
+
+    var lodLevel: Int32 {
+      get {return _storage._lodLevel ?? 0}
+      set {_uniqueStorage()._lodLevel = newValue}
+    }
+    /// Returns true if `lodLevel` has been explicitly set.
+    var hasLodLevel: Bool {return _storage._lodLevel != nil}
+    /// Clears the value of `lodLevel`. Subsequent reads from it will return its default value.
+    mutating func clearLodLevel() {_uniqueStorage()._lodLevel = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _storage = _StorageClass.defaultInstance
+  }
+
+  /// Same shape as `LoDTensorDesc` (a `tensor` and a `lodLevel`), used for the
+  /// `tensorArray` field. Storage-backed: setters and `clear*` both mutate
+  /// through `_uniqueStorage()`.
+  struct LoDTensorArrayDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc {
+      get {return _storage._tensor ?? PaddleMobile_Framework_Proto_VarType.TensorDesc()}
+      set {_uniqueStorage()._tensor = newValue}
+    }
+    /// Returns true if `tensor` has been explicitly set.
+    var hasTensor: Bool {return _storage._tensor != nil}
+    /// Clears the value of `tensor`. Subsequent reads from it will return its default value.
+    mutating func clearTensor() {_uniqueStorage()._tensor = nil}
+
+    var lodLevel: Int32 {
+      get {return _storage._lodLevel ?? 0}
+      set {_uniqueStorage()._lodLevel = newValue}
+    }
+    /// Returns true if `lodLevel` has been explicitly set.
+    var hasLodLevel: Bool {return _storage._lodLevel != nil}
+    /// Clears the value of `lodLevel`. Subsequent reads from it will return its default value.
+    mutating func clearLodLevel() {_uniqueStorage()._lodLevel = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _storage = _StorageClass.defaultInstance
+  }
+
+  /// Holds a repeated list of `LoDTensorDesc`.
+  struct ReaderDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var lodTensor: [PaddleMobile_Framework_Proto_VarType.LoDTensorDesc] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+  }
+
+  /// A `dataType` (`.bool` when unset) and a `capacity` (0 when unset), both
+  /// backed by plain fileprivate optionals.
+  struct ChannelDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum {
+      get {return _dataType ?? .bool}
+      set {_dataType = newValue}
+    }
+    /// Returns true if `dataType` has been explicitly set.
+    var hasDataType: Bool {return self._dataType != nil}
+    /// Clears the value of `dataType`. Subsequent reads from it will return its default value.
+    mutating func clearDataType() {self._dataType = nil}
+
+    var capacity: Int64 {
+      get {return _capacity ?? 0}
+      set {_capacity = newValue}
+    }
+    /// Returns true if `capacity` has been explicitly set.
+    var hasCapacity: Bool {return self._capacity != nil}
+    /// Clears the value of `capacity`. Subsequent reads from it will return its default value.
+    mutating func clearCapacity() {self._capacity = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum? = nil
+    fileprivate var _capacity: Int64? = nil
+  }
+
+  /// Holds a repeated list of element types.
+  struct Tuple {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var elementType: [PaddleMobile_Framework_Proto_VarType.TypeEnum] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+  }
+
+  init() {}
+
+  fileprivate var _storage = _StorageClass.defaultInstance
+}
+
+/// Generated message (framework.proto) describing one variable: its `name`,
+/// its `type` (a `PaddleMobile_Framework_Proto_VarType`) and a `persistable`
+/// flag that reads as false when unset.
+///
+/// Fields are read from `_storage` and mutated through `_uniqueStorage()`.
+/// `_uniqueStorage()` is declared outside this struct body; going by its name
+/// and its use in every setter it hands back storage that is safe to mutate for
+/// this value. The `clear*` methods go through it as well, because `_storage`
+/// starts out as the shared `_StorageClass.defaultInstance` and must not be
+/// written to directly.
+struct PaddleMobile_Framework_Proto_VarDesc {
+  // SwiftProtobuf.Message conformance is added in an extension below. See the
+  // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+  // methods supported on all messages.
+
+  var name: String {
+    get {return _storage._name ?? String()}
+    set {_uniqueStorage()._name = newValue}
+  }
+  /// Returns true if `name` has been explicitly set.
+  var hasName: Bool {return _storage._name != nil}
+  /// Clears the value of `name`. Subsequent reads from it will return its default value.
+  mutating func clearName() {_uniqueStorage()._name = nil}
+
+  var type: PaddleMobile_Framework_Proto_VarType {
+    get {return _storage._type ?? PaddleMobile_Framework_Proto_VarType()}
+    set {_uniqueStorage()._type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return _storage._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {_uniqueStorage()._type = nil}
+
+  var persistable: Bool {
+    get {return _storage._persistable ?? false}
+    set {_uniqueStorage()._persistable = newValue}
+  }
+  /// Returns true if `persistable` has been explicitly set.
+  var hasPersistable: Bool {return _storage._persistable != nil}
+  /// Clears the value of `persistable`. Subsequent reads from it will return its default value.
+  mutating func clearPersistable() {_uniqueStorage()._persistable = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  init() {}
+
+  fileprivate var _storage = _StorageClass.defaultInstance
+}
+
+struct PaddleMobile_Framework_Proto_BlockDesc {
+  // Message conformance is added in an extension below; see `Message` and
+  // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+  // One block: its `idx`, `parentIdx`, `forwardBlockIdx`, and the repeated `vars` and `ops` it contains.
+
+  var idx: Int32 {
+    get {return _idx ?? 0}
+    set {_idx = newValue}
+  }
+  /// Returns true if `idx` has been explicitly set.
+  var hasIdx: Bool {return self._idx != nil}
+  /// Clears the value of `idx`. Subsequent reads from it will return its default value.
+  mutating func clearIdx() {self._idx = nil}
+
+  var parentIdx: Int32 {
+    get {return _parentIdx ?? 0}
+    set {_parentIdx = newValue}
+  }
+  /// Returns true if `parentIdx` has been explicitly set.
+  var hasParentIdx: Bool {return self._parentIdx != nil}
+  /// Clears the value of `parentIdx`. Subsequent reads from it will return its default value.
+  mutating func clearParentIdx() {self._parentIdx = nil}
+
+  var vars: [PaddleMobile_Framework_Proto_VarDesc] = [] // repeated field: no has/clear pair, empty array by default
+
+  var ops: [PaddleMobile_Framework_Proto_OpDesc] = []
+
+  var forwardBlockIdx: Int32 {
+    get {return _forwardBlockIdx ?? -1} // unlike idx/parentIdx, the unset default is -1
+    set {_forwardBlockIdx = newValue}
+  }
+  /// Returns true if `forwardBlockIdx` has been explicitly set.
+  var hasForwardBlockIdx: Bool {return self._forwardBlockIdx != nil}
+  /// Clears the value of `forwardBlockIdx`. Subsequent reads from it will return its default value.
+  mutating func clearForwardBlockIdx() {self._forwardBlockIdx = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  init() {}
+
+  fileprivate var _idx: Int32? = nil
+  fileprivate var _parentIdx: Int32? = nil
+  fileprivate var _forwardBlockIdx: Int32? = nil
+}
+
+/// Please refer to
+/// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
+/// for more details.
+/// TODO(panyx0718): A model can have multiple programs. Need a
+/// way to distinguish them. Maybe ID or name?
+struct PaddleMobile_Framework_Proto_ProgramDesc {
+  // Message conformance is added in an extension below; see `Message` and
+  // `Message+*Additions` in SwiftProtobuf for methods common to all messages.
+  // The only declared field is `blocks`, a repeated list of BlockDesc (empty by default).
+
+  var blocks: [PaddleMobile_Framework_Proto_BlockDesc] = []
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  init() {}
+}
+
+// MARK: - Code below here is support for the SwiftProtobuf runtime.
+
+fileprivate let _protobuf_package = "paddle_mobile.framework.proto" // proto package name; prefixed to a message's short name to form its `protoMessageName`
+
+extension PaddleMobile_Framework_Proto_AttrType: SwiftProtobuf._ProtoNameProviding {
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [ // proto enum number -> proto-side name of each AttrType case
+    0: .same(proto: "INT"),
+    1: .same(proto: "FLOAT"),
+    2: .same(proto: "STRING"),
+    3: .same(proto: "INTS"),
+    4: .same(proto: "FLOATS"),
+    5: .same(proto: "STRINGS"),
+    6: .same(proto: "BOOLEAN"),
+    7: .same(proto: "BOOLEANS"),
+    8: .same(proto: "BLOCK"),
+    9: .same(proto: "LONG"),
+  ]
+}
+
+/// SwiftProtobuf runtime support for `OpDesc`: name table, initialization
+/// check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".OpDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    3: .same(proto: "type"),
+    1: .same(proto: "inputs"),
+    2: .same(proto: "outputs"),
+    4: .same(proto: "attrs"),
+    5: .standard(proto: "is_target"),
+  ]
+
+  /// `true` once `type` is set and every element of `inputs`, `outputs`
+  /// and `attrs` reports itself as initialized.
+  public var isInitialized: Bool {
+    guard self._type != nil else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.inputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.outputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.attrs) else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// each known one into its property. Numbers without a case fall through
+  /// to `default` and are not handled here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeRepeatedMessageField(value: &self.inputs)
+      case 2:
+        try decoder.decodeRepeatedMessageField(value: &self.outputs)
+      case 3:
+        try decoder.decodeSingularStringField(value: &self._type)
+      case 4:
+        try decoder.decodeRepeatedMessageField(value: &self.attrs)
+      case 5:
+        try decoder.decodeSingularBoolField(value: &self._isTarget)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every populated field to `visitor` in ascending field-number
+  /// order (1 through 5), then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if !self.inputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.inputs, fieldNumber: 1)
+    }
+    if !self.outputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.outputs, fieldNumber: 2)
+    }
+    if let typeName = self._type {
+      try visitor.visitSingularStringField(value: typeName, fieldNumber: 3)
+    }
+    if !self.attrs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.attrs, fieldNumber: 4)
+    }
+    if let targetFlag = self._isTarget {
+      try visitor.visitSingularBoolField(value: targetFlag, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpDesc) -> Bool {
+    guard self._type == other._type else { return false }
+    guard self.inputs == other.inputs else { return false }
+    guard self.outputs == other.outputs else { return false }
+    guard self.attrs == other.attrs else { return false }
+    guard self._isTarget == other._isTarget else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpDesc.Attr`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpDesc.Attr: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpDesc.protoMessageName + ".Attr"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "type"),
+    3: .same(proto: "i"),
+    4: .same(proto: "f"),
+    5: .same(proto: "s"),
+    6: .same(proto: "ints"),
+    7: .same(proto: "floats"),
+    8: .same(proto: "strings"),
+    10: .same(proto: "b"),
+    11: .same(proto: "bools"),
+    12: .standard(proto: "block_idx"),
+    13: .same(proto: "l"),
+  ]
+
+  /// `true` once both `name` and `type` have been set.
+  public var isInitialized: Bool {
+    guard self._name != nil else { return false }
+    guard self._type != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// each known one into its property. There is no case for 9; it and any
+  /// other unlisted number fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularStringField(value: &self._name)
+      case 2:
+        try decoder.decodeSingularEnumField(value: &self._type)
+      case 3:
+        try decoder.decodeSingularInt32Field(value: &self._i)
+      case 4:
+        try decoder.decodeSingularFloatField(value: &self._f)
+      case 5:
+        try decoder.decodeSingularStringField(value: &self._s)
+      case 6:
+        try decoder.decodeRepeatedInt32Field(value: &self.ints)
+      case 7:
+        try decoder.decodeRepeatedFloatField(value: &self.floats)
+      case 8:
+        try decoder.decodeRepeatedStringField(value: &self.strings)
+      case 10:
+        try decoder.decodeSingularBoolField(value: &self._b)
+      case 11:
+        try decoder.decodeRepeatedBoolField(value: &self.bools)
+      case 12:
+        try decoder.decodeSingularInt32Field(value: &self._blockIdx)
+      case 13:
+        try decoder.decodeSingularInt64Field(value: &self._l)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every populated field to `visitor` in ascending field-number
+  /// order, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let attrName = self._name {
+      try visitor.visitSingularStringField(value: attrName, fieldNumber: 1)
+    }
+    if let attrType = self._type {
+      try visitor.visitSingularEnumField(value: attrType, fieldNumber: 2)
+    }
+    if let intValue = self._i {
+      try visitor.visitSingularInt32Field(value: intValue, fieldNumber: 3)
+    }
+    if let floatValue = self._f {
+      try visitor.visitSingularFloatField(value: floatValue, fieldNumber: 4)
+    }
+    if let stringValue = self._s {
+      try visitor.visitSingularStringField(value: stringValue, fieldNumber: 5)
+    }
+    if !self.ints.isEmpty {
+      try visitor.visitRepeatedInt32Field(value: self.ints, fieldNumber: 6)
+    }
+    if !self.floats.isEmpty {
+      try visitor.visitRepeatedFloatField(value: self.floats, fieldNumber: 7)
+    }
+    if !self.strings.isEmpty {
+      try visitor.visitRepeatedStringField(value: self.strings, fieldNumber: 8)
+    }
+    if let boolValue = self._b {
+      try visitor.visitSingularBoolField(value: boolValue, fieldNumber: 10)
+    }
+    if !self.bools.isEmpty {
+      try visitor.visitRepeatedBoolField(value: self.bools, fieldNumber: 11)
+    }
+    if let blockIndex = self._blockIdx {
+      try visitor.visitSingularInt32Field(value: blockIndex, fieldNumber: 12)
+    }
+    if let longValue = self._l {
+      try visitor.visitSingularInt64Field(value: longValue, fieldNumber: 13)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Bool {
+    guard self._name == other._name else { return false }
+    guard self._type == other._type else { return false }
+    guard self._i == other._i else { return false }
+    guard self._f == other._f else { return false }
+    guard self._s == other._s else { return false }
+    guard self.ints == other.ints else { return false }
+    guard self.floats == other.floats else { return false }
+    guard self.strings == other.strings else { return false }
+    guard self._b == other._b else { return false }
+    guard self.bools == other.bools else { return false }
+    guard self._blockIdx == other._blockIdx else { return false }
+    guard self._l == other._l else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpDesc.Var`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpDesc.Var: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpDesc.protoMessageName + ".Var"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "parameter"),
+    2: .same(proto: "arguments"),
+  ]
+
+  /// `true` once `parameter` has been set.
+  public var isInitialized: Bool {
+    guard self._parameter != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// fields 1 and 2; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularStringField(value: &self._parameter)
+      case 2:
+        try decoder.decodeRepeatedStringField(value: &self.arguments)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `parameter` (when set) and `arguments` (when non-empty) to
+  /// `visitor`, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let parameterName = self._parameter {
+      try visitor.visitSingularStringField(value: parameterName, fieldNumber: 1)
+    }
+    if !self.arguments.isEmpty {
+      try visitor.visitRepeatedStringField(value: self.arguments, fieldNumber: 2)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpDesc.Var) -> Bool {
+    guard self._parameter == other._parameter else { return false }
+    guard self.arguments == other.arguments else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpProto`: name table, initialization
+/// check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpProto: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".OpProto"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "type"),
+    2: .same(proto: "inputs"),
+    3: .same(proto: "outputs"),
+    4: .same(proto: "attrs"),
+    5: .same(proto: "comment"),
+  ]
+
+  /// `true` once `type` and `comment` are set and every element of
+  /// `inputs`, `outputs` and `attrs` reports itself as initialized.
+  public var isInitialized: Bool {
+    guard self._type != nil else { return false }
+    guard self._comment != nil else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.inputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.outputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.attrs) else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// fields 1 through 5; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularStringField(value: &self._type)
+      case 2:
+        try decoder.decodeRepeatedMessageField(value: &self.inputs)
+      case 3:
+        try decoder.decodeRepeatedMessageField(value: &self.outputs)
+      case 4:
+        try decoder.decodeRepeatedMessageField(value: &self.attrs)
+      case 5:
+        try decoder.decodeSingularStringField(value: &self._comment)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every populated field to `visitor` in ascending field-number
+  /// order (1 through 5), then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let typeName = self._type {
+      try visitor.visitSingularStringField(value: typeName, fieldNumber: 1)
+    }
+    if !self.inputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.inputs, fieldNumber: 2)
+    }
+    if !self.outputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.outputs, fieldNumber: 3)
+    }
+    if !self.attrs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.attrs, fieldNumber: 4)
+    }
+    if let commentText = self._comment {
+      try visitor.visitSingularStringField(value: commentText, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpProto) -> Bool {
+    guard self._type == other._type else { return false }
+    guard self.inputs == other.inputs else { return false }
+    guard self.outputs == other.outputs else { return false }
+    guard self.attrs == other.attrs else { return false }
+    guard self._comment == other._comment else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpProto.Var`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpProto.Var: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpProto.protoMessageName + ".Var"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "comment"),
+    3: .same(proto: "duplicable"),
+    4: .same(proto: "intermediate"),
+    5: .same(proto: "dispensable"),
+  ]
+
+  /// `true` once both `name` and `comment` have been set.
+  public var isInitialized: Bool {
+    guard self._name != nil else { return false }
+    guard self._comment != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// fields 1 through 5; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularStringField(value: &self._name)
+      case 2:
+        try decoder.decodeSingularStringField(value: &self._comment)
+      case 3:
+        try decoder.decodeSingularBoolField(value: &self._duplicable)
+      case 4:
+        try decoder.decodeSingularBoolField(value: &self._intermediate)
+      case 5:
+        try decoder.decodeSingularBoolField(value: &self._dispensable)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every field that has been set to `visitor` in ascending
+  /// field-number order (1 through 5), then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let varName = self._name {
+      try visitor.visitSingularStringField(value: varName, fieldNumber: 1)
+    }
+    if let commentText = self._comment {
+      try visitor.visitSingularStringField(value: commentText, fieldNumber: 2)
+    }
+    if let duplicableFlag = self._duplicable {
+      try visitor.visitSingularBoolField(value: duplicableFlag, fieldNumber: 3)
+    }
+    if let intermediateFlag = self._intermediate {
+      try visitor.visitSingularBoolField(value: intermediateFlag, fieldNumber: 4)
+    }
+    if let dispensableFlag = self._dispensable {
+      try visitor.visitSingularBoolField(value: dispensableFlag, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpProto.Var) -> Bool {
+    guard self._name == other._name else { return false }
+    guard self._comment == other._comment else { return false }
+    guard self._duplicable == other._duplicable else { return false }
+    guard self._intermediate == other._intermediate else { return false }
+    guard self._dispensable == other._dispensable else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpProto.Attr`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpProto.Attr: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpProto.protoMessageName + ".Attr"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "type"),
+    3: .same(proto: "comment"),
+    4: .same(proto: "generated"),
+  ]
+
+  /// `true` once `name`, `type` and `comment` have all been set.
+  public var isInitialized: Bool {
+    guard self._name != nil else { return false }
+    guard self._type != nil else { return false }
+    guard self._comment != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// fields 1 through 4; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularStringField(value: &self._name)
+      case 2:
+        try decoder.decodeSingularEnumField(value: &self._type)
+      case 3:
+        try decoder.decodeSingularStringField(value: &self._comment)
+      case 4:
+        try decoder.decodeSingularBoolField(value: &self._generated)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every field that has been set to `visitor` in ascending
+  /// field-number order (1 through 4), then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let attrName = self._name {
+      try visitor.visitSingularStringField(value: attrName, fieldNumber: 1)
+    }
+    if let attrType = self._type {
+      try visitor.visitSingularEnumField(value: attrType, fieldNumber: 2)
+    }
+    if let commentText = self._comment {
+      try visitor.visitSingularStringField(value: commentText, fieldNumber: 3)
+    }
+    if let generatedFlag = self._generated {
+      try visitor.visitSingularBoolField(value: generatedFlag, fieldNumber: 4)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpProto.Attr) -> Bool {
+    guard self._name == other._name else { return false }
+    guard self._type == other._type else { return false }
+    guard self._comment == other._comment else { return false }
+    guard self._generated == other._generated else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+// SwiftProtobuf runtime support for `VarType`. The field values live in a
+// reference-typed `_StorageClass` that is copied before mutation when it is
+// shared (see `_uniqueStorage`).
+extension PaddleMobile_Framework_Proto_VarType: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".VarType"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "type"),
+    2: .standard(proto: "selected_rows"),
+    3: .standard(proto: "lod_tensor"),
+    4: .standard(proto: "tensor_array"),
+    5: .same(proto: "reader"),
+    6: .same(proto: "channel"),
+    7: .same(proto: "tuple"),
+  ]
+
+  /// Heap-allocated backing store for the message's fields. All fields are
+  /// optional and start out as `nil`.
+  fileprivate class _StorageClass {
+    var _type: PaddleMobile_Framework_Proto_VarType.TypeEnum? = nil
+    var _selectedRows: PaddleMobile_Framework_Proto_VarType.TensorDesc? = nil
+    var _lodTensor: PaddleMobile_Framework_Proto_VarType.LoDTensorDesc? = nil
+    var _tensorArray: PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc? = nil
+    var _reader: PaddleMobile_Framework_Proto_VarType.ReaderDesc? = nil
+    var _channel: PaddleMobile_Framework_Proto_VarType.ChannelDesc? = nil
+    var _tuple: PaddleMobile_Framework_Proto_VarType.Tuple? = nil
+
+    /// Single all-`nil` instance; the private initializer means it is the
+    /// only storage object not produced by `init(copying:)`.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Creates a new storage object carrying the same field values as `source`.
+    init(copying source: _StorageClass) {
+      _type = source._type
+      _selectedRows = source._selectedRows
+      _lodTensor = source._lodTensor
+      _tensorArray = source._tensorArray
+      _reader = source._reader
+      _channel = source._channel
+      _tuple = source._tuple
+    }
+  }
+
+  /// Returns storage that is safe to mutate: when `_storage` is not known to
+  /// be uniquely referenced it is first replaced by a copy of itself.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    if !isKnownUniquelyReferenced(&_storage) {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// `false` when `type` is unset, or when any of `selectedRows`,
+  /// `lodTensor`, `tensorArray`, `reader` or `channel` is set but reports
+  /// itself as not initialized. `tuple` is not inspected here.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if _storage._type == nil {return false}
+      if let v = _storage._selectedRows, !v.isInitialized {return false}
+      if let v = _storage._lodTensor, !v.isInitialized {return false}
+      if let v = _storage._tensorArray, !v.isInitialized {return false}
+      if let v = _storage._reader, !v.isInitialized {return false}
+      if let v = _storage._channel, !v.isInitialized {return false}
+      return true
+    }
+  }
+
+  /// Decodes fields 1 through 7 from `decoder` into the storage object;
+  /// other field numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    // Make sure the storage is not shared before the loop below writes into it.
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      while let fieldNumber = try decoder.nextFieldNumber() {
+        switch fieldNumber {
+        case 1: try decoder.decodeSingularEnumField(value: &_storage._type)
+        case 2: try decoder.decodeSingularMessageField(value: &_storage._selectedRows)
+        case 3: try decoder.decodeSingularMessageField(value: &_storage._lodTensor)
+        case 4: try decoder.decodeSingularMessageField(value: &_storage._tensorArray)
+        case 5: try decoder.decodeSingularMessageField(value: &_storage._reader)
+        case 6: try decoder.decodeSingularMessageField(value: &_storage._channel)
+        case 7: try decoder.decodeSingularMessageField(value: &_storage._tuple)
+        default: break
+        }
+      }
+    }
+  }
+
+  /// Passes every field that is set to `visitor` in ascending field-number
+  /// order (1 through 7), then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if let v = _storage._type {
+        try visitor.visitSingularEnumField(value: v, fieldNumber: 1)
+      }
+      if let v = _storage._selectedRows {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 2)
+      }
+      if let v = _storage._lodTensor {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 3)
+      }
+      if let v = _storage._tensorArray {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 4)
+      }
+      if let v = _storage._reader {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 5)
+      }
+      if let v = _storage._channel {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 6)
+      }
+      if let v = _storage._tuple {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 7)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Equality over all stored fields plus the unknown fields.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType) -> Bool {
+    // Two values sharing the very same storage object have identical fields,
+    // so the per-field comparison is only run when the objects differ.
+    if _storage !== other._storage {
+      let storagesAreEqual: Bool = withExtendedLifetime((_storage, other._storage)) { (_args: (_StorageClass, _StorageClass)) in
+        let _storage = _args.0
+        let other_storage = _args.1
+        if _storage._type != other_storage._type {return false}
+        if _storage._selectedRows != other_storage._selectedRows {return false}
+        if _storage._lodTensor != other_storage._lodTensor {return false}
+        if _storage._tensorArray != other_storage._tensorArray {return false}
+        if _storage._reader != other_storage._reader {return false}
+        if _storage._channel != other_storage._channel {return false}
+        if _storage._tuple != other_storage._tuple {return false}
+        return true
+      }
+      if !storagesAreEqual {return false}
+    }
+    if unknownFields != other.unknownFields {return false}
+    return true
+  }
+}
+
+// Name table for the `VarType.TypeEnum` enum, keyed by number (0 through 18).
+// Every entry uses `.same(proto:)` with the upper-case proto identifier.
+extension PaddleMobile_Framework_Proto_VarType.TypeEnum: SwiftProtobuf._ProtoNameProviding {
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    0: .same(proto: "BOOL"),
+    1: .same(proto: "INT16"),
+    2: .same(proto: "INT32"),
+    3: .same(proto: "INT64"),
+    4: .same(proto: "FP16"),
+    5: .same(proto: "FP32"),
+    6: .same(proto: "FP64"),
+    7: .same(proto: "LOD_TENSOR"),
+    8: .same(proto: "SELECTED_ROWS"),
+    9: .same(proto: "FEED_MINIBATCH"),
+    10: .same(proto: "FETCH_LIST"),
+    11: .same(proto: "STEP_SCOPES"),
+    12: .same(proto: "LOD_RANK_TABLE"),
+    13: .same(proto: "LOD_TENSOR_ARRAY"),
+    14: .same(proto: "PLACE_LIST"),
+    15: .same(proto: "READER"),
+    16: .same(proto: "CHANNEL"),
+    17: .same(proto: "RAW"),
+    18: .same(proto: "TUPLE"),
+  ]
+}
+
+/// SwiftProtobuf runtime support for `VarType.TensorDesc`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.TensorDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".TensorDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "data_type"),
+    2: .same(proto: "dims"),
+  ]
+
+  /// `true` once `dataType` has been set.
+  public var isInitialized: Bool {
+    guard self._dataType != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// fields 1 and 2; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularEnumField(value: &self._dataType)
+      case 2:
+        try decoder.decodeRepeatedInt64Field(value: &self.dims)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `dataType` (when set) and `dims` (when non-empty) to `visitor`,
+  /// then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let elementType = self._dataType {
+      try visitor.visitSingularEnumField(value: elementType, fieldNumber: 1)
+    }
+    if !self.dims.isEmpty {
+      try visitor.visitRepeatedInt64Field(value: self.dims, fieldNumber: 2)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.TensorDesc) -> Bool {
+    guard self._dataType == other._dataType else { return false }
+    guard self.dims == other.dims else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+// SwiftProtobuf runtime support for `VarType.LoDTensorDesc`. The field
+// values live in a reference-typed `_StorageClass` that is copied before
+// mutation when it is shared (see `_uniqueStorage`).
+extension PaddleMobile_Framework_Proto_VarType.LoDTensorDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".LoDTensorDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "tensor"),
+    2: .standard(proto: "lod_level"),
+  ]
+
+  /// Heap-allocated backing store for the message's two optional fields.
+  fileprivate class _StorageClass {
+    var _tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc? = nil
+    var _lodLevel: Int32? = nil
+
+    /// Single all-`nil` instance; the private initializer means it is the
+    /// only storage object not produced by `init(copying:)`.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Creates a new storage object carrying the same field values as `source`.
+    init(copying source: _StorageClass) {
+      _tensor = source._tensor
+      _lodLevel = source._lodLevel
+    }
+  }
+
+  /// Returns storage that is safe to mutate: when `_storage` is not known to
+  /// be uniquely referenced it is first replaced by a copy of itself.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    if !isKnownUniquelyReferenced(&_storage) {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// `true` only when `tensor` is set and itself reports as initialized.
+  /// `lodLevel` is not inspected here.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if _storage._tensor == nil {return false}
+      if let v = _storage._tensor, !v.isInitialized {return false}
+      return true
+    }
+  }
+
+  /// Decodes fields 1 and 2 from `decoder` into the storage object; other
+  /// field numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    // Make sure the storage is not shared before the loop below writes into it.
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      while let fieldNumber = try decoder.nextFieldNumber() {
+        switch fieldNumber {
+        case 1: try decoder.decodeSingularMessageField(value: &_storage._tensor)
+        case 2: try decoder.decodeSingularInt32Field(value: &_storage._lodLevel)
+        default: break
+        }
+      }
+    }
+  }
+
+  /// Passes `tensor` and `lodLevel` (each only when set) to `visitor`, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if let v = _storage._tensor {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 1)
+      }
+      if let v = _storage._lodLevel {
+        try visitor.visitSingularInt32Field(value: v, fieldNumber: 2)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Equality over both stored fields plus the unknown fields.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.LoDTensorDesc) -> Bool {
+    // Two values sharing the very same storage object have identical fields,
+    // so the per-field comparison is only run when the objects differ.
+    if _storage !== other._storage {
+      let storagesAreEqual: Bool = withExtendedLifetime((_storage, other._storage)) { (_args: (_StorageClass, _StorageClass)) in
+        let _storage = _args.0
+        let other_storage = _args.1
+        if _storage._tensor != other_storage._tensor {return false}
+        if _storage._lodLevel != other_storage._lodLevel {return false}
+        return true
+      }
+      if !storagesAreEqual {return false}
+    }
+    if unknownFields != other.unknownFields {return false}
+    return true
+  }
+}
+
+// SwiftProtobuf runtime support for `VarType.LoDTensorArrayDesc`. The field
+// values live in a reference-typed `_StorageClass` that is copied before
+// mutation when it is shared (see `_uniqueStorage`).
+extension PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".LoDTensorArrayDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "tensor"),
+    2: .standard(proto: "lod_level"),
+  ]
+
+  /// Heap-allocated backing store for the message's two optional fields.
+  fileprivate class _StorageClass {
+    var _tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc? = nil
+    var _lodLevel: Int32? = nil
+
+    /// Single all-`nil` instance; the private initializer means it is the
+    /// only storage object not produced by `init(copying:)`.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Creates a new storage object carrying the same field values as `source`.
+    init(copying source: _StorageClass) {
+      _tensor = source._tensor
+      _lodLevel = source._lodLevel
+    }
+  }
+
+  /// Returns storage that is safe to mutate: when `_storage` is not known to
+  /// be uniquely referenced it is first replaced by a copy of itself.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    if !isKnownUniquelyReferenced(&_storage) {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// `true` only when `tensor` is set and itself reports as initialized.
+  /// `lodLevel` is not inspected here.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if _storage._tensor == nil {return false}
+      if let v = _storage._tensor, !v.isInitialized {return false}
+      return true
+    }
+  }
+
+  /// Decodes fields 1 and 2 from `decoder` into the storage object; other
+  /// field numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    // Make sure the storage is not shared before the loop below writes into it.
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      while let fieldNumber = try decoder.nextFieldNumber() {
+        switch fieldNumber {
+        case 1: try decoder.decodeSingularMessageField(value: &_storage._tensor)
+        case 2: try decoder.decodeSingularInt32Field(value: &_storage._lodLevel)
+        default: break
+        }
+      }
+    }
+  }
+
+  /// Passes `tensor` and `lodLevel` (each only when set) to `visitor`, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if let v = _storage._tensor {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 1)
+      }
+      if let v = _storage._lodLevel {
+        try visitor.visitSingularInt32Field(value: v, fieldNumber: 2)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Equality over both stored fields plus the unknown fields.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc) -> Bool {
+    // Two values sharing the very same storage object have identical fields,
+    // so the per-field comparison is only run when the objects differ.
+    if _storage !== other._storage {
+      let storagesAreEqual: Bool = withExtendedLifetime((_storage, other._storage)) { (_args: (_StorageClass, _StorageClass)) in
+        let _storage = _args.0
+        let other_storage = _args.1
+        if _storage._tensor != other_storage._tensor {return false}
+        if _storage._lodLevel != other_storage._lodLevel {return false}
+        return true
+      }
+      if !storagesAreEqual {return false}
+    }
+    if unknownFields != other.unknownFields {return false}
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.ReaderDesc`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.ReaderDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".ReaderDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "lod_tensor"),
+  ]
+
+  /// `true` when every element of `lodTensor` reports itself as initialized.
+  public var isInitialized: Bool {
+    guard SwiftProtobuf.Internal.areAllInitialized(self.lodTensor) else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// field 1; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeRepeatedMessageField(value: &self.lodTensor)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `lodTensor` (when non-empty) to `visitor`, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if !self.lodTensor.isEmpty {
+      try visitor.visitRepeatedMessageField(value: self.lodTensor, fieldNumber: 1)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Equality over `lodTensor` and the unknown fields.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.ReaderDesc) -> Bool {
+    guard self.lodTensor == other.lodTensor else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.ChannelDesc`: name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.ChannelDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".ChannelDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "data_type"),
+    2: .same(proto: "capacity"),
+  ]
+
+  /// `true` once both `dataType` and `capacity` have been set.
+  public var isInitialized: Bool {
+    guard self._dataType != nil else { return false }
+    guard self._capacity != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// fields 1 and 2; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularEnumField(value: &self._dataType)
+      case 2:
+        try decoder.decodeSingularInt64Field(value: &self._capacity)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `dataType` and `capacity` (each only when set) to `visitor`,
+  /// then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let elementType = self._dataType {
+      try visitor.visitSingularEnumField(value: elementType, fieldNumber: 1)
+    }
+    if let channelCapacity = self._capacity {
+      try visitor.visitSingularInt64Field(value: channelCapacity, fieldNumber: 2)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.ChannelDesc) -> Bool {
+    guard self._dataType == other._dataType else { return false }
+    guard self._capacity == other._capacity else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.Tuple`: name table, decoding,
+/// field traversal and equality. No `isInitialized` override is declared here.
+extension PaddleMobile_Framework_Proto_VarType.Tuple: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".Tuple"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "element_type"),
+  ]
+
+  /// Pulls field numbers from `decoder` until it has none left and decodes
+  /// field 1; other numbers fall through to `default`.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeRepeatedEnumField(value: &self.elementType)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `elementType` (when non-empty) to `visitor`, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if !self.elementType.isEmpty {
+      try visitor.visitRepeatedEnumField(value: self.elementType, fieldNumber: 1)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Equality over `elementType` and the unknown fields.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.Tuple) -> Bool {
+    guard self.elementType == other.elementType else { return false }
+    guard unknownFields == other.unknownFields else { return false }
+    return true
+  }
+}
+
+/// SwiftProtobuf conformance for `VarDesc`. The three field values are held in
+/// a reference-typed `_StorageClass`; `_uniqueStorage()` copies that object
+/// before mutation whenever it is shared.
+extension PaddleMobile_Framework_Proto_VarDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".VarDesc"
+  /// Field numbers 1-3 and the proto names registered for them.
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "type"),
+    3: .same(proto: "persistable"),
+  ]
+
+  /// Backing store for the message's fields; every field starts out nil.
+  fileprivate class _StorageClass {
+    var _name: String? = nil
+    var _type: PaddleMobile_Framework_Proto_VarType? = nil
+    var _persistable: Bool? = nil
+
+    /// Shared instance with all fields nil.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Creates a new storage object carrying over each field of `source`.
+    init(copying source: _StorageClass) {
+      _name = source._name
+      _type = source._type
+      _persistable = source._persistable
+    }
+  }
+
+  /// Returns storage that is safe to mutate: if `_storage` is referenced from
+  /// more than one place it is first replaced by a private copy.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    if !isKnownUniquelyReferenced(&_storage) {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// True only when `_name` and `_type` are both set and the contained
+  /// `_type` message itself reports `isInitialized`. `_persistable` is not checked.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if _storage._name == nil {return false}
+      if _storage._type == nil {return false}
+      if let v = _storage._type, !v.isInitialized {return false}
+      return true
+    }
+  }
+
+  /// Decodes fields 1-3 into the storage object; other field numbers are not
+  /// handled by this method.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    // Make the storage unique first so the writes below cannot affect a copy
+    // of this message that shares the same storage object.
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      while let fieldNumber = try decoder.nextFieldNumber() {
+        switch fieldNumber {
+        case 1: try decoder.decodeSingularStringField(value: &_storage._name)
+        case 2: try decoder.decodeSingularMessageField(value: &_storage._type)
+        case 3: try decoder.decodeSingularBoolField(value: &_storage._persistable)
+        default: break
+        }
+      }
+    }
+  }
+
+  /// Visits each non-nil field in field-number order, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if let v = _storage._name {
+        try visitor.visitSingularStringField(value: v, fieldNumber: 1)
+      }
+      if let v = _storage._type {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 2)
+      }
+      if let v = _storage._persistable {
+        try visitor.visitSingularBoolField(value: v, fieldNumber: 3)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Compares two messages. When both share the very same storage object the
+  /// per-field comparison is skipped and only the unknown fields are compared.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarDesc) -> Bool {
+    if _storage !== other._storage {
+      let storagesAreEqual: Bool = withExtendedLifetime((_storage, other._storage)) { (_args: (_StorageClass, _StorageClass)) in
+        let _storage = _args.0
+        let other_storage = _args.1
+        if _storage._name != other_storage._name {return false}
+        if _storage._type != other_storage._type {return false}
+        if _storage._persistable != other_storage._persistable {return false}
+        return true
+      }
+      if !storagesAreEqual {return false}
+    }
+    if unknownFields != other.unknownFields {return false}
+    return true
+  }
+}
+
+/// SwiftProtobuf conformance for `BlockDesc`: two index fields, the repeated
+/// `vars` and `ops` messages, and the `forward_block_idx` field.
+extension PaddleMobile_Framework_Proto_BlockDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".BlockDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "idx"),
+    2: .standard(proto: "parent_idx"),
+    3: .same(proto: "vars"),
+    4: .same(proto: "ops"),
+    5: .standard(proto: "forward_block_idx"),
+  ]
+
+  /// True when `_idx` and `_parentIdx` are set and every element of `vars`
+  /// and `ops` is itself initialized. `_forwardBlockIdx` is not checked.
+  public var isInitialized: Bool {
+    guard self._idx != nil, self._parentIdx != nil else {return false}
+    return SwiftProtobuf.Internal.areAllInitialized(self.vars)
+      && SwiftProtobuf.Internal.areAllInitialized(self.ops)
+  }
+
+  /// Decodes fields 1-5 into the matching properties; other field numbers are
+  /// not handled by this method.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      switch tag {
+      case 1:
+        try decoder.decodeSingularInt32Field(value: &self._idx)
+      case 2:
+        try decoder.decodeSingularInt32Field(value: &self._parentIdx)
+      case 3:
+        try decoder.decodeRepeatedMessageField(value: &self.vars)
+      case 4:
+        try decoder.decodeRepeatedMessageField(value: &self.ops)
+      case 5:
+        try decoder.decodeSingularInt32Field(value: &self._forwardBlockIdx)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Visits the populated fields in field-number order, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let idx = self._idx {
+      try visitor.visitSingularInt32Field(value: idx, fieldNumber: 1)
+    }
+    if let parentIdx = self._parentIdx {
+      try visitor.visitSingularInt32Field(value: parentIdx, fieldNumber: 2)
+    }
+    if self.vars.isEmpty == false {
+      try visitor.visitRepeatedMessageField(value: self.vars, fieldNumber: 3)
+    }
+    if self.ops.isEmpty == false {
+      try visitor.visitRepeatedMessageField(value: self.ops, fieldNumber: 4)
+    }
+    if let forwardIdx = self._forwardBlockIdx {
+      try visitor.visitSingularInt32Field(value: forwardIdx, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality in declaration order, ending with the unknown
+  /// fields; evaluation stops at the first mismatch.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_BlockDesc) -> Bool {
+    return self._idx == other._idx
+      && self._parentIdx == other._parentIdx
+      && self.vars == other.vars
+      && self.ops == other.ops
+      && self._forwardBlockIdx == other._forwardBlockIdx
+      && unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf conformance for `ProgramDesc`, whose only declared field is
+/// the repeated `blocks` message (field number 1).
+extension PaddleMobile_Framework_Proto_ProgramDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".ProgramDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "blocks"),
+  ]
+
+  /// True when every element of `blocks` reports itself initialized.
+  public var isInitialized: Bool {
+    return SwiftProtobuf.Internal.areAllInitialized(self.blocks)
+  }
+
+  /// Decodes field 1 into `blocks`; other field numbers are not handled here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let tag = try decoder.nextFieldNumber() {
+      if tag == 1 {
+        try decoder.decodeRepeatedMessageField(value: &self.blocks)
+      }
+    }
+  }
+
+  /// Visits `blocks` (skipped when empty) and then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    let hasBlocks = !self.blocks.isEmpty
+    if hasBlocks {
+      try visitor.visitRepeatedMessageField(value: self.blocks, fieldNumber: 1)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Equal when both `blocks` and the unknown fields match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_ProgramDesc) -> Bool {
+    return self.blocks == other.blocks
+      && unknownFields == other.unknownFields
+  }
+}
--- a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Shape descriptor: an ordered list of integer extents.
+public struct Dim {
+    /// The extents in order. The setter is private to this struct.
+    private(set) var dims: [Int]
+
+    /// Creates a `Dim` that stores `inDim` as its extents.
+    public init(inDim: [Int]) {
+        self.dims = inDim
+    }
+
+    /// Parameterless construction is not supported; the body traps immediately.
+    private init() {
+        fatalError()
+    }
+
+    /// Exchanges the extents stored at `index1` and `index2`.
+    mutating func swapeDimAt(index1: Int, index2: Int) {
+        dims.swapAt(index1, index2)
+    }
+
+    /// Number of extents held (not the number of elements they describe).
+    func cout() -> Int {
+        return dims.count
+    }
+
+    /// Product of all extents; `1` when there are none.
+    func numel() -> Int {
+        var total = 1
+        for extent in dims {
+            total = total * extent
+        }
+        return total
+    }
+
+    /// Two `Dim`s compare equal when their extent arrays are equal.
+    static func ==(left: Dim, right: Dim) -> Bool {
+        return left.dims == right.dims
+    }
+
+    /// Read-only access to the extent at `index`.
+    subscript(index: Int) -> Int {
+        return dims[index]
+    }
+}
+
+extension Dim: CustomStringConvertible {
+    /// Textual form of the extent array, e.g. `[1, 3, 224, 224]`.
+    public var description: String {
+        return String(describing: dims)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Accelerate
+import Foundation
+
+/// Requirements shared by the tensor-like containers in this file set.
+/// Conformers must also provide `description` and `debugDescription`.
+protocol Tensorial: CustomStringConvertible, CustomDebugStringConvertible {
+    /// Shape of the container; must be both readable and writable.
+    var dim: Dim { get set }
+    /// Layout tag of the stored values; only a getter is required.
+    var layout: DataLayout { get }
+    /// Total number of elements.
+    func numel() -> Int
+}
+
+extension Tensorial {
+    /// Default element count: whatever the shape's own `numel()` reports.
+    func numel() -> Int {
+        let shape = dim
+        return shape.numel()
+    }
+}
+
+/// CPU-side tensor backed by a manually allocated buffer of `P`, plus an
+/// `MTLBuffer` copy that is created on demand by `initBuffer(device:precision:)`.
+class Tensor<P: PrecisionType>: Tensorial {
+    /// Element width used when filling the Metal buffer.
+    enum BufferPrecision {
+        case Float32, Float16
+    }
+
+    var data: Data
+    var dim: Dim
+    /// Nil until `initBuffer` has created it.
+    var buffer: MTLBuffer!
+    private(set) var layout: DataLayout
+
+    /// Manually managed storage for the tensor's values.
+    class Data {
+        init(inSize: Int, inPointer: UnsafeMutablePointer<P>) {
+            size = inSize
+            pointer = inPointer
+        }
+        /// Value supplied at construction; `Tensor.init` passes a byte count.
+        let size: Int
+        var pointer: UnsafeMutablePointer<P>
+        subscript(index: Int) -> P {
+            get {
+                return pointer[index]
+            }
+            set {
+                pointer[index] = newValue
+            }
+        }
+        /// Deinitializes and frees `pointer`. The pointer is left dangling
+        /// afterwards, so it must not be read again until it is reassigned.
+        func release() {
+            pointer.deinitialize(count: size)
+            pointer.deallocate()
+        }
+        deinit {
+            // Intentionally empty: `convert(to:)` and `initBuffer` call
+            // `release()` explicitly, so freeing here as well would free twice.
+            // NOTE(review): storage leaks if neither of those is ever called.
+        }
+    }
+
+    /// Allocates uninitialized storage for a tensor of shape `inDim`.
+    required init(inDim: Dim, inLayout: DataLayout = .NCHW) {
+        dim = inDim
+        // NOTE(review): `size` is a byte count but is passed below as an element
+        // capacity, so the allocation is MemoryLayout<P>.size times larger than
+        // numel() elements require. Left as-is because `data.size` is readable
+        // by code outside this class.
+        let size = inDim.numel() * MemoryLayout<P>.size
+        let pointer = UnsafeMutablePointer<P>.allocate(capacity: size)
+        data = Data.init(inSize: size, inPointer: pointer)
+        layout = inLayout
+    }
+
+    /// Re-orders the stored values from NCHW to NHWC. Every other request
+    /// (same layout, rank other than 4, or any other layout pair) is a no-op.
+    func convert(to: DataLayout) {
+        guard to != layout else {
+            return
+        }
+
+        guard dim.cout() == 4 else {
+            return
+        }
+
+        guard layout == .NCHW && to == .NHWC else {
+            // other not support
+            return
+        }
+
+        let newPointer = UnsafeMutablePointer<P>.allocate(capacity: data.size)
+        NCHW2NHWC(newPtr: newPointer)
+
+        // Swap the storage: free the old block, then adopt the converted one.
+        data.release()
+        data.pointer = newPointer
+        layout = to
+    }
+
+    /// Converts `count` Float32 values at `input` to half precision at
+    /// `output` using vImage; traps if vImage reports an error.
+    func float32ToFloat16(input: UnsafeMutablePointer<Float32>, output: UnsafeMutableRawPointer, count: Int) {
+        var float32Buffer = vImage_Buffer(data: input,  height: 1, width: UInt(count), rowBytes: count * 4)
+        var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2)
+        guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else {
+            fatalError(" float 32 to float 16 error ! ")
+        }
+    }
+
+    /// Writes `count` Float32 values from `source` into `buffer`, either as a
+    /// raw copy (`.Float32`) or converted to half precision (`.Float16`).
+    private func fillBuffer(from source: UnsafeMutablePointer<Float32>, count: Int, precision: BufferPrecision) {
+        switch precision {
+        case .Float32:
+            buffer?.contents().copyMemory(from: source, byteCount: count * MemoryLayout<P>.stride)
+        case .Float16:
+            float32ToFloat16(input: source, output: buffer.contents(), count: count)
+        }
+    }
+
+    /// Creates `buffer` on `device` from the CPU data and then releases the
+    /// CPU data. Only `Float32` storage is supported; rank must be 1 or 4.
+    ///
+    /// For rank-4 NHWC tensors the channel count is padded up to a multiple
+    /// of four, except when it is exactly 1. Padding lanes are zero.
+    func initBuffer(device: MTLDevice, precision: BufferPrecision = .Float32) {
+        guard let floatPointer = data.pointer as? UnsafeMutablePointer<Float32> else {
+            fatalError(" not support yet ")
+        }
+
+        let precisionSize: Int
+        switch precision {
+        case .Float32:
+            precisionSize = 4
+        case .Float16:
+            precisionSize = 2
+        }
+
+        if dim.cout() == 4 {
+            // NOTE(review): a rank-4 tensor whose layout is not .NHWC gets no
+            // buffer here, yet its CPU data is still released below - confirm
+            // that this is intended.
+            if layout == .NHWC {
+                let C = dim[3]
+                let cSlices = (C + 3) / 4
+                let paddedC = cSlices * 4
+                let pixelCount = dim[0] * dim[1] * dim[2]
+                let count = paddedC * pixelCount
+                if C == paddedC {
+                    // Channels already a multiple of four: upload as-is.
+                    buffer = device.makeBuffer(length: count * precisionSize)
+                    fillBuffer(from: floatPointer, count: count, precision: precision)
+                } else if C == 1 {
+                    // Single channel: uploaded unpadded.
+                    buffer = device.makeBuffer(length: numel() * precisionSize)
+                    fillBuffer(from: floatPointer, count: numel(), precision: precision)
+                } else {
+                    buffer = device.makeBuffer(length: count * precisionSize)
+                    let convertedPointer = UnsafeMutablePointer<Float32>.allocate(capacity: count)
+                    // Zero-fill first so the padding lanes (j >= C) hold a
+                    // defined value instead of whatever the allocator returned.
+                    convertedPointer.initialize(repeating: 0, count: count)
+                    var srcPtr = floatPointer
+                    var dstPtr = convertedPointer
+                    for _ in 0..<pixelCount {
+                        for j in 0..<C {
+                            dstPtr[j] = srcPtr[j]
+                        }
+                        srcPtr += C
+                        dstPtr += paddedC
+                    }
+
+                    fillBuffer(from: convertedPointer, count: count, precision: precision)
+
+                    convertedPointer.deinitialize(count: count)
+                    convertedPointer.deallocate()
+                }
+            }
+        } else if dim.cout() == 1 {
+            buffer = device.makeBuffer(length: numel() * precisionSize)
+            fillBuffer(from: floatPointer, count: numel(), precision: precision)
+        } else {
+            fatalError(" not support !")
+        }
+        //TODO: release
+        data.release()
+    }
+
+    /// `dim[1]` of a rank-4 tensor; traps for any other rank.
+    var width: Int {
+        guard dim.cout() == 4 else {
+            fatalError()
+        }
+        return dim[1]
+    }
+
+    /// `dim[2]` of a rank-4 tensor; traps for any other rank.
+    var height: Int {
+        guard dim.cout() == 4 else {
+            fatalError()
+        }
+        return dim[2]
+    }
+
+    /// `dim[3]` of a rank-4 tensor; traps for any other rank.
+    var channel: Int {
+        guard dim.cout() == 4 else {
+            fatalError()
+        }
+        return dim[3]
+    }
+
+    /// Writes the values into `newPtr` in n, h, w, c order (reading them as
+    /// n, c, h, w) and then swaps `dim[1]` with `dim[3]`.
+    ///
+    /// NOTE(review): swapping indices 1 and 3 turns [N, C, H, W] into
+    /// [N, W, H, C], not [N, H, W, C]. `width`/`height` above read dim[1]/dim[2]
+    /// accordingly - confirm this ordering is what callers expect.
+    func NCHW2NHWC(newPtr: UnsafeMutablePointer<P>) {
+        let N = dim[0]
+        let C = dim[1]
+        let H = dim[2]
+        let W = dim[3]
+        let HXW = H * W
+        let CXHXW = C * H * W
+
+        var index: Int = 0
+        for n in 0..<N {
+            for h in 0..<H {
+                for w in 0..<W {
+                    for c in 0..<C {
+                        newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * W + w]
+                        index += 1
+                    }
+                }
+            }
+        }
+        dim.swapeDimAt(index1: 1, index2: 3)
+    }
+}
+
+
+extension Tensor {
+
+    /// Shape, the Metal buffer, and - when a buffer exists - its contents
+    /// read as values of `P`.
+    ///
+    /// NOTE(review): the contents are always interpreted as `P`; if the buffer
+    /// was filled with `.Float16` precision the printed values will not be
+    /// meaningful.
+    var debugDescription: String {
+        var str = "dim: \(dim) \n"
+        str += "MTLBuffer: \(self.buffer) \n"
+        // `buffer` stays nil until initBuffer has run; return what we have
+        // instead of trapping on the implicit unwrap.
+        guard let mtlBuffer = buffer else {
+            return str
+        }
+        let values = mtlBuffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<mtlBuffer.length/MemoryLayout<P>.size {
+            str += " \(values[i])"
+        }
+        return str
+    }
+
+    /// Prints `header`, then the data size, the shape and the first `numel()`
+    /// values read through `data.pointer`.
+    ///
+    /// NOTE(review): `initBuffer` releases `data`, so calling this afterwards
+    /// would read freed memory - confirm call sites.
+    func logDataPointer(header: String = "") {
+        print(header)
+        var str = ""
+        str += "data size: \(data.size) \n"
+        str += "dim: \(dim) \n"
+        for i in 0..<numel() {
+            str += " \(data.pointer[i])"
+        }
+        print(str)
+    }
+
+    /// Same text as `debugDescription`.
+    var description: String {
+        return debugDescription
+    }
+
+}
--- a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import Foundation
+
+/// Pairs a Metal texture with the dimensions expected for it.
+class InputTexture {
+    let mtlTexture: MTLTexture
+    let expectDim: Dim
+
+    /// Stores both arguments unchanged.
+    init(inMTLTexture: MTLTexture, inExpectDim: Dim) {
+        self.expectDim = inExpectDim
+        self.mtlTexture = inMTLTexture
+    }
+}
+
+extension InputTexture {
+    /// The wrapped texture's own `description`.
+    var description: String {
+        return mtlTexture.description
+    }
+
+    /// The wrapped texture's `debugDescription`, or a fixed placeholder when
+    /// the texture does not provide one.
+    var debugDescription: String {
+        if let text = mtlTexture.debugDescription {
+            return text
+        }
+        return " MetalTexture "
+    }
+}
+
+/// Tensor-like container whose values live in an `MTLTexture`.
+public class Texture<P: PrecisionType>: Tensorial {
+    var dim: Dim
+    private(set) var layout: DataLayout
+    /// Descriptor the texture was created from.
+    let textureDesc: MTLTextureDescriptor
+    var metalTexture: MTLTexture
+
+    /// Builds a descriptor from `inDim` and creates the texture on `device`.
+    ///
+    /// - rank 1: a 1D texture of width `inDim[0]`.
+    /// - rank 4: a 2D array with height `inDim[1]`, width `inDim[2]` and
+    ///   `ceil(inDim[3] * inDim[0] / 4)` slices.
+    /// - rank 2: a 1x1 2D array with `ceil(inDim[0] * inDim[1] / 4)` slices.
+    /// - any other rank traps.
+    ///
+    /// The pixel format is picked from the byte size of `P` (1, 2 or 4); for
+    /// any other size the descriptor's existing pixel format is left untouched.
+    init(device: MTLDevice, inDim: Dim, inLayout: DataLayout = .NHWC) {
+        dim = inDim
+        layout = inLayout
+
+        let descriptor = MTLTextureDescriptor.init()
+        switch inDim.cout() {
+        case 1:
+            descriptor.width = inDim[0]
+            descriptor.textureType = .type1D
+        case 4:
+            descriptor.height = inDim[1]
+            descriptor.width = inDim[2]
+            descriptor.depth = 1
+            descriptor.arrayLength = (inDim[3] * inDim[0] + 3)/4
+            descriptor.textureType = .type2DArray
+        case 2:
+            descriptor.height = 1
+            descriptor.width = 1
+            descriptor.depth = 1
+            descriptor.arrayLength = (inDim[0] * inDim[1] + 3)/4
+            descriptor.textureType = .type2DArray
+        default:
+            fatalError(" not suuprt ")
+        }
+
+        switch MemoryLayout<P>.size {
+        case 1:
+            descriptor.pixelFormat = .rgba8Unorm
+        case 2:
+            descriptor.pixelFormat = .rgba16Float
+        case 4:
+            descriptor.pixelFormat = .rgba32Float
+        default:
+            break
+        }
+
+        descriptor.usage = [.shaderRead, .shaderWrite]
+        descriptor.storageMode = .shared
+        textureDesc = descriptor
+        metalTexture = device.makeTexture(descriptor: descriptor) ?! " texture nil "
+    }
+}
+
+extension Texture {
+    /// Same text as `debugDescription`.
+    public var description: String {
+        return debugDescription
+    }
+
+    /// The shape followed by the Metal texture's textual form in brackets.
+    public var debugDescription: String {
+        let header = "Dim: \(dim) \n value:[ "
+        let body = "\(metalTexture)"
+        return header + body + " ]"
+    }
+
+}
--- a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
+++ b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#pragma once
+
+#import <UIKit/UIKit.h>
+
+//! Project version number for paddle_mobile.
+FOUNDATION_EXPORT double paddle_mobileVersionNumber;
+
+//! Project version string for paddle_mobile.
+FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[];
+
+
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -50,6 +50,9 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
    "fusion_elementwise_add_relu";
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
+const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
+const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
+const char *G_OP_TYPE_PRELU = "prelu";

 std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -57,6 +60,7 @@ std::unordered_map<
        {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
        {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
        {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
        {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
        {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
        {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
@@ -85,6 +89,8 @@ std::unordered_map<
        {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Y"}}},
        {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
        {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
+        {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};

 }  // namespace paddle_mobile
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -113,6 +113,9 @@ extern const char *G_OP_TYPE_FUSION_POOL_BN;
 extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
+extern const char *G_OP_TYPE_FUSION_CONV_BN;
+extern const char *G_OP_TYPE_CONV_TRANSPOSE;
+extern const char *G_OP_TYPE_PRELU;

 extern std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>

--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -27,7 +27,12 @@ limitations under the License. */
 #include <cstdio>
 #include <cstring>

-#include "fpga/api/fpga_api.h"
+#include "api.h"
+
+#define FPGA_TEST_MODE
+#ifdef FPGA_TEST_MODE
+#include "common/log.h"
+#endif

 namespace paddle_mobile {
 namespace fpga {
@@ -36,7 +41,11 @@ static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";

 static inline int do_ioctl(int req, const void *arg) {
+#ifdef PADDLE_MOBILE_OS_LINUX
-  return ioctl(req, (unsigned int64_t)arg);
+  return ioctl(fd, req, arg);  // ioctl(2): descriptor first, then request
+#else
+  return -1;  // no FPGA driver off Linux: every request reports failure
+#endif
 }

 int open_device() {
@@ -48,26 +57,110 @@ int open_device() {

 // memory management;
 void *fpga_malloc(size_t size) {
+#ifdef PADDLE_MOBILE_OS_LINUX
  return reinterpret_cast<void *>(
      mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
+#else
+  return malloc(size);
+#endif
 }

-void fpga_free(void *ptr) { munmap(ptr, 0); }
+void fpga_free(void *ptr) {
+#ifdef PADDLE_MOBILE_OS_LINUX
+  munmap(ptr, 0);  // NOTE(review): POSIX munmap fails (EINVAL) when len is 0
+#else
+  free(ptr);  // counterpart of the malloc() fallback in fpga_malloc
+#endif
+}

 void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
 }

 int ComputeFpgaConv(const struct ConvArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   relu_enabled:" << args.relu_enabled
+       << "   sb_address:" << args.sb_address
+       << "   filter_address:" << args.filter_address
+       << "   filter_num:" << args.filter_num
+       << "   group_num:" << args.group_num;
+  DLOG << "   image_address:" << args.image.address
+       << "   image_scale_address:" << args.image.scale_address
+       << "   image_channels:" << args.image.channels
+       << "   image_height:" << args.image.height
+       << "   image_width:" << args.image.width
+       << "   pad_height:" << args.image.pad_height
+       << "   pad_width:" << args.image.pad_width;
+  DLOG << "   kernel_height:" << args.kernel.height
+       << "   kernel_width:" << args.kernel.width
+       << "   stride_h:" << args.kernel.stride_h
+       << "   stride_w:" << args.kernel.stride_w;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
  return do_ioctl(IOCTL_CONFIG_CONV, &args);
 }
+
 int ComputeFpgaPool(const struct PoolingArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   image_address:" << args.image.address
+       << "   image_scale_address:" << args.image.scale_address
+       << "   image_channels:" << args.image.channels
+       << "   image_height:" << args.image.height
+       << "   image_width:" << args.image.width
+       << "   pad_height:" << args.image.pad_height
+       << "   pad_width:" << args.image.pad_width;
+  DLOG << "   kernel_height:" << args.kernel.height
+       << "   kernel_width:" << args.kernel.width
+       << "   stride_h:" << args.kernel.stride_h
+       << "   stride_w:" << args.kernel.stride_w;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
 }
+
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
+#ifdef FPGA_TEST_MODE  // log every argument before issuing the request
+  DLOG << "   relu_enabled:" << args.relu_enabled << "   const0:" << args.const0
+       << "   const1:" << args.const1;
+  DLOG << "   image0_address:" << args.image0.address
+       << "   image0_scale_address:" << args.image0.scale_address
+       << "   image0_channels:" << args.image0.channels
+       << "   image0_height:" << args.image0.height
+       << "   image0_width:" << args.image0.width
+       << "   pad0_height:" << args.image0.pad_height
+       << "   pad0_width:" << args.image0.pad_width;
+  DLOG << "   image1_address:" << args.image1.address
+       << "   image1_scale_address:" << args.image1.scale_address
+       << "   image1_channels:" << args.image1.channels
+       << "   image1_height:" << args.image1.height
+       << "   image1_width:" << args.image1.width
+       << "   pad1_height:" << args.image1.pad_height
+       << "   pad1_width:" << args.image1.pad_width;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
   return do_ioctl(IOCTL_CONFIG_EW, &args);
 }
 int PerformBypass(const struct BypassArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   layout_type:" << args.layout_type
+       << "   convert_type:" << args.convert_type;
+  DLOG << "   image_address:" << args.image.address
+       << "   image_scale_address:" << args.image.scale_address
+       << "   image_channels:" << args.image.channels
+       << "   image_height:" << args.image.height
+       << "   image_width:" << args.image.width
+       << "   pad_height:" << args.image.pad_height
+       << "   pad_width:" << args.image.pad_width;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
 }


--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -12,22 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "fpga/fpga_quantilization.h"
+#include "fpga/quantization.h"
 #include <algorithm>

 namespace paddle_mobile {
 namespace fpga {

 template <typename Dtype>
-static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
-                       int height, int width) {
-  int offset_height = 0;
-
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int64_t num,
+                       int64_t channel, int64_t height, int64_t width) {
  for (int n = 0; n < num; n++) {
-    int amount_per_row = width * channel;
+    int64_t amount_per_row = width * channel;
    for (int c = 0; c < channel; c++) {
      for (int h = 0; h < height; h++) {
-        int offset_height = h * amount_per_row;
+        int64_t offset_height = h * amount_per_row;
        for (int w = 0; w < width; w++) {
          *(data_out + offset_height + w * channel + c) = *(data_in++);
        }
@@ -38,57 +36,56 @@ static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
 }

 template <typename Dtype>
-static Dtype find_max(Dtype* data, int num) {
+// Returns the largest absolute value among the first `num` elements of
+// `data`; the result is 0 when `num` is not positive.
+// NOTE(review): for a signed integer Dtype the negation of the minimum value
+// (e.g. int8_t -128) does not fit back into Dtype - confirm callers never
+// pass it.
+static Dtype find_max(Dtype* data, int64_t num) {
  Dtype max = 0;
-  for (int i = 0; i < num; ++i) {
-    max = std::max(max, data[i]);
+  // The counter is int64_t so that it has the same width as `num`.
+  for (int64_t i = 0; i < num; ++i) {
+    Dtype value = data[i];
+    Dtype abs = value > 0 ? value : -value;
+    max = std::max(max, abs);
  }
  return max;
 }

-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
-  float scale = 0;
-  float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
+// Quantizes `filter` to int8 in place.
+//  - A float filter is multiplied by scale = fix_range / max|x| (fix_range is
+//    2^7 - 1) and each product is cast to int8_t.
+//  - Any other filter is read as int8_t and copied unchanged.
+//  - A 4-D filter is then written back through chw_to_hwc; a 2-D filter is
+//    copied back as-is.
+// The scale is finally stored on the tensor with SetFpgaScale.
+// NOTE(review): `max` can be 0 (all-zero filter), which makes the division
+// below a division by zero - confirm callers never pass such a filter.
+// NOTE(review): for ranks other than 2 and 4 the tensor data is left
+// untouched while the scale is still updated - confirm this is intended.
+void quantize_filter(framework::Tensor* filter) {
+  DLOG << "quantilize_filter........" << filter->dims();

-  const int batch_size = filter->dims()[0];
-  const int channel = filter->dims()[1];
-  const int height = filter->dims()[2];
-  const int width = filter->dims()[3];
+  float scale = 0;
+  auto fix_range = static_cast<float>(std::pow(2, 8 - 1) - 1);

-  int8_t* int_data = nullptr;
-  int8_t* tmp_data = new int[filter->numel()];
+  // Scratch buffer holding the int8 values before they are written back.
+  auto* tmp_data = new int8_t[filter->numel()];

  // 32bit filter -> 8bit filter;
  if (filter->type() == typeid(float)) {
-    float* float_data = filter->data<float>();
-    float max = find_max(float_data, filter->numel());
-
-    scale = (max / fix_range);
+    auto* float_data = filter->data<float>();
+    auto max = find_max<float>(float_data, filter->numel());

-    framework::Tensor* filter = filter;
-    framework::Tensor* quant_filter = new framework::Tensor();
+    scale = (fix_range / max);
+    DLOG << "scale:" << scale;

-    int_data = quant_filter->mutable_data<int8_t>();
    for (int i = 0; i < filter->numel(); ++i) {
-      tmp_data[i] = (int8_t)float_data[i] * scale;
+      tmp_data[i] = (int8_t)(float_data[i] * scale);
    }
-    filter = quant_filter;
  } else {
-    int8_t max = find_max(filter->data<int8_t>(), filter->numel());
-    scale = (max / fix_range);
+    auto max = find_max<int8_t>(filter->data<int8_t>(), filter->numel());
+    scale = (fix_range / max);
+    std::memcpy(tmp_data, filter->data<int8_t>(), (size_t)filter->numel());
+  }

-    int_data = filter->data<int8_t>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      tmp_data[i] = int_data[i];
-    }
-    int_data = filter->mutable_data<int8_t>();
+  if (filter->dims().size() == 4) {
+    const auto batch_size = filter->dims()[0];
+    const auto channel = filter->dims()[1];
+    const auto height = filter->dims()[2];
+    const auto width = filter->dims()[3];
+    chw_to_hwc<int8_t>(tmp_data, filter->mutable_data<int8_t>(), batch_size,
+                       channel, height, width);
+  } else if (filter->dims().size() == 2) {
+    std::memcpy(filter->mutable_data<int8_t>(), tmp_data,
+                (size_t)filter->numel());
  }
-  // NCHW -> NHWC;
-  chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
-  delete tmp_data;
-  *(filter->fpga_args().scale_pointer()) = scale;
-  return filter;
+
+  // tmp_data was allocated with new[], so it must be released with delete[].
+  delete[] tmp_data;
+  filter->SetFpgaScale(scale);
 }

 }  // namespace fpga

--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -21,10 +21,10 @@ namespace paddle_mobile {
 namespace fpga {

+// Layout-reordering helper used by quantize_filter; it copies `data_in` into
+// `data_out` element by element (see the definition in the .cpp file).
 template <typename Dtype>
-static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
-                       int height, int width);
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int64_t num,
+                       int64_t channel, int64_t height, int64_t width);
+
+// Converts `filter` to int8 data in place and records the quantization scale
+// on the tensor through Tensor::SetFpgaScale.
+void quantize_filter(framework::Tensor* filter);

-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter);
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -64,7 +64,8 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
 };

 static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, half, float, double, int16_t, int64_t, bool, size_t>
+  SizeOfTypeFunctor<int8_t, int, half, float, double, int16_t, int64_t, bool,
+                    size_t>
      functor;
  size_t size = functor(type);

@@ -115,8 +116,8 @@ class Tensor {
    PADDLE_MOBILE_ENFORCE(
        (std::is_same<T, void>::value ||
         holder_->type().hash_code() == typeid(T).hash_code()),
-        "Tensor holds the wrong type, it holds %s",
-        this->holder_->type().name());
+        "Tensor holds the wrong type, it holds %s ,requested:%s",
+        this->holder_->type().name(), typeid(T).name());

    return reinterpret_cast<const T *>(
        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
@@ -255,14 +256,26 @@ class Tensor {

 #ifdef PADDLE_MOBILE_FPGA
+  // Non-owning view of a tensor's FPGA scale: scale_ is filled in by
+  // Tensor::fpga_args() with the address of the tensor's scale float.
  struct FPGAArgs {
-    float scale;
+    friend class Tensor;
+
+    // Raw pointer to the scale value.
+    inline float *scale_pointer() { return scale_; }
+    // Current scale value, read through the pointer.
+    inline float scale() { return *scale_; }

-    inline float *scale_pointer() { return &scale; }
+   private:
+    float *scale_;
  };

+  // Builds an FPGAArgs whose pointer aliases this tensor's scale float, so a
+  // write through scale_pointer() changes the value stored on the tensor.
  struct FPGAArgs fpga_args() const {
-    return fpgaArgs_;
+    FPGAArgs args;
+    args.scale_ = scale.get();
+    return args;
  }
+
+  // Overwrites the tensor's scale value with `s`.
+  void SetFpgaScale(float s) { *(scale.get()) = s; }
+
+ private:
+  // Heap-allocated scale, initialised to 0.
+  // NOTE(review): held in a shared_ptr, so a member-wise copy of Tensor would
+  // share this float with the copy - confirm against Tensor's copy operations.
+  std::shared_ptr<float> scale = std::make_shared<float>(0);
+
 #endif

 private:
@@ -331,10 +344,6 @@ class Tensor {
   * begins.
   */
  size_t offset_;
-
-#ifdef PADDLE_MOBILE_FPGA
-  FPGAArgs fpgaArgs_;
-#endif
 };

 #ifdef PADDLE_MOBILE_DEBUG
@@ -342,9 +351,12 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
  printer << " dims: " << tensor.dims() << "\n";
  int stride = tensor.numel() / 20;
  stride = stride > 0 ? stride : 1;
+#ifndef PADDLE_MOBILE_FPGA
  for (int i = 0; i < tensor.numel(); i += stride) {
    printer << tensor.data<float>()[i] << " ";
  }
+#endif
+
  return printer;
 }


--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -89,7 +89,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
  } else {
    InitMemory();
  }
-
  std::shared_ptr<framework::BlockDesc> to_predict_block =
      to_predict_program_->Block(0);
  auto &ops = ops_of_block_[*to_predict_block.get()];
@@ -193,8 +192,14 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
    }
    *data += (memory_size * sizeof(uint8_t));
  } else {
-    for (int n = 0; n < memory_size * type_size; ++n) {
-      static_cast<char *>(memory)[n] = (*data)[n];
+    for (int n = 0; n < memory_size; n++) {
+      float value;
+      memcpy(&value, *data + n * type_size, type_size);
+      if (value < 1e-30 && value > -1e-30) {
+        static_cast<float *>(memory)[n] = 0.0;
+      } else {
+        static_cast<float *>(memory)[n] = value;
+      }
    }
    (*data) += (sizeof(char) * memory_size * type_size);
  }

--- a/src/jni/PML.java
+++ b/src/jni/PML.java
+package com.baidu.paddle;
+
+public class PML {
+    /**
+     * Load a separated model from a model directory.
+     *
+     * @param modelDir model directory
+     * @return true if the model was loaded successfully
+     */
+    public static native boolean load(String modelDir);
+
+    /**
+     * Load a combined model.
+     *
+     * @param modelPath model file path
+     * @param paramPath param file path
+     * @return true if the model was loaded successfully
+     */
+    public static native boolean loadCombined(String modelPath, String paramPath);
+
+    /**
+     * Load a model together with its qualified params.
+     *
+     * @param modelDir qualified model directory
+     * @return true if the model was loaded successfully
+     */
+    public static native boolean loadQualified(String modelDir);
+
+    /**
+     * Load a model together with its qualified, combined params.
+     *
+     * @param modelPath model file path
+     * @param paramPath qualified param file path
+     * @return true if the model was loaded successfully
+     */
+    public static native boolean loadCombinedQualified(String modelPath, String paramPath);
+
+    /**
+     * Run a prediction on an image.
+     *
+     * @param buf   pre-processed image data, laid out as the model expects
+     * @param ddims dimensions of the input; the native code expects 4 entries
+     * @return the prediction output
+     */
+    public static native float[] predictImage(float[] buf, int[] ddims);
+
+
+    /**
+     * Run a prediction on a YUV image; the native code converts the buffer
+     * with convert_nv21_to_matrix before predicting.
+     *
+     * @param buf        YUV image bytes
+     * @param imgWidth   width of the image in {@code buf}
+     * @param imgHeight  height of the image in {@code buf}
+     * @param ddims      dimensions of the input; the native code expects 4 entries
+     * @param meanValues mean values handed to the conversion
+     *                   (presumably per channel - TODO confirm)
+     * @return the prediction output
+     */
+    public static native float[] predictYuv(byte[] buf, int imgWidth, int imgHeight, int[] ddims, float[] meanValues);
+
+    /**
+     * Clear the model data.
+     */
+    public static native void clear();
+
+    /**
+     * Set the thread count; applies when OpenMP is enabled.
+     *
+     * @param threadCount number of threads
+     */
+    public static native void setThread(int threadCount);
+
+
+}
--- a/src/jni/paddle_mobile_jni.cpp
+++ b/src/jni/paddle_mobile_jni.cpp
@@ -20,6 +20,12 @@ limitations under the License. */
 #include "framework/tensor.h"
 #include "io/paddle_mobile.h"

+#ifdef ENABLE_EXCEPTION
+
+#include "common/enforce.h"
+
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -33,17 +39,10 @@ using std::string;

 extern const char *ANDROID_LOG_TAG =
    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
-static PaddleMobile<CPU> *shared_paddle_mobile_instance = nullptr;
+paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+static std::mutex shared_mutex;

-// toDo mutex lock
-// static std::mutex shared_mutex;
-
-PaddleMobile<CPU> *getPaddleMobileInstance() {
-  if (nullptr == shared_paddle_mobile_instance) {
-    shared_paddle_mobile_instance = new PaddleMobile<CPU>();
-  }
-  return shared_paddle_mobile_instance;
-}
+PaddleMobile<CPU> *getPaddleMobileInstance() { return &paddle_mobile; }

 string jstring2cppstring(JNIEnv *env, jstring jstr) {
  const char *cstr = env->GetStringUTFChars(jstr, 0);
@@ -55,43 +54,144 @@ string jstring2cppstring(JNIEnv *env, jstring jstr) {
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
                                                          jclass thiz,
                                                          jstring modelPath) {
+  // JNI entry point for PML.load: loads the model at `modelPath` with
+  // optimization enabled and reports whether Load succeeded.
+  // The lock serialises access to the shared PaddleMobile instance.
+  std::lock_guard<std::mutex> lock(shared_mutex);
  ANDROIDLOGI("load invoked");
  bool optimize = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         optimize);
+  bool isLoadOk = false;
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), optimize);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    // %s is required, otherwise e.what() is never printed.
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             optimize);
+#endif
+  return static_cast<jboolean>(isLoadOk);
 }

 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadQualified(
    JNIEnv *env, jclass thiz, jstring modelPath) {
+  // JNI entry point for PML.loadQualified: same as load, but passes
+  // qualified = true to Load.
+  // The lock serialises access to the shared PaddleMobile instance.
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
  ANDROIDLOGI("loadQualified invoked");
  bool optimize = true;
  bool qualified = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         optimize, qualified);
+  bool isLoadOk = false;
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), optimize, qualified);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    // %s is required, otherwise e.what() is never printed.
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             optimize, qualified);
+#endif
+
+  return static_cast<jboolean>(isLoadOk);
 }

 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
    JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
+  // JNI entry point for PML.loadCombined: loads a model file plus a separate
+  // param file with optimization enabled and reports whether Load succeeded.
+  // The lock serialises access to the shared PaddleMobile instance.
+  std::lock_guard<std::mutex> lock(shared_mutex);
  ANDROIDLOGI("loadCombined invoked");
  bool optimize = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         jstring2cppstring(env, paramPath),
-                                         optimize);
+  bool isLoadOk = false;
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
+        optimize);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    // %s is required, otherwise e.what() is never printed.
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             jstring2cppstring(env, paramPath),
+                                             optimize);
+#endif
+  return static_cast<jboolean>(isLoadOk);
 }

 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombinedQualified(
    JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
+  // JNI entry point for PML.loadCombinedQualified: same as loadCombined, but
+  // passes qualified = true to Load.
+  // The lock serialises access to the shared PaddleMobile instance.
+  std::lock_guard<std::mutex> lock(shared_mutex);
  ANDROIDLOGI("loadCombinedQualified invoked");
  bool optimize = true;
  bool qualified = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         jstring2cppstring(env, paramPath),
-                                         optimize, qualified);
+  bool isLoadOk = false;
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
+        optimize, qualified);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    // %s is required, otherwise e.what() is never printed.
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             jstring2cppstring(env, paramPath),
+                                             optimize, qualified);
+#endif
+  return static_cast<jboolean>(isLoadOk);
 }

 JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
    JNIEnv *env, jclass thiz, jfloatArray buf, jintArray ddims) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
  ANDROIDLOGI("predictImage invoked");
+  jfloatArray result = NULL;
+
+#ifdef ENABLE_EXCEPTION
+  ANDROIDLOGE("ENABLE_EXCEPTION!");
+
+  try {
+    jsize ddim_size = env->GetArrayLength(ddims);
+    if (ddim_size != 4) {
+      ANDROIDLOGE("ddims size not equal to 4");
+    }
+    jint *ddim_ptr = env->GetIntArrayElements(ddims, NULL);
+    framework::DDim ddim = framework::make_ddim(
+        {ddim_ptr[0], ddim_ptr[1], ddim_ptr[2], ddim_ptr[3]});
+    int length = framework::product(ddim);
+    int count = 0;
+    float *dataPointer = nullptr;
+    if (nullptr != buf) {
+      dataPointer = env->GetFloatArrayElements(buf, NULL);
+    }
+    framework::Tensor input;
+    input.Resize(ddim);
+    auto input_ptr = input.mutable_data<float>();
+    for (int i = 0; i < length; i++) {
+      input_ptr[i] = dataPointer[i];
+    }
+    auto output = getPaddleMobileInstance()->Predict(input);
+    count = output->numel();
+    result = env->NewFloatArray(count);
+    env->SetFloatArrayRegion(result, 0, count, output->data<float>());
+    env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
+    env->DeleteLocalRef(ddims);
+    env->ReleaseFloatArrayElements(buf, dataPointer, 0);
+    env->DeleteLocalRef(buf);
+
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
+  }
+
+#else
  jsize ddim_size = env->GetArrayLength(ddims);
  if (ddim_size != 4) {
    ANDROIDLOGE("ddims size not equal to 4");
@@ -100,7 +200,6 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
  framework::DDim ddim = framework::make_ddim(
      {ddim_ptr[0], ddim_ptr[1], ddim_ptr[2], ddim_ptr[3]});
  int length = framework::product(ddim);
-  jfloatArray result = NULL;
  int count = 0;
  float *dataPointer = nullptr;
  if (nullptr != buf) {
@@ -112,12 +211,19 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
  for (int i = 0; i < length; i++) {
    input_ptr[i] = dataPointer[i];
  }
-  auto output = shared_paddle_mobile_instance->Predict(input);
+  auto output = getPaddleMobileInstance()->Predict(input);
  count = output->numel();
  result = env->NewFloatArray(count);
  env->SetFloatArrayRegion(result, 0, count, output->data<float>());
  env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
+  env->DeleteLocalRef(ddims);
+  // dataPointer is the raw float* obtained from GetFloatArrayElements, not a
+  // JNI reference: it is handed back with ReleaseFloatArrayElements and must
+  // not be passed to DeleteLocalRef.
+  env->ReleaseFloatArrayElements(buf, dataPointer, 0);
+  env->DeleteLocalRef(buf);
+#endif
+
  ANDROIDLOGI("predictImage finished");
+
  return result;
 }

@@ -170,7 +276,48 @@ void convert_nv21_to_matrix(uint8_t *nv21, float *matrix, int width, int height,
 JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
    JNIEnv *env, jclass thiz, jbyteArray yuv_, jint imgwidth, jint imgHeight,
    jintArray ddims, jfloatArray meanValues) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
  ANDROIDLOGI("predictYuv invoked");
+  jfloatArray result = NULL;
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    jsize ddim_size = env->GetArrayLength(ddims);
+    if (ddim_size != 4) {
+      ANDROIDLOGE("ddims size not equal to 4");
+    }
+    jint *ddim_ptr = env->GetIntArrayElements(ddims, NULL);
+    framework::DDim ddim = framework::make_ddim(
+        {ddim_ptr[0], ddim_ptr[1], ddim_ptr[2], ddim_ptr[3]});
+    int length = framework::product(ddim);
+    float matrix[length];
+    jbyte *yuv = env->GetByteArrayElements(yuv_, NULL);
+    float *meansPointer = nullptr;
+    if (nullptr != meanValues) {
+      meansPointer = env->GetFloatArrayElements(meanValues, NULL);
+    }
+    convert_nv21_to_matrix((uint8_t *)yuv, matrix, imgwidth, imgHeight, ddim[3],
+                           ddim[2], meansPointer);
+    int count = 0;
+    framework::Tensor input;
+    input.Resize(ddim);
+    auto input_ptr = input.mutable_data<float>();
+    for (int i = 0; i < length; i++) {
+      input_ptr[i] = matrix[i];
+    }
+    auto output = getPaddleMobileInstance()->Predict(input);
+    count = output->numel();
+    result = env->NewFloatArray(count);
+    env->SetFloatArrayRegion(result, 0, count, output->data<float>());
+    env->ReleaseByteArrayElements(yuv_, yuv, 0);
+    env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
+    env->ReleaseFloatArrayElements(meanValues, meansPointer, 0);
+    ANDROIDLOGI("predictYuv finished");
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
+  }
+#else
  jsize ddim_size = env->GetArrayLength(ddims);
  if (ddim_size != 4) {
    ANDROIDLOGE("ddims size not equal to 4");
@@ -187,7 +334,6 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
  }
  convert_nv21_to_matrix((uint8_t *)yuv, matrix, imgwidth, imgHeight, ddim[3],
                         ddim[2], meansPointer);
-  jfloatArray result = NULL;
  int count = 0;
  framework::Tensor input;
  input.Resize(ddim);
@@ -195,7 +341,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
  for (int i = 0; i < length; i++) {
    input_ptr[i] = matrix[i];
  }
-  auto output = shared_paddle_mobile_instance->Predict(input);
+  auto output = getPaddleMobileInstance()->Predict(input);
  count = output->numel();
  result = env->NewFloatArray(count);
  env->SetFloatArrayRegion(result, 0, count, output->data<float>());
@@ -203,19 +349,44 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
  env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
  env->ReleaseFloatArrayElements(meanValues, meansPointer, 0);
  ANDROIDLOGI("predictYuv finished");
+#endif
+
  return result;
 }

 JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_setThread(JNIEnv *env,
                                                           jclass thiz,
                                                           jint threadCount) {
+  // JNI entry point for PML.setThread: forwards `threadCount` to
+  // SetThreadNum on the shared PaddleMobile instance, under the shared lock.
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
  ANDROIDLOGI("setThreadCount %d", threadCount);
+#ifdef ENABLE_EXCEPTION
+  try {
+    getPaddleMobileInstance()->SetThreadNum((int)threadCount);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    // %s is required, otherwise e.what() is never printed.
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+  }
+#else
  getPaddleMobileInstance()->SetThreadNum((int)threadCount);
+
+#endif
 }

 JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
                                                       jclass thiz) {
+  // JNI entry point for PML.clear: calls Clear() on the shared PaddleMobile
+  // instance, under the shared lock.
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
+#ifdef ENABLE_EXCEPTION
+  try {
+    getPaddleMobileInstance()->Clear();
+
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    // %s is required, otherwise e.what() is never printed.
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+  }
+#else
  getPaddleMobileInstance()->Clear();
+
+#endif
 }

 }  // namespace jni

--- a/src/jni/paddle_mobile_jni.h
+++ b/src/jni/paddle_mobile_jni.h
@@ -73,8 +73,8 @@ JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_setThread(JNIEnv *env,
 /**
 * clear data of the net when destroy for android
 */
-JNIEXPORT void JNICALL Java_com_baidu_paddle_PMLL_clear(JNIEnv *env,
-                                                        jclass thiz);
+JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
+                                                       jclass thiz);
 }  // namespace jni
 }  // namespace paddle_mobile
 #ifdef __cplusplus

--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -18,7 +18,7 @@ limitations under the License. */

 #ifdef PADDLE_MOBILE_FPGA

-#include "fpga/api/fpga_api.h"
+#include "fpga/api.h"

 #endif


--- a/src/operators/conv_transpose_op.cpp
+++ b/src/operators/conv_transpose_op.cpp
@@ -20,4 +20,13 @@ namespace paddle_mobile {
 namespace operators {}
 }  // namespace paddle_mobile

+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
 #endif
--- a/src/operators/conv_transpose_op.h
+++ b/src/operators/conv_transpose_op.h
@@ -88,4 +88,14 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile

+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(conv2d_transpose);
+#endif
+// No USE_OP for the Mali GPU and FPGA back-ends: conv_transpose_op.cpp
+// registers conv2d_transpose for CPU only (its MALI_GPU and FPGA sections are
+// empty), so there is no registration for those back-ends to refer to.
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
 #endif
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -38,12 +38,18 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
  }

 #ifdef PADDLE_MOBILE_FPGA
-  void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
+
  void Init() {
+    Tensor *output = param_.Out();
+    output->mutable_data<half>();
+  }
+
+  void RunImpl() const {
    const Tensor *input = param_.InputX();
    auto input_ptr = input->data<float>();
    Tensor *output = param_.Out();
    auto output_ptr = output->mutable_data<half>();
+    auto out_address = output->fpga_args().scale_pointer();
    fpga::BypassArgs args;
    args.convert_type = fpga::DATA_FP32_TO_FP16;
    args.layout_type = fpga::LAYOUT_CHW_TO_HWC;
@@ -51,13 +57,16 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
    args.image.channels = input->dims()[1];
    args.image.height = input->dims()[2];
    args.image.width = input->dims()[3];
+    args.image.pad_height = 0;
+    args.image.pad_width = 0;
    args.output.address = output_ptr;
-    param_.SetFpgaArgs(args);
+    args.output.scale_address = out_address;
+    fpga::PerformBypass(args);
  }

 #else
-  void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
  void Init() {}
+  void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
 #endif

 protected:

--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -16,6 +16,8 @@ limitations under the License. */

 #pragma once

+#include <string>
+#include <vector>
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
 #include "operators/kernel/conv_add_relu_kernel.h"
@@ -65,11 +67,11 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<

 #ifdef PADDLE_MOBILE_CPU

-//#ifndef CONV_ADD_RELU_REGISTER
-//#define CONV_ADD_RELU_REGISTER
-// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
-// FusionConvAddReluOpMatcher());
-//#endif
+#ifndef CONV_ADD_RELU_REGISTER
+#define CONV_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
+    new FusionConvAddReluOpMatcher());
+#endif

 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU

--- a/src/operators/fusion_conv_bn_op.cpp
+++ b/src/operators/fusion_conv_bn_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBN_OP
+
+#include "operators/fusion_conv_bn_op.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Computes the output dims of the fused op and resizes the output tensor.
+// The result is {in_dims[0], filter_dims[0]} followed by one
+// math::ConvOutputSize value per entry of `strides`.
+template <typename Dtype, typename T>
+void FusionConvBNOp<Dtype, T>::InferShape() const {
+  auto in_dims = this->param_.Input()->dims();
+  auto filter_dims = this->param_.Filter()->dims();
+  const std::vector<int> &strides = this->param_.Strides();
+  std::vector<int> paddings = this->param_.Paddings();
+  int groups = this->param_.Groups();
+  std::vector<int> dilations = this->param_.Dilations();
+
+  // Input and filter must have the same rank, and the per-dimension
+  // parameter vectors must all have the same length.
+  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
+                         dilations.size() == paddings.size() &&
+                         paddings.size() == strides.size()),
+                        "ConvParam is not suitable");
+
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  // Entry i of strides/paddings/dilations applies to dimension i + 2.
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
+  }
+
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  this->param_.Output()->Resize(ddim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(fusion_conv_bn, ops::FusionConvBNOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_bn, ops::FusionConvBNOp);
+#endif
+
+#endif
--- a/src/operators/fusion_conv_bn_op.h
+++ b/src/operators/fusion_conv_bn_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBN_OP
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/conv_bn_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+// Fusion matcher whose pattern is built from a G_OP_TYPE_CONV node and a
+// G_OP_TYPE_BATCHNORM node; a match is folded into G_OP_TYPE_FUSION_CONV_BN.
+class FusionConvBNMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionConvBNMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    // presumably operator> chains the batch-norm node after the conv node -
+    // TODO confirm against framework::Node
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
+  }
+
+  // Folds the matched nodes starting at `node` into one node of type Type(),
+  // passing the batch-norm Scale/Mean/Bias/Variance key mapping and the
+  // `removed_nodes` output list on to Node::Folder.
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"}}}},
+                 removed_nodes);
+  }
+
+  // Op type given to the folded node.
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_BN; }
+};
+
+// Fused conv + batch-norm operator: an OperatorWithKernel parameterised with
+// FusionConvBNParam and ConvBNKernel for the given device type.
+template <typename DeviceType, typename T>
+class FusionConvBNOp : public framework::OperatorWithKernel<
+                           DeviceType, FusionConvBNParam<DeviceType>,
+                           operators::ConvBNKernel<DeviceType, T>> {
+ public:
+  // Forwards every argument unchanged to the OperatorWithKernel base.
+  FusionConvBNOp(const string &type, const VariableNameMap &inputs,
+                 const VariableNameMap &outputs,
+                 const framework::AttributeMap &attrs,
+                 std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, FusionConvBNParam<DeviceType>,
+                                      operators::ConvBNKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+
+  // Defined in fusion_conv_bn_op.cpp.
+  void InferShape() const override;
+
+ protected:
+};
+
+#ifdef PADDLE_MOBILE_CPU
+
+#ifndef FUSION_CONV_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_registrar(
+    new FusionConvBNMatcher());
+#define FUSION_CONV_BN_REGISTER
+#endif
+
+#endif
+
+#ifdef PADDLE_MOBILE_MALI_GPU
+
+#endif
+
+#ifdef PADDLE_MOBILE_FPGA
+
+#ifndef FUSION_CONV_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_registrar(
+    new FusionConvBNMatcher());
+#define FUSION_CONV_BN_REGISTER
+#endif
+#endif
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(fusion_conv_bn);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_bn);
+#endif
+
+#endif
--- a/src/operators/fusion_conv_bn_relu_op.cpp
+++ b/src/operators/fusion_conv_bn_relu_op.cpp
@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
 #endif

 #endif
--- a/src/operators/fusion_conv_bn_relu_op.h
+++ b/src/operators/fusion_conv_bn_relu_op.h
@@ -87,6 +87,12 @@ static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
 #endif

 #ifdef PADDLE_MOBILE_FPGA
+
+#ifndef FUSION_CONV_BN_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
+    new FusionConvBNReluMatcher());
+#define FUSION_CONV_BN_RELU_REGISTER
+#endif
 #endif

 }  // namespace operators
@@ -98,6 +104,7 @@ USE_OP_CPU(fusion_conv_bn_relu);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_bn_relu);
 #endif

 #endif
--- a/src/operators/fusion_elementwise_add_relu_op.h
+++ b/src/operators/fusion_elementwise_add_relu_op.h
@@ -28,7 +28,7 @@ using std::vector;
 class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
 public:
  FusioneElementwiseAddReluMatcher() {
-    node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU);
+    node_ = framework::Node(G_OP_TYPE_ELEMENTWISE_ADD);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }


--- a/src/operators/kernel/arm/prelu_kernel.cpp
+++ b/src/operators/kernel/arm/prelu_kernel.cpp
@@ -33,77 +33,34 @@ struct PReluFunctor {
 * */
 template <>
 void PReluKernel<CPU, float>::Compute(const PReluParam<CPU> &param) const {
-  const auto *input_x = param.InputX();
-  auto *input_x_ptr = input_x->data<float>();
+  auto *x = param.InputX();
+  auto *alpha = param.InputAlpha();
  auto *out = param.Out();
-  auto *out_ptr = out->mutable_data<float>();
-
-  if (param.Slopes().size() == 1) {
-    PReluFunctor<float> func_(param.Slopes()[0]);
-    math::Transform trans;
-    trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
-  } else if (param.Slopes().size() > 1) {
-    const int dim_size = input_x->dims().size();
-    switch (dim_size) {
-      case 0:
-        break;
-      case 1: {
-        const int input_width = input_x->dims()[0];
-        math::Transform trans;
-
-        #pragma omp parallel for
-        for (int w = 0; w < input_width; ++w) {
-          out_ptr[w] = input_x_ptr[w] * param.Slopes()[w];
-        }
-      } break;
-      case 2: {
-        const int input_height = input_x->dims()[0];
-        const int input_width = input_x->dims()[1];
-
-        math::Transform trans;
-        #pragma omp parallel for
-        for (int h = 0; h < input_height; ++h) {
-          PReluFunctor<float> func_(param.Slopes()[h]);
-          const float *ptr = input_x_ptr + h * input_width;
-          float *optr = out_ptr + +h * input_width;
-          trans(ptr, ptr + input_width, optr, func_);
-        }
-      } break;
-      case 3: {
-        const int chan_size = input_x->dims()[0];
-        const int input_height = input_x->dims()[1];
-        const int input_width = input_x->dims()[2];
-
-        math::Transform trans;
-        #pragma omp parallel for
-        for (int c = 0; c < chan_size; ++c) {
-          PReluFunctor<float> func_(param.Slopes()[c]);
-          int size = input_height * input_width;
-          const float *ptr = input_x_ptr + c * size;
-          float *optr = out_ptr + c * size;
-          trans(ptr, ptr + size, optr, func_);
-        }
-      } break;
-      case 4:
-      default: {
-        const int batch_size = input_x->dims()[0];
-        const int chan_size = input_x->dims()[1];
-        const int input_height = input_x->dims()[2];
-        const int input_width = input_x->dims()[3];
-        math::Transform trans;
-
-        #pragma omp parallel for
-        for (int b = 0; b < batch_size; ++b) {
-          for (int c = 0; c < chan_size; ++c) {
-            PReluFunctor<float> func_(param.Slopes()[c]);
-            int size = input_height * input_width;
-            const float *ptr = input_x_ptr + b * c * size;
-            float *optr = out_ptr + +b * c * size;
-            trans(ptr, ptr + size, optr, func_);
-          }
-        }
-      }  // case 3,default
-      break;
+  // PReLU: out = x > 0 ? x : alpha * x; `mode` selects how alpha is indexed.
+  const std::string mode = param.Mode();
+  const auto *x_ptr = x->data<float>();
+  auto *o_ptr = out->mutable_data<float>();
+  const auto *alpha_ptr = alpha->data<float>();
+  const int numel = x->numel();
+  auto dim = x->dims();
+  if (mode == "channel") {
+    // Elements per channel plane; the alpha index wraps at dim[1].
+    const int temp = numel / (dim[0] * dim[1]);
+    #pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      // Loop-local so each OpenMP thread has its own copy (no data race).
+      const int index = (i / temp) % dim[1];
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
+    }
+  } else if (mode == "element") {
+    #pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
+    }
+  } else {
+    #pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
    }
  }
 }

--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -12,25 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifdef CONV_OP
+#pragma once

-#include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_arm_func.h"
+#ifdef FUSION_CONVBN_OP
+
+#include <vector>
+#include "framework/ddim.h"
+#include "framework/operator.h"
+#include "operators/math/conv_func.h"
+#include "operators/math/im2col.h"
+#include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"

 namespace paddle_mobile {
 namespace operators {

-template <>
-bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
-  return true;
-}
-
-template <>
-void ConvKernel<FPGA, float>::Compute(const ConvParam<FPGA> &param) const {
-  // ConvCompute<float>(param);
-}
+using framework::DDim;
+using framework::OpKernelBase;

-template class ConvKernel<FPGA, float>;
+// Kernel interface for the fused convolution + batch-norm operator.
+// Only the two member functions are declared here; no definitions are given
+// in this class body.
+template <typename DeviceType, typename T>
+class ConvBNKernel
+    : public OpKernelBase<DeviceType, FusionConvBNParam<DeviceType>> {
+ public:
+  // Set-up step taking the mutable operator parameters; returns a bool status.
+  bool Init(FusionConvBNParam<DeviceType> *param);
+  // Executes the operator with read-only access to `param`.
+  void Compute(const FusionConvBNParam<DeviceType> &param) const;
+};

 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -15,8 +15,8 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBN_OP

 #include "operators/kernel/conv_add_bn_kernel.h"
-#include "fpga/api/fpga_api.h"
-#include "fpga/quantilization.h"
+#include "fpga/api.h"
+#include "fpga/quantization.h"

 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +28,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
  auto input_ptr = input->data<half>();
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
+  Tensor *filter = param->Filter();

  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<half>();
@@ -37,11 +37,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
  auto bn_scale_ptr = param->InputScale()->data<float>();
  auto bn_bias_ptr = param->InputBias()->data<float>();
  const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                            bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
+                        "Output channel should be equal to bias number");

-  const int channel = input->dims()[1];
+  const int channel = out->dims()[1];
  float *bs_ptr =
      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
  Tensor *new_scale = new Tensor();
@@ -60,30 +60,27 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

-  const Tensor *quant_filter = quantilize_filter(filter);
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();

-  // delete original filter?
-  filter = quant_filter;
-
-  auto filter_ptr = filter->data<float>();
  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
+  convArgs.filter_address = (void *)filter_ptr;
  convArgs.filter_num = filter->dims()[0];
  convArgs.group_num = param->Groups();
-  convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
+  convArgs.sb_address = (void *)bs_ptr;
  convArgs.kernel.stride_h = param->Strides()[0];
  convArgs.kernel.stride_w = param->Strides()[1];
  convArgs.kernel.height = filter->dims()[2];
  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = reinterpret_cast<void *> input_ptr;
+  convArgs.image.address = (void *)input_ptr;
  convArgs.image.channels = input->dims()[1];
  convArgs.image.height = input->dims()[2];
  convArgs.image.width = input->dims()[3];
  convArgs.image.pad_height = param->Paddings()[0];
  convArgs.image.pad_width = param->Paddings()[1];
  convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = reinterpret_cast<void *> out_ptr;
+  convArgs.output.address = (void *)out_ptr;
  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);


--- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBNRELU_OP

 #include "operators/kernel/conv_add_bn_relu_kernel.h"
-#include "memory/t_malloc.h"
+#include "fpga/quantization.h"

 namespace paddle_mobile {
 namespace operators {
@@ -28,8 +28,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
  auto input_ptr = input->data<half>();
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+  Tensor *filter = param->Filter();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<half>();
  auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -37,11 +36,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
  auto bn_scale_ptr = param->InputScale()->data<float>();
  auto bn_bias_ptr = param->InputBias()->data<float>();
  const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                            bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
+                        "Output channel should be equal to bias number");

-  const int channel = input->dims()[1];
+  const int channel = out->dims()[1];
  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
@@ -58,6 +57,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();

  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;

--- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADDRELU_OP

 #include "operators/kernel/conv_add_relu_kernel.h"
-#include "common/enforce.h"
+#include "fpga/quantization.h"

 namespace paddle_mobile {
 namespace operators {
@@ -27,20 +27,22 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
  auto input_ptr = input->data<half>();
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+  Tensor *filter = param->Filter();
  Tensor *out = param->Output();
  auto out_ptr = out->mutable_data<half>();

-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0],
-                        "Image channel should be equal to bias number");
-  int channel = input->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  for (int i = 0; i < channel; i++) {
    bs_ptr[i * 2] = 1;
    bs_ptr[i * 2 + 1] = bias_ptr[i];
  }

+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();
+
  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)filter_ptr;

--- a/src/operators/kernel/fpga/conv_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBN_OP
+
+#include "operators/kernel/conv_bn_kernel.h"
+#include "fpga/api.h"
+#include "fpga/quantization.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Builds and caches the FPGA descriptor for the fused conv + batch-norm op.
+//
+// The batch-norm mean/variance/scale/bias are folded into one (scale, bias)
+// pair per output channel. The pairs are published on `param` as new tensors
+// and also written interleaved into a buffer obtained from fpga::fpga_malloc,
+// which is handed over through ConvArgs::sb_address. The filter is passed
+// through fpga::quantize_filter and then read as int8_t. ReLU stays disabled
+// for this op. Always returns true.
+template <>
+bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<half>();
+  Tensor *filter = param->Filter();
+
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<half>();
+
+  auto mean = param->InputMean()->data<float>();
+  auto variance = param->InputVariance()->data<float>();
+  auto gamma = param->InputScale()->data<float>();
+  auto beta = param->InputBias()->data<float>();
+  const float epsilon = param->Epsilon();
+
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  const int channel = out->dims()[1];
+
+  // Interleaved layout: [scale0, bias0, scale1, bias1, ...].
+  float *scale_bias =
+      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
+  Tensor *folded_scale = new Tensor();
+  Tensor *folded_bias = new Tensor();
+  auto folded_scale_ptr = folded_scale->mutable_data<float>({channel});
+  auto folded_bias_ptr = folded_bias->mutable_data<float>({channel});
+
+  for (int c = 0; c < channel; ++c) {
+    // scale' = gamma / sqrt(var + eps);  bias' = beta - mean * scale'
+    const float stddev = static_cast<float>(pow((variance[c] + epsilon), 0.5));
+    const float scale = gamma[c] / stddev;
+    const float shift = beta[c] + (0 - mean[c]) * scale;
+    folded_scale_ptr[c] = scale;
+    folded_bias_ptr[c] = shift;
+    scale_bias[c * 2] = scale;
+    scale_bias[c * 2 + 1] = shift;
+  }
+  param->SetNewScale(folded_scale);
+  param->SetNewBias(folded_bias);
+
+  // The filter must be quantized before its int8 data pointer is taken.
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();
+
+  fpga::ConvArgs args;
+  args.relu_enabled = false;
+  args.group_num = param->Groups();
+  args.sb_address = (void *)scale_bias;
+
+  // Filter / kernel geometry.
+  args.filter_address = (void *)filter_ptr;
+  args.filter_num = filter->dims()[0];
+  args.kernel.height = filter->dims()[2];
+  args.kernel.width = filter->dims()[3];
+  args.kernel.stride_h = param->Strides()[0];
+  args.kernel.stride_w = param->Strides()[1];
+
+  // Input image.
+  args.image.address = (void *)input_ptr;
+  args.image.scale_address = input->fpga_args().scale_pointer();
+  args.image.channels = input->dims()[1];
+  args.image.height = input->dims()[2];
+  args.image.width = input->dims()[3];
+  args.image.pad_height = param->Paddings()[0];
+  args.image.pad_width = param->Paddings()[1];
+
+  // Output buffer.
+  args.output.address = (void *)out_ptr;
+  args.output.scale_address = out->fpga_args().scale_pointer();
+
+  param->SetFpgaArgs(args);
+  return true;
+}
+
+// Runs the convolution on the FPGA with the arguments stored on `param`.
+template <>
+void ConvBNKernel<FPGA, float>::Compute(
+    const FusionConvBNParam<FPGA> &param) const {
+  const auto &conv_args = param.FpgaArgs();
+  fpga::ComputeFpgaConv(conv_args);
+}
+// Explicit instantiation of the FPGA/float kernel.
+template class ConvBNKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBNRELU_OP
+
+#include "operators/kernel/conv_bn_relu_kernel.h"
+#include "fpga/quantization.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Builds and caches the FPGA descriptor for the fused conv + batch-norm + ReLU
+// op.
+//
+// Batch-norm statistics are folded into a per-output-channel (scale, bias)
+// pair, stored both as new tensors on `param` and interleaved in a buffer from
+// fpga::fpga_malloc that is passed through ConvArgs::sb_address. The filter is
+// run through fpga::quantize_filter and read back as int8_t. ReLU is enabled
+// in the descriptor. Always returns true.
+template <>
+bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<half>();
+  Tensor *filter = param->Filter();
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<half>();
+
+  auto mean_ptr = param->InputMean()->data<float>();
+  auto var_ptr = param->InputVariance()->data<float>();
+  auto gamma_ptr = param->InputScale()->data<float>();
+  auto beta_ptr = param->InputBias()->data<float>();
+  const float epsilon = param->Epsilon();
+
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  const int channel = out->dims()[1];
+
+  // Two floats per channel: scale at even slots, bias at odd slots.
+  float *packed =
+      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
+  Tensor *scale_tensor = new Tensor();
+  Tensor *bias_tensor = new Tensor();
+  auto scale_out = scale_tensor->mutable_data<float>({channel});
+  auto bias_out = bias_tensor->mutable_data<float>({channel});
+
+  for (int ch = 0; ch < channel; ++ch) {
+    const float denom = static_cast<float>(pow((var_ptr[ch] + epsilon), 0.5));
+    const float folded_scale = gamma_ptr[ch] / denom;
+    const float folded_bias = beta_ptr[ch] + (0 - mean_ptr[ch]) * folded_scale;
+    scale_out[ch] = folded_scale;
+    bias_out[ch] = folded_bias;
+    packed[ch * 2] = folded_scale;
+    packed[ch * 2 + 1] = folded_bias;
+  }
+  param->SetNewScale(scale_tensor);
+  param->SetNewBias(bias_tensor);
+
+  // Quantize first; the int8 pointer is only taken afterwards.
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();
+
+  fpga::ConvArgs args;
+  args.relu_enabled = true;
+  args.group_num = param->Groups();
+  args.sb_address = (void *)packed;
+
+  args.filter_address = (void *)filter_ptr;
+  args.filter_num = filter->dims()[0];
+  args.kernel.height = filter->dims()[2];
+  args.kernel.width = filter->dims()[3];
+  args.kernel.stride_h = param->Strides()[0];
+  args.kernel.stride_w = param->Strides()[1];
+
+  args.image.address = (void *)input_ptr;
+  args.image.scale_address = input->fpga_args().scale_pointer();
+  args.image.channels = input->dims()[1];
+  args.image.height = input->dims()[2];
+  args.image.width = input->dims()[3];
+  args.image.pad_height = param->Paddings()[0];
+  args.image.pad_width = param->Paddings()[1];
+
+  args.output.address = (void *)out_ptr;
+  args.output.scale_address = out->fpga_args().scale_pointer();
+
+  param->SetFpgaArgs(args);
+  return true;
+}
+
+// Runs the convolution on the FPGA with the arguments stored on `param`.
+template <>
+void ConvBNReluKernel<FPGA, float>::Compute(
+    const FusionConvBNReluParam<FPGA> &param) const {
+  const auto &stored_args = param.FpgaArgs();
+  fpga::ComputeFpgaConv(stored_args);
+}
+// Explicit instantiation of the FPGA/float kernel.
+template class ConvBNReluKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #ifdef FUSION_FCRELU_OP
 #include "operators/kernel/fc_relu_kernel.h"
-#include "fpga/api/fpga_api.h"
+
+#include "fpga/api.h"
+#include "fpga/quantization.h"

 namespace paddle_mobile {
 namespace operators {
@@ -23,8 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
  bool relu_enabled = true;
  const Tensor *input_x = param->InputX();
  auto input_x_ptr = input_x->data<half>();
-  const Tensor *input_y = param->InputY();
-  auto input_y_ptr = input_y->data<float>();
+  Tensor *input_y = param->InputY();
  const Tensor *input_z = param->InputZ();
  auto input_z_ptr = input_z->data<float>();
  Tensor *out = param->Out();
@@ -32,13 +33,16 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {

  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                        "Image channel should be equal to weight number");
-  int channel = input_x->dims()[1];
+  int channel = out->dims()[1];
  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  for (int i = 0; i < channel; i++) {
    bs_ptr[i * 2] = 1;
    bs_ptr[i * 2 + 1] = input_z_ptr[i];
  }

+  fpga::quantize_filter(input_y);
+  auto input_y_ptr = input_y->data<int8_t>();
+
  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)input_y_ptr;

--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 #ifdef FUSION_FC_OP

 #include "operators/kernel/fusion_fc_kernel.h"
+#include "fpga/quantization.h"

 namespace paddle_mobile {
 namespace operators {
@@ -23,8 +24,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
  bool relu_enabled = false;
  const Tensor *input_x = param->InputX();
  auto input_x_ptr = input_x->data<half>();
-  const Tensor *input_y = param->InputY();
-  auto input_y_ptr = input_y->data<float>();
+  Tensor *input_y = param->InputY();
  const Tensor *input_z = param->InputZ();
  auto input_z_ptr = input_z->data<float>();
  Tensor *out = param->Out();
@@ -32,13 +32,16 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {

  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                        "Image channel should be equal to weight number");
-  int channel = input_x->dims()[1];
+  int channel = out->dims()[1];
  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
  for (int i = 0; i < channel; i++) {
    bs_ptr[i * 2] = 1;
    bs_ptr[i * 2 + 1] = input_z_ptr[i];
  }

+  fpga::quantize_filter(input_y);
+  auto input_y_ptr = input_y->data<int8_t>();
+
  fpga::ConvArgs convArgs;
  convArgs.relu_enabled = relu_enabled;
  convArgs.filter_address = (void *)input_y_ptr;
@@ -55,11 +58,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
  convArgs.image.width = input_x->dims()[3];
  convArgs.image.pad_height = 0;
  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address =
-      input_x->fpga_args().scale_pointer();  // fc input has scale attribute??
+  convArgs.image.scale_address = input_x->fpga_args().scale_pointer();
  convArgs.output.address = (void *)out_ptr;
-  convArgs.output.scale_address =
-      out->fpga_args().scale_pointer();  // fc output has scale attribute??
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
  param->SetFpgaArgs(convArgs);
  return true;
 }

--- a/src/operators/kernel/fpga/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/softmax_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SOFTMAX_OP
+
+#include "../softmax_kernel.h"
+#include "../central-arm-func/softmax_arm_func.h"
+#include "common/types.h"
+#include "fpga/api.h"
+#include "operators/math/softmax.h"
+namespace paddle_mobile {
+namespace operators {
+
+// Prepares the FPGA bypass descriptor for softmax.
+//
+// When the input tensor holds half-precision data, a BypassArgs is filled in
+// (FP16 -> FP32 conversion, HWC -> CHW layout) and stored on `param`. For any
+// other input type nothing is stored. Always returns true.
+template <>
+bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
+  const Tensor *input = param->InputX();
+  if (input->type() == typeid(half)) {
+    auto input_ptr = input->data<half>();
+    // The bypass produces FP32 values, so the destination must be the output
+    // tensor's float buffer. Passing the Tensor object itself (as before)
+    // would make the destination address point at the object, not its data.
+    auto output_ptr = param->Out()->mutable_data<float>();
+    fpga::BypassArgs args;
+    args.convert_type = fpga::DATA_FP16_TO_FP32;
+    args.layout_type = fpga::LAYOUT_HWC_TO_CHW;
+    args.image.address = (void *)(input_ptr);
+    // The first two dims are used as height/width with a single channel.
+    args.image.height = input->dims()[0];
+    args.image.width = input->dims()[1];
+    args.image.channels = 1;
+    args.output.address = (void *)(output_ptr);
+    param->SetFpgaArgs(args);
+  }
+
+  return true;
+}
+
+// FPGA softmax compute step. The body is currently empty: the only statement
+// is a disabled call to SoftmaxCompute<float>(param), so `param` is unused and
+// no output is produced here.
+template <>
+void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) const {
+  // Disabled: SoftmaxCompute<float>(param);
+}
+
+// Explicit instantiation of the FPGA/float kernel.
+template class SoftmaxKernel<FPGA, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -540,15 +540,17 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
  const int hxw = input_height * input_width;

  const int l = input_height;
-  float32x4_t vnewbias = vdupq_n_f32(0.0);
-  float32x4_t vnewscale = vdupq_n_f32(1.0);
+
  float32x4_t vzero = vdupq_n_f32(0);

  for (int b = 0; b < batch_size; b++) {
-    filter_data = filter->data<float>();
+#pragma omp parallel for
    for (int c = 0; c < input_channel; c++) {
-      vnewbias = vdupq_n_f32(newbias_data[c]);
-      vnewscale = vdupq_n_f32(newscale_data[c]);
+      // Filter, scale and bias are indexed by channel only. The feature-map
+      // planes are per (batch, channel), so their offset must include `b`;
+      // with `c * hxw` alone every batch item would reuse item 0's planes.
+      const int plane_offset = (b * input_channel + c) * hxw;
+      const float *filter_data = filter->data<float>() + c * 9;
+      const float *input_data = input->data<float>() + plane_offset;
+      float *output_data = output->data<float>() + plane_offset;
+      // Loop-local so each OpenMP thread works on its own vectors.
+      float32x4_t vnewbias = vdupq_n_f32(newbias_data[c]);
+      float32x4_t vnewscale = vdupq_n_f32(newscale_data[c]);

      float w00 = filter_data[0];
      float w01 = filter_data[1];
@@ -560,6 +562,69 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
      float w21 = filter_data[7];
      float w22 = filter_data[8];

+      for (int i = 1; i < output_height - 1; i++) {
+        float *output_ptr;
+        float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3, tmp4,
+            tmp5, out0;
+        for (int m = 1; m < output_width - 4; m += 4) {
+          output_ptr = output_data + i * output_width + m;
+          in0 = vld1q_f32(input_data + (i - 1) * input_width + m - 1);
+          in1 = vld1q_f32(input_data + (i - 1) * input_width + m + 3);
+          in2 = vld1q_f32(input_data + i * input_width + m - 1);
+          in3 = vld1q_f32(input_data + i * input_width + m + 3);
+          in4 = vld1q_f32(input_data + (i + 1) * input_width + m - 1);
+          in5 = vld1q_f32(input_data + (i + 1) * input_width + m + 3);
+
+          tmp0 = vextq_f32(in0, in1, 1);
+          tmp1 = vextq_f32(in0, in1, 2);
+          tmp2 = vextq_f32(in2, in3, 1);
+          tmp3 = vextq_f32(in2, in3, 2);
+          tmp4 = vextq_f32(in4, in5, 1);
+          tmp5 = vextq_f32(in4, in5, 2);
+
+          out0 = vmulq_n_f32(in0, w00);
+          out0 = vmlaq_n_f32(out0, tmp0, w01);
+          out0 = vmlaq_n_f32(out0, tmp1, w02);
+          out0 = vmlaq_n_f32(out0, in2, w10);
+          out0 = vmlaq_n_f32(out0, tmp2, w11);
+          out0 = vmlaq_n_f32(out0, tmp3, w12);
+          out0 = vmlaq_n_f32(out0, in4, w20);
+          out0 = vmlaq_n_f32(out0, tmp4, w21);
+          out0 = vmlaq_n_f32(out0, tmp5, w22);
+
+          out0 = vmlaq_f32(vnewbias, vnewscale, out0);
+          if (if_relu) {
+            out0 = vmaxq_f32(out0, vzero);
+          }
+          vst1q_f32(output_ptr, out0);
+        }
+        int m;
+        for (m = 1; (m + 3) < output_width - 1; m = m + 4) {
+        }
+
+        for (int j = m; j < output_width - 1; j++) {
+          output_data[i * output_width + j] =
+              input_data[(i - 1) * input_width + j - 1] * w00 +
+              input_data[(i - 1) * input_width + j] * w01 +
+              input_data[(i - 1) * input_width + j + 1] * w02 +
+              input_data[(i)*input_width + j - 1] * w10 +
+              input_data[(i)*input_width + j] * w11 +
+              input_data[(i)*input_width + j + 1] * w12 +
+              input_data[(i + 1) * input_width + j - 1] * w20 +
+              input_data[(i + 1) * input_width + j] * w21 +
+              input_data[(i + 1) * input_width + j + 1] * w22;
+          output_data[i * output_width + j] =
+              newscale_data[c] * output_data[i * output_width + j] +
+              newbias_data[c];
+          if (if_relu) {
+            output_data[i * output_width + j] =
+                output_data[i * output_width + j] < 0
+                    ? 0
+                    : output_data[i * output_width + j];
+          }
+        }
+      }
+
      output_data[0] = w11 * input_data[0] + w12 * input_data[1] +
                       w21 * input_data[l] + w22 * input_data[l + 1];
      output_data[l - 1] = w10 * input_data[l - 2] + w11 * input_data[l - 1] +
@@ -699,72 +764,6 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
                  : output_data[(output_height - 1) * output_width + j];
        }
      }
-      #pragma omp parallel for
-      for (int i = 1; i < output_height - 1; i++) {
-        for (int m = 1; (m + 3) < output_width - 1; m = m + 4) {
-          float *output_ptr = output_data + i * output_width + m;
-          float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3,
-              tmp4, tmp5, out0;
-          in0 = vld1q_f32(input_data + (i - 1) * input_width + m - 1);
-          in1 = vld1q_f32(input_data + (i - 1) * input_width + m + 3);
-          in2 = vld1q_f32(input_data + i * input_width + m - 1);
-          in3 = vld1q_f32(input_data + i * input_width + m + 3);
-          in4 = vld1q_f32(input_data + (i + 1) * input_width + m - 1);
-          in5 = vld1q_f32(input_data + (i + 1) * input_width + m + 3);
-
-          tmp0 = vextq_f32(in0, in1, 1);
-          tmp1 = vextq_f32(in0, in1, 2);
-          tmp2 = vextq_f32(in2, in3, 1);
-          tmp3 = vextq_f32(in2, in3, 2);
-          tmp4 = vextq_f32(in4, in5, 1);
-          tmp5 = vextq_f32(in4, in5, 2);
-
-          out0 = vmulq_n_f32(in0, w00);
-          out0 = vmlaq_n_f32(out0, tmp0, w01);
-          out0 = vmlaq_n_f32(out0, tmp1, w02);
-          out0 = vmlaq_n_f32(out0, in2, w10);
-          out0 = vmlaq_n_f32(out0, tmp2, w11);
-          out0 = vmlaq_n_f32(out0, tmp3, w12);
-          out0 = vmlaq_n_f32(out0, in4, w20);
-          out0 = vmlaq_n_f32(out0, tmp4, w21);
-          out0 = vmlaq_n_f32(out0, tmp5, w22);
-
-          out0 = vmlaq_f32(vnewbias, vnewscale, out0);
-          if (if_relu) {
-            out0 = vmaxq_f32(out0, vzero);
-          }
-          vst1q_f32(output_ptr, out0);
-        }
-        int m;
-        for (m = 1; (m + 3) < output_width - 1; m = m + 4) {
-        }
-
-        for (int j = m; j < output_width - 1; j++) {
-          output_data[i * output_width + j] =
-              input_data[(i - 1) * input_width + j - 1] * w00 +
-              input_data[(i - 1) * input_width + j] * w01 +
-              input_data[(i - 1) * input_width + j + 1] * w02 +
-              input_data[(i)*input_width + j - 1] * w10 +
-              input_data[(i)*input_width + j] * w11 +
-              input_data[(i)*input_width + j + 1] * w12 +
-              input_data[(i + 1) * input_width + j - 1] * w20 +
-              input_data[(i + 1) * input_width + j] * w21 +
-              input_data[(i + 1) * input_width + j + 1] * w22;
-          output_data[i * output_width + j] =
-              newscale_data[c] * output_data[i * output_width + j] +
-              newbias_data[c];
-          if (if_relu) {
-            output_data[i * output_width + j] =
-                output_data[i * output_width + j] < 0
-                    ? 0
-                    : output_data[i * output_width + j];
-          }
-        }
-      }
-
-      input_data = input_data + hxw;
-      output_data = output_data + hxw;
-      filter_data = filter_data + 9;
    }
  }

@@ -1466,9 +1465,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                     Tensor *output, const Tensor *new_scale,
                                     const Tensor *new_bias, bool if_relu) {
 #if __ARM_NEON
-  const float *input_data = input->data<float>();
-  const float *filter_data = filter->data<float>();
-  float *output_data = output->data<float>();
+#ifdef _OPENMP
  const float *newscale_data = new_scale->data<float>();
  const float *newbias_data = new_bias->data<float>();

@@ -1482,14 +1479,15 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
  const int inhxw = input_height * input_width;
  const int outhxw = output_height * output_width;

-  float32x4_t vnewbias = vdupq_n_f32(0.0);
-  float32x4_t vnewscale = vdupq_n_f32(1.0);
  float32x4_t zero = vdupq_n_f32(0.0);
  for (int b = 0; b < batch_size; b++) {
-    filter_data = filter->data<float>();
+    #pragma omp parallel for
    for (int c = 0; c < input_channel; c++) {
-      vnewbias = vdupq_n_f32(newbias_data[c]);
-      vnewscale = vdupq_n_f32(newscale_data[c]);
+      const float *filter_data = filter->data<float>() + c * 9;
+      const float *input_data = input->data<float>() + c * inhxw;
+      float *output_data = output->data<float>() + c * outhxw;
+      float32x4_t vnewbias = vdupq_n_f32(newbias_data[c]);
+      float32x4_t vnewscale = vdupq_n_f32(newscale_data[c]);

      float w00 = filter_data[0];
      float w01 = filter_data[1];
@@ -1527,7 +1525,9 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
        if (if_relu) {
          out0 = vmaxq_f32(out0, zero);
        }
-        vst1q_f32(output_ptr, out0);
+        vst1q_lane_f32(output_ptr, out0, 0);
+        vst1q_lane_f32(output_ptr + 1, out0, 1);
+        vst1q_lane_f32(output_ptr + 2, out0, 2);
      }
      for (m = 1; m < output_width - 2; m += 3) {
      }
@@ -1543,8 +1543,6 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
        }
      }

-#pragma omp parallel for
-
      for (int i = 1; i < output_height; i += 1) {
        for (int m = 1; m < output_width - 2; m += 3) {
          float *output_ptr = output_data + i * output_width + m;
@@ -1583,7 +1581,9 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
          if (if_relu) {
            out0 = vmaxq_f32(out0, zero);
          }
-          vst1q_f32(output_ptr, out0);
+          vst1q_lane_f32(output_ptr, out0, 0);
+          vst1q_lane_f32(output_ptr + 1, out0, 1);
+          vst1q_lane_f32(output_ptr + 2, out0, 2);
        }
        int m;
        for (m = 1; m < output_width - 2; m += 3) {
@@ -1635,258 +1635,242 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                              : output_data[i * output_width];
        }
      }
-
-      input_data = input_data + inhxw;
-      output_data = output_data + outhxw;
-      filter_data = filter_data + 9;
    }
  }

-    //  const float *input_data = input->data<float>();
-    //  const float *filter_data = filter->data<float>();
-    //  float *output_data = output->data<float>();
-    //  const float *newscale_data = new_scale->data<float>();
-    //  const float *newbias_data = new_bias->data<float>();
-    //
-    //  float32x4_t vnewbias = vdupq_n_f32(0.0);
-    //  float32x4_t vnewscale = vdupq_n_f32(1.0);
-    //
-    //  const int in_h = static_cast<int>(input->dims()[2]);
-    //  const int in_w = static_cast<int>(input->dims()[3]);
-    //  const int out_h = static_cast<int>(output->dims()[2]);
-    //  const int out_w = static_cast<int>(output->dims()[3]);
-    //  const int out_l = out_h;
-    //  const int in_l = in_h;
-    //  const int inhxw = in_h * in_w;
-    //  const int outhxw = out_h * out_w;
-    //  const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 1 : 0;
-    //  const int batch_size = static_cast<int>(input->dims()[0]);
-    //  const int c = static_cast<int>(input->dims()[1]);
-    //  const float *input_row_ptr;
-    //  float *output_row_ptr;
-    //
-    //  const int w_times = (out_w - 2) / 3;
-    //
-    //  float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1];
-    //  float32x4_t elewise_res0, elewise_res1, elewise_res2, res3;
-    //  int out2in_mid;
-    //  float32x4_t zero = vdupq_n_f32(0.0);
-    //  for (int b = batch_size; b > 0; --b) {
-    //    const float *filter_data_tmp = filter_data;
-    //    for (int j = 0; j < c; ++j) {
-    //      auto output_data_tmp = output_data + j * out_h * out_w;
-    //      auto input_data_tmp = input_data + j * in_h * in_w;
-    //      auto input_const = input_data_tmp;
-    //
-    //      vnewbias = vdupq_n_f32(newbias_data[j]);
-    //      vnewscale = vdupq_n_f32(newscale_data[j]);
-    //
-    //      float w00 = filter_data_tmp[0];
-    //      float w01 = filter_data_tmp[1];
-    //      float w02 = filter_data_tmp[2];
-    //      float w10 = filter_data_tmp[3];
-    //      float w11 = filter_data_tmp[4];
-    //      float w12 = filter_data_tmp[5];
-    //      float w20 = filter_data_tmp[6];
-    //      float w21 = filter_data_tmp[7];
-    //      float w22 = filter_data_tmp[8];
-    //
-    //      int h_mid = 0;
-    //
-    //      for (; h_mid < out_h - 1; h_mid++) {
-    //        input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
-    //        output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
-    //
-    //        for (int w4 = 0; w4 < w_times + 1; w4++) {
-    //          if (h_mid == 0) {
-    //            elewise_res1 = zero;
-    //            elewise_res0 = zero;
-    //            elewise_res2 = zero;
-    //          } else {
-    //            elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
-    //            elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
-    //            elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
-    //          }
-    //          input_buff_mid = vld2q_f32(input_row_ptr);
-    //          input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
-    //
-    //          elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1],
-    //          w11); elewise_res0 = vmlaq_n_f32(elewise_res0,
-    //          input_buff_mid.val[0], w10); elewise_res2 =
-    //          vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
-    //
-    //          elewise_res1 =
-    //              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1],
-    //              w21);
-    //          elewise_res0 =
-    //              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0],
-    //              w20);
-    //          elewise_res2 =
-    //              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0],
-    //              w22);
-    //
-    //          res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
-    //                           vaddq_f32(elewise_res0, elewise_res1));
-    //          res3 = vmlaq_f32(vnewbias, vnewscale, res3);
-    //
-    //          if (if_relu) {
-    //            res3 = vmaxq_f32(res3, zero);
-    //          }
-    //          vst1q_f32(output_row_ptr, res3);
-    //
-    //          input_row_ptr += 6;
-    //          output_row_ptr += 3;
-    //        }
-    //      }
-    //      clock();
-    //
-    //      input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
-    //      output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
-    //
-    //      for (int w4 = 0; w4 < w_times + 1; w4++) {
-    //        elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
-    //        elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
-    //        elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
-    //
-    //        input_buff_mid = vld2q_f32(input_row_ptr);
-    //        input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
-    //
-    //        elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1],
-    //        w11); elewise_res0 = vmlaq_n_f32(elewise_res0,
-    //        input_buff_mid.val[0], w10); elewise_res2 =
-    //        vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
-    //
-    //        if (!if_pad) {
-    //          elewise_res1 =
-    //              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1],
-    //              w21);
-    //          elewise_res0 =
-    //              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0],
-    //              w20);
-    //          elewise_res2 =
-    //              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0],
-    //              w22);
-    //        }
-    //        res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
-    //                         vaddq_f32(elewise_res0, elewise_res1));
-    //        res3 = vmlaq_f32(vnewbias, vnewscale, res3);
-    //
-    //        if (if_relu) {
-    //          res3 = vmaxq_f32(res3, zero);
-    //        }
-    //        if ((w4 != w_times)) {
-    //          vst1q_f32(output_row_ptr, res3);
-    //        } else {
-    //          if (out_l - 2 - w_times * 3 == 1) {
-    //            vst1q_lane_f32(output_row_ptr, res3, 0);
-    //          } else if (out_l - 2 - w_times * 3 == 2) {
-    //            vst1q_lane_f32(output_row_ptr, res3, 0);
-    //            vst1q_lane_f32(output_row_ptr + 1, res3, 1);
-    //          }
-    //        }
-    //        input_row_ptr += 6;
-    //        output_row_ptr += 3;
-    //      }
-    //
-    //      output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 +
-    //                           input_const[in_l] * w21 +
-    //                           input_const[in_l + 1] * w22;
-    //
-    //      out2in_mid = (out_l - 1) * 2;
-    //      output_data_tmp[out_l - 1] =
-    //          w10 * input_const[out2in_mid - 1] + w11 *
-    //          input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w -
-    //          1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) * (w12
-    //          * input_const[out2in_mid + 1] +
-    //                          w22 * input_const[out2in_mid + in_w + 1]);
-    //
-    //      out2in_mid = (out_l - 1) * 2 * in_w;
-    //
-    //      output_data_tmp[out_l * (out_l - 1)] =
-    //          w01 * input_const[out2in_mid - in_w] +
-    //          w02 * input_const[out2in_mid - in_w + 1] +
-    //          w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid +
-    //          1] + (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
-    //                          w22 * input_const[out2in_mid + in_w + 1]);
-    //      out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2;
-    //
-    //      output_data_tmp[out_l * out_l - 1] =
-    //          w00 * input_const[out2in_mid - in_w - 1] +
-    //          w01 * input_const[out2in_mid - in_w] +
-    //          w10 * input_const[out2in_mid - 1] + w11 *
-    //          input_const[out2in_mid] + (1 - if_pad) * (w20 *
-    //          input_const[out2in_mid + in_w - 1] +
-    //                          w21 * input_const[out2in_mid + in_w] +
-    //                          w02 * input_const[out2in_mid - in_w + 1] +
-    //                          w12 * input_const[out2in_mid + 1] +
-    //                          w22 * input_const[out2in_mid + in_w + 1]);
-    //      output_data_tmp[0] =
-    //          output_data_tmp[0] * newscale_data[j] + newbias_data[j];
-    //      output_data_tmp[out_l - 1] =
-    //          output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j];
-    //      output_data_tmp[out_l * (out_l - 1)] =
-    //          output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] +
-    //          newbias_data[j];
-    //      output_data_tmp[out_l * out_l - 1] =
-    //          output_data_tmp[out_l * out_l - 1] * newscale_data[j] +
-    //          newbias_data[j];
-    //      if (if_relu) {
-    //        output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 :
-    //        output_data_tmp[0]; output_data_tmp[out_l - 1] =
-    //            output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l -
-    //            1];
-    //        output_data_tmp[out_l * (out_l - 1)] =
-    //            output_data_tmp[out_l * (out_l - 1)] < 0
-    //                ? 0
-    //                : output_data_tmp[out_l * (out_l - 1)];
-    //        output_data_tmp[out_l * out_l - 1] =
-    //            output_data_tmp[out_l * out_l - 1] < 0
-    //                ? 0
-    //                : output_data_tmp[out_l * out_l - 1];
-    //      }
-    //      for (int i = 1; i < out_h - 1; i++) {
-    //        out2in_mid = i * 2 * in_w;
-    //        output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w]
-    //        +
-    //                                     w02 * input_const[out2in_mid - in_w +
-    //                                     1] + w11 * input_const[out2in_mid] +
-    //                                     w12 * input_const[out2in_mid + 1] +
-    //                                     w21 * input_const[out2in_mid + in_w]
-    //                                     + w22 * input_const[out2in_mid + in_w
-    //                                     + 1];
-    //
-    //        out2in_mid = i * 2 * in_w + (out_l - 1) * 2;
-    //        output_data_tmp[i * out_l + out_l - 1] =
-    //            w00 * input_const[out2in_mid - in_w - 1] +
-    //            w01 * input_const[out2in_mid - in_w] +
-    //            w10 * input_const[out2in_mid - 1] + w11 *
-    //            input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w
-    //            - 1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) *
-    //            (w02 * input_const[out2in_mid - in_w + 1] +
-    //                            w12 * input_const[out2in_mid + 1] +
-    //                            w22 * input_const[out2in_mid + in_w + 1]);
-    //        output_data_tmp[i * out_l] =
-    //            output_data_tmp[i * out_l] * newscale_data[j] +
-    //            newbias_data[j];
-    //        output_data_tmp[i * out_l + out_l - 1] =
-    //            output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] +
-    //            newbias_data[j];
-    //        if (if_relu) {
-    //          output_data_tmp[i * out_l] =
-    //              output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i *
-    //              out_l];
-    //          output_data_tmp[i * out_l + out_l - 1] =
-    //              output_data_tmp[i * out_l + out_l - 1] < 0
-    //                  ? 0
-    //                  : output_data_tmp[i * out_l + out_l - 1];
-    //        }
-    //      }
-    //      filter_data_tmp += 9;
-    //    }
-    //    input_data += inhxw * c;
-    //    output_data += outhxw * c;
-    //  }
+#else  // _OPENMP not defined
+  // Serial path: 3x3 stride-2 depthwise conv + per-channel scale/bias + opt. ReLU.
+  const float *input_data = input->data<float>();
+  const float *filter_data = filter->data<float>();
+  float *output_data = output->data<float>();
+  const float *newscale_data = new_scale->data<float>();
+  const float *newbias_data = new_bias->data<float>();
+
+  float32x4_t vnewbias = vdupq_n_f32(0.0);
+  float32x4_t vnewscale = vdupq_n_f32(1.0);
+  // NOTE(review): out_l (a height) also serves as a width below -> square only?
+  const int in_h = static_cast<int>(input->dims()[2]);
+  const int in_w = static_cast<int>(input->dims()[3]);
+  const int out_h = static_cast<int>(output->dims()[2]);
+  const int out_w = static_cast<int>(output->dims()[3]);
+  const int out_l = out_h;
+  const int in_l = in_h;
+  const int inhxw = in_h * in_w;
+  const int outhxw = out_h * out_w;
+  const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 1 : 0;  // 1: last taps padded
+  const int batch_size = static_cast<int>(input->dims()[0]);
+  const int c = static_cast<int>(input->dims()[1]);
+  const float *input_row_ptr;
+  float *output_row_ptr;
+  // Number of complete 3-column groups among interior columns 1 .. out_w - 2.
+  const int w_times = (out_w - 2) / 3;
+  // input_buff_bottom[] keeps input row 2h+1, reused as the top row of row h+1.
+  float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1];
+  float32x4_t elewise_res0, elewise_res1, elewise_res2, res3;
+  int out2in_mid;
+  float32x4_t zero = vdupq_n_f32(0.0);
+  for (int b = batch_size; b > 0; --b) {
+    const float *filter_data_tmp = filter_data;
+    for (int j = 0; j < c; ++j) {
+      auto output_data_tmp = output_data + j * out_h * out_w;
+      auto input_data_tmp = input_data + j * in_h * in_w;
+      auto input_const = input_data_tmp;
+
+      vnewbias = vdupq_n_f32(newbias_data[j]);
+      vnewscale = vdupq_n_f32(newscale_data[j]);
+
+      float w00 = filter_data_tmp[0];
+      float w01 = filter_data_tmp[1];
+      float w02 = filter_data_tmp[2];
+      float w10 = filter_data_tmp[3];
+      float w11 = filter_data_tmp[4];
+      float w12 = filter_data_tmp[5];
+      float w20 = filter_data_tmp[6];
+      float w21 = filter_data_tmp[7];
+      float w22 = filter_data_tmp[8];
+
+      int h_mid = 0;
+      // Output rows 0 .. out_h - 2: interior columns, three outputs per iteration.
+      for (; h_mid < out_h - 1; h_mid++) {
+        input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
+        output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
+        // vld2q splits a row: val[0] = odd columns (taps 0/2), val[1] = even (tap 1).
+        for (int w4 = 0; w4 < w_times + 1; w4++) {
+          if (h_mid == 0) {  // the top input row of output row 0 is padding
+            elewise_res1 = zero;
+            elewise_res0 = zero;
+            elewise_res2 = zero;
+          } else {
+            elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
+            elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
+            elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
+          }
+          input_buff_mid = vld2q_f32(input_row_ptr);
+          input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
+
+          elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11);
+          elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
+          elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
+
+          elewise_res1 =
+              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
+          elewise_res0 =
+              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20);
+          elewise_res2 =
+              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22);
+          // Right taps come from the next odd column, hence the one-lane shift.
+          res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
+                           vaddq_f32(elewise_res0, elewise_res1));
+          res3 = vmlaq_f32(vnewbias, vnewscale, res3);
+
+          if (if_relu) {
+            res3 = vmaxq_f32(res3, zero);
+          }
+          vst1q_f32(output_row_ptr, res3);
+          // 4 lanes stored, pointer advances 3: lane 3 is overwritten afterwards.
+          input_row_ptr += 6;
+          output_row_ptr += 3;
+        }
+      }
+      // Last output row: the bottom taps are skipped when if_pad is set.
+
+      input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
+      output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
+
+      for (int w4 = 0; w4 < w_times + 1; w4++) {
+        elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
+        elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
+        elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
+
+        input_buff_mid = vld2q_f32(input_row_ptr);
+        input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
+
+        elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11);
+        elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
+        elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
+
+        if (!if_pad) {
+          elewise_res1 =
+              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
+          elewise_res0 =
+              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20);
+          elewise_res2 =
+              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22);
+        }
+        res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
+                         vaddq_f32(elewise_res0, elewise_res1));
+        res3 = vmlaq_f32(vnewbias, vnewscale, res3);
+
+        if (if_relu) {
+          res3 = vmaxq_f32(res3, zero);
+        }
+        if ((w4 != w_times)) {
+          vst1q_f32(output_row_ptr, res3);
+        } else {  // tail group: store only the leftover lanes
+          if (out_l - 2 - w_times * 3 == 1) {
+            vst1q_lane_f32(output_row_ptr, res3, 0);
+          } else if (out_l - 2 - w_times * 3 == 2) {
+            vst1q_lane_f32(output_row_ptr, res3, 0);
+            vst1q_lane_f32(output_row_ptr + 1, res3, 1);
+          }
+        }
+        input_row_ptr += 6;
+        output_row_ptr += 3;
+      }
+      // Scalar fix-ups: the four corners first, then the left/right edge columns.
+      output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 +
+                           input_const[in_w] * w21 +
+                           input_const[in_w + 1] * w22;
+
+      out2in_mid = (out_l - 1) * 2;
+      output_data_tmp[out_l - 1] =
+          w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
+          w20 * input_const[out2in_mid + in_w - 1] +
+          w21 * input_const[out2in_mid + in_w] +
+          (1 - if_pad) * (w12 * input_const[out2in_mid + 1] +
+                          w22 * input_const[out2in_mid + in_w + 1]);

+      out2in_mid = (out_l - 1) * 2 * in_w;
+
+      output_data_tmp[out_l * (out_l - 1)] =
+          w01 * input_const[out2in_mid - in_w] +
+          w02 * input_const[out2in_mid - in_w + 1] +
+          w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + 1] +
+          (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
+                          w22 * input_const[out2in_mid + in_w + 1]);
+      out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2;
+
+      output_data_tmp[out_l * out_l - 1] =
+          w00 * input_const[out2in_mid - in_w - 1] +
+          w01 * input_const[out2in_mid - in_w] +
+          w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
+          (1 - if_pad) * (w20 * input_const[out2in_mid + in_w - 1] +
+                          w21 * input_const[out2in_mid + in_w] +
+                          w02 * input_const[out2in_mid - in_w + 1] +
+                          w12 * input_const[out2in_mid + 1] +
+                          w22 * input_const[out2in_mid + in_w + 1]);
+      output_data_tmp[0] =
+          output_data_tmp[0] * newscale_data[j] + newbias_data[j];
+      output_data_tmp[out_l - 1] =
+          output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j];
+      output_data_tmp[out_l * (out_l - 1)] =
+          output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] +
+          newbias_data[j];
+      output_data_tmp[out_l * out_l - 1] =
+          output_data_tmp[out_l * out_l - 1] * newscale_data[j] +
+          newbias_data[j];
+      if (if_relu) {
+        output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 : output_data_tmp[0];
+        output_data_tmp[out_l - 1] =
+            output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l - 1];
+        output_data_tmp[out_l * (out_l - 1)] =
+            output_data_tmp[out_l * (out_l - 1)] < 0
+                ? 0
+                : output_data_tmp[out_l * (out_l - 1)];
+        output_data_tmp[out_l * out_l - 1] =
+            output_data_tmp[out_l * out_l - 1] < 0
+                ? 0
+                : output_data_tmp[out_l * out_l - 1];
+      }
+      for (int i = 1; i < out_h - 1; i++) {  // first and last column of row i
+        out2in_mid = i * 2 * in_w;
+        output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w] +
+                                     w02 * input_const[out2in_mid - in_w + 1] +
+                                     w11 * input_const[out2in_mid] +
+                                     w12 * input_const[out2in_mid + 1] +
+                                     w21 * input_const[out2in_mid + in_w] +
+                                     w22 * input_const[out2in_mid + in_w + 1];
+
+        out2in_mid = i * 2 * in_w + (out_l - 1) * 2;
+        output_data_tmp[i * out_l + out_l - 1] =
+            w00 * input_const[out2in_mid - in_w - 1] +
+            w01 * input_const[out2in_mid - in_w] +
+            w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
+            w20 * input_const[out2in_mid + in_w - 1] +
+            w21 * input_const[out2in_mid + in_w] +
+            (1 - if_pad) * (w02 * input_const[out2in_mid - in_w + 1] +
+                            w12 * input_const[out2in_mid + 1] +
+                            w22 * input_const[out2in_mid + in_w + 1]);
+        output_data_tmp[i * out_l] =
+            output_data_tmp[i * out_l] * newscale_data[j] + newbias_data[j];
+        output_data_tmp[i * out_l + out_l - 1] =
+            output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] +
+            newbias_data[j];
+        if (if_relu) {
+          output_data_tmp[i * out_l] =
+              output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i * out_l];
+          output_data_tmp[i * out_l + out_l - 1] =
+              output_data_tmp[i * out_l + out_l - 1] < 0
+                  ? 0
+                  : output_data_tmp[i * out_l + out_l - 1];
+        }
+      }
+      filter_data_tmp += 9;  // next channel's 3x3 kernel
+    }
+    input_data += inhxw * c;  // advance to the next batch item
+    output_data += outhxw * c;
+  }
+#endif
 #endif
 }


--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -2957,8 +2957,8 @@ void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
      "vmov.f32   q15,    #0.0          \n\t"

      "subs       %[kc1], %[kc1], #1    \n\t"
-      "blt        end_kc1_%=            \n\t"
-      "loop_kc1_%=:                     \n\t"
+      "blt        2f                    \n\t"
+      "1:                               \n\t"

      //      "pld        [%[a_ptr], #128]       \n\t"
      //      "pld        [%[b_ptr], #128]       \n\t"
@@ -3030,12 +3030,12 @@ void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
      "vmla.f32   q15,  q3,   d2[1]       \n\t"

      "subs       %[kc1], %[kc1], #1      \n\t"
-      "bge        loop_kc1_%=             \n\t"
-      "end_kc1_%=:                        \n\t"
+      "bge        1b                      \n\t"
+      "2:                                 \n\t"

      "subs       %[kc2], %[kc2], #1      \n\t"
-      "blt        end_kc2_%=              \n\t"
-      "loop_kc2_%=:                       \n\t"
+      "blt        4f                      \n\t"
+      "3:                                 \n\t"

      "vld1.32    {d0-d2},  [%[a_ptr]]!   \n\t"
      "vld1.32    {q2, q3}, [%[b_ptr]]!   \n\t"
@@ -3054,8 +3054,8 @@ void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
      "vmla.f32   q15,  q3,   d2[1]       \n\t"

      "subs       %[kc2], %[kc2], #1      \n\t"
-      "bge        loop_kc2_%=             \n\t"
-      "end_kc2_%=:                        \n\t"
+      "bge        3b                      \n\t"
+      "4:                                 \n\t"

      "mov        r5,     %[c]            \n\t"
      "mov        r6,     %[step]         \n\t"
@@ -3113,8 +3113,8 @@ void AddDot8x12(int k, const float *a, const float *b, float *c, int ldc) {
      "dup      v28.4s,    wzr     \n\t"

      "subs       %[kc1], %[kc1], #1    \n\t"
-      "blt        end_kc1_%=            \n\t"
-      "loop_kc1_%=:                     \n\t"
+      "blt        2f                    \n\t"
+      "1:                               \n\t"

      "prfm     pldl1keep,         [%[a_ptr],   #32]  \n\t"
      "prfm     pldl1keep,         [%[b_ptr],   #48]  \n\t"
@@ -3149,8 +3149,8 @@ void AddDot8x12(int k, const float *a, const float *b, float *c, int ldc) {
      "fmla     v28.4s,   v4.4s,   v1.s[3]       \n\t"

      "subs       %[kc1], %[kc1], #1      \n\t"
-      "bge        loop_kc1_%=             \n\t"
-      "end_kc1_%=:                        \n\t"
+      "bge        1b                      \n\t"
+      "2:                                 \n\t"

      "st1      {v5.4s,   v6.4s,  v7.4s},    [%[c]],   %[step]   \n\t"
      "st1      {v8.4s,   v9.4s,  v10.4s},   [%[c]],   %[step]   \n\t"
@@ -3205,8 +3205,8 @@ void AddDot6x16(int k, const float *a, const float *b, float *c, int ldc) {
      "dup      v29.4s,    wzr     \n\t"

      "subs       %[kc1], %[kc1], #1    \n\t"
-      "blt        end_kc1_%=            \n\t"
-      "loop_kc1_%=:                     \n\t"
+      "blt        2f                    \n\t"
+      "1:                               \n\t"

      "prfm   pldl1keep,  [%[a_ptr],  #24]  \n\t"
      "prfm   pldl1keep,  [%[b_ptr],  #64]  \n\t"
@@ -3245,8 +3245,8 @@ void AddDot6x16(int k, const float *a, const float *b, float *c, int ldc) {
      "fmla     v29.4s,   v5.4s,   v1.s[1]       \n\t"

      "subs       %[kc1], %[kc1], #1      \n\t"
-      "bge        loop_kc1_%=             \n\t"
-      "end_kc1_%=:                        \n\t"
+      "bge        1b                      \n\t"
+      "2:                                 \n\t"

      "st1      {v6.4s,  v7.4s,  v8.4s,  v9.4s},    [%[c]],   %[step]   \n\t"
      "st1      {v10.4s, v11.4s, v12.4s, v13.4s},   [%[c]],   %[step]   \n\t"

--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -24,7 +24,7 @@ limitations under the License. */
 #include "framework/tensor.h"
 #include "framework/variable.h"
 #ifdef PADDLE_MOBILE_FPGA
-#include "fpga/api/fpga_api.h"
+#include "fpga/api.h"
 #endif

 namespace paddle_mobile {
@@ -73,6 +73,11 @@ struct DtypeTensorTrait<GPU_MALI> {

 class OpParam {
 protected:
+  template <typename T>  // T: type the "Alpha" variable is read as
+  static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Alpha", inputs, scope);  // key "Alpha" in `inputs`
+  }
+
  template <typename T>
  static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
    return GetVarValue<T>("Input", inputs, scope);
@@ -248,7 +253,7 @@ class ConvParam : OpParam {

  const RType *Input() const { return input_; }

-  const RType *Filter() const { return filter_; }
+  RType *Filter() const { return filter_; }

  RType *Output() const { return output_; }

@@ -655,6 +660,21 @@ class SoftmaxParam : public OpParam {
 private:
  RType *input_x_;
  RType *out_;
+
+#ifdef PADDLE_MOBILE_FPGA
+  // FPGA-only members: an optional substitute input tensor and bypass arguments.
+ private:
+  std::shared_ptr<RType> float_input_x_;
+  fpga::BypassArgs fpga_bypass_args;
+  // FloatInput() falls back to input_x_ while float_input_x_ is null.
+ public:
+  RType *FloatInput() {
+    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
+  }
+  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }  // takes ownership of input
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };
 #endif

@@ -752,16 +772,6 @@ class FeedParam : public OpParam {
  RType *input_x_;
  RType *out_;
  int batch_size;
-
-#ifdef PADDLE_MOBILE_FPGA
-
- private:
-  fpga::BypassArgs fpga_bypass_args;
-
- public:
-  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
-  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
-#endif
 };

 template <typename Dtype>
@@ -1009,19 +1019,24 @@ class PReluParam : public OpParam {
 public:
  PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const AttributeMap &attrs, const Scope &scope) {
+    // Resolves the X and Alpha inputs, the Out output and the "mode" attribute.
+    DLOG << "PReluParam inputs before";
    input_x_ = InputXFrom<GType>(inputs, scope);
+    alpha_ = InputAlphaFrom<GType>(inputs, scope);
    out_ = OutFrom<GType>(outputs, scope);
-    slopes_ = GetAttr<vector<float>>("slopes", attrs);
+    mode_ = GetAttr<std::string>("mode", attrs);
+    DLOG << "PReluParam mode after" << mode_;
  }
-
  const RType *InputX() const { return input_x_; }
+  const RType *InputAlpha() const { return alpha_; }
  RType *Out() const { return out_; }
-  const vector<float> &Slopes() const { return slopes_; }
+  const std::string &Mode() const { return mode_; }

 private:
  RType *input_x_;
  RType *out_;
-  vector<float> slopes_;
+  RType *alpha_;
+  std::string mode_;
 };
 #endif

@@ -1043,7 +1058,11 @@ class FusionFcParam : public OpParam {
  }
  const RType *InputX() const { return input_x_; }

+#ifdef PADDLE_MOBILE_FPGA
+  RType *InputY() const { return input_y_; }
+#else
  const RType *InputY() const { return input_y_; }
+#endif

  const RType *InputZ() const { return input_z_; }

@@ -1104,7 +1123,11 @@ class FusionConvAddParam : public OpParam {

  const RType *Input() const { return input_; }

+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
  const RType *Filter() const { return filter_; }
+#endif

  RType *Output() const { return output_; }

@@ -1184,7 +1207,11 @@ class FusionConvAddBNReluParam : public OpParam {

  const RType *Input() const { return input_; }

+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
  const RType *Filter() const { return filter_; }
+#endif

  RType *Output() const { return output_; }

@@ -1249,6 +1276,99 @@ class FusionConvAddBNReluParam : public OpParam {
 };
 #endif

+#ifdef FUSION_CONVBN_OP
+// Parameter holder for the fused conv + batch-norm op. The constructor
+// resolves the filter/input/output and batch-norm tensors through the
+// *From<GType>() helpers and reads the conv / batch-norm attributes via GetAttr.
+template <typename Dtype>
+class FusionConvBNParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  FusionConvBNParam(const VariableNameMap &inputs,
+                    const VariableNameMap &outputs, const AttributeMap &attrs,
+                    const Scope &scope) {
+    filter_ = FilterFrom<GType>(inputs, scope);
+    input_ = InputFrom<GType>(inputs, scope);
+    output_y_ = OutputYFrom<GType>(outputs, scope);
+    strides_ = GetAttr<vector<int>>("strides", attrs);
+    paddings_ = GetAttr<vector<int>>("paddings", attrs);
+    dilations_ = GetAttr<vector<int>>("dilations", attrs);
+    groups = GetAttr<int>("groups", attrs);
+    input_bias_ = InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = GetAttr<float>("epsilon", attrs);
+    momentum_ = GetAttr<float>("momentum", attrs);
+    // NOTE: the "is_test" attribute is not read; is_test_ keeps its default.
+  }
+
+  const RType *Input() const { return input_; }
+
+  // FPGA builds hand out a mutable filter pointer; other builds keep it const.
+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
+  const RType *Filter() const { return filter_; }
+#endif
+  RType *Output() const { return output_y_; }
+
+  const vector<int> &Strides() const { return strides_; }
+
+  const vector<int> &Paddings() const { return paddings_; }
+
+  const vector<int> &Dilations() const { return dilations_; }
+
+  const int &Groups() const { return groups; }
+
+  const RType *InputBias() const { return input_bias_; }
+  const RType *InputMean() const { return input_mean_; }
+  const RType *InputScale() const { return input_scale_; }
+  const RType *InputVariance() const { return input_variance_; }
+
+  const float &Epsilon() const { return epsilon_; }
+  const float &Momentum() const { return momentum_; }
+
+  const bool &IsTest() const { return is_test_; }
+
+  // new_scale_ / new_bias_ are assigned only through these setters; they stay
+  // nullptr until a caller provides them.
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+
+  const RType *NewScale() const { return new_scale_; }
+  const RType *NewBias() const { return new_bias_; }
+
+ protected:
+  RType *input_;
+  RType *output_y_;
+  RType *filter_;
+  vector<int> strides_;
+  vector<int> paddings_;
+  vector<int> dilations_;
+  int groups;
+  RType *input_bias_;
+  RType *input_mean_;
+  RType *input_scale_;
+  RType *input_variance_;
+  float epsilon_;
+  float momentum_;
+  bool is_test_ = false;  // never loaded from attrs; deterministic default
+  RType *new_bias_ = nullptr;
+  RType *new_scale_ = nullptr;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::ConvArgs fpga_conv_args;
+
+ public:
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+#endif
+};
+#endif
+
 #ifdef FUSION_CONVADDBN_OP
 template <typename Dtype>
 class FusionConvAddBNParam : public OpParam {
@@ -1282,8 +1402,11 @@ class FusionConvAddBNParam : public OpParam {

  const RType *Input() const { return input_; }

+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
  const RType *Filter() const { return filter_; }
-
+#endif
  RType *Output() const { return output_y_; }

  const vector<int> &Strides() const { return strides_; }
@@ -1459,7 +1582,11 @@ class FusionConvBNReluParam : public OpParam {

  const RType *Input() const { return input_; }

+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
  const RType *Filter() const { return filter_; }
+#endif

  RType *Output() const { return output_; }

@@ -1510,6 +1637,15 @@ class FusionConvBNReluParam : public OpParam {
  bool is_test_;
  RType *new_bias_;
  RType *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::ConvArgs fpga_conv_args;
+
+ public:
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif


--- a/src/operators/softmax_op.cpp
+++ b/src/operators/softmax_op.cpp
@@ -34,6 +34,7 @@ REGISTER_OPERATOR_CPU(softmax, ops::SoftmaxOp);
 REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp);
 #endif

 #endif
--- a/src/operators/softmax_op.h
+++ b/src/operators/softmax_op.h
@@ -55,6 +55,7 @@ USE_OP_CPU(softmax);
 USE_OP_MALI_GPU(softmax);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(softmax);
 #endif

 #endif
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET)
    # gen test
    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
    target_link_libraries(test-mobilenet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
 elseif ("yolo" IN_LIST NET)
    # gen test
    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
@@ -22,6 +27,18 @@ elseif("resnet" IN_LIST NET)
    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
    target_link_libraries(test-resnet paddle-mobile)
 elseif("FPGAnets" IN_LIST NET)
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet paddle-mobile)
+
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-fpga-concat-op paddle-mobile)
+elseif("mobilenetssd" IN_LIST NET)
+    # gen test
+    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenetssd paddle-mobile)
 else ()

    # gen test
@@ -138,6 +155,14 @@ else ()
    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
    target_link_libraries(test-mobilenetssd paddle-mobile)

+     # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+
    # gen test
    ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp  test_include.h)
    target_link_libraries(test-sigmoid paddle-mobile)
@@ -164,8 +189,7 @@ else ()

 endif()

-if(FPGA)
-    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-tensor-quant paddle-mobile)
-
-endif()
+# if(FPGA)
+#     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+#     target_link_libraries(test-tensor-quant paddle-mobile)
+# endif()
--- a/test/common/test_gemm_perf.cpp
+++ b/test/common/test_gemm_perf.cpp
@@ -14,6 +14,7 @@ limitations under the License. */

 #include <iostream>
 #include "../test_helper.h"
+#include "../test_include.h"
 #include "operators/math/gemm.h"
 #include "operators/math/math_function.h"

@@ -26,6 +27,8 @@ limitations under the License. */
 #define k 1024

 int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
  Tensor aa, bb, cc, scale, bias;
  auto aaptr = aa.mutable_data<float>({m, k});
  auto bbptr = bb.mutable_data<float>({k, n});

--- a/test/fpga/test_concat_op.cpp
+++ b/test/fpga/test_concat_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/concat_op.h"
+
+// Exercises the FPGA ConcatOp through Executor4Test: four inputs with 10, 20,
+// 30 and 40 channels are concatenated (axis = 1) and one element of the third
+// input is logged next to the output element at the matching position.
+int main() {
+  paddle_mobile::Loader<paddle_mobile::FPGA> loader;
+  auto program = loader.Load(g_googlenet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::FPGA,
+                paddle_mobile::operators::ConcatOp<paddle_mobile::FPGA, float>>
+      executor(program, "concat");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {4, 10, 2, 2}, 0, 1);
+  input_tensors.push_back(input1);
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {4, 20, 2, 2}, 0, 1);
+  input_tensors.push_back(input2);
+  Tensor input3;
+  auto input3_data = CreateInput<float>(&input3, {4, 30, 2, 2}, 0, 1);
+  input_tensors.push_back(input3);
+  Tensor input4;
+  auto input4_data = CreateInput<float>(&input4, {4, 40, 2, 2}, 0, 1);
+  input_tensors.push_back(input4);
+  // 2. input_names
+  vector<string> input_names({
+      "conv2d_3.tmp_1",
+      "conv2d_5.tmp_1",
+      "conv2d_7.tmp_1",
+      "conv2d_8.tmp_1",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"concat_0.tmp_0"});
+
+  // 4. out_dims; the batch size matches the inputs (4) and the channel count
+  // is 10 + 20 + 30 + 40 = 100.
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({4, 100, 2, 2});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  // 5. test one example: element [n=1, c=2, h=0, w=1] of input3.
+  int input_n = 1;
+  int input_c = 2;
+  int input_h = 0;
+  int input_w = 1;
+  // Element strides of input3 for the n, c and h dimensions.
+  int stride0 = input3.numel() / input3.dims()[0];
+  int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1];
+  int stride2 = input3.dims()[3];
+  /// inputx1 (4,10,2,2),
+  /// inputx2 (4,20,2,2),
+  /// inputx3 (4,30,2,2),
+  /// inputx4 (4,40,2,2),
+  /// axis = 1
+  /// output (4,100,2,2)
+  int input_index =
+      input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
+  // In the output, input3's channels come after those of input1 and input2.
+  int output_index = input_n * 100 * 2 * 2 +
+                     (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 +
+                     input_h * 2 + input_w;
+
+  DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index];
+  DLOG << " output [1,32,0,1] = " << output0_data[output_index];
+  return 0;
+}
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -12,23 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"

 int main() {
-  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
-  bool optimize = false;
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time1 = time();
-    DLOG << "load cost: " << time_diff(time1, time1) << "ms";
-    std::vector<float> input;
-    std::vector<int64_t> dims{1, 3, 224, 224};
-    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(g_resnet, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 32, 32};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+    // Warm up once before timing.
+    paddle_mobile.Predict(input, dims);
    auto time3 = time();
-    auto vec_result = paddle_mobile.Predict(input, dims);
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
    auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
  }
+
  return 0;
 }
--- a/test/net/test_genet_combine.cpp
+++ b/test/net/test_genet_combine.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+// Loads the combined model found under g_genet_combine (model + params files),
+// runs one warm-up prediction whose largest output is printed, then times 10
+// further predictions and prints the average cost per run.
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
+                         std::string(g_genet_combine) + "/params", true)) {
+    auto time2 = time();
+    // Load cost is the span between the timestamps taken around Load().
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    // NOTE(review): the image constant is named 1x3x224x224 while dims request
+    // 128x128 -- confirm this is the intended input size for this model.
+    std::vector<int64_t> dims{1, 3, 128, 128};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    // Warm-up run; also report the largest output value and its index.
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    // Timed section: 10 predictions, reported as the mean per prediction.
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  // Printed unconditionally: a hint (in Chinese) to check that the test image
+  // file exists if the result is NaN.
+  std::cout
+      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
+      << std::endl;
+  return 0;
+}
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -17,7 +17,14 @@ limitations under the License. */
 #include "../test_include.h"

 int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+
+#ifdef PADDLE_MOBILE_CPU
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
+
  paddle_mobile.SetThreadNum(4);
  bool optimize = true;
  auto time1 = time();

--- a/test/net/test_mobilenet.cpp
+++ b/test/net/test_mobilenet.cpp
@@ -20,7 +20,11 @@ int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
-  if (paddle_mobile.Load(g_mobilenet, true)) {
+  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
+  //                     std::string(g_mobilenet_detect) + "/params", true);
+
+  auto isok = paddle_mobile.Load(g_mobilenet, true);
+  if (isok) {
    auto time2 = time();
    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;

@@ -39,10 +43,14 @@ int main() {
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
+    DLOG << vec_result;
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }

+  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
+               "是否存在?"
+            << std::endl;
  return 0;
 }
--- a/test/net/test_mobilenet_combine.cpp
+++ b/test/net/test_mobilenet_combine.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+// Loads the combined model found under g_mobilenet_combined (model + params
+// files), runs one warm-up prediction whose largest output is printed, then
+// times 10 further predictions and prints the average cost per run.
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
+                         std::string(g_mobilenet_combined) + "/params", true)) {
+    auto time2 = time();
+    // Load cost is the span between the timestamps taken around Load().
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    // Warm-up run; also report the largest output value and its index.
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    // Timed section: 10 predictions, reported as the mean per prediction.
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  // Printed unconditionally: a hint (in Chinese) to check that the test image
+  // file exists if the result is NaN.
+  std::cout
+      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
+      << std::endl;
+  return 0;
+}
--- a/test/net/test_resnet.cpp
+++ b/test/net/test_resnet.cpp
@@ -12,16 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"

 int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+
+#ifdef PADDLE_MOBILE_CPU
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
+  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
-  if (paddle_mobile.Load(g_resnet, false)) {
+  if (paddle_mobile.Load(g_resnet, true)) {
    auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
    std::vector<int64_t> dims{1, 3, 32, 32};
    Tensor input_tensor;
    SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
@@ -29,10 +36,15 @@ int main() {

    std::vector<float> input(input_tensor.data<float>(),
                             input_tensor.data<float>() + input_tensor.numel());
-    auto time3 = time();
+    // 预热一次
    paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
    auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
  }

  return 0;

--- a/test/net/test_squeezenet.cpp
+++ b/test/net/test_squeezenet.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"

 int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
  //  ../../../test/models/googlenet
  //  ../../../test/models/mobilenet
  auto time1 = time();
-  if (paddle_mobile.Load(g_squeezenet, false)) {
+  if (paddle_mobile.Load(g_squeezenet, true)) {
    auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
    std::vector<int64_t> dims{1, 3, 227, 227};
    Tensor input_tensor;
    SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
@@ -31,10 +32,15 @@ int main() {

    std::vector<float> input(input_tensor.data<float>(),
                             input_tensor.data<float>() + input_tensor.numel());
-    auto time3 = time();
+    // 预热一次
    paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
    auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
  }

  return 0;

--- a/test/net/test_yolo.cpp
+++ b/test/net/test_yolo.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"

 int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
  //  ../../../test/models/googlenet
  //  ../../../test/models/mobilenet
  auto time1 = time();
-  if (paddle_mobile.Load(g_yolo, false)) {
+  if (paddle_mobile.Load(g_yolo, true)) {
    auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;

    std::vector<int64_t> dims{1, 3, 227, 227};
    Tensor input_tensor;
@@ -32,10 +33,15 @@ int main() {

    std::vector<float> input(input_tensor.data<float>(),
                             input_tensor.data<float>() + input_tensor.numel());
-    auto time3 = time();
+    // 预热一次
    paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
    auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
  }
  return 0;
 }
--- a/test/operators/test_fusion_conv_add_bn_relu_op.cpp
+++ b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <iostream>
 #include "../test_include.h"
 #include "operators/fusion_conv_add_bn_relu_op.h"


--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -26,7 +26,10 @@ limitations under the License. */

 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_genet_combine = "../models/enet";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
+static const char *g_mobilenet_combined = "../models/mobilenet_combine";
+static const char *g_mobilenet_detect = "../models/mobilenet-detect";
 static const char *g_squeezenet = "../models/squeezenet";
 static const char *g_googlenet = "../models/googlenet";
 static const char *g_mobilenet = "../models/mobilenet";

--- a/tools/build.sh
+++ b/tools/build.sh
 #!/usr/bin/env bash
 NETS=""
-declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet")
+declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet" "mobilenetssd")

 build_for_mac() {
    if [ ! `which brew` ]; then

--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET)
  set(ELEMENTWISEADD_OP ON)
  set(RELU_OP ON)
  set(SOFTMAX_OP ON)
-  set(SOFTMAX_OP ON)
+  set(MUL_OP ON)
  set(DEPTHWISECONV_OP ON)
  set(BATCHNORM_OP ON)
  set(POOL_OP ON)
@@ -33,6 +33,28 @@ if ("mobilenet" IN_LIST NET)
 endif()


+# Operator switches turned on when "mobilenetssd" is listed in NET.
+if ("mobilenetssd" IN_LIST NET)
+  message("mobilenetssd enabled")
+  set(FUSION_CONVBNRELU_OP ON)
+  set(FUSION_DWCONVBNRELU_OP ON)
+  set(FUSION_CONVADD_OP ON)
+  set(MULTICLASSNMS_OP ON)
+  set(SOFTMAX_OP ON)
+  set(TRANSPOSE_OP ON)
+  # feed
+  set(PRIORBOX_OP ON)
+  set(CONCAT_OP ON)
+  set(BOXCODER_OP ON)
+  set(RESHAPE_OP ON)
+  # fetch
+  # total
+
+  set(FOUND_MATCH ON)
+
+endif()
+
+
 if ("yolo" IN_LIST NET)
  message("yolo enabled")
  set(BATCHNORM_OP ON)
@@ -64,6 +86,8 @@ if ("resnet" IN_LIST NET)
  set(RELU_OP ON)
  set(ELEMENTWISEADD_OP ON)
  set(POOL_OP ON)
+  set(BATCHNORM_OP ON)
+  set(MUL_OP ON)
  set(RESHAPE_OP ON)
  set(SOFTMAX_OP ON)

@@ -82,6 +106,9 @@ if ("FPGAnets" IN_LIST NET)
  set(CONCAT_OP ON)
  set(SOFTMAX_OP ON)
  set(DROPOUT_OP ON)
+  set(FUSION_CONVBNRELU_OP ON)
+  set(FUSION_CONVBN_OP ON)
+  set(FUSION_CONVADD_OP ON)

  set(FOUND_MATCH ON)   
 endif()
@@ -240,8 +267,8 @@ endif()
 if (FUSION_ELEMENTWISEADDRELU_OP)
  add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP)
 endif()
-if (REGION_OP)
-  add_definitions(-DREGION_OP)
+if (FUSION_CONVBN_OP)
+  add_definitions(-DFUSION_CONVBN_OP)
 endif()

 if (CONV_TRANSPOSE_OP)